Mô đun:EtymologicalTree

Từ điển mở Wiktionary

-- Export table: every public function of this module is attached to `p`,
-- which is returned at the end of the file.
local p = {}

-- Language statistics module. This file reads `stats.categoriesByCode`,
-- indexed by language code, and uses the `short` and `isChinese` fields of
-- the records it finds there.
local stats = require("Mô đun:Thống kê Wiktionary")

-- Content language object of the wiki; used for `ucfirst` when building
-- section anchors.
local contentLang = mw.getContentLanguage()

-- Script codes for which p.foreignTextFormat returns the <i> wrapper instead
-- of <span>. Only the presence of a key is tested; the value "i" is not read.
local tagsByScript = {
    Armn = "i", Cyrl = "i", Geor = "i", Grek = "i", Hebr = "i", Latn = "i"
}

--- Parses one etymology line (leading bullets already stripped) into an
-- entry table.
--
-- The line is split on "=" into up to three parts:
--   1. a wikilink `[[language:*page|term]]`, optionally followed by a
--      parenthesised romanization; the language prefix, the `*` marking a
--      reconstructed (unattested) form, and the `|term` label are optional;
--   2. the meaning;
--   3. the relationship.
-- If the second part ends with ":" it is taken as the relationship and the
-- meaning is left empty.
--
-- @param text  string, one line of the etymology list
-- @return table with fields `from` (empty list), `language`, `page`, `term`,
--         `romanization`, `meaning`, `relationship` and `attested`
-- Raises an error when the line does not contain a parsable wikilink.
function p.entry(text)
    local entry = {from = {}}
    local equivalences = mw.text.split(text, "%s*=%s*")

    local link, romanization = mw.ustring.match(equivalences[1],
        "^%[%[(.-)%]%]%s*%(?%s*([^%)]*)%s*%)?$")
    if not link then
        -- Fail with a readable message instead of passing nil to match().
        error("Dòng từ nguyên không đúng cú pháp: " .. text, 0)
    end

    local language, star, page, term = mw.ustring.match(link,
        "^([%a-]-):?(%*?)([^%]|:]*)|?([^%]:]*)$")
    if not language then
        -- e.g. a link target with more than one colon
        error("Dòng từ nguyên không đúng cú pháp: " .. text, 0)
    end

    entry.language = language
    entry.page = page
    entry.term = term
    -- The greedy capture swallows any spaces before the closing parenthesis.
    entry.romanization = mw.text.trim(romanization)

    entry.meaning = equivalences[2]
    entry.relationship = equivalences[3]
    if entry.meaning and mw.ustring.sub(entry.meaning, -1) == ":" then
        entry.relationship = entry.meaning
        entry.meaning = ""
    end
    -- Strip the colon(s) that mark a relationship; only ":" is trimmed here.
    entry.relationship = mw.text.trim(entry.relationship or "", ":")

    entry.attested = #star < 1
    if #entry.term < 1 then entry.term = entry.page end
    return entry
end

--- Chooses the format string used to wrap a term of the given language.
--
-- Every returned format takes two `%s` arguments: the language code and the
-- text. For Vietnamese ("vi"/"vie") the code is placed in an HTML comment and
-- the text is wrapped in curly quotes; an empty script or a script listed in
-- `tagsByScript` yields an <i> wrapper; anything else yields a <span>.
--
-- @param language  string, language code
-- @param script    string, script code (may be empty)
-- @return string, a format string for mw.ustring.format / string.format
function p.foreignTextFormat(language, script)
    local isVietnamese = (language == "vi" or language == "vie")
    if isVietnamese then
        return "<!-- %s -->“%s”"
    end

    local useItalics = #script < 1 or tagsByScript[script]
    if useItalics then
        return "<i lang='%s'>%s</i>"
    end

    return "<span lang='%s'>%s</span>"
end

--- Renders a parsed entry as one wikitext/HTML fragment: language
-- abbreviation, optional reconstruction marker, the linked term, and a
-- parenthesised list of glosses (romanization and/or meaning).
--
-- @param entry  table as returned by p.entry (reads `language`, `page`,
--               `term`, `attested`, `romanization`, `meaning`)
-- @param child  unused; kept so existing callers keep working
-- @param frame  Scribunto frame used to expand the language templates; may be
--               nil, in which case the raw language code is used as tooltip,
--               no section anchor is added and the default <i> wrapper is used
-- @return string
function p.formatEntry(entry, child, frame)
    local hasLanguage = #entry.language > 0

    -- Expand the language-name template once; it feeds both the tooltip and
    -- the section anchor.
    local langName
    if frame and hasLanguage then
        langName = frame:expandTemplate{
            title = entry.language,
            args = {style = "plain"}
        }
    end

    local lcLanguage = ""
    if hasLanguage then
        -- Without a frame the name cannot be resolved; show the code itself.
        lcLanguage = "<abbr class='lang' title='" .. (langName or entry.language) ..
            "'>" .. entry.language .. "</abbr>"
    end
    local line = lcLanguage .. " "

    if not entry.attested then
        line = line .. "[[Wiktionary:Từ tái tạo|<b title='Đây là từ tái tạo. " ..
            "Nó không bao giờ được viết ra trong lịch sử.'>*</b>]]"
    end

    local anchor = ""
    local linkFormat = "<i lang='%s'>%s</i>"
    if langName then
        anchor = "#" .. contentLang:ucfirst(langName)
        local script = frame:expandTemplate{
            title = entry.language,
            args = {style = "script"}
        }
        -- A code whose first character is unchanged by upper() (e.g. "Hani")
        -- is used as the script itself.
        if script and entry.language:sub(1, 1):upper() == entry.language:sub(1, 1) then
            script = entry.language
        end
        linkFormat = p.foreignTextFormat(entry.language, script)
    end

    local langData = stats.categoriesByCode[entry.language]
    local bcp = (langData and langData.short) or entry.language
    line = line .. mw.ustring.format(linkFormat, bcp,
        "[[" .. entry.page .. anchor .. "|" .. entry.term .. "]]")

    local glosses = {}
    if entry.romanization and #entry.romanization > 0 then
        local gloss
        if entry.language == "vie" or entry.language == "vi" then
            local linked = "[[" .. entry.romanization .. "]]"
            if frame then
                gloss = frame:expandTemplate{
                    title = "Hani char",
                    args = {linked},
                }
            else
                -- No frame to expand the template with: fall back to the
                -- bare link rather than raising an error.
                gloss = linked
            end
        elseif entry.language == "Hani" or entry.language == "Hant" or
                entry.language == "Hans" or
                (langData and langData.isChinese) then
            gloss = "“[[" .. entry.romanization .. "#Tiếng Việt|" .. entry.romanization .. "]]”"
        else
            gloss = "<i>" .. entry.romanization .. "</i>"
        end
        table.insert(glosses, gloss)
    end
    if entry.meaning and #entry.meaning > 0 then
        table.insert(glosses, "<q class='meaning'>" .. entry.meaning .. "</q>")
    end
    if #glosses > 0 then
        line = line .. " <span class='glosses'>(" .. table.concat(glosses, ", ") .. ")</span>"
    end
    return line
end

--- Recursively renders a node and everything it derives from as nested HTML
-- lists.
--
-- @param node   table with a `from` list of ancestor nodes and an optional
--               `relationship` string
-- @param child  the node this one was reached from; when nil (top-level
--               call) the node itself is not printed, only its `from` list
-- @param frame  Scribunto frame, passed through to p.formatEntry
-- @return string; the formatted entry, followed (if `from` is non-empty) by
--         the relationship label and a <ul> with one <li> per ancestor
function p.buildOutput(node, child, frame)
    local output = ""
    if child then
        output = "<span>" .. p.formatEntry(node, child, frame) .. "</span>"
    end
    if #node.from == 0 then
        return output
    end

    local relationship = node.relationship or ""
    if #relationship > 0 then
        output = output .. " <span class='relationship'>" .. relationship .. "</span>"
    end

    local fromOutput = {}
    for i = 1, #node.from do
        fromOutput[i] = p.buildOutput(node.from[i], node, frame)
    end

    return output .. "<ul><li>" .. table.concat(fromOutput, "</li><li>") ..
        "</li></ul>"
end

--- Template entry point: reads the invocation arguments and delegates to
-- p._tree.
--
-- Arguments read from `frame.args`:
--   term       page/term the tree is about; defaults to {{PAGENAME}} when
--              missing or empty
--   lang       language code of the term (required)
--   [1]        the bulleted etymology list (required)
--   collapsed  non-empty to render the box collapsed
--   nocat      non-empty to suppress category links
--
-- @param frame  Scribunto frame
-- @return string, the rendered tree
function p.tree(frame)
    local args = frame.args

    -- An empty string is truthy in Lua, so `|term=` must be checked by length
    -- to get the page-name fallback, like the `lang` and `[1]` checks below.
    local page = args.term
    if not page or #page < 1 then
        page = frame:preprocess("{{PAGENAME}}")
    end

    assert(args.lang and #args.lang > 0, "Chưa định rõ ngôn ngữ của mục từ")
    assert(args[1] and #args[1] > 0, "Chưa định rõ từ nguyên")
    local isCollapsed = args.collapsed and #args.collapsed > 0
    local suppressCats = args.nocat and #args.nocat > 0
    return p._tree(page, args.lang, args[1], isCollapsed, suppressCats, frame)
end
--- Builds the etymology box for a term from a bulleted list of ancestors.
--
-- Each non-empty line of `text` must start with one or more `*`; the number
-- of bullets gives the depth. A line is attached to the `from` list of the
-- nearest preceding line with fewer bullets (or to the term itself).
--
-- @param page          string, the term / page title
-- @param language      string, language code of the term
-- @param text          string, the bulleted etymology list
-- @param isCollapsed   truthy to add the "collapsed" class
-- @param suppressCats  truthy to omit category links
-- @param frame         Scribunto frame; defaults to mw.getCurrentFrame()
-- @return string, the HTML/wikitext of the collapsible box
function p._tree(page, language, text, isCollapsed, suppressCats, frame)
    frame = frame or mw.getCurrentFrame()

    -- The terminology is a bit confusing here: in an etymology’s tree data
    -- structure, there is only one leaf but potentially many roots.
    local leaf = {
        language = language,
        page = page,
        term = page,
        attested = true,
        from = {},
    }
    local nodes = {leaf}

    -- firstAncestors[level] is the most recent node seen at that level;
    -- index 1 is always the term itself. `deepest` is the highest index in
    -- use, tracked explicitly because the table can have gaps when a line
    -- skips a bullet level, and `#` is not reliable on tables with gaps.
    local firstAncestors = {leaf}
    local deepest = 1

    local cats = {}
    local seenCats = {}
    local leafLangName -- expanded lazily, at most once

    for line in mw.ustring.gmatch(text, "([^\r\n]+)") do
        -- Check the bullets first so a line without them gets this message
        -- rather than a parse error from p.entry.
        local bullets = mw.ustring.match(line, "^%*+")
        assert(bullets and #bullets > 0, "Phải đặt ít nhất một dấu * đằng trước mỗi dòng để tạo danh sách từ nguyên")
        local node = p.entry(mw.text.trim(line, "*%s"))

        local level = #bullets + 1

        -- Nearest existing node at a shallower level. Index 1 is never
        -- cleared, so this always finds one.
        local child
        for i = level - 1, 1, -1 do
            child = firstAncestors[i]
            if child then break end
        end

        if #node.language < 1 then
            node.language = child.language
        elseif #child.language > 0 and node.language ~= child.language and not mw.ustring.match(node.language, "^[A-Z]") then
            leafLangName = leafLangName or frame:expandTemplate{
                title = language,
                args = {style = "plain"}
            }
            -- Assigning to a single local drops gsub's second result (the
            -- substitution count).
            local originName = mw.ustring.gsub(frame:expandTemplate{
                title = node.language,
                args = {style = "plain"}
            }, "^[Tt]iếng ", "")
            local cat = mw.ustring.format("[[Thể loại:Từ %s gốc %s]]", leafLangName, originName)
            if not seenCats[cat] then
                seenCats[cat] = true
                cats[#cats + 1] = cat
            end
        end
        if #node.page < 1 then
            node.page = child.page
            node.term = child.term
        end
        table.insert(child.from, node)

        -- Record this node and drop everything deeper, so later lines never
        -- attach to a node from an earlier branch.
        firstAncestors[level] = node
        for i = level + 1, deepest do firstAncestors[i] = nil end
        deepest = level
    end

    local output = p.buildOutput({
        language = language,
        page = page,
        term = page,
        attested = true,
        from = nodes
    }, nil, frame)
    if mw.ustring.sub(output, -5) ~= "</ul>" then output = output .. "." end
    if not suppressCats then output = output .. table.concat(cats) end
    return "<div class='etymology NavFrame " .. (isCollapsed and "collapsed" or "") ..
        "'><div class='NavHead' align='left'>" ..
        "Từ nguyên của " .. page .. "</div><div class='NavContent'>" .. output ..
        "</div></div>"
end

return p