Mô đun:sla-headword

Từ điển mở Wiktionary
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}

-- Language object looked up by the code "sla-pro"; used below for category
-- names and passed to the headword module.
local lang = require("Module:languages").getByCode("sla-pro")

-- Combining marks (built from their Unicode code points) that the accent
-- checker compares against. CARON and OGONEK are only used to recombine the
-- letters ě, ę and ǫ after decomposition; the others are compared against
-- each syllable's diacritic string.
local GRAVE = mw.ustring.char(0x300)
local TILDE = mw.ustring.char(0x303)
local MACRON = mw.ustring.char(0x304)
local CARON = mw.ustring.char(0x30C)
local DGRAVE = mw.ustring.char(0x30F)
local INVBREVE = mw.ustring.char(0x311)
local OGONEK = mw.ustring.char(0x328)


-- This could be moved to the common module
--
-- Reports whether the accent marks in `word` sit in positions the rules below
-- accept.
--
-- The word (with one optional leading "*" removed) is cut into syllables, each
-- made of a consonant run, one vowel letter and the combining marks attached
-- to that vowel. Parsing stops at the first character that is neither a listed
-- consonant nor a listed vowel, so anything after it is not checked.
--
-- @param word  string to check
-- @return      false as soon as one syllable carries a mark that is not
--              permitted for its vowel and position; true otherwise
local function are_accents_valid(word)
	local consonant_pattern = "^([bcčdďfgjklľmnňprřsšśtťvxzž()]*)(.-)$"
	-- A vowel letter followed by any run of characters in U+0300..U+036F.
	local vowel_pattern = "^([aeiouyьъ][" .. mw.ustring.char(0x300) .. "-" .. mw.ustring.char(0x36F) .. "]*)(.-)$"
	-- Marks that belong to the letter itself rather than to the accent; tried
	-- in this order, and only the first matching rule is applied.
	local recompositions = {
		{base = "e", mark = CARON, composed = "ě"},
		{base = "e", mark = OGONEK, composed = "ę"},
		{base = "o", mark = OGONEK, composed = "ǫ"},
	}

	-- Parentheses keep only the string result of gsub, not the match count.
	local remaining = (word:gsub("^%*", ""))
	local parsed = {}

	while true do
		local onset, nucleus
		onset, remaining = mw.ustring.match(remaining, consonant_pattern)

		-- An l or r that is followed by another consonant is attached to the
		-- previous syllable's vowel instead of opening this syllable.
		if #parsed > 0 and mw.ustring.find(onset, "^[lr].") then
			local previous = parsed[#parsed]
			previous.vowel = previous.vowel .. onset:sub(1, 1)
			onset = onset:sub(2)
		end

		-- Decompose so that the vowel letter and its marks are separate characters.
		nucleus, remaining = mw.ustring.match(mw.ustring.toNFD(remaining), vowel_pattern)
		if not nucleus then
			break
		end
		remaining = mw.ustring.toNFC(remaining)

		-- Recombine ě, ę, ǫ so their caron/ogonek is not mistaken for an accent.
		local first_byte = nucleus:sub(1, 1)
		for _, rule in ipairs(recompositions) do
			if first_byte == rule.base and nucleus:find(rule.mark, nil, true) then
				nucleus = nucleus:gsub("^" .. rule.base, rule.composed)
				nucleus = nucleus:gsub(rule.mark, "")
				break
			end
		end

		parsed[#parsed + 1] = {
			consonants = onset,
			vowel = mw.ustring.sub(nucleus, 1, 1),
			dia = mw.ustring.sub(nucleus, 2),
		}
	end

	-- Validate the marks of every syllable that has any.
	local count = #parsed
	for position, entry in ipairs(parsed) do
		local dia = entry.dia
		if dia ~= "" then
			local is_first = position == 1
			local is_last = position == count

			if mw.ustring.find(entry.vowel, "^[eoьъ]$") then
				-- Bare e, o or a yer.
				if is_first then
					-- First syllable: only GRAVE or DGRAVE.
					if dia ~= GRAVE and dia ~= DGRAVE then
						return false
					end
				elseif is_last and mw.ustring.find(entry.vowel, "^[ьъ]$") then
					-- A yer in the last syllable may not carry any mark.
					return false
				elseif dia ~= GRAVE then
					-- Everywhere else: only GRAVE.
					return false
				end
			elseif dia ~= GRAVE and dia ~= MACRON then
				-- Any other vowel (including one extended by l/r): GRAVE and
				-- MACRON are accepted in every position; the rest depend on
				-- where the syllable stands.
				if is_first then
					-- First syllable additionally accepts TILDE and INVBREVE.
					if dia ~= TILDE and dia ~= INVBREVE then
						return false
					end
				elseif is_last then
					-- Last syllable accepts nothing further.
					return false
				elseif dia ~= TILDE then
					-- Medial syllable additionally accepts only TILDE.
					return false
				end
			end
		end
	end

	return true
end

-- Checks if the accents in the headword conform to [[WT:ASLA]].
--
-- Runs are_accents_valid on every entry of `heads` and, for each entry that
-- fails, appends the "<language name> entries with invalid diacritics"
-- category to `categories` (so the category is added once per failing head).
--
-- @param heads       list of headword strings
-- @param categories  list that receives the category names (modified in place)
local function check_accents(heads, categories)
	for _, headword in ipairs(heads) do
		local accents_ok = are_accents_valid(headword)
		if not accents_ok then
			local category = lang:getCanonicalName() .. " entries with invalid diacritics"
			table.insert(categories, category)
		end
	end
end


-- Headword line for adjectives.
--
-- Template parameters read from the parent frame:
--   1=     (list) comparative forms
--   cat=   when equal to "Hậu tố hình thành tính từ", the entry is treated as a suffix
--   head=  (list) headword forms, also checked for valid accent marks
--
-- @param frame  frame object of the #invoke call
-- @return       whatever Module:headword's full_headword returns for the data
function export.adjective(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {list = true},

		["cat"] = {},
		["head"] = {list = true},
	})

	local categories = {}
	local inflections = {}
	local data = {
		lang = lang,
		pos_category = "Tính từ",
		categories = categories,
		heads = args["head"],
		inflections = inflections,
	}

	local cat = args["cat"]
	if cat == "Hậu tố hình thành tính từ" then
		data.pos_category = "Hậu tố"
		table.insert(categories, lang:getCanonicalName() .. " " .. cat)
	end

	-- Comparative
	local comparatives = args[1]
	if comparatives[1] then
		comparatives.label = "so sánh hơn"
		table.insert(inflections, comparatives)
	end

	check_accents(data.heads, categories)

	return require("Module:headword").full_headword(data)
end


-- Headword line for adverbs.
--
-- Template parameters read from the parent frame:
--   cat=   when equal to "Hậu tố hình thành phó từ", the entry is treated as a suffix
--   cat2=  optional extra category suffix, appended after the language name
--   head=  (list) headword forms, also checked for valid accent marks
--
-- @param frame  frame object of the #invoke call
-- @return       whatever Module:headword's full_headword returns for the data
function export.adverb(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		["cat"] = {},
		["cat2"] = {},
		["head"] = {list = true},
	})

	local categories = {}
	local data = {
		lang = lang,
		pos_category = "Phó từ",
		categories = categories,
		heads = args["head"],
	}

	local cat, cat2 = args["cat"], args["cat2"]

	if cat == "Hậu tố hình thành phó từ" then
		data.pos_category = "Hậu tố"
		table.insert(categories, lang:getCanonicalName() .. " " .. cat)
	end

	if cat2 then
		table.insert(categories, lang:getCanonicalName() .. " " .. cat2)
	end

	check_accents(data.heads, categories)

	return require("Module:headword").full_headword(data)
end


-- Headword line for nouns.
--
-- Template parameters read from the parent frame:
--   1=     (list, default "?") gender codes; anything other than
--          m/f/n with an optional -d or -p suffix is shown as "?"
--   cat=   when equal to "Hậu tố hình thành danh từ", the entry is treated as a suffix
--   head=  (list) headword forms, also checked for valid accent marks
--   f=, m=, dim=, aug=, adj=  (lists) related forms shown as labelled inflections
--
-- @param frame  frame object of the #invoke call
-- @return       whatever Module:headword's full_headword returns for the data
function export.noun(frame)
	local params = {
		[1] = {list = true, default = "?"},

		["cat"] = {},
		["head"] = {list = true},
		["m"] = {list = true},
		["f"] = {list = true},
		["dim"] = {list = true},
		["aug"] = {list = true},
		["adj"] = {list = true}
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local data = {lang = lang, pos_category = "Danh từ", categories = {}, heads = args["head"], genders = {}, inflections = {}}

	if args["cat"] == "Hậu tố hình thành danh từ" then
		data.pos_category = "Hậu tố"
		table.insert(data.categories, lang:getCanonicalName() .. " " .. args["cat"])
	end

	-- Genders
	for _, val in ipairs(args[1]) do
		if val == "m" or val == "f" or val == "n" or val == "m-d" or val == "f-d" or val == "n-d" or val == "m-p" or val == "f-p" or val == "n-p" then
			table.insert(data.genders, val)

			-- Dual-only and plural-only codes also categorise the entry.
			if val == "m-d" or val == "f-d" or val == "n-d" then
				table.insert(data.categories, lang:getCanonicalName() .. " dualia tantum")
			elseif val == "m-p" or val == "f-p" or val == "n-p" then
				table.insert(data.categories, lang:getCanonicalName() .. " pluralia tantum")
			end
		else
			table.insert(data.genders, "?")
		end
	end

	-- Related forms, in the order they are to be displayed. ipairs (rather
	-- than pairs) is used because the traversal order of pairs is unspecified
	-- and the order here determines the order of the inflections.
	local related_forms = {
		{"f", "giống cái"},
		{"m", "giống đực"},
		{"dim", "từ nhỏ"},
		{"aug", "từ tăng to"},
		{"adj", "tính từ quan hệ"},
	}
	for _, entry in ipairs(related_forms) do
		local param_name, label = entry[1], entry[2]
		local forms = args[param_name]
		if forms[1] then
			forms.label = label
			table.insert(data.inflections, forms)
		end
	end

	check_accents(data.heads, data.categories)

	return require("Module:headword").full_headword(data)
end


-- Headword line for verbs.
--
-- Template parameters read from the parent frame:
--   a=      (list) aspect codes: "pf", "impf", "impf-det", "impf-indet",
--           "impf-freq"; anything else is shown as "?"
--   cat=    when equal to "Hậu tố hình thành động từ", the entry is treated as a suffix
--   head=   (list) headword forms, also checked for valid accent marks
--   impf=, pf=, indet=, freq=, det=  (lists) counterpart verbs shown as
--           labelled inflections
--
-- @param frame  frame object of the #invoke call
-- @return       whatever Module:headword's full_headword returns for the data
function export.verb(frame)
	local params = {
		["a"] = {list = true},
		["cat"] = {},
		["head"] = {list = true},
		["impf"] = {list = true},
		["pf"] = {list = true},
		["det"] = {list = true},
		["indet"] = {list = true},
		["freq"] = {list = true}
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local data = {lang = lang, pos_category = "Động từ", categories = {}, heads = args["head"], genders = {}, inflections = {}}

	if args["cat"] == "Hậu tố hình thành động từ" then
		data.pos_category = "Hậu tố"
		table.insert(data.categories, lang:getCanonicalName() .. " " .. args["cat"])
	end

	-- Aspects (stored in data.genders)
	for _, val in ipairs(args["a"]) do
		if val == "pf" then
			table.insert(data.genders, val)
			table.insert(data.categories, lang:getCanonicalName() .. " perfective verbs")
		elseif (val == "impf") or (val == "impf-det") or (val == "impf-indet") or (val == "impf-freq") then
			-- All imperfective subtypes are recorded as plain "impf".
			table.insert(data.genders, "impf")
			table.insert(data.categories, lang:getCanonicalName() .. " imperfective verbs")
		else
			table.insert(data.genders, "?")
		end

		-- The subtype itself is shown as a form-less label.
		if val == "impf-det" then
			table.insert(data.inflections, {label = "xác định"})
		elseif val == "impf-indet" then
			table.insert(data.inflections, {label = "không xác định"})
		elseif val == "impf-freq" then
			table.insert(data.inflections, {label = "không xác định"})
			-- Spelled "tiếp diễn", matching the label used for freq= below
			-- (previously misspelled "tiếp diẽn").
			table.insert(data.inflections, {label = "tiếp diễn"})
		end

	end

	-- Imperfective equivalent
	if args["impf"][1] then
		args["impf"].label = "chưa hoàn thành"
		table.insert(data.inflections, args["impf"])
	end

	-- Perfective equivalent
	if args["pf"][1] then
		args["pf"].label = "hoàn thành"
		table.insert(data.inflections, args["pf"])
	end

	-- Indeterminate equivalent
	if args["indet"][1] then
		args["indet"].label = "không xác định"
		table.insert(data.inflections, args["indet"])
	end

	-- Frequentative equivalent
	if args["freq"][1] then
		args["freq"].label = "tiếp diễn"
		table.insert(data.inflections, args["freq"])
	end

	-- Imperfective determinate equivalent
	if args["det"][1] then
		args["det"].label = "xác định chưa hoàn thành"
		table.insert(data.inflections, args["det"])
	end

	check_accents(data.heads, data.categories)

	return require("Module:headword").full_headword(data)
end

-- Headword line for roots.
--
-- Template parameters read from the parent frame:
--   1=     (list, default "?") accepted but not used by this function
--   cat=   accepted but not used by this function
--   head=  (list) headword forms, also checked for valid accent marks
--
-- @param frame  frame object of the #invoke call
-- @return       whatever Module:headword's full_headword returns for the data
function export.root(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {list = true, default = "?"},

		["cat"] = {},
		["head"] = {list = true},
	})

	local categories = {}
	local data = {
		lang = lang,
		pos_category = "roots",
		categories = categories,
		heads = args["head"],
		genders = {},
	}

	check_accents(data.heads, categories)

	return require("Module:headword").full_headword(data)
end

return export