Bước tới nội dung

Mô đun:lic-pron

Từ điển mở Wiktionary
-- Table of public functions; returned at the end of the module.
local export = {}
-- Short aliases for Scribunto's mw.ustring pattern functions used throughout the module.
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match

-- Language object for the code "lic"; handed to the formatting modules in export.show.
local lang = require("Module:languages").getByCode("lic")

-- Lookup from a written syllable onset to its IPA transcription.
-- Keys are the onset spellings captured by the syllable pattern in export.ipa;
-- an onset that is missing from this table is reported there as invalid.
local convert_initial = {
	b = "p",
	p = "pʰ",
	bl = "pl",
	bh = "ɓ",
	m = "m",
	w = "ˀw",
	f = "f",
	v = "v",

	d = "t",
	t = "tʰ",
	dh = "ɗ",

	-- "z"/"j" share one value, as do "c"/"q".
	z = "t͡s",
	j = "t͡s",
	c = "t͡sʰ",
	q = "t͡sʰ",

	n = "n",
	r = "r",
	l = "l",
	hl = "ɬ",
	dz = "z",
	ny = "ɲ",
	y = "ˀj",

	g = "k",
	k = "kʰ",
	gh = "ɡ",
	ng = "ŋ",
	gw = "kʷ",
	kw = "kʷʰ",
	ghw = "ɡʷ",
	ngw = "ŋʷ",

	h = "h",
	hy = "hʲ",
	hw = "hʷ",

	-- A syllable written without any onset letter.
	[""] = "ʔ",
}

-- Lookup from a written rime (everything after the onset, tone letter removed)
-- to its IPA transcription. A rime that is missing from this table is reported
-- as invalid by export.ipa.
-- Within the table, an "e" written after the nucleus vowel marks the long
-- counterpart of a rime (e.g. "an" -> "an" but "aen" -> "aːn").
local convert_final = {
	-- nucleus "a"
	["a"] = "a",
	["aei"] = "aːi", ["aeu"] = "aːu",
	["aem"] = "aːm", ["aen"] = "aːn", ["aeny"] = "aːɲ", ["aeng"] = "aːŋ",
	["aep"] = "aːp", ["aet"] = "aːt", ["aety"] = "aːc", ["aek"] = "aːk",
	["ai"] = "ai", ["au"] = "au", ["auu"] = "aɯ",
	["am"] = "am", ["an"] = "an", ["any"] = "aɲ", ["ang"] = "aŋ",
	-- "ap" is the short rime; the long one is spelled "aep" above.
	["ap"] = "ap", ["at"] = "at", ["aty"] = "ac", ["ak"] = "ak",

	-- nucleus "e"
	["e"] = "eː",
	["eeu"] = "eːu",
	["eem"] = "eːm", ["een"] = "eːn", ["eeng"] = "eːŋ",
	["eep"] = "eːp", ["eet"] = "eːt", ["eek"] = "eːk",
	["ei"] = "ei", ["euu"] = "eɯ",
	["em"] = "em", ["en"] = "en", ["eny"] = "eɲ", ["eng"] = "eŋ",
	["ep"] = "ep", ["et"] = "et", ["ety"] = "ec", ["ek"] = "ek",

	-- nucleus "i"
	["i"] = "i",
	["ieu"] = "iːu",
	["iem"] = "iːm", ["ien"] = "iːn", ["ieng"] = "iːŋ",
	["iep"] = "iːp", ["iet"] = "iːt", ["iek"] = "iːk",
	["ia"] = "ia", ["iu"] = "iu",
	["im"] = "im", ["in"] = "in", ["ing"] = "iŋ",
	["ip"] = "ip", ["it"] = "it", ["ik"] = "ik",

	-- nucleus "o"
	["o"] = "oː",
	["oei"] = "oːi",
	["oem"] = "oːm", ["oen"] = "oːn", ["oeng"] = "oːŋ",
	["oep"] = "oːp", ["oet"] = "oːt", ["oety"] = "oːc", ["oek"] = "oːk",
	["ou"] = "ou",
	["om"] = "om", ["ong"] = "oŋ",
	["op"] = "op", ["ok"] = "ok",

	-- nucleus "u"
	["u"] = "u",
	["uei"] = "uːi",
	["uen"] = "uːn", ["ueny"] = "uːɲ", ["ueng"] = "uːŋ",
	["uet"] = "uːt", ["uety"] = "uːc", ["uek"] = "uːk",
	["ua"] = "ua", ["ui"] = "ui",
	["un"] = "un", ["uny"] = "uɲ",
	["ut"] = "ut", ["uty"] = "uc",

	-- nucleus "uu"
	["uu"] = "ɯ",
	["uuei"] = "ɯːi",
	["uuem"] = "ɯːm", ["uuen"] = "ɯːn", ["uueng"] = "ɯːŋ",
	["uuep"] = "ɯːp", ["uuet"] = "ɯːt", ["uuek"] = "ɯːk",
	["uua"] = "ɯa",
	["uum"] = "ɯm", ["uun"] = "ɯn", ["uung"] = "ɯŋ",
	["uup"] = "ɯp", ["uut"] = "ɯt", ["uuk"] = "ɯk",
}

--- Work out the tone of a written syllable and remove its tone marking.
-- Rules, checked in this order:
--   * trailing "x"                          -> "˥", with the "x" removed
--   * trailing "s"                          -> "˩", with the "s" removed
--   * doubled stop coda (e.g. "pp", "tt")   -> "˥˧", with the last coda copy removed
--   * single stop coda (p/t/k, optional y)  -> "˥", syllable unchanged
--   * anything else                         -> "˥˧", syllable unchanged
-- @param syllable  one written syllable (string)
-- @return tone string, then the syllable without its tone marking
--         (always exactly two values)
local function get_tone(syllable)
	-- gsub also returns a replacement count; the extra parentheses keep only
	-- the resulting string so that count is not returned to the caller.
	if find(syllable, "x$") then
		return "˥", (gsub(syllable, "x$", ""))
	end
	if find(syllable, "s$") then
		return "˩", (gsub(syllable, "s$", ""))
	end

	local coda = match(syllable, "([ptk]y?)$")
	if coda then
		-- Remove the final coda once; if what is left still ends in a stop,
		-- the coda was written doubled.
		local stem = gsub(syllable, coda .. "$", "")
		if find(stem, "[ptk]y?$") then
			return "˥˧", stem
		end
		return "˥", syllable
	end

	return "˥˧", syllable
end

-- Insert "." between the two captures of `pattern` wherever it matches in `str`.
-- A single gsub pass cannot produce overlapping matches, and every boundary
-- pattern below ends on the vowel that would have to begin the next match, so
-- the substitution is repeated until nothing more matches. Each replacement
-- separates two previously adjacent characters, so the loop terminates.
local function insert_breaks(str, pattern)
	local count
	repeat
		str, count = gsub(str, pattern, "%1.%2")
	until count == 0
	return str
end

--- Split a word into syllables.
-- Apostrophes are taken as explicit syllable breaks; further breaks are then
-- inserted by a sequence of spelling rules, applied in the order listed.
-- @param text  lower-case written word (string)
-- @return the iterator produced by mw.text.gsplit, yielding one syllable per step
local function syllabify(text)
	text = gsub(text, "'", ".")
	-- vowel + single consonant + vowel: break before the consonant
	text = insert_breaks(text, "([aeiou])([bcdfghjklmnpqrtvwyz][aeiou])")
	-- a tone letter (s/x) followed by another letter: break after the tone letter
	text = insert_breaks(text, "([sx])([a-z])")
	-- vowel + two-letter onset + vowel: break before the onset
	text = insert_breaks(text, "([aeiou])([bdgh][ghlwyz][aeiou])")
	-- doubled stop coda followed by an onset: break after the coda
	text = insert_breaks(text, "([aeiou]pp)([bcdfghjklmnpqrtvwyz][ghlwyz]?w?[aeiou])")
	text = insert_breaks(text, "([aeiou]tt)([bcdfghjklmnpqrtvwyz][ghlwyz]?w?[aeiou])")
	text = insert_breaks(text, "([aeiou]kk)([bcdfghjklmnpqrtvwyz][ghlwyz]?w?[aeiou])")
	-- remaining coda (one or two letters) followed by an onset: break after the coda
	text = insert_breaks(text, "([aeiou][nt]?[gkmnpty])([bcdfghjklmnpqrtvwyz][ghlwyz]?w?[aeiou])")
	return mw.text.gsplit(text, "%.")
end

--- Convert a written word into a phonemic IPA transcription.
-- The word is lower-cased and split into syllables; each syllable is split into
-- tone, onset and rime, and the onset and rime are looked up in convert_initial
-- and convert_final.
-- @param text  the written word (string)
-- @return the transcription, syllables joined by "." and wrapped in slashes
-- Raises an error if a syllable does not fit the onset + rime pattern, or if
-- its onset or rime is missing from the lookup tables.
function export.ipa(text)
	text = string.lower(text)
	local syllables = {}
	for syllable in syllabify(text) do
		-- These spellings are handled as if they carried a tone letter:
		-- "uu"/"guu" get "s" appended and "zuu" gets "x" appended.
		if match(syllable, "^g?uu$") then
			syllable = syllable .. "s"
		elseif syllable == "zuu" then
			syllable = syllable .. "x"
		end

		local tone_value, stem = get_tone(syllable)
		-- Onset: up to three letters (consonant, modifier letter, "w"); rime: the rest.
		local initial, final = match(stem, "^([bpmwfvdtzjcqnrlygkh]?[lhzywg]?w?)([aeioumngptky]+)$")
		if not initial or not final then
			error(stem .. " cannot be recognized")
		end

		local initial_ipa, final_ipa = convert_initial[initial], convert_final[final]
		if not initial_ipa then
			error(initial .. " is not a valid initial")
		elseif not final_ipa then
			error(final .. " is not a valid final")
		end
		syllables[#syllables + 1] = initial_ipa .. final_ipa .. tone_value
	end
	return "/" .. table.concat(syllables, ".") .. "/"
end

--- Template entry point: build the pronunciation line for an entry.
-- Reads the optional first template argument from the parent frame; when it is
-- absent the current page title is transcribed instead.
-- @param frame  the Scribunto frame object of the #invoke call
-- @return a wikitext list item made of the accent qualifier and the formatted IPA
function export.show(frame)
	local args = require("Module:parameters").process(
		frame:getParent().args,
		{ [1] = { } }
	)
	local text = args[1] or mw.title.getCurrentTitle().text

	local qualifier = require("Module:accent qualifier").format_qualifiers(
		lang,
		{"Hlai tiêu chuẩn", "[[w:Tiếng Lauhut|Bảo Định]]"}
	)
	local pronunciation = require("Module:IPA").format_IPA_full {
		lang = lang,
		items = {{ pron = export.ipa(text) }},
	}

	return ("* %s %s"):format(qualifier, pronunciation)
end

return export