-- Bước tới nội dung
--
-- Mô đun:gan-pron
--
-- Từ điển mở Wiktionary
local export = {}

local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local len = mw.ustring.len
local match = mw.ustring.match

-- Lookup table from a romanised syllable initial to its IPA transcription.
-- export.ipa indexes this table with the initial it parsed from each syllable
-- and raises "Unrecognised initial" when the key is absent.
-- The empty-string key covers syllables that have no initial.
local initialConv = {
	["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", 
	["d"] = "t", ["t"] = "tʰ", ["l"] = "l", 
	["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
	["j"] = "t͡ɕ", ["q"] = "t͡ɕʰ", ["x"] = "ɕ", ["ny"] = "n̠ʲ",
	["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ",
	["h"] = "h", [""] = ""
}

-- Lookup table from a romanised syllable final to its IPA transcription.
-- export.ipa indexes this table with the final of each syllable and raises
-- "Unrecognised final" when the key is absent.
-- Besides finals typed by the user, export.ipa itself rewrites some finals
-- before the lookup: "i" becomes "z" and "iu" becomes "iiu" after initials
-- whose IPA contains "s", and "i" becomes "ii" after the initial "f".
local finalConv = {
	["z"] = "z̩",
	["i"] = "i", 
	["u"] = "u", 
	["y"] = "y", 
	["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
	["o"] = "o", ["uo"] = "uo", 
	["e"] = "e", ["ie"] = "ie", ["ue"] = "ue", ["ye"] = "ye",
	["eo"] = "ɵ",
	["ai"] = "ai", ["uai"] = "uai",
	["oi"] = "oi", ["ei"] = "ei", ["ii"] = "ɨi", ["ui"] = "ui",
	["au"] = "au", ["eu"] = "ɛu", ["ieu"] = "iɛu",
	["iu"] = "iu", ["iiu"] = "ɨu", 
	["an"] = "an", ["uan"] = "uan",
	["on"] = "ɵn", ["uon"] = "uɵn", ["yon"] = "yɵn",
	["en"] = "ɛn", ["ien"] = "iɛn", ["in"] = "in", ["iin"] = "ɨn",
	["un"] = "un", ["yn"] = "yn",
	["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
	["ong"] = "ɔŋ", ["iong"] = "iɔŋ", ["uong"] = "uɔŋ",
	["ung"] = "uŋ", ["iung"] = "iuŋ",
	-- Finals ending in "t" or "h" are the ones export.ipa treats as checked
	-- when validating tones.
	["at"] = "at̚", ["uat"] = "uat̚",
	["ot"] = "ɵt̚", ["uot"] = "uɵt̚", ["yot"] = "yɵt̚",
	["et"] = "ɛt̚", ["iet"] = "iɛt̚", ["uet"] = "uɛt̚",
	["it"] = "it̚", ["iit"] = "ɨt̚", ["ut"] = "ut̚", ["yt"] = "yt̚",
	["ah"] = "aʔ", ["iah"] = "iaʔ", ["uah"] = "uaʔ",
	["oh"] = "ɔʔ", ["ioh"] = "iɔʔ", ["uoh"] = "uɔʔ",
	["uh"] = "uʔ", ["iuh"] = "iuʔ",
	-- Consonant-only syllables: when nothing follows the parsed initial,
	-- export.ipa moves that consonant into the final slot and looks it up here.
	["m"] = "m̩", ["n"] = "n̩", ["ng"] = "ŋ̍"
}

-- Lookup table from a tone label to the superscript tone digits appended to
-- the IPA syllable. export.ipa raises "Unrecognised tone" for a missing key.
-- Labels "1" to "7" come from the digits typed after a syllable. The
-- hyphenated labels are assigned inside export.ipa only: "3-1", "3-2" and
-- "3-3" replace tone "3" depending on the tone of the following syllable,
-- and "8-1"/"8-2" replace the internal tone "8" (a non-initial syllable
-- written without a tone digit) depending on the preceding syllable.
-- The empty-string key is used for a first syllable written without a tone.
local toneConv = {
	["1"] = "⁴²", ["2"] = "²⁴", ["3"] = "²¹³", ["4"] = "³⁵",
	["5"] = "¹¹", ["6"] = "⁵", ["7"] = "²", ["8-1"] = "¹", ["8-2"] = "²",
	["3-1"] = "²¹³⁻¹³", ["3-2"] = "²¹³⁻²⁴", ["3-3"] = "²¹³⁻²¹", [""] = "",
}

--- Converts a romanised Gan pronunciation into IPA.
--
-- Input format, as parsed below:
--   * alternative pronunciations are separated by "/";
--   * syllables within one pronunciation are separated by a single space;
--   * a leading "'" marks the syllable as stressed;
--   * a trailing run of the digits 1-7 is the tone; a non-initial syllable
--     without one receives the internal tone "8".
--
-- @param text  the romanisation string, or a table whose `args[1]` holds it
--              (presumably a Scribunto frame -- confirm against callers)
-- @return the IPA transcriptions joined by "/, /", followed by a maintenance
--         category link when the attention check at the end fires
-- Raises an error when no input is supplied, when a tone conflicts with the
-- syllable's initial or final, or when an initial, final or tone is not found
-- in the conversion tables.
function export.ipa(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	if text == nil then
		-- Fail with a readable message instead of an argument error raised
		-- from inside mw.text.split.
		error("No romanisation was supplied.")
	end

	local syllables, result = {}, {}
	local has_stress, has_neutral, attention = false, false, ""
	local words = mw.text.split(text, "/")
	for _, word in ipairs(words) do
		-- Scratch tables are created afresh for every word. Sharing them
		-- across words let entries of a longer earlier word leak into a
		-- shorter later one (extra syllables in the output and a bogus
		-- "following tone" for the sandhi rules).
		local stress, initial, final, tone, ipa = {}, {}, {}, {}, {}
		syllables = mw.text.split(word, " ")

		-- Pass 1: split every syllable into stress, initial, final and tone,
		-- and validate the tone against the initial and final.
		for index, syllable in ipairs(syllables) do
			stress[index] = match(syllable, "^'") and "ˈ" or ""
			has_stress = has_stress or stress[index] == "ˈ"
			syllable = gsub(syllable, "^'", "")
			initial[index] = match(syllable, "^[bpmfdtlnzcsjqxgkh]?[gy]?")
			-- Apart from "ny", a "y" captured after a consonant belongs to
			-- the final, so only the consonant is kept as the initial.
			if match(initial[index], "^.y$") and initial[index] ~= "ny" then
				initial[index] = sub(initial[index], 1, 1)
			end
			-- A bare "y" is the start of the final, not an initial.
			initial[index] = initial[index] == "y" and "" or initial[index]
			final[index] = match(sub(syllable, len(initial[index]) + 1, -1), "^[^1-8%*]*")
			-- Nothing after the initial: the consonant itself is the final.
			if final[index] == "" then
				final[index] = initial[index]
				initial[index] = ""
			end
			tone[index] = match(syllable, "[1-7]+$") or (index ~= 1 and "8" or "")
			has_neutral = has_neutral or tone[index] == "8"
			
			-- checks validity of the syllable
			local aspirated = match(initial[index], "^[ptcqkh]$")
			local checked = match(final[index], "[th]$")
			if tone[index] == "2" and not aspirated then
				error("The 2nd tone can only go with aspirated initials. Use the 4th tone instead.")
			elseif tone[index] == "4" and aspirated then
				error("The 4th tone can only go with unaspirated initials. Use the 2nd tone instead.")
			elseif match(tone[index], "^[67]$") and not checked then
				error(string.format("Tone %s can only go with checked finals.", tone[index]))
			elseif match(tone[index], "^[12345]$") and checked then
				error(string.format("Tone %s cannot go with a checked final.", tone[index]))
			end
		end

		-- Pass 2: convert each part to IPA. The syllables are handled left to
		-- right, so tone[index+1] is still a raw label while tone[index-1]
		-- has already been replaced by its superscript form.
		for index = 1, #syllables do
			initial[index] = initialConv[initial[index]] or error(("Unrecognised initial: \"%s\""):format(initial[index]))
			-- Final adjustments that depend on the (already converted) initial.
			final[index] = (match(initial[index], "s") and final[index] == "i") and "z" or final[index]
			final[index] = (initial[index] == "f" and final[index] == "i") and "ii" or final[index]
			final[index] = (match(initial[index], "s") and final[index] == "iu") and "iiu" or final[index]
			final[index] = finalConv[final[index]] or error(("Unrecognised final: \"%s\""):format(final[index]))
			if tone[index] == "3" then
				-- Tone 3 changes according to the tone of the next syllable;
				-- it is left alone on the last syllable of the word.
				if match(tone[index+1] or "", "[1246]") then
					tone[index] = "3-1"
				elseif match(tone[index+1] or "", "[357]") then
					tone[index] = "3-2"
				elseif tone[index+1] then -- tone[index+1] == "8"
					tone[index] = "3-3"
				end
			elseif tone[index] == "8" then
				-- The realisation of tone "8" depends on the converted tone
				-- of the preceding syllable.
				if tone[index-1] == "²¹³⁻²¹" or tone[index-1] == "¹¹" or tone[index-1] == "²" then
					tone[index] = "8-1"
				else
					tone[index] = "8-2"
				end
			end
			tone[index] = toneConv[tone[index]] or error(("Unrecognised tone: \"%s\""):format(tone[index]))
			ipa[index] = stress[index] .. initial[index] .. final[index] .. tone[index]
		end
		table.insert(result, table.concat(ipa, " "))
	end
	
	-- check for stress if needed
	-- NOTE(review): `syllables` holds only the last word here, while the two
	-- flags accumulate over every word -- confirm this is the intended check.
	if #syllables > 1 and not has_neutral and not has_stress then
		attention = "[[Category:Gan terms needing pronunciation attention|*]]"
	end
	
	return table.concat(result, "/, /") .. attention
end

--- Formats a romanisation string for display.
-- Surrounds every "/" with spaces, then wraps each run of the characters
-- 1-9 and "-" in <sup> tags.
-- @param text  the romanisation string
-- @return the formatted string
function export.rom(text)
	local spaced = gsub(text, "/", " / ")
	-- Assigning to a local keeps only the first result of gsub (the string),
	-- dropping the replacement count.
	local superscripted = gsub(spaced, "([1-9-]+)", "<sup>%1</sup>")
	return superscripted
end

return export