-- Bước tới nội dung
--
-- Mô đun:yor-pron
--
-- Từ điển mở Wiktionary
local gsub = mw.ustring.gsub
local lower = mw.ustring.lower
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = require("Module:string/char")

local export = {}

local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("yo")

-- Run gsub(term, foo, bar) over and over until a pass leaves the string
-- unchanged, then return that stable string. Needed when matches overlap,
-- because a single gsub pass only replaces non-overlapping matches.
--   term: string to rewrite
--   foo:  pattern passed to gsub
--   bar:  replacement passed to gsub
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		-- single-target assignment keeps only gsub's first result (the string)
		term = gsub(previous, foo, bar)
	until term == previous
	return term
end

-- Combining marks, built from their code points so they stay legible in the
-- source instead of being glued onto a neighbouring character.
local MACRON, DOT, NASAL =
	u(0x0304), -- U+0304 combining macron: appended to vowels that carry no tone mark
	u(0x0323), -- U+0323 combining dot below: used to recognise dotted letters after NFD
	u(0x0303)  -- U+0303 combining tilde: appended to nasalised vowels

-- Orthographic consonant (or digraph) -> IPA symbol.
-- Nothing in the visible code reads this table; export.IPA spells the same
-- mappings out as individual substitutions.
local consonants = {
	b = "b", d = "d", f = "f",
	g = "ɡ", gb = "ɡ͡b",
	h = "h", j = "d͡ʒ", k = "k", l = "l", m = "m", n = "n",
	p = "k͡p", r = "ɾ",
	s = "s", ["ṣ"] = "ʃ",
	t = "t", w = "w", y = "j",

	-- additional digraphs
	gh = "ɣ", gw = "ɡʷ",
}

-- Orthographic oral vowel -> IPA symbol.
-- Nothing in the visible code reads this table; export.IPA applies the same
-- mappings through individual substitutions.
local vowels = {
	a = "a",
	e = "e", ["ẹ"] = "ɛ",
	i = "i",
	o = "o", ["ọ"] = "ɔ",
	u = "u",

	-- additional dotted vowels
	["ị"] = "ɪ", ["ụ"] = "ʊ",
}

-- Orthographic nasal vowel (vowel + n) and syllabic nasal -> IPA symbol.
-- Nothing in the visible code reads this table.
local nasals = {
	-- nasal vowels ("in" is a Lua keyword, so it needs the bracket form)
	an = "ã", ["ẹn"] = "ɛ̃", ["in"] = "ĩ", ["ọn"] = "ɔ̃", un = "ũ",
	-- syllabic nasals
	m = "m", n = "ŋ",
}

-- Convert a tone-marked spelling into an IPA transcription.
--
-- The conversion is a fixed pipeline of pattern substitutions whose order
-- matters: normalise the input, map letters to IPA symbols, give toneless
-- vowels a macron, work out nasalisation, then insert syllable breaks.
-- Two placeholder letters are used internally: "z" stands for a hyphen in the
-- input (it becomes a syllable break, or disappears between identical nasal
-- vowels) and a trailing "x" is an end-of-text sentinel so that a final vowel
-- or nasal has a following character to match against. A literal "x" or "z"
-- in the input is therefore treated like those placeholders.
--
--   text:     tone-marked term, in any Unicode normalisation form
--   loanword: truthy to skip nasalising vowels that directly follow "n"/"m"
--   downstep: optional number (or numeric string); the "."-separated chunk
--             with that 1-based index is prefixed with "ꜜ". Chunks are split
--             on "." only, so a space does not start a new chunk.
-- Returns the transcription in NFC, without enclosing slashes.
function export.IPA(text, loanword, downstep)
	text = lower(text)
	text = gsub(text, "%-", "z")
	text = toNFD(text)
	-- After decomposition, fold base letter + dot below back into one
	-- character so the letter rules further down can match it directly.
	-- Pairs that are not listed are left as they are.
	text = gsub(text, ".[" .. DOT .. "]", {
		["e" .. DOT] = "ẹ",
		["o" .. DOT] = "ọ",
		["s" .. DOT] = "ṣ",
		["i" .. DOT] = "ị",
		["u" .. DOT] = "ụ",
	})

	-- Drop apostrophes first so they cannot leave doubled or edge spaces,
	-- then collapse and trim whitespace.
	text = gsub(text, "'", "")
	text = gsub(text, "%s+", " ")
	text = gsub(text, "^ ", "")
	text = gsub(text, " $", "")

	-- The sentinel must be added only after trimming: appended earlier, it
	-- hides a trailing space from the " $" rule and the space ends up in the
	-- output.
	text = text .. "x"

	-- Letters to IPA. Digraphs come before their first letter, and "j" is
	-- rewritten before "y" turns into "j".
	text = gsub(text, "ẹ", "ɛ")
	text = gsub(text, "gb", "ɡ͡b")
	text = gsub(text, "gh", "ɣ")
	text = gsub(text, "gw", "ɡʷ")
	text = gsub(text, "g", "ɡ")
	text = gsub(text, "ị", "ɪ")
	text = gsub(text, "j", "d͡ʒ")
	text = gsub(text, "ọ", "ɔ")
	text = gsub(text, "p", "k͡p")
	text = gsub(text, "r", "ɾ")
	text = gsub(text, "ṣ", "ʃ")
	text = gsub(text, "ụ", "ʊ")
	text = gsub(text, "y", "j")

	-- A tone-bearing (syllabic) n becomes ŋ. A tone-bearing m stays m, as in
	-- the `nasals` table; rewriting it to ŋ as well would make the m-specific
	-- syllable rule near the end of this function unreachable.
	text = gsub(text, "n([́̀̄])", "ŋ%1")

	-- A vowel directly followed by a letter, space or placeholder carries no
	-- tone mark: give it a macron. Repeated because adjacent vowels overlap.
	text = gsub_repeatedly(text, "([aeɛiɪoɔuʊ])([aeɛiɪoɔuʊbdfɡhjklmnŋɾsʃtwxzɣ ])", "%1" .. MACRON .. "%2")

	-- Vowel + tone + n before a consonant, space or placeholder: the n is
	-- dropped and the vowel takes the tilde (placed before the tone mark).
	text = gsub(text, "([aɛiɪɔuʊ])([́̀̄])n([bdfɡhjklmnŋɾsʃtwxzɣ ])", "%1" .. NASAL .. "%2%3")

	--TODO: FIX LOANWORDS
	if not loanword then
		-- Outside loanwords, these vowels are nasalised after n and m.
		text = gsub(text, "n([aiɪuʊ])([́̀̄])", "n%1" .. NASAL .. "%2")
		text = gsub(text, "m([iɪɔuʊ])([́̀̄])", "m%1" .. NASAL .. "%2")
	end

	-- Nasality spreads from a nasal vowel onto a vowel that follows it directly.
	-- NOTE(review): the second class omits ɪ and ʊ, unlike the first — confirm
	-- whether that is intended.
	text = gsub_repeatedly(text, "([aɛiɪɔuʊ])([̃])([́̀̄])([aɛiɔu])([́̀̄])", "%1%2%3%4" .. NASAL .. "%5")

	-- A hyphen between a nasal vowel and an identical vowel is removed
	-- instead of becoming a syllable break.
	text = gsub(text, "a([̃])([́̀̄])za", "a%1%2a")
	text = gsub(text, "ɛ([̃])([́̀̄])zɛ", "ɛ%1%2ɛ")
	text = gsub(text, "i([̃])([́̀̄])zi", "i%1%2i")
	text = gsub(text, "ɪ([̃])([́̀̄])zɪ", "ɪ%1%2ɪ")
	text = gsub(text, "ɔ([̃])([́̀̄])zɔ", "ɔ%1%2ɔ")
	text = gsub(text, "u([̃])([́̀̄])zu", "u%1%2u")
	text = gsub(text, "ʊ([̃])([́̀̄])zʊ", "ʊ%1%2ʊ")

	-- Every remaining hyphen is a syllable break.
	text = gsub(text, "z", ".")

	-- Split into syllables: put "." after a vowel (+ optional tilde) + tone
	-- when a consonant or a *different* vowel follows. Each follower class
	-- lists every vowel except the one being matched, so a doubled vowel is
	-- not split. (The oral i and ʊ rules used to lack "o" and "u"
	-- respectively, unlike their nasal twins.)
	text = gsub(text, "a([́̀̄])([eɛiɪoɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "a%1.%2")
	text = gsub(text, "a([̃])([́̀̄])([eɛiɪoɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "a%1%2.%3")
	text = gsub(text, "e([́̀̄])([aɛiɪoɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "e%1.%2")
	text = gsub(text, "ɛ([́̀̄])([aeiɪoɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "ɛ%1.%2")
	text = gsub(text, "ɛ([̃])([́̀̄])([aeiɪoɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "ɛ%1%2.%3")
	text = gsub(text, "i([́̀̄])([aeɛɪoɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "i%1.%2")
	text = gsub(text, "i([̃])([́̀̄])([aeɛɪoɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "i%1%2.%3")
	text = gsub(text, "ɪ([́̀̄])([aeɛioɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "ɪ%1.%2")
	text = gsub(text, "ɪ([̃])([́̀̄])([aeɛioɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "ɪ%1%2.%3")
	text = gsub(text, "o([́̀̄])([aeɛiɪɔuʊbdfɡhjklmnŋɾsʃtwɣ])", "o%1.%2")
	text = gsub(text, "ɔ([́̀̄])([aeɛiɪouʊbdfɡhjklmnŋɾsʃtwɣ])", "ɔ%1.%2")
	text = gsub(text, "ɔ([̃])([́̀̄])([aeɛiɪouʊbdfɡhjklmnŋɾsʃtwɣ])", "ɔ%1%2.%3")
	text = gsub(text, "u([́̀̄])([aeɛiɪɔoʊbdfɡhjklmnŋɾsʃtwɣ])", "u%1.%2")
	text = gsub(text, "u([̃])([́̀̄])([aeɛiɪɔoʊbdfɡhjklmnŋɾsʃtwɣ])", "u%1%2.%3")
	text = gsub(text, "ʊ([́̀̄])([aeɛiɪoɔubdfɡhjklmnŋɾsʃtwɣ])", "ʊ%1.%2")
	text = gsub(text, "ʊ([̃])([́̀̄])([aeɛiɪoɔubdfɡhjklmnŋɾsʃtwɣ])", "ʊ%1%2.%3")
	-- Syllabic nasals (tone-bearing m / ŋ) form syllables of their own.
	text = gsub_repeatedly(text, "m([́̀̄])([aeɛiɪoɔuʊbdfɡhjklnŋɾsʃtwɣ])", "m%1.%2")
	text = gsub_repeatedly(text, "ŋ([́̀̄])([aeɛiɪoɔuʊbdfɡhjklmnɾsʃtwɣ])", "ŋ%1.%2")

	-- Remove the end-of-text sentinel.
	text = gsub(text, "x", "")

	-- Adds downstep ꜜ before provided syllable
	if downstep then
		downstep = tonumber(downstep) -- nil for non-numeric input: nothing is marked
		local i = 0
		-- Byte-based gsub is safe here because "." is ASCII. Returning nil
		-- from the callback keeps the chunk unchanged.
		local result = text:gsub("[^.]+", function(syllable)
			i = i + 1
			if i == downstep then
				return "ꜜ" .. syllable
			end
		end)
		text = result
	end

	return toNFC(text)
end

-- Template entry point. Reads the arguments of the parent frame:
--   positional arguments: tone-marked terms (empty ones are skipped)
--   loan:     "1" to transcribe the terms as loanwords
--   downstep: passed through to export.IPA unchanged
-- Each term is transcribed, wrapped in slashes and handed to
-- m_IPA.format_IPA_full. Raises an error when the first positional argument
-- is absent.
function export.show(frame)
	local args = frame:getParent().args

	if not args[1] then
		error("Please provide a tone marked term.")
	end

	local is_loan = args["loan"] == "1"
	local downstep = args["downstep"]

	local results = {}
	for _, term in ipairs(args) do
		if term ~= "" then
			local ipa = export.IPA(term, is_loan, downstep)
			results[#results + 1] = { pron = "/" .. ipa .. "/", note = nil }
		end
	end

	return m_IPA.format_IPA_full { lang = lang, items = results }
end

return export