Bước tới nội dung

Mô đun:zhx-sic-pron

Từ điển mở Wiktionary
local export = {}
local m_string_utils = require("Module:string utilities")

local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local gmatch = m_string_utils.gmatch
local gsplit = mw.text.gsplit
local lower = m_string_utils.lower
local upper = m_string_utils.upper

local initialConv = {
	["b"] = "p", ["d"] = "t", ["g"] = "k",
	["p"] = "pʰ", ["t"] = "tʰ", ["k"] = "kʰ",
	["z"] = "t͡s", ["j"] = "t͡ɕ",
	["c"] = "t͡sʰ", ["q"] = "t͡ɕʰ",
	["m"] = "m", ["n"] = "n", ["ny"] = "nʲ", ["ng"] = "ŋ",
	["f"] = "f", ["s"] = "s", ["x"] = "ɕ", ["h"] = "x",
	["w"] = "v", ["r"] = "z",
	[""] = "",
}

-- note that 'ir' is for internal use by the code and not used in actual sichuanese pinyin
local finalConv = {
	["ir"] = "z̩", ["er"] = "ɚ",

	["a"] = "a", ["o"] = "o", ["e"] = "ɛ",
	["ai"] = "ai", ["ei"] = "ei", ["ao"] = "au", ["ou"] = "əu",
	["an"] = "an", ["en"] = "ən", ["ang"] = "aŋ", ["ong"] = "oŋ",

	["i"] = "i", ["ia"] = "ia", ["ie"] = "iɛ",
	["iai"] = "iɛi", ["iao"] = "iau", ["iu"] = "iəu",
	["ian"] = "iɛn", ["in"] = "in", ["iang"] = "iaŋ",

	["u"] = "u", ["ua"] = "ua", ["ue"] = "uɛ",
	["uai"] = "uai", ["ui"] = "uei",
	["uan"] = "uan", ["un"] = "uən", ["uang"] = "uaŋ",

	["ü"] = "y", ["üo"] = "yo", ["üe"] = "ye",
	["üan"] = "yan", ["ün"] = "yn", ["iong"] = "yoŋ",
}

local toneConv = {
	["1"] = "⁵⁵", ["2"] = "²¹", ["3"] = "⁵³", ["4"] = "²¹³", ["-"] = "⁻",
}

local initialConv_swz = {
	["j"] = "g", ["q"] = "k", ["n"] = "l", ["ny"] = "n", ["h"] = "x", ["w"] = "", ["r"] = "rh",
}
local finalConv_swz = {
	["ir"] = "", ["er"] = "r", ["ong"] = "ung", ["uang"] = "uong", ["ü"] = "y", ["üo"] = "iuo", ["üe"] = "ye", ["üan"] = "uan", ["ün"] = "un", ["iong"] = "yng",
}

local function fix(initial, final)
	-- ju /tɕy/
	if find(initial, '^[yjqx]$') and find(final, '^u') then
		final = gsub(final, '^u', 'ü')
	end
	
	if initial == 'y' then
		initial = ''
		if final == 'ou' then
			final = 'iu'
		elseif not find(final, '^[iü]') then -- yin /in/, yuan /yan/, ya /ia/
			final = 'i' .. final
		end
	end
	
	-- wei /uei/ (/-uei/ is usually spelled <-ui> but /uei/ is not <wui>)
	-- wu /vu/
	-- wai /uai/
	-- wen /uən/
	if initial == 'w' then
		initial = (final == 'u') and 'w' or ''
		if final == 'ei' then
			final = 'ui'
		elseif final == 'en' then
			final = 'un'
		elseif final ~= 'u' then
			final = 'u' .. final
		end
	end

	-- distinguish the two 'i's
	if find(initial, '^[zcsr]$') and final == 'i' then
		final = 'ir'
	end

	return initial, final
end

local function warn(initial, final, tone)
	if initial == "" and find(final, "^[iu]") then
		error("Syllables in Sichuanese Pinyin do not begin with i-/u-. Add y-/w-.")
	end
	
	if not initialConv[initial] and initial ~= "y" then
		error("Invalid initial: " .. initial)
	end

	if not finalConv[final] and final ~= "uo" then
		error("Invalid final: " .. final)
	end

	if tone == "5" then
		error("Chengdu does not have the fifth tone anymore. Use 2.")
	end
end

function export.convert(text, scheme)
	if type(text) == "table" then
		text, scheme = text.args[1], text.args[2]
	end

	local result = {}
	for word in gsplit(text, '/') do
		local converted = {}

		local extra2 = match(word, '^[^A-Za-zü]*')
		for syllable in gmatch(word, '[A-Za-zü]+[%d%-]+[^A-Za-zü]*') do
			local initial, final, erhua, tone, extra = match(syllable, '^([BDGPTKZJCQMNFSXHVRWYbdgptkzjcqmnfsxhvrwy]?[yg]?)([AEOaiueoüng]+)(r?)([%d%-]+)([^A-Za-zü]*)$')
			local caps = false

			if find(initial .. final, '[A-Z]') then
				caps = true
				initial, final = lower(initial), lower(final)
			end

			warn(initial, final, tone)

			initial, final = fix(initial, final)
			if final == 'e' and erhua == 'r' then
				final, erhua = 'er', ''
			end

			if scheme == 'IPA' then
				initial = initialConv[initial]
				final = finalConv[final]
				tone = gsub(tone, '.', function(char) return toneConv[char] end)

				if erhua == 'r' then
					if find(final, '^y') then -- 撮口呼
						final = 'yɚ'
					elseif find(final, '^i') then -- 齊齒呼
						final = 'iɚ'
					elseif find(final, '^u') then -- 合口呼
						final = 'uɚ'
					elseif (final == 'o' or final == 'oŋ') and find(initial, '^[pmfv]') then
						final = 'ɚ'
					elseif final == 'o' or final == 'oŋ' then
						final = 'uɚ'
					else -- 開口呼
						final = 'ɚ'
					end
				end

				syllable = initial .. final .. tone

				table.insert(converted, syllable)
			elseif scheme == 'SWZ' then
				initial = initialConv_swz[initial] or initial
				final = finalConv_swz[final] or final

				tone = gsub(tone, '(%d)%-(%d)', '%2')

				-- XXX: what happens with erhua? (disabled output for now)
				-- cf the given example 貓(mer)
				if erhua == 'r' then return false end

				if tone == '3' and (final == 'a' or final == 'ai') then
					final = 'a' .. final
				end

				syllable = initial .. final

				if caps then syllable = gsub(syllable, '^.', upper) end

				table.insert(converted, '@' .. syllable .. extra)
			else
				error('Convert to what representation?')
			end
		end

		if scheme == 'IPA' then
			local text = '/' .. table.concat(converted, ' ') .. '/'
			table.insert(result, text)
		elseif scheme == 'SWZ' then
			local text = table.concat(converted, '')
			text = gsub(text, '([a-z])@(u)', '%1w')
			text = gsub(text, '([a-z])@(i)', '%1j')
			text = gsub(text, '([ng])@(y)', '%1j')
			text = gsub(text, '@un', 'wen')
			text = gsub(text, '@', '')
			table.insert(result, extra2 .. text)
		end
	end

	if scheme == 'IPA' then
		return table.concat(result, ', ')
	else
		return table.concat(result, ' / ')
	end
end

return export