Bước tới nội dung

Mô đun:kpv-IPA

Từ điển mở Wiktionary
-- Table of functions returned to callers of this module.
local export = {}

-- Language code handed to Module:languages by export.main.
local langcode = "kpv"

-- Pattern character classes over the Cyrillic input.
-- vowel:        every letter that split_phonemes tokenises as a vowel.
-- vowel_soft:   vowels that make an immediately preceding consonant soft
--               (checked in export.convert).
-- vowel_prej:   vowels that receive a leading "j" in export.convert when the
--               previous token is not a consonant.
-- vowel_accent: the stress mark that may directly follow a vowel letter in the
--               input (presumably the combining acute accent -- confirm).
local vowel = "[аеёиіоӧуыэюя]"
local vowel_soft = "[еёиюя]"
local vowel_prej = "[еёюя]"
local vowel_accent = "́"

-- Cyrillic vowel letter -> IPA symbol used for the phonemic transcription.
local vowel_phonemic = {
	["а"] = "a", ["е"] = "e", ["ё"] = "o", ["и"] = "i",
	["і"] = "i", ["о"] = "o", ["ӧ"] = "ɘ", ["у"] = "u",
	["ы"] = "ɯ", ["э"] = "e", ["ю"] = "u", ["я"] = "a"
}

-- Overrides used for the phonetic transcription only; convert_vowel falls back
-- to vowel_phonemic for any letter that is not listed here.
local vowel_phonetic = {
	["а"] = "ä", ["ё"] = "o̞", ["о"] = "o̞", ["ы"] = "ɯ̈", ["я"] = "ä"
}

-- Every single letter that split_phonemes tokenises as a consonant.
local consonant = "[бвгджзйклмнпрстфхцчшщ]"

-- Classes tested against the first letter of following consonant tokens in
-- export.convert (phonetic mode only):
-- consonant_voiced:   if any consonant before the next vowel matches, the
--                     current consonant uses its "voiced" column.
-- consonant_unvoiced: if the next consonant matches, the current letter is
--                     replaced through consonant_devoice (when it has an entry).
local consonant_voiced = "[бгджз]"
local consonant_unvoiced = "[птцч]"

-- Letter substitutions applied before lookup when a consonant is devoiced.
local consonant_devoice = {
	["б"] = "п",
	["д"] = "т",
}

-- Phonemic IPA for every consonant letter and digraph.
-- Each entry is { normal, soft }; convert_consonant picks index 2 when the
-- consonant is soft and index 1 otherwise.
local consonant_phonemic = {
	["б"] =  { "b",	"b" },
	["в"] =  { "ʋ",	"ʋ" },
	["г"] =  { "ɡ",	"ɡ" },
	["д"] =  { "d",	"ɟ" },
	["дз"] = { "d͡ʑ", 	"d͡ʑ" },
	["дж"] = { "d͡ʒ",	"d͡ʒ" },
	["ж"] =  { "ʒ",	"ʒ" },
	["з"] =  { "z",	"ʑ" },
	["й"] =  { "j",	"j" },
	["к"] =  { "k",	"k" },
	["л"] =  { "l",	"ʎ" },
	["м"] =  { "m",	"m" },
	["н"] =  { "n",	"ɲ" },
	["п"] =  { "p",	"p" },
	["р"] =  { "r",	"r" },
	["с"] =  { "s",	"ɕ" },
	["т"] =  { "t",	"c" },
	["тш"] = { "t͡ʃ",	"t͡ʃ" },
	["ф"] =  { "f",	"f" },
	["х"] =  { "x",	"x" },
	["ц"] =  { "t͡s",	"t͡s" },
	["ч"] =  { "t͡ɕ",	"t͡ɕ" },
	["ш"] =  { "ʃ",	"ʃ" },
	["щ"] =  { "ʃː",	"ʃː" },
}

-- Phonetic IPA overrides; consonants not listed here use consonant_phonemic
-- even in phonetic mode.
-- Each entry is { normal, soft, voiced, voiced+soft }; convert_consonant adds 2
-- to the normal/soft index when the consonant is voiced.
local consonant_phonetic = {
	["дж"] = { "d͡ʒ̺",	"d͡ʒ̺",	"d͡ʒ̺",	"d͡ʒ̺" },
	["ж"] =  { "ʒ̺",	"ʒ̺",	"ʒ̺",	"ʒ̺" },
	["к"] =  { "k",	"k",	"ɡ",	"ɡ" },
	["л"] =  { "ɫ",	"ʎ",	"ɫ",	"ʎ" },
	["п"] =  { "p",	"p",	"b",	"b" },
	["с"] =  { "s",	"ɕ",	"z",	"ʑ" },
	["т"] =  { "t",	"c",	"d",	"ɟ" },
	["тш"] = { "t͡ʃ̺",	"t͡ʃ̺",	"d͡ʒ̺",	"d͡ʒ̺" },
	["ц"] =  { "t͡s",	"t͡s",	"d͡z",	"d͡z" },
	["ч"] =  { "t͡ɕ",	"t͡ɕ",	"d͡ʑ",	"d͡ʑ" },
	["ш"] =  { "ʃ̺",	"ʃ̺",	"ʃ̺",	"ʃ̺" },
	["щ"] =  { "ʃ̺ː",	"ʃ̺ː",	"ʃ̺ː",	"ʃ̺ː" },
}

-- Pattern pieces that describe the IPA *output*; export.convert uses them in
-- its post-processing substitutions.
-- vowl / vowld: a vowel symbol and the diacritics that may follow it
--               (used to collapse a doubled vowel into a long one).
-- cons / consd: a consonant symbol and the diacritic that may follow it
--               (used to collapse a doubled consonant into a long one).
-- NOTE(review): cons does not contain "ʋ" (emitted for в) and appears not to
-- contain the "ɡ" emitted for г, so doubled в/г would not be collapsed --
-- confirm whether that is intentional.
local vowl = "[aäeiɨouɯyɘ]"
local vowld = "[̞̈]"
local cons = "[bcdfgjklmnprstvxzɕɟɲʃʎʑʒ]"
local consd = "[̺]"
-- An affricate is matched as affricate1 followed by affricate2 (tie bar plus
-- one of the affricate2b symbols).
local affricate1 = "[td]"
local affricate2b = "[sɕʃzʑʒ]"
local affricate2 = "͡" .. affricate2b
-- Result of assimilating s/z to a following sound. The key is the first
-- character of the sibilant followed by the first character of that sound.
local postalveolar = {
	["sʃ"] = "ʃ̺",
	["zʃ"] = "ʒ̺",
	["sʒ"] = "ʒ̺",
	["zʒ"] = "ʒ̺",
}

-- Look up the IPA symbol for one Cyrillic vowel letter.
-- c:           the vowel letter (without any stress mark).
-- is_phonetic: when truthy, an entry in vowel_phonetic takes precedence.
-- Returns the vowel_phonetic entry if requested and present, otherwise the
-- vowel_phonemic entry (nil when the letter is in neither table).
local function convert_vowel(c, is_phonetic)
	local narrow = is_phonetic and vowel_phonetic[c]
	return narrow or vowel_phonemic[c]
end

-- Look up the IPA symbol for one consonant letter or digraph.
-- c:           key into consonant_phonemic / consonant_phonetic.
-- is_phonetic: when truthy and c has a consonant_phonetic entry, that entry
--              is used instead of the phonemic one.
-- is_soft:     selects the "soft" column.
-- is_voiced:   selects the "voiced" columns; only phonetic entries have them,
--              so the flag is ignored for phonemic lookups.
-- Raises an error if c has no entry in consonant_phonemic and no phonetic
-- entry applies (indexing a nil value).
local function convert_consonant(c, is_phonetic, is_soft, is_voiced)
	local column = 1
	if is_soft then
		column = 2
	end

	local narrow = is_phonetic and consonant_phonetic[c]
	if not narrow then
		return consonant_phonemic[c][column]
	end

	if is_voiced then
		column = column + 2
	end
	return narrow[column]
end

-- Consonant digraphs, keyed by their first letter; the value lists every letter
-- that can complete the digraph (so this encodes дз, дж and тш).
-- The value is spliced into a pattern character class by split_phonemes and
-- handle_long_digraphs.
local digraphs = {
	["д"] = "зж",
	["т"] = "ш"
}

-- Tokenise Cyrillic text into a sequence of { kind, text } pairs.
--
-- kind is one of:
--   "v"  vowel letter, text includes a trailing stress mark if one was present
--   "c"  consonant letter or digraph
--   "s"  soft sign (text is nil)
--   "h"  hard sign (text is nil)
--   "w"  a whitespace character
--   nil  literal output: "ˈ", "." (hyphen inside the string), "-" (hyphen at
--        the very start or very end) or any unrecognised character
--
-- A "ˈ" token is inserted at the start of every whitespace-separated word that
-- contains no stress-marked vowel, except for a first word that begins with a
-- hyphen.
local function split_phonemes(s)
	local phonemes = { }
	local len = mw.ustring.len(s)
	local index = 1
	-- Slot where a default stress mark goes for the current word; nil once the
	-- word is known not to need one.
	local stress_at = 1

	local vowel_pattern = "^" .. vowel .. vowel_accent .. "?"
	local consonant_pattern = "^" .. consonant

	local function push(kind, text)
		phonemes[#phonemes + 1] = { kind, text }
	end

	-- A leading hyphen (suffix notation) is kept and suppresses default stress.
	if mw.ustring.match(s, "^-") then
		push(nil, "-")
		stress_at = nil
		index = 2
	end

	while index <= len do
		local advance = 1
		local v = mw.ustring.match(s, vowel_pattern, index)
		if v then
			push("v", v)
			if mw.ustring.find(v, vowel_accent) then
				-- explicit stress: no default mark for this word
				stress_at = nil
			end
			advance = mw.ustring.len(v)
		else
			local c = mw.ustring.match(s, consonant_pattern, index)
			if c then
				-- extend to a digraph when the next letter completes one
				local completions = digraphs[c]
				if completions then
					c = mw.ustring.match(s, "^" .. c .. "[" .. completions .. "]", index) or c
				end
				push("c", c)
				advance = mw.ustring.len(c)
			else
				local ch = mw.ustring.sub(s, index, index)
				if ch == "ь" then
					push("s", nil)
				elseif ch == "ъ" then
					push("h", nil)
				elseif ch == "-" then
					-- final hyphen is kept, any other becomes a "." separator
					push(nil, index < len and "." or "-")
				elseif mw.ustring.match(s, "^%s", index) then
					-- word boundary: settle the finished word's default stress,
					-- then start tracking the next word
					if stress_at then
						table.insert(phonemes, stress_at, { nil, "ˈ" })
					end
					push("w", ch)
					stress_at = #phonemes + 1
				else
					-- anything unrecognised is passed through verbatim
					push(nil, ch)
				end
			end
		end
		index = index + advance
	end

	if stress_at then
		table.insert(phonemes, stress_at, { nil, "ˈ" })
	end
	return phonemes
end

-- Test the first consonant token after position i.
-- phonemes:  token list produced by split_phonemes.
-- i:         index of the current token; scanning starts at i + 1.
-- consonant: pattern matched against the consonant's text (this parameter
--            shadows the module-level `consonant` class inside the function).
-- Non-vowel, non-consonant tokens are skipped. Returns true when the first
-- consonant found matches the pattern; returns false when it does not, when a
-- vowel comes first, or when the list ends. The result is always a boolean,
-- like has_next_consonant.
local function is_next_consonant(phonemes, i, consonant)
	for j = i + 1, #phonemes do
		local kind, text = phonemes[j][1], phonemes[j][2]
		if kind == "v" then
			return false
		elseif kind == "c" then
			-- only the first consonant decides; coerce the match to a boolean
			return mw.ustring.match(text, consonant) ~= nil
		end
	end
	return false
end

-- Test whether any consonant token between position i and the next vowel
-- matches a pattern.
-- phonemes:  token list produced by split_phonemes.
-- i:         index of the current token; scanning starts at i + 1.
-- consonant: pattern matched against each consonant's text (this parameter
--            shadows the module-level `consonant` class inside the function).
-- Returns true on the first matching consonant, false when a vowel or the end
-- of the list is reached first.
local function has_next_consonant(phonemes, i, consonant)
	for j = i + 1, #phonemes do
		local entry = phonemes[j]
		local kind = entry[1]
		if kind == "v" then
			return false
		end
		if kind == "c" and mw.ustring.match(entry[2], consonant) then
			return true
		end
	end
	return false
end

-- Rewrite abbreviated spellings of doubled consonants so that each half is a
-- complete unit for split_phonemes.
--   * a doubled д/з/л/н/с/т before a soft sign or soft vowel gets a soft sign
--     after its first half (e.g. "лля" -> "льля");
--   * a doubled first letter of a digraph followed by the completing letter is
--     expanded to the digraph written twice (e.g. "ддз" -> "дздз").
-- Returns the rewritten string.
local function handle_long_digraphs(s)
	local text = mw.ustring.gsub(s, "([дзлнст])%1([ьяеиёю])", "%1ь%1%2")
	for first, completions in pairs(digraphs) do
		local doubled = first .. first .. "([" .. completions .. "])"
		local expanded = first .. "%1" .. first .. "%1"
		text = mw.ustring.gsub(text, doubled, expanded)
	end
	return text
end

-- Transcribe Cyrillic text into IPA.
--
-- s:           the text to transcribe; export.main passes it lower-cased.
--              A stress mark may follow a vowel letter.
-- is_phonetic: truthy for the phonetic transcription (voicing/devoicing,
--              sibilant assimilation, phonetic symbol tables), falsy for the
--              phonemic one.
-- Returns the IPA string without surrounding slashes or brackets.
--
-- The text is normalised by handle_long_digraphs, tokenised by split_phonemes,
-- rendered token by token, and finally post-processed with pattern
-- substitutions (assimilation, long consonants, long vowels).
function export.convert(s, is_phonetic)
	local phonemes = split_phonemes(handle_long_digraphs(s))
	-- Output pieces are collected here and joined once.
	local out = {}
	-- Set when the stress mark for an accented vowel was already written in
	-- front of the consonant preceding it.
	local stress_preinserted = false

	for i, phoneme in ipairs(phonemes) do
		local kind, x = phoneme[1], phoneme[2]
		if kind == "v" then
			-- vowel, optionally followed by the stress mark
			local v, accent = mw.ustring.match(x, "(" .. vowel .. ")(" .. vowel_accent .. "?)")
			if accent ~= "" and not stress_preinserted then
				out[#out + 1] = "ˈ"
			end
			stress_preinserted = false
			-- е/ё/ю/я start with /j/ unless they directly follow a consonant
			if (i < 2 or phonemes[i - 1][1] ~= "c") and mw.ustring.match(v, vowel_prej) then
				out[#out + 1] = "j"
			end
			out[#out + 1] = convert_vowel(v, is_phonetic)
		elseif kind == "c" then
			-- consonant
			local c = x
			local soft = false
			local voiced = false
			local following = phonemes[i + 1]
			if following then
				-- soft before a soft vowel or a soft sign
				soft = (following[1] == "v" and mw.ustring.match(following[2], "^" .. vowel_soft)) or following[1] == "s"
				if is_phonetic then
					if has_next_consonant(phonemes, i, "^" .. consonant_voiced) then
						voiced = true
					elseif consonant_devoice[c] and is_next_consonant(phonemes, i, "^" .. consonant_unvoiced) then
						c = consonant_devoice[c]
					end
				end
				-- the stress mark goes in front of the consonant that directly
				-- precedes an accented vowel
				if following[1] == "v" and mw.ustring.find(following[2], vowel_accent) then
					out[#out + 1] = "ˈ"
					stress_preinserted = true
				end
			end
			out[#out + 1] = convert_consonant(c, is_phonetic, soft, voiced)
		elseif kind == "w" or kind == nil then
			-- whitespace and literal tokens are copied through
			out[#out + 1] = x
		end
		-- "s" (soft sign) and "h" (hard sign) tokens produce no output of
		-- their own; they only influence neighbouring tokens.
	end

	local result = table.concat(out)

	if is_phonetic then
		-- Assimilated form of a sibilant before `target`. Pairs without an
		-- entry in postalveolar (for example s before t͡s or t͡ɕ, which the
		-- affricate pattern below also matches) leave the sibilant unchanged
		-- instead of concatenating nil and raising an error.
		local function assimilate(sibilant, target)
			local key = mw.ustring.sub(sibilant, 1, 1) .. mw.ustring.sub(target, 1, 1)
			return postalveolar[key] or sibilant
		end

		-- assimilation of /s/ and /z/ to following /ʃ/, /ʒ/ or affricate with either
		result = mw.ustring.gsub(result, "([sz]ʲ?)([.ˈ%s]*)([ʃʒ]" .. consd .. "*ʲ?)",
			function (orig, space, palatal)
				return assimilate(orig, palatal) .. space .. palatal
			end)
		result = mw.ustring.gsub(result, "([sz]ʲ?)([.ˈ%s]*)(" .. affricate1 .. "͡)(" .. affricate2b .. consd .. "*ʲ?)",
			function (orig, space, plosive, palatal)
				return assimilate(orig, palatal) .. space .. plosive .. palatal
			end)
	end

	-- long consonants: a doubled affricate or consonant becomes one plus "ː"
	result = mw.ustring.gsub(result, "(" .. affricate1 .. affricate2 .. consd .. "*ʲ?)%1", "%1ː")
	if is_phonetic then
		-- a doubled plosive in front of an affricate's second element
		result = mw.ustring.gsub(result, "(" .. affricate1 .. ")%1(" .. affricate2 .. consd .. "*ʲ?)", "%1%2ː")
	end
	result = mw.ustring.gsub(result, "(" .. cons .. ")%1([^͡ʲ])", "%1ː%2")
	result = mw.ustring.gsub(result, "(" .. cons .. ")%1$", "%1ː")
	-- long vowels
	result = mw.ustring.gsub(result, "(" .. vowl .. vowld .. "*)%1", "%1ː")

	return result
end

-- Template entry point: format the phonemic and phonetic transcriptions of a
-- word through Module:IPA.
-- frame: when it is a table, the first positional argument of its parent frame
--        overrides the word; otherwise (or when that argument is absent) the
--        current page title is used.
-- Returns whatever Module:IPA's format_IPA_full returns.
function export.main(frame)
	local title = mw.title.getCurrentTitle().text
	if type(frame) == "table" then
		local override = frame:getParent().args[1]
		if override then
			title = override
		end
	end

	-- The tables in this module only list lower-case letters.
	local word = mw.ustring.lower(title)
	local phonemic = export.convert(word, false)
	local phonetic = export.convert(word, true)

	local format_IPA_full = require("Module:IPA").format_IPA_full
	local lang = require("Module:languages").getByCode(langcode)
	return format_IPA_full {
		lang = lang,
		items = {
			{pron = "/" .. phonemic .. "/"},
			{pron = "[" .. phonetic .. "]"}
		},
	}
end

-- Expose export.convert and export.main to callers of this module.
return export