Bước tới nội dung

Mô đun:pl-pronunciation

Từ điển mở Wiktionary
-- Table of public functions; it is filled in below and returned at the end of
-- the file.
local export = {}

-- Language code handed to Module:languages to obtain the language object that
-- the formatting modules receive as `lang`.
local langcode = "pl"
local lang = require("Module:languages").getByCode(langcode)

-- m_IPA formats transcriptions (format_IPA_full); m_pl_IPA converts a spelling
-- to IPA (convert_to_IPA).
local m_IPA = require("Module:IPA")
local m_pl_IPA = require("Module:pl-IPA")

-- Letter inventories. Each inventory is also wrapped in [...] so that it can be
-- dropped into a pattern as a character class.
-- Note that "u" and "y" appear in both inventories.
local vowels = "aeiouyąęó"
local vowel = ("[%s]"):format(vowels)
local consonants = "bcćdfghjklłmnńpqrsśtuvwxyzźż"
local consonant = ("[%s]"):format(consonants)

-- Two-letter vowel spellings (not necessarily real phonetic diphthongs).
-- Key: first letter; value: pattern for the letters that may follow it.
local diphthong_i_v2 = "[aąoeęuói]"
local diphthongs = {
	a = "u",
	e = "u",
	i = diphthong_i_v2,
}

-- Two-letter consonant spellings.
-- Key: first letter; value: pattern for the letters that may follow it.
local digraphs = {
	c = "[hz]",
	d = "[zźż]",
	q = "u",
	r = "z",
	s = "z",
}

-- Word endings that get_word_suffix() reports as class 1
-- (past tense verb stressed antepenultimately).
local past_tense_suffixes = {
	"liśmy", "liście", "łyśmy", "łyście",
}

-- Word endings that get_word_suffix() reports as class 2
-- (Latin borrowing stressed antepenultimately).
-- Each ending is listed once: the lookup only asks whether any entry matches,
-- so repeated entries would add comparisons without changing the answer.
local latin_borrowing_suffixes = {
	"ika", "yka",
	"iki", "yki",
	"ice", "yce",
	"ikom", "ykom",
	"ikę", "ykę",
	"iką", "yką",
	"ikach", "ykach",
	"iko", "yko",
}

-- Decides whether a respelling is still "the same word" as the page title.
-- The two are considered equal when they match after
--   * removing "'" and "." from the respelling, and
--   * rewriting "j" as "i" before any letter of diphthong_i_v2 on both sides.
-- If this normalisation is changed, the two functions defined after this one
-- need to be changed as well.
local function is_respelling_close_enough(respelling, word)
	local j_before_vowel = "j(" .. diphthong_i_v2 .. ")"

	local function unify_j(text)
		-- the outer parentheses discard gsub's second result (the match count)
		return (mw.ustring.gsub(text, j_before_vowel, "i%1"))
	end

	local without_marks = mw.ustring.gsub(respelling, "['.]", "")
	return unify_j(word) == unify_j(without_marks)
end

-- Splits a spelling into a flat list of orthographic segments.
--
-- Parameters:
--   word  - the spelling to analyse. "'" and "-" are treated like ".", i.e. as
--           a syllable break.
--   oword - the spelling the segment texts are copied from. Matching is done on
--           a lower-cased copy of `word`, while the returned text is sliced out
--           of `oword`, so the letter case of `oword` is what callers see.
--
-- Returns a sequence of { kind, text } pairs, where kind is
--   "v" - a vowel letter or a two-letter entry of `diphthongs`,
--   "c" - a consonant letter or a two-letter entry of `digraphs`,
--   "b" - a syllable break (no text),
-- or nil when `word` contains a space or any character that is neither a
-- listed letter nor a break marker.
local function partition(word, oword)
	local parts = {}
	local lenword = mw.ustring.len(word)
	local pos = 1
	-- number of break markers consumed from `word` that have no counterpart in
	-- `oword`; subtracting it maps a position in `word` to one in `oword`
	local offset = 0
	word = mw.ustring.gsub(word, "['-]", ".")
	-- Lower-case once, outside the loop: the result does not depend on `pos`.
	local lowered = mw.ustring.lower(word)

	-- Consumes one letter at `pos`, or two when the pair is listed in
	-- `second_letters`, and records it as a segment of the given kind.
	local function take(kind, second_letters)
		local initial = mw.ustring.sub(lowered, pos, pos)
		local seq = 1
		local follow = second_letters[initial]
		if follow and mw.ustring.find(lowered, "^" .. initial .. follow, pos) then
			seq = 2
		end
		local from = pos - offset
		table.insert(parts, { kind, mw.ustring.sub(oword, from, from + seq - 1) })
		pos = pos + seq
	end

	while pos <= lenword do
		-- The vowel test comes first, so a letter present in both inventories
		-- is classified as a vowel.
		if mw.ustring.find(lowered, "^" .. vowel, pos) then
			take("v", diphthongs)
		elseif mw.ustring.find(lowered, "^" .. consonant, pos) then
			take("c", digraphs)
		elseif mw.ustring.find(word, "^% ", pos) then
			-- multiword, do not hyphenate
			return nil
		elseif mw.ustring.find(word, "^%.", pos) then
			-- syllable break; if `oword` has no "'" or "-" at the matching
			-- position, the marker exists only in `word`, so the two strings
			-- drift apart by one more character
			if not mw.ustring.find(oword, "^['-]", pos - offset) then
				offset = offset + 1
			end
			table.insert(parts, { "b", nil })
			pos = pos + 1
		else
			-- unrecognized symbol
			return nil
		end
	end
	return parts
end

-- Classifies a word by its ending, ignoring the characters ˈ ' . , ˌ.
-- Returns
--   2 if the word ends in an entry of latin_borrowing_suffixes,
--   1 if it ends in an entry of past_tense_suffixes (and in none of the above),
--   0 otherwise.
local function get_word_suffix(word)
	local bare = word:gsub("([ˈ'.,ˌ])", "")

	local function ends_with_any(suffixes)
		for _, suffix in ipairs(suffixes) do
			if bare:sub(-#suffix) == suffix then
				return true
			end
		end
		return false
	end

	-- the Latin-borrowing class wins when both lists match
	if ends_with_any(latin_borrowing_suffixes) then
		return 2
	end
	if ends_with_any(past_tense_suffixes) then
		return 1
	end
	return 0
end

-- Produces a list of syllable strings for a word.
--
-- Parameters:
--   word   - the spelling to split; break markers in it force a syllable break.
--   otitle - the spelling whose letters are used in the result (both arguments
--            are handed to partition()).
--
-- Returns a sequence of strings, or nil when partition() rejects the word.
--
-- Resulting behaviour of the loop below: a single consonant segment between two
-- vowel segments opens the following syllable (V.CV); of two consonant segments
-- between vowels, the first closes the preceding syllable (VC.CV). Consonants
-- left over without a vowel are attached to the preceding syllable when there
-- is one.
function export.generate_hyphenation(word, otitle)
	local parts = partition(word, otitle)
	if not parts then
		return nil
	end

	local syllables = {}
	local pending = ""        -- letters gathered for the syllable being built
	local has_nucleus = false -- true once `pending` contains a vowel segment
	local coda_len = nil      -- length of the consonant just closed into the
	                          -- previous syllable, while it may still move

	-- Stores `pending`: as a new syllable when it has a vowel or is the very
	-- first chunk, otherwise glued onto the last syllable. Does not reset it.
	local function flush()
		if #pending == 0 then
			return
		end
		if has_nucleus or #syllables == 0 then
			syllables[#syllables + 1] = pending
		else
			syllables[#syllables] = syllables[#syllables] .. pending
		end
	end

	for _, segment in ipairs(parts) do
		local kind, text = segment[1], segment[2]

		if kind == "c" then
			pending = pending .. text
			if has_nucleus then
				-- provisionally close the syllable after this consonant
				syllables[#syllables + 1] = pending
				pending = ""
				has_nucleus = false
				coda_len = mw.ustring.len(text)
			else
				coda_len = nil
			end
		elseif kind == "v" then
			if coda_len then
				-- the consonant closed a moment ago belongs to this syllable
				local previous = syllables[#syllables]
				pending = pending .. mw.ustring.sub(previous, -coda_len)
				syllables[#syllables] = mw.ustring.sub(previous, 1, -coda_len - 1)
			end
			if has_nucleus then
				-- two vowel segments in a row: each is its own syllable
				syllables[#syllables + 1] = pending
				pending = ""
			end
			has_nucleus = true
			coda_len = nil
			pending = pending .. text
		elseif kind == "b" then
			-- explicit syllable break
			flush()
			pending = ""
			has_nucleus = false
			coda_len = nil
		else
			-- unrecognized kind
			return nil
		end
	end

	flush()
	return syllables
end

-- Characters counted as vowels when looking for the rhyme in a transcription.
local ipavowel = "[aɛiɨɔu]"

-- Extracts the rhyme from an IPA transcription.
--
-- The rhyme starts at the second-to-last vowel of `ipa` (or at the only vowel
-- when there is just one) and runs to the end, with ˈ, ˌ and . removed.
-- Returns nil when `ipa` contains no vowel or when the extracted part contains
-- a space.
function export.generate_rhyme(ipa)
	local vowel_positions = {}
	local found = mw.ustring.find(ipa, ipavowel, 1)
	while found do
		vowel_positions[#vowel_positions + 1] = found
		found = mw.ustring.find(ipa, ipavowel, found + 1)
	end

	local count = #vowel_positions
	if count == 0 then
		return nil
	end

	local start
	if count == 1 then
		start = vowel_positions[1]
	else
		start = vowel_positions[count - 1]
	end

	local rhyme = mw.ustring.gsub(mw.ustring.sub(ipa, start), "[ˈˌ.]", "")
	if mw.ustring.find(rhyme, " ") then
		return nil -- copout, something must be wrong
	end
	return rhyme
end

-- Template entry point: builds the pronunciation section for a Polish entry.
--
-- Parameters:
--   frame - the invoking frame; arguments are read from frame:getParent().args
--           and validated through Module:parameters.
--
-- Returns a string: one bulleted line per item (IPA, Middle Polish IPA, audio,
-- rhymes, syllabification, homophones) joined with "\n", followed by the
-- formatted tracking categories.
--
-- Outline:
--   1. read the arguments and apply the long-name aliases
--      (hyphen, rhymes, homophones, audio, caption, fixstress);
--   2. emit the IPA line(s), either from explicit |ipa=| values, from the
--      respellings, or as a standard/colloquial pair when the stress fix
--      applies;
--   3. emit optional Middle Polish IPA and audio lines;
--   4. generate or parse syllabifications, generate or group rhymes;
--   5. emit rhyme, syllabification and homophone lines.
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = { list = true },

		["ipa"] = { list = true, default = nil, allow_holes = true },
		["qual"] = { list = true, allow_holes = true },
		["n"] = { list = true, allow_holes = true },
		["h"] = { list = true, allow_holes = true }, ["hyphen"] = {},
		["r"] = { list = true, allow_holes = true }, ["rhymes"] = {},
		["a"] = { list = true, default = nil }, ["audio"] = {},
		["ac"] = { list = true, default = nil }, ["caption"] = {},
		["hh"] = { default = "" }, ["homophones"] = {},
		["mp"] = { list = true, allow_holes = true },

		["q"] = { list = true, default = nil, allow_holes = true },
		["hp"] = { list = true, default = nil, allow_holes = true },
		["rp"] = { list = true, default = nil, allow_holes = true },
		["hhp"] = { list = true, default = nil, allow_holes = true },

		["nohyphen"] = { type = "boolean", default = false },
		["norhymes"] = { type = "boolean", default = false },
		["fs"] = { type = "boolean" }, ["fixstress"] = {},

		["title"] = { default = nil }, -- for debugging or demonstration only
	})

	local words, transcriptions, transcriptions_raw
	local lines = {}
	local categories = {}
	-- page title, unless overridden through |title=|
	local actual = args["title"] or mw.title.getCurrentTitle().text
	if next(args[1]) ~= nil then
		words = args[1]
	else
		-- no respelling given: pronounce the title itself
		words = { actual }
	end
	local multiword = mw.ustring.find(words[1], " ")
	local hyphenations = args["h"]
	local rhymes = args["r"]

	-- NOTE(review): "ipa" is declared with allow_holes, and # is only well
	-- defined on tables without holes — confirm |ipa2=| without |ipa=| behaves
	-- as intended.
	local ipa = args["ipa"]
	if #ipa < 1 then ipa = nil end

	-- |q=| takes precedence; |qual=| is used when no |q=| was given
	local qualifiers = args["q"]
	if not qualifiers or qualifiers.maxindex < 1 then qualifiers = args["qual"] end

	local mp = args["mp"]

	local hyphlabels = args["hp"]
	local rhymlabels = args["rp"]
	local nohyphen = args["nohyphen"]
	local norhymes = args["norhymes"]
	local fixstress = args["fs"]
	-- NOTE(review): "fixstress" is declared without a boolean type, so any
	-- value given for it is truthy here — confirm that is intended.
	if args["fixstress"] then fixstress = args["fixstress"] end
	local homophones = mw.text.split(args["hh"], ",")
	local homophonelabels = args["hhp"]
	-- splitting the empty default yields { "" }; treat that as "none"
	if #homophones == 1 and homophones[1] == "" then homophones = {} end
	local audio = {}
	local audios = args["a"]
	local captions = args["ac"]

	local word_suffix = 0
	if not ipa and #words == 1 then
		-- 0 - normal word
		-- 1 - past tense verb stressed antepenultimately
		-- 2 - Latin borrowing stressed antepenultimately
		word_suffix = get_word_suffix(words[1])
	end
	-- Keep the detected class only when the stress fix was requested, or when
	-- it was left unspecified and the word is a past-tense form.
	if not (fixstress or (fixstress == nil and word_suffix == 1)) then
		word_suffix = 0
	end

	-- Long-name aliases override the first item of the matching list.
	-- (Fixed: this used to write to an undefined variable "hyphenation", which
	-- raised an error whenever |hyphen=| was supplied.)
	if args["hyphen"] then hyphenations[1] = args["hyphen"] end
	if args["rhymes"] then rhymes[1] = args["rhymes"] end
	if args["homophones"] then homophones = mw.text.split(args["homophones"], ",") end
	if args["audio"] then audios[1] = args["audio"] end
	-- (Fixed: this used to test the undeclared parameter "captions", so
	-- |caption=| was never applied.)
	if args["caption"] then captions[1] = args["caption"] end

	-- automatic syllabification is only trusted when every respelling is
	-- essentially the title
	local respelling_ok = true
	for _, w in ipairs(words) do
		if not is_respelling_close_enough(w, actual) then
			respelling_ok = false
			break
		end
	end

	for i, audiofile in ipairs(audios) do
		if audiofile then
			table.insert(audio, {file = audiofile, caption = captions[i]})
		end
	end
	-- a lone "-" switches the corresponding line off
	if #hyphenations == 1 and hyphenations[1] == "-" then
		nohyphen = true
	end
	if #rhymes == 1 and rhymes[1] == "-" then
		norhymes = true
	end

	if word_suffix == 0 then
		if ipa then
			-- explicit transcriptions are shown as given
			transcriptions = {}
			transcriptions_raw = {}
			for i = 1, #ipa do
				local qual = qualifiers[i]
				table.insert(transcriptions, {
					pron = ipa[i],
					qualifiers = qual and { qual } or nil,
					note = args.n[i]
				})
			end
		else
			-- convert every respelling; the raw strings are kept for rhymes
			transcriptions = {}
			transcriptions_raw = {}
			for i = 1, #words do
				local qual = qualifiers[i]
				local ipaconv = m_pl_IPA.convert_to_IPA(words[i])
				table.insert(transcriptions_raw, ipaconv)
				table.insert(transcriptions, {
					pron = "/" .. ipaconv .. "/",
					qualifiers = qual and { qual } or nil,
					note = args.n[i]
				})
			end
		end
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions })
	else
		-- Two lines: the transcription with the stress moved one syllable
		-- earlier, then the transcription exactly as converted.
		-- (Fixed: the helpers below used to be accidental globals.)
		transcriptions = {}
		transcriptions_raw = {}
		local transcriptions2 = {}
		local qualifier1, qualifier2
		if word_suffix == 1 then
			qualifier1 = {"prescriptive standard; rarely used"}
			qualifier2 = {"colloquial; overall more common"}
		elseif word_suffix == 2 then
			qualifier1 = {"standard"}
			qualifier2 = {"colloquial; common in casual speech"}
		end
		local ipaconv = m_pl_IPA.convert_to_IPA(words[1])
		table.insert(transcriptions_raw, ipaconv)
		-- split at every stress mark and syllable dot, then re-join with the
		-- stress mark placed before the third piece from the end
		local ipaconv_syllables = mw.text.split(ipaconv, "([ˈ.])")
		for j, syl in ipairs(ipaconv_syllables) do
			if j == (#ipaconv_syllables - 2) then
				ipaconv_syllables[j] = "ˈ" .. syl
			elseif j ~= 1 then
				ipaconv_syllables[j] = "." .. syl
			end
		end
		local ipaconv_fixed_stress = table.concat(ipaconv_syllables)
		table.insert(transcriptions, {
			pron = "/" .. ipaconv_fixed_stress .. "/",
			qualifiers = qualifier1,
		})
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions })
		table.insert(transcriptions2, {
			pron = "/" .. ipaconv .. "/",
			qualifiers = qualifier2,
		})
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions2 })
	end

	if #mp > 0 then
		-- NOTE(review): this replaces `transcriptions`, so the syllable-count
		-- check further down compares against the first Middle Polish item
		-- whenever |mp=| is used — confirm that is intended.
		transcriptions = {}
		for i = 1, #mp do
			-- "+" stands for the page title
			if mp[i] == "+" then
				mp[i] = actual
			end
			local ipaconv = require("Module:zlw-mpl-IPA").convert_to_IPA_tables({mp[i]});
			for _, v  in ipairs(ipaconv) do
				table.insert(transcriptions, v);
			end
		end
		table.insert(lines, "* " .. require("Module:accent qualifier").format_qualifiers(lang, {"Middle Polish"}) .. " " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions })
	end

	for _, a in ipairs(audio) do
		table.insert(lines, "* " .. require("Module:audio").format_audio { lang = lang, file = a["file"], caption = a["caption"] })
	end

	-- From here on `hyphenations` holds lists of syllables, not strings.
	if not ipa and #hyphenations < 1 and respelling_ok and not multiword then
		local autohyph = export.generate_hyphenation(words[1], actual)
		if autohyph then
			table.insert(hyphenations, autohyph)
		end
	elseif #hyphenations >= 1 then
		-- manual syllabifications are written with "." between syllables
		local newhyphenations = {}
		for i, h in ipairs(hyphenations) do
			local t = {}
			for x in mw.text.gsplit(h, "[.]") do
				table.insert(t, x)
			end
			newhyphenations[i] = t
		end
		hyphenations = newhyphenations
	end

	if not norhymes then
		if not ipa and #rhymes < 1 and #transcriptions_raw > 0 then
			local autorhyme = export.generate_rhyme(transcriptions_raw[1])
			if autorhyme then
				table.insert(rhymes, autorhyme)
			end
		end

		if #rhymes > 0 then
			-- merge rhymes if they have identical labels
			-- (`false` marks "no group started yet"; a missing label is nil)
			local last_label = false
			local new_rhymes = {}
			local new_labels = {}
			local current_list = {}

			for i, r in ipairs(rhymes) do
				local label = rhymlabels[i]
				if last_label == label then
					table.insert(current_list, r)
				else
					if #current_list > 0 then
						table.insert(new_rhymes, current_list)
					end
					if last_label ~= false then
						table.insert(new_labels, last_label)
					end
					current_list = { r }
					last_label = label
				end
			end

			table.insert(new_rhymes, current_list)
			table.insert(new_labels, last_label)
			rhymes = new_rhymes
			rhymlabels = new_labels
		end

		for i, r in ipairs(rhymes) do
			local label = ""
			if rhymlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(rhymlabels[i])
			end
			if #r >= 1 then
				local sylkeys = {}
				local sylcounts = {}
				-- get all possible syllable counts from syllabifications
				for _, h in ipairs(hyphenations) do
					local hl = #h
					if hl > 0 and not sylkeys[hl] then
						table.insert(sylcounts, hl)
						sylkeys[hl] = true
					end
				end
				local rhymeobjs = {}
				for _, rhyme in ipairs(r) do
					table.insert(rhymeobjs, {rhyme = rhyme})
				end
				table.insert(lines, "* " .. require("Module:rhymes").format_rhymes(
					{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }) .. label)
			end
		end
	end

	if not nohyphen then
		-- track entries whose syllabification disagrees with the transcription
		if #transcriptions > 0 and #hyphenations > 0 then
			local syl_IPA = require("Module:syllables").getVowels(transcriptions[1].pron, lang)
			local syl_hyph = #hyphenations[1]
			if syl_IPA and syl_IPA ~= syl_hyph then
				table.insert(categories, "pl-pronunciation syllable count mismatch")
			end
		end

		-- track single-word titles that ended up without any syllabification
		if not actual:find("[ %.]") and #hyphenations < 1 then
			table.insert(categories, "pl-pronunciation without hyphenation")
		end

		for i, h in ipairs(hyphenations) do
			local label = ""
			if hyphlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(hyphlabels[i])
			end
			table.insert(lines, "* Tách âm tiết: " .. require("Module:links").full_link({lang = lang, alt = table.concat(h, "‧"), tr = "-"}) .. label)
		end
	end

	if #homophones > 0 then
		local homophone_objs = {}
		for i, h in ipairs(homophones) do
			table.insert(homophone_objs, {term = h, qq = homophonelabels[i] and {homophonelabels[i]} or nil})
		end
		table.insert(lines, "* " .. require("Module:homophones").format_homophones { lang = lang, homophones = homophone_objs })
	end

	return table.concat(lines, "\n") .. require("Module:utilities").format_categories(categories, lang)
end

-- The module's value is the table of public functions.
return export