Mô đun:is-pronunciation
Giao diện
local export = {}
local lang = require("Module:languages").getByCode("is")
local sc = require("Module:scripts").getByCode("Latn")
local m_ipa = require("Module:IPA")
-- Tag `text` via Module:script utilities, always passing this module's
-- language and script objects; `face` is forwarded unchanged.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end
-- Build a link to `term` via Module:links, always passing this module's
-- language and script objects; `face` is forwarded unchanged.
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end
-- local shortcuts for the mw.ustring / mw.text functions used below
local find, format, gmatch, gsub = mw.ustring.find, mw.ustring.format, mw.ustring.gmatch, mw.ustring.gsub
local len, lower, sub = mw.ustring.len, mw.ustring.lower, mw.ustring.sub
local split = mw.text.split
local U = require("Module:string/char")
-- diacritics and length mark, built from their code points
local nonsyllabic = U(0x32F) -- inverted breve below
local voiceless = U(0x325) -- combining ring below
local long = U(0x2D0) -- triangular colon
-- stress marks
local primary_stress, secondary_stress = "ˈ", "ˌ"
-- character sets and the patterns built from them
local consonants = "bdðfghjklmnprstvxþ"
local vowels = "aɛɪiʏyœɔou"
-- exactly one consonant character
local consonant = "[" .. consonants .. "]"
-- a run of vowel characters, optionally followed by the non-syllabic mark and the length mark
local vowel = "[" .. vowels .. "]+" .. (nonsyllabic .. "?") .. (long .. "?")
-- either stress mark
local stress = "[" .. primary_stress .. secondary_stress .. "]"
-- pronunciation data
-- Consonant entries are either a plain string or a two-element array
-- { word-initial value, internal/word-final value }.
-- Vowel entries are three-element arrays
-- { regular value, value before "gi", value before "ng"/"nk" }.
local data = {
	-- consonant trigraphs
	trigraphs = {
		fnd = "mt",
		fnt = "m" .. voiceless .. "t",
		mbd = "mt",
		mbg = "mk",
		mbs = "ms",
		mbt = "m" .. voiceless .. "t",
	},
	-- consonant digraphs
	digraphs = {
		ll = "tl" .. voiceless,
		ff = "ff",
		gj = { "c", "gj" },
		kj = { "cʰ", "c" },
		rl = "rtl" .. voiceless,
		rn = "rtn" .. voiceless,
		sl = "stl" .. voiceless,
		sn = "stn" .. voiceless,
		qu = "kʰv",
		hv = "kʰv",
		hl = "l" .. voiceless,
		hn = "n" .. voiceless,
		hr = "r" .. voiceless,
		hj = "ç",
	},
	-- single consonant letters
	single = {
		b = "p",
		d = "t",
		g = { "k", "g" },
		p = { "pʰ", "p" },
		t = { "tʰ", "t" },
		k = { "kʰ", "k" },
		q = { "kʰ", "k" },
		x = { "s", "xs" },
		f = { "f", "v" },
		["þ"] = "θ",
	},
	-- vowels: regular, before gi, before ng/nk
	vowels = {
		au = { "œy" .. nonsyllabic, "œy" .. nonsyllabic, "œy" .. nonsyllabic },
		ei = { "ɛi" .. nonsyllabic, "ɛi" .. nonsyllabic, "ɛi" .. nonsyllabic },
		a = { "a", "ai" .. nonsyllabic, "au" .. nonsyllabic },
		["á"] = { "au" .. nonsyllabic, "au" .. nonsyllabic, "au" .. nonsyllabic },
		-- NOTE(review): the pre-"gi" value starts with "e" while the other
		-- outputs for this letter use "ɛ" — confirm this is intended
		e = { "ɛ", "ei" .. nonsyllabic, "ɛi" .. nonsyllabic },
		["é"] = { "jɛ", "jɛ", "jɛ" },
		i = { "ɪ", "i", "i" },
		["í"] = { "i", "i", "i" },
		o = { "ɔ", "ɔi" .. nonsyllabic, "ɔi" .. nonsyllabic },
		["ó"] = { "ou" .. nonsyllabic, "ou" .. nonsyllabic, "ou" .. nonsyllabic },
		u = { "ʏ", "ʏi" .. nonsyllabic, "u" },
		["ú"] = { "u", "u", "u" },
		["æ"] = { "ai" .. nonsyllabic, "ai" .. nonsyllabic, "ai" .. nonsyllabic },
		["ö"] = { "œ", "œy" .. nonsyllabic, "œy" .. nonsyllabic },
	},
}
-- add data for preaspirated stop clusters:
-- doubled p/t/k become "h" + stop, and p/t/k followed by l/m/n become
-- "h" + stop + sonorant carrying the voiceless diacritic
for _, stop in ipairs({ "p", "t", "k" }) do
	data.digraphs[stop .. stop] = "h" .. stop
	for _, sonorant in ipairs({ "l", "m", "n" }) do
		data.digraphs[stop .. sonorant] = "h" .. stop .. sonorant .. voiceless
	end
end
-- list pronunciation substitutions
-- Each rule is { pattern, replacement }; the sets are applied in order, and
-- the rules inside a set are applied in order, by export.toIPA.
local rules = {
	-- set 1
	{
		{ "(" .. stress .. consonant .. "*" .. vowel .. ")nn", "%1tn" .. voiceless },
		{ "(" .. vowel .. ")g([aʏðlr])", "%1ɣ%2" },
		{ "(" .. vowel .. ")g([ji])", "%1j%2" },
		{ "(" .. vowel .. ")[kg]([ts])", "%1x%2" },
		{ "(" .. vowel .. ")p([tsk])", "%1f%2" },
		{ "v([tsk])", "f%1" },
	},
	-- set 2 only applies when special=false
	{
		{ "(u" .. nonsyllabic .. "?" .. long .. "?)[vɣ]", "%1" },
		{ "kʏ(" .. long .. "?)ð", "kvʏ%1ð" },
	},
	-- set 3
	{
		{ "ng([ls])", "ŋ%1" },
		{ "g", "k" },
		{ "k(ʰ?[ɛiɪ])", "c%1" },
		{ "k(ʰ?ai)", "c%1" },
		{ "kj", "c" },
		{ "(" .. long .. "?)jj", "i" .. nonsyllabic .. "%1j" },
		{ "nk", "ŋk" },
		{ "kc", "c" .. long },
		-- a doubled character becomes that character plus the length mark
		{ "(.)%1", "%1" .. long },
	},
}
-- function to track accents
-- term: the word; every hyphen or space in it counts as a compound separator.
-- string: optional comma-separated list of stress positions, one per compound.
-- Returns an array of stress positions (as strings), one per compound.
-- Raises an error when the number of listed positions does not match the
-- number of compounds.
function export.markAccent(term, string)
	-- number of separators; the term has one more compound than that
	local _, separator_total = gsub(term, "[%- ]", "")
	-- no accent string: every compound gets the default position "1"
	if not string then
		local defaults = {}
		for index = 1, separator_total + 1 do
			defaults[index] = "1"
		end
		return defaults
	end
	-- the accent string must contain exactly one comma per separator
	local _, comma_total = gsub(string, ",", "")
	if separator_total ~= comma_total then
		error(format("Incorrect number of stress positions specified (%d). Specify %d stress positions.", comma_total + 1, separator_total + 1))
	end
	-- a dash means "no stress" (0) in a single-compound word and
	-- "default initial stress" (1) otherwise
	local dash_value = "1"
	if separator_total == 0 then
		dash_value = "0"
	end
	local positions = gsub(string, "%-", dash_value)
	-- return stressed positions as an array split on commas
	return split(positions, ",")
end
-- function to determine vowel length
-- v: the vowel's transcription; next_chars: the characters that follow it
-- (the caller passes at most two). Returns v, with the length mark appended
-- when the vowel is long.
local function determineLength(v, next_chars)
	-- short before x, which is treated like two consonants
	if find(next_chars, "x") then
		return v
	end
	-- long when at most one character follows, when a consonant is followed
	-- by something that is neither a consonant nor a hyphen, or before the
	-- clusters b/d/g/k/p/s/t + j/r/v
	local is_long = len(next_chars) <= 1
		or find(next_chars, consonant .. "[^" .. consonants .. "%-]")
		or find(next_chars, "[bdgkpst][jrv]")
	if is_long then
		return v .. long
	end
	-- short otherwise
	return v
end
-- function to determine vowel type
-- v: key into data.vowels; term: the whole word; pos: index of the vowel's
-- last character in term; is_stressed: whether length should be computed.
-- Returns the transcription chosen for the vowel.
local function determineVowel(v, term, pos, is_stressed)
	local forms = data.vowels[v]
	-- look at the two characters after the vowel
	local following = sub(term, pos + 1, pos + 2)
	-- before ng/nk: third form
	if following == "ng" or following == "nk" then
		return forms[3]
	end
	-- before gi: second form
	if following == "gi" then
		return forms[2]
	end
	-- regular form, with length worked out only for stressed vowels
	if is_stressed then
		return determineLength(forms[1], following)
	end
	return forms[1]
end
-- function to count syllables
-- Every match of the `vowel` pattern counts as one syllable.
-- Returns the number of matches and an array holding the matched substrings
-- (the matched text, not their positions).
local function countSyllables(term)
	local groups = {}
	for matched in gmatch(term, vowel) do
		groups[#groups + 1] = matched
	end
	return #groups, groups
end
-- function to generate rhyme
-- term: a transcription in which syllables are found with the `vowel` pattern.
-- Returns "-" followed by the part of term that starts at the vowel of the
-- only syllable (one-syllable terms) or of the second-last syllable
-- (longer terms); returns nil when term contains no vowel.
-- NOTE(review): nothing in this file calls this function; the previous body
-- indexed the string with matched text and passed a string to sub() as the
-- start index, so it could not run. The behaviour above follows its comments
-- — confirm against the intended rhyme convention.
local function getRhyme(term)
	-- collect the start position of every vowel group
	local starts = {}
	local init = 1
	while true do
		local first, last = find(term, vowel, init)
		if not first then
			break
		end
		starts[#starts + 1] = first
		init = last + 1
	end
	local count = #starts
	-- no syllable, so no rhyme
	if count == 0 then
		return nil
	end
	-- start at the only syllable, or at the second-last one
	local start = starts[count == 1 and 1 or count - 1]
	-- return rhyme
	return "-" .. sub(term, start)
end
-- function to generate transcription
-- term: the spelling to transcribe; must be a string (it is lowercased here).
-- accent: array of stress positions, one per hyphen-separated compound, as
--   returned by export.markAccent; each entry is converted with tonumber.
-- special: when exactly true, "ll" is kept as "ll"; when truthy, rule set 2
--   is skipped.
-- Returns the transcription as one string, without enclosing slashes.
-- Raises an error when term is not a string or when a stress position is
-- missing, not a number, or lies beyond the vowels of its compound.
function export.toIPA(term, accent, special)
	if type(term) ~= "string" then
		error('The function "toIPA" requires a string argument.')
	end
	-- initialise pronunciation
	term = lower(term)
	local IPA = {}
	local pos = 1
	local is_initial = true
	local compound_index = 1
	-- respell some letters that share pronunciations with other letters
	term = gsub(term, "c([eéiíyö])", "s%1")
	term = gsub(term, "[cwyýz]", { ["c"] = "k", ["w"] = "v", ["y"] = "i", ["ý"] = "í", ["z"] = "s" })
	-- read the stress position of a compound; a missing or non-numeric value
	-- is reported here instead of failing later on arithmetic with nil
	local function accentOf(index)
		local value = tonumber(accent[index])
		if not value then
			error(format("Invalid stress position %s in compound %d", tostring(accent[index]), index))
		end
		return value
	end
	-- append a consonant entry, choosing the word-initial or the
	-- internal/final value when the entry is a two-element array
	local function emit(entry)
		if type(entry) == "table" then
			entry = is_initial and entry[1] or entry[2]
		end
		table.insert(IPA, entry)
	end
	-- get current accent value from array
	local current_accent = accentOf(compound_index)
	-- term is not modified inside the loop, so its length is computed once
	local term_length = len(term)
	-- handle string
	while pos <= term_length do
		-- mark stress when current accent is 1
		if current_accent == 1 then
			table.insert(IPA, compound_index == 1 and primary_stress or secondary_stress)
			current_accent = current_accent - 1
		end
		-- the next one, two and three characters
		local one = sub(term, pos, pos)
		local two = sub(term, pos, pos + 1)
		local three = sub(term, pos, pos + 2)
		-- handle consonant trigraphs
		if data.trigraphs[three] then
			emit(data.trigraphs[three])
			pos = pos + 3
			is_initial = false
		-- handle consonant digraphs
		elseif data.digraphs[two] then
			-- special case for ll
			if two == "ll" and special == true then
				table.insert(IPA, "ll")
			else
				emit(data.digraphs[two])
			end
			pos = pos + 2
			is_initial = false
		-- handle vowel digraphs (au, ei; ey has already been respelled as ei)
		elseif two == "au" or two == "ei" then
			-- pos + 1 is the digraph's last character, so the lookahead starts after it
			table.insert(IPA, determineVowel(two, term, pos + 1, current_accent == 0))
			current_accent = current_accent - 1
			pos = pos + 2
			is_initial = false
		-- handle single consonant letters
		elseif data.single[one] then
			emit(data.single[one])
			pos = pos + 1
			is_initial = false
		-- handle single vowels
		elseif data.vowels[one] then
			table.insert(IPA, determineVowel(one, term, pos, current_accent == 0))
			current_accent = current_accent - 1
			pos = pos + 1
			is_initial = false
		-- handle compound stress
		elseif one == "-" then
			-- check error for invalid stress position
			if current_accent > 0 then
				error(format("Invalid stress position %s in compound %d", accent[compound_index], compound_index))
			end
			-- increment compound index
			compound_index = compound_index + 1
			current_accent = accentOf(compound_index)
			pos = pos + 1
			is_initial = true
		-- otherwise copy the character unchanged
		else
			table.insert(IPA, one)
			pos = pos + 1
			is_initial = false
		end
	end
	-- check error for invalid stress position
	if current_accent > 0 then
		error(format("Invalid stress position %s in compound %d", accent[compound_index], compound_index))
	end
	-- combine ipa symbols into single string
	local pron = table.concat(IPA)
	-- apply phonemic rules
	for i, set_of_rules in ipairs(rules) do
		-- only use set 2 if special=false
		if not (special and i == 2) then
			for _, rule in ipairs(set_of_rules) do
				local regex, replacement = rule[1], rule[2]
				pron = gsub(pron, regex, replacement)
			end
		end
	end
	-- remove secondary stress if primary and secondary stress are both one syllable only
	pron = gsub(pron, "([^" .. secondary_stress .. "]+)(" .. secondary_stress .. "[^" .. secondary_stress .. "]+)", function(a, b)
		local count_a = countSyllables(a)
		local count_b = countSyllables(b)
		if count_a == 1 and count_b == 1 then
			-- parentheses keep only gsub's first result (the string)
			return a .. (gsub(b, secondary_stress, ""))
		end
		return a .. b
	end)
	-- remove any unwanted characters (e.g. full stops and commas)
	pron = gsub(pron, "[%.,]", "")
	return pron
end
-- main export function
-- Reads the arguments of the parent frame. Each non-empty positional
-- argument is a word to transcribe; with no first positional argument the
-- current page title is used. "accentN"/"specialN" apply to the N-th word,
-- and plain "accent"/"special" are fallbacks for the first word only.
-- Returns the result of Module:IPA's format_IPA_full for the transcriptions.
function export.show(frame)
	local args = frame:getParent().args
	-- collect the words to transcribe
	local words = {}
	if args[1] then
		for _, value in ipairs(args) do
			-- empty arguments are skipped
			if value ~= "" then
				words[#words + 1] = value
			end
		end
	else
		words = { mw.title.getCurrentTitle().text }
	end
	-- transcribe each word between slashes
	local items = {}
	for index, word in ipairs(words) do
		local is_first = index == 1
		local accent_param = args["accent" .. index] or (is_first and args.accent)
		local special_param = args["special" .. index] or (is_first and args.special)
		local accent = export.markAccent(word, accent_param)
		local special = require("Module:yesno")(special_param)
		local transcription = export.toIPA(word, accent, special)
		table.insert(items, { pron = "/" .. transcription .. "/" })
	end
	return m_ipa.format_IPA_full({ lang = lang, items = items })
end
return export