-- Bước tới nội dung

-- Mô đun:ne-IPA/sandbox

-- Từ điển mở Wiktionary
-- IPA transcription for Nepali written in Devanagari

-- Table of functions handed back to whoever loads this module.
local export = {}
-- Short names for the Unicode-aware pattern functions used throughout the file.
local gsub, match = mw.ustring.gsub, mw.ustring.match

-- Character-to-IPA lookup table. export.tr applies it with
-- gsub(text, ".़?", conv), so each key is one character, optionally followed
-- by a nukta (़); characters with no entry are left unchanged by that gsub.
local conv = {
    -- consonants
    ["क"] = "k",
    ["ख"] = "kʰ",
    ["ग"] = "ɡ",
    ["घ"] = "ɡʱ",
    ["ङ"] = "ŋ",
    ["च"] = "ʦ",
    ["छ"] = "ʦʰ",
    ["ज"] = "ʣ",
    ["झ"] = "ʣʱ",
    ["ञ"] = "n",
    ["ट"] = "ʈ",
    ["ठ"] = "ʈʰ",
    ["ड"] = "ɖ",
    ["ढ"] = "ɖʱ",
    ["ण"] = "ɳ",
    ["त"] = "t̪",
    ["थ"] = "t̪ʰ",
    ["द"] = "d̪",
    ["ध"] = "d̪ʱ",
    ["न"] = "n",
    ["प"] = "p",
    ["फ"] = "pʰ",
    ["ब"] = "b",
    ["भ"] = "bʱ",
    ["म"] = "m",
    ["य"] = "j",
    ["र"] = "r",
    ["ल"] = "l",
    ["व"] = "w",
    ["श"] = "s",
    ["ष"] = "ʂ",
    ["स"] = "s",
    ["ह"] = "ɦ",
    ["क़"] = "q",
    ["ख़"] = "x",
    ["ग़"] = "ɣ",
    ["ऴ"] = "ɭ",
    ["ळ"] = "ɭ",
    ["ज़"] = "z",
    ["श़"] = "ʒ",
    ["झ़"] = "ʒ",
    ["ड़"] = "ɽ",
    ["ढ़"] = "ɽʱ",
    ["फ़"] = "f",
    ["थ़"] = "θ",
    ["द़"] = "ð",
    ["ऩ"] = "n̪",
    ["ऱ"] = "ɹ",
    ["ॽ"] = "ʔ",
    ["व़"] = "v",
    ["ॹ"] = "ʒ",
    -- dependent vowel signs (attached to a preceding consonant)
    ["ि"] = "i",
    ["ु"] = "u",
    ["े"] = "e",
    ["ो"] = "o",
    ["ा"] = "ä",
    ["ी"] = "i",
    ["ू"] = "u",
    ["ृ"] = "ri",
    ["ॄ"] = "ri",
    ["ॢ"] = "liɾi",
    ["ॣ"] = "liɾi",
    ["ै"] = "ʌi̯",
    ["ौ"] = "ʌu̯",
    ["ॉ"] = "ɔ",
    ["ॅ"] = "æ",
    -- independent vowel letters
    ["अ"] = "ʌ",
    ["इ"] = "i",
    ["उ"] = "u",
    ["ए"] = "e",
    ["ओ"] = "o",
    ["आ"] = "ä",
    ["ई"] = "i",
    ["ऊ"] = "u",
    ["ऋ"] = "ri",
    ["ॠ"] = "ri",
    ["ऌ"] = "liɾi",
    ["ॡ"] = "liɾi",
    ["ऐ"] = "ʌi̯",
    ["औ"] = "ʌu̯",
    ["ऑ"] = "ɔ",
    ["ॲ"] = "æ",
    ["ऍ"] = "æ",
    -- chandrabindu (becomes a combining tilde on the preceding output)
    ["ँ"] = "̃",
    -- anusvara
    ["ं"] = "ṃ",
    -- visarga
    ["ः"] = "ː",
    -- virama (produces no output of its own)
    ["्"] = "",
    -- om
    ["ॐ"] = "oːm",
    -- zero-width non joiner
    ["‌"] = " ͜ ",
    -- zero-width joiner
    ["‍"] = "ʌ",
    -- diphthong marker
    ["ॱ"] = "̯",
    -- numerals (Devanagari digits to ASCII digits)
    ["०"] = "0",
    ["१"] = "1",
    ["२"] = "2",
    ["३"] = "3",
    ["४"] = "4",
    ["५"] = "5",
    ["६"] = "6",
    ["७"] = "7",
    ["८"] = "8",
    ["९"] = "9",
    -- punctuation
    ["।"] = ".", -- danda
    ["॥"] = ".", -- double danda
    ["+"] = "", -- compound separator
    -- abbreviation sign
    ["॰"] = "."
}

-- Anusvara resolution table: keyed by the consonant that follows the anusvara
-- in the written word, giving the nasal character export.tr inserts in its
-- place. Built from rows grouped by the resulting nasal.
local nasal_assim = {}
for _, rule in ipairs({
    { nasal = "ङ", before = { "क", "ख", "ग", "घ", "श", "ह" } },
    { nasal = "ञ", before = { "च", "छ", "ज", "झ" } },
    { nasal = "ण", before = { "ट", "ठ", "ड", "ढ" } },
    { nasal = "म", before = { "प", "फ", "ब", "भ", "म", "य", "र", "व" } },
    { nasal = "न", before = { "त", "थ", "द", "ध", "न", "ष", "स" } },
    -- before ल the result is a chandrabindu rather than a nasal consonant
    { nasal = "ँ", before = { "ल" } }
}) do
    for _, consonant in ipairs(rule.before) do
        nasal_assim[consonant] = rule.nasal
    end
end
-- Set of consonant + virama + consonant sequences that are exempt from the
-- "keep the final ʌ after a cluster" rule in export.tr. The lookup there is
-- done on the reversed word (first .. second .. third), so every key is
-- spelled with the word's last consonant first.
local perm_cl = {}
for _, cluster in ipairs({ "ज्न", "ज्ञ", "ट्र", "ड्र", "ट्स", "ड्स", "स्ड" }) do
    perm_cl[cluster] = true
end

-- Character-class ingredients for the patterns below and for export.tr.
-- all_cons: every consonant letter that receives an inherent ʌ;
-- special_cons: the consonants tested by the word-final ʌ rule in export.tr.
local all_cons, special_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह", "कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह"
-- vowel: dependent vowel signs (plus a Latin "a"); vowel_sign: independent
-- vowel letters. Declared once here; both patterns and export.tr share them.
local vowel, vowel_sign = "aिुृेोाीूैौॉॅॆॊॄॢॣ", "अइउएओआईऊऋॠॡऌऐऔऑऍ"
-- Applied to the REVERSED word, which is why the optional nukta (़) precedes
-- each consonant and the optional nasal mark precedes each vowel. It matches
-- consonant-vowel-consonant-vowel-consonant, an inherent ʌ, then
-- consonant-vowel-consonant-vowel; export.tr rewrites the match with its nine
-- captures only, which drops that ʌ.
local syncope_pattern = "(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])ʌ(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])(़?[" .. all_cons .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])"

-- Ingredients for koka_pattern. Repeated letters inside nor_cons are harmless
-- because the string is only ever used inside a character class.
local nor_cons, sp_cons = "कखगघङचछजझञटठडढतथदधपफबभशषसयरलवणनमयरलवनम", "कलम"
local koka_sign = "ोीाैे"
-- Also applied to the reversed word: one of the koka_sign vowel signs, a
-- consonant from sp_cons, an inherent ʌ, a consonant from nor_cons other than
-- य, then a vowel. export.tr rewrites the match with its four captures only,
-- which drops the ʌ.
local koka_pattern = "([" .. koka_sign .. "])(़?[" .. sp_cons .. "])ʌ(़?[" .. gsub(nor_cons, "य", "") .. "])([ंँ]?[" .. vowel .. vowel_sign .. "])"

--- Return `text` with its characters in reverse order.
-- Uses mw.ustring.len / mw.ustring.sub, so the unit of reversal is whatever
-- those functions count as one character, not a byte.
-- @param text string to reverse
-- @return the reversed string
local function rev_string(text)
    local usub = mw.ustring.sub
    local total = mw.ustring.len(text)
    local pieces = {}
    for slot = 1, total do
        -- slot 1 receives the last character, slot `total` the first
        local source = total - slot + 1
        pieces[slot] = usub(text, source, source)
    end
    return table.concat(pieces)
end
--- Convert Nepali text written in Devanagari into an IPA transcription.
--
-- Pipeline, in order:
--   1. insert the inherent vowel ʌ after every consonant (plus optional nukta)
--      that is not already followed by a vowel sign or a virama;
--   2. for each run of Devanagari characters / ʌ, work on the reversed word to
--      decide whether the word-final ʌ survives, delete medial ʌ through
--      syncope_pattern and koka_pattern, and resolve anusvara; the result is
--      substituted back into the text and the ending rules are applied;
--   3. map every remaining character (plus optional nukta) through conv;
--   4. apply the ordered IPA-level rewrite rules.
-- The rules are order-dependent: later rules see the output of earlier ones.
--
-- @param text string to transcribe
-- @param lang not read by this function
-- @param sc   not read by this function
-- @return the transcription, normalized with mw.ustring.toNFC
function export.tr(text, lang, sc)
    -- Step 1: make the inherent vowel explicit.
    text =
        gsub(
        text,
        "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)",
        function(c, d)
            return c .. (d == "" and "ʌ" or d)
        end
    )
    -- Step 2: per-word processing. gmatch iterates over the text as it was
    -- when the loop started; `text` itself is rewritten inside the loop.
    for word in mw.ustring.gmatch(text, "[ऀ-ॿʌ]+") do
        local orig_word = word
        -- Work right-to-left: in the reversed word the final ʌ is at "^".
        word = rev_string(word)
        -- Word-final ʌ: the callback re-emits the match with a leading "ʌ" only
        -- when one of its conditions holds (anusvara before the last consonant,
        -- a final cluster not listed in perm_cl, or य / ह after a vowel);
        -- otherwise the ʌ is dropped.
        word =
            gsub(
            word,
            "^ʌ(़?)([" .. all_cons .. "])(.)(.?)",
            function(opt, first, second, third)
                return (((match(first, "[" .. special_cons .. "]") and match(second, "ं") or
                    match(first, "[" .. special_cons .. "]") and match(second, "्") and
                        not perm_cl[first .. second .. third]) or
                    match(first .. second, "य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔʌ]") or
                    match(first .. second, "ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔʌ]")) and
                    "ʌ" or
                    "") ..
                    opt .. first .. second .. third
            end
        )

        -- Medial ʌ deletion; repeated until the pattern no longer matches
        -- because one pass can expose a new match.
        while match(word, syncope_pattern) do
            word = gsub(word, syncope_pattern, "%1%2%3%4%5%6%7%8%9")
        end
        while match(word, koka_pattern) do
            word = gsub(word, koka_pattern, "%1%2%3%4")
        end
        -- Anusvara: in the reversed word `succ` is the character that follows
        -- the anusvara in writing order and `prev` the one that precedes it.
        word =
            gsub(
            word,
            "(.?)ं(.)",
            function(succ, prev)
                return succ ..
                    (succ .. prev == "ʌ" and "्म" or
                        (succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "̃")) ..
                        prev
            end
        )

        -- NOTE(review): this replaces every occurrence of orig_word in the
        -- text, including occurrences inside longer words — confirm that is
        -- acceptable for multi-word input.
        text = gsub(text, orig_word, rev_string(word))
        -- Ending rules: restore a final ʌ on the listed endings, either at the
        -- very end of the text ("$") or before a space when at least three
        -- characters precede the ending ("(...)").
        text = gsub(text, "ईन$", "inʌ")
        text = gsub(text, "(...)ईन ", "%1inʌ ")
        text = gsub(text, "इन$", "inʌ")
        text = gsub(text, "(...)इन ", "%1inʌ ")
        text = gsub(text, "ैन$", "ʌi̯nʌ")
        -- one vowel sign, matching the "$" rule above
        text = gsub(text, "(...)ैन ", "%1ʌi̯nʌ ")
        text = gsub(text, "उन$", "unʌ")
        text = gsub(text, "(...)उन ", "%1unʌ ")
        text = gsub(text, "ुन$", "unʌ")
        text = gsub(text, "(...)ुन ", "%1unʌ ")
        text = gsub(text, "िन$", "inʌ")
        text = gsub(text, "(...)िन ", "%1inʌ ")
        -- same output as the before-space variant on the next line
        text = gsub(text, "िछ$", "iʦʰʌ")
        text = gsub(text, "(...)िछ ", "%1iʦʰʌ ")
        text = gsub(text, "उछ$", "uʦʰʌ")
        text = gsub(text, "(...)उछ ", "%1uʦʰʌ ")
        text = gsub(text, "इछ$", "iʦʰʌ")
        text = gsub(text, "(...)इछ ", "%1iʦʰʌ ")
        text = gsub(text, "एछ$", "eʦʰʌ")
        text = gsub(text, "ेछ$", "eʦʰʌ")
        text = gsub(text, "(...)ेछ ", "%1eʦʰʌ ")
        text = gsub(text, "(...)ेन ", "%1enʌ ")
        text = gsub(text, "ेन$", "enʌ")
        text = gsub(text, "(...)एन ", "%1enʌ ")
        text = gsub(text, "एर$", "eɾʌ")
        text = gsub(text, "(...)एर ", "%1eɾʌ ")
        text = gsub(text, "ेर$", "eɾʌ")
        text = gsub(text, "(...)ेर ", "%1eɾʌ ")
        text = gsub(text, "एन$", "enʌ")
        text = gsub(text, "उँछ$", "ũʦʰʌ")
        text = gsub(text, "(...)उँछ ", "%1ũʦʰʌ ")
        text = gsub(text, "ज्ञ", "ɡj")
    end
    -- Step 3: character-by-character mapping; unmapped characters pass through.
    text = gsub(text, ".़?", conv)
    -- Step 4: IPA-level rules.
    -- Nasalized diphthongs: put a tilde on the ʌ as well.
    text = gsub(text, "ʌ([iu])̯̃", "ʌ̃%1̯̃")
    text = gsub(text, "([ʌ])̃([iu])̯", "%1̃%2̯")
    text = gsub(text, "[<>]", "")
    text = gsub(text, "ॱ", "")

    -- Further endings that get a final ʌ, now in IPA form.
    text = gsub(text, "dʌʦʰ$", "dʌʦʰʌ")
    text = gsub(text, "(...)dʌʦʰ ", "%1dʌʦʰʌ ")
    text = gsub(text, "ʌi̯n$", "ʌi̯nʌ")
    text = gsub(text, "(...)ʌi̯n ", "%1ʌi̯nʌ ")
    text = gsub(text, "nʌʦʰ$", "nʌʦʰʌ")
    text = gsub(text, "(...)nʌʦʰ ", "%1nʌʦʰʌ ")
    -- w / b alternation, decided by the neighbouring sounds.
    text = gsub(text, "wʌi̯", "bʌi̯")
    text = gsub(text, "w$", "b")
    text = gsub(text, "(...)w ", "%1b ")
    text = gsub(text, "([rʌäiueo])([r])w", "%1rb")
    text = gsub(text, "w([iewuojr])", "b%1")
    text = gsub(text, "([w])ʌ([krɾjtcʦʣçʐṅñysśdpɦhn])([tnrṇṣcśkghjɦsçʐueoʌayd])", "bʌ%2%3")
    text =
        gsub(
        text,
        "([śsnlcçʦʣʐjzkʰʱɦhpɡtdgb])([w])([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])([cspdtçʐnɡgkʦʣbɾrjyṇṣśṇɾṅñṃ])",
        "%1w%3%4"
    )
    text = gsub(text, "([w])ä([cgjṇtdçʐʦʣmyshɦśṣn])", "bä%2")
    text = gsub(text, "([w])ä([rɾ])([tdābuṇɦṣh])", "bä%2%3")
    text = gsub(text, "([w])ä([l])([m])", "bä%2%3")
    text = gsub(text, "([w])ʌ([sśṣṅñṃyjʦʣpdtnçʐc])", "bʌ%2")
    text = gsub(text, "([ʌäiueoŏĕ])([nl])([td]̪)", "%1%2̪%3") -- dental assimilation
    text = gsub(text, "([ʌäiueoŏĕ])n([ʈɖ])", "%1ɳ%2") -- retroflex assimilation
    text = gsub(text, "([l])([ʈɖ])", "ɭ%2")
    -- r between vowels becomes a tap.
    text = gsub(text, "([ʌʌ̃äaāiuūəãā̃ī̃ĩũū̃ẽõeeo̯o ̤])r([ʌʌ̃äaāiīuūəãā̃ī̃ĩũū̃ẽõeeyo̯o])", "%1ɾ%2")
    text = gsub(text, "([śsnlcjzʐçʦʣkhptdgb])([vw])([aāäiīuūoeĩ])([cspdtngkbrjyṇṣśṇɾṅñṃ])", "%1w%3%4")

    text = gsub(text, "([n])([ʌ])ʣ([ʱ]?)([ʌäiueo])", "%1%2ʣ%3%4")

    -- Post-vocalic weakening of aspirated / breathy stops.
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([k])([ʰ])", "%1kʰ")
    --text = gsub(text, '([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([d]͡)(z)([ʱ]?)', '%1(d)z')
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])b(ʱ?)([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤jː])", "%1b%3")
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])pʰ", "%1ɸ")
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõjɔ̃e̤ː])d̪ʱ", "%1d̪")
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃je̤ː])ɡ(ʱ?)", "%1ɡ")
    --text = gsub(text, 't͡st͡s(ʰ?)', 't̚t͡s%1')
    --text = gsub(text, 'd͡zd͡z(ʱ?)', 'd̚d͡z%1')
    -- ɦ between vowels: "%2" inside the pattern is a back-reference, so this
    -- rule needs the same vowel on both sides of ɦ.
    text = gsub(text, "([ spdtzʱʰɦgkbçʦʣʐrjyɖʈṇṣśṇɾṅñṃ̪])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤])ɦ%2", "%1%2̤ː")
    text = gsub(text, "([ʌä])ɦä", "ä̤ː")
    text = gsub(text, "([ʌʌ̃])ɦä̃", "ä̤̃ː")
    text = gsub(text, "äɦ([äʌ])", "ä̤ː")
    text = gsub(text, "([ʌäeoɔæɛʌ̃ä̃ẽõɔ̃e̤])ɦ([iuĩũ])", "%1%2")
    text = gsub(text, "([iĩ])ɦ([ũu])", "%1%2")
    text = gsub(text, "([uũ])ɦ([iĩ])", "%1%2")
    text = gsub(text, "([uũ])ɦ([äʌ])", "%1%2")
    text = gsub(text, "([ʌʌ̃iĩ])ɦ([eẽoõ])", "%1%2̤")
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̯e̤ː])ɖ(ʱ?)(j?)", "%1ɽ%3")
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])ɦr([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])", "%1ɾ%2")
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])ɦ([n])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])", "%1̤ː%2%3")
    text = gsub(text, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])ɦ([ml])", "%1%2%2")
    -- These two rules only fire on a literal "#" in the text and remove it.
    text = gsub(text, "(#)([spdtzʱʰɦgkbrṇṣśʂʈɖçʐʦʣnṇʌäiuoɔæɛʌ̃ä̃ĩũõɔ̃e̤ːɾṅñ]?)jʌ", "%2e")
    text = gsub(text, "(#)([spdtzʱʰɦgkbrṇṣśʂʈɖçʦʣʐnʌäiuoɔæɛʌ̃ä̃ĩũõɔ̃e̤ːṇṅɾñ]?)wʌ", "%2o")

    text = gsub(text, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])kʂ([ʌe])", "%1k̚t͡sʰe") -- kṣ ligature
    text = gsub(text, "()kʂ", "t͡sʰ") -- kṣ initial
    -- After a vowel, consonant + wʌ / jʌ: double the consonant and turn the
    -- glide + ʌ into o / e. Both rules accept an optional aspiration mark.
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([spdtzɦgɡkbrṇṣśʃʂʈɖʦʣçʐnṇɾṅñ])(̪?)([ʰʱ]?)wʌ", "%1%2%2%3%4o")
    text = gsub(text, "([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([spdtzɦgɡkbrṇṣśʂʃʈɖçʐʦʣnṇɾṅñ])(̪?)([ʰʱ]?)jʌ", "%1%2%2%3%4e")
    text = gsub(text, "ʂ", "s")
    text = gsub(text, "ɦri", "ri")

    -- Final clean-up.
    text = gsub(text, "kʌn$", "kʌnʌ")
    text = gsub(text, "(...)kʌn ", "%1kʌnʌ ")
    text = gsub(text, "nʌʌ$", "nʌ")
    text = gsub(text, "ä̤ː̃", "ä̤̃ː")
    text = gsub(text, "nɡj", "ŋɡj")
    return mw.ustring.toNFC(text)
end
return export