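-- Mô đun:sa-Taml-translit  (wiki page title, i.e. "Module:sa-Taml-translit")
-- Giao diện  (wiki UI label "Appearance"; page-scrape residue, not part of the module)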
-- Module table; the public entry point export.tr is attached below and the
-- table is returned at the end of the file.
local export = {}
--- "Drop carrier": remove the carrier letters from a string.
-- Combining marks in this file are written on top of a carrier consonant
-- (Tamil 'க' or Grantha '𑌕'), presumably so that they display legibly in the
-- source; this helper strips both carriers and leaves only the marks.
-- @param text string that may contain carrier letters
-- @return the string with every 'க' and '𑌕' removed (exactly one value)
local function dc(text)
	-- Assigning to a local keeps only gsub's first result. Returning the gsub
	-- call directly would also return the replacement count, which turns into
	-- a bogus "max replacements" argument when dc(...) is the last argument
	-- of another gsub call.
	local stripped = string.gsub(text, 'க', '')
	stripped = string.gsub(stripped, '𑌕', '')
	return stripped
end
-- Romanisation of consonant letters. A key is either a bare letter or a letter
-- followed by one spacing superscript/subscript digit.
local consonants = {
	-- Bare letters.
	['க'] = 'k', ['ங'] = 'ṅ',
	['ச'] = 'c', ['ஞ'] = 'ñ',
	['ட'] = 'ṭ', ['ண'] = 'ṇ',
	['த'] = 't', ['ந'] = 'n',
	['ப'] = 'p', ['ம'] = 'm',
	['ய'] = 'y', ['ர'] = 'r', ['ல'] = 'l', ['வ'] = 'v',
	['ழ'] = 'ḻ', ['ள'] = 'ḷ', ['ற'] = 'ṟ',
	-- Same output as ந, so the two are distinguished only by context.
	['ன'] = 'n',
	['ஶ'] = 'ś', ['ஜ'] = 'j', ['ஷ'] = 'ṣ',
	['ஸ'] = 's', ['ஹ'] = 'h',
	-- Disabled aytham combinations, kept for reference:
	-- ['ஃப']='f' , ['ஃஜ']='z', ['ஃஸ']='ks' , ['ஃக ']='x',
	['ஃ'] = 'ḥ', ['ௐ'] = 'о̄m',

	-- Letter + spacing superscript digit (accepted liberally).
	['க¹'] = 'k', ['க²'] = 'kh', ['க³'] = 'g', ['க⁴'] = 'gh',
	['ச¹'] = 'c', ['ச²'] = 'ch', ['ச³'] = 'j', ['ச⁴'] = 'jh',
	['ஜ¹'] = 'j', ['ஜ²'] = 'jh',
	['ட¹'] = 'ṭ', ['ட²'] = 'ṭh', ['ட³'] = 'ḍ', ['ட⁴'] = 'ḍh',
	['த¹'] = 't', ['த²'] = 'th', ['த³'] = 'd', ['த⁴'] = 'dh',
	['ப¹'] = 'p', ['ப²'] = 'ph', ['ப³'] = 'b', ['ப⁴'] = 'bh',
	['ம²'] = 'ṃ', ['ம³'] = 'm̐',
	-- Ⓡ / Ⓛ are placeholders resolved later through the syll2 table.
	['ர²'] = 'Ⓡ', ['ல²'] = 'Ⓛ',

	-- Letter + spacing subscript digit (accepted liberally).
	['க₁'] = 'k', ['க₂'] = 'kh', ['க₃'] = 'g', ['க₄'] = 'gh',
	['ச₁'] = 'c', ['ச₂'] = 'ch', ['ச₃'] = 'j', ['ச₄'] = 'jh',
	['ஜ₁'] = 'j', ['ஜ₂'] = 'jh',
	['ட₁'] = 'ṭ', ['ட₂'] = 'ṭh', ['ட₃'] = 'ḍ', ['ட₄'] = 'ḍh',
	['த₁'] = 't', ['த₂'] = 'th', ['த₃'] = 'd', ['த₄'] = 'dh',
	['ப₁'] = 'p', ['ப₂'] = 'ph', ['ப₃'] = 'b', ['ப₄'] = 'bh',
	['ம₂'] = 'ṃ', ['ம₃'] = 'm̐',
	['ர₂'] = 'Ⓡ', ['ல₂'] = 'Ⓛ',
}
-- Romanisation of dependent vowel signs and related marks.
-- The long mid vowels ē / ō are still kept apart here; the Tamil length
-- contrast in mid vowels is meant to be obliterated at a later stage.
local diacritics = {
	['ா'] = 'ā',
	['ி'] = 'i', ['ீ'] = 'ī',
	['ு'] = 'u', ['ூ'] = 'ū',
	['ெ'] = 'e', ['ே'] = 'ē', ['ை'] = 'ai',
	['ொ'] = 'o', ['ோ'] = 'ō', ['ௌ'] = 'au',
	-- Pulli: suppresses the inherent vowel "a".
	['்'] = '',
	-- Grantha signs for the syllabic consonants are used as well.
	['𑍃'] = 'ṛ', ['𑍄'] = 'ṝ', ['𑍢'] = 'ḷ', ['𑍣'] = 'ḹ',
	-- No sign at all: the inherent vowel.
	[''] = 'a',
}
-- Single characters other than consonants, mapped to their romanisation.
-- Independent vowels carry a leading ’ marker.
local nonconsonants = {
	-- Independent vowels.
	['அ'] = '’a', ['ஆ'] = '’ā',
	['இ'] = '’i', ['ஈ'] = '’ī',
	['உ'] = '’u', ['ஊ'] = '’ū',
	['எ'] = '’e', ['ஏ'] = '’ē', ['ஐ'] = '’ai',
	['ஒ'] = '’o', ['ஓ'] = '’ō', ['ஔ'] = '’au',
	['ௐ'] = 'о̄m',
	-- Other symbols. The anusvara key is written on a carrier and passed
	-- through dc() so that only the mark itself is used as the key.
	['ஃ'] = 'ḥ',
	[dc('கஂ')] = 'ṃ',
	['𑌃'] = 'ḥ',
	-- Syllabic consonants (Grantha):
	['𑌋'] = 'ṛ', ['𑍠'] = 'ṝ', ['𑌌'] = 'ḷ', ['𑍡'] = 'ḹ',
}
-- Second-pass lookup: a Ⓡ / Ⓛ placeholder followed by u or ū becomes the
-- corresponding syllabic consonant.
local syll2 = {
	['Ⓡu'] = 'ṛ',
	['Ⓡū'] = 'ṝ',
	['Ⓛu'] = 'ḷ',
	['Ⓛū'] = 'ḹ',
}
--- Transliterate any word or phrase.
-- Works in passes: (1) a few consonant + "vowel-killing" mark pairs,
-- (2) consonant clusters with optional digits, vowel signs and anusvara,
-- (3) every remaining single character via `nonconsonants`, then
-- (4) clean-up of vowel length, Ⓡ/Ⓛ placeholders and ’ markers.
-- @param text string to transliterate
-- @param lang accepted but not used by this function
-- @param sc accepted but not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- Special vowel-killing diacritics.
	-- dc() has to be called outside the string literal: written inside the
	-- literal, its name and parentheses would merely become members of the
	-- character class. Concatenation always takes exactly one value from the
	-- call, so no stray gsub count can leak into the gsub below.
	local killer_pattern = "[மயலவ][" .. dc('கஂ⃰𑌕𑌁') .. "]"
	-- Pairs that are matched but absent from the table are left unchanged.
	text = mw.ustring.gsub(text, killer_pattern, {
		['மஂ'] = "ṃ", -- Desirable to get more and independent examples.
		['ம⃰'] = " ṃ ",
		['ய𑌁'] = "y̐", ['ல𑌁'] = "l̐", ['வ𑌁'] = "v̐",
	})

	-- Optional spacing superscript/subscript digit; allowed at four positions
	-- around the vowel signs and the anusvara.
	local nukta = '([¹²³⁴₁₂₃₄]?)'
	-- Optional anusvara (marks are written on carriers, stripped by dc()).
	local anusvara = dc('([கஂ𑌕𑌂]?)')
	-- Optional vowel sign, pulli, or Grantha syllabic-consonant sign.
	local vowel = dc('([கா-க்𑌕𑍃𑌕𑍄𑌕𑍢𑌕𑍣]?)')
	text = mw.ustring.gsub(
		text,
		'(ஃ?)([க-ஹ])'..nukta..vowel..nukta..'([ாௗ]?)'..nukta..anusvara..nukta,
		function(h, c, n1, d, n2, d2, n3, av, n4)
			-- Letter plus whichever digit slot(s) were filled.
			local cn = c .. n1 .. n2 .. n3 .. n4
			-- A second vowel piece means the vowel sign arrived decomposed;
			-- compose it so that it can be found in `diacritics`.
			local da = d..d2
			if d2 ~= "" then da = mw.ustring.toNFC(da) end
			-- Prefer a combined aytham+consonant entry; otherwise romanise the
			-- two parts separately. Unknown pieces pass through unchanged, and
			-- the anusvara is left for the single-character pass below.
			return (consonants[h..cn] or (consonants[h] or "") .. (consonants[cn] or cn)) ..
				(diacritics[da] or da) .. av
		end)

	-- Everything that is still a single mapped character (independent vowels,
	-- visarga, anusvara, Grantha syllabic consonants).
	text = mw.ustring.gsub(text, '.', nonconsonants)
	-- Obliterate the Tamil length contrast in mid vowels.
	text = string.gsub(text, 'ē', 'e')
	text = string.gsub(text, 'ō', 'o')
	-- Resolve the Ⓡ / Ⓛ placeholders followed by u / ū.
	text = mw.ustring.gsub(text, '[ⓇⓁ][uū]', syll2)
	-- Drop the ’ marker at the very start and after whitespace or punctuation.
	text = string.gsub(text, '^’', '')
	text = mw.ustring.gsub(text, '([%s%p])’', '%1')
	return text
end
-- Hand the module table (with export.tr) back to the caller that loads this module.
return export