-- Bước tới nội dung
--
-- Mô đun:ce-translit
--
-- Từ điển mở Wiktionary
-- Module table: the public functions are attached to it and it is returned
-- at the end of the file.
local export = {}

-- Single-character transliteration map: one Cyrillic letter (key) to its
-- Latin rendering (value). Characters that are not listed have no entry, so a
-- lookup for them yields nil.
--
-- Only the lowercase palochka "ӏ" is listed, and it is listed once; the
-- original constructor repeated that key with the same value.
local tt = {
	-- Lowercase letters.
	["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", ["д"] = "d", ["е"] = "e",
	["ё"] = "ë", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k",
	["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r",
	["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "χ", ["ц"] = "c",
	["ч"] = "č", ["ш"] = "š", ["щ"] = "šč", ["ъ"] = "ʾ", ["ы"] = "y", ["ь"] = "’",
	["э"] = "e", ["ю"] = "ju", ["я"] = "ja",

	-- Uppercase letters.
	["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E",
	["Ё"] = "Ë", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K",
	["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R",
	["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "Χ", ["Ц"] = "C",
	["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Šč", ["Ъ"] = "ʾ", ["Ы"] = "Y", ["Ь"] = "’",
	["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja",

	-- Palochka (lowercase form).
	["ӏ"] = "ʿ",

	-- "ң" becomes a bare combining tilde, which attaches to whatever character
	-- precedes it in the output.
	["ң"] = "̃",
	["ӣ"] = "ī",
}

-- Two-letter Cyrillic sequences that are transliterated as a unit rather than
-- letter by letter. Most sequences are listed in a title-case and a lowercase
-- form; the four sequences ending in "ң" are listed in lowercase only.
-- Note that Lua does not specify the order in which pairs() visits these keys.
local digraphs = {
	["Аь"] = "Ä",
	["аь"] = "ä",
	["Гӏ"] = "Ġ",
	["гӏ"] = "ġ",
	["Дж"] = "Ǯ",
	["дж"] = "ǯ",
	["Дз"] = "Ʒ",
	["дз"] = "ʒ",
	["Ий"] = "Ī",
	["ий"] = "ī",
	["Кх"] = "Q",
	["кх"] = "q",
	["Кӏ"] = "Ḳ",
	["кӏ"] = "ḳ",
	["Къ"] = "Q̇",
	["къ"] = "q̇",
	["Оь"] = "Ö",
	["оь"] = "ö",
	["Пӏ"] = "Ṗ",
	["пӏ"] = "ṗ",
	["Тӏ"] = "Ṭ",
	["тӏ"] = "ṭ",
	["Уь"] = "Ü",
	["уь"] = "ü",
	["Хӏ"] = "H",
	["хӏ"] = "h",
	["Хь"] = "Ḥ",
	["хь"] = "ḥ",
	["Цӏ"] = "C̣",
	["цӏ"] = "c̣",
	["Чӏ"] = "Č̣",
	["чӏ"] = "č̣",
	["Юь"] = "Jü",
	["юь"] = "jü",
	["Яь"] = "Jä",
	["яь"] = "jä",
	-- Letter + "ң": no title-case counterparts are defined.
	["иң"] = "į",
	["аң"] = "ą",
	["уң"] = "ų",
	["оң"] = "ǫ",
}

-- Keys of `digraphs` in a fixed, sorted order. Lua does not specify the order
-- in which pairs() visits a table, and some digraphs can overlap in running
-- text (for example "кх" followed by "ь" also contains "хь"), so replacing
-- them in pairs() order could give a different result from one environment to
-- the next. With the usual byte-wise string comparison "кх"/"Кх" sort before
-- "хь"/"хӏ", so such a sequence is always read as "кх" plus the next letter.
local digraph_order = {}
for digraph in pairs(digraphs) do
	digraph_order[#digraph_order + 1] = digraph
end
table.sort(digraph_order)

--- Transliterate Cyrillic text into Latin script.
-- Replacement happens in stages: the capital palochka is folded to its
-- lowercase form, then three-letter sequences, then the entries of
-- `digraphs`, and finally every remaining character is looked up in `tt`.
-- Characters with no table entry are left unchanged.
-- @param text string to transliterate
-- @param lang language code; accepted but not consulted by this function
-- @param sc script code; accepted but not consulted by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	local str_gsub = string.gsub

	-- Convert capital to lowercase palochka; the tables only need to list the
	-- lowercase form.
	text = str_gsub(text, mw.ustring.char(0x4C0), mw.ustring.char(0x4CF))

	-- Three-letter sequences must be handled before the digraphs they contain.
	text = str_gsub(text, "ккх", "qː")
	text = str_gsub(text, "Ккх", "Qː")
	text = str_gsub(text, "ткъ", "ṭq̇")
	-- Title-case counterpart of the rule above, mirroring "Ккх"; without it a
	-- capitalised "Ткъ" would lose the dot under the first letter.
	text = str_gsub(text, "Ткъ", "Ṭq̇")

	for _, digraph in ipairs(digraph_order) do
		text = str_gsub(text, digraph, digraphs[digraph])
	end

	-- The pattern matches one UTF-8 encoded character at a time; gsub keeps the
	-- original character when `tt` has no entry for it.
	text = str_gsub(text, '[%z\1-127\194-\244][\128-\191]*', tt)

	return text
end

-- Hand the module table (with export.tr) back to whoever loads this module.
return export