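-- Module:fa-translit (original page title: "Mô đun:fa-translit")
--
-- Source: Wiktionary, the open dictionary (original tagline: "Từ điển mở Wiktionary")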
-- Shorthand for building a string from Unicode code points.
local U = mw.ustring.char
-- Unicode-aware gsub. Like string.gsub it returns the new string AND a match count,
-- so never `return rsub(...)` directly from a function unless the call is wrapped
-- in parentheses to drop the second value.
local rsub = mw.ustring.gsub -- WARNING: Don't return this directly in a function, or surround in parens
-- Table returned by this module; the public function `tr` is attached to it below.
local export = {}

-- Diacritics (combining marks), named by their Persian names.
local fatHatan = U(0x64B) -- اً, tanvin-e nasb (تنوین نصب)
local fathe = U(0x64E) -- also zebar, zabar
local kasre = U(0x650) -- also zir
local zamme = U(0x64F) -- also piš
local tashdid = U(0x651) -- also called šadda
local jazm = U(0x652)
-- Letters and special characters used by the context-sensitive rules in `tr`.
local alef = "ا" -- Arabic alif
local zwnj = U(0x200C) -- zero-width non-joiner; mapped to "-" in `mapping`
local he = "ه"
local hamza_above = U(0x0654) -- not referenced in the visible part of this module
-- Multi-character sequences that get a dedicated transliteration (used as keys in `mapping`).
local xva = "خوا"
local xvi = "خوی"
local eyi = "ه‌ای"

local vaw = U(0x0648) -- not referenced in the visible part of this module
local ye = U(0x06CC)

-- Letters placed inside character classes ([...]) by the rules in `tr`,
-- i.e. the letters that may precede/follow the contexts handled there.
local group = "بپتثجچحخدذرزژسشصضطظغفقکگلمنوهی"
-- NOTE(review): presumably the three short-vowel marks written as one literal;
-- not referenced in the visible part of this module - confirm before removing.
local ZZP = "َُِ"

 
-- Lookup table from a source character (or sequence) to its Latin rendering.
-- Entries are kept one per line: mixing several right-to-left keys on a single
-- line makes the source hard to read in most editors.
local mapping = {
	-- letters
	["ا"] = "â",
	["ب"] = "b",
	["پ"] = "p",
	["ت"] = "t",
	["ث"] = "s",
	["ج"] = "j",
	["چ"] = "č",
	["ح"] = "h",
	["خ"] = "x",
	["د"] = "d",
	["ذ"] = "z",
	["ر"] = "r",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "š",
	["ص"] = "s",
	["ض"] = "z",
	["ط"] = "t",
	["ظ"] = "z",
	["غ"] = "ğ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "u",
	["ه"] = "h",
	["ی"] = "i",
	["آ"] = "â",

	-- ain and the hamza carriers all become an apostrophe
	["ع"] = "'",
	["ء"] = "'",
	["ئ"] = "'",
	["ؤ"] = "'",
	["أ"] = "'",

	-- diacritics
	[fathe] = "a",
	[kasre] = "e",
	[zamme] = "o",
	[jazm] = "", -- also sokun - no vowel
	[fatHatan] = "n",
	[zwnj] = "-",
	["ۀ"] = "-ye", -- U+06C0 ARABIC LETTER HEH WITH YEH ABOVE (non-normative; should use the sequence U+0647 U+0654 = هٔ)

	-- multi-character keys: a one-character-at-a-time lookup can never produce
	-- these keys, so they only matter to code that looks the whole sequence up
	[xva] = "xâ",
	[xvi] = "xi",
	[eyi] = "e-yi",

	-- ligatures
	["ﻻ"] = "lâ",
	["ﷲ"] = "llâh",

	-- kashida (= Arabic tatweel)
	["ـ"] = "", -- kashida, no sound

	-- numerals
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",
	["۰"] = "0",

	-- normal arabic variants to numerals
	["١"] = "1",
	["٢"] = "2",
	["٣"] = "3",
	["٤"] = "4",
	["٥"] = "5",
	["٦"] = "6",
	["٧"] = "7",
	["٨"] = "8",
	["٩"] = "9",
	["٠"] = "0",

	-- punctuation
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
}
 
--- Transliterate Persian-script text into Latin script.
-- Works as an ordered chain of substitutions: context-sensitive rules while the
-- text is still in Persian script, then a per-character lookup in `mapping`,
-- then clean-up rules on the resulting Latin text. The order of the rules
-- matters, because later rules see the output of earlier ones.
-- @param text  the string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)

	-- he at the very end of the text, after a letter of `group` (optionally
	-- carrying tashdid), is read as "e"
	text = rsub(text, '([' .. group .. ']' .. tashdid .. '?)ه$', '%1e')
	text = rsub(text, alef .. fathe, "a")

	-- Multi-character sequences. The per-character lookup further down matches
	-- one character at a time, so the multi-character keys of `mapping` can
	-- never be hit there; they have to be replaced explicitly, and before the
	-- rules below consume their parts (he + ZWNJ would otherwise turn
	-- "ه‌ای" into "e-âi" instead of "e-yi").
	text = rsub(text, eyi, "e-yi")
	text = rsub(text, xvi, "xi")
	text = rsub(text, xva, "xâ")

	text = rsub(text, he .. zwnj, "e-")
	text = rsub(text, he .. ye, "-ye")
	text = rsub(text, "هٔ", "-ye")

	-- kasre + ye before a letter of `group` is the diphthong "ey"
	text = rsub(text, kasre .. ye .. '([' .. group .. '])', "ey%1")
	text = rsub(text, jazm .. alef, "a")

	--text = rsub(text, 'ىٰ', "â") -- the first letter is U+0649 (Arabic alif maqṣūra), it doesn't belong here
	text = rsub(text, 'یٰ', "â") -- the first letter is U+06CC (Farsi ye)
	text = rsub(text, 'ٰ', "â")
	text = rsub(text, 'ا' .. fatHatan, "an")
	-- text = rsub(text, 'الله', "ﷲ")
	-- text = rsub(text, 'لا', "ﻻ")

	-- ezafe: a letter of `group` carrying kasre right before a space
	text = rsub(text, '([' .. group .. '])' .. kasre .. " ", "%1-e ")

	-- per-character lookup; characters without an entry in `mapping` are kept as they are
	text = rsub(text, '.', mapping)

	-- merge the vowel pairs produced by the steps above
	text = rsub(text, 'aâ', "â")
	text = rsub(text, 'âa', "a")

	-- "u" and "i" directly before a vowel are the consonants "v" and "y"
	text = rsub(text, 'u([aâeiou])', "v%1")
	text = rsub(text, 'i([aâeiou])', "y%1")
	text = rsub(text, "([aâeiou])(" .. tashdid .. ")", "%2%1") -- swapping tašdid with vowels
	text = rsub(text, "(.)" .. tashdid, "%1%1") -- implementing tašdid
	-- drop the "h" of "eh" at the end of the text, or before any character
	-- that is not one of the Latin letters / apostrophe listed in the class
	text = rsub(text, 'eh$', "e")
	text = rsub(text, "eh([^aâeiouy'bdfghjklmnpqrstvyxzčğšž])", "e%1")

	return text
end
 
-- Module result: the table carrying the public `tr` function.
return export