Bước tới nội dung

Mô đun:Jpan-translit

Từ điển mở Wiktionary
-- Unicode-aware pattern functions taken from Scribunto's mw.ustring library.
local find = mw.ustring.find
local gsub = mw.ustring.gsub
-- Kana romanizer; invoked below as kana_to_romaji(text, lang, sc, options).
-- NOTE(review): this binds whatever Module:Hrkt-translit returns, so that
-- value must itself be callable -- confirm (or select the intended field).
local kana_to_romaji = require("Module:Hrkt-translit")
local match = mw.ustring.match
-- NOTE(review): presumably a Unicode-aware uppercase function that the
-- Scribunto environment exposes on the string table -- confirm it is defined.
local upper = string.uupper

-- Table of functions returned by this module.
local export = {}

--- Report whether a string's letters are all acceptable romaji letters.
-- Every character matching "%A" (non-letters under mw.ustring) is removed
-- first, so digits, spaces, hyphens and other punctuation never cause a
-- rejection; only the remaining letters are checked. Accepted letters are
-- ASCII A-Z / a-z and the macron vowels ĀĪŪĒŌ / āīūēō.
-- @param str  string to check
-- @return true when no other letter is present (including when no letter is
--         present at all), false otherwise
local function is_good_romaji(str)
	local letters_only = gsub(str, "%A", "")
	-- Two explicit ranges: a single "A-z" range would also span the ASCII
	-- punctuation that sits between "Z" and "a".
	return match(letters_only, "[^A-Za-zĀĪŪĒŌāīūēō]") == nil
end

--- Adjust a romanization according to a part-of-speech keyword.
--   "proper"                          -> returned unchanged if it already has
--                                        an uppercase letter; otherwise the
--                                        first letter of every run of letters
--                                        is uppercased
--   "prefix"                          -> ends with exactly one added "-"
--                                        unless it already ends with "-"
--   "suffix", "counter", "classifier" -> starts with "-" (an existing leading
--                                        "-" is not doubled)
--   anything else                     -> returned unchanged
-- @param rom  romanized string
-- @param pos  part-of-speech keyword
-- @return the adjusted romanization
local function format_pos_romaji(rom, pos)
	if pos == "proper" then
		if find(rom, "%u") then return rom end
		return (gsub(rom, "%f[%a]%a", upper))
	elseif pos == "prefix" then
		-- Checked explicitly rather than with gsub("%-?$", "-"): under
		-- Lua 5.1 that gsub matches the trailing "-" and then matches again
		-- on the empty string at the end of the subject, giving "--".
		if rom:sub(-1) == "-" then return rom end
		return rom .. "-"
	elseif pos == "suffix" or pos == "counter" or pos == "classifier" then
		-- The "^" anchor limits gsub to a single match, so no doubling here.
		return (rom:gsub("^%-?", "-"))
	else
		return rom
	end
end

--- Transliterate `text`, consulting the wikitext of the page titled `text`.
--
-- 1. `text` itself is romanized with kana_to_romaji; the result (rom_title)
--    is kept only if is_good_romaji accepts it, otherwise it becomes nil.
-- 2. The content of the page titled `text` is loaded. If the title is invalid
--    or the page has no content, rom_title is returned.
-- 3. Each {{ja-noun}}, {{ja-verb}}, {{ja-verb-suru}}, {{ja-adj}},
--    {{ja-phrase}}, {{ja-combining form}}, {{ja-verb form}}, {{ja-see}} and
--    {{ja-pos|...}} template found in the page contributes candidates: every
--    parameter that romanizes to good romaji is a candidate, and a template
--    that yields none contributes rom_title instead (when it is non-nil).
--    Candidates from {{ja-pos}} are passed through format_pos_romaji with
--    that template's first parameter.
-- 4. As soon as two candidates differ, rom_title is returned (possibly nil).
--    Otherwise the single agreed candidate is returned, or rom_title if no
--    candidate was found.
--
-- @param text     string to transliterate; also used as the page title
-- @param lang     passed through to kana_to_romaji
-- @param sc       passed through to kana_to_romaji
-- @param options  passed through to kana_to_romaji
-- @return a romanization string, or nil
function export.tr(text, lang, sc, options)
	local rom_result
	
	local rom_title = kana_to_romaji(text, lang, sc, options)
	if not is_good_romaji(rom_title) then rom_title = nil end
	
	-- mw.title.new returns nil when `text` is not a valid page title, so the
	-- title object must be checked before calling a method on it.
	local title = mw.title.new(text)
	local pagetext = title and title:getContent()
	if not pagetext then
		return rom_title
	end
	
	for _, tn in ipairs{"noun", "verb", "verb%-suru", "adj", "phrase", "combining form", "verb form", "see"} do
		-- Template used with no parameters at all: the title reading applies.
		if rom_title and pagetext:match("{{ja%-" .. tn .. "}}") then
			if rom_result and rom_result ~= rom_title then return rom_title end
			rom_result = rom_title
		end
		-- Template used with parameters; `t` is "|...}" (one closing brace kept
		-- so that the last parameter is terminated for the inner pattern).
		for t in pagetext:gmatch("{{ja%-" .. tn .. "(|..-})}") do
			local no_kana = true
			-- Walk each parameter: the text between a "|" and the next "|" or "}".
			for tt in t:gmatch"%f[^|]..-%f[|}]" do
				-- Skip parameters matching "%D.*=" (a non-digit followed later
				-- by "=", i.e. what look like named parameters) and anything
				-- containing link brackets.
				if not tt:match"%D.*=" and not tt:match"%[%[" and not tt:match"]]" then
					local rom = kana_to_romaji(tt, lang, sc, options)
					if is_good_romaji(rom) then
						no_kana = false
						if rom_result and rom_result ~= rom then return rom_title end
						rom_result = rom
					end
				end
			end
			-- No usable parameter in this template: fall back to the title reading.
			if rom_title and no_kana then
				if rom_result and rom_result ~= rom_title then return rom_title end
				rom_result = rom_title
			end
		end
	end
	
	for t in pagetext:gmatch"{{ja%-pos|(..-})}" do
		-- Split into the first parameter (part of speech) and the remainder.
		local pos, ta = t:match"^(..-)(|..-})$"
		if ta then
			local no_kana = true
			for tt in ta:gmatch"%f[^|]..-%f[|}]" do
				if not tt:match"%D.*=" and not tt:match"%[%[" and not tt:match"]]" then
					local rom = kana_to_romaji(tt, lang, sc, options)
					if is_good_romaji(rom) then
						no_kana = false
						rom = format_pos_romaji(rom, pos)
						if rom_result and rom_result ~= rom then return rom_title end
						rom_result = rom
					end
				end
			end
			if rom_title and no_kana then
				local rom = format_pos_romaji(rom_title, pos)
				if rom_result and rom_result ~= rom then return rom_title end
				rom_result = rom
			end
		elseif rom_title then
			-- Only the part of speech was given; strip the trailing "}" that
			-- the capture kept.
			local rom = format_pos_romaji(rom_title, t:sub(1, -2))
			if rom_result and rom_result ~= rom then return rom_title end
			rom_result = rom
		end
	end
	return rom_result or rom_title
end

-- Workaround for the [[mod:languages]] bug described at
-- [[special:diff/72585061]]: wrap the transliterator defined above so that
-- every apostrophe in its result is replaced by the output of a <nowiki> tag
-- containing an apostrophe. A nil/false result yields no return value.
local f_tr = export.tr
function export.tr(...)
	local romaji = f_tr(...)
	if not romaji then
		return
	end
	local nowiki_apostrophe = mw.getCurrentFrame():extensionTag("nowiki", "'")
	-- Assigning to a single local drops gsub's second result (the match count).
	local escaped = romaji:gsub("'", nowiki_apostrophe)
	return escaped
end

return export