-- Module:template parser/data ("Mô đun:template parser/data")
--
-- Wiktionary, the open dictionary ("Từ điển mở Wiktionary")
local gsub = string.gsub
local set = require("Module:utilities/set")
local upper = string.upper

-- Table that collects every piece of static data defined below; it is the
-- value returned by this module.
local data = {}

-- Tag names handled by the parser. For each name, the value is the string
-- pattern used for the end tag: every character of the name is matched in
-- either case, and whitespace is allowed before the closing ">"
-- (e.g. "ref" gives "</[Rr][Ee][Ff]%s*>").
-- NOTE(review): assumes set(list, fn) stores fn(name) as the value for each
-- listed name - confirm against Module:utilities/set.
do
	-- Wraps one character in a class that accepts its upper-case form or the
	-- character itself.
	local function either_case(ch)
		return "[" .. upper(ch) .. ch .. "]"
	end

	-- Builds the end-tag pattern for a single tag name.
	local function end_tag_pattern(tag)
		local name_pattern = gsub(tag, ".", either_case)
		return "</" .. name_pattern .. "%s*>"
	end

	data.tags = set(
		{"categorytree", "ce", "charinsert", "chem", "dynamicpagelist", "gallery", "graph", "hiero", "imagemap", "includeonly", "indicator", "inputbox", "langconvert", "mapframe", "maplink", "math", "noinclude", "nowiki", "phonos", "poem", "pre", "ref", "references", "score", "section", "source", "syntaxhighlight", "talkpage", "templatedata", "templatestyles", "thread", "timeline"},
		end_tag_pattern
	)
end

-- Parser functions and parser variables (magic words).
--
-- Both data.parser_functions and data.parser_variables are two-element
-- arrays: element 1 holds the case-sensitive names, element 2 the
-- case-insensitive ones. Each name is passed through set_magic_word, so an
-- alias is associated with its canonical name and any other name with itself.
-- NOTE(review): assumes set(list, fn) stores fn(name) as the value for each
-- listed name - confirm against Module:utilities/set.
do
	-- Alias -> canonical magic word. Names absent from this table are already
	-- canonical.
	local aliases = {
		["#DATEFORMAT"] = "#FORMATDATE",
		["#SECTION"] = "#LST",
		["#SECTION-H"] = "#LSTH",
		["#SECTION-X"] = "#LSTX",
		["ARTICLEPAGENAME"] = "SUBJECTPAGENAME",
		-- Fixed: this previously mapped to itself, so the alias never
		-- resolved to the same canonical name as SUBJECTPAGENAMEE.
		["ARTICLEPAGENAMEE"] = "SUBJECTPAGENAMEE",
		["ARTICLESPACE"] = "SUBJECTSPACE",
		["ARTICLESPACEE"] = "SUBJECTSPACEE",
		["CONTENTLANG"] = "CONTENTLANGUAGE",
		["CURRENTMONTH2"] = "CURRENTMONTH",
		["DEFAULTCATEGORYSORT"] = "DEFAULTSORT",
		["DEFAULTSORTKEY"] = "DEFAULTSORT",
		["DIRMARK"] = "DIRECTIONMARK",
		["LOCALMONTH2"] = "LOCALMONTH",
		["NUMINGROUP"] = "NUMBERINGROUP",
		["PAGESINCAT"] = "PAGESINCATEGORY",
		["pagename"] = "PAGENAME" -- anomalous lowercase case-sensitive parser variable
	}
	
	-- Returns the canonical name for the magic word `pf`: the alias target if
	-- `pf` is a known alias, otherwise `pf` unchanged.
	local function set_magic_word(pf)
		return aliases[pf] or pf
	end
	
	data.parser_functions = {
		-- Case sensitive.
		set(
			{"ARTICLEPAGENAME", "ARTICLEPAGENAMEE", "ARTICLESPACE", "ARTICLESPACEE", "BASEPAGENAME", "BASEPAGENAMEE", "CASCADINGSOURCES", "DEFAULTCATEGORYSORT", "DEFAULTSORT", "DEFAULTSORTKEY", "DISPLAYTITLE", "FULLPAGENAME", "FULLPAGENAMEE", "NAMESPACE", "NAMESPACEE", "NAMESPACENUMBER", "NUMBERINGROUP", "NUMBEROFACTIVEUSERS", "NUMBEROFADMINS", "NUMBEROFARTICLES", "NUMBEROFEDITS", "NUMBEROFFILES", "NUMBEROFPAGES", "NUMBEROFUSERS", "NUMINGROUP", "PAGENAME", "PAGENAMEE", "PAGESINCAT", "PAGESINCATEGORY", "PAGESIZE", "PROTECTIONEXPIRY", "PROTECTIONLEVEL", "REVISIONDAY", "REVISIONDAY2", "REVISIONID", "REVISIONMONTH", "REVISIONMONTH1", "REVISIONTIMESTAMP", "REVISIONUSER", "REVISIONYEAR", "ROOTPAGENAME", "ROOTPAGENAMEE", "SUBJECTPAGENAME", "SUBJECTPAGENAMEE", "SUBJECTSPACE", "SUBJECTSPACEE", "SUBPAGENAME", "SUBPAGENAMEE", "TALKPAGENAME", "TALKPAGENAMEE", "TALKSPACE", "TALKSPACEE"},
			set_magic_word
		),
		-- Case insensitive.
		set(
			{"#BABEL", "#CATEGORYTREE", "#COMMASEPARATEDLIST", "#DATEFORMAT", "#EXPR", "#FORMATDATE", "#IF", "#IFEQ", "#IFERROR", "#IFEXIST", "#IFEXPR", "#INVOKE", "#LANGUAGE", "#LQTPAGELIMIT", "#LST", "#LSTH", "#LSTX", "#PROPERTY", "#REL2ABS", "#SECTION", "#SECTION-H", "#SECTION-X", "#SPECIAL", "#SPECIALE", "#STATEMENTS", "#SWITCH", "#TAG", "#TARGET", "#TIME", "#TIMEL", "#TITLEPARTS", "#USELIQUIDTHREADS", "ANCHORENCODE", "BIDI", "CANONICALURL", "CANONICALURLE", "FILEPATH", "FORMATNUM", "FULLURL", "FULLURLE", "GENDER", "GRAMMAR", "INT", "LC", "LCFIRST", "LOCALURL", "LOCALURLE", "NOEXTERNALLANGLINKS", "NS", "NSE", "PADLEFT", "PADRIGHT", "PAGEID", "PLURAL", "UC", "UCFIRST", "URLENCODE"},
			set_magic_word
		)
	}
	
	data.parser_variables = {
		-- Case sensitive.
		set(
			{"ARTICLEPAGENAME", "ARTICLEPAGENAMEE", "ARTICLESPACE", "ARTICLESPACEE", "BASEPAGENAME", "BASEPAGENAMEE", "CASCADINGSOURCES", "CONTENTLANG", "CONTENTLANGUAGE", "CURRENTDAY", "CURRENTDAY2", "CURRENTDAYNAME", "CURRENTDOW", "CURRENTHOUR", "CURRENTMONTH", "CURRENTMONTH1", "CURRENTMONTH2", "CURRENTMONTHABBREV", "CURRENTMONTHNAME", "CURRENTMONTHNAMEGEN", "CURRENTTIME", "CURRENTTIMESTAMP", "CURRENTVERSION", "CURRENTWEEK", "CURRENTYEAR", "DIRECTIONMARK", "DIRMARK", "FULLPAGENAME", "FULLPAGENAMEE", "LOCALDAY", "LOCALDAY2", "LOCALDAYNAME", "LOCALDOW", "LOCALHOUR", "LOCALMONTH", "LOCALMONTH1", "LOCALMONTH2", "LOCALMONTHABBREV", "LOCALMONTHNAME", "LOCALMONTHNAMEGEN", "LOCALTIME", "LOCALTIMESTAMP", "LOCALWEEK", "LOCALYEAR", "NAMESPACE", "NAMESPACEE", "NAMESPACENUMBER", "NUMBEROFACTIVEUSERS", "NUMBEROFADMINS", "NUMBEROFARTICLES", "NUMBEROFEDITS", "NUMBEROFFILES", "NUMBEROFPAGES", "NUMBEROFUSERS", "PAGELANGUAGE", "pagename", "PAGENAME", "PAGENAMEE", "REVISIONDAY", "REVISIONDAY2", "REVISIONID", "REVISIONMONTH", "REVISIONMONTH1", "REVISIONSIZE", "REVISIONTIMESTAMP", "REVISIONUSER", "REVISIONYEAR", "ROOTPAGENAME", "ROOTPAGENAMEE", "SITENAME", "SUBJECTPAGENAME", "SUBJECTPAGENAMEE", "SUBJECTSPACE", "SUBJECTSPACEE", "SUBPAGENAME", "SUBPAGENAMEE", "TALKPAGENAME", "TALKPAGENAMEE", "TALKSPACE", "TALKSPACEE"},
			set_magic_word
		),
		-- Case insensitive.
		set(
			{"!", "=", "ARTICLEPATH", "NOEXTERNALLANGLINKS", "PAGEID", "SCRIPTPATH", "SERVER", "SERVERNAME", "STYLEPATH"},
			set_magic_word
		)
	}
end

-- Transclusion modifier keywords, each mapped to a number.
-- NOTE(review): the numbers look like a rank or grouping (SUBST/SAFESUBST
-- share 1, MSG/MSGNW share 2) - confirm the meaning against the module that
-- consumes this table.
data.transclusion_modifiers = {
	["INT"] = 4,
	-- MSG and MSGNW share a value.
	["MSG"] = 2,
	["MSGNW"] = 2,
	["RAW"] = 3,
	-- SAFESUBST and SUBST share a value.
	["SAFESUBST"] = 1,
	["SUBST"] = 1,
}

-- Negated character class: matches one character that is not "-", ".", ":",
-- "_", a %w character, or one of the characters/ranges listed below.
-- %w is equivalent to \p{L}\p{Nd}, but the native regex uses \p{L}\p{N}; the
-- entries below presumably supply the \p{N} characters that %w misses
-- (TODO confirm the list is complete). Each entry is written as UTF-8 byte
-- escapes, with the corresponding code points in the trailing comment.
-- NOTE(review): the ranges span multi-byte sequences, so this is presumably
-- meant for a Unicode-aware matcher (e.g. mw.ustring) rather than the
-- byte-based string library - confirm against the caller.
data.invalid_tag_attribute_name_char = table.concat({
	"[^%-%.:_%w",
	"\194\178", -- U+00B2
	"\194\179", -- U+00B3
	"\194\185", -- U+00B9
	"\194\188-\194\190", -- U+00BC-U+00BE
	"\224\167\180-\224\167\185", -- U+09F4-U+09F9
	"\224\173\178-\224\173\183", -- U+0B72-U+0B77
	"\224\175\176-\224\175\178", -- U+0BF0-U+0BF2
	"\224\177\184-\224\177\190", -- U+0C78-U+0C7E
	"\224\181\152-\224\181\158", -- U+0D58-U+0D5E
	"\224\181\176-\224\181\184", -- U+0D70-U+0D78
	"\224\188\170-\224\188\179", -- U+0F2A-U+0F33
	"\225\141\169-\225\141\188", -- U+1369-U+137C
	"\225\155\174-\225\155\176", -- U+16EE-U+16F0
	"\225\159\176-\225\159\185", -- U+17F0-U+17F9
	"\225\167\154", -- U+19DA
	"\226\129\176", -- U+2070
	"\226\129\180-\226\129\185", -- U+2074-U+2079
	"\226\130\128-\226\130\137", -- U+2080-U+2089
	"\226\133\144-\226\134\130", -- U+2150-U+2182
	"\226\134\133-\226\134\137", -- U+2185-U+2189
	"\226\145\160-\226\146\155", -- U+2460-U+249B
	"\226\147\170-\226\147\191", -- U+24EA-U+24FF
	"\226\157\182-\226\158\147", -- U+2776-U+2793
	"\226\179\189", -- U+2CFD
	"\227\128\135", -- U+3007
	"\227\128\161-\227\128\169", -- U+3021-U+3029
	"\227\128\184-\227\128\186", -- U+3038-U+303A
	"\227\134\146-\227\134\149", -- U+3192-U+3195
	"\227\136\160-\227\136\169", -- U+3220-U+3229
	"\227\137\136-\227\137\143", -- U+3248-U+324F
	"\227\137\145-\227\137\159", -- U+3251-U+325F
	"\227\138\128-\227\138\137", -- U+3280-U+3289
	"\227\138\177-\227\138\191", -- U+32B1-U+32BF
	"\234\155\166-\234\155\175", -- U+A6E6-U+A6EF
	"\234\160\176-\234\160\181", -- U+A830-U+A835
	"\240\144\132\135-\240\144\132\179", -- U+10107-U+10133
	"\240\144\133\128-\240\144\133\184", -- U+10140-U+10178
	"\240\144\134\138", -- U+1018A
	"\240\144\134\139", -- U+1018B
	"\240\144\139\161-\240\144\139\187", -- U+102E1-U+102FB
	"\240\144\140\160-\240\144\140\163", -- U+10320-U+10323
	"\240\144\141\129", -- U+10341
	"\240\144\141\138", -- U+1034A
	"\240\144\143\145-\240\144\143\149", -- U+103D1-U+103D5
	"\240\144\161\152-\240\144\161\159", -- U+10858-U+1085F
	"\240\144\161\185-\240\144\161\191", -- U+10879-U+1087F
	"\240\144\162\167-\240\144\162\175", -- U+108A7-U+108AF
	"\240\144\163\187-\240\144\163\191", -- U+108FB-U+108FF
	"\240\144\164\150-\240\144\164\155", -- U+10916-U+1091B
	"\240\144\166\188", -- U+109BC
	"\240\144\166\189", -- U+109BD
	"\240\144\167\128-\240\144\167\143", -- U+109C0-U+109CF
	"\240\144\167\146-\240\144\167\191", -- U+109D2-U+109FF
	"\240\144\169\128-\240\144\169\136", -- U+10A40-U+10A48
	"\240\144\169\189", -- U+10A7D
	"\240\144\169\190", -- U+10A7E
	"\240\144\170\157-\240\144\170\159", -- U+10A9D-U+10A9F
	"\240\144\171\171-\240\144\171\175", -- U+10AEB-U+10AEF
	"\240\144\173\152-\240\144\173\159", -- U+10B58-U+10B5F
	"\240\144\173\184-\240\144\173\191", -- U+10B78-U+10B7F
	"\240\144\174\169-\240\144\174\175", -- U+10BA9-U+10BAF
	"\240\144\179\186-\240\144\179\191", -- U+10CFA-U+10CFF
	"\240\144\185\160-\240\144\185\190", -- U+10E60-U+10E7E
	"\240\144\188\157-\240\144\188\166", -- U+10F1D-U+10F26
	"\240\144\189\145-\240\144\189\148", -- U+10F51-U+10F54
	"\240\144\191\133-\240\144\191\139", -- U+10FC5-U+10FCB
	"\240\145\129\146-\240\145\129\165", -- U+11052-U+11065
	"\240\145\135\161-\240\145\135\180", -- U+111E1-U+111F4
	"\240\145\156\186", -- U+1173A
	"\240\145\156\187", -- U+1173B
	"\240\145\163\170-\240\145\163\178", -- U+118EA-U+118F2
	"\240\145\177\154-\240\145\177\172", -- U+11C5A-U+11C6C
	"\240\145\191\128-\240\145\191\148", -- U+11FC0-U+11FD4
	"\240\146\144\128-\240\146\145\174", -- U+12400-U+1246E
	"\240\150\173\155-\240\150\173\161", -- U+16B5B-U+16B61
	"\240\150\186\128-\240\150\186\150", -- U+16E80-U+16E96
	"\240\157\139\128-\240\157\139\147", -- U+1D2C0-U+1D2D3
	"\240\157\139\160-\240\157\139\179", -- U+1D2E0-U+1D2F3
	"\240\157\141\160-\240\157\141\184", -- U+1D360-U+1D378
	"\240\158\163\135-\240\158\163\143", -- U+1E8C7-U+1E8CF
	"\240\158\177\177-\240\158\178\171", -- U+1EC71-U+1ECAB
	"\240\158\178\173-\240\158\178\175", -- U+1ECAD-U+1ECAF
	"\240\158\178\177-\240\158\178\180", -- U+1ECB1-U+1ECB4
	"\240\158\180\129-\240\158\180\173", -- U+1ED01-U+1ED2D
	"\240\158\180\175-\240\158\180\189", -- U+1ED2F-U+1ED3D
	"\240\159\132\128-\240\159\132\140", -- U+1F100-U+1F10C
	"]",
})

-- Export the assembled data table as the module's value.
return data