local gsub = string.gsub
local set = require("Module:utilities/set")
local upper = string.upper
local data = {}
-- The value stored for each tag is the Lua string pattern used to find its
-- end tag: "</", then one two-character class per character of the tag name
-- (upper-cased form followed by the character as written, so either case
-- matches), then optional whitespace and ">".
do
	-- Turns a single character into a class accepting its upper-cased form or
	-- the character itself.
	local function either_case(ch)
		return "[" .. upper(ch) .. ch .. "]"
	end

	-- Builds the end-tag pattern for one tag name.
	local function end_tag_pattern(tag)
		-- Only the first result of gsub (the rewritten string) is wanted.
		local name_pattern = gsub(tag, ".", either_case)
		return "</" .. name_pattern .. "%s*>"
	end

	data.tags = set(
		{"categorytree", "ce", "charinsert", "chem", "dynamicpagelist", "gallery", "graph", "hiero", "imagemap", "includeonly", "indicator", "inputbox", "langconvert", "mapframe", "maplink", "math", "noinclude", "nowiki", "phonos", "poem", "pre", "ref", "references", "score", "section", "source", "syntaxhighlight", "talkpage", "templatedata", "templatestyles", "thread", "timeline"},
		end_tag_pattern
	)
end
do
-- Maps alias spellings of magic words to the canonical name they stand for.
-- Names that do not appear as keys here are treated as already canonical by
-- set_magic_word. Keys are looked up exactly as written (no case folding is
-- done in this table).
local aliases = {
	["#DATEFORMAT"] = "#FORMATDATE",
	["#SECTION"] = "#LST",
	["#SECTION-H"] = "#LSTH",
	["#SECTION-X"] = "#LSTX",
	["ARTICLEPAGENAME"] = "SUBJECTPAGENAME",
	-- Must point at the SUBJECT* form like the other ARTICLE* entries; mapping
	-- it to itself left the alias unresolved.
	["ARTICLEPAGENAMEE"] = "SUBJECTPAGENAMEE",
	["ARTICLESPACE"] = "SUBJECTSPACE",
	["ARTICLESPACEE"] = "SUBJECTSPACEE",
	["CONTENTLANG"] = "CONTENTLANGUAGE",
	["CURRENTMONTH2"] = "CURRENTMONTH",
	["DEFAULTCATEGORYSORT"] = "DEFAULTSORT",
	["DEFAULTSORTKEY"] = "DEFAULTSORT",
	["DIRMARK"] = "DIRECTIONMARK",
	["LOCALMONTH2"] = "LOCALMONTH",
	["NUMINGROUP"] = "NUMBERINGROUP",
	["PAGESINCAT"] = "PAGESINCATEGORY",
	["pagename"] = "PAGENAME" -- anomalous lowercase case-sensitive parser variable
}
-- Resolves a magic word name to its canonical form: returns the entry from
-- the aliases table when there is one, otherwise returns the name unchanged.
local function set_magic_word(pf)
	local canonical = aliases[pf]
	if canonical then
		return canonical
	end
	return pf
end
-- Parser function names, split into two sets by how the name is matched.
-- Every name is handed to set together with set_magic_word, which supplies
-- the canonical (alias-resolved) name for it.
local case_sensitive_functions = {"ARTICLEPAGENAME", "ARTICLEPAGENAMEE", "ARTICLESPACE", "ARTICLESPACEE", "BASEPAGENAME", "BASEPAGENAMEE", "CASCADINGSOURCES", "DEFAULTCATEGORYSORT", "DEFAULTSORT", "DEFAULTSORTKEY", "DISPLAYTITLE", "FULLPAGENAME", "FULLPAGENAMEE", "NAMESPACE", "NAMESPACEE", "NAMESPACENUMBER", "NUMBERINGROUP", "NUMBEROFACTIVEUSERS", "NUMBEROFADMINS", "NUMBEROFARTICLES", "NUMBEROFEDITS", "NUMBEROFFILES", "NUMBEROFPAGES", "NUMBEROFUSERS", "NUMINGROUP", "PAGENAME", "PAGENAMEE", "PAGESINCAT", "PAGESINCATEGORY", "PAGESIZE", "PROTECTIONEXPIRY", "PROTECTIONLEVEL", "REVISIONDAY", "REVISIONDAY2", "REVISIONID", "REVISIONMONTH", "REVISIONMONTH1", "REVISIONTIMESTAMP", "REVISIONUSER", "REVISIONYEAR", "ROOTPAGENAME", "ROOTPAGENAMEE", "SUBJECTPAGENAME", "SUBJECTPAGENAMEE", "SUBJECTSPACE", "SUBJECTSPACEE", "SUBPAGENAME", "SUBPAGENAMEE", "TALKPAGENAME", "TALKPAGENAMEE", "TALKSPACE", "TALKSPACEE"}
local case_insensitive_functions = {"#BABEL", "#CATEGORYTREE", "#COMMASEPARATEDLIST", "#DATEFORMAT", "#EXPR", "#FORMATDATE", "#IF", "#IFEQ", "#IFERROR", "#IFEXIST", "#IFEXPR", "#INVOKE", "#LANGUAGE", "#LQTPAGELIMIT", "#LST", "#LSTH", "#LSTX", "#PROPERTY", "#REL2ABS", "#SECTION", "#SECTION-H", "#SECTION-X", "#SPECIAL", "#SPECIALE", "#STATEMENTS", "#SWITCH", "#TAG", "#TARGET", "#TIME", "#TIMEL", "#TITLEPARTS", "#USELIQUIDTHREADS", "ANCHORENCODE", "BIDI", "CANONICALURL", "CANONICALURLE", "FILEPATH", "FORMATNUM", "FULLURL", "FULLURLE", "GENDER", "GRAMMAR", "INT", "LC", "LCFIRST", "LOCALURL", "LOCALURLE", "NOEXTERNALLANGLINKS", "NS", "NSE", "PADLEFT", "PADRIGHT", "PAGEID", "PLURAL", "UC", "UCFIRST", "URLENCODE"}
data.parser_functions = {
	-- [1]: case sensitive.
	set(case_sensitive_functions, set_magic_word),
	-- [2]: case insensitive.
	set(case_insensitive_functions, set_magic_word)
}
-- Parser variable names, split into two sets by how the name is matched.
-- Every name is handed to set together with set_magic_word, which supplies
-- the canonical (alias-resolved) name for it.
local case_sensitive_variables = {"ARTICLEPAGENAME", "ARTICLEPAGENAMEE", "ARTICLESPACE", "ARTICLESPACEE", "BASEPAGENAME", "BASEPAGENAMEE", "CASCADINGSOURCES", "CONTENTLANG", "CONTENTLANGUAGE", "CURRENTDAY", "CURRENTDAY2", "CURRENTDAYNAME", "CURRENTDOW", "CURRENTHOUR", "CURRENTMONTH", "CURRENTMONTH1", "CURRENTMONTH2", "CURRENTMONTHABBREV", "CURRENTMONTHNAME", "CURRENTMONTHNAMEGEN", "CURRENTTIME", "CURRENTTIMESTAMP", "CURRENTVERSION", "CURRENTWEEK", "CURRENTYEAR", "DIRECTIONMARK", "DIRMARK", "FULLPAGENAME", "FULLPAGENAMEE", "LOCALDAY", "LOCALDAY2", "LOCALDAYNAME", "LOCALDOW", "LOCALHOUR", "LOCALMONTH", "LOCALMONTH1", "LOCALMONTH2", "LOCALMONTHABBREV", "LOCALMONTHNAME", "LOCALMONTHNAMEGEN", "LOCALTIME", "LOCALTIMESTAMP", "LOCALWEEK", "LOCALYEAR", "NAMESPACE", "NAMESPACEE", "NAMESPACENUMBER", "NUMBEROFACTIVEUSERS", "NUMBEROFADMINS", "NUMBEROFARTICLES", "NUMBEROFEDITS", "NUMBEROFFILES", "NUMBEROFPAGES", "NUMBEROFUSERS", "PAGELANGUAGE", "pagename", "PAGENAME", "PAGENAMEE", "REVISIONDAY", "REVISIONDAY2", "REVISIONID", "REVISIONMONTH", "REVISIONMONTH1", "REVISIONSIZE", "REVISIONTIMESTAMP", "REVISIONUSER", "REVISIONYEAR", "ROOTPAGENAME", "ROOTPAGENAMEE", "SITENAME", "SUBJECTPAGENAME", "SUBJECTPAGENAMEE", "SUBJECTSPACE", "SUBJECTSPACEE", "SUBPAGENAME", "SUBPAGENAMEE", "TALKPAGENAME", "TALKPAGENAMEE", "TALKSPACE", "TALKSPACEE"}
local case_insensitive_variables = {"!", "=", "ARTICLEPATH", "NOEXTERNALLANGLINKS", "PAGEID", "SCRIPTPATH", "SERVER", "SERVERNAME", "STYLEPATH"}
data.parser_variables = {
	-- [1]: case sensitive.
	set(case_sensitive_variables, set_magic_word),
	-- [2]: case insensitive.
	set(case_insensitive_variables, set_magic_word)
}
end
-- Transclusion modifier keywords, each paired with a small integer.
-- NOTE(review): keywords sharing a number (SUBST/SAFESUBST, MSG/MSGNW) are
-- presumably treated as the same kind of modifier by the consumer, and the
-- number itself is presumably a class or priority - confirm against the code
-- that reads this table.
data.transclusion_modifiers = {
	["SUBST"] = 1,
	["SAFESUBST"] = 1,
	["MSG"] = 2,
	["MSGNW"] = 2,
	["RAW"] = 3,
	["INT"] = 4
}
-- Negated character class matching any single character that is NOT allowed
-- in a tag attribute name. Allowed characters are "-", ".", ":", "_", anything
-- matched by %w, and the extra characters/ranges listed below.
-- %w is equivalent to \p{L}\p{Nd}, but the native regex uses \p{L}\p{N}, so
-- the extra entries are there to close that gap (presumably they are the
-- numeric characters outside \p{Nd} - verify against the Unicode data in use).
-- Each entry is written as decimal-escaped UTF-8 bytes, with the code point(s)
-- it encodes given in the trailing comment.
-- NOTE(review): multi-byte ranges only behave as code point ranges under a
-- UTF-8-aware pattern matcher (presumably mw.ustring); the plain string
-- library works byte by byte - confirm against the callers.
data.invalid_tag_attribute_name_char = "[^%-%.:_%w" ..
"\194\178" .. -- U+00B2
"\194\179" .. -- U+00B3
"\194\185" .. -- U+00B9
"\194\188-\194\190" .. -- U+00BC-U+00BE
"\224\167\180-\224\167\185" .. -- U+09F4-U+09F9
"\224\173\178-\224\173\183" .. -- U+0B72-U+0B77
"\224\175\176-\224\175\178" .. -- U+0BF0-U+0BF2
"\224\177\184-\224\177\190" .. -- U+0C78-U+0C7E
"\224\181\152-\224\181\158" .. -- U+0D58-U+0D5E
"\224\181\176-\224\181\184" .. -- U+0D70-U+0D78
"\224\188\170-\224\188\179" .. -- U+0F2A-U+0F33
"\225\141\169-\225\141\188" .. -- U+1369-U+137C
"\225\155\174-\225\155\176" .. -- U+16EE-U+16F0
"\225\159\176-\225\159\185" .. -- U+17F0-U+17F9
"\225\167\154" .. -- U+19DA
"\226\129\176" .. -- U+2070
"\226\129\180-\226\129\185" .. -- U+2074-U+2079
"\226\130\128-\226\130\137" .. -- U+2080-U+2089
"\226\133\144-\226\134\130" .. -- U+2150-U+2182
"\226\134\133-\226\134\137" .. -- U+2185-U+2189
"\226\145\160-\226\146\155" .. -- U+2460-U+249B
"\226\147\170-\226\147\191" .. -- U+24EA-U+24FF
"\226\157\182-\226\158\147" .. -- U+2776-U+2793
"\226\179\189" .. -- U+2CFD
"\227\128\135" .. -- U+3007
"\227\128\161-\227\128\169" .. -- U+3021-U+3029
"\227\128\184-\227\128\186" .. -- U+3038-U+303A
"\227\134\146-\227\134\149" .. -- U+3192-U+3195
"\227\136\160-\227\136\169" .. -- U+3220-U+3229
"\227\137\136-\227\137\143" .. -- U+3248-U+324F
"\227\137\145-\227\137\159" .. -- U+3251-U+325F
"\227\138\128-\227\138\137" .. -- U+3280-U+3289
"\227\138\177-\227\138\191" .. -- U+32B1-U+32BF
"\234\155\166-\234\155\175" .. -- U+A6E6-U+A6EF
"\234\160\176-\234\160\181" .. -- U+A830-U+A835
"\240\144\132\135-\240\144\132\179" .. -- U+10107-U+10133
"\240\144\133\128-\240\144\133\184" .. -- U+10140-U+10178
"\240\144\134\138" .. -- U+1018A
"\240\144\134\139" .. -- U+1018B
"\240\144\139\161-\240\144\139\187" .. -- U+102E1-U+102FB
"\240\144\140\160-\240\144\140\163" .. -- U+10320-U+10323
"\240\144\141\129" .. -- U+10341
"\240\144\141\138" .. -- U+1034A
"\240\144\143\145-\240\144\143\149" .. -- U+103D1-U+103D5
"\240\144\161\152-\240\144\161\159" .. -- U+10858-U+1085F
"\240\144\161\185-\240\144\161\191" .. -- U+10879-U+1087F
"\240\144\162\167-\240\144\162\175" .. -- U+108A7-U+108AF
"\240\144\163\187-\240\144\163\191" .. -- U+108FB-U+108FF
"\240\144\164\150-\240\144\164\155" .. -- U+10916-U+1091B
"\240\144\166\188" .. -- U+109BC
"\240\144\166\189" .. -- U+109BD
"\240\144\167\128-\240\144\167\143" .. -- U+109C0-U+109CF
"\240\144\167\146-\240\144\167\191" .. -- U+109D2-U+109FF
"\240\144\169\128-\240\144\169\136" .. -- U+10A40-U+10A48
"\240\144\169\189" .. -- U+10A7D
"\240\144\169\190" .. -- U+10A7E
"\240\144\170\157-\240\144\170\159" .. -- U+10A9D-U+10A9F
"\240\144\171\171-\240\144\171\175" .. -- U+10AEB-U+10AEF
"\240\144\173\152-\240\144\173\159" .. -- U+10B58-U+10B5F
"\240\144\173\184-\240\144\173\191" .. -- U+10B78-U+10B7F
"\240\144\174\169-\240\144\174\175" .. -- U+10BA9-U+10BAF
"\240\144\179\186-\240\144\179\191" .. -- U+10CFA-U+10CFF
"\240\144\185\160-\240\144\185\190" .. -- U+10E60-U+10E7E
"\240\144\188\157-\240\144\188\166" .. -- U+10F1D-U+10F26
"\240\144\189\145-\240\144\189\148" .. -- U+10F51-U+10F54
"\240\144\191\133-\240\144\191\139" .. -- U+10FC5-U+10FCB
"\240\145\129\146-\240\145\129\165" .. -- U+11052-U+11065
"\240\145\135\161-\240\145\135\180" .. -- U+111E1-U+111F4
"\240\145\156\186" .. -- U+1173A
"\240\145\156\187" .. -- U+1173B
"\240\145\163\170-\240\145\163\178" .. -- U+118EA-U+118F2
"\240\145\177\154-\240\145\177\172" .. -- U+11C5A-U+11C6C
"\240\145\191\128-\240\145\191\148" .. -- U+11FC0-U+11FD4
"\240\146\144\128-\240\146\145\174" .. -- U+12400-U+1246E
"\240\150\173\155-\240\150\173\161" .. -- U+16B5B-U+16B61
"\240\150\186\128-\240\150\186\150" .. -- U+16E80-U+16E96
"\240\157\139\128-\240\157\139\147" .. -- U+1D2C0-U+1D2D3
"\240\157\139\160-\240\157\139\179" .. -- U+1D2E0-U+1D2F3
"\240\157\141\160-\240\157\141\184" .. -- U+1D360-U+1D378
"\240\158\163\135-\240\158\163\143" .. -- U+1E8C7-U+1E8CF
"\240\158\177\177-\240\158\178\171" .. -- U+1EC71-U+1ECAB
"\240\158\178\173-\240\158\178\175" .. -- U+1ECAD-U+1ECAF
"\240\158\178\177-\240\158\178\180" .. -- U+1ECB1-U+1ECB4
"\240\158\180\129-\240\158\180\173" .. -- U+1ED01-U+1ED2D
"\240\158\180\175-\240\158\180\189" .. -- U+1ED2F-U+1ED3D
"\240\159\132\128-\240\159\132\140" .. -- U+1F100-U+1F10C
"]"
-- Module result: the table populated above (tags, parser_functions,
-- parser_variables, transclusion_modifiers, invalid_tag_attribute_name_char).
return data