Mô đun:string/codepoint
Giao diện
local byte = string.byte
local unpack = unpack
local function get_codepoint(b1, b2, b3, b4)
if b1 < 128 then
return b1, 1
elseif b1 < 224 then
return 0x40 * b1 + b2 - 0x3080, 2
elseif b1 < 240 then
return 0x1000 * b1 + 0x40 * b2 + b3 - 0xE2080, 3
end
return 0x40000 * b1 + 0x1000 * b2 + 0x40 * b3 + b4 - 0x3C82080, 4
end
return function(text, i, j)
i, j = i or 1, j or i or 1
if i == 1 and j == 1 then
return (get_codepoint(byte(text, 1, 4)))
elseif i < 0 or j < 0 then
return mw.ustring.codepoint(text, i, j) -- FIXME
end
local n, nb, ret, nr = 0, 1, {}, 0
while n < j do
n = n + 1
if n < i then
local b = byte(text, nb)
nb = nb + (b < 128 and 1 or b < 224 and 2 or b < 240 and 3 or 4)
else
local b1, b2, b3, b4 = byte(text, nb, nb + 3)
if not b1 then
break
end
nr = nr + 1
local add
ret[nr], add = get_codepoint(b1, b2, b3, b4)
nb = nb + add
end
end
return unpack(ret)
end