Bước tới nội dung

Mô đun:string/codepoint

Từ điển mở Wiktionary
-- Local aliases for the library functions used by the decoder below.
local byte = string.byte
local unpack = unpack

--- Decode one UTF-8 sequence from its raw byte values.
-- The sequence length is chosen from the lead byte alone, and the bytes are
-- not validated: continuation bytes are assumed to be present and well-formed.
-- @param b1 lead byte
-- @param b2 second byte (read for sequences of 2 or more bytes)
-- @param b3 third byte (read for sequences of 3 or more bytes)
-- @param b4 fourth byte (read for 4-byte sequences)
-- @return the decoded code point, followed by the number of bytes consumed
local function get_codepoint(b1, b2, b3, b4)
	-- Each term strips the marker prefix from a byte (0xF0/0xE0/0xC0 on the
	-- lead byte, 0x80 on continuation bytes) and shifts the remaining payload
	-- into place with a power-of-two multiplier.
	if b1 >= 240 then
		local cp = (b1 - 0xF0) * 0x40000
			+ (b2 - 0x80) * 0x1000
			+ (b3 - 0x80) * 0x40
			+ (b4 - 0x80)
		return cp, 4
	end
	if b1 >= 224 then
		local cp = (b1 - 0xE0) * 0x1000
			+ (b2 - 0x80) * 0x40
			+ (b3 - 0x80)
		return cp, 3
	end
	if b1 >= 128 then
		local cp = (b1 - 0xC0) * 0x40 + (b2 - 0x80)
		return cp, 2
	end
	-- Single-byte (ASCII) character: the byte is the code point.
	return b1, 1
end

--- Return the code points of characters `i` through `j` of a UTF-8 string.
-- Positions count characters, not bytes. The input is decoded without
-- validation, so it is expected to be well-formed UTF-8.
-- @param text the string to read
-- @param i first character position (defaults to 1)
-- @param j last character position (defaults to `i`)
-- @return one number per character in the range; no values when the range
--         lies entirely outside the string
return function(text, i, j)
	i, j = i or 1, j or i or 1
	if i == 1 and j == 1 then
		-- Fast path for the common "first character" request.
		local b1, b2, b3, b4 = byte(text, 1, 4)
		if not b1 then
			-- Empty string: return nothing instead of comparing nil.
			return
		end
		return (get_codepoint(b1, b2, b3, b4))
	elseif i < 0 or j < 0 then
		-- Negative positions are delegated to the ustring library.
		return mw.ustring.codepoint(text, i, j) -- FIXME
	end
	-- n: index of the current character; nb: byte offset of that character;
	-- ret/nr: collected code points and their count.
	local n, nb, ret, nr = 0, 1, {}, 0
	while n < j do
		n = n + 1
		if n < i then
			-- Before the requested range: skip the character by lead byte.
			local b = byte(text, nb)
			if not b then
				-- Ran off the end of the string before reaching `i`.
				break
			end
			nb = nb + (b < 128 and 1 or b < 224 and 2 or b < 240 and 3 or 4)
		else
			local b1, b2, b3, b4 = byte(text, nb, nb + 3)
			if not b1 then
				-- End of string reached inside the requested range.
				break
			end
			nr = nr + 1
			local add
			ret[nr], add = get_codepoint(b1, b2, b3, b4)
			nb = nb + add
		end
	end
	return unpack(ret)
end