Quantcast
-- -------------------------------------------------------------------------- --
-- BattlegroundTargets utf8.lua                                               --
-- From utf8.lua (c) Kyle Smith                                               --
-- Modified by kunda (utf8replace mapping is used for transliteration)        --
-- -------------------------------------------------------------------------- --

local strbyte, strlen, strsub, strupper, type = string.byte, string.len, string.sub, string.upper, type

-- determine bytes needed for character, based on RFC 3629
local function utf8charbytes(s, i)
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8charbytes' (string expected, got " .. type(s) .. ")")
	end
	if type(i) ~= "number" then
		error("bad argument #2 to 'utf8charbytes' (number expected, got " .. type(i) .. ")")
	end

	local c = strbyte(s, i)

	if c > 0 and c <= 127 then -- UTF8-1
		return 1
	elseif c >= 194 and c <= 223 then -- UTF8-2
		local c2 = strbyte(s, i + 1)
		if not c2 then
			error("UTF-8 string terminated early")
		end
		if c2 < 128 or c2 > 191 then
			error("Invalid UTF-8 character")
		end
		return 2
	elseif c >= 224 and c <= 239 then -- UTF8-3
		local c2 = strbyte(s, i + 1)
		local c3 = strbyte(s, i + 2)
		if not c2 or not c3 then
			error("UTF-8 string terminated early")
		end
		if c == 224 and (c2 < 160 or c2 > 191) then
			error("Invalid UTF-8 character")
		elseif c == 237 and (c2 < 128 or c2 > 159) then
			error("Invalid UTF-8 character")
		elseif c2 < 128 or c2 > 191 then
			error("Invalid UTF-8 character")
		end
		if c3 < 128 or c3 > 191 then
			error("Invalid UTF-8 character")
		end
		return 3
	elseif c >= 240 and c <= 244 then -- UTF8-4
		local c2 = strbyte(s, i + 1)
		local c3 = strbyte(s, i + 2)
		local c4 = strbyte(s, i + 3)
		if not c2 or not c3 or not c4 then
			error("UTF-8 string terminated early")
		end
		if c == 240 and (c2 < 144 or c2 > 191) then
			error("Invalid UTF-8 character")
		elseif c == 244 and (c2 < 128 or c2 > 143) then
			error("Invalid UTF-8 character")
		elseif c2 < 128 or c2 > 191 then
			error("Invalid UTF-8 character")
		end
		if c3 < 128 or c3 > 191 then
			error("Invalid UTF-8 character")
		end
		if c4 < 128 or c4 > 191 then
			error("Invalid UTF-8 character")
		end
		return 4
	else
		error("Invalid UTF-8 character")
	end
end

-- replace UTF-8 characters based on a mapping table
local _, prg = ...
prg.utf8replace = function(s, mapping)
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8replace' (string expected, got " .. type(s) .. ")")
	end
	if type(mapping) ~= "table" then
		error("bad argument #2 to 'utf8replace' (table expected, got " .. type(mapping) .. ")")
	end

	local pos = 1
	local bytes = strlen(s)
	local charbytes
	local newstr = ""
	local upval

	while pos <= bytes do
		charbytes = utf8charbytes(s, pos)
		local c = strsub(s, pos, pos + charbytes - 1)
		newstr = newstr .. (mapping[c] or c)
		pos = pos + charbytes
		if not upval and strlen(newstr) > 0 and utf8charbytes(newstr, 1) == 1 then -- uppercase first character
			newstr = strupper(strsub(newstr, 1, 1)) .. strsub(newstr, 2)
			upval = true
		end
	end

	if newstr == "" then newstr = UNKNOWN end
	return newstr
end