SisMaker
/
yzyCli


								-- local utf8 = {}


								-- --byte index of the next char after the char at byte index i, followed by a valid flag for the char at byte index i.

								-- --nil if not found. invalid characters are iterated as 1-byte chars.

								-- function utf8.next_raw(s, i)

								-- 	if not i then

								-- 		if #s == 0 then return nil end

								-- 		return 1, true --fake flag (doesn't matter since this flag is not to be taken as full validation)

								-- 	end

								-- 	if i > #s then return end

								-- 	local c = s:byte(i)

								-- 	if c >= 0x00 and c <= 0x7F then

								-- 		i = i + 1

								-- 	elseif c >= 0xC2 and c <= 0xDF then

								-- 		i = i + 2

								-- 	elseif c >= 0xE0 and c <= 0xEF then

								-- 		i = i + 3

								-- 	elseif c >= 0xF0 and c <= 0xF4 then

								-- 		i = i + 4

								-- 	else --invalid

								-- 		return i + 1, false

								-- 	end

								-- 	if i > #s then return end

								-- 	return i, true

								-- end


								-- --next() is the generic iterator and can be replaced for different semantics. next_raw() must preserve its semantics.

								-- utf8.next = utf8.next_raw


								-- --iterate chars, returning the byte index where each char starts

								-- function utf8.byte_indices(s, previ)

								-- 	return utf8.next, s, previ

								-- end


								-- --number of chars in string

								-- function utf8.len(s)

								-- 	assert(s, "bad argument #1 to 'len' (string expected, got nil)")

								-- 	local len = 0

								-- 	for _ in utf8.byte_indices(s) do

								-- 		len = len + 1

								-- 	end

								-- 	return len

								-- end


								-- --byte index given char index. nil if the index is outside the string.

								-- function utf8.byte_index(s, target_ci)

								-- 	if target_ci < 1 then return end

								-- 	local ci = 0

								-- 	for i in utf8.byte_indices(s) do

								-- 		ci = ci + 1

								-- 		if ci == target_ci then

								-- 			return i

								-- 		end

								-- 	end

								-- 	assert(target_ci > ci, "invalid index")

								-- end


								-- --char index given byte index. nil if the index is outside the string.

								-- function utf8.char_index(s, target_i)

								-- 	if target_i < 1 or target_i > #s then return end

								-- 	local ci = 0

								-- 	for i in utf8.byte_indices(s) do

								-- 		ci = ci + 1

								-- 		if i == target_i then

								-- 			return ci

								-- 		end

								-- 	end

								-- 	LogError("invalid index")

								-- end


								-- --byte index of the prev. char before the char at byte index i, which defaults to #s + 1.

								-- --nil if the index is outside the 2..#s+1 range.

								-- --NOTE: unlike next(), this is a O(N) operation!

								-- function utf8.prev(s, nexti)

								-- 	nexti = nexti or #s + 1

								-- 	if nexti <= 1 or nexti > #s + 1 then return end

								-- 	local lasti, lastvalid = utf8.next(s)

								-- 	for i, valid in utf8.byte_indices(s) do

								-- 		if i == nexti then

								-- 			return lasti, lastvalid

								-- 		end

								-- 		lasti, lastvalid = i, valid

								-- 	end

								-- 	if nexti == #s + 1 then

								-- 		return lasti, lastvalid

								-- 	end

								-- 	LogError("invalid index")

								-- end


								-- --iterate chars in reverse order, returning the byte index where each char starts.

								-- function utf8.byte_indices_reverse(s, nexti)

								-- 	if #s < 200 then

								-- 		--using prev() is a O(N^2/2) operation, ok for small strings (200 chars need 40,000 iterations)

								-- 		return utf8.prev, s, nexti

								-- 	else

								-- 		--store byte indices in a table and iterate them in reverse.

								-- 		--this is 40x slower than byte_indices() but still fast at 2mil chars/second (but eats RAM and makes garbage).

								-- 		local t = {}

								-- 		for i in utf8.byte_indices(s) do

								-- 			if nexti and i >= nexti then break end

								-- 			table.insert(t, i)

								-- 		end

								-- 		local i = #t + 1

								-- 		return function()

								-- 			i = i - 1

								-- 			return t[i]

								-- 		end

								-- 	end

								-- end


								-- --sub based on char indices, which, unlike with standard string.sub(), can't be negative.

								-- --start_ci can be 1..inf and end_ci can be 0..inf. end_ci can be nil meaning last char.

								-- --if start_ci is out of range or end_ci < start_ci, the empty string is returned.

								-- --if end_ci is out of range, it is considered to be the last position in the string.

								-- function utf8.sub(s, start_ci, end_ci)

								-- 	--assert for positive indices because we might implement negative indices in the future.

								-- 	assert(start_ci >= 1)

								-- 	assert(not end_ci or end_ci >= 0)

								-- 	local ci = 0

								-- 	local start_i, end_i

								-- 	for i in utf8.byte_indices(s) do

								-- 		ci = ci + 1

								-- 		if ci == start_ci then

								-- 			start_i = i

								-- 		end

								-- 		if ci == end_ci then

								-- 			end_i = i

								-- 		end

								-- 	end

								-- 	if not start_i then

								-- 		assert(start_ci > ci, 'invalid index')

								-- 		return ''

								-- 	end

								-- 	if end_ci and not end_i then

								-- 		if end_ci < start_ci then

								-- 			return ''

								-- 		end

								-- 		assert(end_ci > ci, 'invalid index')

								-- 	end

								-- 	return s:sub(start_i, end_i and end_i - 1)

								-- end


								-- --check if a string contains a substring at byte index i without making garbage.

								-- --nil if the index is out of range. true if searching for the empty string.

								-- function utf8.contains(s, i, sub)

								-- 	if i < 1 or i > #s then return nil end

								-- 	for si = 1, #sub do

								-- 		if s:byte(i + si - 1) ~= sub:byte(si) then

								-- 			return false

								-- 		end

								-- 	end

								-- 	return true

								-- end


								-- --count the number of occurences of a substring in a string. the substring cannot be the empty string.

								-- function utf8.count(s, sub)

								-- 	assert(#sub > 0)

								-- 	local count = 0

								-- 	local i = 1

								-- 	while i do

								-- 		if utf8.contains(s, i, sub) then

								-- 			count = count + 1

								-- 			i = i + #sub

								-- 			if i > #s then break end

								-- 		else

								-- 			i = utf8.next(s, i)

								-- 		end

								-- 	end

								-- 	return count

								-- end


								-- --utf8 validation and sanitization


								-- --check if there's a valid utf8 codepoint at byte index i. valid ranges for each utf8 byte are:

								-- -- byte  1          2           3          4

								-- --------------------------------------------

								-- -- 00 - 7F

								-- -- C2 - DF    80 - BF

								-- -- E0         A0 - BF     80 - BF

								-- -- E1 - EC    80 - BF     80 - BF

								-- -- ED         80 - 9F     80 - BF

								-- -- EE - EF    80 - BF     80 - BF

								-- -- F0         90 - BF     80 - BF    80 - BF

								-- -- F1 - F3    80 - BF     80 - BF    80 - BF

								-- -- F4         80 - 8F     80 - BF    80 - BF

								-- function utf8.isvalid(s, i)

								-- 	local c = s:byte(i)

								-- 	if not c then

								-- 		return false

								-- 	elseif c >= 0x00 and c <= 0x7F then

								-- 		return true

								-- 	elseif c >= 0xC2 and c <= 0xDF then

								-- 		local c2 = s:byte(i + 1)

								-- 		return c2 and c2 >= 0x80 and c2 <= 0xBF

								-- 	elseif c >= 0xE0 and c <= 0xEF then

								-- 		local c2 = s:byte(i + 1)

								-- 		local c3 = s:byte(i + 2)

								-- 		if c == 0xE0 then

								-- 			return c2 and c3 and

								-- 				c2 >= 0xA0 and c2 <= 0xBF and

								-- 				c3 >= 0x80 and c3 <= 0xBF

								-- 		elseif c >= 0xE1 and c <= 0xEC then

								-- 			return c2 and c3 and

								-- 				c2 >= 0x80 and c2 <= 0xBF and

								-- 				c3 >= 0x80 and c3 <= 0xBF

								-- 		elseif c == 0xED then

								-- 			return c2 and c3 and

								-- 				c2 >= 0x80 and c2 <= 0x9F and

								-- 				c3 >= 0x80 and c3 <= 0xBF

								-- 		elseif c >= 0xEE and c <= 0xEF then

								-- 			if c == 0xEF and c2 == 0xBF and (c3 == 0xBE or c3 == 0xBF) then

								-- 				return false --uFFFE and uFFFF non-characters

								-- 			end

								-- 			return c2 and c3 and

								-- 				c2 >= 0x80 and c2 <= 0xBF and

								-- 				c3 >= 0x80 and c3 <= 0xBF

								-- 		end

								-- 	elseif c >= 0xF0 and c <= 0xF4 then

								-- 		local c2 = s:byte(i + 1)

								-- 		local c3 = s:byte(i + 2)

								-- 		local c4 = s:byte(i + 3)

								-- 		if c == 0xF0 then

								-- 			return c2 and c3 and c4 and

								-- 				c2 >= 0x90 and c2 <= 0xBF and

								-- 				c3 >= 0x80 and c3 <= 0xBF and

								-- 				c4 >= 0x80 and c4 <= 0xBF

								-- 		elseif c >= 0xF1 and c <= 0xF3 then

								-- 			return c2 and c3 and c4 and

								-- 				c2 >= 0x80 and c2 <= 0xBF and

								-- 				c3 >= 0x80 and c3 <= 0xBF and

								-- 				c4 >= 0x80 and c4 <= 0xBF

								-- 		elseif c == 0xF4 then

								-- 			return c2 and c3 and c4 and

								-- 				c2 >= 0x80 and c2 <= 0x8F and

								-- 				c3 >= 0x80 and c3 <= 0xBF and

								-- 				c4 >= 0x80 and c4 <= 0xBF

								-- 		end

								-- 	end

								-- 	return false

								-- end


								-- --byte index of the next valid utf8 char after the char at byte index i.

								-- --nil if indices go out of range. invalid characters are skipped.

								-- function utf8.next_valid(s, i)

								-- 	local valid

								-- 	i, valid = utf8.next_raw(s, i)

								-- 	while i and (not valid or not utf8.isvalid(s, i)) do

								-- 		i, valid = utf8.next(s, i)

								-- 	end

								-- 	return i

								-- end


								-- --iterate valid chars, returning the byte index where each char starts

								-- function utf8.valid_byte_indices(s)

								-- 	return utf8.next_valid, s

								-- end


								-- --assert that a string only contains valid utf8 characters

								-- function utf8.validate(s)

								-- 	for i, valid in utf8.byte_indices(s) do

								-- 		if not valid or not utf8.isvalid(s, i) then

								-- 			LogError(string.format('invalid utf8 char at #%d', i))

								-- 		end

								-- 	end

								-- end


								-- local function table_lookup(s, i, j, t)

								-- 	return t[s:sub(i, j)]

								-- end


								-- --replace characters in string based on a function f(s, i, j, ...) -> replacement_string | nil

								-- function utf8.replace(s, f, ...)

								-- 	if type(f) == 'table' then

								-- 		return utf8.replace(s, table_lookup, f)

								-- 	end

								-- 	if s == '' then

								-- 		return s

								-- 	end

								-- 	local t = {}

								-- 	local lasti = 1

								-- 	for i in utf8.byte_indices(s) do

								-- 		local nexti = utf8.next(s, i) or #s + 1

								-- 		local repl = f(s, i, nexti - 1, ...)

								-- 		if repl then

								-- 			table.insert(t, s:sub(lasti, i - 1))

								-- 			table.insert(t, repl)

								-- 			lasti = nexti

								-- 		end

								-- 	end

								-- 	table.insert(t, s:sub(lasti))

								-- 	return table.concat(t)

								-- end


								-- local function replace_invalid(s, i, j, repl_char)

								-- 	if not utf8.isvalid(s, i) then

								-- 		return repl_char

								-- 	end

								-- end


								-- --replace invalid utf8 chars with a replacement char

								-- function utf8.sanitize(s, repl_char)

								-- 	repl_char = repl_char or '�' --\uFFFD

								-- 	return utf8.replace(s, replace_invalid, repl_char)

								-- end


								-- return utf8