6 changed files with 2209 additions and 2174 deletions
@ -0,0 +1,3 @@ |
|||
*.lua -crlf |
|||
*.h -crlf |
|||
*.c -crlf |
@ -1,307 +1,328 @@ |
|||
-- $Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $ |
|||
-- |
|||
-- Provides UTF-8 aware string functions implemented in pure lua: |
|||
-- * string.utf8len(s) |
|||
-- * string.utf8sub(s, i, j) |
|||
-- * string.utf8reverse(s) |
|||
-- |
|||
-- If utf8data.lua (containing the lower<->upper case mappings) is loaded, these |
|||
-- additional functions are available: |
|||
-- * string.utf8upper(s) |
|||
-- * string.utf8lower(s) |
|||
-- |
|||
-- All functions behave as their non UTF-8 aware counterparts with the exception |
|||
-- that UTF-8 characters are used instead of bytes for all units. |
|||
|
|||
--[[ |
|||
Copyright (c) 2006-2007, Kyle Smith |
|||
All rights reserved. |
|||
|
|||
Redistribution and use in source and binary forms, with or without |
|||
modification, are permitted provided that the following conditions are met: |
|||
|
|||
* Redistributions of source code must retain the above copyright notice, |
|||
this list of conditions and the following disclaimer. |
|||
* Redistributions in binary form must reproduce the above copyright |
|||
notice, this list of conditions and the following disclaimer in the |
|||
documentation and/or other materials provided with the distribution. |
|||
* Neither the name of the author nor the names of its contributors may be |
|||
used to endorse or promote products derived from this software without |
|||
specific prior written permission. |
|||
|
|||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE |
|||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
|||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
|||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|||
--]] |
|||
|
|||
-- ABNF from RFC 3629 |
|||
-- |
|||
-- UTF8-octets = *( UTF8-char ) |
|||
-- UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 |
|||
-- UTF8-1 = %x00-7F |
|||
-- UTF8-2 = %xC2-DF UTF8-tail |
|||
-- UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / |
|||
-- %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) |
|||
-- UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / |
|||
-- %xF4 %x80-8F 2( UTF8-tail ) |
|||
-- UTF8-tail = %x80-BF |
|||
-- |
|||
|
|||
local strbyte, strlen, strsub, type = string.byte, string.len, string.sub, type |
|||
local utf8 = {} |
|||
|
|||
-- Returns the number of bytes (1-4) used by the UTF-8 character that starts at
-- byte i of s. Also doubles as a UTF-8 character validator: the sequence is
-- checked against the RFC 3629 ABNF before its width is returned.
--
-- @param s  string to inspect
-- @param i  byte position of the character's first byte (defaults to 1)
-- @return   1, 2, 3 or 4
-- Raises an error when s is not a string, i is not a number, i does not
-- address a byte of s, the sequence is cut off by the end of the string, or
-- any byte is outside the range RFC 3629 allows at its position.
local function utf8charbytes(s, i)
	-- argument defaults
	i = i or 1

	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")")
	end
	if type(i) ~= "number" then
		error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
	end

	local c = strbyte(s, i)

	-- string.byte returns nothing for a position outside the string; report
	-- that explicitly instead of failing on a nil comparison below
	if not c then
		error("bad argument #2 to 'utf8charbytes' (position out of range)")
	end

	-- UTF8-1 = %x00-7F (NUL is a valid one-byte character)
	if c <= 127 then
		return 1
	end

	-- Classify the lead byte: total width plus the range allowed for byte 2.
	-- Only byte 2 has lead-dependent limits (they exclude overlong forms,
	-- surrogates and code points above U+10FFFF).
	local width, lo, hi
	if c >= 194 and c <= 223 then
		-- UTF8-2
		width, lo, hi = 2, 128, 191
	elseif c >= 224 and c <= 239 then
		-- UTF8-3
		width = 3
		lo = (c == 224) and 160 or 128
		hi = (c == 237) and 159 or 191
	elseif c >= 240 and c <= 244 then
		-- UTF8-4
		width = 4
		lo = (c == 240) and 144 or 128
		hi = (c == 244) and 143 or 191
	else
		-- stray continuation byte, or a lead byte RFC 3629 forbids
		error("Invalid UTF-8 character")
	end

	local c2 = strbyte(s, i + 1)
	local c3, c4
	if width >= 3 then
		c3 = strbyte(s, i + 2)
	end
	if width == 4 then
		c4 = strbyte(s, i + 3)
	end

	-- truncation is reported before any range check
	if not c2 or (width >= 3 and not c3) or (width == 4 and not c4) then
		error("UTF-8 string terminated early")
	end

	-- validate byte 2
	if c2 < lo or c2 > hi then
		error("Invalid UTF-8 character")
	end

	-- validate byte 3
	if width >= 3 and (c3 < 128 or c3 > 191) then
		error("Invalid UTF-8 character")
	end

	-- validate byte 4
	if width == 4 and (c4 < 128 or c4 > 191) then
		error("Invalid UTF-8 character")
	end

	return width
end
|||
|
|||
utf8.charbytes = utf8charbytes |
|||
|
|||
-- Counts the UTF-8 characters in s.
-- The string is walked one character at a time, using utf8charbytes to get
-- each character's width, so any error raised there propagates to the caller.
--
-- @param s  string to measure
-- @return   number of characters (0 for the empty string)
local function utf8len(s)
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8len' (string expected, got ".. type(s).. ")")
	end

	local total = strlen(s)
	local count = 0
	local offset = 1

	while offset <= total do
		offset = offset + utf8charbytes(s, offset)
		count = count + 1
	end

	return count
end
|||
|
|||
utf8.len = utf8len |
|||
|
|||
-- Works like string.sub except that i and j count UTF-8 characters instead of
-- bytes.
--
-- @param s  source string
-- @param i  index of the first character; negative values count from the end
-- @param j  index of the last character (defaults to -1, the last character);
--           negative values count from the end
-- @return   the selected substring, or "" when the range selects nothing
-- Raises an error on wrongly typed arguments; errors from utf8charbytes
-- propagate for the part of s that has to be scanned.
local function utf8sub(s, i, j)
	-- argument defaults
	j = j or -1

	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8sub' (string expected, got ".. type(s).. ")")
	end
	if type(i) ~= "number" then
		error("bad argument #2 to 'utf8sub' (number expected, got ".. type(i).. ")")
	end
	if type(j) ~= "number" then
		error("bad argument #3 to 'utf8sub' (number expected, got ".. type(j).. ")")
	end

	local bytes = strlen(s)
	local startChar, endChar = i, j

	-- the character count is only needed to resolve negative indices
	if i < 0 or j < 0 then
		local l = utf8len(s)
		if i < 0 then
			startChar = l + i + 1
		end
		if j < 0 then
			endChar = l + j + 1
		end
	end

	-- like string.sub, a start before the first character means "from 1"
	if startChar < 1 then
		startChar = 1
	end

	-- can't have start before end! (also covers endChar < 1)
	if startChar > endChar then
		return ""
	end

	-- Byte offsets to pass to string.sub. startByte begins past the end so
	-- that a start index beyond the last character yields "" instead of the
	-- whole string; endByte defaults to the end for an over-long end index.
	local startByte, endByte = bytes + 1, bytes
	local pos, len = 1, 0

	while pos <= bytes do
		len = len + 1

		if len == startChar then
			startByte = pos
		end

		pos = pos + utf8charbytes(s, pos)

		if len == endChar then
			endByte = pos - 1
			break
		end
	end

	return strsub(s, startByte, endByte)
end
|||
|
|||
utf8.sub = utf8sub |
|||
|
|||
-- Replaces UTF-8 characters of s based on a mapping table.
-- Every character of s (a 1-4 byte string, as delimited by utf8charbytes) is
-- looked up in mapping; when the entry is present and truthy it is used in
-- place of the character, otherwise the character is kept unchanged.
--
-- @param s        source string
-- @param mapping  table keyed by single UTF-8 characters
-- @return         the rebuilt string
-- Raises an error on wrongly typed arguments; errors from utf8charbytes
-- propagate.
local function utf8replace(s, mapping)
	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
	end
	if type(mapping) ~= "table" then
		error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
	end

	local bytes = strlen(s)
	local pos = 1

	-- collect the pieces and join once at the end; appending to a string
	-- inside the loop copies the whole result on every iteration
	local pieces, count = {}, 0

	while pos <= bytes do
		local charbytes = utf8charbytes(s, pos)
		local c = strsub(s, pos, pos + charbytes - 1)

		count = count + 1
		pieces[count] = mapping[c] or c

		pos = pos + charbytes
	end

	return table.concat(pieces)
end
|||
|
|||
-- UTF-8 aware counterpart of string.upper: runs s through utf8replace with the
-- modules.utf8data.lc_uc table as the character mapping.
-- @param s  string to convert
-- @return   the string produced by utf8replace
local function utf8upper(s)
	local mapping = modules.utf8data.lc_uc
	return utf8replace(s, mapping)
end
|||
|
|||
utf8.upper = utf8upper |
|||
|
|||
-- UTF-8 aware counterpart of string.lower: runs s through utf8replace with the
-- modules.utf8data.uc_lc table as the character mapping.
-- @param s  string to convert
-- @return   the string produced by utf8replace
local function utf8lower(s)
	local mapping = modules.utf8data.uc_lc
	return utf8replace(s, mapping)
end
|||
|
|||
utf8.lower = utf8lower |
|||
|
|||
-- Works like string.reverse except that whole UTF-8 characters, not bytes,
-- are reversed.
--
-- @param s  string to reverse
-- @return   a string holding the characters of s in reverse order
-- Raises an error when s is not a string or is not valid UTF-8 (errors from
-- utf8charbytes propagate).
local function utf8reverse(s)
	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8reverse' (string expected, got ".. type(s).. ")")
	end

	local pos = strlen(s)

	-- pieces are joined once at the end instead of growing a string per step
	local pieces, count = {}, 0

	while pos > 0 do
		-- pos is the last byte of the character being extracted
		local charEnd = pos
		local c = strbyte(s, pos)

		-- step back over continuation bytes (%x80-BF) to the lead byte
		while c >= 128 and c <= 191 do
			pos = pos - 1
			if pos < 1 then
				-- continuation bytes with no lead byte in front of them
				error("Invalid UTF-8 character")
			end
			c = strbyte(s, pos)
		end

		local charbytes = utf8charbytes(s, pos)

		-- the character found must end exactly where the scan started,
		-- otherwise stray continuation bytes would be dropped silently
		if pos + charbytes - 1 ~= charEnd then
			error("Invalid UTF-8 character")
		end

		count = count + 1
		pieces[count] = strsub(s, pos, charEnd)

		-- continue with the byte just before this character
		pos = pos - 1
	end

	return table.concat(pieces)
end
|||
|
|||
utf8.reverse = utf8reverse |
|||
|
|||
return utf8 |
|||
-- $Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $ |
|||
-- |
|||
-- Provides UTF-8 aware string functions implemented in pure lua: |
|||
-- * string.utf8len(s) |
|||
-- * string.utf8sub(s, i, j) |
|||
-- * string.utf8reverse(s) |
|||
-- |
|||
-- If utf8data.lua (containing the lower<->upper case mappings) is loaded, these |
|||
-- additional functions are available: |
|||
-- * string.utf8upper(s) |
|||
-- * string.utf8lower(s) |
|||
-- |
|||
-- All functions behave as their non UTF-8 aware counterparts with the exception |
|||
-- that UTF-8 characters are used instead of bytes for all units. |
|||
|
|||
--[[ |
|||
Copyright (c) 2006-2007, Kyle Smith |
|||
All rights reserved. |
|||
|
|||
Redistribution and use in source and binary forms, with or without |
|||
modification, are permitted provided that the following conditions are met: |
|||
|
|||
* Redistributions of source code must retain the above copyright notice, |
|||
this list of conditions and the following disclaimer. |
|||
* Redistributions in binary form must reproduce the above copyright |
|||
notice, this list of conditions and the following disclaimer in the |
|||
documentation and/or other materials provided with the distribution. |
|||
* Neither the name of the author nor the names of its contributors may be |
|||
used to endorse or promote products derived from this software without |
|||
specific prior written permission. |
|||
|
|||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE |
|||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
|||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
|||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|||
--]] |
|||
|
|||
-- ABNF from RFC 3629 |
|||
-- |
|||
-- UTF8-octets = *( UTF8-char ) |
|||
-- UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 |
|||
-- UTF8-1 = %x00-7F |
|||
-- UTF8-2 = %xC2-DF UTF8-tail |
|||
-- UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / |
|||
-- %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) |
|||
-- UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / |
|||
-- %xF4 %x80-8F 2( UTF8-tail ) |
|||
-- UTF8-tail = %x80-BF |
|||
-- |
|||
|
|||
local strbyte, strlen, strsub, type = string.byte, string.len, string.sub, type |
|||
local lutf8 = utf8 |
|||
local utf8 = {} |
|||
|
|||
-- Returns the number of bytes (1-4) used by the UTF-8 character that starts at
-- byte i of s. Also doubles as a UTF-8 character validator: the sequence is
-- checked against the RFC 3629 ABNF before its width is returned.
--
-- @param s  string to inspect
-- @param i  byte position of the character's first byte (defaults to 1)
-- @return   1, 2, 3 or 4
-- Raises an error when s is not a string, i is not a number, i does not
-- address a byte of s, the sequence is cut off by the end of the string, or
-- any byte is outside the range RFC 3629 allows at its position.
local function utf8charbytes(s, i)
	-- argument defaults
	i = i or 1

	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")")
	end
	if type(i) ~= "number" then
		error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
	end

	local c = strbyte(s, i)

	-- string.byte returns nothing for a position outside the string; report
	-- that explicitly instead of failing on a nil comparison below
	if not c then
		error("bad argument #2 to 'utf8charbytes' (position out of range)")
	end

	-- UTF8-1 = %x00-7F (NUL is a valid one-byte character)
	if c <= 127 then
		return 1
	end

	-- Classify the lead byte: total width plus the range allowed for byte 2.
	-- Only byte 2 has lead-dependent limits (they exclude overlong forms,
	-- surrogates and code points above U+10FFFF).
	local width, lo, hi
	if c >= 194 and c <= 223 then
		-- UTF8-2
		width, lo, hi = 2, 128, 191
	elseif c >= 224 and c <= 239 then
		-- UTF8-3
		width = 3
		lo = (c == 224) and 160 or 128
		hi = (c == 237) and 159 or 191
	elseif c >= 240 and c <= 244 then
		-- UTF8-4
		width = 4
		lo = (c == 240) and 144 or 128
		hi = (c == 244) and 143 or 191
	else
		-- stray continuation byte, or a lead byte RFC 3629 forbids
		error("Invalid UTF-8 character")
	end

	local c2 = strbyte(s, i + 1)
	local c3, c4
	if width >= 3 then
		c3 = strbyte(s, i + 2)
	end
	if width == 4 then
		c4 = strbyte(s, i + 3)
	end

	-- truncation is reported before any range check
	if not c2 or (width >= 3 and not c3) or (width == 4 and not c4) then
		error("UTF-8 string terminated early")
	end

	-- validate byte 2
	if c2 < lo or c2 > hi then
		error("Invalid UTF-8 character")
	end

	-- validate byte 3
	if width >= 3 and (c3 < 128 or c3 > 191) then
		error("Invalid UTF-8 character")
	end

	-- validate byte 4
	if width == 4 and (c4 < 128 or c4 > 191) then
		error("Invalid UTF-8 character")
	end

	return width
end
|||
|
|||
utf8.charbytes = utf8charbytes |
|||
|
|||
-- Counts the UTF-8 characters in s.
-- The string is walked one character at a time, using utf8charbytes to get
-- each character's width, so any error raised there propagates to the caller.
--
-- @param s  string to measure
-- @return   number of characters (0 for the empty string)
local function utf8len(s)
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8len' (string expected, got ".. type(s).. ")")
	end

	local total = strlen(s)
	local count = 0
	local offset = 1

	while offset <= total do
		offset = offset + utf8charbytes(s, offset)
		count = count + 1
	end

	return count
end
|||
|
|||
utf8.len = utf8len |
|||
|
|||
-- Works like string.sub except that i and j count UTF-8 characters instead of
-- bytes.
--
-- @param s  source string
-- @param i  index of the first character; negative values count from the end
-- @param j  index of the last character (defaults to -1, the last character);
--           negative values count from the end
-- @return   the selected substring, or "" when the range selects nothing
-- Raises an error on wrongly typed arguments; errors from utf8charbytes
-- propagate for the part of s that has to be scanned.
local function utf8sub(s, i, j)
	-- argument defaults
	j = j or -1

	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8sub' (string expected, got ".. type(s).. ")")
	end
	if type(i) ~= "number" then
		error("bad argument #2 to 'utf8sub' (number expected, got ".. type(i).. ")")
	end
	if type(j) ~= "number" then
		error("bad argument #3 to 'utf8sub' (number expected, got ".. type(j).. ")")
	end

	local bytes = strlen(s)
	local startChar, endChar = i, j

	-- the character count is only needed to resolve negative indices
	if i < 0 or j < 0 then
		local l = utf8len(s)
		if i < 0 then
			startChar = l + i + 1
		end
		if j < 0 then
			endChar = l + j + 1
		end
	end

	-- like string.sub, a start before the first character means "from 1"
	if startChar < 1 then
		startChar = 1
	end

	-- can't have start before end! (also covers endChar < 1)
	if startChar > endChar then
		return ""
	end

	-- Byte offsets to pass to string.sub. startByte begins past the end so
	-- that a start index beyond the last character yields "" instead of the
	-- whole string; endByte defaults to the end for an over-long end index.
	local startByte, endByte = bytes + 1, bytes
	local pos, len = 1, 0

	while pos <= bytes do
		len = len + 1

		if len == startChar then
			startByte = pos
		end

		pos = pos + utf8charbytes(s, pos)

		if len == endChar then
			endByte = pos - 1
			break
		end
	end

	return strsub(s, startByte, endByte)
end
|||
|
|||
-- Character-indexed string.sub built on the standard utf8 library (reached
-- through lutf8): i and j count UTF-8 characters, not bytes.
--
-- @param s  source string
-- @param i  index of the first character (defaults to 1); negative values
--           count from the end
-- @param j  index of the last character (defaults to math.maxinteger, i.e.
--           "through the end"); negative values count from the end
-- @return   the selected substring, "" when the range selects nothing, or
--           nil when i or j is below 1 and lutf8.len reports s as invalid
function utf8.sub(s, i, j)
	i = i or 1
	j = j or math.maxinteger

	-- the character count is only computed when an index is below 1
	if i < 1 or j < 1 then
		local n = lutf8.len(s)
		if not n then return nil end
		if i < 0 then i = n + 1 + i end
		if j < 0 then j = n + 1 + j end
	end

	-- like string.sub: a start before the first character means "from 1".
	-- No other clamping is done, so a start past the end or an end before
	-- the start gives "" rather than being pulled onto a character.
	if i < 1 then i = 1 end
	if j < i then return "" end

	-- byte position of the first selected character; nil when s has fewer
	-- than i - 1 characters
	local first = lutf8.offset(s, i)
	if not first then return "" end

	-- byte position just past the last selected character; skipped for
	-- math.maxinteger because j + 1 would wrap around
	local past
	if j < math.maxinteger then
		past = lutf8.offset(s, j + 1)
	end

	if past then
		return s:sub(first, past - 1)
	end
	-- j reaches beyond the last character: take everything from first
	return s:sub(first)
end
|||
|
|||
--utf8.sub = utf8sub |
|||
|
|||
-- Replaces UTF-8 characters of s based on a mapping table.
-- Every character of s (a 1-4 byte string, as delimited by utf8charbytes) is
-- looked up in mapping; when the entry is present and truthy it is used in
-- place of the character, otherwise the character is kept unchanged.
--
-- @param s        source string
-- @param mapping  table keyed by single UTF-8 characters
-- @return         the rebuilt string
-- Raises an error on wrongly typed arguments; errors from utf8charbytes
-- propagate.
local function utf8replace(s, mapping)
	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
	end
	if type(mapping) ~= "table" then
		error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
	end

	local bytes = strlen(s)
	local pos = 1

	-- collect the pieces and join once at the end; appending to a string
	-- inside the loop copies the whole result on every iteration
	local pieces, count = {}, 0

	while pos <= bytes do
		local charbytes = utf8charbytes(s, pos)
		local c = strsub(s, pos, pos + charbytes - 1)

		count = count + 1
		pieces[count] = mapping[c] or c

		pos = pos + charbytes
	end

	return table.concat(pieces)
end
|||
|
|||
-- UTF-8 aware counterpart of string.upper: runs s through utf8replace with the
-- modules.utf8data.lc_uc table as the character mapping.
-- @param s  string to convert
-- @return   the string produced by utf8replace
local function utf8upper(s)
	local mapping = modules.utf8data.lc_uc
	return utf8replace(s, mapping)
end
|||
|
|||
utf8.upper = utf8upper |
|||
|
|||
-- UTF-8 aware counterpart of string.lower: runs s through utf8replace with the
-- modules.utf8data.uc_lc table as the character mapping.
-- @param s  string to convert
-- @return   the string produced by utf8replace
local function utf8lower(s)
	local mapping = modules.utf8data.uc_lc
	return utf8replace(s, mapping)
end
|||
|
|||
utf8.lower = utf8lower |
|||
|
|||
-- Works like string.reverse except that whole UTF-8 characters, not bytes,
-- are reversed.
--
-- @param s  string to reverse
-- @return   a string holding the characters of s in reverse order
-- Raises an error when s is not a string or is not valid UTF-8 (errors from
-- utf8charbytes propagate).
local function utf8reverse(s)
	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8reverse' (string expected, got ".. type(s).. ")")
	end

	local pos = strlen(s)

	-- pieces are joined once at the end instead of growing a string per step
	local pieces, count = {}, 0

	while pos > 0 do
		-- pos is the last byte of the character being extracted
		local charEnd = pos
		local c = strbyte(s, pos)

		-- step back over continuation bytes (%x80-BF) to the lead byte
		while c >= 128 and c <= 191 do
			pos = pos - 1
			if pos < 1 then
				-- continuation bytes with no lead byte in front of them
				error("Invalid UTF-8 character")
			end
			c = strbyte(s, pos)
		end

		local charbytes = utf8charbytes(s, pos)

		-- the character found must end exactly where the scan started,
		-- otherwise stray continuation bytes would be dropped silently
		if pos + charbytes - 1 ~= charEnd then
			error("Invalid UTF-8 character")
		end

		count = count + 1
		pieces[count] = strsub(s, pos, charEnd)

		-- continue with the byte just before this character
		pos = pos - 1
	end

	return table.concat(pieces)
end
|||
|
|||
utf8.reverse = utf8reverse |
|||
|
|||
return utf8 |
|||
|
File diff suppressed because it is too large
Loading…
issues.context.reference_issue