forked from izaya/LuPPC
utf8 fixes
This commit is contained in:
parent
d40aab48ea
commit
494c22c408
2
Makefile
2
Makefile
@ -59,4 +59,4 @@ smallclean:
|
||||
|
||||
# Other
|
||||
|
||||
.PHONY: clean cleanresourcues resources build smallclean all
|
||||
.PHONY: clean cleanresourcues resources build smallclean all
|
||||
|
@ -8,8 +8,6 @@ extern char lua_filesystem[];
|
||||
extern char lua_init[];
|
||||
extern char lua_sandbox[];
|
||||
extern char lua_textgpu[];
|
||||
extern char lua_utf8_utf8data[];
|
||||
extern char lua_utf8_utf8[];
|
||||
extern char lua_util_color[];
|
||||
extern char lua_util_random[];
|
||||
#endif
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <fcntl.h>
#include <dirent.h>
#include <ftw.h>
#include <wchar.h>
#include <wctype.h>
#include <limits.h>
#include <linux/kd.h>
|
||||
|
||||
@ -350,6 +351,21 @@ static int l_pull (lua_State *L) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * native.wcwidth(codepoint) -> integer
 *
 * Lua binding for wcwidth(): reads the code point from argument 1 and
 * pushes the number of terminal columns it occupies. wcwidth() reports -1
 * for code points it considers non-printable; that value is passed through
 * unchanged so callers can decide how to treat it.
 *
 * Both the argument and the result are integral, so they are converted
 * with the integer API instead of going through a double.
 */
static int l_wcwidth (lua_State *L) {
	lua_pushinteger(L, (lua_Integer)wcwidth((wchar_t)lua_tointeger(L, 1)));
	return 1;
}
|
||||
|
||||
/*
 * native.towupper(codepoint) -> integer
 *
 * Lua binding for towupper(): reads the code point from argument 1 and
 * pushes its upper-case mapping (or the same code point when towupper()
 * has no mapping for it). The value is pushed as a Lua integer so the
 * result can be handed straight to utf8.char without a float round trip.
 */
static int l_towupper (lua_State *L) {
	lua_pushinteger(L, (lua_Integer)towupper((wint_t)lua_tointeger(L, 1)));
	return 1;
}
|
||||
|
||||
/*
 * native.towlower(codepoint) -> integer
 *
 * Lua binding for towlower(): reads the code point from argument 1 and
 * pushes its lower-case mapping (or the same code point when towlower()
 * has no mapping for it). The value is pushed as a Lua integer so the
 * result can be handed straight to utf8.char without a float round trip.
 */
static int l_towlower (lua_State *L) {
	lua_pushinteger(L, (lua_Integer)towlower((wint_t)lua_tointeger(L, 1)));
	return 1;
}
|
||||
|
||||
void luanative_start(lua_State *L) {
|
||||
lua_createtable (L, 0, 1);
|
||||
|
||||
@ -371,6 +387,10 @@ void luanative_start(lua_State *L) {
|
||||
pushctuple(L, "fs_size", l_fs_size);
|
||||
pushctuple(L, "fs_read", l_fs_read);
|
||||
|
||||
pushctuple(L, "wcwidth", l_wcwidth);
|
||||
pushctuple(L, "towlower", l_towlower);
|
||||
pushctuple(L, "towupper", l_towupper);
|
||||
|
||||
pushctuple(L, "beep", l_beep);
|
||||
pushctuple(L, "uptime", l_uptime);
|
||||
pushctuple(L, "totalMemory", l_totalMemory);
|
||||
@ -378,4 +398,4 @@ void luanative_start(lua_State *L) {
|
||||
pushctuple(L, "pull", l_pull);
|
||||
|
||||
lua_setglobal(L, "native");
|
||||
}
|
||||
}
|
||||
|
@ -18,10 +18,8 @@ void setup_modules(lua_State *L) {
|
||||
pushstuple(L, "textgpu", lua_textgpu);
|
||||
pushstuple(L, "color", lua_util_color);
|
||||
pushstuple(L, "random", lua_util_random);
|
||||
pushstuple(L, "utf8data", lua_utf8_utf8data);
|
||||
pushstuple(L, "utf8", lua_utf8_utf8);
|
||||
|
||||
pushstuple(L, "eepromDefault", res_eepromDefault);
|
||||
|
||||
lua_setglobal(L, "moduleCode");
|
||||
}
|
||||
}
|
||||
|
@ -43,8 +43,6 @@ function main()
|
||||
--Utils
|
||||
loadModule("random")
|
||||
loadModule("color")
|
||||
loadModule("utf8data")
|
||||
loadModule("utf8")
|
||||
|
||||
modules.address = modules.random.uuid() --TODO: PREALPHA: Make constant
|
||||
|
||||
@ -88,4 +86,4 @@ for k, hook in ipairs(deadhooks) do
|
||||
print("Shutdown hook with following error:")
|
||||
print(cause)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -1,4 +1,14 @@
|
||||
local sandbox
|
||||
|
||||
-- Returns the display width of `text`, i.e. the sum of native.wcwidth()
-- over every code point of the UTF-8 string.
--
-- text: string (validated with checkArg); utf8.codes raises on invalid UTF-8.
-- Returns a number of columns; 0 for the empty string.
local l_wlen = function(text)
	checkArg(1, text, "string")
	local width = 0
	for _, codepoint in utf8.codes(text) do
		local w = native.wcwidth(codepoint)
		-- wcwidth reports -1 for non-printable code points; those take no
		-- columns and must not shrink the total.
		if w > 0 then
			width = width + w
		end
	end
	-- The native binding may hand back floats; normalise to an integer
	-- when the sum is integral so callers keep getting integer widths.
	return math.tointeger(width) or width
end
|
||||
|
||||
sandbox = {
|
||||
assert = assert,
|
||||
dofile = nil,
|
||||
@ -163,20 +173,72 @@ sandbox = {
|
||||
-- Unicode-aware string helpers exposed to sandboxed code as `unicode`.
-- Positions and lengths are counted in code points, not bytes.
-- charWidth, isWide and wlen delegate to l_wlen; case mapping and column
-- widths come from the `native` bindings.
unicode = {
	char = utf8.char,

	-- Width of the string `c` as reported by l_wlen.
	charWidth = function(c)
		checkArg(1, c, "string")
		return l_wlen(c)
	end,

	-- True when l_wlen reports `c` as wider than one column.
	isWide = function(c)
		checkArg(1, c, "string")
		return l_wlen(c) > 1
	end,

	len = utf8.len,

	-- Returns `text` with every code point passed through native.towlower.
	lower = function(text)
		checkArg(1, text, "string")
		local out = {}
		for _, c in utf8.codes(text) do
			out[#out + 1] = utf8.char(native.towlower(c))
		end
		return table.concat(out)
	end,

	-- Returns `text` with every code point passed through native.towupper.
	upper = function(text)
		checkArg(1, text, "string")
		local out = {}
		for _, c in utf8.codes(text) do
			out[#out + 1] = utf8.char(native.towupper(c))
		end
		return table.concat(out)
	end,

	-- Returns `text` with its code points in reverse order.
	reverse = function(text)
		checkArg(1, text, "string")
		local chars = {}
		for _, c in utf8.codes(text) do
			chars[#chars + 1] = utf8.char(c)
		end
		-- Reverse in place, then join once instead of prepending per character.
		local lo, hi = 1, #chars
		while lo < hi do
			chars[lo], chars[hi] = chars[hi], chars[lo]
			lo, hi = lo + 1, hi - 1
		end
		return table.concat(chars)
	end,

	-- Code-point equivalent of string.sub.
	-- s: string; i: first character (default 1); j: last character
	-- (default: end of string). Negative indices count from the end.
	-- Returns the substring, "" for an empty range, or nil when a negative
	-- index is used and `s` is not valid UTF-8 (utf8.len fails).
	sub = function(s, i, j)
		checkArg(1, s, "string")
		if i ~= nil then checkArg(2, i, "number") end
		if j ~= nil then checkArg(3, j, "number") end
		i = i or 1
		j = j or math.maxinteger
		if i < 0 or j < 0 then
			-- Only negative indices need the character count.
			local n = utf8.len(s)
			if not n then return nil end
			if i < 0 then i = n + 1 + i end
			if j < 0 then j = n + 1 + j end
		end
		-- Like string.sub: a start before the string is clamped to 1.
		if i < 1 then i = 1 end
		if j < i then return "" end
		local first = utf8.offset(s, i)
		-- nil means the start lies beyond the last character.
		if not first then return "" end
		local past
		-- Guard the "to the end" default: j + 1 would overflow.
		if j < math.maxinteger then
			past = utf8.offset(s, j + 1)
		end
		if past then
			return s:sub(first, past - 1)
		end
		return s:sub(first)
	end,

	wlen = l_wlen,

	-- Returns the leading characters of `s` whose accumulated column width
	-- (per native.wcwidth) stays strictly below `l`.
	wtrunc = function(s, l)
		checkArg(1, s, "string")
		checkArg(2, l, "number")
		local width = 0
		local kept = {}
		for _, c in utf8.codes(s) do
			-- Once the limit is reached nothing further can be appended.
			if width >= l then break end
			width = width + native.wcwidth(c)
			if width < l then
				kept[#kept + 1] = utf8.char(c)
			end
		end
		return table.concat(kept)
	end,
},
|
||||
checkArg = checkArg,
|
||||
og = _G
|
||||
|
@ -39,7 +39,7 @@ local function prepareBuffers(w, h)
|
||||
end
|
||||
end
|
||||
|
||||
-- Character-indexed substring helper used by insertString.
-- The UTF-8 aware `sub` lives in the sandbox's `unicode` table; Lua's
-- built-in utf8 library has no `sub` function.
-- NOTE(review): assumes modules.sandbox is the sandbox table - confirm.
local usub = modules.sandbox.unicode.sub
|
||||
-- Overwrites part of `main` with `sub`, starting at character position
-- `at`: keeps the characters before `at`, then `sub`, then whatever of
-- `main` follows the overwritten span (as many characters as `sub` has).
local function insertString(main, sub, at)
	local head = usub(main, 1, at - 1)
	local tail = usub(main, at + utf8.len(sub))
	return head .. sub .. tail
end
|
||||
@ -234,4 +234,4 @@ function textgpu.start()
|
||||
end
|
||||
end
|
||||
|
||||
return textgpu
|
||||
return textgpu
|
||||
|
@ -1,328 +0,0 @@
|
||||
-- $Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $
|
||||
--
|
||||
-- Provides UTF-8 aware string functions implemented in pure lua:
|
||||
-- * string.utf8len(s)
|
||||
-- * string.utf8sub(s, i, j)
|
||||
-- * string.utf8reverse(s)
|
||||
--
|
||||
-- If utf8data.lua (containing the lower<->upper case mappings) is loaded, these
|
||||
-- additional functions are available:
|
||||
-- * string.utf8upper(s)
|
||||
-- * string.utf8lower(s)
|
||||
--
|
||||
-- All functions behave as their non UTF-8 aware counterparts with the exception
|
||||
-- that UTF-8 characters are used instead of bytes for all units.
|
||||
|
||||
--[[
|
||||
Copyright (c) 2006-2007, Kyle Smith
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the author nor the names of its contributors may be
|
||||
used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
--]]
|
||||
|
||||
-- ABNF from RFC 3629
|
||||
--
|
||||
-- UTF8-octets = *( UTF8-char )
|
||||
-- UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
|
||||
-- UTF8-1 = %x00-7F
|
||||
-- UTF8-2 = %xC2-DF UTF8-tail
|
||||
-- UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
|
||||
-- %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
|
||||
-- UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
|
||||
-- %xF4 %x80-8F 2( UTF8-tail )
|
||||
-- UTF8-tail = %x80-BF
|
||||
--
|
||||
|
||||
local strbyte, strlen, strsub, type = string.byte, string.len, string.sub, type
|
||||
local lutf8 = utf8
|
||||
local utf8 = {}
|
||||
|
||||
-- Returns the number of bytes (1-4) used by the UTF-8 character that starts
-- at byte `i` of `s` (default 1). Doubles as a validator for that
-- character, following the RFC 3629 ABNF quoted above.
--
-- Raises an error when:
--   * `s` is not a string or `i` is not a number,
--   * `i` does not address a byte of `s`,
--   * the sequence is cut short by the end of the string,
--   * the lead byte or any continuation byte is outside the ranges the
--     ABNF allows.
local function utf8charbytes(s, i)
	-- argument defaults
	i = i or 1

	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")")
	end
	if type(i) ~= "number" then
		error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
	end

	local c = strbyte(s, i)
	if not c then
		-- No byte at that position; report it instead of comparing nil.
		error("bad argument #2 to 'utf8charbytes' (position out of range)")
	end

	-- UTF8-1 = %x00-7F (NUL included)
	if c <= 127 then
		return 1
	end

	-- The lead byte fixes the sequence length.
	local length
	if c >= 194 and c <= 223 then
		length = 2
	elseif c >= 224 and c <= 239 then
		length = 3
	elseif c >= 240 and c <= 244 then
		length = 4
	else
		error("Invalid UTF-8 character")
	end

	-- Fetch the continuation bytes; missing ones come back as nil.
	local c2, c3, c4 = strbyte(s, i + 1, i + length - 1)
	if not c2 or (length >= 3 and not c3) or (length == 4 and not c4) then
		error("UTF-8 string terminated early")
	end

	-- Byte 2 is a plain UTF8-tail except after these four lead bytes,
	-- where the ABNF narrows the range.
	local lo, hi = 128, 191
	if c == 224 then
		lo = 160
	elseif c == 237 then
		hi = 159
	elseif c == 240 then
		lo = 144
	elseif c == 244 then
		hi = 143
	end
	if c2 < lo or c2 > hi then
		error("Invalid UTF-8 character")
	end

	-- Remaining bytes are plain UTF8-tail (%x80-BF).
	if c3 and (c3 < 128 or c3 > 191) then
		error("Invalid UTF-8 character")
	end
	if c4 and (c4 < 128 or c4 > 191) then
		error("Invalid UTF-8 character")
	end

	return length
end
|
||||
|
||||
utf8.charbytes = utf8charbytes
|
||||
|
||||
-- Counts the UTF-8 characters in `s` by hopping from one character start
-- to the next with utf8charbytes (which raises on malformed input).
-- Raises an error when `s` is not a string.
local function utf8len(s)
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8len' (string expected, got ".. type(s).. ")")
	end

	local count, cursor, total = 0, 1, strlen(s)
	while cursor <= total do
		count = count + 1
		cursor = cursor + utf8charbytes(s, cursor)
	end
	return count
end
|
||||
|
||||
utf8.len = utf8len
|
||||
|
||||
-- Works like string.sub except that `i` and `j` count UTF-8 characters
-- instead of bytes.
-- s: string; i: first character (required); j: last character (default -1,
-- i.e. the last character). Negative indices count from the end.
-- Returns "" for an empty range, including a start beyond the last
-- character or an end before the first one.
-- Raises an error on wrong argument types or malformed UTF-8.
local function utf8sub(s, i, j)
	-- argument defaults
	j = j or -1

	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8sub' (string expected, got ".. type(s).. ")")
	end
	if type(i) ~= "number" then
		error("bad argument #2 to 'utf8sub' (number expected, got ".. type(i).. ")")
	end
	if type(j) ~= "number" then
		error("bad argument #3 to 'utf8sub' (number expected, got ".. type(j).. ")")
	end

	local bytes = strlen(s)

	-- The character count is only needed to resolve negative indices.
	if i < 0 or j < 0 then
		local l = utf8len(s)
		if i < 0 then i = l + i + 1 end
		if j < 0 then j = l + j + 1 end
	end

	-- Like string.sub: a start before the string is clamped to 1.
	if i < 1 then i = 1 end

	-- can't have start before end!
	if i > j then
		return ""
	end

	-- Byte offsets to pass to string.sub. The start defaults to "past the
	-- end" so a start beyond the last character yields "".
	local startByte, endByte = bytes + 1, bytes
	local pos, len = 1, 0

	while pos <= bytes do
		len = len + 1

		if len == i then
			startByte = pos
		end

		pos = pos + utf8charbytes(s, pos)

		if len == j then
			endByte = pos - 1
			break
		end
	end

	return strsub(s, startByte, endByte)
end
|
||||
|
||||
-- Code-point equivalent of string.sub, built on the native utf8 library
-- (kept in `lutf8`).
-- s: string; i: first character (default 1); j: last character (default:
-- end of string). Negative indices count from the end.
-- Returns the substring, "" for an empty range, or nil when a negative
-- index is used and `s` is not valid UTF-8 (lutf8.len fails).
function utf8.sub(s,i,j)
	i = i or 1
	j = j or math.maxinteger
	if i < 0 or j < 0 then
		-- Only negative indices need the character count.
		local n = lutf8.len(s)
		if not n then return nil end
		if i < 0 then i = n + 1 + i end
		if j < 0 then j = n + 1 + j end
	end
	-- Like string.sub: a start before the string is clamped to 1.
	if i < 1 then i = 1 end
	if j < i then return "" end
	local first = lutf8.offset(s, i)
	-- nil means the start lies beyond the last character.
	if not first then return "" end
	local past
	-- Guard the "to the end" default: j + 1 would overflow.
	if j < math.maxinteger then
		past = lutf8.offset(s, j + 1)
	end
	if past then
		return s:sub(first, past - 1)
	end
	return s:sub(first)
end
|
||||
|
||||
--utf8.sub = utf8sub
|
||||
|
||||
-- Replaces UTF-8 characters according to a mapping table: every character
-- of `s` that is a key of `mapping` is substituted by the mapped value,
-- all others are copied unchanged.
-- Raises an error when `s` is not a string, `mapping` is not a table, or
-- utf8charbytes rejects the input.
local function utf8replace(s, mapping)
	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
	end
	if type(mapping) ~= "table" then
		error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
	end

	local pos = 1
	local bytes = strlen(s)
	-- Collect the pieces and join once; appending to a string per
	-- character is quadratic in the input length.
	local pieces = {}

	while pos <= bytes do
		local charbytes = utf8charbytes(s, pos)
		local c = strsub(s, pos, pos + charbytes - 1)

		pieces[#pieces + 1] = mapping[c] or c

		pos = pos + charbytes
	end

	return table.concat(pieces)
end
|
||||
|
||||
-- UTF-8 aware counterpart of string.upper: maps characters through the
-- lc_uc table of the utf8data module via utf8replace.
local function utf8upper(s)
	local lowerToUpper = modules.utf8data.lc_uc
	return utf8replace(s, lowerToUpper)
end
|
||||
|
||||
utf8.upper = utf8upper
|
||||
|
||||
-- UTF-8 aware counterpart of string.lower: maps characters through the
-- uc_lc table of the utf8data module via utf8replace.
local function utf8lower(s)
	local upperToLower = modules.utf8data.uc_lc
	return utf8replace(s, upperToLower)
end
|
||||
|
||||
utf8.lower = utf8lower
|
||||
|
||||
-- Works like string.reverse except that whole UTF-8 characters, not
-- bytes, are reversed.
-- Walks the string from the end: continuation bytes (%x80-BF) are skipped
-- backwards until the lead byte is found, the character is validated with
-- utf8charbytes and appended to the result.
-- Raises an error when `s` is not a string or is not well-formed UTF-8.
local function utf8reverse(s)
	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8reverse' (string expected, got ".. type(s).. ")")
	end

	local pos = strlen(s)
	local pieces = {}

	while pos > 0 do
		-- Last byte of the character being extracted.
		local last = pos
		local c = strbyte(s, pos)

		-- Step back over continuation bytes to the lead byte.
		while c >= 128 and c <= 191 do
			pos = pos - 1
			if pos < 1 then
				-- The string starts with a continuation byte.
				error("Invalid UTF-8 character")
			end
			c = strbyte(s, pos)
		end

		local charbytes = utf8charbytes(s, pos)
		-- The character must end exactly where the backward scan began,
		-- otherwise stray continuation bytes would be dropped silently.
		if pos + charbytes - 1 ~= last then
			error("Invalid UTF-8 character")
		end

		pieces[#pieces + 1] = strsub(s, pos, last)

		pos = pos - 1
	end

	return table.concat(pieces)
end
|
||||
|
||||
utf8.reverse = utf8reverse
|
||||
|
||||
return utf8
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user