Finish up (maybe) the new compression scheme.

This commit is contained in:
20kdc 2020-03-31 17:52:24 +01:00
parent 151097cdce
commit aef0043d4a
14 changed files with 338 additions and 176 deletions

View File

@ -41,8 +41,9 @@ for i = 0, 127 do
end
end
local function preproc(blk, p)
local function preprocWithPadding(blk, p)
local out = ""
local needsPadding = false
while blk ~= "" do
p(blk)
local len = math.min(preprocMaxLen, #blk)
@ -50,6 +51,7 @@ local function preproc(blk, p)
local seg = blk:sub(1, len)
if preprocParts[seg] then
out = out .. preprocParts[seg]
needsPadding = #preprocParts[seg] < 2
blk = blk:sub(#seg + 1)
break
end
@ -57,68 +59,27 @@ local function preproc(blk, p)
end
assert(len ~= 0)
end
return out
end
-- BDIVIDE r5 edition
-- Algorithm simplified for smaller implementation and potentially better compression
-- format:
-- 0-127 for constants
-- <128 + (length - 4)>, <position high>, <position low>
-- Position is where in the window it was found, minus 1.
-- windowSize must be the same between the encoder and decoder,
-- and is the amount of data preserved after cropping.
local function bdivide(blk, p)
local out = ""
local windowSize = 0x10000
local windowData = ("\x00"):rep(windowSize)
while blk ~= "" do
p(blk)
local bestData = blk:sub(1, 1)
local bestRes = bestData
for lm = 0, 127 do
local al = lm + 4
local pfx = blk:sub(1, al)
if #pfx ~= al then
break
end
local p = windowData:find(pfx, 1, true)
if not p then
break
end
local pm = p - 1
local thirdByte = pm % 256
-- anti ']'-corruption helper
if thirdByte ~= 93 then
bestData = string.char(128 + lm, math.floor(pm / 256), thirdByte)
bestRes = pfx
end
end
-- ok, encode!
out = out .. bestData
-- crop window
windowData = (windowData .. bestRes):sub(-windowSize)
blk = blk:sub(#bestRes + 1)
-- This needsPadding bit is just sort of quickly added in
-- to keep this part properly maintained
-- even though it might never get used
if needsPadding then
return out .. "\x00"
end
return out
end
local bdCore = require("bdivide.core")
return function (data, lexCrunch)
io.stderr:write("preproc: ")
local pi = frw.progress()
local function p(b)
pi(1 - (#b / #data))
end
data = preproc(data, p)
data = preprocWithPadding(data, p)
io.stderr:write("\nbdivide: ")
pi = frw.progress()
data = bdivide(data, p)
data = bdCore.bdividePad(bdCore.bdivide(data, p))
io.stderr:write("\n")
-- These are used to pad the stream to flush the pipeline.
-- It's cheaper than the required code.
-- 1 byte of buffer for preproc,
-- 2 bytes of buffer for bdivide.
return lexCrunch.process(frw.read("bdivide/instdeco.lua"), {}), data .. ("\x00"):rep(3)
return lexCrunch.process(frw.read("bdivide/instdeco.lua"), {}), data
end

72
inst/bdivide/core.lua Normal file
View File

@ -0,0 +1,72 @@
-- This is released into the public domain.
-- No warranty is provided, implied or otherwise.
-- BDIVIDE r5 edition
-- Algorithm simplified for smaller implementation and potentially better compression
-- format:
-- 0-127 for constants
-- <128 + (length - 4)>, <position high>, <position low>
-- Position is where in the window it was found, minus 1.
-- windowSize must be the same between the encoder and decoder,
-- and is the amount of data preserved after cropping.
-- Module table; holds the BDIVIDE encoder and its padding helper.
local bdivCore = {}

-- Compresses a 7-bit string into the BDIVIDE stream format described above.
-- Each step emits either one literal byte (0-127) or a three-byte
-- back-reference into a sliding window holding the last windowSize bytes
-- of already-encoded input. The window starts out as all NUL bytes.
-- blk: the data to compress; every byte must be below 128 (asserted).
-- p: progress callback, called with the not-yet-encoded remainder of blk
--    before each step.
-- Returns the compressed stream, without padding (see bdividePad).
function bdivCore.bdivide(blk, p)
 local windowSize = 0x10000
 local windowData = ("\x00"):rep(windowSize)
 -- Encoded pieces are collected here and joined once at the end;
 -- appending to a string on every step would re-copy the whole output.
 local pieces = {}
 while blk ~= "" do
  p(blk)
  assert(blk:byte() < 128, "BDIVIDE does not handle 8-bit data")
  -- Fallback if no match is found: emit the next byte as a literal.
  local bestData = blk:sub(1, 1)
  local bestRes = bestData
  -- Any occurrence of a longer prefix is also an occurrence of the
  -- shorter one, so the first hit for the longer prefix can never lie
  -- before the first hit for the shorter one. Resuming the search there
  -- finds exactly the same position as searching from the start.
  local searchFrom = 1
  for lm = 0, 127 do
   -- lm is the encoded length field; matches are 4 to 131 bytes long.
   local al = lm + 4
   local pfx = blk:sub(1, al)
   if #pfx ~= al then
    -- Not enough input left for a match of this length.
    break
   end
   local found = windowData:find(pfx, searchFrom, true)
   if not found then
    break
   end
   searchFrom = found
   -- Positions are stored zero-based, high byte first.
   local pm = found - 1
   bestData = string.char(128 + lm, math.floor(pm / 256), pm % 256)
   bestRes = pfx
  end
  -- ok, encode!
  pieces[#pieces + 1] = bestData
  -- crop window
  windowData = (windowData .. bestRes):sub(-windowSize)
  blk = blk:sub(#bestRes + 1)
 end
 return table.concat(pieces)
end

-- Adds padding if required.
-- Basically, if the stream ends on a literal,
-- then the literal won't get read without two padding bytes.
-- Otherwise (including if there is no data at all) it's fine.
-- data: a stream produced by bdivide.
-- Returns data, with "\x00\x00" appended when the last token is a literal.
function bdivCore.bdividePad(data)
 local i = 1
 local endsOnLiteral = false
 -- Walk the stream token by token: literals take 1 byte, back-references 3.
 while i <= #data do
  endsOnLiteral = data:byte(i) <= 127
  i = i + (endsOnLiteral and 1 or 3)
 end
 if endsOnLiteral then
  return data .. "\x00\x00"
 end
 return data
end
return bdivCore

View File

@ -34,29 +34,31 @@ function $bdPP(x, y)
return string.char(("enart"):byte(x % 5 + 1), ("ndtelh"):byte((x - x % 5) / 5 + 1)), 2
end
function $engineInput($a0)
$bdBDBuffer = $bdBDBuffer .. $a0
${
function $engineInput($L|lData)
$bdBDBuffer = $bdBDBuffer .. $lData
while #$bdBDBuffer > 2 do
$a0 = $bdBDBuffer:byte()
if $a0 < 128 then
$a0 = $bdBDBuffer:sub(1, 1)
$lData = $bdBDBuffer:byte()
if $lData < 128 then
$lData = $bdBDBuffer:sub(1, 1)
$bdBDBuffer = $bdBDBuffer:sub(2)
else
$NT|bdBDPtr
$bdBDPtr = $bdBDBuffer:byte(2) * 256 + $bdBDBuffer:byte(3) + 1
$a0 = $bdBDWindow:sub($bdBDPtr, $bdBDPtr + $a0 - 125)
${
$L|bdBDPtr = $bdBDBuffer:byte(2) * 256 + $bdBDBuffer:byte(3) + 1
$lData = $bdBDWindow:sub($bdBDPtr, $bdBDPtr + $lData - 125)
$bdBDBuffer = $bdBDBuffer:sub(4)
$DT|bdBDPtr
$}
end
$bdPPBuffer = $bdPPBuffer .. $a0
$bdBDWindow = ($bdBDWindow .. $a0):sub(-2^16)
$bdPPBuffer = $bdPPBuffer .. $lData
$bdBDWindow = ($bdBDWindow .. $lData):sub(-2^16)
while #$bdPPBuffer > 1 do
$NT|bdPPAdv
$a0, $bdPPAdv = $bdPP($bdPPBuffer:byte(), $bdPPBuffer:byte(2))
${
$lData, $L|bdPPAdv = $bdPP($bdPPBuffer:byte(), $bdPPBuffer:byte(2))
$bdPPBuffer = $bdPPBuffer:sub($bdPPAdv)
$DT|bdPPAdv
$engineOutput($a0)
$}
$engineOutput($lData)
end
end
end
$}

18
inst/bdvlite/compress.lua Normal file
View File

@ -0,0 +1,18 @@
-- This is released into the public domain.
-- No warranty is provided, implied or otherwise.
local frw = require("libs.frw")
local bdCore = require("bdivide.core")
-- Compression stage: BDIVIDE only (no preprocessing pass).
-- data: the payload to compress.
-- lexCrunch: lexcrunch context used to process the decompressor source.
-- Returns the processed decompression engine source and the padded compressed payload.
return function (data, lexCrunch)
 -- Length of the uncompressed input, used to scale the progress readout.
 local totalLen = #data
 io.stderr:write("\nbdivide: ")
 local report = frw.progress()
 local compressed = bdCore.bdivide(data, function (remaining)
  report(1 - (#remaining / totalLen))
 end)
 local padded = bdCore.bdividePad(compressed)
 io.stderr:write("\n")
 local engine = lexCrunch.process(frw.read("bdvlite/instdeco.lua"), {})
 return engine, padded
end

72
inst/bdvlite/core.lua Normal file
View File

@ -0,0 +1,72 @@
-- This is released into the public domain.
-- No warranty is provided, implied or otherwise.
-- BDIVIDE r5 edition
-- Algorithm simplified for smaller implementation and potentially better compression
-- format:
-- 0-127 for constants
-- <128 + (length - 4)>, <position high>, <position low>
-- Position is where in the window it was found, minus 1.
-- windowSize must be the same between the encoder and decoder,
-- and is the amount of data preserved after cropping.
-- Module table; holds the BDIVIDE encoder and its padding helper.
local bdivCore = {}

-- Compresses a 7-bit string into the BDIVIDE stream format described above.
-- Each step emits either one literal byte (0-127) or a three-byte
-- back-reference into a sliding window holding the last windowSize bytes
-- of already-encoded input. The window starts out as all NUL bytes.
-- blk: the data to compress; every byte must be below 128 (asserted).
-- p: progress callback, called with the not-yet-encoded remainder of blk
--    before each step.
-- Returns the compressed stream, without padding (see bdividePad).
function bdivCore.bdivide(blk, p)
 local windowSize = 0x10000
 local windowData = ("\x00"):rep(windowSize)
 -- Encoded pieces are collected here and joined once at the end;
 -- appending to a string on every step would re-copy the whole output.
 local pieces = {}
 while blk ~= "" do
  p(blk)
  assert(blk:byte() < 128, "BDIVIDE does not handle 8-bit data")
  -- Fallback if no match is found: emit the next byte as a literal.
  local bestData = blk:sub(1, 1)
  local bestRes = bestData
  -- Any occurrence of a longer prefix is also an occurrence of the
  -- shorter one, so the first hit for the longer prefix can never lie
  -- before the first hit for the shorter one. Resuming the search there
  -- finds exactly the same position as searching from the start.
  local searchFrom = 1
  for lm = 0, 127 do
   -- lm is the encoded length field; matches are 4 to 131 bytes long.
   local al = lm + 4
   local pfx = blk:sub(1, al)
   if #pfx ~= al then
    -- Not enough input left for a match of this length.
    break
   end
   local found = windowData:find(pfx, searchFrom, true)
   if not found then
    break
   end
   searchFrom = found
   -- Positions are stored zero-based, high byte first.
   local pm = found - 1
   bestData = string.char(128 + lm, math.floor(pm / 256), pm % 256)
   bestRes = pfx
  end
  -- ok, encode!
  pieces[#pieces + 1] = bestData
  -- crop window
  windowData = (windowData .. bestRes):sub(-windowSize)
  blk = blk:sub(#bestRes + 1)
 end
 return table.concat(pieces)
end

-- Adds padding if required.
-- Basically, if the stream ends on a literal,
-- then the literal won't get read without two padding bytes.
-- Otherwise (including if there is no data at all) it's fine.
-- data: a stream produced by bdivide.
-- Returns data, with "\x00\x00" appended when the last token is a literal.
function bdivCore.bdividePad(data)
 local i = 1
 local endsOnLiteral = false
 -- Walk the stream token by token: literals take 1 byte, back-references 3.
 while i <= #data do
  endsOnLiteral = data:byte(i) <= 127
  i = i + (endsOnLiteral and 1 or 3)
 end
 if endsOnLiteral then
  return data .. "\x00\x00"
 end
 return data
end
return bdivCore

30
inst/bdvlite/instdeco.lua Normal file
View File

@ -0,0 +1,30 @@
-- This is released into the public domain.
-- No warranty is provided, implied or otherwise.
-- BDIVIDE (r5 edition)
-- decompression engine used to decompress DEFLATE decompression engine
-- Compressed bytes received so far but not yet decoded.
$bdBDBuffer = ""
-- Sliding window of the most recent decoded output.
-- Starts as 2^16 NUL bytes, the same initial window the encoder in core.lua uses.
$bdBDWindow = ("\x00"):rep(2^16)
${
-- Feeds compressed data into the decoder and forwards whatever can be decoded to the engine output.
-- The parameter doubles as scratch space: incoming data first, then the token's first byte,
-- and finally the decoded string for that token.
function $engineInput($L|lData)
$bdBDBuffer = $bdBDBuffer .. $lData
-- Only decode while at least three bytes are buffered, so a back-reference can always be read whole.
-- (The encoder side pads a stream that ends on a literal with two extra bytes to flush it.)
while #$bdBDBuffer > 2 do
$lData = $bdBDBuffer:byte()
if $lData < 128 then
-- Literal: a single byte below 128 is passed through unchanged.
$lData = $bdBDBuffer:sub(1, 1)
$bdBDBuffer = $bdBDBuffer:sub(2)
else
${
-- Back-reference: bytes 2 and 3 hold the zero-based window position, high byte first.
$L|bdBDPtr = $bdBDBuffer:byte(2) * 256 + $bdBDBuffer:byte(3) + 1
-- The copy length is the first byte minus 124 (so 128 means 4 bytes); the range below is inclusive.
$lData = $bdBDWindow:sub($bdBDPtr, $bdBDPtr + $lData - 125)
$bdBDBuffer = $bdBDBuffer:sub(4)
$}
end
-- Append the decoded data and crop the window back to its last 2^16 bytes.
$bdBDWindow = ($bdBDWindow .. $lData):sub(-2^16)
$engineOutput($lData)
end
end
$}

View File

@ -2,46 +2,74 @@
-- No warranty is provided, implied or otherwise.
-- KittenOS NEO Installer Generator --
local alg, tarName, cid = ...
cid = (cid or "UNKNOWN"):sub(1, 7)
local args = {...}
local cid = args[1]
local tarName = args[2]
local algorithmsInReverseOrder = {}
for i = 3, #args do
table.insert(algorithmsInReverseOrder, 1, args[i])
end
local u = require("libs.frw")
local algImpl = require(alg .. ".compress")
local instSize = 0
local function put(data)
io.write(data)
instSize = instSize + #data
end
-- TAR File --
local tarData = u.read(tarName)
local tarSectors = math.floor(#tarData / 512)
-- Installer Lexcrunch Context --
local lexCrunch = require("libs.lexcrunch")()
local installerCore = lexCrunch.process(u.read("instcore.lua"), {["$$SECTORS"] = tostring(tarSectors)})
local installerHead = lexCrunch.process(u.read("insthead.lua"), {["$$CORESIZE"] = tostring(#installerCore)})
local installerTail = lexCrunch.process(u.read("insttail.lua"), {})
-- Installer Core --
-- installerFinalized:
-- Stuff that's already finished and put at the end of RISM. Prepend to this.
-- installerPayload / installerProgramLength:
-- The next-outer chunk that hasn't been written to the end of RISM
-- as the compression scheme (if one) has not been applied yet.
-- Really, installerProgramLength is only necessary because of the innermost chunk:
-- that chunk also carries the TAR, which is additional data in the same effective compression block,
-- so the program length is required to keep the TAR from being treated as part of the program.
local installerPayload
local installerProgramLength
local installerFinalized = ""
do
local tarData = u.read(tarName)
local tarSectors = math.floor(#tarData / 512)
local installerCore = lexCrunch.process(u.read("instcore.lua"), {["$$SECTORS"] = tostring(tarSectors)})
installerPayload = installerCore .. tarData
installerProgramLength = #installerCore
end
-- Installer Compression --
local rawData = installerCore .. tarData
io.stderr:write("compressing...\n")
local compressionEngine, compressedData = algImpl(rawData, lexCrunch)
-- RISM [[
compressedData = compressedData:gsub("\xFE", "\xFE\xFE")
compressedData = compressedData:gsub("]]", "]\xFE]")
compressedData = "\x00" .. compressedData
-- ]]
io.stderr:write("compression with " .. alg .. ": " .. #rawData .. " -> " .. #compressedData .. "\n")
for _, v in ipairs(algorithmsInReverseOrder) do
io.stderr:write("compressing (" .. v .. ")\n")
local algImpl = require(v .. ".compress")
local algEngine, algData = algImpl(installerPayload, lexCrunch)
io.stderr:write("result: " .. #installerPayload .. " -> " .. #algData .. "\n")
-- prepend the program length of the last section
algEngine = lexCrunch.process("$iBlockingLen = " .. installerProgramLength .. " " .. algEngine, {})
-- commit
installerPayload = algEngine
installerProgramLength = #installerPayload
installerFinalized = algData .. installerFinalized
end
-- Installer Final Generation --
-- Installer Final --
-- This is a special case, so the program length/payload/etc. business has to be repeated.
put("--" .. cid .. "\n")
put("--This is released into the public domain. No warranty is provided, implied or otherwise.\n")
put(lexCrunch.process(installerHead .. compressionEngine .. installerTail, {}))
put("--[[" .. compressedData .. "]]")
put(lexCrunch.process(u.read("insthead.lua"), {["$$CORESIZE"] = tostring(installerProgramLength)}))
local RISM = installerPayload .. installerFinalized
RISM = RISM:gsub("\xFE", "\xFE\xFE")
RISM = RISM:gsub("]]", "]\xFE]")
RISM = "\x00" .. RISM
put("--[[" .. RISM .. "]]")
-- Dumping debug info --
local dbg = io.open("iSymTab", "wb")

View File

@ -28,8 +28,8 @@ $icSectorsRead = 0
$iBlockingLen = 512
function $iBlockingHook($a0)
if $icBytesRemaining > 0 then
$NT|icByteAdv
$icByteAdv = math.min(512, $icBytesRemaining)
${
$L|icByteAdv = math.min(512, $icBytesRemaining)
$icBytesRemaining = $icBytesRemaining - $icByteAdv
if $icFile then
$filesystem.write($icFile, $a0:sub(1, $icByteAdv))
@ -38,7 +38,7 @@ function $iBlockingHook($a0)
$icFile = nil
end
end
$DT|icByteAdv
$}
else
$icFilename = $a0:sub(1, 100):gsub("\x00", "")
-- this sets up the reading/skipping of data

View File

@ -14,21 +14,54 @@ $readInFile = $filesystem.open("init.neoi.lua", "rb")
$iBlockingBuffer = ""
$iBlockingLen = $$CORESIZE
$iBlockingHook = function ($a0)
-- This takes over the iBlockingHook.
assert(load($a0))()
${
function $iBlockingHook($L|lBlock)
-- Run the next script (e.g. the replacement compression engine).
assert(load($lBlock))()
end
$}
$engineOutput = function ($a0)
$iBlockingBuffer = $iBlockingBuffer .. $a0
${
function $engineOutput($L|lBlock)
$iBlockingBuffer = $iBlockingBuffer .. $lBlock
while #$iBlockingBuffer >= $iBlockingLen do
$NT|iBlock
$iBlock = $iBlockingBuffer:sub(1, $iBlockingLen)
$lBlock = $iBlockingBuffer:sub(1, $iBlockingLen)
$iBlockingBuffer = $iBlockingBuffer:sub($iBlockingLen + 1)
$iBlockingHook($iBlock)
$DT|iBlock
$iBlockingHook($lBlock)
end
end
$}
$engineInput = $engineOutput
-- DECOMPRESSION ENGINE FOLLOWS THIS CODE --
while true do
$readInBlock = $filesystem.read($readInFile, 1024)
${
for i = 1, #$readInBlock do
-- Read-in state machine
-- IT IS VERY IMPORTANT that read-in be performed char-by-char.
-- This is because of compression chain-loading; if the switch between engines isn't "clean",
-- bad stuff happens.
-- This character becomes invalid once
-- it gets passed to engineInput,
-- but that's the last step, so it's ok!
$L|readInChar = $readInBlock:sub(i, i)
if not $readInState then
if $readInChar == "\x00" then
$readInState = 0
end
elseif $readInState == 0 then
if $readInChar == "\xFE" then
$readInState = 1
else
$engineInput($readInChar)
end
else
$engineInput($readInChar)
$readInState = 0
end
end
$}
end

View File

@ -1,32 +0,0 @@
-- KOSNEO installer base
-- This is released into the public domain.
-- No warranty is provided, implied or otherwise.
-- DECOMPRESSION ENGINE PRECEDES THIS CODE --
while true do
$readInBlock = $filesystem.read($readInFile, 1024)
for i = 1, #$readInBlock do
-- Read-in state machine
$NT|readInChar
$readInChar = $readInBlock:sub(i, i)
if not $readInState then
if $readInChar == "\x00" then
$readInState = 0
end
elseif $readInState == 0 then
if $readInChar == "\xFE" then
$readInState = 1
else
$engineInput($readInChar)
end
else
$engineInput($readInChar)
$readInState = 0
end
end
$DT|readInChar
end
-- COMPRESSED DATA FOLLOWS THIS CODE --

View File

@ -135,48 +135,30 @@ return function ()
assert(defines[str], "no define " .. str)
return defines[str]
end
local comGet = str:sub(2):gmatch("[^%|]*")
local command = comGet()
if command == "NT" then
-- temporaries +
allocTmp("$" .. comGet())
return ""
elseif command == "DT" then
-- temporaries -
local id = "$" .. comGet()
assert(forwardSymTab[id], "no such var: " .. id)
assert(reverseSymTab[forwardSymTab[id]] == "temporary", "var not allocated as temporary: " .. id)
table.insert(temporaryPool, forwardSymTab[id])
forwardSymTab[id] = nil
return ""
elseif command == "NA" then
local id = "$" .. comGet()
local ib = "$" .. comGet()
assert(forwardSymTab[ib], "no such var: " .. ib)
assert(not forwardSymTab[id], "alias already present: " .. id)
forwardSymTab[id] = forwardSymTab[ib]
return ""
elseif command == "DA" then
local id = "$" .. comGet()
assert(forwardSymTab[id], "no entry for " .. id)
forwardSymTab[id] = nil
return ""
elseif command == "L" then
local id = "$" .. comGet()
local com = {}
for v in str:sub(2):gmatch("[^%|]*") do
table.insert(com, v)
end
if com[1] == "L" then
assert(#com == 2)
local id = "$" .. com[2]
assert(stackFrames[1], "allocation of " .. id .. " outside of stack frame")
table.insert(stackFrames[1], id)
return allocTmp(id)
elseif command == "{" then
elseif com[1] == "{" then
assert(#com == 1)
table.insert(stackFrames, 1, {})
return ""
elseif command == "}" then
elseif com[1] == "}" then
assert(#com == 1)
for _, id in ipairs(table.remove(stackFrames, 1)) do
table.insert(temporaryPool, forwardSymTab[id])
forwardSymTab[id] = nil
end
return ""
else
local id = "$" .. command
assert(#com == 1)
local id = "$" .. com[1]
-- normal handling
if forwardSymTab[id] then
return forwardSymTab[id]

View File

@ -1,24 +1,20 @@
# The Symbol Guide
## lexCrunch commands
The following prefixes are really special,
and are lexcrunch's responsibility:
"$$THING" : These are defines.
"$Thing" : Writes a global into stream. If not already allocated, is allocated a global.
DEPRECATED (yes, already):
"$NT|THING" : Allocates THING from temp pool
"$DT|THING" : Returns temp THING to temp pool
PROBABLY DEPRECATED:
"$NA|THING1|THING2" : Copies $THING2 to $THING1 in forwards table (not in backwards table)
"$DA|THING1" : Removes THING1 in forwards table.
"${" : Opens a frame.
"$}" : Closes a frame. (Attached temps are released.)
"$L|THING" : Allocates THING from temp pool, attaches to stack frame, writes to stream.
Use inside a comment to erase the written symbol
## Conventions
The rest are convention:
"$iThing" symbols are Installer Wrapper.
"$icThing" symbols are Installer Core.

View File

@ -7,5 +7,5 @@ cp ocemu.cfg.default ocemu.cfg && rm -rf c1-sda c1-sdb tmpfs
mkdir c1-sda c1-sdb
echo -n c1-sda > c1-eeprom/data.bin
cd ..
./package.sh $1
./package.sh $*
cp inst.lua laboratory/c1-sda/init.lua

View File

@ -15,7 +15,7 @@ cd ..
# The Installer Creator
cd inst
lua build.lua $1 ../code.tar `git status --porcelain=2 --branch | grep branch.oid | grep -E -o "[0-9a-f]*$" -` > ../inst.lua
lua build.lua `git status --porcelain=2 --branch | grep branch.oid | grep -E -o "[0-9a-f]*$" -` ../code.tar $* > ../inst.lua
lua status.lua ../inst.lua
cd ..