1
0
mirror of https://github.com/hsoft/collapseos.git synced 2024-09-29 11:10:56 +10:00

lib/expr: make recursion process a bit more orderly

Instead of going left and right, finding operators chars and replacing them
with nulls, we parse expressions in a more orderly manner, one chunk at a
time. I think it qualifies as "recursive descent", but I'm not sure.

This allows us to preserve the string we parse and should also make the
implementation of parens much easier.
This commit is contained in:
Virgil Dupras 2019-12-29 11:42:18 -05:00
parent 7410891ad1
commit 213614af33
4 changed files with 270 additions and 159 deletions

View File

@ -1,7 +1,5 @@
; *** Requirements *** ; *** Requirements ***
; findchar ; ari
; multDEBC
; callIXI
; ;
; *** Defines *** ; *** Defines ***
; ;
@ -11,167 +9,132 @@
; ;
; *** Code *** ; *** Code ***
; ;
; Parse expression in string at (HL) and returns the result in DE. ; Parse expression in string at (HL) and returns the result in DE.
; **This routine mutates (HL).** ; This routine needs to be able to mutate (HL), but it takes care of restoring
; We expect (HL) to be disposable: we mutate it to avoid having to make a copy. ; the string to its original value before returning.
; Sets Z on success, unset on error. ; Sets Z on success, unset on error.
parseExpr: parseExpr:
push iy
push ix push ix
push hl push hl
call _parseExpr call _parseAddSubst
push ix \ pop de
pop hl pop hl
pop ix pop ix
pop iy
ret ret
_parseExpr: ; *** Op signature ***
ld de, exprTbl ; The signature of "operators routines" (.plus, .mult, etc) below is this:
.loop: ; Combine HL and DE with an operator (+, -, *, etc) and put the result in DE.
ld a, (de) ; Destroys HL and A. Never fails. Yes, that's a problem for division by zero.
or a ; Don't divide by zero. All other registers are protected.
jp z, EXPR_PARSE ; no operator, just parse the literal
push de ; --> lvl 1. save operator row ; Given a running result in DE, a rest-of-expression in (HL), a parse routine
call _findAndSplit ; in IY and an apply "operator routine" in IX, (HL/DE --> DE)
jr z, .found ; With that, parse the rest of (HL) and apply the operation on it, then place
pop de ; <-- lvl 1 ; HL at the end of the parsed string, with A containing the last char of it,
inc de \ inc de \ inc de ; which can be either an operator or a null char.
jr .loop ; Z for success.
.found: ;
; Operator found, string splitted. Left in (HL), right in (DE) _parseApply:
call _resolveLeftAndRight push de ; --> lvl 1, left result
; Whether _resolveLeftAndRight was a success, we pop our lvl 1 stack push ix ; --> lvl 2, routine to apply
; out, which contains our operator row. We pop it in IX. inc hl ; after op char
; L-R numbers are parsed in HL (left) and DE (right). call callIY ; --> DE
pop ix ; <-- lvl 1 pop ix ; <-- lvl 2, routine to apply
; Here we do some stack kung fu. We have, in HL, a string pointer we
; want to keep. We have, in (SP), our left result we want to use.
ex (sp), hl ; <-> lvl 1
ret nz ret nz
; Resolving left and right succeeded, proceed! push af ; --> lvl 2, save ending operator
inc ix ; point to routine pointer call callIX
call callIXI pop af ; <-- lvl 2, restore operator.
push de \ pop ix pop hl ; <-- lvl 1, restore str pointer
cp a ; ensure Z
ret ret
; Given a string in (HL) and a separator char in A, return a splitted string, ; Unless there's an error, this routine completely resolves any valid expression
; that is, the same (HL) string but with the found A char replaced by a null ; from (HL) and puts the result in DE.
; char. DE points to the second part of the split. ; Destroys HL
; Sets Z if found, unset if not found. ; Z for success.
_findAndSplit: _parseAddSubst:
push hl call _parseMultDiv
call .skipCharLiteral ret nz
call findchar .loop:
jr nz, .end ; nothing found ; do we have an operator?
; Alright, we have our char and we're pointing at it. Let's replace it
; with a null char.
xor a
ld (hl), a ; + changed to \0
inc hl
ex de, hl ; DE now points to the second part of the split
cp a ; ensure Z
.end:
pop hl ; HL is back to the start
ret
.skipCharLiteral:
; special case: if our first char is ', skip the first 3 characters
; so that we don't mistake a literal for an iterator
push af
ld a, (hl)
cp 0x27 ; '
jr nz, .skipCharLiteralEnd ; not a '
xor a ; check for null char during skipping
; skip 3
inc hl
cp (hl)
jr z, .skipCharLiteralEnd
inc hl
cp (hl)
jr z, .skipCharLiteralEnd
inc hl
.skipCharLiteralEnd:
pop af
ret
.find:
; parse expression on the left (HL) and the right (DE) and put the results in
; HL (left) and DE (right)
_resolveLeftAndRight:
ld a, (hl)
or a or a
jr z, .noleft ret z ; null char, we're done
; Parse left operand in (HL) ; We have an operator. Resolve the rest of the expr then apply it.
push de ; --> lvl 1 ld ix, .plus
call parseExpr cp '+'
pop hl ; <-- lvl 1, orig DE jr z, .found
ret nz ; return immediately if error ld ix, .minus
.parseright: cp '-'
; Now we have parsed everything to the left and we have its result in ret nz ; unknown char, error
; DE. What we need to do now is the same thing on (DE) and then apply .found:
; the + operator. Let's save DE somewhere and parse this. ld iy, _parseMultDiv
push de ; --> lvl 1 call _parseApply
; right expr in (HL) ret nz
call parseExpr ; DE is set jr .loop
pop hl ; <-- lvl 1. left value
ret ; Z is parseExpr's result
.noleft:
; special case: is (HL) zero? If yes, it means that our left operand
; is empty. consider it as 0
ex de, hl ; (DE) goes in (HL) for .parseright
ld de, 0
jr .parseright
; Routines in here all have the same signature: they take two numbers, DE (left)
; and IX (right), apply the operator and put the resulting number in DE.
; The table has 3 bytes per row: 1 byte for operator and 2 bytes for routine
; pointer.
exprTbl:
.db '+'
.dw .plus
.db '-'
.dw .minus
.db '*'
.dw .mult
.db '/'
.dw .div
.db '%'
.dw .mod
.db '&'
.dw .and
.db 0x7c ; '|'
.dw .or
.db '^'
.dw .xor
.db '}'
.dw .rshift
.db '{'
.dw .lshift
.db 0 ; end of table
.plus: .plus:
add hl, de add hl, de
ex de, hl ex de, hl
ret ret
.minus: .minus:
or a ; clear carry or a ; clear carry
sbc hl, de sbc hl, de
ex de, hl ex de, hl
ret ret
; Parse (HL) as far as it can, that is, resolving expressions at its level or
; lower (anything but + and -).
; A is set to the last op it encountered. Unless there's an error, this can only
; be +, - or null. Null if we're done parsing, + and - if there's still work to
; do.
; (HL) points to last op encountered.
; DE is set to the numerical value of everything that was parsed left of (HL).
_parseMultDiv:
call _parseBitShift
ret nz
.loop:
; do we have an operator?
or a
ret z ; null char, we're done
; We have an operator. Resolve the rest of the expr then apply it.
ld ix, .mult
cp '*'
jr z, .found
ld ix, .div
cp '/'
jr z, .found
ld ix, .mod
cp '%'
jr z, .found
; might not be an error, return success
cp a
ret
.found:
ld iy, _parseBitShift
call _parseApply
ret nz
jr .loop
.mult: .mult:
push bc ; --> lvl 1
ld b, h ld b, h
ld c, l ld c, l
call multDEBC ; --> HL call multDEBC ; --> HL
pop bc ; <-- lvl 1
ex de, hl ex de, hl
ret ret
.div: .div:
; divide takes HL/DE ; divide takes HL/DE
push bc ld a, l
push bc ; --> lvl 1
call divide call divide
ld e, c ld e, c
ld d, b ld d, b
pop bc pop bc ; <-- lvl 1
ret ret
.mod: .mod:
@ -179,6 +142,39 @@ exprTbl:
ex de, hl ex de, hl
ret ret
; Same as _parseMultDiv, but a layer lower.
_parseBitShift:
call _parseNumber
ret nz
.loop:
; do we have an operator?
or a
ret z ; null char, we're done
; We have an operator. Resolve the rest of the expr then apply it.
ld ix, .and
cp '&'
jr z, .found
ld ix, .or
cp 0x7c ; '|'
jr z, .found
ld ix, .xor
cp '^'
jr z, .found
ld ix, .rshift
cp '}'
jr z, .found
ld ix, .lshift
cp '{'
jr z, .found
; might not be an error, return success
cp a
ret
.found:
ld iy, _parseNumber
call _parseApply
ret nz
jr .loop
.and: .and:
ld a, h ld a, h
and d and d
@ -209,26 +205,130 @@ exprTbl:
ld a, e ld a, e
and 0xf and 0xf
ret z ret z
push bc push bc ; --> lvl 1
ld b, a ld b, a
.rshiftLoop: .rshiftLoop:
srl h srl h
rr l rr l
djnz .rshiftLoop djnz .rshiftLoop
ex de, hl ex de, hl
pop bc pop bc ; <-- lvl 1
ret ret
.lshift: .lshift:
ld a, e ld a, e
and 0xf and 0xf
ret z ret z
push bc push bc ; --> lvl 1
ld b, a ld b, a
.lshiftLoop: .lshiftLoop:
sla l sla l
rl h rl h
djnz .lshiftLoop djnz .lshiftLoop
ex de, hl ex de, hl
pop bc pop bc ; <-- lvl 1
ret ret
; Parse first number of expression at (HL). A valid number is anything that can
; be parsed by EXPR_PARSE and is followed either by a null char or by any of the
; operator chars. This routines takes care of replacing an operator char with
; the null char before calling EXPR_PARSE and then replace the operator back
; afterwards.
; HL is moved to the char following the number having been parsed.
; DE contains the numerical result.
; A contains the operator char following the number (or null). Only on success.
; Z for success.
_parseNumber:
; Special case 1: number starts with '-'
ld a, (hl)
cp '-'
jr nz, .skip1
; We have a negative number. Parse normally, then subst from zero
inc hl
call _parseNumber
push hl ; --> lvl 1
ex af, af' ; preserve flags
or a ; clear carry
ld hl, 0
sbc hl, de
ex de, hl
ex af, af' ; restore flags
pop hl ; <-- lvl 1
ret
.skip1:
; End of special case 1
push ix
; Copy beginning of string to DE, we'll need it later
ld d, h
ld e, l
; Special case 2: we have a char literal. If we have a char literal, we
; don't want to go through the "_isOp" loop below because if that char
; is one of our operators, we're messing up our processing. So, set
; ourselves 3 chars further and continue from there. EXPR_PARSE will
; take care of validating those 3 chars.
cp 0x27 ; apostrophe (') char
jr nz, .skip2
; "'". advance HL by 3
inc hl \ inc hl \ inc hl
; End of special case 2
.skip2:
dec hl ; offset "inc-hl-before" in loop
.loop:
inc hl
ld a, (hl)
call _isOp
jr nz, .loop
; (HL) and A is an op or a null
push af ; --> lvl 1 save op
push hl ; --> lvl 2 save end of string
; temporarily put a null char instead of the op
xor a
ld (hl), a
ex de, hl ; rewind to beginning of number
call EXPR_PARSE
ex af, af' ; keep result flags away while we restore (HL)
push ix \ pop de ; result in DE
pop hl ; <-- lvl 2, end of string
pop af ; <-- lvl 1, saved op
ld (hl), a
ex af, af' ; restore Z from EXPR_PARSE
jr nz, .end
; HL is currently at the end of the number's string
; On success, have A be the operator char following the number
ex af, af'
.end:
pop ix
ret
; Sets Z if A contains a valid operator char or a null char.
_isOp:
or a
ret z
push hl ; --> lvl 1
; Set A' to zero for quick end-of-table checks
ex af, af'
xor a
ex af, af'
ld hl, .exprChars
.loop:
cp (hl)
jr z, .found
ex af, af'
cp (hl)
jr z, .notFound ; end of table
ex af, af'
inc hl ; next char
jr .loop
.notFound:
ex af, af' ; restore orig A
inc a ; unset Z
.found:
; Z already set
pop hl ; <-- lvl 1
ret
.exprChars:
.db "+-*/%&|^{}", 0

View File

@ -51,6 +51,24 @@ assertEQW:
.msg: .msg:
.db "HL != DE", CR, LF, 0 .db "HL != DE", CR, LF, 0
; Given a list of pointer to test data structures in HL and a pointer to a test
; routine in IX, call (IX) with HL pointing to the test structure until the list
; points to a zero. See testParseExpr in test_expr for an example usage.
testList:
push hl ; --> lvl 1
call intoHL
ld a, h
or l
jr z, .end
call callIX
call nexttest
pop hl ; <-- lvl 1
inc hl \ inc hl
jr testList
.end:
pop hl ; <-- lvl 1
ret
nexttest: nexttest:
ld a, (testNum) ld a, (testNum)
inc a inc a

View File

@ -109,35 +109,18 @@ test:
halt halt
testParseExpr: testParseExpr:
ld iy, .t1 ld hl, .alltests
call .testEQ ld ix, .test
ld iy, .t2 jp testList
call .testEQ
ld iy, .t3
call .testEQ
ld iy, .t4
call .testEQ
ld iy, .t5
call .testEQ
ld iy, .t6
call .testEQ
ld iy, .t7
call .testEQ
ld iy, .t8
call .testEQ
ld iy, .t9
call .testEQ
ret
.testEQ: .test:
push iy \ pop hl push hl \ pop iy
inc hl \ inc hl inc hl \ inc hl
call parseExpr call parseExpr
call assertZ call assertZ
ld l, (iy) ld l, (iy)
ld h, (iy+1) ld h, (iy+1)
call assertEQW jp assertEQW
jp nexttest
.t1: .t1:
.dw 7 .dw 7
@ -166,3 +149,13 @@ testParseExpr:
.t9: .t9:
.dw 10 .dw 10
.db "2*3+4", 0 .db "2*3+4", 0
; There was this untested regression during the replacement of find-and-subst
; parseExpr to the recursive descent one. It was time consuming to find. Here
; it goes, here it stays.
.t10:
.dw '-'+1
.db "'-'+1", 0
.alltests:
.dw .t1, .t2, .t3, .t4, .t5, .t6, .t7, .t8, .t9, .t10, 0

View File

@ -13,9 +13,9 @@ cmpas() {
echo ok echo ok
else else
echo actual echo actual
echo $ACTUAL echo "$ACTUAL"
echo expected echo expected
echo $EXPECTED echo "$EXPECTED"
exit 1 exit 1
fi fi
} }