From 213614af33fd41aac7dc127974dd2e5b877d962f Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Sun, 29 Dec 2019 11:42:18 -0500 Subject: [PATCH] lib/expr: make recursion process a bit more orderly Instead of going left and right, finding operator chars and replacing them with nulls, we parse expressions in a more orderly manner, one chunk at a time. I think it qualifies as "recursive descent", but I'm not sure. This allows us to preserve the string we parse and should also make the implementation of parens much easier. --- apps/lib/expr.asm | 368 +++++++++++++++++++++------------ tools/tests/unit/common.asm | 18 ++ tools/tests/unit/test_expr.asm | 39 ++-- tools/tests/zasm/runtests.sh | 4 +- 4 files changed, 270 insertions(+), 159 deletions(-) diff --git a/apps/lib/expr.asm b/apps/lib/expr.asm index a847d7a..ccfa4f0 100644 --- a/apps/lib/expr.asm +++ b/apps/lib/expr.asm @@ -1,7 +1,5 @@ ; *** Requirements *** -; findchar -; multDEBC -; callIXI +; ari ; ; *** Defines *** ; @@ -11,167 +9,132 @@ ; ; *** Code *** ; - ; Parse expression in string at (HL) and returns the result in DE. -; **This routine mutates (HL).** -; We expect (HL) to be disposable: we mutate it to avoid having to make a copy. +; This routine needs to be able to mutate (HL), but it takes care of restoring +; the string to its original value before returning. ; Sets Z on success, unset on error. parseExpr: + push iy push ix push hl - call _parseExpr - push ix \ pop de + call _parseAddSubst pop hl pop ix + pop iy ret -_parseExpr: - ld de, exprTbl -.loop: - ld a, (de) - or a - jp z, EXPR_PARSE ; no operator, just parse the literal - push de ; --> lvl 1. save operator row - call _findAndSplit - jr z, .found - pop de ; <-- lvl 1 - inc de \ inc de \ inc de - jr .loop -.found: - ; Operator found, string splitted. Left in (HL), right in (DE) - call _resolveLeftAndRight - ; Whether _resolveLeftAndRight was a success, we pop our lvl 1 stack - ; out, which contains our operator row. We pop it in IX.
- ; L-R numbers are parsed in HL (left) and DE (right). - pop ix ; <-- lvl 1 +; *** Op signature *** +; The signature of "operators routines" (.plus, .mult, etc) below is this: +; Combine HL and DE with an operator (+, -, *, etc) and put the result in DE. +; Destroys HL and A. Never fails. Yes, that's a problem for division by zero. +; Don't divide by zero. All other registers are protected. + +; Given a running result in DE, a rest-of-expression in (HL), a parse routine +; in IY and an apply "operator routine" in IX, (HL/DE --> DE) +; With that, parse the rest of (HL) and apply the operation on it, then place +; HL at the end of the parsed string, with A containing the last char of it, +; which can be either an operator or a null char. +; Z for success. +; +_parseApply: + push de ; --> lvl 1, left result + push ix ; --> lvl 2, routine to apply + inc hl ; after op char + call callIY ; --> DE + pop ix ; <-- lvl 2, routine to apply + ; Here we do some stack kung fu. We have, in HL, a string pointer we + ; want to keep. We have, in (SP), our left result we want to use. + ex (sp), hl ; <-> lvl 1 ret nz - ; Resolving left and right succeeded, proceed! - inc ix ; point to routine pointer - call callIXI - push de \ pop ix - cp a ; ensure Z + push af ; --> lvl 2, save ending operator + call callIX + pop af ; <-- lvl 2, restore operator. + pop hl ; <-- lvl 1, restore str pointer ret -; Given a string in (HL) and a separator char in A, return a splitted string, -; that is, the same (HL) string but with the found A char replaced by a null -; char. DE points to the second part of the split. -; Sets Z if found, unset if not found. -_findAndSplit: - push hl - call .skipCharLiteral - call findchar - jr nz, .end ; nothing found - ; Alright, we have our char and we're pointing at it. Let's replace it - ; with a null char. 
- xor a - ld (hl), a ; + changed to \0 - inc hl - ex de, hl ; DE now points to the second part of the split - cp a ; ensure Z -.end: - pop hl ; HL is back to the start - ret - -.skipCharLiteral: - ; special case: if our first char is ', skip the first 3 characters - ; so that we don't mistake a literal for an iterator - push af - ld a, (hl) - cp 0x27 ; ' - jr nz, .skipCharLiteralEnd ; not a ' - xor a ; check for null char during skipping - ; skip 3 - inc hl - cp (hl) - jr z, .skipCharLiteralEnd - inc hl - cp (hl) - jr z, .skipCharLiteralEnd - inc hl -.skipCharLiteralEnd: - pop af - ret -.find: - -; parse expression on the left (HL) and the right (DE) and put the results in -; HL (left) and DE (right) -_resolveLeftAndRight: - ld a, (hl) +; Unless there's an error, this routine completely resolves any valid expression +; from (HL) and puts the result in DE. +; Destroys HL +; Z for success. +_parseAddSubst: + call _parseMultDiv + ret nz +.loop: + ; do we have an operator? or a - jr z, .noleft - ; Parse left operand in (HL) - push de ; --> lvl 1 - call parseExpr - pop hl ; <-- lvl 1, orig DE - ret nz ; return immediately if error -.parseright: - ; Now we have parsed everything to the left and we have its result in - ; DE. What we need to do now is the same thing on (DE) and then apply - ; the + operator. Let's save DE somewhere and parse this. - push de ; --> lvl 1 - ; right expr in (HL) - call parseExpr ; DE is set - pop hl ; <-- lvl 1. left value - ret ; Z is parseExpr's result -.noleft: - ; special case: is (HL) zero? If yes, it means that our left operand - ; is empty. consider it as 0 - ex de, hl ; (DE) goes in (HL) for .parseright - ld de, 0 - jr .parseright - -; Routines in here all have the same signature: they take two numbers, DE (left) -; and IX (right), apply the operator and put the resulting number in DE. -; The table has 3 bytes per row: 1 byte for operator and 2 bytes for routine -; pointer. 
-exprTbl: - .db '+' - .dw .plus - .db '-' - .dw .minus - .db '*' - .dw .mult - .db '/' - .dw .div - .db '%' - .dw .mod - .db '&' - .dw .and - .db 0x7c ; '|' - .dw .or - .db '^' - .dw .xor - .db '}' - .dw .rshift - .db '{' - .dw .lshift - .db 0 ; end of table - + ret z ; null char, we're done + ; We have an operator. Resolve the rest of the expr then apply it. + ld ix, .plus + cp '+' + jr z, .found + ld ix, .minus + cp '-' + ret nz ; unknown char, error +.found: + ld iy, _parseMultDiv + call _parseApply + ret nz + jr .loop .plus: add hl, de ex de, hl ret - .minus: - or a ; clear carry + or a ; clear carry sbc hl, de ex de, hl ret +; Parse (HL) as far as it can, that is, resolving expressions at its level or +; lower (anything but + and -). +; A is set to the last op it encountered. Unless there's an error, this can only +; be +, - or null. Null if we're done parsing, + and - if there's still work to +; do. +; (HL) points to last op encountered. +; DE is set to the numerical value of everything that was parsed left of (HL). +_parseMultDiv: + call _parseBitShift + ret nz +.loop: + ; do we have an operator? + or a + ret z ; null char, we're done + ; We have an operator. Resolve the rest of the expr then apply it. + ld ix, .mult + cp '*' + jr z, .found + ld ix, .div + cp '/' + jr z, .found + ld ix, .mod + cp '%' + jr z, .found + ; might not be an error, return success + cp a + ret +.found: + ld iy, _parseBitShift + call _parseApply + ret nz + jr .loop + .mult: + push bc ; --> lvl 1 ld b, h ld c, l call multDEBC ; --> HL + pop bc ; <-- lvl 1 ex de, hl ret .div: ; divide takes HL/DE - push bc + ld a, l + push bc ; --> lvl 1 call divide ld e, c ld d, b - pop bc + pop bc ; <-- lvl 1 ret .mod: @@ -179,6 +142,39 @@ exprTbl: ex de, hl ret +; Same as _parseMultDiv, but a layer lower. +_parseBitShift: + call _parseNumber + ret nz +.loop: + ; do we have an operator? + or a + ret z ; null char, we're done + ; We have an operator. Resolve the rest of the expr then apply it. 
+ ld ix, .and + cp '&' + jr z, .found + ld ix, .or + cp 0x7c ; '|' + jr z, .found + ld ix, .xor + cp '^' + jr z, .found + ld ix, .rshift + cp '}' + jr z, .found + ld ix, .lshift + cp '{' + jr z, .found + ; might not be an error, return success + cp a + ret +.found: + ld iy, _parseNumber + call _parseApply + ret nz + jr .loop + .and: ld a, h and d @@ -209,26 +205,130 @@ exprTbl: ld a, e and 0xf ret z - push bc + push bc ; --> lvl 1 ld b, a .rshiftLoop: srl h rr l djnz .rshiftLoop ex de, hl - pop bc + pop bc ; <-- lvl 1 ret .lshift: ld a, e and 0xf ret z - push bc + push bc ; --> lvl 1 ld b, a .lshiftLoop: sla l rl h djnz .lshiftLoop ex de, hl - pop bc + pop bc ; <-- lvl 1 ret + +; Parse first number of expression at (HL). A valid number is anything that can +; be parsed by EXPR_PARSE and is followed either by a null char or by any of the +; operator chars. This routines takes care of replacing an operator char with +; the null char before calling EXPR_PARSE and then replace the operator back +; afterwards. +; HL is moved to the char following the number having been parsed. +; DE contains the numerical result. +; A contains the operator char following the number (or null). Only on success. +; Z for success. +_parseNumber: + ; Special case 1: number starts with '-' + ld a, (hl) + cp '-' + jr nz, .skip1 + ; We have a negative number. Parse normally, then subst from zero + inc hl + call _parseNumber + push hl ; --> lvl 1 + ex af, af' ; preserve flags + or a ; clear carry + ld hl, 0 + sbc hl, de + ex de, hl + ex af, af' ; restore flags + pop hl ; <-- lvl 1 + ret +.skip1: + ; End of special case 1 + push ix + ; Copy beginning of string to DE, we'll need it later + ld d, h + ld e, l + + ; Special case 2: we have a char literal. If we have a char literal, we + ; don't want to go through the "_isOp" loop below because if that char + ; is one of our operators, we're messing up our processing. So, set + ; ourselves 3 chars further and continue from there. 
EXPR_PARSE will + ; take care of validating those 3 chars. + cp 0x27 ; apostrophe (') char + jr nz, .skip2 + ; "'". advance HL by 3 + inc hl \ inc hl \ inc hl + ; End of special case 2 +.skip2: + + dec hl ; offset "inc-hl-before" in loop +.loop: + inc hl + ld a, (hl) + call _isOp + jr nz, .loop + ; (HL) and A is an op or a null + push af ; --> lvl 1 save op + push hl ; --> lvl 2 save end of string + ; temporarily put a null char instead of the op + xor a + ld (hl), a + ex de, hl ; rewind to beginning of number + call EXPR_PARSE + ex af, af' ; keep result flags away while we restore (HL) + push ix \ pop de ; result in DE + pop hl ; <-- lvl 2, end of string + pop af ; <-- lvl 1, saved op + ld (hl), a + ex af, af' ; restore Z from EXPR_PARSE + jr nz, .end + ; HL is currently at the end of the number's string + ; On success, have A be the operator char following the number + ex af, af' +.end: + pop ix + ret + +; Sets Z if A contains a valid operator char or a null char. +_isOp: + or a + ret z + push hl ; --> lvl 1 + ; Set A' to zero for quick end-of-table checks + ex af, af' + xor a + ex af, af' + ld hl, .exprChars +.loop: + cp (hl) + jr z, .found + ex af, af' + cp (hl) + jr z, .notFound ; end of table + ex af, af' + inc hl ; next char + jr .loop +.notFound: + ex af, af' ; restore orig A + inc a ; unset Z +.found: + ; Z already set + pop hl ; <-- lvl 1 + ret + +.exprChars: + .db "+-*/%&|^{}", 0 + diff --git a/tools/tests/unit/common.asm b/tools/tests/unit/common.asm index 4377619..2139009 100644 --- a/tools/tests/unit/common.asm +++ b/tools/tests/unit/common.asm @@ -51,6 +51,24 @@ assertEQW: .msg: .db "HL != DE", CR, LF, 0 +; Given a list of pointer to test data structures in HL and a pointer to a test +; routine in IX, call (IX) with HL pointing to the test structure until the list +; points to a zero. See testParseExpr in test_expr for an example usage. 
+testList: + push hl ; --> lvl 1 + call intoHL + ld a, h + or l + jr z, .end + call callIX + call nexttest + pop hl ; <-- lvl 1 + inc hl \ inc hl + jr testList +.end: + pop hl ; <-- lvl 1 + ret + nexttest: ld a, (testNum) inc a diff --git a/tools/tests/unit/test_expr.asm b/tools/tests/unit/test_expr.asm index 1eaa6b5..7ba3f36 100644 --- a/tools/tests/unit/test_expr.asm +++ b/tools/tests/unit/test_expr.asm @@ -109,35 +109,18 @@ test: halt testParseExpr: - ld iy, .t1 - call .testEQ - ld iy, .t2 - call .testEQ - ld iy, .t3 - call .testEQ - ld iy, .t4 - call .testEQ - ld iy, .t5 - call .testEQ - ld iy, .t6 - call .testEQ - ld iy, .t7 - call .testEQ - ld iy, .t8 - call .testEQ - ld iy, .t9 - call .testEQ - ret + ld hl, .alltests + ld ix, .test + jp testList -.testEQ: - push iy \ pop hl +.test: + push hl \ pop iy inc hl \ inc hl call parseExpr call assertZ ld l, (iy) ld h, (iy+1) - call assertEQW - jp nexttest + jp assertEQW .t1: .dw 7 @@ -166,3 +149,13 @@ testParseExpr: .t9: .dw 10 .db "2*3+4", 0 + +; There was this untested regression during the replacement of find-and-subst +; parseExpr to the recursive descent one. It was time consuming to find. Here +; it goes, here it stays. +.t10: + .dw '-'+1 + .db "'-'+1", 0 + +.alltests: + .dw .t1, .t2, .t3, .t4, .t5, .t6, .t7, .t8, .t9, .t10, 0 diff --git a/tools/tests/zasm/runtests.sh b/tools/tests/zasm/runtests.sh index 983e5c9..50282b2 100755 --- a/tools/tests/zasm/runtests.sh +++ b/tools/tests/zasm/runtests.sh @@ -13,9 +13,9 @@ cmpas() { echo ok else echo actual - echo $ACTUAL + echo "$ACTUAL" echo expected - echo $EXPECTED + echo "$EXPECTED" exit 1 fi }