From 213614af33fd41aac7dc127974dd2e5b877d962f Mon Sep 17 00:00:00 2001
From: Virgil Dupras <hsoft@hardcoded.net>
Date: Sun, 29 Dec 2019 11:42:18 -0500
Subject: [PATCH] lib/expr: make recursion process a bit more orderly

Instead of going left and right, finding operator chars and replacing them
with nulls, we parse expressions in a more orderly manner, one chunk at a
time. I think it qualifies as "recursive descent", but I'm not sure.

This allows us to preserve the string we parse and should also make the
implementation of parens much easier.
---
 apps/lib/expr.asm              | 368 +++++++++++++++++++++------------
 tools/tests/unit/common.asm    |  18 ++
 tools/tests/unit/test_expr.asm |  39 ++--
 tools/tests/zasm/runtests.sh   |   4 +-
 4 files changed, 270 insertions(+), 159 deletions(-)

diff --git a/apps/lib/expr.asm b/apps/lib/expr.asm
index a847d7a..ccfa4f0 100644
--- a/apps/lib/expr.asm
+++ b/apps/lib/expr.asm
@@ -1,7 +1,5 @@
 ; *** Requirements ***
-; findchar
-; multDEBC
-; callIXI
+; ari
 ;
 ; *** Defines ***
 ;
@@ -11,167 +9,132 @@
 ;
 ; *** Code ***
 ;
-
 ; Parse expression in string at (HL) and returns the result in DE.
-; **This routine mutates (HL).**
-; We expect (HL) to be disposable: we mutate it to avoid having to make a copy.
+; This routine needs to be able to mutate (HL), but it takes care of restoring
+; the string to its original value before returning.
 ; Sets Z on success, unset on error.
 parseExpr:
+	push	iy
 	push	ix
 	push	hl
-	call	_parseExpr
-	push	ix \ pop de
+	call	_parseAddSubst
 	pop	hl
 	pop	ix
+	pop	iy
 	ret
 
-_parseExpr:
-	ld	de, exprTbl
-.loop:
-	ld	a, (de)
-	or	a
-	jp	z, EXPR_PARSE	; no operator, just parse the literal
-	push	de		; --> lvl 1. save operator row
-	call	_findAndSplit
-	jr	z, .found
-	pop	de		; <-- lvl 1
-	inc	de \ inc de \ inc de
-	jr	.loop
-.found:
-	; Operator found, string splitted. Left in (HL), right in (DE)
-	call	_resolveLeftAndRight
-	; Whether _resolveLeftAndRight was a success, we pop our lvl 1 stack
-	; out, which contains our operator row. We pop it in IX.
-	; L-R numbers are parsed in HL (left) and DE (right).
-	pop	ix		; <-- lvl 1
+; *** Op signature ***
+; The signature of "operator routines" (.plus, .mult, etc.) below is this:
+; Combine HL (left) and DE (right) with an operator and put the result in DE.
+; Destroys HL and A. Never fails. Yes, that's a problem for division by zero.
+; Don't divide by zero. All other registers are protected.
+
+; Given a running result in DE, a rest-of-expression in (HL), a parse routine
+; in IY and an apply "operator routine" in IX, (HL/DE --> DE)
+; With that, parse the rest of (HL) and apply the operation on it, then place
+; HL at the end of the parsed string, with A containing the last char of it,
+; which can be either an operator or a null char.
+; Z for success. On error (NZ), the operator routine is not called.
+_parseApply:
+	push	de	; --> lvl 1, left result
+	push	ix	; --> lvl 2, routine to apply
+	inc	hl	; after op char
+	call	callIY	; --> DE
+	pop	ix	; <-- lvl 2, routine to apply
+	; Here we do some stack kung fu. We have, in HL, a string pointer we
+	; want to keep. We have, in (SP), our left result we want to use.
+	ex	(sp), hl	; <-> lvl 1
-	ret	nz
-	; Resolving left and right succeeded, proceed!
-	inc	ix		; point to routine pointer
-	call	callIXI
-	push	de \ pop ix
-	cp	a		; ensure Z
+	jr	nz, .end	; parse error. lvl 1 is still pushed: unwind it
+	push	af	; --> lvl 2, save ending operator
+	call	callIX
+	pop	af	; <-- lvl 2, restore operator.
+.end:
+	pop	hl	; <-- lvl 1, restore str pointer
 	ret
 
-; Given a string in (HL) and a separator char in A, return a splitted string,
-; that is, the same (HL) string but with the found A char replaced by a null
-; char. DE points to the second part of the split.
-; Sets Z if found, unset if not found.
-_findAndSplit:
-	push	hl
-	call	.skipCharLiteral
-	call	findchar
-	jr	nz, .end	; nothing found
-	; Alright, we have our char and we're pointing at it. Let's replace it
-	; with a null char.
-	xor	a
-	ld	(hl), a		; + changed to \0
-	inc	hl
-	ex	de, hl		; DE now points to the second part of the split
-	cp	a		; ensure Z
-.end:
-	pop	hl		; HL is back to the start
-	ret
-
-.skipCharLiteral:
-	; special case: if our first char is ', skip the first 3 characters
-	; so that we don't mistake a literal for an iterator
-	push	af
-	ld	a, (hl)
-	cp	0x27		; '
-	jr	nz, .skipCharLiteralEnd	; not a '
-	xor	a	; check for null char during skipping
-	; skip 3
-	inc	hl
-	cp	(hl)
-	jr	z, .skipCharLiteralEnd
-	inc	hl
-	cp	(hl)
-	jr	z, .skipCharLiteralEnd
-	inc	hl
-.skipCharLiteralEnd:
-	pop	af
-	ret
-.find:
-
-; parse expression on the left (HL) and the right (DE) and put the results in
-; HL (left) and DE (right)
-_resolveLeftAndRight:
-	ld	a, (hl)
+; Unless there's an error, this routine completely resolves any valid expression
+; from (HL) and puts the result in DE.
+; Destroys HL
+; Z for success.
+_parseAddSubst:
+	call	_parseMultDiv
+	ret	nz
+.loop:
+	; do we have an operator?
 	or	a
-	jr	z, .noleft
-	; Parse left operand in (HL)
-	push	de		; --> lvl 1
-	call	parseExpr
-	pop	hl		; <-- lvl 1, orig DE
-	ret	nz		; return immediately if error
-.parseright:
-	; Now we have parsed everything to the left and we have its result in
-	; DE. What we need to do now is the same thing on (DE) and then apply
-	; the + operator. Let's save DE somewhere and parse this.
-	push	de	; --> lvl 1
-	; right expr in (HL)
-	call	parseExpr	; DE is set
-	pop	hl	; <-- lvl 1. left value
-	ret		; Z is parseExpr's result
-.noleft:
-	; special case: is (HL) zero? If yes, it means that our left operand
-	; is empty. consider it as 0
-	ex	de, hl	; (DE) goes in (HL) for .parseright
-	ld	de, 0
-	jr	.parseright
-
-; Routines in here all have the same signature: they take two numbers, DE (left)
-; and IX (right), apply the operator and put the resulting number in DE.
-; The table has 3 bytes per row: 1 byte for operator and 2 bytes for routine
-; pointer.
-exprTbl:
-	.db	'+'
-	.dw	.plus
-	.db	'-'
-	.dw	.minus
-	.db	'*'
-	.dw	.mult
-	.db	'/'
-	.dw	.div
-	.db	'%'
-	.dw	.mod
-	.db	'&'
-	.dw	.and
-	.db	0x7c		; '|'
-	.dw	.or
-	.db	'^'
-	.dw	.xor
-	.db	'}'
-	.dw	.rshift
-	.db	'{'
-	.dw	.lshift
-	.db	0		; end of table
-
+	ret	z	; null char, we're done
+	; We have an operator. Resolve the rest of the expr then apply it.
+	ld	ix, .plus
+	cp	'+'
+	jr	z, .found
+	ld	ix, .minus
+	cp	'-'
+	ret	nz		; unknown char, error
+.found:
+	ld	iy, _parseMultDiv
+	call	_parseApply
+	ret	nz
+	jr	.loop
 .plus:
 	add	hl, de
 	ex	de, hl
 	ret
-
 .minus:
-	or	a	; clear carry
+	or	a		; clear carry
 	sbc	hl, de
 	ex	de, hl
 	ret
 
+; Parse (HL) as far as it can, that is, resolving expressions at its level or
+; lower (anything but + and -).
+; A is set to the char it stopped on. Z is also returned when that char is not
+; one of *, / or %, so it is up to the caller to accept or reject it: null when
+; we're done parsing, + and - if there's still work to do.
+; (HL) points to that char.
+; DE is set to the numerical value of everything that was parsed left of (HL).
+_parseMultDiv:
+	call	_parseBitShift
+	ret	nz		; error in the first operand
+.loop:
+	; do we have an operator?
+	or	a
+	ret	z	; null char, we're done
+	; We have an operator. Resolve the rest of the expr then apply it.
+	ld	ix, .mult
+	cp	'*'
+	jr	z, .found
+	ld	ix, .div
+	cp	'/'
+	jr	z, .found
+	ld	ix, .mod
+	cp	'%'
+	jr	z, .found
+	; not one of ours: might not be an error, return success
+	cp	a		; comparing A with itself sets Z
+	ret
+.found:
+	ld	iy, _parseBitShift	; right operand is parsed one layer lower
+	call	_parseApply	; DE = DE <op> right operand, A = next char
+	ret	nz		; error in the right operand
+	jr	.loop		; keep going for as long as we see *, / or %
+
 .mult:
+	push	bc		; --> lvl 1
 	ld	b, h
 	ld	c, l
 	call	multDEBC	; --> HL
+	pop	bc		; <-- lvl 1
 	ex	de, hl
 	ret
 
 .div:
 	; divide takes HL/DE
-	push	bc
+	ld	a, l
+	push	bc		; --> lvl 1
 	call	divide
 	ld	e, c
 	ld	d, b
-	pop	bc
+	pop	bc		; <-- lvl 1
 	ret
 
 .mod:
@@ -179,6 +142,39 @@ exprTbl:
 	ex	de, hl
 	ret
 
+; Same as _parseMultDiv, but a layer lower: &, |, ^, } and { over _parseNumber.
+_parseBitShift:
+	call	_parseNumber
+	ret	nz		; error in the first operand
+.loop:
+	; do we have an operator?
+	or	a
+	ret	z	; null char, we're done
+	; We have an operator. Resolve the rest of the expr then apply it.
+	ld	ix, .and
+	cp	'&'
+	jr	z, .found
+	ld	ix, .or
+	cp	0x7c		; '|'
+	jr	z, .found
+	ld	ix, .xor
+	cp	'^'
+	jr	z, .found
+	ld	ix, .rshift
+	cp	'}'
+	jr	z, .found
+	ld	ix, .lshift
+	cp	'{'
+	jr	z, .found
+	; not one of ours: might not be an error, return success
+	cp	a		; comparing A with itself sets Z
+	ret
+.found:
+	ld	iy, _parseNumber	; right operand is a single number
+	call	_parseApply	; DE = DE <op> right operand, A = next char
+	ret	nz		; error in the right operand
+	jr	.loop		; keep going for as long as we see one of our ops
+
 .and:
 	ld	a, h
 	and	d
@@ -209,26 +205,130 @@ exprTbl:
 	ld	a, e
 	and	0xf
 	ret	z
-	push	bc
+	push	bc		; --> lvl 1
 	ld	b, a
 .rshiftLoop:
 	srl	h
 	rr	l
 	djnz	.rshiftLoop
 	ex	de, hl
-	pop	bc
+	pop	bc		; <-- lvl 1
 	ret
 
 .lshift:
 	ld	a, e
 	and	0xf
 	ret	z
-	push	bc
+	push	bc		; --> lvl 1
 	ld	b, a
 .lshiftLoop:
 	sla	l
 	rl	h
 	djnz	.lshiftLoop
 	ex	de, hl
-	pop	bc
+	pop	bc		; <-- lvl 1
 	ret
+
+; Parse first number of expression at (HL). A valid number is anything that can
+; be parsed by EXPR_PARSE and is followed either by a null char or by any of the
+; operator chars. This routine takes care of replacing that operator char with
+; a null char before calling EXPR_PARSE and then puts the operator back
+; afterwards. A leading minus sign negates the number that follows it.
+; HL is moved to the char following the number having been parsed.
+; DE contains the numerical result.
+; A contains the operator char following the number (or null). Only on success.
+; Z for success. The shadow AF register is overwritten.
+_parseNumber:
+	; Special case 1: number starts with '-'
+	ld	a, (hl)
+	cp	'-'
+	jr	nz, .skip1
+	; We have a negative number. Parse normally, then subst from zero
+	inc	hl
+	call	_parseNumber	; DE = value of what follows the minus sign
+	push	hl		; --> lvl 1
+	ex	af, af'		; preserve flags
+	or	a		; clear carry
+	ld	hl, 0
+	sbc	hl, de		; HL = 0 - DE
+	ex	de, hl		; negated value goes back in DE
+	ex	af, af'		; restore flags
+	pop	hl		; <-- lvl 1
+	ret
+.skip1:
+	; End of special case 1
+	push	ix		; IX is read below to fetch the parsed value
+	; Copy beginning of string to DE, we'll need it later
+	ld	d, h
+	ld	e, l
+
+	; Special case 2: we have a char literal. If we have a char literal, we
+	; don't want to go through the "_isOp" loop below because if that char
+	; is one of our operators, we're messing up our processing. So, set
+	; ourselves 3 chars further and continue from there. EXPR_PARSE will
+	; take care of validating those 3 chars.
+	cp	0x27		; apostrophe (') char
+	jr	nz, .skip2
+	; "'". advance HL by 3
+	inc	hl \ inc hl \ inc hl	; NOTE(review): no null check on skipped chars
+	; End of special case 2
+.skip2:
+
+	dec	hl	; offset "inc-hl-before" in loop
+.loop:
+	inc	hl
+	ld	a, (hl)
+	call	_isOp		; Z when A is an operator char or null
+	jr	nz, .loop
+	; (HL) and A is an op or a null
+	push	af	; --> lvl 1 save op
+	push	hl	; --> lvl 2 save end of string
+	; temporarily put a null char instead of the op
+	xor	a
+	ld	(hl), a
+	ex	de, hl	; rewind to beginning of number
+	call	EXPR_PARSE
+	ex	af, af'		; keep result flags away while we restore (HL)
+	push	ix \ pop de	; result in DE
+	pop	hl	; <-- lvl 2, end of string
+	pop	af	; <-- lvl 1, saved op
+	ld	(hl), a		; put the op char back in the string
+	ex	af, af'		; restore Z from EXPR_PARSE
+	jr	nz, .end	; parse error: return NZ
+	; HL is currently at the end of the number's string
+	; On success, swap the saved op back in A. Its flags still have Z from _isOp
+	ex	af, af'
+.end:
+	pop	ix
+	ret
+
+; Sets Z if A contains a valid operator char or a null char. Destroys AF'.
+_isOp:
+	or	a
+	ret	z
+	push	hl	; --> lvl 1
+	; Set A' to zero for quick end-of-table checks
+	ex	af, af'
+	xor	a
+	ex	af, af'
+	ld	hl, .exprChars
+.loop:
+	cp	(hl)
+	jr	z, .found
+	ex	af, af'
+	cp	(hl)
+	jr	z, .notFound	; end of table
+	ex	af, af'
+	inc	hl		; next char
+	jr	.loop
+.notFound:
+	ex	af, af'			; restore orig A
+	or	a			; unset Z. A is never 0 here. inc a broke on 0xff
+.found:
+	; Z already set
+	pop	hl	; <-- lvl 1
+	ret
+
+.exprChars:
+	.db	"+-*/%&|^{}", 0
+
diff --git a/tools/tests/unit/common.asm b/tools/tests/unit/common.asm
index 4377619..2139009 100644
--- a/tools/tests/unit/common.asm
+++ b/tools/tests/unit/common.asm
@@ -51,6 +51,24 @@ assertEQW:
 .msg:
 	.db	"HL != DE", CR, LF, 0
 
+; Given a list of pointers to test data structures in HL and a pointer to a test
+; routine in IX, call (IX) with HL pointing to the test structure until the list
+; points to a zero. See testParseExpr in test_expr for an example usage.
+testList:
+	push	hl		; --> lvl 1
+	call	intoHL		; presumably HL = word at (HL), the test structure
+	ld	a, h
+	or	l
+	jr	z, .end		; zero pointer: end of the list
+	call	callIX		; run the test routine on this structure
+	call	nexttest
+	pop	hl		; <-- lvl 1
+	inc	hl \ inc hl	; advance to the next pointer of the list
+	jr	testList
+.end:
+	pop	hl		; <-- lvl 1
+	ret
+
 nexttest:
 	ld	a, (testNum)
 	inc	a
diff --git a/tools/tests/unit/test_expr.asm b/tools/tests/unit/test_expr.asm
index 1eaa6b5..7ba3f36 100644
--- a/tools/tests/unit/test_expr.asm
+++ b/tools/tests/unit/test_expr.asm
@@ -109,35 +109,18 @@ test:
 	halt
 
 testParseExpr:
-	ld	iy, .t1
-	call	.testEQ
-	ld	iy, .t2
-	call	.testEQ
-	ld	iy, .t3
-	call	.testEQ
-	ld	iy, .t4
-	call	.testEQ
-	ld	iy, .t5
-	call	.testEQ
-	ld	iy, .t6
-	call	.testEQ
-	ld	iy, .t7
-	call	.testEQ
-	ld	iy, .t8
-	call	.testEQ
-	ld	iy, .t9
-	call	.testEQ
-	ret
+	ld	hl, .alltests
+	ld	ix, .test
+	jp	testList
 
-.testEQ:
-	push	iy \ pop hl
+.test:
+	push	hl \ pop iy
 	inc	hl \ inc hl
 	call	parseExpr
 	call	assertZ
 	ld	l, (iy)
 	ld	h, (iy+1)
-	call	assertEQW
-	jp	nexttest
+	jp	assertEQW
 
 .t1:
 	.dw	7
@@ -166,3 +149,13 @@ testParseExpr:
 .t9:
 	.dw	10
 	.db	"2*3+4", 0
+
+; There was this untested regression during the replacement of the
+; find-and-subst parseExpr by the recursive descent one. It was time consuming
+; to find. Here it goes, here it stays.
+.t10:
+	.dw	'-'+1
+	.db	"'-'+1", 0
+
+.alltests:
+	.dw	.t1, .t2, .t3, .t4, .t5, .t6, .t7, .t8, .t9, .t10, 0
diff --git a/tools/tests/zasm/runtests.sh b/tools/tests/zasm/runtests.sh
index 983e5c9..50282b2 100755
--- a/tools/tests/zasm/runtests.sh
+++ b/tools/tests/zasm/runtests.sh
@@ -13,9 +13,9 @@ cmpas() {
         echo ok
     else
         echo actual
-        echo $ACTUAL
+        echo "$ACTUAL"
         echo expected
-        echo $EXPECTED
+        echo "$EXPECTED"
         exit 1
     fi
 }