From 03e529b762f6367ed995e189497bcf833fc3dc29 Mon Sep 17 00:00:00 2001
From: Virgil Dupras <hsoft@hardcoded.net>
Date: Mon, 9 Mar 2020 08:49:51 -0400
Subject: [PATCH] forth: simplify execution model and handle literals better

This scheme of "when we handle line-by-line, compile one word at a time then
execute" so that we could allow words like "CREATE" to call "readword" before
continuing was a bad scheme. It made things like branching outside of a colon
definition impossible.

This commit implement a new "litWord". When an undefined word is encountered at
compile time, it is included as-is in a string literal word. It is at run time
that we decide what to do with it.
---
 apps/forth/dict.asm       | 115 ++++++++++++++++------------
 apps/forth/dictionary.txt |   4 +-
 apps/forth/main.asm       |  38 +---------
 apps/forth/stack.asm      |  15 ++--
 apps/forth/util.asm       | 156 ++++++++++++++++++++++++++++++++++----
 5 files changed, 222 insertions(+), 106 deletions(-)

diff --git a/apps/forth/dict.asm b/apps/forth/dict.asm
index 48eaf61..06c569e 100644
--- a/apps/forth/dict.asm
+++ b/apps/forth/dict.asm
@@ -71,10 +71,27 @@ numberWord:
 NUMBER:
 	.dw	numberWord
 
+; Similarly to numberWord, this is not a real word, but a string literal.
+; Instead of being followed by a 2 bytes number, it's followed by a
+; null-terminated string. This is not expected to be called in a regular
+; context. Only words expecting those literals will look for them. This is why
+; the litWord triggers abort.
+litWord:
+	call	popRS
+	call	intoHL
+	call	printstr	; let's print the word before abort.
+	ld	hl, .msg
+	call	printstr
+	jp	abort
+.msg:
+	.db "undefined word", 0
+LIT:
+	.dw	litWord
 
 ; ( R:I -- )
 EXIT:
-	.db "EXIT", 0, 0, 0, 0
+	.db ";"
+	.fill 7
 	.dw 0
 	.dw nativeWord
 ; When we call the EXIT word, we have to do a "double exit" because our current
@@ -88,7 +105,7 @@ exit:
 	call	popRS
 	; We have a pointer to a word
 	push	hl \ pop iy
-	jr	compiledWord
+	jp	compiledWord
 
 ; ( R:I -- )
 QUIT:
@@ -96,31 +113,29 @@ QUIT:
 	.dw EXIT
 	.dw nativeWord
 quit:
-	ld	hl, FLAGS
-	set	FLAG_QUITTING, (hl)
-	jr	exit
+	jp	forthRdLine
 
 ABORT:
 	.db "ABORT", 0, 0, 0
 	.dw QUIT
 	.dw nativeWord
 abort:
+	; Reinitialize PS (RS is reinitialized in forthInterpret
 	ld	sp, (INITIAL_SP)
-	ld	hl, .msg
-	call	printstr
-	call	printcrlf
-	jr	quit
-.msg:
-	.db " err", 0
+	jp	forthRdLine
 
 BYE:
 	.db "BYE"
 	.fill 5
 	.dw ABORT
 	.dw nativeWord
-	ld	hl, FLAGS
-	set	FLAG_ENDPGM, (hl)
-	jp	exit
+	; Goodbye Forth! Before we go, let's restore the stack
+	ld	sp, (INITIAL_SP)
+	; unwind stack underflow buffer
+	pop	af \ pop af \ pop af
+	; success
+	xor	a
+	ret
 
 ; ( c -- )
 EMIT:
@@ -155,36 +170,44 @@ DEFINE:
 	.dw EXECUTE
 	.dw nativeWord
 	call	entryhead
-	jp	nz, quit
 	ld	de, compiledWord
 	ld	(hl), e
 	inc	hl
 	ld	(hl), d
 	inc	hl
-	push	hl \ pop iy
+	; At this point, we've processed the name literal following the ':'.
+	; What's next? We have, in IP, a pointer to words that *have already
+	; been compiled by INTERPRET*. All those bytes will be copied as-is.
+	; All we need to do is to know how many bytes to copy. To do so, we
+	; skip compwords until EXIT is reached.
+	ld	(HERE), hl	; where we write compwords.
+	; Let's save old RS TOS
+	ld	e, (ix)
+	ld	d, (ix+1)
 .loop:
-	call	readword
-	jr	nz, .end
-	call	.issemicol
-	jr	z, .end
-	call	compile
-	jp	nz, quit
+	call	RSIsEXIT
+	jr	z, .loopend
+	call	compSkip
 	jr	.loop
-.end:
-	; end chain with EXIT
-	ld	hl, EXIT+CODELINK_OFFSET
-	call	wrCompHL
-	ld	(HERE), iy
+.loopend:
+	; At this point, RS' TOS points to EXIT compword. We'll copy it too.
+	; We'll use LDIR. BC will be RSTOP-OLDRSTOP+2
+	ld	l, (ix)
+	ld	h, (ix+1)
+	inc	hl \ inc hl	; our +2
+	or	a		; clear carry
+	sbc	hl, de
+	ld	b, h
+	ld	c, l
+	; BC has proper count
+	ex	de, hl		; HL is our source (old RS' TOS)
+	ld	de, (HERE)	; and DE is our dest
+	ldir			; go!
+	; HL has our new RS' TOS
+	ld	(ix), l
+	ld	(ix+1), h
+	ld	(HERE), de	; update HERE
 	jp	exit
-.issemicol:
-	ld	a, (hl)
-	cp	';'
-	ret	nz
-	inc	hl
-	ld	a, (hl)
-	dec	hl
-	or	a
-	ret
 
 DOES:
 	.db "DOES>", 0, 0, 0
@@ -226,22 +249,17 @@ INTERPRET:
 	.dw KEY
 	.dw nativeWord
 interpret:
-	call	readword
-	jp	nz, quit
 	ld	iy, COMPBUF
+.loop:
+	call	readword
+	jr	nz, .end
 	call	compile
-	jp	nz, .notfound
-	ld	hl, EXIT+CODELINK_OFFSET
-	ld	(iy), l
-	ld	(iy+1), h
+	jr	.loop
+.end:
+	ld	hl, QUIT+CODELINK_OFFSET
+	call	wrCompHL
 	ld	iy, COMPBUF
 	jp	compiledWord
-.notfound:
-	ld	hl, .msg
-	call	printstr
-	jp	quit
-.msg:
-	.db	"not found", 0
 
 CREATE:
 	.db "CREATE", 0, 0
@@ -454,4 +472,3 @@ CONSTANT:
 	.dw DOES+CODELINK_OFFSET
 	.dw FETCH+CODELINK_OFFSET
 	.dw EXIT+CODELINK_OFFSET
-
diff --git a/apps/forth/dictionary.txt b/apps/forth/dictionary.txt
index bc2e7c3..748c520 100644
--- a/apps/forth/dictionary.txt
+++ b/apps/forth/dictionary.txt
@@ -17,7 +17,8 @@ DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
 
 *** Native Words ***
 
-: x ... ;   --              Define a new word
+: x ...     --              Define a new word
+;           R:I --          Exit a colon definition
 .           n --            Print n in its decimal form
 @           a -- n          Set n to value at address a
 !           n a --          Store n in address a
@@ -30,7 +31,6 @@ DOES>       --              See description at top of file
 DUP         a -- a a
 EMIT        c --            Spit char c to stdout
 EXECUTE     a --            Execute word at addr a
-EXIT        R:I --          Exit a colon definition
 HERE        -- a            Push HERE's address
 QUIT        R:drop --       Return to interpreter promp immediately
 KEY         -- c            Get char c from stdin
diff --git a/apps/forth/main.asm b/apps/forth/main.asm
index 807b38f..6abcb02 100644
--- a/apps/forth/main.asm
+++ b/apps/forth/main.asm
@@ -7,47 +7,17 @@
 .equ	NAMELEN		8
 ; Offset of the code link relative to the beginning of the word
 .equ	CODELINK_OFFSET	10
-; When set, the interpreter should abort parsing of current line and return to
-; prompt.
-.equ	FLAG_QUITTING	0
-; When set, the interpreter should quit
-.equ	FLAG_ENDPGM	1
 
 ; *** Variables ***
 .equ	INITIAL_SP	FORTH_RAMSTART
 .equ	CURRENT		@+2
 .equ	HERE		@+2
 .equ	INPUTPOS	@+2
-.equ	FLAGS		@+2
 ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
-.equ	COMPBUF		@+1
+.equ	COMPBUF		@+2
 .equ	FORTH_RAMEND	@+0x40
 
 ; *** Code ***
-MAIN:
-	.dw compiledWord
-	.dw INTERPRET+CODELINK_OFFSET
-	.dw CHKEND
-
-; If FLAG_ENDPGM is set, stop the program, else, tweak the RS so that we loop.
-CHKEND:
-	.dw nativeWord
-	ld	hl, FLAGS
-	bit	FLAG_ENDPGM, (hl)
-	jr	nz, .endpgm
-	; not quitting program, are we supposed to continue parsing line?
-	ld	hl, FLAGS
-	bit	FLAG_QUITTING, (hl)
-	jr	nz, forthRdLine
-	; Not quitting line either.
-	jr	forthInterpret
-.endpgm:
-	ld	sp, (INITIAL_SP)
-	; restore stack
-	pop	af \ pop af \ pop af
-	xor	a
-	ret
-
 forthMain:
 	; STACK OVERFLOW PROTECTION:
 	; To avoid having to check for stack underflow after each pop operation
@@ -62,16 +32,14 @@ forthMain:
 	ld	hl, FORTH_RAMEND
 	ld	(HERE), hl
 forthRdLine:
-	xor	a
-	ld	(FLAGS), a
 	ld	hl, msgOk
 	call	printstr
 	call	printcrlf
 	call	stdioReadLine
 	ld	(INPUTPOS), hl
 forthInterpret:
-	ld	ix, RS_ADDR
-	ld	iy, MAIN
+	ld	ix, RS_ADDR-2		; -2 because we inc-before-push
+	ld	iy, INTERPRET+CODELINK_OFFSET
 	jp	executeCodeLink
 msgOk:
 	.db	" ok", 0
diff --git a/apps/forth/stack.asm b/apps/forth/stack.asm
index bbebba2..a17b728 100644
--- a/apps/forth/stack.asm
+++ b/apps/forth/stack.asm
@@ -3,25 +3,26 @@
 ; PS is the most frequently used. However, this causes a problem with routine
 ; calls: because in Forth, the stack isn't balanced within each call, our return
 ; offset, when placed by a CALL, messes everything up. This is one of the
-; reasons why we need stack management routines below.
+; reasons why we need stack management routines below. IX always points to RS'
+; Top Of Stack (TOS)
 ;
 ; This return stack contain "Interpreter pointers", that is a pointer to the
 ; address of a word, as seen in a compiled list of words.
 
 ; Push value HL to RS
 pushRS:
+	inc	ix
+	inc	ix
 	ld	(ix), l
-	inc	ix
-	ld	(ix), h
-	inc	ix
+	ld	(ix+1), h
 	ret
 
 ; Pop RS' TOS to HL
 popRS:
-	dec ix
-	ld	h, (ix)
-	dec ix
 	ld	l, (ix)
+	ld	h, (ix+1)
+	dec ix
+	dec ix
 	ret
 
 ; Verifies that SP is within bounds. If it's not, call ABORT
diff --git a/apps/forth/util.asm b/apps/forth/util.asm
index 7f93211..d0e8e2e 100644
--- a/apps/forth/util.asm
+++ b/apps/forth/util.asm
@@ -43,6 +43,126 @@ readword:
 	inc	a	; unset Z
 	ret
 
+RSIsDE:
+	push	hl
+	ld	l, (ix)
+	ld	h, (ix+1)
+	ld	a, (hl)
+	cp	e
+	jr	nz, .end	; no
+	inc	hl
+	ld	a, (hl)
+	cp	d		; Z has our answer
+.end:
+	pop	hl
+	ret
+
+
+; Is RS' TOS pointing to a NUMBER word?
+; Z if yes, NZ if no.
+RSIsNUMBER:
+	push	de
+	ld	de, NUMBER
+	call	RSIsDE
+	pop	de
+	ret
+
+; Is RS' TOS pointing to a LIT word?
+; Z if yes, NZ if no.
+RSIsLIT:
+	push	de
+	ld	de, LIT
+	call	RSIsDE
+	pop	de
+	ret
+
+; Is RS' TOS pointing to EXIT?
+; Z if yes, NZ if no.
+RSIsEXIT:
+	push	de
+	ld	de, EXIT+CODELINK_OFFSET
+	call	RSIsDE
+	pop	de
+	ret
+
+; Skip the compword where RS' TOS is currently pointing. If it's a regular word,
+; it's easy: we inc by 2. If it's a NUMBER, we inc by 4. If it's a LIT, we skip
+; to after null-termination.
+compSkip:
+	push	hl
+	ld	l, (ix)
+	ld	h, (ix+1)
+	; At the minimum, we skip by 2
+	inc	hl \ inc hl
+	call	RSIsNUMBER
+	jr	z, .isNum
+	call	RSIsLIT
+	jr	nz, .end	; A word
+	; We have a literal
+	call	strskip
+	inc	hl		; byte after word termination
+	jr	.end
+.isNum:
+	; skip by 4
+	inc	hl \ inc hl
+.end:
+	; HL is good, write it to RS
+	ld	(ix), l
+	ld	(ix+1), h
+	pop	hl
+	ret
+
+; Checks RS' TOS and, if it points to a string literal (LIT), makes HL point
+; to it and advance IP to byte following null-termination.
+; If it doesn't, things get interesting: If it's a word reference, then it's
+; not an invalid literal. For example, one could want to redefine an existing
+; word. So in that case, we'll copy the word's name on the pad (it might not be
+; null-terminated) and set HL to point to it.
+; How do we know that our reference is a word reference (it could be, for
+; example, a NUMBER reference)? We check that its address is more than QUIT, the
+; second word in our dict. We don't accept EXIT because it's the termination
+; word. Yeah, it means that ";" can't be overridden...
+; If name can't be read, we abort
+readCompWord:
+	; In all cases, we want RS' TOS in HL. Let's get it now.
+	ld	l, (ix)
+	ld	h, (ix+1)
+	call	RSIsLIT
+	jr	nz, .notLIT
+	; RS TOS is a LIT, make HL point to string, then skip this RS compword.
+	inc	hl \ inc hl	; HL now points to string itself
+	jr	compSkip
+.notLIT:
+	; Alright, not a literal, but is it a word? If it's not a number, then
+	; it's a word.
+	call	RSIsNUMBER
+	jr	z, .notWord
+	; Not a number, then it's a word. Copy word to pad and point to it.
+	call	intoHL
+	or	a		; clear carry
+	ld	de, CODELINK_OFFSET
+	sbc	hl, de
+	; That's our return value
+	push	hl		; --> lvl 1
+	; HL now points to word offset, let'd copy it to pad
+	ex	de, hl
+	call	pad
+	ex	de, hl
+	ld	bc, NAMELEN
+	ldir
+	; null-terminate
+	xor	a
+	ld	(de), a
+	call	compSkip
+	pop	hl		; <-- lvl 1
+	ret
+.notWord:
+	ld	hl, .msg
+	call	printstr
+	jp	abort
+.msg:
+	.db "word expected", 0
+
 ; For DE pointing to a dict entry, set DE to point to the previous entry.
 ; Z is set if DE point to 0 (no entry). NZ if not.
 prev:
@@ -84,38 +204,48 @@ wrCompHL:
 
 ; Compile word string at (HL) and write down its compiled version in IY,
 ; advancing IY to the byte next to the last written byte.
-; Set Z on success, unset on failure.
 compile:
 	call	find
 	jr	nz, .maybeNum
-	ret	nz
 	; DE is a word offset, we need a code link
 	ld	hl, CODELINK_OFFSET
 	add	hl, de
 	xor	a	; set Z
 	jr	wrCompHL
 .maybeNum:
+	push	hl		; --> lvl 1. save string addr
 	call	parseLiteral
-	ret	nz
+	jr	nz, .undef
+	pop	hl		; <-- lvl 1
 	; a valid number!
 	ld	hl, NUMBER
 	call	wrCompHL
 	ex	de, hl		; number in HL
 	jr	wrCompHL
-	ret	z
-	; unknown name
-	ld	hl, .msg
-	call	printstr
-	jp	abort
-.msg:
-	.db "unknown name", 0
+.undef:
+	; When encountering an undefined word during compilation, we spit a
+	; reference to litWord, followed by the null-terminated word.
+	; This way, if a preceding word expect a string literal, it will read it
+	; by calling readCompWord, and if it doesn't, the routine will be
+	; called, triggering an abort.
+	ld	hl, LIT
+	call	wrCompHL
+	pop	hl		; <-- lvl 1. recall string addr
+.writeLit:
+	ld	a, (hl)
+	ld	(iy), a
+	inc	hl
+	inc	iy
+	or	a
+	jr	nz, .writeLit
+	ret
+
 
 ; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
 ; HL points to new (HERE)
-; Set Z if name could be read, NZ if not
 entryhead:
-	call	readword
-	ret	nz
+	call	readCompWord
+	call	printstr
 	ld	de, (HERE)
 	call	strcpy
 	ex	de, hl		; (HERE) now in HL