From c3838714d5590960d3c97705cd6cd6bda193d13b Mon Sep 17 00:00:00 2001
From: Virgil Dupras <hsoft@hardcoded.net>
Date: Fri, 13 Mar 2020 16:01:09 -0400
Subject: [PATCH] forth: improve execution model

My approach with RS was slightly wrong: RS' TOP was always containing current
IP. It worked, but it was problematic when came the time to introduce
RS-modifying words: it's impossible to modify RS in a word without immediately
messing your flow.

Therefore, what used to be RS' TOS has to be a variable that isn't changed
midway by RS-modifying words. I guess that's why RS is called *return* stack...
---
 apps/forth/dict.asm | 180 +++++++++++++++++++++-----------------------
 apps/forth/main.asm |  45 ++++++++---
 apps/forth/util.asm |  24 ++++--
 emul/.gitignore     |   1 +
 emul/Makefile       |   3 +
 emul/forth/stage1.c |  16 +++-
 6 files changed, 159 insertions(+), 110 deletions(-)

diff --git a/apps/forth/dict.asm b/apps/forth/dict.asm
index a4066f0..189f994 100644
--- a/apps/forth/dict.asm
+++ b/apps/forth/dict.asm
@@ -7,43 +7,64 @@
 ;
 ; The code pointer point to "word routines". These routines expect to be called
 ; with IY pointing to the PF. They themselves are expected to end by jumping
-; to the address at the top of the Return Stack. They will usually do so with
-; "jp exit".
+; to the address at (IP). They will usually do so with "jp next".
 ;
 ; That's for "regular" words (words that are part of the dict chain). There are
 ; also "special words", for example NUMBER, LIT, FBR, that have a slightly
 ; different structure. They're also a pointer to an executable, but as for the
 ; other fields, the only one they have is the "flags" field.
 
+; This routine is jumped to at the end of every word. In it, we jump to current
+; IP, but we also take care of increasing it my 2 before jumping
+next:
+	; Before we continue: are stacks within bounds?
+	call	chkPS
+	ld	de, (IP)
+	ld	h, d
+	ld	l, e
+	inc	de \ inc de
+	ld	(IP), de
+	; HL is an atom list pointer. We need to go into it to have a wordref
+	ld	e, (hl)
+	inc	hl
+	ld	d, (hl)
+	push	de
+	jp	EXECUTE+2
+
+
 ; Execute a word containing native code at its PF address (PFA)
 nativeWord:
 	jp	(iy)
 
-; Execute a list of atoms, which usually ends with EXIT.
-; IY points to that list.
+; Execute a list of atoms, which always end with EXIT.
+; IY points to that list. What do we do:
+; 1. Push current IP to RS
+; 2. Set new IP to the second atom of the list
+; 3. Execute the first atom of the list.
 compiledWord:
+	ld	hl, (IP)
+	call	pushRS
 	push	iy \ pop hl
 	inc	hl
 	inc	hl
-	; HL points to next Interpreter pointer.
-	call	pushRS
+	ld	(IP), hl
+	; IY still is our atom reference...
 	ld	l, (iy)
 	ld	h, (iy+1)
-	push	hl \ pop iy
-	; IY points to code link
-	jp	executeCodeLink
+	push	hl	; argument for EXECUTE
+	jp	EXECUTE+2
 
 ; Pushes the PFA directly
 cellWord:
 	push	iy
-	jp	exit
+	jp	next
 
 ; Pushes the address in the first word of the PF
 sysvarWord:
 	ld	l, (iy)
 	ld	h, (iy+1)
 	push	hl
-	jp	exit
+	jp	next
 
 ; The word was spawned from a definition word that has a DOES>. PFA+2 (right
 ; after the actual cell) is a link to the slot right after that DOES>.
@@ -59,20 +80,16 @@ doesWord:
 ; This is not a word, but a number literal. This works a bit differently than
 ; others: PF means nothing and the actual number is placed next to the
 ; numberWord reference in the compiled word list. What we need to do to fetch
-; that number is to play with the Return stack: We pop it, read the number, push
-; it to the Parameter stack and then push an increase Interpreter Pointer back
-; to RS.
+; that number is to play with the IP.
 numberWord:
-	ld	l, (ix)
-	ld	h, (ix+1)
+	ld	hl, (IP)	; (HL) is out number
 	ld	e, (hl)
 	inc	hl
 	ld	d, (hl)
 	inc	hl
-	ld	(ix), l
-	ld	(ix+1), h
+	ld	(IP), hl	; advance IP by 2
 	push	de
-	jp	exit
+	jp	next
 
 	.db	0b10		; Flags
 NUMBER:
@@ -84,8 +101,7 @@ NUMBER:
 ; context. Only words expecting those literals will look for them. This is why
 ; the litWord triggers abort.
 litWord:
-	call	popRS
-	call	intoHL
+	ld	hl, (IP)
 	call	printstr	; let's print the word before abort.
 	ld	hl, .msg
 	call	printstr
@@ -97,24 +113,16 @@ litWord:
 LIT:
 	.dw	litWord
 
+; Pop previous IP from Return stack and execute it.
 ; ( R:I -- )
 	.db ";"
 	.fill 7
 	.dw 0
 EXIT:
 	.dw nativeWord
-; When we call the EXIT word, we have to do a "double exit" because our current
-; Interpreter pointer is pointing to the word *next* to our EXIT reference when,
-; in fact, we want to continue processing the one above it.
 	call	popRS
-exit:
-	; Before we continue: is SP within bounds?
-	call	chkPS
-	; we're good
-	call	popRS
-	; We have a pointer to a word
-	push	hl \ pop iy
-	jp	compiledWord
+	ld	(IP), hl
+	jp	next
 
 ; ( R:I -- )
 	.db "QUIT"
@@ -133,9 +141,9 @@ quit:
 ABORT:
 	.dw nativeWord
 abort:
-	; Reinitialize PS (RS is reinitialized in forthInterpret
+	; Reinitialize PS (RS is reinitialized in forthInterpret)
 	ld	sp, (INITIAL_SP)
-	jp	forthRdLine
+	jp	forthRdLineNoOk
 ABORTREF:
 	.dw ABORT
 
@@ -163,7 +171,7 @@ EMIT:
 	pop	hl
 	ld	a, l
 	call	stdioPutC
-	jp	exit
+	jp	next
 
 ; ( c port -- )
 	.db "PC!"
@@ -175,7 +183,7 @@ PSTORE:
 	pop	bc
 	pop	hl
 	out	(c), l
-	jp	exit
+	jp	next
 
 ; ( port -- c )
 	.db "PC@"
@@ -188,7 +196,7 @@ PFETCH:
 	ld	h, 0
 	in	l, (c)
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( addr -- )
 	.db "EXECUTE"
@@ -214,10 +222,7 @@ DEFINE:
 	.dw nativeWord
 	call	entryhead
 	ld	de, compiledWord
-	ld	(hl), e
-	inc	hl
-	ld	(hl), d
-	inc	hl
+	call	DEinHL
 	; At this point, we've processed the name literal following the ':'.
 	; What's next? We have, in IP, a pointer to words that *have already
 	; been compiled by INTERPRET*. All those bytes will be copied as-is.
@@ -225,8 +230,7 @@ DEFINE:
 	; skip compwords until EXIT is reached.
 	ex	de, hl		; DE is our dest
 	ld	(HERE), de	; update HERE
-	ld	l, (ix)
-	ld	h, (ix+1)
+	ld	hl, (IP)
 .loop:
 	call	HLPointsEXIT
 	jr	z, .loopend
@@ -236,22 +240,19 @@ DEFINE:
 	; skip EXIT
 	inc	hl \ inc hl
 	; We have out end offset. Let's get our offset
-	ld	e, (ix)
-	ld	d, (ix+1)
+	ld	de, (IP)
 	or	a		; clear carry
 	sbc	hl, de
 	; HL is our copy count.
 	ld	b, h
 	ld	c, l
-	ld	l, (ix)
-	ld	h, (ix+1)
+	ld	hl, (IP)
 	ld	de, (HERE)	; recall dest
 	; copy!
 	ldir
-	ld	(ix), l
-	ld	(ix+1), h
+	ld	(IP), hl
 	ld	(HERE), de
-	jp	exit
+	jp	next
 
 
 	.db "DOES>"
@@ -264,18 +265,17 @@ DOES:
 	; need to do.
 	; 1. Change the code link to doesWord
 	; 2. Leave 2 bytes for regular cell variable.
-	; 3. Get the Interpreter pointer from the stack and write this down to
-	;    entry PFA+2.
-	; 3. exit. Because we've already popped RS, a regular exit will abort
-	;    colon definition, so we're good.
+	; 3. Write down IP+2 to entry.
+	; 3. exit. we're done here.
 	ld	iy, (CURRENT)
 	ld	hl, doesWord
 	call	wrCompHL
 	inc	iy \ inc iy		; cell variable space
-	call	popRS
+	ld	hl, (IP)
+	inc	hl \ inc hl
 	call	wrCompHL
 	ld	(HERE), iy
-	jp	exit
+	jp	EXIT+2
 
 
 	.db "IMMEDIA"
@@ -286,7 +286,7 @@ IMMEDIATE:
 	ld	hl, (CURRENT)
 	dec	hl
 	set	FLAG_IMMED, (hl)
-	jp	exit
+	jp	next
 
 ; ( n -- )
 	.db "LITERAL"
@@ -300,7 +300,7 @@ LITERAL:
 	pop	de		; number from stack
 	call	DEinHL
 	ld	(HERE), hl
-	jp	exit
+	jp	next
 
 
 	.db	"'"
@@ -313,7 +313,7 @@ APOS:
 	call	find
 	jr	nz, .notfound
 	push	de
-	jp	exit
+	jp	next
 .notfound:
 	ld	hl, .msg
 	call	printstr
@@ -337,7 +337,7 @@ APOSI:
 	pop	de		; <-- lvl 1
 	call	DEinHL
 	ld	(HERE), hl
-	jp	exit
+	jp	next
 .notfound:
 	ld	hl, .msg
 	call	printstr
@@ -356,7 +356,7 @@ KEY:
 	ld	h, 0
 	ld	l, a
 	push	hl
-	jp	exit
+	jp	next
 
 	.db "CREATE"
 	.fill 1
@@ -365,14 +365,13 @@ KEY:
 CREATE:
 	.dw nativeWord
 	call	entryhead
-	jp	nz, quit
 	ld	de, cellWord
 	ld	(hl), e
 	inc	hl
 	ld	(hl), d
 	inc	hl
 	ld	(HERE), hl
-	jp	exit
+	jp	next
 
 	.db "HERE"
 	.fill 3
@@ -403,7 +402,7 @@ DOT:
 	call	pad
 	call	fmtDecimalS
 	call	printstr
-	jp	exit
+	jp	next
 
 ; ( n a -- )
 	.db "!"
@@ -416,7 +415,7 @@ STORE:
 	pop	hl
 	ld	(iy), l
 	ld	(iy+1), h
-	jp	exit
+	jp	next
 
 ; ( n a -- )
 	.db "C!"
@@ -428,7 +427,7 @@ CSTORE:
 	pop	hl
 	pop	de
 	ld	(hl), e
-	jp	exit
+	jp	next
 
 ; ( a -- n )
 	.db "@"
@@ -440,7 +439,7 @@ FETCH:
 	pop	hl
 	call	intoHL
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( a -- c )
 	.db "C@"
@@ -453,7 +452,7 @@ CFETCH:
 	ld	l, (hl)
 	ld	h, 0
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( -- a )
 	.db "LIT@"
@@ -464,7 +463,7 @@ LITFETCH:
 	.dw nativeWord
 	call	readLITTOS
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( a b -- b a )
 	.db "SWAP"
@@ -476,7 +475,7 @@ SWAP:
 	pop	hl
 	ex	(sp), hl
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( a b c d -- c d a b )
 	.db "2SWAP"
@@ -493,7 +492,7 @@ SWAP2:
 	push	de		; D
 	push	hl		; A
 	push	bc		; B
-	jp	exit
+	jp	next
 
 ; ( a -- a a )
 	.db "DUP"
@@ -505,7 +504,7 @@ DUP:
 	pop	hl
 	push	hl
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( a b -- a b a b )
 	.db "2DUP"
@@ -520,7 +519,7 @@ DUP2:
 	push	hl
 	push	de
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( a b -- a b a )
 	.db "OVER"
@@ -534,7 +533,7 @@ OVER:
 	push	de
 	push	hl
 	push	de
-	jp	exit
+	jp	next
 
 ; ( a b c d -- a b c d a b )
 	.db "2OVER"
@@ -553,7 +552,7 @@ OVER2:
 	push	hl	; D
 	push	iy	; A
 	push	bc	; B
-	jp	exit
+	jp	next
 
 ; ( a b -- c ) A + B
 	.db "+"
@@ -566,7 +565,7 @@ PLUS:
 	pop	de
 	add	hl, de
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( a b -- c ) A - B
 	.db "-"
@@ -580,7 +579,7 @@ MINUS:
 	or	a		; reset carry
 	sbc	hl, de
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( a b -- c ) A * B
 	.db "*"
@@ -593,7 +592,7 @@ MULT:
 	pop	bc
 	call	multDEBC
 	push	hl
-	jp	exit
+	jp	next
 
 ; ( a b -- c ) A / B
 	.db "/"
@@ -606,7 +605,7 @@ DIV:
 	pop	hl
 	call	divide
 	push	bc
-	jp	exit
+	jp	next
 
 ; ( a1 a2 -- b )
 	.db "SCMP"
@@ -620,7 +619,7 @@ SCMP:
 	call	strcmp
 	call	flagsToBC
 	push	bc
-	jp	exit
+	jp	next
 
 ; ( n1 n2 -- f )
 	.db "CMP"
@@ -635,7 +634,7 @@ CMP:
 	sbc	hl, de
 	call	flagsToBC
 	push	bc
-	jp	exit
+	jp	next
 
 ; This word's atom is followed by 1b *relative* offset (to the cell's addr) to
 ; where to branch to. For example, The branching cell of "IF THEN" would
@@ -647,14 +646,12 @@ CMP:
 FBR:
 	.dw	nativeWord
 	push	de
-	ld	l, (ix)
-	ld	h, (ix+1)
+	ld	hl, (IP)
 	ld	a, (hl)
 	call	addHL
-	ld	(ix), l
-	ld	(ix+1), h
+	ld	(IP), hl
 	pop	de
-	jp	exit
+	jp	next
 
 ; Conditional branch, only branch if TOS is zero
 	.db	"(fbr?)"
@@ -668,12 +665,10 @@ FBRC:
 	or	l
 	jr	z, FBR+2
 	; skip next byte in RS
-	ld	l, (ix)
-	ld	h, (ix+1)
+	ld	hl, (IP)
 	inc	hl
-	ld	(ix), l
-	ld	(ix+1), h
-	jp	exit
+	ld	(IP), hl
+	jp	next
 
 
 	.db "RECURSE"
@@ -682,9 +677,8 @@ FBRC:
 RECURSE:
 	.dw nativeWord
 	call	popRS
-	ld	l, (ix)
-	ld	h, (ix+1)
 	dec	hl \ dec hl
+	ld	(IP), hl
 	push	hl \ pop iy
 	jp	compiledWord
 
diff --git a/apps/forth/main.asm b/apps/forth/main.asm
index a57a10a..3c758e9 100644
--- a/apps/forth/main.asm
+++ b/apps/forth/main.asm
@@ -20,6 +20,8 @@
 .equ	CURRENT		@+2
 .equ	HERE		@+2
 .equ	OLDHERE		@+2
+; Interpreter pointer. See Execution model comment below.
+.equ	IP		@+2
 ; Pointer to where we currently are in the interpretation of the current line.
 .equ	INPUTPOS	@+2
 ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
@@ -59,6 +61,23 @@
 ;
 ; During IMMEDIATE mode, (HERE) is temporarily set to COMPBUF, and when we're
 ; done, we restore (HERE) for runtime. This way, everyone is happy.
+;
+; EXECUTING A WORD
+;
+; At it's core, executing a word is having the wordref in IY and call
+; executeCodeLink. Then, we let the word do its things. Some words are special,
+; but most of them are of the compiledWord type, and that's their execution that
+; we describe here.
+;
+; First of all, at all time during execution, the Interpreter Pointer (IP)
+; points to the wordref we're executing next.
+;
+; When we execute a compiledWord, the first thing we do is push IP to the Return
+; Stack (RS). Therefore, RS' top of stack will contain a wordref to execute
+; next, after we EXIT.
+;
+; At the end of every compiledWord is an EXIT. This pops RS, sets IP to it, and
+; continues.
 
 ; *** Code ***
 forthMain:
@@ -82,10 +101,14 @@ forthMain:
 forthRdLine:
 	ld	hl, msgOk
 	call	printstr
+forthRdLineNoOk:
 	call	printcrlf
 	call	stdioReadLine
-	ld	ix, RS_ADDR-2		; -2 because we inc-before-push
 	ld	(INPUTPOS), hl
+	; Setup return stack. As a safety net, we set its bottom to ABORTREF.
+	ld	hl, ABORTREF
+	ld	(RS_ADDR), hl
+	ld	ix, RS_ADDR
 	; We're about to compile the line and possibly execute IMMEDIATE words.
 	; Let's save current (HERE) and temporarily set it to COMPBUF.
 	ld	hl, (HERE)
@@ -128,17 +151,23 @@ forthInterpret:
 	ld	(HERE), de
 	jr	forthInterpret
 .immed:
-	push	hl		; --> lvl 1
+	push	hl		; --> For EXECUTE
 	ld	hl, .retRef
-	call	pushRS
-	pop	iy		; <-- lvl 1
-	jp	executeCodeLink
+	ld	(IP), hl
+	jp	EXECUTE+2
 .execute:
 	ld	de, QUIT
 	call	.writeDE
 	; Compilation done, let's restore (HERE) and execute!
 	ld	hl, (OLDHERE)
 	ld	(HERE), hl
+	; before we execute, let's play with our RS a bit: compiledWord is
+	; going to push (IP) on the RS, but we don't expect our compiled words
+	; to ever return: it ends with QUIT. Let's set (IP) to ABORTREF and
+	; IX to RS_ADDR-2 so that compiledWord re-pushes our safety net.
+	ld	hl, ABORTREF
+	ld	(IP), hl
+	ld	ix, RS_ADDR-2
 	ld	iy, COMPBUF
 	jp	compiledWord
 .writeDE:
@@ -153,10 +182,8 @@ forthInterpret:
 	ret
 
 .retRef:
-	.dw $+2
-	.dw $+2
-	call	popRS
-	jr	forthInterpret
+	.dw	$+2
+	.dw	forthInterpret
 
 msgOk:
 	.db	" ok", 0
diff --git a/apps/forth/util.asm b/apps/forth/util.asm
index de04b5e..0455141 100644
--- a/apps/forth/util.asm
+++ b/apps/forth/util.asm
@@ -202,20 +202,31 @@ readLIT:
 	.db "word expected", 0
 
 readLITBOS:
+	; Before we start: is our RS empty? If IX == RS_ADDR, it is (it only has
+	; its safety net). When that happens, we actually want to run readLITTOS
+	push	hl
 	push	de
-	ld	hl, (RS_ADDR)
+	push	ix \ pop hl
+	ld	de, RS_ADDR
+	or	a		; clear carry
+	sbc	hl, de
+	pop	de
+	pop	hl
+	jr	z, readLITTOS
+	push	de
+	; Our bottom-of-stack is RS_ADDR+2 because RS_ADDR is occupied by our
+	; ABORTREF safety net.
+	ld	hl, (RS_ADDR+2)
 	call	readLIT
-	ld	(RS_ADDR), de
+	ld	(RS_ADDR+2), de
 	pop	de
 	ret
 
 readLITTOS:
 	push	de
-	ld	l, (ix)
-	ld	h, (ix+1)
+	ld	hl, (IP)
 	call	readLIT
-	ld	(ix), e
-	ld	(ix+1), d
+	ld	(IP), de
 	pop	de
 	ret
 
@@ -284,7 +295,6 @@ entryhead:
 	inc	hl
 	ld	(CURRENT), hl
 	ld	(HERE), hl
-	xor	a		; set Z
 	ret
 
 ; Sets Z if wordref at HL is of the IMMEDIATE type
diff --git a/emul/.gitignore b/emul/.gitignore
index 88d5e2c..344593e 100644
--- a/emul/.gitignore
+++ b/emul/.gitignore
@@ -1,5 +1,6 @@
 /shell/shell
 /forth/stage1
+/forth/stage1dbg
 /forth/forth
 /zasm/zasm
 /zasm/avra
diff --git a/emul/Makefile b/emul/Makefile
index 1ea7d1e..35b1e66 100644
--- a/emul/Makefile
+++ b/emul/Makefile
@@ -33,6 +33,9 @@ forth/forth0-bin.h: forth/forth0.bin
 forth/stage1: forth/stage1.c $(OBJS) forth/forth0-bin.h 
 	$(CC) forth/stage1.c $(OBJS) -o $@
 
+forth/stage1dbg: forth/stage1.c $(OBJS) forth/forth0-bin.h 
+	$(CC) -DDEBUG forth/stage1.c $(OBJS) -o $@
+
 forth/core.bin: $(APPS)/forth/core.fth forth/stage1
 	./forth/stage1 $(APPS)/forth/core.fth | tee $@ > /dev/null
 
diff --git a/emul/forth/stage1.c b/emul/forth/stage1.c
index a4c24c5..69f401f 100644
--- a/emul/forth/stage1.c
+++ b/emul/forth/stage1.c
@@ -20,6 +20,11 @@ directly follow executable's last byte so that we don't waste spce and also
 that wordref offsets correspond.
 */
 
+// When DEBUG is set, stage1 is a core-less forth that works interactively.
+// Useful for... debugging!
+// By the way: there's a double-echo in stagedbg. It's normal. Don't panic.
+
+//#define DEBUG
 // in sync with glue.asm
 #define RAMSTART 0x900
 #define STDIO_PORT 0x00
@@ -44,11 +49,17 @@ static uint8_t iord_stdio()
 static void iowr_stdio(uint8_t val)
 {
     // we don't output stdout in stage0
+#ifdef DEBUG
+    // ... unless we're in DEBUG mode!
+    putchar(val);
+#endif
 }
 
 int main(int argc, char *argv[])
 {
-    bool tty = false;
+#ifdef DEBUG
+    fp = stdin;
+#else
     if (argc == 2) {
         fp = fopen(argv[1], "r");
         if (fp == NULL) {
@@ -59,6 +70,7 @@ int main(int argc, char *argv[])
         fprintf(stderr, "Usage: ./stage0 filename\n");
         return 1;
     }
+#endif
     Machine *m = emul_init();
     m->ramstart = RAMSTART;
     m->iord[STDIO_PORT] = iord_stdio;
@@ -74,6 +86,7 @@ int main(int argc, char *argv[])
 
     fclose(fp);
 
+#ifndef DEBUG
     // We're done, now let's spit dict data
     // let's start with LATEST spitting.
     putchar(m->mem[CURRENT]);
@@ -82,6 +95,7 @@ int main(int argc, char *argv[])
     for (int i=sizeof(KERNEL); i<here; i++) {
         putchar(m->mem[i]);
     }
+#endif
     return 0;
 }