forth: simplify execution model and handle literals better

This scheme of "when we handle line-by-line, compile one word at a time then execute" so that we could allow words like "CREATE" to call "readword" before continuing was a bad scheme. It made things like branching outside of a colon definition impossible. This commit implement a new "litWord". When an undefined word is encountered at compile time, it is included as-is in a string literal word. It is at run time that we decide what to do with it.
2025-04-02 09:18:40 +11:00 · 2020-03-09 08:49:51 -04:00 · 2020-03-09 08:49:51 -04:00 · 03e529b762
commit 03e529b762
parent 5cadde557c
5 changed files with 222 additions and 106 deletions
--- a/apps/forth/dict.asm
+++ b/apps/forth/dict.asm
@ -71,10 +71,27 @@ numberWord:
 NUMBER:
 	.dw	numberWord
 ; Similarly to numberWord, this is not a real word, but a string literal.
 ; Instead of being followed by a 2 bytes number, it's followed by a
 ; null-terminated string. This is not expected to be called in a regular
 ; context. Only words expecting those literals will look for them. This is why
 ; the litWord triggers abort.
 litWord:
 	call	popRS
 	call	intoHL
 	call	printstr	; let's print the word before abort.
 	ld	hl, .msg
 	call	printstr
 	jp	abort
 .msg:
 	.db "undefined word", 0
 LIT:
 	.dw	litWord
 ; ( R:I -- )
 EXIT:
-	.db "EXIT", 0, 0, 0, 0
+	.db ";"
 	.fill 7
 	.dw 0
 	.dw nativeWord
 ; When we call the EXIT word, we have to do a "double exit" because our current
@ -88,7 +105,7 @@ exit:
 	call	popRS
 	; We have a pointer to a word
 	push	hl \ pop iy
-	jr	compiledWord
+	jp	compiledWord
 ; ( R:I -- )
 QUIT:
@ -96,31 +113,29 @@ QUIT:
 	.dw EXIT
 	.dw nativeWord
 quit:
-	ld	hl, FLAGS
+	jp	forthRdLine
 	set	FLAG_QUITTING, (hl)
 	jr	exit
 ABORT:
 	.db "ABORT", 0, 0, 0
 	.dw QUIT
 	.dw nativeWord
 abort:
 	; Reinitialize PS (RS is reinitialized in forthInterpret
 	ld	sp, (INITIAL_SP)
-	ld	hl, .msg
+	jp	forthRdLine
 	call	printstr
 	call	printcrlf
 	jr	quit
 .msg:
 	.db " err", 0
 BYE:
 	.db "BYE"
 	.fill 5
 	.dw ABORT
 	.dw nativeWord
-	ld	hl, FLAGS
+	; Goodbye Forth! Before we go, let's restore the stack
-	set	FLAG_ENDPGM, (hl)
+	ld	sp, (INITIAL_SP)
-	jp	exit
+	; unwind stack underflow buffer
 	pop	af \ pop af \ pop af
 	; success
 	xor	a
 	ret
 ; ( c -- )
 EMIT:
@ -155,36 +170,44 @@ DEFINE:
 	.dw EXECUTE
 	.dw nativeWord
 	call	entryhead
 	jp	nz, quit
 	ld	de, compiledWord
 	ld	(hl), e
 	inc	hl
 	ld	(hl), d
 	inc	hl
-	push	hl \ pop iy
+	; At this point, we've processed the name literal following the ':'.
 	; What's next? We have, in IP, a pointer to words that *have already
 	; been compiled by INTERPRET*. All those bytes will be copied as-is.
 	; All we need to do is to know how many bytes to copy. To do so, we
 	; skip compwords until EXIT is reached.
 	ld	(HERE), hl	; where we write compwords.
 	; Let's save old RS TOS
 	ld	e, (ix)
 	ld	d, (ix+1)
 .loop:
-	call	readword
+	call	RSIsEXIT
-	jr	nz, .end
+	jr	z, .loopend
-	call	.issemicol
+	call	compSkip
 	jr	z, .end
 	call	compile
 	jp	nz, quit
 	jr	.loop
-.end:
+.loopend:
-	; end chain with EXIT
+	; At this point, RS' TOS points to EXIT compword. We'll copy it too.
-	ld	hl, EXIT+CODELINK_OFFSET
+	; We'll use LDIR. BC will be RSTOP-OLDRSTOP+2
-	call	wrCompHL
+	ld	l, (ix)
-	ld	(HERE), iy
+	ld	h, (ix+1)
 	inc	hl \ inc hl	; our +2
 	or	a		; clear carry
 	sbc	hl, de
 	ld	b, h
 	ld	c, l
 	; BC has proper count
 	ex	de, hl		; HL is our source (old RS' TOS)
 	ld	de, (HERE)	; and DE is our dest
 	ldir			; go!
 	; HL has our new RS' TOS
 	ld	(ix), l
 	ld	(ix+1), h
 	ld	(HERE), de	; update HERE
 	jp	exit
 .issemicol:
 	ld	a, (hl)
 	cp	';'
 	ret	nz
 	inc	hl
 	ld	a, (hl)
 	dec	hl
 	or	a
 	ret
 DOES:
 	.db "DOES>", 0, 0, 0
@ -226,22 +249,17 @@ INTERPRET:
 	.dw KEY
 	.dw nativeWord
 interpret:
 	call	readword
 	jp	nz, quit
 	ld	iy, COMPBUF
 .loop:
 	call	readword
 	jr	nz, .end
 	call	compile
-	jp	nz, .notfound
+	jr	.loop
-	ld	hl, EXIT+CODELINK_OFFSET
+.end:
-	ld	(iy), l
+	ld	hl, QUIT+CODELINK_OFFSET
-	ld	(iy+1), h
+	call	wrCompHL
 	ld	iy, COMPBUF
 	jp	compiledWord
 .notfound:
 	ld	hl, .msg
 	call	printstr
 	jp	quit
 .msg:
 	.db	"not found", 0
 CREATE:
 	.db "CREATE", 0, 0
@ -454,4 +472,3 @@ CONSTANT:
 	.dw DOES+CODELINK_OFFSET
 	.dw FETCH+CODELINK_OFFSET
 	.dw EXIT+CODELINK_OFFSET
--- a/apps/forth/dictionary.txt
+++ b/apps/forth/dictionary.txt
@ -17,7 +17,8 @@ DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
 *** Native Words ***
-: x ... ;   --              Define a new word
+: x ...     --              Define a new word
 ;           R:I --          Exit a colon definition
 .           n --            Print n in its decimal form
@           a -- n          Set n to value at address a
 !           n a --          Store n in address a
@ -30,7 +31,6 @@ DOES>       --              See description at top of file
 DUP         a -- a a
 EMIT        c --            Spit char c to stdout
 EXECUTE     a --            Execute word at addr a
 EXIT        R:I --          Exit a colon definition
 HERE        -- a            Push HERE's address
 QUIT        R:drop --       Return to interpreter promp immediately
 KEY         -- c            Get char c from stdin
--- a/apps/forth/main.asm
+++ b/apps/forth/main.asm
@ -7,47 +7,17 @@
 .equ	NAMELEN		8
 ; Offset of the code link relative to the beginning of the word
 .equ	CODELINK_OFFSET	10
 ; When set, the interpreter should abort parsing of current line and return to
 ; prompt.
 .equ	FLAG_QUITTING	0
 ; When set, the interpreter should quit
 .equ	FLAG_ENDPGM	1
 ; *** Variables ***
 .equ	INITIAL_SP	FORTH_RAMSTART
 .equ	CURRENT		@+2
 .equ	HERE		@+2
 .equ	INPUTPOS	@+2
 .equ	FLAGS		@+2
 ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
-.equ	COMPBUF		@+1
+.equ	COMPBUF		@+2
 .equ	FORTH_RAMEND	@+0x40
 ; *** Code ***
 MAIN:
 	.dw compiledWord
 	.dw INTERPRET+CODELINK_OFFSET
 	.dw CHKEND
 ; If FLAG_ENDPGM is set, stop the program, else, tweak the RS so that we loop.
 CHKEND:
 	.dw nativeWord
 	ld	hl, FLAGS
 	bit	FLAG_ENDPGM, (hl)
 	jr	nz, .endpgm
 	; not quitting program, are we supposed to continue parsing line?
 	ld	hl, FLAGS
 	bit	FLAG_QUITTING, (hl)
 	jr	nz, forthRdLine
 	; Not quitting line either.
 	jr	forthInterpret
 .endpgm:
 	ld	sp, (INITIAL_SP)
 	; restore stack
 	pop	af \ pop af \ pop af
 	xor	a
 	ret
 forthMain:
 	; STACK OVERFLOW PROTECTION:
 	; To avoid having to check for stack underflow after each pop operation
@ -62,16 +32,14 @@ forthMain:
 	ld	hl, FORTH_RAMEND
 	ld	(HERE), hl
 forthRdLine:
 	xor	a
 	ld	(FLAGS), a
 	ld	hl, msgOk
 	call	printstr
 	call	printcrlf
 	call	stdioReadLine
 	ld	(INPUTPOS), hl
 forthInterpret:
-	ld	ix, RS_ADDR
+	ld	ix, RS_ADDR-2		; -2 because we inc-before-push
-	ld	iy, MAIN
+	ld	iy, INTERPRET+CODELINK_OFFSET
 	jp	executeCodeLink
 msgOk:
 	.db	" ok", 0
--- a/apps/forth/stack.asm
+++ b/apps/forth/stack.asm
@ -3,25 +3,26 @@
 ; PS is the most frequently used. However, this causes a problem with routine
 ; calls: because in Forth, the stack isn't balanced within each call, our return
 ; offset, when placed by a CALL, messes everything up. This is one of the
-; reasons why we need stack management routines below.
+; reasons why we need stack management routines below. IX always points to RS'
 ; Top Of Stack (TOS)
 ;
 ; This return stack contain "Interpreter pointers", that is a pointer to the
 ; address of a word, as seen in a compiled list of words.
 ; Push value HL to RS
 pushRS:
 	inc	ix
 	inc	ix
 	ld	(ix), l
-	inc	ix
+	ld	(ix+1), h
 	ld	(ix), h
 	inc	ix
 	ret
 ; Pop RS' TOS to HL
 popRS:
 	dec ix
 	ld	h, (ix)
 	dec ix
 	ld	l, (ix)
 	ld	h, (ix+1)
 	dec ix
 	dec ix
 	ret
 ; Verifies that SP is within bounds. If it's not, call ABORT
--- a/apps/forth/util.asm
+++ b/apps/forth/util.asm
@ -43,6 +43,126 @@ readword:
 	inc	a	; unset Z
 	ret
 RSIsDE:
 	push	hl
 	ld	l, (ix)
 	ld	h, (ix+1)
 	ld	a, (hl)
 	cp	e
 	jr	nz, .end	; no
 	inc	hl
 	ld	a, (hl)
 	cp	d		; Z has our answer
 .end:
 	pop	hl
 	ret
 ; Is RS' TOS pointing to a NUMBER word?
 ; Z if yes, NZ if no.
 RSIsNUMBER:
 	push	de
 	ld	de, NUMBER
 	call	RSIsDE
 	pop	de
 	ret
 ; Is RS' TOS pointing to a LIT word?
 ; Z if yes, NZ if no.
 RSIsLIT:
 	push	de
 	ld	de, LIT
 	call	RSIsDE
 	pop	de
 	ret
 ; Is RS' TOS pointing to EXIT?
 ; Z if yes, NZ if no.
 RSIsEXIT:
 	push	de
 	ld	de, EXIT+CODELINK_OFFSET
 	call	RSIsDE
 	pop	de
 	ret
 ; Skip the compword where RS' TOS is currently pointing. If it's a regular word,
 ; it's easy: we inc by 2. If it's a NUMBER, we inc by 4. If it's a LIT, we skip
 ; to after null-termination.
 compSkip:
 	push	hl
 	ld	l, (ix)
 	ld	h, (ix+1)
 	; At the minimum, we skip by 2
 	inc	hl \ inc hl
 	call	RSIsNUMBER
 	jr	z, .isNum
 	call	RSIsLIT
 	jr	nz, .end	; A word
 	; We have a literal
 	call	strskip
 	inc	hl		; byte after word termination
 	jr	.end
 .isNum:
 	; skip by 4
 	inc	hl \ inc hl
 .end:
 	; HL is good, write it to RS
 	ld	(ix), l
 	ld	(ix+1), h
 	pop	hl
 	ret
 ; Checks RS' TOS and, if it points to a string literal (LIT), makes HL point
 ; to it and advance IP to byte following null-termination.
 ; If it doesn't, things get interesting: If it's a word reference, then it's
 ; not an invalid literal. For example, one could want to redefine an existing
 ; word. So in that case, we'll copy the word's name on the pad (it might not be
 ; null-terminated) and set HL to point to it.
 ; How do we know that our reference is a word reference (it could be, for
 ; example, a NUMBER reference)? We check that its address is more than QUIT, the
 ; second word in our dict. We don't accept EXIT because it's the termination
 ; word. Yeah, it means that ";" can't be overridden...
 ; If name can't be read, we abort
 readCompWord:
 	; In all cases, we want RS' TOS in HL. Let's get it now.
 	ld	l, (ix)
 	ld	h, (ix+1)
 	call	RSIsLIT
 	jr	nz, .notLIT
 	; RS TOS is a LIT, make HL point to string, then skip this RS compword.
 	inc	hl \ inc hl	; HL now points to string itself
 	jr	compSkip
 .notLIT:
 	; Alright, not a literal, but is it a word? If it's not a number, then
 	; it's a word.
 	call	RSIsNUMBER
 	jr	z, .notWord
 	; Not a number, then it's a word. Copy word to pad and point to it.
 	call	intoHL
 	or	a		; clear carry
 	ld	de, CODELINK_OFFSET
 	sbc	hl, de
 	; That's our return value
 	push	hl		; --> lvl 1
 	; HL now points to word offset, let'd copy it to pad
 	ex	de, hl
 	call	pad
 	ex	de, hl
 	ld	bc, NAMELEN
 	ldir
 	; null-terminate
 	xor	a
 	ld	(de), a
 	call	compSkip
 	pop	hl		; <-- lvl 1
 	ret
 .notWord:
 	ld	hl, .msg
 	call	printstr
 	jp	abort
 .msg:
 	.db "word expected", 0
 ; For DE pointing to a dict entry, set DE to point to the previous entry.
 ; Z is set if DE point to 0 (no entry). NZ if not.
 prev:
@ -84,38 +204,48 @@ wrCompHL:
 ; Compile word string at (HL) and write down its compiled version in IY,
 ; advancing IY to the byte next to the last written byte.
 ; Set Z on success, unset on failure.
 compile:
 	call	find
 	jr	nz, .maybeNum
 	ret	nz
 	; DE is a word offset, we need a code link
 	ld	hl, CODELINK_OFFSET
 	add	hl, de
 	xor	a	; set Z
 	jr	wrCompHL
 .maybeNum:
 	push	hl		; --> lvl 1. save string addr
 	call	parseLiteral
-	ret	nz
+	jr	nz, .undef
 	pop	hl		; <-- lvl 1
 	; a valid number!
 	ld	hl, NUMBER
 	call	wrCompHL
 	ex	de, hl		; number in HL
 	jr	wrCompHL
-	ret	z
+.undef:
-	; unknown name
+	; When encountering an undefined word during compilation, we spit a
-	ld	hl, .msg
+	; reference to litWord, followed by the null-terminated word.
-	call	printstr
+	; This way, if a preceding word expect a string literal, it will read it
-	jp	abort
+	; by calling readCompWord, and if it doesn't, the routine will be
-.msg:
+	; called, triggering an abort.
-	.db "unknown name", 0
+	ld	hl, LIT
 	call	wrCompHL
 	pop	hl		; <-- lvl 1. recall string addr
 .writeLit:
 	ld	a, (hl)
 	ld	(iy), a
 	inc	hl
 	inc	iy
 	or	a
 	jr	nz, .writeLit
 	ret
 ; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
 ; HL points to new (HERE)
 ; Set Z if name could be read, NZ if not
 entryhead:
-	call	readword
+	call	readCompWord
-	ret	nz
+	call	printstr
 	ld	de, (HERE)
 	call	strcpy
 	ex	de, hl		; (HERE) now in HL