forth: make readCompWord read from RS' BOS instead of TOS

Previous approach was broken with regards to defined word using CREATE. Also, reduce name length by one to make space for a new flags field for "immediate" (which isn't used yet).
forth: clarify the meaning of "wordref"
2025-02-19 10:06:03 +11:00 · 2020-03-09 19:50:51 -04:00 · 2020-03-09 15:12:44 -04:00 · 2020-03-09 14:19:51 -04:00 · 2020-03-09 14:14:26 -04:00
5 changed files with 435 additions and 205 deletions
--- a/apps/forth/dict.asm
+++ b/apps/forth/dict.asm
@ -1,5 +1,6 @@
 ; A dictionary entry has this structure:
-; - 8b name (zero-padded)
+; - 7b name (zero-padded)
 ; - 1b flags (bit 0: IMMEDIATE)
 ; - 2b prev pointer
 ; - 2b code pointer
 ; - Parameter field (PF)
@ -13,11 +14,8 @@
 nativeWord:
 	jp	(iy)
-; Execute a compiled word containing a list of references to other words,
+; Execute a list of atoms, which usually ends with EXIT.
-; usually ended by a reference to EXIT.
+; IY points to that list.
 ; A reference to a word in a compiledWord section is *not* a direct reference,
 ; but a word+CODELINK_OFFSET reference. Therefore, for a code link "link",
 ; (link) is the routine to call.
 compiledWord:
 	push	iy \ pop hl
 	inc	hl
@ -53,6 +51,69 @@ doesWord:
 	push	hl \ pop iy
 	jr	compiledWord
 ; The IF word checks the stack for zero. If it's non-zero, it does nothing and
 ; allow compiledWord to continue.
 ; If it's zero, it tracksback RS, advance it until it finds a ELSE, a THEN, or
 ; an EXIT (not supposed to happen unless the IF is misconstructed). Whether
 ; it's a ELSE or THEN, the same thing happens: we resume execution after the
 ; ELSE/THEN. If it's a EXIT, we simply execute it.
 ifWord:
 	pop	hl
 	ld	a, h
 	or	l
 	jp	nz, exit	; non-zero, continue
 	; Zero, seek ELSE, THEN or EXIT. Continue to elseWord
 ; If a ELSE word is executed, it means that the preceding IF had a non-zero
 ; condition and continued execution. This means that upon encountering an ELSE,
 ; we must search for a THEN or an EXIT.
 ; To simplify implementation and share code with ifWord, we also match ELSE,
 ; which is only possible in malformed construct. Therefore "IF ELSE ELSE" is
 ; valid and interpreted as "IF ELSE THEN".
 elseWord:
 	; to save processing, we test EXIT, ELSE and THEN in the order they
 	; appear, address-wise. This way, we don't need to push/pop HL: we can
 	; SUB the difference between the words and check for zeroes.
 	call	popRS
 	; We need to save that IP somewhere. Let it be BC
 	ld	b, h
 	ld	c, l
 .loop:
 	; Whether there's a match or not, we will resume the operation at IP+2,
 	; which means that we have to increase BC anyways. Let's do it now.
 	inc	bc \ inc bc
 	call	intoHL
 	or	a		; clear carry
 	ld	de, EXIT
 	sbc	hl, de
 	jp	z, exit
 	; Not EXIT, let's continue with ELSE. No carry possible because EXIT
 	; is first word. No need to clear.
 	ld	de, ELSE-EXIT
 	sbc	hl, de
 	jr	c, .nomatch	; A word between EXIT and ELSE. No match.
 	jr	z, .match	; We have a ELSE
 	; Let's try with THEN. Again, no carry possible, C cond was handled.
 	ld	de, THEN-ELSE
 	sbc	hl, de
 	jr	z, .match	; We have a THEN
 .nomatch:
 	; Nothing matched, which means that we need to continue looking.
 	; BC is already IP+2
 	ld	h, b
 	ld	l, c
 	jr	.loop
 .match:
 	; Matched a ELSE or a THEN, which means we need to continue executing
 	; word from IP+2, which is already in BC.
 	push	bc \ pop iy
 	jp	compiledWord
 ; This word does nothing. It's never going to be executed unless the wordlist
 ; is misconstructed.
 thenWord:
 	jp	exit
 ; This is not a word, but a number literal. This works a bit differently than
 ; others: PF means nothing and the actual number is placed next to the
 ; numberWord reference in the compiled word list. What we need to do to fetch
@ -71,11 +132,28 @@ numberWord:
 NUMBER:
 	.dw	numberWord
 ; Similarly to numberWord, this is not a real word, but a string literal.
 ; Instead of being followed by a 2 bytes number, it's followed by a
 ; null-terminated string. This is not expected to be called in a regular
 ; context. Only words expecting those literals will look for them. This is why
 ; the litWord triggers abort.
 litWord:
 	call	popRS
 	call	intoHL
 	call	printstr	; let's print the word before abort.
 	ld	hl, .msg
 	call	printstr
 	jp	abort
 .msg:
 	.db "undefined word", 0
 LIT:
 	.dw	litWord
 ; ( R:I -- )
-EXIT:
+	.db ";"
-	.db "EXIT", 0, 0, 0, 0
+	.fill 7
 	.dw 0
 EXIT:
 	.dw nativeWord
 ; When we call the EXIT word, we have to do a "double exit" because our current
 ; Interpreter pointer is pointing to the word *next* to our EXIT reference when,
@ -88,44 +166,45 @@ exit:
 	call	popRS
 	; We have a pointer to a word
 	push	hl \ pop iy
-	jr	compiledWord
+	jp	compiledWord
 ; ( R:I -- )
-QUIT:
+	.db "QUIT"
-	.db "QUIT", 0, 0, 0, 0
+	.fill 4
 	.dw EXIT
 QUIT:
 	.dw nativeWord
 quit:
-	ld	hl, FLAGS
+	jp	forthRdLine
 	set	FLAG_QUITTING, (hl)
 	jr	exit
-ABORT:
+	.db "ABORT"
-	.db "ABORT", 0, 0, 0
+	.fill 3
 	.dw QUIT
 ABORT:
 	.dw nativeWord
 abort:
 	; Reinitialize PS (RS is reinitialized in forthInterpret
 	ld	sp, (INITIAL_SP)
-	ld	hl, .msg
+	jp	forthRdLine
 	call	printstr
 	call	printcrlf
 	jr	quit
 .msg:
 	.db " err", 0
 BYE:
 	.db "BYE"
 	.fill 5
 	.dw ABORT
 BYE:
 	.dw nativeWord
-	ld	hl, FLAGS
+	; Goodbye Forth! Before we go, let's restore the stack
-	set	FLAG_ENDPGM, (hl)
+	ld	sp, (INITIAL_SP)
-	jp	exit
+	; unwind stack underflow buffer
 	pop	af \ pop af \ pop af
 	; success
 	xor	a
 	ret
 ; ( c -- )
-EMIT:
+	.db "EMIT"
-	.db "EMIT", 0, 0, 0, 0
+	.fill 4
 	.dw BYE
 EMIT:
 	.dw nativeWord
 	pop	hl
 	ld	a, l
@ -133,13 +212,12 @@ EMIT:
 	jp	exit
 ; ( addr -- )
-EXECUTE:
+	.db "EXECUTE"
-	.db "EXECUTE", 0
+	.db 0
 	.dw EMIT
 EXECUTE:
 	.dw nativeWord
-	pop	iy	; Points to word_offset
+	pop	iy	; is a wordref
 	ld	de, CODELINK_OFFSET
 	add	iy, de
 executeCodeLink:
 	ld	l, (iy)
 	ld	h, (iy+1)
@ -149,46 +227,54 @@ executeCodeLink:
 	; IY points to PFA
 	jp	(hl)	; go!
 DEFINE:
 	.db ":"
 	.fill 7
 	.dw EXECUTE
 DEFINE:
 	.dw nativeWord
 	call	entryhead
 	jp	nz, quit
 	ld	de, compiledWord
 	ld	(hl), e
 	inc	hl
 	ld	(hl), d
 	inc	hl
-	push	hl \ pop iy
+	; At this point, we've processed the name literal following the ':'.
 	; What's next? We have, in IP, a pointer to words that *have already
 	; been compiled by INTERPRET*. All those bytes will be copied as-is.
 	; All we need to do is to know how many bytes to copy. To do so, we
 	; skip compwords until EXIT is reached.
 	ld	(HERE), hl	; where we write compwords.
 	ld	l, (ix)
 	ld	h, (ix+1)
 .loop:
-	call	readword
+	call	HLPointsEXIT
-	jr	nz, .end
+	jr	z, .loopend
-	call	.issemicol
+	call	compSkip
 	jr	z, .end
 	call	compile
 	jp	nz, quit
 	jr	.loop
-.end:
+.loopend:
-	; end chain with EXIT
+	; At this point, HL points to EXIT compword. We'll copy it too.
-	ld	hl, EXIT+CODELINK_OFFSET
+	; We'll use LDIR. BC will be RSTOP-OLDRSTOP+2
-	call	wrCompHL
+	ld	e, (ix)
-	ld	(HERE), iy
+	ld	d, (ix+1)
 	inc	hl \ inc hl	; our +2
 	or	a		; clear carry
 	sbc	hl, de
 	ld	b, h
 	ld	c, l
 	; BC has proper count
 	ex	de, hl		; HL is our source (old RS' TOS)
 	ld	de, (HERE)	; and DE is our dest
 	ldir			; go!
 	; HL has our new RS' TOS
 	ld	(ix), l
 	ld	(ix+1), h
 	ld	(HERE), de	; update HERE
 	jp	exit
 .issemicol:
 	ld	a, (hl)
 	cp	';'
 	ret	nz
 	inc	hl
 	ld	a, (hl)
 	dec	hl
 	or	a
 	ret
-DOES:
+	.db "DOES>"
-	.db "DOES>", 0, 0, 0
+	.fill 3
 	.dw DEFINE
 DOES:
 	.dw nativeWord
 	; We run this when we're in an entry creation context. Many things we
 	; need to do.
@ -199,8 +285,6 @@ DOES:
 	; 3. exit. Because we've already popped RS, a regular exit will abort
 	;    colon definition, so we're good.
 	ld	iy, (CURRENT)
 	ld	de, CODELINK_OFFSET
 	add	iy, de
 	ld	hl, doesWord
 	call	wrCompHL
 	inc	iy \ inc iy		; cell variable space
@ -210,10 +294,10 @@ DOES:
 	jp	exit
 ; ( -- c )
 KEY:
 	.db "KEY"
 	.fill 5
 	.dw DOES
 KEY:
 	.dw nativeWord
 	call	stdioGetC
 	ld	h, 0
@ -221,31 +305,28 @@ KEY:
 	push	hl
 	jp	exit
-INTERPRET:
+	.db "INTERPR"
-	.db "INTERPRE"
+	.db 0
 	.dw KEY
 INTERPRET:
 	.dw nativeWord
 interpret:
 	call	readword
 	jp	nz, quit
 	ld	iy, COMPBUF
 .loop:
 	call	readword
 	jr	nz, .end
 	call	compile
-	jp	nz, .notfound
+	jr	.loop
-	ld	hl, EXIT+CODELINK_OFFSET
+.end:
-	ld	(iy), l
+	ld	hl, QUIT
-	ld	(iy+1), h
+	call	wrCompHL
 	ld	iy, COMPBUF
 	jp	compiledWord
 .notfound:
 	ld	hl, .msg
 	call	printstr
 	jp	quit
 .msg:
 	.db	"not found", 0
-CREATE:
+	.db "CREATE"
-	.db "CREATE", 0, 0
+	.fill 2
 	.dw INTERPRET
 CREATE:
 	.dw nativeWord
 	call	entryhead
 	jp	nz, quit
@ -257,24 +338,25 @@ CREATE:
 	ld	(HERE), hl
 	jp	exit
 HERE_:	; Caution: conflicts with actual variable name
 	.db "HERE"
 	.fill 4
 	.dw CREATE
 HERE_:	; Caution: conflicts with actual variable name
 	.dw sysvarWord
 	.dw HERE
-CURRENT_:
+	.db "CURRENT"
-	.db "CURRENT", 0
+	.db 0
 	.dw HERE_
 CURRENT_:
 	.dw sysvarWord
 	.dw CURRENT
 ; ( n -- )
 DOT:
 	.db "."
 	.fill 7
 	.dw CURRENT_
 DOT:
 	.dw nativeWord
 	pop	de
 	; We check PS explicitly because it doesn't look nice to spew gibberish
@ -286,10 +368,10 @@ DOT:
 	jp	exit
 ; ( n a -- )
 STORE:
 	.db "!"
 	.fill 7
 	.dw DOT
 STORE:
 	.dw nativeWord
 	pop	iy
 	pop	hl
@ -298,10 +380,10 @@ STORE:
 	jp	exit
 ; ( a -- n )
 FETCH:
 	.db "@"
 	.fill 7
 	.dw STORE
 FETCH:
 	.dw nativeWord
 	pop	hl
 	call	intoHL
@ -309,10 +391,10 @@ FETCH:
 	jp	exit
 ; ( a b -- b a )
 SWAP:
 	.db "SWAP"
 	.fill 4
 	.dw FETCH
 SWAP:
 	.dw nativeWord
 	pop	hl
 	ex	(sp), hl
@ -320,10 +402,10 @@ SWAP:
 	jp	exit
 ; ( a -- a a )
 DUP:
 	.db "DUP"
 	.fill 5
 	.dw SWAP
 DUP:
 	.dw nativeWord
 	pop	hl
 	push	hl
@ -331,10 +413,10 @@ DUP:
 	jp	exit
 ; ( a b -- a b a )
 OVER:
 	.db "OVER"
 	.fill 4
 	.dw DUP
 OVER:
 	.dw nativeWord
 	pop	hl	; B
 	pop	de	; A
@ -344,10 +426,10 @@ OVER:
 	jp	exit
 ; ( a b -- c ) A + B
 PLUS:
 	.db "+"
 	.fill 7
 	.dw OVER
 PLUS:
 	.dw nativeWord
 	pop	hl
 	pop	de
@ -356,10 +438,10 @@ PLUS:
 	jp	exit
 ; ( a b -- c ) A - B
 MINUS:
 	.db "-"
 	.fill 7
 	.dw PLUS
 MINUS:
 	.dw nativeWord
 	pop	de		; B
 	pop	hl		; A
@ -369,10 +451,10 @@ MINUS:
 	jp	exit
 ; ( a b -- c ) A * B
 MULT:
 	.db "*"
 	.fill 7
 	.dw MINUS
 MULT:
 	.dw nativeWord
 	pop	de
 	pop	bc
@ -381,10 +463,10 @@ MULT:
 	jp	exit
 ; ( a b -- c ) A / B
 DIV:
 	.db "/"
 	.fill 7
 	.dw MULT
 DIV:
 	.dw nativeWord
 	pop	de
 	pop	hl
@ -392,66 +474,86 @@ DIV:
 	push	bc
 	jp	exit
 	.db "IF"
 	.fill 6
 	.dw DIV
 IF:
 	.dw ifWord
 	.db "ELSE"
 	.fill 4
 	.dw IF
 ELSE:
 	.dw elseWord
 	.db "THEN"
 	.fill 4
 	.dw ELSE
 THEN:
 	.dw thenWord
 ; End of native words
 ; ( a -- )
 ; @ .
 FETCHDOT:
 	.db "?"
 	.fill 7
-	.dw DIV
+	.dw THEN
 FETCHDOT:
 	.dw compiledWord
-	.dw FETCH+CODELINK_OFFSET
+	.dw FETCH
-	.dw DOT+CODELINK_OFFSET
+	.dw DOT
-	.dw EXIT+CODELINK_OFFSET
+	.dw EXIT
 ; ( n a -- )
 ; SWAP OVER @ + SWAP !
 STOREINC:
 	.db "+!"
 	.fill 6
 	.dw FETCHDOT
 STOREINC:
 	.dw compiledWord
-	.dw SWAP+CODELINK_OFFSET
+	.dw SWAP
-	.dw OVER+CODELINK_OFFSET
+	.dw OVER
-	.dw FETCH+CODELINK_OFFSET
+	.dw FETCH
-	.dw PLUS+CODELINK_OFFSET
+	.dw PLUS
-	.dw SWAP+CODELINK_OFFSET
+	.dw SWAP
-	.dw STORE+CODELINK_OFFSET
+	.dw STORE
-	.dw EXIT+CODELINK_OFFSET
+	.dw EXIT
 ; ( n -- )
 ; HERE +!
-ALLOT:
+	.db "ALLOT"
-	.db "ALLOT", 0, 0, 0
+	.fill 3
 	.dw STOREINC
 ALLOT:
 	.dw compiledWord
-	.dw HERE_+CODELINK_OFFSET
+	.dw HERE_
-	.dw STOREINC+CODELINK_OFFSET
+	.dw STOREINC
-	.dw EXIT+CODELINK_OFFSET
+	.dw EXIT
 ; CREATE 2 ALLOT
-VARIABLE:
+	.db "VARIABL"
-	.db "VARIABLE"
+	.db 0
 	.dw ALLOT
 VARIABLE:
 	.dw compiledWord
-	.dw CREATE+CODELINK_OFFSET
+	.dw CREATE
 	.dw NUMBER
 	.dw 2
-	.dw ALLOT+CODELINK_OFFSET
+	.dw ALLOT
-	.dw EXIT+CODELINK_OFFSET
+	.dw EXIT
 ; ( n -- )
 ; CREATE HERE @ ! DOES> @
-CONSTANT:
+	.db "CONSTAN"
-	.db "CONSTANT"
+	.db 0
 	.dw VARIABLE
 CONSTANT:
 	.dw compiledWord
-	.dw CREATE+CODELINK_OFFSET
+	.dw CREATE
-	.dw HERE_+CODELINK_OFFSET
+	.dw HERE_
-	.dw FETCH+CODELINK_OFFSET
+	.dw FETCH
-	.dw STORE+CODELINK_OFFSET
+	.dw STORE
-	.dw DOES+CODELINK_OFFSET
+	.dw DOES
-	.dw FETCH+CODELINK_OFFSET
+	.dw FETCH
-	.dw EXIT+CODELINK_OFFSET
+	.dw EXIT
--- a/apps/forth/dictionary.txt
+++ b/apps/forth/dictionary.txt
@ -1,23 +1,34 @@
 Stack notation: "<stack before> -- <stack after>". Rightmost is top of stack
-                (TOS). For example, in "a b -- c d", b is TOS before, d is TOS
+(TOS). For example, in "a b -- c d", b is TOS before, d is TOS after. "R:" means
-                after. "R:" means that the Return Stack is modified.
+that the Return Stack is modified.
 DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
-       that newly created word into a "does cell", that is, a regular cell (
+that newly created word into a "does cell", that is, a regular cell ( when
-       when called, puts the cell's addr on PS), but right after that, it
+called, puts the cell's addr on PS), but right after that, it executes words
-       executes words that appear after the DOES>.
+that appear after the DOES>.
-       "does cells" always allocate 4 bytes (2 for the cell, 2 for the DOES>
+"does cells" always allocate 4 bytes (2 for the cell, 2 for the DOES> link) and
-       link) and there is no need for ALLOT in colon definition.
+there is no need for ALLOT in colon definition.
-       At compile time, colon definition stops processing words when reaching
+At compile time, colon definition stops processing words when reaching the
-       the DOES>.
+DOES>.
-       Example: ": CONSTANT CREATE HERE @ ! DOES> @ ;"
+Example: ": CONSTANT CREATE HERE @ ! DOES> @ ;"
 Word references (wordref): When we say we have a "word reference", it's a
 pointer to a words *code link*. For example, the label "PLUS:" in this unit is a
 word reference. Why not refer to the beginning of the word struct? Because we
 actually seldom refer to the name and prev link, except during compilation, so
 defining "word reference" this way makes the code easier to understand.
 Atom: A word of the type compiledWord contains, in its PF, a list of what we
 call "atoms". Those atoms are most of the time word references, but they can
 also be references to NUMBER and LIT.
 *** Native Words ***
-: x ... ;   --              Define a new word
+: x ...     --              Define a new word
 ;           R:I --          Exit a colon definition
 .           n --            Print n in its decimal form
@           a -- n          Set n to value at address a
 !           n a --          Store n in address a
@ -26,18 +37,22 @@ DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
 *           a b -- c        a * b -> c
 /           a b -- c        a / b -> c
 CREATE x    --              Create cell named x. Doesn't allocate a PF.
 CURRENT     -- n            Set n to wordref of last added entry.
 DOES>       --              See description at top of file
 DUP         a -- a a
 ELSE        --              Branch to THEN
 EMIT        c --            Spit char c to stdout
-EXECUTE     a --            Execute word at addr a
+EXECUTE     a --            Execute wordref at addr a
 EXIT        R:I --          Exit a colon definition
 HERE        -- a            Push HERE's address
 IF          n --            Branch to ELSE or THEN if n is zero
 QUIT        R:drop --       Return to interpreter promp immediately
 KEY         -- c            Get char c from stdin
 INTERPRET   --              Get a line from stdin, compile it in tmp memory,
                            then execute the compiled contents.
 OVER        a b -- a b a
 SWAP        a b -- b a
 THEN        --              Does nothing. Serves as a branching merker for IF
                            and ELSE.
 *** Core-but-Forth Words ***
--- a/apps/forth/main.asm
+++ b/apps/forth/main.asm
@ -4,50 +4,20 @@
 ; Number of bytes we keep as a padding between HERE and the scratchpad
 .equ	PADDING		0x20
 ; Max length of dict entry names
-.equ	NAMELEN		8
+.equ	NAMELEN		7
 ; Offset of the code link relative to the beginning of the word
-.equ	CODELINK_OFFSET	10
+.equ	CODELINK_OFFSET	NAMELEN+3
 ; When set, the interpreter should abort parsing of current line and return to
 ; prompt.
 .equ	FLAG_QUITTING	0
 ; When set, the interpreter should quit
 .equ	FLAG_ENDPGM	1
 ; *** Variables ***
 .equ	INITIAL_SP	FORTH_RAMSTART
 .equ	CURRENT		@+2
 .equ	HERE		@+2
 .equ	INPUTPOS	@+2
 .equ	FLAGS		@+2
 ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
-.equ	COMPBUF		@+1
+.equ	COMPBUF		@+2
 .equ	FORTH_RAMEND	@+0x40
 ; *** Code ***
 MAIN:
 	.dw compiledWord
 	.dw INTERPRET+CODELINK_OFFSET
 	.dw CHKEND
 ; If FLAG_ENDPGM is set, stop the program, else, tweak the RS so that we loop.
 CHKEND:
 	.dw nativeWord
 	ld	hl, FLAGS
 	bit	FLAG_ENDPGM, (hl)
 	jr	nz, .endpgm
 	; not quitting program, are we supposed to continue parsing line?
 	ld	hl, FLAGS
 	bit	FLAG_QUITTING, (hl)
 	jr	nz, forthRdLine
 	; Not quitting line either.
 	jr	forthInterpret
 .endpgm:
 	ld	sp, (INITIAL_SP)
 	; restore stack
 	pop	af \ pop af \ pop af
 	xor	a
 	ret
 forthMain:
 	; STACK OVERFLOW PROTECTION:
 	; To avoid having to check for stack underflow after each pop operation
@ -62,16 +32,14 @@ forthMain:
 	ld	hl, FORTH_RAMEND
 	ld	(HERE), hl
 forthRdLine:
 	xor	a
 	ld	(FLAGS), a
 	ld	hl, msgOk
 	call	printstr
 	call	printcrlf
 	call	stdioReadLine
 	ld	(INPUTPOS), hl
 forthInterpret:
-	ld	ix, RS_ADDR
+	ld	ix, RS_ADDR-2		; -2 because we inc-before-push
-	ld	iy, MAIN
+	ld	iy, INTERPRET
 	jp	executeCodeLink
 msgOk:
 	.db	" ok", 0
--- a/apps/forth/stack.asm
+++ b/apps/forth/stack.asm
@ -3,25 +3,26 @@
 ; PS is the most frequently used. However, this causes a problem with routine
 ; calls: because in Forth, the stack isn't balanced within each call, our return
 ; offset, when placed by a CALL, messes everything up. This is one of the
-; reasons why we need stack management routines below.
+; reasons why we need stack management routines below. IX always points to RS'
 ; Top Of Stack (TOS)
 ;
 ; This return stack contain "Interpreter pointers", that is a pointer to the
 ; address of a word, as seen in a compiled list of words.
 ; Push value HL to RS
 pushRS:
 	inc	ix
 	inc	ix
 	ld	(ix), l
-	inc	ix
+	ld	(ix+1), h
 	ld	(ix), h
 	inc	ix
 	ret
 ; Pop RS' TOS to HL
 popRS:
 	dec ix
 	ld	h, (ix)
 	dec ix
 	ld	l, (ix)
 	ld	h, (ix+1)
 	dec ix
 	dec ix
 	ret
 ; Verifies that SP is within bounds. If it's not, call ABORT
--- a/apps/forth/util.asm
+++ b/apps/forth/util.asm
@ -43,14 +43,136 @@ readword:
 	inc	a	; unset Z
 	ret
-; For DE pointing to a dict entry, set DE to point to the previous entry.
+; Sets Z if (HL) == E and (HL+1) == D
 HLPointsDE:
 	ld	a, (hl)
 	cp	e
 	ret	nz		; no
 	inc	hl
 	ld	a, (hl)
 	dec	hl
 	cp	d		; Z has our answer
 	ret
 HLPointsNUMBER:
 	push	de
 	ld	de, NUMBER
 	call	HLPointsDE
 	pop	de
 	ret
 HLPointsLIT:
 	push	de
 	ld	de, LIT
 	call	HLPointsDE
 	pop	de
 	ret
 HLPointsEXIT:
 	push	de
 	ld	de, EXIT
 	call	HLPointsDE
 	pop	de
 	ret
 ; Skip the compword where HL is currently pointing. If it's a regular word,
 ; it's easy: we inc by 2. If it's a NUMBER, we inc by 4. If it's a LIT, we skip
 ; to after null-termination.
 compSkip:
 	call	HLPointsNUMBER
 	jr	z, .isNum
 	call	HLPointsLIT
 	jr	nz, .isWord
 	; We have a literal
 	inc	hl \ inc hl
 	call	strskip
 	inc	hl		; byte after word termination
 	ret
 .isNum:
 	; skip by 4
 	inc	hl \ inc hl
 	; continue to isWord
 .isWord:
 	; skip by 2
 	inc	hl \ inc hl
 	ret
 ; The goal of this routine is to read a string literal following the currently
 ; executed words. For example, CREATE and DEFINE need this. Things are a little
 ; twisted, so bear with me while I explain how it works.
 ;
 ; When we call this routine, everything has been compiled. We're on an atom and
 ; we're executing it. Now, we're looking for a string literal or a word-with-a
 ; name that follows our readCompWord caller. We could think that this word is
 ; right there on RS' TOS, but no! You have to account for words wrapping the
 ; caller. For example, "VARIABLE" calls "CREATE". If you call "VARIABLE foo",
 ; if CREATE looks at what follows in RS' TOS, it will only find the "2" in
 ; "CREATE 2 ALLOT".
 ;
 ; Therefore, we actually need to check in RS' *bottom of stack* for our answer.
 ; If that atom is a LIT, we're good. We make HL point to it and advance IP to
 ; byte following null-termination.
 ;
 ; If it isn't, things get interesting: If it's a word reference, then it's
 ; not an invalid literal. For example, one could want to redefine an existing
 ; word. So in that case, we'll copy the word's name on the pad (it might not be
 ; null-terminated) and set HL to point to it.
 ; How do we know that our reference is a word reference (it could be, for
 ; example, a NUMBER reference)? We check that its address is more than QUIT, the
 ; second word in our dict. We don't accept EXIT because it's the termination
 ; word. Yeah, it means that ";" can't be overridden...
 ; If name can't be read, we abort
 readCompWord:
 	; In all cases, we want RS' BOS in HL. Let's get it now.
 	ld	hl, (RS_ADDR)
 	call	HLPointsLIT
 	jr	nz, .notLIT
 	; RS BOS is a LIT, make HL point to string, then skip this RS compword.
 	inc	hl \ inc hl	; HL now points to string itself
 	push	hl		; --> lvl 1, our result
 	call	strskip
 	inc	hl		; byte after word termination
 	ld	(RS_ADDR), hl
 	pop	hl		; <-- lvl 1, our result
 	ret
 .notLIT:
 	; Alright, not a literal, but is it a word? If it's not a number, then
 	; it's a word.
 	call	HLPointsNUMBER
 	jr	z, .notWord
 	; Not a number, then it's a word. Copy word to pad and point to it.
 	call	intoHL
 	or	a		; clear carry
 	ld	de, CODELINK_OFFSET
 	sbc	hl, de
 	; That's our return value
 	push	hl		; --> lvl 1
 	; HL now points to word offset, let'd copy it to pad
 	ex	de, hl
 	call	pad
 	ex	de, hl
 	ld	bc, NAMELEN
 	ldir
 	; null-terminate
 	xor	a
 	ld	(de), a
 	; Advance RS' BOS by 2
 	ld	hl, RS_ADDR
 	inc	(hl) \ inc (hl)
 	pop	hl		; <-- lvl 1
 	ret
 .notWord:
 	ld	hl, .msg
 	call	printstr
 	jp	abort
 .msg:
 	.db "word expected", 0
 ; For DE being a wordref, move DE to the previous wordref.
 ; Z is set if DE point to 0 (no entry). NZ if not.
 prev:
-	push	hl		; --> lvl 1
+	dec	de \ dec de	; prev field
 	ld	hl, NAMELEN	; prev field offset
 	add	hl, de
 	ex	de, hl
 	pop	hl		; <-- lvl 1
 	call	intoDE
 	; DE points to prev. Is it zero?
 	xor	a
@ -63,15 +185,28 @@ prev:
 ; point to that entry.
 ; Z if found, NZ if not.
 find:
 	push	hl
 	push	bc
 	ld	de, (CURRENT)
 	ld	bc, CODELINK_OFFSET
 .inner:
 	; DE is a wordref, let's go to beginning of struct
 	push	de		; --> lvl 1
 	or	a		; clear carry
 	ex	de, hl
 	sbc	hl, bc
 	ex	de, hl		; We're good, DE points to word name
 	ld	a, NAMELEN
 	call	strncmp
-	ret	z		; found
+	pop	de		; <-- lvl 1, return to wordref
 	jr	z, .end		; found
 	call	prev
 	jr	nz, .inner
 	; Z set? end of dict unset Z
 	inc	a
 .end:
 	pop	bc
 	pop	hl
 	ret
 ; Write compiled data from HL into IY, advancing IY at the same time.
@ -84,49 +219,58 @@ wrCompHL:
 ; Compile word string at (HL) and write down its compiled version in IY,
 ; advancing IY to the byte next to the last written byte.
 ; Set Z on success, unset on failure.
 compile:
 	call	find
 	jr	nz, .maybeNum
-	ret	nz
+	ex	de, hl
 	; DE is a word offset, we need a code link
 	ld	hl, CODELINK_OFFSET
 	add	hl, de
 	xor	a	; set Z
 	jr	wrCompHL
 .maybeNum:
 	push	hl		; --> lvl 1. save string addr
 	call	parseLiteral
-	ret	nz
+	jr	nz, .undef
 	pop	hl		; <-- lvl 1
 	; a valid number!
 	ld	hl, NUMBER
 	call	wrCompHL
 	ex	de, hl		; number in HL
 	jr	wrCompHL
-	ret	z
+.undef:
-	; unknown name
+	; When encountering an undefined word during compilation, we spit a
-	ld	hl, .msg
+	; reference to litWord, followed by the null-terminated word.
-	call	printstr
+	; This way, if a preceding word expect a string literal, it will read it
-	jp	abort
+	; by calling readCompWord, and if it doesn't, the routine will be
-.msg:
+	; called, triggering an abort.
-	.db "unknown name", 0
+	ld	hl, LIT
 	call	wrCompHL
 	pop	hl		; <-- lvl 1. recall string addr
 .writeLit:
 	ld	a, (hl)
 	ld	(iy), a
 	inc	hl
 	inc	iy
 	or	a
 	jr	nz, .writeLit
 	ret
 ; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
 ; HL points to new (HERE)
 ; Set Z if name could be read, NZ if not
 entryhead:
-	call	readword
+	call	readCompWord
 	ret	nz
 	ld	de, (HERE)
 	call	strcpy
 	ex	de, hl		; (HERE) now in HL
 	ld	de, (CURRENT)
 	ld	(CURRENT), hl
 	ld	a, NAMELEN
 	call	addHL
 	xor	a		; IMMED
 	ld	(hl), a
 	inc	hl
 	ld	(hl), e
 	inc	hl
 	ld	(hl), d
 	inc	hl
 	ld	(CURRENT), hl
 	ld	(HERE), hl
 	xor	a		; set Z
 	ret
Author	SHA1	Message	Date
Virgil Dupras	03bd9ee39b	forth: make readCompWord read from RS' BOS instead of TOS Previous approach was broken with regards to defined word using CREATE. Also, reduce name length by one to make space for a new flags field for "immediate" (which isn't used yet).	2020-03-09 19:50:51 -04:00
Virgil Dupras	0e8af3cea4	forth: clarify the meaning of "wordref" Also, make entry labels in dict.asm be wordref instead of entry ref.	2020-03-09 15:12:44 -04:00
Virgil Dupras	e8a4768304	forth: add words "IF", "ELSE", "THEN"	2020-03-09 14:19:51 -04:00
Virgil Dupras	03e529b762	forth: simplify execution model and handle literals better This scheme of "when we handle line-by-line, compile one word at a time then execute" so that we could allow words like "CREATE" to call "readword" before continuing was a bad scheme. It made things like branching outside of a colon definition impossible. This commit implement a new "litWord". When an undefined word is encountered at compile time, it is included as-is in a string literal word. It is at run time that we decide what to do with it.	2020-03-09 14:14:26 -04:00