forth: make readCompWord read from RS' BOS instead of TOS

Previous approach was broken with regards to defined word using CREATE. Also, reduce name length by one to make space for a new flags field for "immediate" (which isn't used yet).
forth: clarify the meaning of "wordref"
2025-02-19 09:06:02 +11:00 · 2020-03-09 19:50:51 -04:00 · 2020-03-09 15:12:44 -04:00 · 2020-03-09 14:19:51 -04:00 · 2020-03-09 14:14:26 -04:00
5 changed files with 435 additions and 205 deletions
--- a/apps/forth/dict.asm
+++ b/apps/forth/dict.asm
@ -1,5 +1,6 @@
 ; A dictionary entry has this structure:
-; - 8b name (zero-padded)
+; - 7b name (zero-padded)
+; - 1b flags (bit 0: IMMEDIATE)
 ; - 2b prev pointer
 ; - 2b code pointer
 ; - Parameter field (PF)
@ -13,11 +14,8 @@
 nativeWord:
 	jp	(iy)

-; Execute a compiled word containing a list of references to other words,
-; usually ended by a reference to EXIT.
-; A reference to a word in a compiledWord section is *not* a direct reference,
-; but a word+CODELINK_OFFSET reference. Therefore, for a code link "link",
-; (link) is the routine to call.
+; Execute a list of atoms, which usually ends with EXIT.
+; IY points to that list.
 compiledWord:
 	push	iy \ pop hl
 	inc	hl
@ -53,6 +51,69 @@ doesWord:
 	push	hl \ pop iy
 	jr	compiledWord

+; The IF word checks the stack for zero. If it's non-zero, it does nothing and
+; allow compiledWord to continue.
+; If it's zero, it tracksback RS, advance it until it finds a ELSE, a THEN, or
+; an EXIT (not supposed to happen unless the IF is misconstructed). Whether
+; it's a ELSE or THEN, the same thing happens: we resume execution after the
+; ELSE/THEN. If it's a EXIT, we simply execute it.
+ifWord:
+	pop	hl
+	ld	a, h
+	or	l
+	jp	nz, exit	; non-zero, continue
+	; Zero, seek ELSE, THEN or EXIT. Continue to elseWord
+
+; If a ELSE word is executed, it means that the preceding IF had a non-zero
+; condition and continued execution. This means that upon encountering an ELSE,
+; we must search for a THEN or an EXIT.
+; To simplify implementation and share code with ifWord, we also match ELSE,
+; which is only possible in malformed construct. Therefore "IF ELSE ELSE" is
+; valid and interpreted as "IF ELSE THEN".
+elseWord:
+	; to save processing, we test EXIT, ELSE and THEN in the order they
+	; appear, address-wise. This way, we don't need to push/pop HL: we can
+	; SUB the difference between the words and check for zeroes.
+	call	popRS
+	; We need to save that IP somewhere. Let it be BC
+	ld	b, h
+	ld	c, l
+.loop:
+	; Whether there's a match or not, we will resume the operation at IP+2,
+	; which means that we have to increase BC anyways. Let's do it now.
+	inc	bc \ inc bc
+	call	intoHL
+	or	a		; clear carry
+	ld	de, EXIT
+	sbc	hl, de
+	jp	z, exit
+	; Not EXIT, let's continue with ELSE. No carry possible because EXIT
+	; is first word. No need to clear.
+	ld	de, ELSE-EXIT
+	sbc	hl, de
+	jr	c, .nomatch	; A word between EXIT and ELSE. No match.
+	jr	z, .match	; We have a ELSE
+	; Let's try with THEN. Again, no carry possible, C cond was handled.
+	ld	de, THEN-ELSE
+	sbc	hl, de
+	jr	z, .match	; We have a THEN
+.nomatch:
+	; Nothing matched, which means that we need to continue looking.
+	; BC is already IP+2
+	ld	h, b
+	ld	l, c
+	jr	.loop
+.match:
+	; Matched a ELSE or a THEN, which means we need to continue executing
+	; word from IP+2, which is already in BC.
+	push	bc \ pop iy
+	jp	compiledWord
+
+; This word does nothing. It's never going to be executed unless the wordlist
+; is misconstructed.
+thenWord:
+	jp	exit
+
 ; This is not a word, but a number literal. This works a bit differently than
 ; others: PF means nothing and the actual number is placed next to the
 ; numberWord reference in the compiled word list. What we need to do to fetch
@ -71,11 +132,28 @@ numberWord:
 NUMBER:
 	.dw	numberWord

+; Similarly to numberWord, this is not a real word, but a string literal.
+; Instead of being followed by a 2 bytes number, it's followed by a
+; null-terminated string. This is not expected to be called in a regular
+; context. Only words expecting those literals will look for them. This is why
+; the litWord triggers abort.
+litWord:
+	call	popRS
+	call	intoHL
+	call	printstr	; let's print the word before abort.
+	ld	hl, .msg
+	call	printstr
+	jp	abort
+.msg:
+	.db "undefined word", 0
+LIT:
+	.dw	litWord

 ; ( R:I -- )
-EXIT:
-	.db "EXIT", 0, 0, 0, 0
+	.db ";"
+	.fill 7
 	.dw 0
+EXIT:
 	.dw nativeWord
 ; When we call the EXIT word, we have to do a "double exit" because our current
 ; Interpreter pointer is pointing to the word *next* to our EXIT reference when,
@ -88,44 +166,45 @@ exit:
 	call	popRS
 	; We have a pointer to a word
 	push	hl \ pop iy
-	jr	compiledWord
+	jp	compiledWord

 ; ( R:I -- )
-QUIT:
-	.db "QUIT", 0, 0, 0, 0
+	.db "QUIT"
+	.fill 4
 	.dw EXIT
+QUIT:
 	.dw nativeWord
 quit:
-	ld	hl, FLAGS
-	set	FLAG_QUITTING, (hl)
-	jr	exit
+	jp	forthRdLine

-ABORT:
-	.db "ABORT", 0, 0, 0
+	.db "ABORT"
+	.fill 3
 	.dw QUIT
+ABORT:
 	.dw nativeWord
 abort:
+	; Reinitialize PS (RS is reinitialized in forthInterpret
 	ld	sp, (INITIAL_SP)
-	ld	hl, .msg
-	call	printstr
-	call	printcrlf
-	jr	quit
-.msg:
-	.db " err", 0
+	jp	forthRdLine

-BYE:
 	.db "BYE"
 	.fill 5
 	.dw ABORT
+BYE:
 	.dw nativeWord
-	ld	hl, FLAGS
-	set	FLAG_ENDPGM, (hl)
-	jp	exit
+	; Goodbye Forth! Before we go, let's restore the stack
+	ld	sp, (INITIAL_SP)
+	; unwind stack underflow buffer
+	pop	af \ pop af \ pop af
+	; success
+	xor	a
+	ret

 ; ( c -- )
-EMIT:
-	.db "EMIT", 0, 0, 0, 0
+	.db "EMIT"
+	.fill 4
 	.dw BYE
+EMIT:
 	.dw nativeWord
 	pop	hl
 	ld	a, l
@ -133,13 +212,12 @@ EMIT:
 	jp	exit

 ; ( addr -- )
-EXECUTE:
-	.db "EXECUTE", 0
+	.db "EXECUTE"
+	.db 0
 	.dw EMIT
+EXECUTE:
 	.dw nativeWord
-	pop	iy	; Points to word_offset
-	ld	de, CODELINK_OFFSET
-	add	iy, de
+	pop	iy	; is a wordref
 executeCodeLink:
 	ld	l, (iy)
 	ld	h, (iy+1)
@ -149,46 +227,54 @@ executeCodeLink:
 	; IY points to PFA
 	jp	(hl)	; go!

-DEFINE:
 	.db ":"
 	.fill 7
 	.dw EXECUTE
+DEFINE:
 	.dw nativeWord
 	call	entryhead
-	jp	nz, quit
 	ld	de, compiledWord
 	ld	(hl), e
 	inc	hl
 	ld	(hl), d
 	inc	hl
-	push	hl \ pop iy
+	; At this point, we've processed the name literal following the ':'.
+	; What's next? We have, in IP, a pointer to words that *have already
+	; been compiled by INTERPRET*. All those bytes will be copied as-is.
+	; All we need to do is to know how many bytes to copy. To do so, we
+	; skip compwords until EXIT is reached.
+	ld	(HERE), hl	; where we write compwords.
+	ld	l, (ix)
+	ld	h, (ix+1)
 .loop:
-	call	readword
-	jr	nz, .end
-	call	.issemicol
-	jr	z, .end
-	call	compile
-	jp	nz, quit
+	call	HLPointsEXIT
+	jr	z, .loopend
+	call	compSkip
 	jr	.loop
-.end:
-	; end chain with EXIT
-	ld	hl, EXIT+CODELINK_OFFSET
-	call	wrCompHL
-	ld	(HERE), iy
+.loopend:
+	; At this point, HL points to EXIT compword. We'll copy it too.
+	; We'll use LDIR. BC will be RSTOP-OLDRSTOP+2
+	ld	e, (ix)
+	ld	d, (ix+1)
+	inc	hl \ inc hl	; our +2
+	or	a		; clear carry
+	sbc	hl, de
+	ld	b, h
+	ld	c, l
+	; BC has proper count
+	ex	de, hl		; HL is our source (old RS' TOS)
+	ld	de, (HERE)	; and DE is our dest
+	ldir			; go!
+	; HL has our new RS' TOS
+	ld	(ix), l
+	ld	(ix+1), h
+	ld	(HERE), de	; update HERE
 	jp	exit
-.issemicol:
-	ld	a, (hl)
-	cp	';'
-	ret	nz
-	inc	hl
-	ld	a, (hl)
-	dec	hl
-	or	a
-	ret

-DOES:
-	.db "DOES>", 0, 0, 0
+	.db "DOES>"
+	.fill 3
 	.dw DEFINE
+DOES:
 	.dw nativeWord
 	; We run this when we're in an entry creation context. Many things we
 	; need to do.
@ -199,8 +285,6 @@ DOES:
 	; 3. exit. Because we've already popped RS, a regular exit will abort
 	;    colon definition, so we're good.
 	ld	iy, (CURRENT)
-	ld	de, CODELINK_OFFSET
-	add	iy, de
 	ld	hl, doesWord
 	call	wrCompHL
 	inc	iy \ inc iy		; cell variable space
@ -210,10 +294,10 @@ DOES:
 	jp	exit

 ; ( -- c )
-KEY:
 	.db "KEY"
 	.fill 5
 	.dw DOES
+KEY:
 	.dw nativeWord
 	call	stdioGetC
 	ld	h, 0
@ -221,31 +305,28 @@ KEY:
 	push	hl
 	jp	exit

-INTERPRET:
-	.db "INTERPRE"
+	.db "INTERPR"
+	.db 0
 	.dw KEY
+INTERPRET:
 	.dw nativeWord
 interpret:
-	call	readword
-	jp	nz, quit
 	ld	iy, COMPBUF
+.loop:
+	call	readword
+	jr	nz, .end
 	call	compile
-	jp	nz, .notfound
-	ld	hl, EXIT+CODELINK_OFFSET
-	ld	(iy), l
-	ld	(iy+1), h
+	jr	.loop
+.end:
+	ld	hl, QUIT
+	call	wrCompHL
 	ld	iy, COMPBUF
 	jp	compiledWord
-.notfound:
-	ld	hl, .msg
-	call	printstr
-	jp	quit
-.msg:
-	.db	"not found", 0

-CREATE:
-	.db "CREATE", 0, 0
+	.db "CREATE"
+	.fill 2
 	.dw INTERPRET
+CREATE:
 	.dw nativeWord
 	call	entryhead
 	jp	nz, quit
@ -257,24 +338,25 @@ CREATE:
 	ld	(HERE), hl
 	jp	exit

-HERE_:	; Caution: conflicts with actual variable name
 	.db "HERE"
 	.fill 4
 	.dw CREATE
+HERE_:	; Caution: conflicts with actual variable name
 	.dw sysvarWord
 	.dw HERE

-CURRENT_:
-	.db "CURRENT", 0
+	.db "CURRENT"
+	.db 0
 	.dw HERE_
+CURRENT_:
 	.dw sysvarWord
 	.dw CURRENT

 ; ( n -- )
-DOT:
 	.db "."
 	.fill 7
 	.dw CURRENT_
+DOT:
 	.dw nativeWord
 	pop	de
 	; We check PS explicitly because it doesn't look nice to spew gibberish
@ -286,10 +368,10 @@ DOT:
 	jp	exit

 ; ( n a -- )
-STORE:
 	.db "!"
 	.fill 7
 	.dw DOT
+STORE:
 	.dw nativeWord
 	pop	iy
 	pop	hl
@ -298,10 +380,10 @@ STORE:
 	jp	exit

 ; ( a -- n )
-FETCH:
 	.db "@"
 	.fill 7
 	.dw STORE
+FETCH:
 	.dw nativeWord
 	pop	hl
 	call	intoHL
@ -309,10 +391,10 @@ FETCH:
 	jp	exit

 ; ( a b -- b a )
-SWAP:
 	.db "SWAP"
 	.fill 4
 	.dw FETCH
+SWAP:
 	.dw nativeWord
 	pop	hl
 	ex	(sp), hl
@ -320,10 +402,10 @@ SWAP:
 	jp	exit

 ; ( a -- a a )
-DUP:
 	.db "DUP"
 	.fill 5
 	.dw SWAP
+DUP:
 	.dw nativeWord
 	pop	hl
 	push	hl
@ -331,10 +413,10 @@ DUP:
 	jp	exit

 ; ( a b -- a b a )
-OVER:
 	.db "OVER"
 	.fill 4
 	.dw DUP
+OVER:
 	.dw nativeWord
 	pop	hl	; B
 	pop	de	; A
@ -344,10 +426,10 @@ OVER:
 	jp	exit

 ; ( a b -- c ) A + B
-PLUS:
 	.db "+"
 	.fill 7
 	.dw OVER
+PLUS:
 	.dw nativeWord
 	pop	hl
 	pop	de
@ -356,10 +438,10 @@ PLUS:
 	jp	exit

 ; ( a b -- c ) A - B
-MINUS:
 	.db "-"
 	.fill 7
 	.dw PLUS
+MINUS:
 	.dw nativeWord
 	pop	de		; B
 	pop	hl		; A
@ -369,10 +451,10 @@ MINUS:
 	jp	exit

 ; ( a b -- c ) A * B
-MULT:
 	.db "*"
 	.fill 7
 	.dw MINUS
+MULT:
 	.dw nativeWord
 	pop	de
 	pop	bc
@ -381,10 +463,10 @@ MULT:
 	jp	exit

 ; ( a b -- c ) A / B
-DIV:
 	.db "/"
 	.fill 7
 	.dw MULT
+DIV:
 	.dw nativeWord
 	pop	de
 	pop	hl
@ -392,66 +474,86 @@ DIV:
 	push	bc
 	jp	exit

+	.db "IF"
+	.fill 6
+	.dw DIV
+IF:
+	.dw ifWord
+
+	.db "ELSE"
+	.fill 4
+	.dw IF
+ELSE:
+	.dw elseWord
+
+	.db "THEN"
+	.fill 4
+	.dw ELSE
+THEN:
+	.dw thenWord
+
 ; End of native words

 ; ( a -- )
 ; @ .
-FETCHDOT:
 	.db "?"
 	.fill 7
-	.dw DIV
+	.dw THEN
+FETCHDOT:
 	.dw compiledWord
-	.dw FETCH+CODELINK_OFFSET
-	.dw DOT+CODELINK_OFFSET
-	.dw EXIT+CODELINK_OFFSET
+	.dw FETCH
+	.dw DOT
+	.dw EXIT

 ; ( n a -- )
 ; SWAP OVER @ + SWAP !
-STOREINC:
 	.db "+!"
 	.fill 6
 	.dw FETCHDOT
+STOREINC:
 	.dw compiledWord
-	.dw SWAP+CODELINK_OFFSET
-	.dw OVER+CODELINK_OFFSET
-	.dw FETCH+CODELINK_OFFSET
-	.dw PLUS+CODELINK_OFFSET
-	.dw SWAP+CODELINK_OFFSET
-	.dw STORE+CODELINK_OFFSET
-	.dw EXIT+CODELINK_OFFSET
+	.dw SWAP
+	.dw OVER
+	.dw FETCH
+	.dw PLUS
+	.dw SWAP
+	.dw STORE
+	.dw EXIT

 ; ( n -- )
 ; HERE +!
-ALLOT:
-	.db "ALLOT", 0, 0, 0
+	.db "ALLOT"
+	.fill 3
 	.dw STOREINC
+ALLOT:
 	.dw compiledWord
-	.dw HERE_+CODELINK_OFFSET
-	.dw STOREINC+CODELINK_OFFSET
-	.dw EXIT+CODELINK_OFFSET
+	.dw HERE_
+	.dw STOREINC
+	.dw EXIT

 ; CREATE 2 ALLOT
-VARIABLE:
-	.db "VARIABLE"
+	.db "VARIABL"
+	.db 0
 	.dw ALLOT
+VARIABLE:
 	.dw compiledWord
-	.dw CREATE+CODELINK_OFFSET
+	.dw CREATE
 	.dw NUMBER
 	.dw 2
-	.dw ALLOT+CODELINK_OFFSET
-	.dw EXIT+CODELINK_OFFSET
+	.dw ALLOT
+	.dw EXIT

 ; ( n -- )
 ; CREATE HERE @ ! DOES> @
-CONSTANT:
-	.db "CONSTANT"
+	.db "CONSTAN"
+	.db 0
 	.dw VARIABLE
+CONSTANT:
 	.dw compiledWord
-	.dw CREATE+CODELINK_OFFSET
-	.dw HERE_+CODELINK_OFFSET
-	.dw FETCH+CODELINK_OFFSET
-	.dw STORE+CODELINK_OFFSET
-	.dw DOES+CODELINK_OFFSET
-	.dw FETCH+CODELINK_OFFSET
-	.dw EXIT+CODELINK_OFFSET
-
+	.dw CREATE
+	.dw HERE_
+	.dw FETCH
+	.dw STORE
+	.dw DOES
+	.dw FETCH
+	.dw EXIT
--- a/apps/forth/dictionary.txt
+++ b/apps/forth/dictionary.txt
@ -1,23 +1,34 @@
 Stack notation: "<stack before> -- <stack after>". Rightmost is top of stack
-                (TOS). For example, in "a b -- c d", b is TOS before, d is TOS
-                after. "R:" means that the Return Stack is modified.
+(TOS). For example, in "a b -- c d", b is TOS before, d is TOS after. "R:" means
+that the Return Stack is modified.

 DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
-       that newly created word into a "does cell", that is, a regular cell (
-       when called, puts the cell's addr on PS), but right after that, it
-       executes words that appear after the DOES>.
+that newly created word into a "does cell", that is, a regular cell ( when
+called, puts the cell's addr on PS), but right after that, it executes words
+that appear after the DOES>.

-       "does cells" always allocate 4 bytes (2 for the cell, 2 for the DOES>
-       link) and there is no need for ALLOT in colon definition.
+"does cells" always allocate 4 bytes (2 for the cell, 2 for the DOES> link) and
+there is no need for ALLOT in colon definition.

-       At compile time, colon definition stops processing words when reaching
-       the DOES>.
+At compile time, colon definition stops processing words when reaching the
+DOES>.

 Example: ": CONSTANT CREATE HERE @ ! DOES> @ ;"

+Word references (wordref): When we say we have a "word reference", it's a
+pointer to a words *code link*. For example, the label "PLUS:" in this unit is a
+word reference. Why not refer to the beginning of the word struct? Because we
+actually seldom refer to the name and prev link, except during compilation, so
+defining "word reference" this way makes the code easier to understand.
+
+Atom: A word of the type compiledWord contains, in its PF, a list of what we
+call "atoms". Those atoms are most of the time word references, but they can
+also be references to NUMBER and LIT.
+
 *** Native Words ***

-: x ... ;   --              Define a new word
+: x ...     --              Define a new word
+;           R:I --          Exit a colon definition
 .           n --            Print n in its decimal form
@           a -- n          Set n to value at address a
 !           n a --          Store n in address a
@ -26,18 +37,22 @@ DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
 *           a b -- c        a * b -> c
 /           a b -- c        a / b -> c
 CREATE x    --              Create cell named x. Doesn't allocate a PF.
+CURRENT     -- n            Set n to wordref of last added entry.
 DOES>       --              See description at top of file
 DUP         a -- a a
+ELSE        --              Branch to THEN
 EMIT        c --            Spit char c to stdout
-EXECUTE     a --            Execute word at addr a
-EXIT        R:I --          Exit a colon definition
+EXECUTE     a --            Execute wordref at addr a
 HERE        -- a            Push HERE's address
+IF          n --            Branch to ELSE or THEN if n is zero
 QUIT        R:drop --       Return to interpreter promp immediately
 KEY         -- c            Get char c from stdin
 INTERPRET   --              Get a line from stdin, compile it in tmp memory,
                            then execute the compiled contents.
 OVER        a b -- a b a
 SWAP        a b -- b a
+THEN        --              Does nothing. Serves as a branching merker for IF
+                            and ELSE.

 *** Core-but-Forth Words ***

--- a/apps/forth/main.asm
+++ b/apps/forth/main.asm
@ -4,50 +4,20 @@
 ; Number of bytes we keep as a padding between HERE and the scratchpad
 .equ	PADDING		0x20
 ; Max length of dict entry names
-.equ	NAMELEN		8
+.equ	NAMELEN		7
 ; Offset of the code link relative to the beginning of the word
-.equ	CODELINK_OFFSET	10
-; When set, the interpreter should abort parsing of current line and return to
-; prompt.
-.equ	FLAG_QUITTING	0
-; When set, the interpreter should quit
-.equ	FLAG_ENDPGM	1
+.equ	CODELINK_OFFSET	NAMELEN+3

 ; *** Variables ***
 .equ	INITIAL_SP	FORTH_RAMSTART
 .equ	CURRENT		@+2
 .equ	HERE		@+2
 .equ	INPUTPOS	@+2
-.equ	FLAGS		@+2
 ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
-.equ	COMPBUF		@+1
+.equ	COMPBUF		@+2
 .equ	FORTH_RAMEND	@+0x40

 ; *** Code ***
-MAIN:
-	.dw compiledWord
-	.dw INTERPRET+CODELINK_OFFSET
-	.dw CHKEND
-
-; If FLAG_ENDPGM is set, stop the program, else, tweak the RS so that we loop.
-CHKEND:
-	.dw nativeWord
-	ld	hl, FLAGS
-	bit	FLAG_ENDPGM, (hl)
-	jr	nz, .endpgm
-	; not quitting program, are we supposed to continue parsing line?
-	ld	hl, FLAGS
-	bit	FLAG_QUITTING, (hl)
-	jr	nz, forthRdLine
-	; Not quitting line either.
-	jr	forthInterpret
-.endpgm:
-	ld	sp, (INITIAL_SP)
-	; restore stack
-	pop	af \ pop af \ pop af
-	xor	a
-	ret
-
 forthMain:
 	; STACK OVERFLOW PROTECTION:
 	; To avoid having to check for stack underflow after each pop operation
@ -62,16 +32,14 @@ forthMain:
 	ld	hl, FORTH_RAMEND
 	ld	(HERE), hl
 forthRdLine:
-	xor	a
-	ld	(FLAGS), a
 	ld	hl, msgOk
 	call	printstr
 	call	printcrlf
 	call	stdioReadLine
 	ld	(INPUTPOS), hl
 forthInterpret:
-	ld	ix, RS_ADDR
-	ld	iy, MAIN
+	ld	ix, RS_ADDR-2		; -2 because we inc-before-push
+	ld	iy, INTERPRET
 	jp	executeCodeLink
 msgOk:
 	.db	" ok", 0
--- a/apps/forth/stack.asm
+++ b/apps/forth/stack.asm
@ -3,25 +3,26 @@
 ; PS is the most frequently used. However, this causes a problem with routine
 ; calls: because in Forth, the stack isn't balanced within each call, our return
 ; offset, when placed by a CALL, messes everything up. This is one of the
-; reasons why we need stack management routines below.
+; reasons why we need stack management routines below. IX always points to RS'
+; Top Of Stack (TOS)
 ;
 ; This return stack contain "Interpreter pointers", that is a pointer to the
 ; address of a word, as seen in a compiled list of words.

 ; Push value HL to RS
 pushRS:
+	inc	ix
+	inc	ix
 	ld	(ix), l
-	inc	ix
-	ld	(ix), h
-	inc	ix
+	ld	(ix+1), h
 	ret

 ; Pop RS' TOS to HL
 popRS:
-	dec ix
-	ld	h, (ix)
-	dec ix
 	ld	l, (ix)
+	ld	h, (ix+1)
+	dec ix
+	dec ix
 	ret

 ; Verifies that SP is within bounds. If it's not, call ABORT
--- a/apps/forth/util.asm
+++ b/apps/forth/util.asm
@ -43,14 +43,136 @@ readword:
 	inc	a	; unset Z
 	ret

-; For DE pointing to a dict entry, set DE to point to the previous entry.
+; Sets Z if (HL) == E and (HL+1) == D
+HLPointsDE:
+	ld	a, (hl)
+	cp	e
+	ret	nz		; no
+	inc	hl
+	ld	a, (hl)
+	dec	hl
+	cp	d		; Z has our answer
+	ret
+
+
+HLPointsNUMBER:
+	push	de
+	ld	de, NUMBER
+	call	HLPointsDE
+	pop	de
+	ret
+
+HLPointsLIT:
+	push	de
+	ld	de, LIT
+	call	HLPointsDE
+	pop	de
+	ret
+
+HLPointsEXIT:
+	push	de
+	ld	de, EXIT
+	call	HLPointsDE
+	pop	de
+	ret
+
+; Skip the compword where HL is currently pointing. If it's a regular word,
+; it's easy: we inc by 2. If it's a NUMBER, we inc by 4. If it's a LIT, we skip
+; to after null-termination.
+compSkip:
+	call	HLPointsNUMBER
+	jr	z, .isNum
+	call	HLPointsLIT
+	jr	nz, .isWord
+	; We have a literal
+	inc	hl \ inc hl
+	call	strskip
+	inc	hl		; byte after word termination
+	ret
+.isNum:
+	; skip by 4
+	inc	hl \ inc hl
+	; continue to isWord
+.isWord:
+	; skip by 2
+	inc	hl \ inc hl
+	ret
+
+; The goal of this routine is to read a string literal following the currently
+; executed words. For example, CREATE and DEFINE need this. Things are a little
+; twisted, so bear with me while I explain how it works.
+;
+; When we call this routine, everything has been compiled. We're on an atom and
+; we're executing it. Now, we're looking for a string literal or a word-with-a
+; name that follows our readCompWord caller. We could think that this word is
+; right there on RS' TOS, but no! You have to account for words wrapping the
+; caller. For example, "VARIABLE" calls "CREATE". If you call "VARIABLE foo",
+; if CREATE looks at what follows in RS' TOS, it will only find the "2" in
+; "CREATE 2 ALLOT".
+;
+; Therefore, we actually need to check in RS' *bottom of stack* for our answer.
+; If that atom is a LIT, we're good. We make HL point to it and advance IP to
+; byte following null-termination.
+;
+; If it isn't, things get interesting: If it's a word reference, then it's
+; not an invalid literal. For example, one could want to redefine an existing
+; word. So in that case, we'll copy the word's name on the pad (it might not be
+; null-terminated) and set HL to point to it.
+; How do we know that our reference is a word reference (it could be, for
+; example, a NUMBER reference)? We check that its address is more than QUIT, the
+; second word in our dict. We don't accept EXIT because it's the termination
+; word. Yeah, it means that ";" can't be overridden...
+; If name can't be read, we abort
+readCompWord:
+	; In all cases, we want RS' BOS in HL. Let's get it now.
+	ld	hl, (RS_ADDR)
+	call	HLPointsLIT
+	jr	nz, .notLIT
+	; RS BOS is a LIT, make HL point to string, then skip this RS compword.
+	inc	hl \ inc hl	; HL now points to string itself
+	push	hl		; --> lvl 1, our result
+	call	strskip
+	inc	hl		; byte after word termination
+	ld	(RS_ADDR), hl
+	pop	hl		; <-- lvl 1, our result
+	ret
+.notLIT:
+	; Alright, not a literal, but is it a word? If it's not a number, then
+	; it's a word.
+	call	HLPointsNUMBER
+	jr	z, .notWord
+	; Not a number, then it's a word. Copy word to pad and point to it.
+	call	intoHL
+	or	a		; clear carry
+	ld	de, CODELINK_OFFSET
+	sbc	hl, de
+	; That's our return value
+	push	hl		; --> lvl 1
+	; HL now points to word offset, let'd copy it to pad
+	ex	de, hl
+	call	pad
+	ex	de, hl
+	ld	bc, NAMELEN
+	ldir
+	; null-terminate
+	xor	a
+	ld	(de), a
+	; Advance RS' BOS by 2
+	ld	hl, RS_ADDR
+	inc	(hl) \ inc (hl)
+	pop	hl		; <-- lvl 1
+	ret
+.notWord:
+	ld	hl, .msg
+	call	printstr
+	jp	abort
+.msg:
+	.db "word expected", 0
+
+; For DE being a wordref, move DE to the previous wordref.
 ; Z is set if DE point to 0 (no entry). NZ if not.
 prev:
-	push	hl		; --> lvl 1
-	ld	hl, NAMELEN	; prev field offset
-	add	hl, de
-	ex	de, hl
-	pop	hl		; <-- lvl 1
+	dec	de \ dec de	; prev field
 	call	intoDE
 	; DE points to prev. Is it zero?
 	xor	a
@ -63,15 +185,28 @@ prev:
 ; point to that entry.
 ; Z if found, NZ if not.
 find:
+	push	hl
+	push	bc
 	ld	de, (CURRENT)
+	ld	bc, CODELINK_OFFSET
 .inner:
+	; DE is a wordref, let's go to beginning of struct
+	push	de		; --> lvl 1
+	or	a		; clear carry
+	ex	de, hl
+	sbc	hl, bc
+	ex	de, hl		; We're good, DE points to word name
 	ld	a, NAMELEN
 	call	strncmp
-	ret	z		; found
+	pop	de		; <-- lvl 1, return to wordref
+	jr	z, .end		; found
 	call	prev
 	jr	nz, .inner
 	; Z set? end of dict unset Z
 	inc	a
+.end:
+	pop	bc
+	pop	hl
 	ret

 ; Write compiled data from HL into IY, advancing IY at the same time.
@ -84,49 +219,58 @@ wrCompHL:

 ; Compile word string at (HL) and write down its compiled version in IY,
 ; advancing IY to the byte next to the last written byte.
-; Set Z on success, unset on failure.
 compile:
 	call	find
 	jr	nz, .maybeNum
-	ret	nz
-	; DE is a word offset, we need a code link
-	ld	hl, CODELINK_OFFSET
-	add	hl, de
-	xor	a	; set Z
+	ex	de, hl
 	jr	wrCompHL
 .maybeNum:
+	push	hl		; --> lvl 1. save string addr
 	call	parseLiteral
-	ret	nz
+	jr	nz, .undef
+	pop	hl		; <-- lvl 1
 	; a valid number!
 	ld	hl, NUMBER
 	call	wrCompHL
 	ex	de, hl		; number in HL
 	jr	wrCompHL
-	ret	z
-	; unknown name
-	ld	hl, .msg
-	call	printstr
-	jp	abort
-.msg:
-	.db "unknown name", 0
+.undef:
+	; When encountering an undefined word during compilation, we spit a
+	; reference to litWord, followed by the null-terminated word.
+	; This way, if a preceding word expect a string literal, it will read it
+	; by calling readCompWord, and if it doesn't, the routine will be
+	; called, triggering an abort.
+	ld	hl, LIT
+	call	wrCompHL
+	pop	hl		; <-- lvl 1. recall string addr
+.writeLit:
+	ld	a, (hl)
+	ld	(iy), a
+	inc	hl
+	inc	iy
+	or	a
+	jr	nz, .writeLit
+	ret
+

 ; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
 ; HL points to new (HERE)
-; Set Z if name could be read, NZ if not
 entryhead:
-	call	readword
-	ret	nz
+	call	readCompWord
 	ld	de, (HERE)
 	call	strcpy
 	ex	de, hl		; (HERE) now in HL
 	ld	de, (CURRENT)
-	ld	(CURRENT), hl
 	ld	a, NAMELEN
 	call	addHL
+	xor	a		; IMMED
+	ld	(hl), a
+	inc	hl
 	ld	(hl), e
 	inc	hl
 	ld	(hl), d
 	inc	hl
+	ld	(CURRENT), hl
 	ld	(HERE), hl
 	xor	a		; set Z
 	ret
Author	SHA1	Message	Date
Virgil Dupras	03bd9ee39b	forth: make readCompWord read from RS' BOS instead of TOS Previous approach was broken with regards to defined word using CREATE. Also, reduce name length by one to make space for a new flags field for "immediate" (which isn't used yet).	2020-03-09 19:50:51 -04:00
Virgil Dupras	0e8af3cea4	forth: clarify the meaning of "wordref" Also, make entry labels in dict.asm be wordref instead of entry ref.	2020-03-09 15:12:44 -04:00
Virgil Dupras	e8a4768304	forth: add words "IF", "ELSE", "THEN"	2020-03-09 14:19:51 -04:00
Virgil Dupras	03e529b762	forth: simplify execution model and handle literals better This scheme of "when we handle line-by-line, compile one word at a time then execute" so that we could allow words like "CREATE" to call "readword" before continuing was a bad scheme. It made things like branching outside of a colon definition impossible. This commit implement a new "litWord". When an undefined word is encountered at compile time, it is included as-is in a string literal word. It is at run time that we decide what to do with it.	2020-03-09 14:14:26 -04:00