forth: simplify execution model and handle literals better

This scheme of "when we handle line-by-line, compile one word at a time then
execute" so that we could allow words like "CREATE" to call "readword" before
continuing was a bad scheme. It made things like branching outside of a colon
definition impossible.

This commit implement a new "litWord". When an undefined word is encountered at
compile time, it is included as-is in a string literal word. It is at run time
that we decide what to do with it.
This commit is contained in:
Virgil Dupras 2020-03-09 08:49:51 -04:00
parent 5cadde557c
commit 03e529b762
5 changed files with 222 additions and 106 deletions

View File

@ -71,10 +71,27 @@ numberWord:
NUMBER:
.dw numberWord
; Similarly to numberWord, this is not a real word, but a string literal.
; Instead of being followed by a 2 bytes number, it's followed by a
; null-terminated string. This is not expected to be called in a regular
; context. Only words expecting those literals will look for them. This is why
; the litWord triggers abort.
litWord:
call popRS
call intoHL
call printstr ; let's print the word before abort.
ld hl, .msg
call printstr
jp abort
.msg:
.db "undefined word", 0
LIT:
.dw litWord
; ( R:I -- )
EXIT:
.db "EXIT", 0, 0, 0, 0
.db ";"
.fill 7
.dw 0
.dw nativeWord
; When we call the EXIT word, we have to do a "double exit" because our current
@ -88,7 +105,7 @@ exit:
call popRS
; We have a pointer to a word
push hl \ pop iy
jr compiledWord
jp compiledWord
; ( R:I -- )
QUIT:
@ -96,31 +113,29 @@ QUIT:
.dw EXIT
.dw nativeWord
quit:
ld hl, FLAGS
set FLAG_QUITTING, (hl)
jr exit
jp forthRdLine
ABORT:
.db "ABORT", 0, 0, 0
.dw QUIT
.dw nativeWord
abort:
; Reinitialize PS (RS is reinitialized in forthInterpret
ld sp, (INITIAL_SP)
ld hl, .msg
call printstr
call printcrlf
jr quit
.msg:
.db " err", 0
jp forthRdLine
BYE:
.db "BYE"
.fill 5
.dw ABORT
.dw nativeWord
ld hl, FLAGS
set FLAG_ENDPGM, (hl)
jp exit
; Goodbye Forth! Before we go, let's restore the stack
ld sp, (INITIAL_SP)
; unwind stack underflow buffer
pop af \ pop af \ pop af
; success
xor a
ret
; ( c -- )
EMIT:
@ -155,36 +170,44 @@ DEFINE:
.dw EXECUTE
.dw nativeWord
call entryhead
jp nz, quit
ld de, compiledWord
ld (hl), e
inc hl
ld (hl), d
inc hl
push hl \ pop iy
; At this point, we've processed the name literal following the ':'.
; What's next? We have, in IP, a pointer to words that *have already
; been compiled by INTERPRET*. All those bytes will be copied as-is.
; All we need to do is to know how many bytes to copy. To do so, we
; skip compwords until EXIT is reached.
ld (HERE), hl ; where we write compwords.
; Let's save old RS TOS
ld e, (ix)
ld d, (ix+1)
.loop:
call readword
jr nz, .end
call .issemicol
jr z, .end
call compile
jp nz, quit
call RSIsEXIT
jr z, .loopend
call compSkip
jr .loop
.end:
; end chain with EXIT
ld hl, EXIT+CODELINK_OFFSET
call wrCompHL
ld (HERE), iy
.loopend:
; At this point, RS' TOS points to EXIT compword. We'll copy it too.
; We'll use LDIR. BC will be RSTOP-OLDRSTOP+2
ld l, (ix)
ld h, (ix+1)
inc hl \ inc hl ; our +2
or a ; clear carry
sbc hl, de
ld b, h
ld c, l
; BC has proper count
ex de, hl ; HL is our source (old RS' TOS)
ld de, (HERE) ; and DE is our dest
ldir ; go!
; HL has our new RS' TOS
ld (ix), l
ld (ix+1), h
ld (HERE), de ; update HERE
jp exit
.issemicol:
ld a, (hl)
cp ';'
ret nz
inc hl
ld a, (hl)
dec hl
or a
ret
DOES:
.db "DOES>", 0, 0, 0
@ -226,22 +249,17 @@ INTERPRET:
.dw KEY
.dw nativeWord
interpret:
call readword
jp nz, quit
ld iy, COMPBUF
.loop:
call readword
jr nz, .end
call compile
jp nz, .notfound
ld hl, EXIT+CODELINK_OFFSET
ld (iy), l
ld (iy+1), h
jr .loop
.end:
ld hl, QUIT+CODELINK_OFFSET
call wrCompHL
ld iy, COMPBUF
jp compiledWord
.notfound:
ld hl, .msg
call printstr
jp quit
.msg:
.db "not found", 0
CREATE:
.db "CREATE", 0, 0
@ -454,4 +472,3 @@ CONSTANT:
.dw DOES+CODELINK_OFFSET
.dw FETCH+CODELINK_OFFSET
.dw EXIT+CODELINK_OFFSET

View File

@ -17,7 +17,8 @@ DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
*** Native Words ***
: x ... ; -- Define a new word
: x ... -- Define a new word
; R:I -- Exit a colon definition
. n -- Print n in its decimal form
@ a -- n Set n to value at address a
! n a -- Store n in address a
@ -30,7 +31,6 @@ DOES> -- See description at top of file
DUP a -- a a
EMIT c -- Spit char c to stdout
EXECUTE a -- Execute word at addr a
EXIT R:I -- Exit a colon definition
HERE -- a Push HERE's address
QUIT R:drop -- Return to interpreter promp immediately
KEY -- c Get char c from stdin

View File

@ -7,47 +7,17 @@
.equ NAMELEN 8
; Offset of the code link relative to the beginning of the word
.equ CODELINK_OFFSET 10
; When set, the interpreter should abort parsing of current line and return to
; prompt.
.equ FLAG_QUITTING 0
; When set, the interpreter should quit
.equ FLAG_ENDPGM 1
; *** Variables ***
.equ INITIAL_SP FORTH_RAMSTART
.equ CURRENT @+2
.equ HERE @+2
.equ INPUTPOS @+2
.equ FLAGS @+2
; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
.equ COMPBUF @+1
.equ COMPBUF @+2
.equ FORTH_RAMEND @+0x40
; *** Code ***
MAIN:
.dw compiledWord
.dw INTERPRET+CODELINK_OFFSET
.dw CHKEND
; If FLAG_ENDPGM is set, stop the program, else, tweak the RS so that we loop.
CHKEND:
.dw nativeWord
ld hl, FLAGS
bit FLAG_ENDPGM, (hl)
jr nz, .endpgm
; not quitting program, are we supposed to continue parsing line?
ld hl, FLAGS
bit FLAG_QUITTING, (hl)
jr nz, forthRdLine
; Not quitting line either.
jr forthInterpret
.endpgm:
ld sp, (INITIAL_SP)
; restore stack
pop af \ pop af \ pop af
xor a
ret
forthMain:
; STACK OVERFLOW PROTECTION:
; To avoid having to check for stack underflow after each pop operation
@ -62,16 +32,14 @@ forthMain:
ld hl, FORTH_RAMEND
ld (HERE), hl
forthRdLine:
xor a
ld (FLAGS), a
ld hl, msgOk
call printstr
call printcrlf
call stdioReadLine
ld (INPUTPOS), hl
forthInterpret:
ld ix, RS_ADDR
ld iy, MAIN
ld ix, RS_ADDR-2 ; -2 because we inc-before-push
ld iy, INTERPRET+CODELINK_OFFSET
jp executeCodeLink
msgOk:
.db " ok", 0

View File

@ -3,25 +3,26 @@
; PS is the most frequently used. However, this causes a problem with routine
; calls: because in Forth, the stack isn't balanced within each call, our return
; offset, when placed by a CALL, messes everything up. This is one of the
; reasons why we need stack management routines below.
; reasons why we need stack management routines below. IX always points to RS'
; Top Of Stack (TOS)
;
; This return stack contain "Interpreter pointers", that is a pointer to the
; address of a word, as seen in a compiled list of words.
; Push value HL to RS
pushRS:
inc ix
inc ix
ld (ix), l
inc ix
ld (ix), h
inc ix
ld (ix+1), h
ret
; Pop RS' TOS to HL
popRS:
dec ix
ld h, (ix)
dec ix
ld l, (ix)
ld h, (ix+1)
dec ix
dec ix
ret
; Verifies that SP is within bounds. If it's not, call ABORT

View File

@ -43,6 +43,126 @@ readword:
inc a ; unset Z
ret
RSIsDE:
push hl
ld l, (ix)
ld h, (ix+1)
ld a, (hl)
cp e
jr nz, .end ; no
inc hl
ld a, (hl)
cp d ; Z has our answer
.end:
pop hl
ret
; Is RS' TOS pointing to a NUMBER word?
; Z if yes, NZ if no.
RSIsNUMBER:
push de
ld de, NUMBER
call RSIsDE
pop de
ret
; Is RS' TOS pointing to a LIT word?
; Z if yes, NZ if no.
RSIsLIT:
push de
ld de, LIT
call RSIsDE
pop de
ret
; Is RS' TOS pointing to EXIT?
; Z if yes, NZ if no.
RSIsEXIT:
push de
ld de, EXIT+CODELINK_OFFSET
call RSIsDE
pop de
ret
; Skip the compword where RS' TOS is currently pointing. If it's a regular word,
; it's easy: we inc by 2. If it's a NUMBER, we inc by 4. If it's a LIT, we skip
; to after null-termination.
compSkip:
push hl
ld l, (ix)
ld h, (ix+1)
; At the minimum, we skip by 2
inc hl \ inc hl
call RSIsNUMBER
jr z, .isNum
call RSIsLIT
jr nz, .end ; A word
; We have a literal
call strskip
inc hl ; byte after word termination
jr .end
.isNum:
; skip by 4
inc hl \ inc hl
.end:
; HL is good, write it to RS
ld (ix), l
ld (ix+1), h
pop hl
ret
; Checks RS' TOS and, if it points to a string literal (LIT), makes HL point
; to it and advance IP to byte following null-termination.
; If it doesn't, things get interesting: If it's a word reference, then it's
; not an invalid literal. For example, one could want to redefine an existing
; word. So in that case, we'll copy the word's name on the pad (it might not be
; null-terminated) and set HL to point to it.
; How do we know that our reference is a word reference (it could be, for
; example, a NUMBER reference)? We check that its address is more than QUIT, the
; second word in our dict. We don't accept EXIT because it's the termination
; word. Yeah, it means that ";" can't be overridden...
; If name can't be read, we abort
readCompWord:
; In all cases, we want RS' TOS in HL. Let's get it now.
ld l, (ix)
ld h, (ix+1)
call RSIsLIT
jr nz, .notLIT
; RS TOS is a LIT, make HL point to string, then skip this RS compword.
inc hl \ inc hl ; HL now points to string itself
jr compSkip
.notLIT:
; Alright, not a literal, but is it a word? If it's not a number, then
; it's a word.
call RSIsNUMBER
jr z, .notWord
; Not a number, then it's a word. Copy word to pad and point to it.
call intoHL
or a ; clear carry
ld de, CODELINK_OFFSET
sbc hl, de
; That's our return value
push hl ; --> lvl 1
; HL now points to word offset, let'd copy it to pad
ex de, hl
call pad
ex de, hl
ld bc, NAMELEN
ldir
; null-terminate
xor a
ld (de), a
call compSkip
pop hl ; <-- lvl 1
ret
.notWord:
ld hl, .msg
call printstr
jp abort
.msg:
.db "word expected", 0
; For DE pointing to a dict entry, set DE to point to the previous entry.
; Z is set if DE point to 0 (no entry). NZ if not.
prev:
@ -84,38 +204,48 @@ wrCompHL:
; Compile word string at (HL) and write down its compiled version in IY,
; advancing IY to the byte next to the last written byte.
; Set Z on success, unset on failure.
compile:
call find
jr nz, .maybeNum
ret nz
; DE is a word offset, we need a code link
ld hl, CODELINK_OFFSET
add hl, de
xor a ; set Z
jr wrCompHL
.maybeNum:
push hl ; --> lvl 1. save string addr
call parseLiteral
ret nz
jr nz, .undef
pop hl ; <-- lvl 1
; a valid number!
ld hl, NUMBER
call wrCompHL
ex de, hl ; number in HL
jr wrCompHL
ret z
; unknown name
ld hl, .msg
call printstr
jp abort
.msg:
.db "unknown name", 0
.undef:
; When encountering an undefined word during compilation, we spit a
; reference to litWord, followed by the null-terminated word.
; This way, if a preceding word expect a string literal, it will read it
; by calling readCompWord, and if it doesn't, the routine will be
; called, triggering an abort.
ld hl, LIT
call wrCompHL
pop hl ; <-- lvl 1. recall string addr
.writeLit:
ld a, (hl)
ld (iy), a
inc hl
inc iy
or a
jr nz, .writeLit
ret
; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
; HL points to new (HERE)
; Set Z if name could be read, NZ if not
entryhead:
call readword
ret nz
call readCompWord
call printstr
ld de, (HERE)
call strcpy
ex de, hl ; (HERE) now in HL