forth: simplify execution model and handle literals better

This scheme of "when we handle line-by-line, compile one word at a time then
execute" so that we could allow words like "CREATE" to call "readword" before
continuing was a bad scheme. It made things like branching outside of a colon
definition impossible.

This commit implement a new "litWord". When an undefined word is encountered at
compile time, it is included as-is in a string literal word. It is at run time
that we decide what to do with it.
This commit is contained in:
Virgil Dupras 2020-03-09 08:49:51 -04:00
parent 5cadde557c
commit 03e529b762
5 changed files with 222 additions and 106 deletions

View File

@ -71,10 +71,27 @@ numberWord:
NUMBER: NUMBER:
.dw numberWord .dw numberWord
; Similarly to numberWord, this is not a real word, but a string literal.
; Instead of being followed by a 2 bytes number, it's followed by a
; null-terminated string. This is not expected to be called in a regular
; context. Only words expecting those literals will look for them. This is why
; the litWord triggers abort.
litWord:
call popRS
call intoHL
call printstr ; let's print the word before abort.
ld hl, .msg
call printstr
jp abort
.msg:
.db "undefined word", 0
LIT:
.dw litWord
; ( R:I -- ) ; ( R:I -- )
EXIT: EXIT:
.db "EXIT", 0, 0, 0, 0 .db ";"
.fill 7
.dw 0 .dw 0
.dw nativeWord .dw nativeWord
; When we call the EXIT word, we have to do a "double exit" because our current ; When we call the EXIT word, we have to do a "double exit" because our current
@ -88,7 +105,7 @@ exit:
call popRS call popRS
; We have a pointer to a word ; We have a pointer to a word
push hl \ pop iy push hl \ pop iy
jr compiledWord jp compiledWord
; ( R:I -- ) ; ( R:I -- )
QUIT: QUIT:
@ -96,31 +113,29 @@ QUIT:
.dw EXIT .dw EXIT
.dw nativeWord .dw nativeWord
quit: quit:
ld hl, FLAGS jp forthRdLine
set FLAG_QUITTING, (hl)
jr exit
ABORT: ABORT:
.db "ABORT", 0, 0, 0 .db "ABORT", 0, 0, 0
.dw QUIT .dw QUIT
.dw nativeWord .dw nativeWord
abort: abort:
; Reinitialize PS (RS is reinitialized in forthInterpret
ld sp, (INITIAL_SP) ld sp, (INITIAL_SP)
ld hl, .msg jp forthRdLine
call printstr
call printcrlf
jr quit
.msg:
.db " err", 0
BYE: BYE:
.db "BYE" .db "BYE"
.fill 5 .fill 5
.dw ABORT .dw ABORT
.dw nativeWord .dw nativeWord
ld hl, FLAGS ; Goodbye Forth! Before we go, let's restore the stack
set FLAG_ENDPGM, (hl) ld sp, (INITIAL_SP)
jp exit ; unwind stack underflow buffer
pop af \ pop af \ pop af
; success
xor a
ret
; ( c -- ) ; ( c -- )
EMIT: EMIT:
@ -155,36 +170,44 @@ DEFINE:
.dw EXECUTE .dw EXECUTE
.dw nativeWord .dw nativeWord
call entryhead call entryhead
jp nz, quit
ld de, compiledWord ld de, compiledWord
ld (hl), e ld (hl), e
inc hl inc hl
ld (hl), d ld (hl), d
inc hl inc hl
push hl \ pop iy ; At this point, we've processed the name literal following the ':'.
; What's next? We have, in IP, a pointer to words that *have already
; been compiled by INTERPRET*. All those bytes will be copied as-is.
; All we need to do is to know how many bytes to copy. To do so, we
; skip compwords until EXIT is reached.
ld (HERE), hl ; where we write compwords.
; Let's save old RS TOS
ld e, (ix)
ld d, (ix+1)
.loop: .loop:
call readword call RSIsEXIT
jr nz, .end jr z, .loopend
call .issemicol call compSkip
jr z, .end
call compile
jp nz, quit
jr .loop jr .loop
.end: .loopend:
; end chain with EXIT ; At this point, RS' TOS points to EXIT compword. We'll copy it too.
ld hl, EXIT+CODELINK_OFFSET ; We'll use LDIR. BC will be RSTOP-OLDRSTOP+2
call wrCompHL ld l, (ix)
ld (HERE), iy ld h, (ix+1)
inc hl \ inc hl ; our +2
or a ; clear carry
sbc hl, de
ld b, h
ld c, l
; BC has proper count
ex de, hl ; HL is our source (old RS' TOS)
ld de, (HERE) ; and DE is our dest
ldir ; go!
; HL has our new RS' TOS
ld (ix), l
ld (ix+1), h
ld (HERE), de ; update HERE
jp exit jp exit
.issemicol:
ld a, (hl)
cp ';'
ret nz
inc hl
ld a, (hl)
dec hl
or a
ret
DOES: DOES:
.db "DOES>", 0, 0, 0 .db "DOES>", 0, 0, 0
@ -226,22 +249,17 @@ INTERPRET:
.dw KEY .dw KEY
.dw nativeWord .dw nativeWord
interpret: interpret:
call readword
jp nz, quit
ld iy, COMPBUF ld iy, COMPBUF
.loop:
call readword
jr nz, .end
call compile call compile
jp nz, .notfound jr .loop
ld hl, EXIT+CODELINK_OFFSET .end:
ld (iy), l ld hl, QUIT+CODELINK_OFFSET
ld (iy+1), h call wrCompHL
ld iy, COMPBUF ld iy, COMPBUF
jp compiledWord jp compiledWord
.notfound:
ld hl, .msg
call printstr
jp quit
.msg:
.db "not found", 0
CREATE: CREATE:
.db "CREATE", 0, 0 .db "CREATE", 0, 0
@ -454,4 +472,3 @@ CONSTANT:
.dw DOES+CODELINK_OFFSET .dw DOES+CODELINK_OFFSET
.dw FETCH+CODELINK_OFFSET .dw FETCH+CODELINK_OFFSET
.dw EXIT+CODELINK_OFFSET .dw EXIT+CODELINK_OFFSET

View File

@ -17,7 +17,8 @@ DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
*** Native Words *** *** Native Words ***
: x ... ; -- Define a new word : x ... -- Define a new word
; R:I -- Exit a colon definition
. n -- Print n in its decimal form . n -- Print n in its decimal form
@ a -- n Set n to value at address a @ a -- n Set n to value at address a
! n a -- Store n in address a ! n a -- Store n in address a
@ -30,7 +31,6 @@ DOES> -- See description at top of file
DUP a -- a a DUP a -- a a
EMIT c -- Spit char c to stdout EMIT c -- Spit char c to stdout
EXECUTE a -- Execute word at addr a EXECUTE a -- Execute word at addr a
EXIT R:I -- Exit a colon definition
HERE -- a Push HERE's address HERE -- a Push HERE's address
QUIT R:drop -- Return to interpreter promp immediately QUIT R:drop -- Return to interpreter promp immediately
KEY -- c Get char c from stdin KEY -- c Get char c from stdin

View File

@ -7,47 +7,17 @@
.equ NAMELEN 8 .equ NAMELEN 8
; Offset of the code link relative to the beginning of the word ; Offset of the code link relative to the beginning of the word
.equ CODELINK_OFFSET 10 .equ CODELINK_OFFSET 10
; When set, the interpreter should abort parsing of current line and return to
; prompt.
.equ FLAG_QUITTING 0
; When set, the interpreter should quit
.equ FLAG_ENDPGM 1
; *** Variables *** ; *** Variables ***
.equ INITIAL_SP FORTH_RAMSTART .equ INITIAL_SP FORTH_RAMSTART
.equ CURRENT @+2 .equ CURRENT @+2
.equ HERE @+2 .equ HERE @+2
.equ INPUTPOS @+2 .equ INPUTPOS @+2
.equ FLAGS @+2
; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE. ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
.equ COMPBUF @+1 .equ COMPBUF @+2
.equ FORTH_RAMEND @+0x40 .equ FORTH_RAMEND @+0x40
; *** Code *** ; *** Code ***
MAIN:
.dw compiledWord
.dw INTERPRET+CODELINK_OFFSET
.dw CHKEND
; If FLAG_ENDPGM is set, stop the program, else, tweak the RS so that we loop.
CHKEND:
.dw nativeWord
ld hl, FLAGS
bit FLAG_ENDPGM, (hl)
jr nz, .endpgm
; not quitting program, are we supposed to continue parsing line?
ld hl, FLAGS
bit FLAG_QUITTING, (hl)
jr nz, forthRdLine
; Not quitting line either.
jr forthInterpret
.endpgm:
ld sp, (INITIAL_SP)
; restore stack
pop af \ pop af \ pop af
xor a
ret
forthMain: forthMain:
; STACK OVERFLOW PROTECTION: ; STACK OVERFLOW PROTECTION:
; To avoid having to check for stack underflow after each pop operation ; To avoid having to check for stack underflow after each pop operation
@ -62,16 +32,14 @@ forthMain:
ld hl, FORTH_RAMEND ld hl, FORTH_RAMEND
ld (HERE), hl ld (HERE), hl
forthRdLine: forthRdLine:
xor a
ld (FLAGS), a
ld hl, msgOk ld hl, msgOk
call printstr call printstr
call printcrlf call printcrlf
call stdioReadLine call stdioReadLine
ld (INPUTPOS), hl ld (INPUTPOS), hl
forthInterpret: forthInterpret:
ld ix, RS_ADDR ld ix, RS_ADDR-2 ; -2 because we inc-before-push
ld iy, MAIN ld iy, INTERPRET+CODELINK_OFFSET
jp executeCodeLink jp executeCodeLink
msgOk: msgOk:
.db " ok", 0 .db " ok", 0

View File

@ -3,25 +3,26 @@
; PS is the most frequently used. However, this causes a problem with routine ; PS is the most frequently used. However, this causes a problem with routine
; calls: because in Forth, the stack isn't balanced within each call, our return ; calls: because in Forth, the stack isn't balanced within each call, our return
; offset, when placed by a CALL, messes everything up. This is one of the ; offset, when placed by a CALL, messes everything up. This is one of the
; reasons why we need stack management routines below. ; reasons why we need stack management routines below. IX always points to RS'
; Top Of Stack (TOS)
; ;
; This return stack contain "Interpreter pointers", that is a pointer to the ; This return stack contain "Interpreter pointers", that is a pointer to the
; address of a word, as seen in a compiled list of words. ; address of a word, as seen in a compiled list of words.
; Push value HL to RS ; Push value HL to RS
pushRS: pushRS:
inc ix
inc ix
ld (ix), l ld (ix), l
inc ix ld (ix+1), h
ld (ix), h
inc ix
ret ret
; Pop RS' TOS to HL ; Pop RS' TOS to HL
popRS: popRS:
dec ix
ld h, (ix)
dec ix
ld l, (ix) ld l, (ix)
ld h, (ix+1)
dec ix
dec ix
ret ret
; Verifies that SP is within bounds. If it's not, call ABORT ; Verifies that SP is within bounds. If it's not, call ABORT

View File

@ -43,6 +43,126 @@ readword:
inc a ; unset Z inc a ; unset Z
ret ret
RSIsDE:
push hl
ld l, (ix)
ld h, (ix+1)
ld a, (hl)
cp e
jr nz, .end ; no
inc hl
ld a, (hl)
cp d ; Z has our answer
.end:
pop hl
ret
; Is RS' TOS pointing to a NUMBER word?
; Z if yes, NZ if no.
RSIsNUMBER:
push de
ld de, NUMBER
call RSIsDE
pop de
ret
; Is RS' TOS pointing to a LIT word?
; Z if yes, NZ if no.
RSIsLIT:
push de
ld de, LIT
call RSIsDE
pop de
ret
; Is RS' TOS pointing to EXIT?
; Z if yes, NZ if no.
RSIsEXIT:
push de
ld de, EXIT+CODELINK_OFFSET
call RSIsDE
pop de
ret
; Skip the compword where RS' TOS is currently pointing. If it's a regular word,
; it's easy: we inc by 2. If it's a NUMBER, we inc by 4. If it's a LIT, we skip
; to after null-termination.
compSkip:
push hl
ld l, (ix)
ld h, (ix+1)
; At the minimum, we skip by 2
inc hl \ inc hl
call RSIsNUMBER
jr z, .isNum
call RSIsLIT
jr nz, .end ; A word
; We have a literal
call strskip
inc hl ; byte after word termination
jr .end
.isNum:
; skip by 4
inc hl \ inc hl
.end:
; HL is good, write it to RS
ld (ix), l
ld (ix+1), h
pop hl
ret
; Checks RS' TOS and, if it points to a string literal (LIT), makes HL point
; to it and advance IP to byte following null-termination.
; If it doesn't, things get interesting: If it's a word reference, then it's
; not an invalid literal. For example, one could want to redefine an existing
; word. So in that case, we'll copy the word's name on the pad (it might not be
; null-terminated) and set HL to point to it.
; How do we know that our reference is a word reference (it could be, for
; example, a NUMBER reference)? We check that its address is more than QUIT, the
; second word in our dict. We don't accept EXIT because it's the termination
; word. Yeah, it means that ";" can't be overridden...
; If name can't be read, we abort
readCompWord:
; In all cases, we want RS' TOS in HL. Let's get it now.
ld l, (ix)
ld h, (ix+1)
call RSIsLIT
jr nz, .notLIT
; RS TOS is a LIT, make HL point to string, then skip this RS compword.
inc hl \ inc hl ; HL now points to string itself
jr compSkip
.notLIT:
; Alright, not a literal, but is it a word? If it's not a number, then
; it's a word.
call RSIsNUMBER
jr z, .notWord
; Not a number, then it's a word. Copy word to pad and point to it.
call intoHL
or a ; clear carry
ld de, CODELINK_OFFSET
sbc hl, de
; That's our return value
push hl ; --> lvl 1
; HL now points to word offset, let'd copy it to pad
ex de, hl
call pad
ex de, hl
ld bc, NAMELEN
ldir
; null-terminate
xor a
ld (de), a
call compSkip
pop hl ; <-- lvl 1
ret
.notWord:
ld hl, .msg
call printstr
jp abort
.msg:
.db "word expected", 0
; For DE pointing to a dict entry, set DE to point to the previous entry. ; For DE pointing to a dict entry, set DE to point to the previous entry.
; Z is set if DE point to 0 (no entry). NZ if not. ; Z is set if DE point to 0 (no entry). NZ if not.
prev: prev:
@ -84,38 +204,48 @@ wrCompHL:
; Compile word string at (HL) and write down its compiled version in IY, ; Compile word string at (HL) and write down its compiled version in IY,
; advancing IY to the byte next to the last written byte. ; advancing IY to the byte next to the last written byte.
; Set Z on success, unset on failure.
compile: compile:
call find call find
jr nz, .maybeNum jr nz, .maybeNum
ret nz
; DE is a word offset, we need a code link ; DE is a word offset, we need a code link
ld hl, CODELINK_OFFSET ld hl, CODELINK_OFFSET
add hl, de add hl, de
xor a ; set Z xor a ; set Z
jr wrCompHL jr wrCompHL
.maybeNum: .maybeNum:
push hl ; --> lvl 1. save string addr
call parseLiteral call parseLiteral
ret nz jr nz, .undef
pop hl ; <-- lvl 1
; a valid number! ; a valid number!
ld hl, NUMBER ld hl, NUMBER
call wrCompHL call wrCompHL
ex de, hl ; number in HL ex de, hl ; number in HL
jr wrCompHL jr wrCompHL
ret z .undef:
; unknown name ; When encountering an undefined word during compilation, we spit a
ld hl, .msg ; reference to litWord, followed by the null-terminated word.
call printstr ; This way, if a preceding word expect a string literal, it will read it
jp abort ; by calling readCompWord, and if it doesn't, the routine will be
.msg: ; called, triggering an abort.
.db "unknown name", 0 ld hl, LIT
call wrCompHL
pop hl ; <-- lvl 1. recall string addr
.writeLit:
ld a, (hl)
ld (iy), a
inc hl
inc iy
or a
jr nz, .writeLit
ret
; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT) ; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
; HL points to new (HERE) ; HL points to new (HERE)
; Set Z if name could be read, NZ if not
entryhead: entryhead:
call readword call readCompWord
ret nz call printstr
ld de, (HERE) ld de, (HERE)
call strcpy call strcpy
ex de, hl ; (HERE) now in HL ex de, hl ; (HERE) now in HL