mirror of
https://github.com/hsoft/collapseos.git
synced 2025-01-27 14:56:02 +11:00
forth: simplify execution model and handle literals better
This scheme of "when we handle line-by-line, compile one word at a time then execute" so that we could allow words like "CREATE" to call "readword" before continuing was a bad scheme. It made things like branching outside of a colon definition impossible. This commit implement a new "litWord". When an undefined word is encountered at compile time, it is included as-is in a string literal word. It is at run time that we decide what to do with it.
This commit is contained in:
parent
5cadde557c
commit
03e529b762
@ -71,10 +71,27 @@ numberWord:
|
||||
NUMBER:
|
||||
.dw numberWord
|
||||
|
||||
; Similarly to numberWord, this is not a real word, but a string literal.
|
||||
; Instead of being followed by a 2 bytes number, it's followed by a
|
||||
; null-terminated string. This is not expected to be called in a regular
|
||||
; context. Only words expecting those literals will look for them. This is why
|
||||
; the litWord triggers abort.
|
||||
litWord:
|
||||
call popRS
|
||||
call intoHL
|
||||
call printstr ; let's print the word before abort.
|
||||
ld hl, .msg
|
||||
call printstr
|
||||
jp abort
|
||||
.msg:
|
||||
.db "undefined word", 0
|
||||
LIT:
|
||||
.dw litWord
|
||||
|
||||
; ( R:I -- )
|
||||
EXIT:
|
||||
.db "EXIT", 0, 0, 0, 0
|
||||
.db ";"
|
||||
.fill 7
|
||||
.dw 0
|
||||
.dw nativeWord
|
||||
; When we call the EXIT word, we have to do a "double exit" because our current
|
||||
@ -88,7 +105,7 @@ exit:
|
||||
call popRS
|
||||
; We have a pointer to a word
|
||||
push hl \ pop iy
|
||||
jr compiledWord
|
||||
jp compiledWord
|
||||
|
||||
; ( R:I -- )
|
||||
QUIT:
|
||||
@ -96,31 +113,29 @@ QUIT:
|
||||
.dw EXIT
|
||||
.dw nativeWord
|
||||
quit:
|
||||
ld hl, FLAGS
|
||||
set FLAG_QUITTING, (hl)
|
||||
jr exit
|
||||
jp forthRdLine
|
||||
|
||||
ABORT:
|
||||
.db "ABORT", 0, 0, 0
|
||||
.dw QUIT
|
||||
.dw nativeWord
|
||||
abort:
|
||||
; Reinitialize PS (RS is reinitialized in forthInterpret
|
||||
ld sp, (INITIAL_SP)
|
||||
ld hl, .msg
|
||||
call printstr
|
||||
call printcrlf
|
||||
jr quit
|
||||
.msg:
|
||||
.db " err", 0
|
||||
jp forthRdLine
|
||||
|
||||
BYE:
|
||||
.db "BYE"
|
||||
.fill 5
|
||||
.dw ABORT
|
||||
.dw nativeWord
|
||||
ld hl, FLAGS
|
||||
set FLAG_ENDPGM, (hl)
|
||||
jp exit
|
||||
; Goodbye Forth! Before we go, let's restore the stack
|
||||
ld sp, (INITIAL_SP)
|
||||
; unwind stack underflow buffer
|
||||
pop af \ pop af \ pop af
|
||||
; success
|
||||
xor a
|
||||
ret
|
||||
|
||||
; ( c -- )
|
||||
EMIT:
|
||||
@ -155,36 +170,44 @@ DEFINE:
|
||||
.dw EXECUTE
|
||||
.dw nativeWord
|
||||
call entryhead
|
||||
jp nz, quit
|
||||
ld de, compiledWord
|
||||
ld (hl), e
|
||||
inc hl
|
||||
ld (hl), d
|
||||
inc hl
|
||||
push hl \ pop iy
|
||||
; At this point, we've processed the name literal following the ':'.
|
||||
; What's next? We have, in IP, a pointer to words that *have already
|
||||
; been compiled by INTERPRET*. All those bytes will be copied as-is.
|
||||
; All we need to do is to know how many bytes to copy. To do so, we
|
||||
; skip compwords until EXIT is reached.
|
||||
ld (HERE), hl ; where we write compwords.
|
||||
; Let's save old RS TOS
|
||||
ld e, (ix)
|
||||
ld d, (ix+1)
|
||||
.loop:
|
||||
call readword
|
||||
jr nz, .end
|
||||
call .issemicol
|
||||
jr z, .end
|
||||
call compile
|
||||
jp nz, quit
|
||||
call RSIsEXIT
|
||||
jr z, .loopend
|
||||
call compSkip
|
||||
jr .loop
|
||||
.end:
|
||||
; end chain with EXIT
|
||||
ld hl, EXIT+CODELINK_OFFSET
|
||||
call wrCompHL
|
||||
ld (HERE), iy
|
||||
.loopend:
|
||||
; At this point, RS' TOS points to EXIT compword. We'll copy it too.
|
||||
; We'll use LDIR. BC will be RSTOP-OLDRSTOP+2
|
||||
ld l, (ix)
|
||||
ld h, (ix+1)
|
||||
inc hl \ inc hl ; our +2
|
||||
or a ; clear carry
|
||||
sbc hl, de
|
||||
ld b, h
|
||||
ld c, l
|
||||
; BC has proper count
|
||||
ex de, hl ; HL is our source (old RS' TOS)
|
||||
ld de, (HERE) ; and DE is our dest
|
||||
ldir ; go!
|
||||
; HL has our new RS' TOS
|
||||
ld (ix), l
|
||||
ld (ix+1), h
|
||||
ld (HERE), de ; update HERE
|
||||
jp exit
|
||||
.issemicol:
|
||||
ld a, (hl)
|
||||
cp ';'
|
||||
ret nz
|
||||
inc hl
|
||||
ld a, (hl)
|
||||
dec hl
|
||||
or a
|
||||
ret
|
||||
|
||||
DOES:
|
||||
.db "DOES>", 0, 0, 0
|
||||
@ -226,22 +249,17 @@ INTERPRET:
|
||||
.dw KEY
|
||||
.dw nativeWord
|
||||
interpret:
|
||||
call readword
|
||||
jp nz, quit
|
||||
ld iy, COMPBUF
|
||||
.loop:
|
||||
call readword
|
||||
jr nz, .end
|
||||
call compile
|
||||
jp nz, .notfound
|
||||
ld hl, EXIT+CODELINK_OFFSET
|
||||
ld (iy), l
|
||||
ld (iy+1), h
|
||||
jr .loop
|
||||
.end:
|
||||
ld hl, QUIT+CODELINK_OFFSET
|
||||
call wrCompHL
|
||||
ld iy, COMPBUF
|
||||
jp compiledWord
|
||||
.notfound:
|
||||
ld hl, .msg
|
||||
call printstr
|
||||
jp quit
|
||||
.msg:
|
||||
.db "not found", 0
|
||||
|
||||
CREATE:
|
||||
.db "CREATE", 0, 0
|
||||
@ -454,4 +472,3 @@ CONSTANT:
|
||||
.dw DOES+CODELINK_OFFSET
|
||||
.dw FETCH+CODELINK_OFFSET
|
||||
.dw EXIT+CODELINK_OFFSET
|
||||
|
||||
|
@ -17,7 +17,8 @@ DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms
|
||||
|
||||
*** Native Words ***
|
||||
|
||||
: x ... ; -- Define a new word
|
||||
: x ... -- Define a new word
|
||||
; R:I -- Exit a colon definition
|
||||
. n -- Print n in its decimal form
|
||||
@ a -- n Set n to value at address a
|
||||
! n a -- Store n in address a
|
||||
@ -30,7 +31,6 @@ DOES> -- See description at top of file
|
||||
DUP a -- a a
|
||||
EMIT c -- Spit char c to stdout
|
||||
EXECUTE a -- Execute word at addr a
|
||||
EXIT R:I -- Exit a colon definition
|
||||
HERE -- a Push HERE's address
|
||||
QUIT R:drop -- Return to interpreter promp immediately
|
||||
KEY -- c Get char c from stdin
|
||||
|
@ -7,47 +7,17 @@
|
||||
.equ NAMELEN 8
|
||||
; Offset of the code link relative to the beginning of the word
|
||||
.equ CODELINK_OFFSET 10
|
||||
; When set, the interpreter should abort parsing of current line and return to
|
||||
; prompt.
|
||||
.equ FLAG_QUITTING 0
|
||||
; When set, the interpreter should quit
|
||||
.equ FLAG_ENDPGM 1
|
||||
|
||||
; *** Variables ***
|
||||
.equ INITIAL_SP FORTH_RAMSTART
|
||||
.equ CURRENT @+2
|
||||
.equ HERE @+2
|
||||
.equ INPUTPOS @+2
|
||||
.equ FLAGS @+2
|
||||
; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
|
||||
.equ COMPBUF @+1
|
||||
.equ COMPBUF @+2
|
||||
.equ FORTH_RAMEND @+0x40
|
||||
|
||||
; *** Code ***
|
||||
MAIN:
|
||||
.dw compiledWord
|
||||
.dw INTERPRET+CODELINK_OFFSET
|
||||
.dw CHKEND
|
||||
|
||||
; If FLAG_ENDPGM is set, stop the program, else, tweak the RS so that we loop.
|
||||
CHKEND:
|
||||
.dw nativeWord
|
||||
ld hl, FLAGS
|
||||
bit FLAG_ENDPGM, (hl)
|
||||
jr nz, .endpgm
|
||||
; not quitting program, are we supposed to continue parsing line?
|
||||
ld hl, FLAGS
|
||||
bit FLAG_QUITTING, (hl)
|
||||
jr nz, forthRdLine
|
||||
; Not quitting line either.
|
||||
jr forthInterpret
|
||||
.endpgm:
|
||||
ld sp, (INITIAL_SP)
|
||||
; restore stack
|
||||
pop af \ pop af \ pop af
|
||||
xor a
|
||||
ret
|
||||
|
||||
forthMain:
|
||||
; STACK OVERFLOW PROTECTION:
|
||||
; To avoid having to check for stack underflow after each pop operation
|
||||
@ -62,16 +32,14 @@ forthMain:
|
||||
ld hl, FORTH_RAMEND
|
||||
ld (HERE), hl
|
||||
forthRdLine:
|
||||
xor a
|
||||
ld (FLAGS), a
|
||||
ld hl, msgOk
|
||||
call printstr
|
||||
call printcrlf
|
||||
call stdioReadLine
|
||||
ld (INPUTPOS), hl
|
||||
forthInterpret:
|
||||
ld ix, RS_ADDR
|
||||
ld iy, MAIN
|
||||
ld ix, RS_ADDR-2 ; -2 because we inc-before-push
|
||||
ld iy, INTERPRET+CODELINK_OFFSET
|
||||
jp executeCodeLink
|
||||
msgOk:
|
||||
.db " ok", 0
|
||||
|
@ -3,25 +3,26 @@
|
||||
; PS is the most frequently used. However, this causes a problem with routine
|
||||
; calls: because in Forth, the stack isn't balanced within each call, our return
|
||||
; offset, when placed by a CALL, messes everything up. This is one of the
|
||||
; reasons why we need stack management routines below.
|
||||
; reasons why we need stack management routines below. IX always points to RS'
|
||||
; Top Of Stack (TOS)
|
||||
;
|
||||
; This return stack contain "Interpreter pointers", that is a pointer to the
|
||||
; address of a word, as seen in a compiled list of words.
|
||||
|
||||
; Push value HL to RS
|
||||
pushRS:
|
||||
inc ix
|
||||
inc ix
|
||||
ld (ix), l
|
||||
inc ix
|
||||
ld (ix), h
|
||||
inc ix
|
||||
ld (ix+1), h
|
||||
ret
|
||||
|
||||
; Pop RS' TOS to HL
|
||||
popRS:
|
||||
dec ix
|
||||
ld h, (ix)
|
||||
dec ix
|
||||
ld l, (ix)
|
||||
ld h, (ix+1)
|
||||
dec ix
|
||||
dec ix
|
||||
ret
|
||||
|
||||
; Verifies that SP is within bounds. If it's not, call ABORT
|
||||
|
@ -43,6 +43,126 @@ readword:
|
||||
inc a ; unset Z
|
||||
ret
|
||||
|
||||
RSIsDE:
|
||||
push hl
|
||||
ld l, (ix)
|
||||
ld h, (ix+1)
|
||||
ld a, (hl)
|
||||
cp e
|
||||
jr nz, .end ; no
|
||||
inc hl
|
||||
ld a, (hl)
|
||||
cp d ; Z has our answer
|
||||
.end:
|
||||
pop hl
|
||||
ret
|
||||
|
||||
|
||||
; Is RS' TOS pointing to a NUMBER word?
|
||||
; Z if yes, NZ if no.
|
||||
RSIsNUMBER:
|
||||
push de
|
||||
ld de, NUMBER
|
||||
call RSIsDE
|
||||
pop de
|
||||
ret
|
||||
|
||||
; Is RS' TOS pointing to a LIT word?
|
||||
; Z if yes, NZ if no.
|
||||
RSIsLIT:
|
||||
push de
|
||||
ld de, LIT
|
||||
call RSIsDE
|
||||
pop de
|
||||
ret
|
||||
|
||||
; Is RS' TOS pointing to EXIT?
|
||||
; Z if yes, NZ if no.
|
||||
RSIsEXIT:
|
||||
push de
|
||||
ld de, EXIT+CODELINK_OFFSET
|
||||
call RSIsDE
|
||||
pop de
|
||||
ret
|
||||
|
||||
; Skip the compword where RS' TOS is currently pointing. If it's a regular word,
|
||||
; it's easy: we inc by 2. If it's a NUMBER, we inc by 4. If it's a LIT, we skip
|
||||
; to after null-termination.
|
||||
compSkip:
|
||||
push hl
|
||||
ld l, (ix)
|
||||
ld h, (ix+1)
|
||||
; At the minimum, we skip by 2
|
||||
inc hl \ inc hl
|
||||
call RSIsNUMBER
|
||||
jr z, .isNum
|
||||
call RSIsLIT
|
||||
jr nz, .end ; A word
|
||||
; We have a literal
|
||||
call strskip
|
||||
inc hl ; byte after word termination
|
||||
jr .end
|
||||
.isNum:
|
||||
; skip by 4
|
||||
inc hl \ inc hl
|
||||
.end:
|
||||
; HL is good, write it to RS
|
||||
ld (ix), l
|
||||
ld (ix+1), h
|
||||
pop hl
|
||||
ret
|
||||
|
||||
; Checks RS' TOS and, if it points to a string literal (LIT), makes HL point
|
||||
; to it and advance IP to byte following null-termination.
|
||||
; If it doesn't, things get interesting: If it's a word reference, then it's
|
||||
; not an invalid literal. For example, one could want to redefine an existing
|
||||
; word. So in that case, we'll copy the word's name on the pad (it might not be
|
||||
; null-terminated) and set HL to point to it.
|
||||
; How do we know that our reference is a word reference (it could be, for
|
||||
; example, a NUMBER reference)? We check that its address is more than QUIT, the
|
||||
; second word in our dict. We don't accept EXIT because it's the termination
|
||||
; word. Yeah, it means that ";" can't be overridden...
|
||||
; If name can't be read, we abort
|
||||
readCompWord:
|
||||
; In all cases, we want RS' TOS in HL. Let's get it now.
|
||||
ld l, (ix)
|
||||
ld h, (ix+1)
|
||||
call RSIsLIT
|
||||
jr nz, .notLIT
|
||||
; RS TOS is a LIT, make HL point to string, then skip this RS compword.
|
||||
inc hl \ inc hl ; HL now points to string itself
|
||||
jr compSkip
|
||||
.notLIT:
|
||||
; Alright, not a literal, but is it a word? If it's not a number, then
|
||||
; it's a word.
|
||||
call RSIsNUMBER
|
||||
jr z, .notWord
|
||||
; Not a number, then it's a word. Copy word to pad and point to it.
|
||||
call intoHL
|
||||
or a ; clear carry
|
||||
ld de, CODELINK_OFFSET
|
||||
sbc hl, de
|
||||
; That's our return value
|
||||
push hl ; --> lvl 1
|
||||
; HL now points to word offset, let'd copy it to pad
|
||||
ex de, hl
|
||||
call pad
|
||||
ex de, hl
|
||||
ld bc, NAMELEN
|
||||
ldir
|
||||
; null-terminate
|
||||
xor a
|
||||
ld (de), a
|
||||
call compSkip
|
||||
pop hl ; <-- lvl 1
|
||||
ret
|
||||
.notWord:
|
||||
ld hl, .msg
|
||||
call printstr
|
||||
jp abort
|
||||
.msg:
|
||||
.db "word expected", 0
|
||||
|
||||
; For DE pointing to a dict entry, set DE to point to the previous entry.
|
||||
; Z is set if DE point to 0 (no entry). NZ if not.
|
||||
prev:
|
||||
@ -84,38 +204,48 @@ wrCompHL:
|
||||
|
||||
; Compile word string at (HL) and write down its compiled version in IY,
|
||||
; advancing IY to the byte next to the last written byte.
|
||||
; Set Z on success, unset on failure.
|
||||
compile:
|
||||
call find
|
||||
jr nz, .maybeNum
|
||||
ret nz
|
||||
; DE is a word offset, we need a code link
|
||||
ld hl, CODELINK_OFFSET
|
||||
add hl, de
|
||||
xor a ; set Z
|
||||
jr wrCompHL
|
||||
.maybeNum:
|
||||
push hl ; --> lvl 1. save string addr
|
||||
call parseLiteral
|
||||
ret nz
|
||||
jr nz, .undef
|
||||
pop hl ; <-- lvl 1
|
||||
; a valid number!
|
||||
ld hl, NUMBER
|
||||
call wrCompHL
|
||||
ex de, hl ; number in HL
|
||||
jr wrCompHL
|
||||
ret z
|
||||
; unknown name
|
||||
ld hl, .msg
|
||||
call printstr
|
||||
jp abort
|
||||
.msg:
|
||||
.db "unknown name", 0
|
||||
.undef:
|
||||
; When encountering an undefined word during compilation, we spit a
|
||||
; reference to litWord, followed by the null-terminated word.
|
||||
; This way, if a preceding word expect a string literal, it will read it
|
||||
; by calling readCompWord, and if it doesn't, the routine will be
|
||||
; called, triggering an abort.
|
||||
ld hl, LIT
|
||||
call wrCompHL
|
||||
pop hl ; <-- lvl 1. recall string addr
|
||||
.writeLit:
|
||||
ld a, (hl)
|
||||
ld (iy), a
|
||||
inc hl
|
||||
inc iy
|
||||
or a
|
||||
jr nz, .writeLit
|
||||
ret
|
||||
|
||||
|
||||
; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
|
||||
; HL points to new (HERE)
|
||||
; Set Z if name could be read, NZ if not
|
||||
entryhead:
|
||||
call readword
|
||||
ret nz
|
||||
call readCompWord
|
||||
call printstr
|
||||
ld de, (HERE)
|
||||
call strcpy
|
||||
ex de, hl ; (HERE) now in HL
|
||||
|
Loading…
Reference in New Issue
Block a user