forth: Simplify execution model

Change the mainloop so that words are executed immediately after they're read.
This greatly simplifies execution model and allow the "DEFINE" word to become
an IMMEDIATE and stop its "copy from compiled words" scheme.

The downside to this is that flow control words no longer work when being used
directly in the input buffer. They only work as part of a definition.

It also broke "RECURSE", but I've replaced it with "BEGIN" and "AGAIN".

Another effect of this change is that definitions can now span multiple lines.

All in all, it feels good to get rid of that COMPBUF...
This commit is contained in:
Virgil Dupras 2020-03-15 22:46:17 -04:00
parent 7befe56597
commit 80ab395823
5 changed files with 150 additions and 284 deletions

View File

@ -4,16 +4,15 @@
: ALLOT HERE +! ; : ALLOT HERE +! ;
: , H ! 2 ALLOT ; : , H ! 2 ALLOT ;
: C, H C! 1 ALLOT ; : C, H C! 1 ALLOT ;
: BEGIN H ; IMMEDIATE
: AGAIN ['] (bbr) , H -^ C, ; IMMEDIATE
: NOT 1 SWAP SKIP? EXIT 0 * ; : NOT 1 SWAP SKIP? EXIT 0 * ;
: RECURSE R> R> 2 - >R >R EXIT ; : ( BEGIN LITS ) WORD SCMP NOT SKIP? AGAIN ; IMMEDIATE
: ( LIT@ ) WORD SCMP NOT SKIP? RECURSE ; IMMEDIATE
( Hello, hello, krkrkrkr... do you hear me? ( Hello, hello, krkrkrkr... do you hear me?
Ah, voice at last! Some lines above need comments Ah, voice at last! Some lines above need comments
BTW: Forth lines limited to 64 cols because of default BTW: Forth lines limited to 64 cols because of default
input buffer size in Collapse OS input buffer size in Collapse OS
NOT: a bit convulted because we don't have IF yet NOT: a bit convulted because we don't have IF yet
RECURSE: RS TOS is for RECURSE itself, then we have to dig
one more level to get to RECURSE's parent's caller.
IF true, skip following (fbr). Also, push br cell ref H, IF true, skip following (fbr). Also, push br cell ref H,
to PS ) to PS )
: IF ['] SKIP? , ['] (fbr) , H 1 ALLOT ; IMMEDIATE : IF ['] SKIP? , ['] (fbr) , H 1 ALLOT ; IMMEDIATE

View File

@ -97,17 +97,14 @@ NUMBER:
; Similarly to numberWord, this is not a real word, but a string literal. ; Similarly to numberWord, this is not a real word, but a string literal.
; Instead of being followed by a 2 bytes number, it's followed by a ; Instead of being followed by a 2 bytes number, it's followed by a
; null-terminated string. This is not expected to be called in a regular ; null-terminated string. When called, puts the string's address on PS
; context. Only words expecting those literals will look for them. This is why
; the litWord triggers abort.
litWord: litWord:
ld hl, (IP) ld hl, (IP)
call printstr ; let's print the word before abort. push hl
ld hl, .msg call strskip
call printstr inc hl ; after null termination
jp abort ld (IP), hl
.msg: jp next
.db "undefined word", 0
.db 0b10 ; Flags .db 0b10 ; Flags
LIT: LIT:
@ -142,11 +139,24 @@ quit:
ABORT: ABORT:
.dw nativeWord .dw nativeWord
abort: abort:
; flush rest of input
ld hl, (INPUTPOS)
xor a
ld (hl), a
; Reinitialize PS (RS is reinitialized in forthInterpret) ; Reinitialize PS (RS is reinitialized in forthInterpret)
ld sp, (INITIAL_SP) ld sp, (INITIAL_SP)
jp forthRdLineNoOk jp forthRdLineNoOk
ABORTREF:
.dw ABORT ; prints msg in (HL) then aborts
abortMsg:
call printstr
jr abort
abortUnknownWord:
ld hl, .msg
jr abortMsg
.msg:
.db "unknown word", 0
.db "BYE" .db "BYE"
.fill 4 .fill 4
@ -220,7 +230,7 @@ EXECUTE:
.db 1 ; IMMEDIATE .db 1 ; IMMEDIATE
COMPILE: COMPILE:
.dw nativeWord .dw nativeWord
pop hl ; word addr call readword
call find call find
jr nz, .maybeNum jr nz, .maybeNum
ex de, hl ex de, hl
@ -242,29 +252,11 @@ COMPILE:
call .writeDE call .writeDE
jp next jp next
.undef: .undef:
; When encountering an undefined word during compilation, we spit a call printstr
; reference to litWord, followed by the null-terminated word. jp abortUnknownWord
; This way, if a preceding word expect a string literal, it will read it
; by calling readLIT, and if it doesn't, the routine will be
; called, triggering an abort.
ld de, LIT
call .writeDE
ld de, (HERE)
call strcpyM
ld (HERE), de
jp next
.immed: .immed:
; For this IMMEDIATE word to be compatible with regular execution model, push hl
; it needs to be compiled as an atom somewhere in memory. jp EXECUTE+2
; For example, RECURSE backtracks in RS and steps back 2 bytes. This
; can only work with our compiled atom being next to an EXIT atom.
ex de, hl ; atom to write in DE
ld hl, (OLDHERE)
push hl \ pop iy
call DEinHL
ld de, EXIT
call DEinHL
jp compiledWord
.writeDE: .writeDE:
push hl push hl
ld hl, (HERE) ld hl, (HERE)
@ -274,56 +266,49 @@ COMPILE:
ret ret
.db ";" .db ":"
.fill 6 .fill 6
.dw COMPILE .dw COMPILE
.db 0 .db 1 ; IMMEDIATE
ENDDEF:
.dw nativeWord
jp EXIT+2
.db ":"
.fill 6
.dw ENDDEF
.db 0
DEFINE: DEFINE:
.dw nativeWord .dw nativeWord
call entryhead call entryhead
ld de, compiledWord ld de, compiledWord
call DEinHL call DEinHL
; At this point, we've processed the name literal following the ':'. ld (HERE), hl
; What's next? We have, in IP, a pointer to words that *have already
; been compiled by INTERPRET*. All those bytes will be copied as-is.
; All we need to do is to know how many bytes to copy. To do so, we
; skip compwords until EXIT is reached.
ex de, hl ; DE is our dest
ld (HERE), de ; update HERE
ld hl, (IP)
.loop: .loop:
push de ; --> lvl 1 ; did we reach ";"?
ld de, ENDDEF ld hl, (INPUTPOS)
call HLPointsDE ld a, (hl)
pop de ; <-- lvl 1 cp ';'
jr z, .loopend jr nz, .compile
call compSkip inc hl
jr .loop ld a, (hl)
.loopend: cp ' '+1
; skip EXIT jr c, .loopend ; whitespace, we have semicol. end
inc hl \ inc hl .compile:
; We have out end offset. Let's get our offset
ld de, (IP)
or a ; clear carry
sbc hl, de
; HL is our copy count.
ld b, h
ld c, l
ld hl, (IP) ld hl, (IP)
ld de, (HERE) ; recall dest call pushRS
; copy! ld hl, .retRef
ldir
ld (IP), hl ld (IP), hl
ld (HERE), de ld hl, COMPILE
push hl
jp EXECUTE+2
.loopend:
; Advance (INPUTPOS) to after semicol. HL is already there.
ld (INPUTPOS), hl
; write EXIT and return
ld hl, (HERE)
ld de, EXIT
call DEinHL
ld (HERE), hl
jp next jp next
.retRef:
.dw $+2
.dw $+2
call popRS
ld (IP), hl
jr .loop
.db "DOES>" .db "DOES>"
@ -359,10 +344,11 @@ IMMEDIATE:
jp next jp next
; ( n -- ) ; ( n -- )
.db "LITERAL" .db "LITN"
.dw IMMEDIATE .fill 3
.db 1 ; IMMEDIATE .dw IMMEDIATE
LITERAL: .db 1 ; IMMEDIATE
LITN:
.dw nativeWord .dw nativeWord
ld hl, (HERE) ld hl, (HERE)
ld de, NUMBER ld de, NUMBER
@ -372,14 +358,28 @@ LITERAL:
ld (HERE), hl ld (HERE), hl
jp next jp next
.db "LITS"
.fill 3
.dw LITN
.db 1 ; IMMEDIATE
LITS:
.dw nativeWord
ld hl, (HERE)
ld de, LIT
call DEinHL
ex de, hl ; (HERE) in DE
call readword
call strcpyM
ld (HERE), de
jp next
.db "'" .db "'"
.fill 6 .fill 6
.dw LITERAL .dw LITS
.db 0 .db 0
APOS: APOS:
.dw nativeWord .dw nativeWord
call readLITBOS call readword
call find call find
jr nz, .notfound jr nz, .notfound
push de push de
@ -542,20 +542,10 @@ CFETCH:
push hl push hl
jp next jp next
.db "LIT@"
.fill 3
.dw CFETCH
.db 0
LITFETCH:
.dw nativeWord
call readLITTOS
push hl
jp next
; ( a -- ) ; ( a -- )
.db "DROP" .db "DROP"
.fill 3 .fill 3
.dw LITFETCH .dw CFETCH
.db 0 .db 0
DROP: DROP:
.dw nativeWord .dw nativeWord
@ -818,6 +808,19 @@ FBR:
pop de pop de
jp next jp next
LATEST: .db "(bbr)"
.dw FBR .fill 2
.dw FBR
.db 0
BBR:
.dw nativeWord
ld hl, (IP)
ld d, 0
ld e, (hl)
or a ; clear carry
sbc hl, de
ld (IP), hl
jp next
LATEST:
.dw BBR

View File

@ -42,22 +42,30 @@ directly, but as part of another word.
ALLOT n -- Move HERE by n bytes ALLOT n -- Move HERE by n bytes
C, b -- Write byte b in HERE and advance it. C, b -- Write byte b in HERE and advance it.
CREATE x -- Create cell named x. Doesn't allocate a PF. CREATE x -- Create cell named x. Doesn't allocate a PF.
COMPILE x -- Compile word x and write it to HERE
CONSTANT x n -- Creates cell x that when called pushes its value CONSTANT x n -- Creates cell x that when called pushes its value
DOES> -- See description at top of file DOES> -- See description at top of file
IMMEDIATE -- Flag the latest defined word as immediate. IMMEDIATE -- Flag the latest defined word as immediate.
LITERAL n -- *I* Inserts number from TOS as a literal LITN n -- *I* Inserts number from TOS as a literal
VARIABLE c -- Creates cell x with 2 bytes allocation. VARIABLE c -- Creates cell x with 2 bytes allocation.
*** Flow *** *** Flow ***
Note about flow words: flow words can only be used in definitions. In the
INTERPRET loop, they don't have the desired effect because each word from the
input stream is executed immediately. In this context, branching doesn't work.
(fbr) -- Branches forward by the number specified in its (fbr) -- Branches forward by the number specified in its
atom's cell. atom's cell.
COMPILE a -- Compile string word at addr a and spit it to HERE. (bbr) -- Branches backward by the number specified in its
atom's cell.
AGAIN I:a -- *I* Jump backwards to preceeding BEGIN.
BEGIN -- I:a *I* Marker for backward branching with AGAIN.
ELSE I:a -- *I* Compiles a (fbr) and set branching cell at a. ELSE I:a -- *I* Compiles a (fbr) and set branching cell at a.
EXECUTE a -- Execute wordref at addr a EXECUTE a -- Execute wordref at addr a
IF -- I:a *I* Compiles a (fbr?) and pushes its cell's addr IF -- I:a *I* Compiles a (fbr?) and pushes its cell's addr
INTERPRET -- Get a line from stdin, compile it in tmp memory, INTERPRET -- Get a line from stdin, compile it in tmp memory,
then execute the compiled contents. then execute the compiled contents.
QUIT R:drop -- Return to interpreter promp immediately QUIT R:drop -- Return to interpreter prompt immediately
RECURSE R:I -- R:I-2 Run the current word again. RECURSE R:I -- R:I-2 Run the current word again.
SKIP? f -- If f is true, skip the execution of the next atom. SKIP? f -- If f is true, skip the execution of the next atom.
Use this right before ";" and you're gonna have a Use this right before ";" and you're gonna have a
@ -108,7 +116,7 @@ CMP n1 n2 -- n Compare n1 and n2 and set n to -1, 0, or 1.
NOT f -- f Push the logical opposite of f NOT f -- f Push the logical opposite of f
*** Strings *** *** Strings ***
LIT@ x -- a Read following LIT and push its addr to a LITS x -- a Read following LIT and push its addr to a
SCMP a1 a2 -- n Compare strings a1 and a2. See CMP SCMP a1 a2 -- n Compare strings a1 and a2. See CMP
*** I/O *** *** I/O ***

View File

@ -19,18 +19,14 @@
.equ INITIAL_SP FORTH_RAMSTART .equ INITIAL_SP FORTH_RAMSTART
; wordref of the last entry of the dict. ; wordref of the last entry of the dict.
.equ CURRENT @+2 .equ CURRENT @+2
; Pointer to the next free byte in dict. During compilation of input text, this ; Pointer to the next free byte in dict.
; temporarily points to the next free byte in COMPBUF.
.equ HERE @+2 .equ HERE @+2
; Used to hold HERE while we temporarily point it to COMPBUF
.equ OLDHERE @+2
; Interpreter pointer. See Execution model comment below. ; Interpreter pointer. See Execution model comment below.
.equ IP @+2 .equ IP @+2
; Pointer to where we currently are in the interpretation of the current line. ; Pointer to where we currently are in the interpretation of the current line.
.equ INPUTPOS @+2 .equ INPUTPOS @+2
; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE. ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
.equ COMPBUF @+2 .equ FORTH_RAMEND @+2
.equ FORTH_RAMEND @+0x40
; (HERE) usually starts at RAMEND, but in certain situations, such as in stage0, ; (HERE) usually starts at RAMEND, but in certain situations, such as in stage0,
; (HERE) will begin at a strategic place. ; (HERE) will begin at a strategic place.
@ -41,30 +37,11 @@
; a general rule, we go like this: ; a general rule, we go like this:
; ;
; 1. read single word from line ; 1. read single word from line
; 2. compile word to atom ; 2. Can we find the word in dict?
; 3. if immediate, execute atom ; 3. If yes, execute that word, goto 1
; 4. goto 1 until we exhaust words ; 4. Is it a number?
; 5. Execute compiled atom list as if it was a regular compiledWord. ; 5. If yes, push that number to PS, goto 1
; ; 6. Error: undefined word.
; Because the Parameter Stack uses SP, we can't just go around calling routines:
; This messes with the PS. This is why we almost always jump (unless our call
; doesn't involve Forth words in any way).
;
; This presents a challenge for our interpret loop because step 4, "goto 1"
; isn't obvious. To be able to do that, we must push a "return routine" to the
; Return Stack before step 3.
;
; HERE and IMMEDIATE: When compiling in step 2, we spit compiled atoms in
; (HERE) to simplify "," semantic in Forth (spitting, in all cases, is done in
; (HERE)). However, suring input line compilation, it isn't like during ":", we
; aren't creating a new entry.
;
; Compiling and executing from (HERE) would be dangerous because an
; entry-creation word, during runtime, could end up overwriting the atom list
; we're executing. This is why we have this list in COMPBUF.
;
; During IMMEDIATE mode, (HERE) is temporarily set to COMPBUF, and when we're
; done, we restore (HERE) for runtime. This way, everyone is happy.
; ;
; EXECUTING A WORD ; EXECUTING A WORD
; ;
@ -113,54 +90,45 @@ forthRdLine:
forthRdLineNoOk: forthRdLineNoOk:
; Setup return stack. After INTERPRET, we run forthExecLine ; Setup return stack. After INTERPRET, we run forthExecLine
ld ix, RS_ADDR ld ix, RS_ADDR
; We're about to compile the line and possibly execute IMMEDIATE words. ld hl, MAINLOOP
; Let's save current (HERE) and temporarily set it to COMPBUF.
ld hl, (HERE)
ld (OLDHERE), hl
ld hl, COMPBUF
ld (HERE), hl
ld hl, .retRef
ld (IP), hl
ld hl, INTERPRET
push hl push hl
jp EXECUTE+2 jp EXECUTE+2
.retRef:
.dw $+2
.dw forthExecLine
forthExecLine:
ld de, QUIT
ld hl, (HERE)
call DEinHL
ld (HERE), hl
; Compilation done, let's restore (HERE) and execute!
ld hl, (OLDHERE)
ld (HERE), hl
ld iy, COMPBUF
; before we execute, let's play with our RS a bit: compiledWord is
; going to push (IP) on the RS, but we don't expect our compiled words
; to ever return: it ends with QUIT. Let's set (IP) to ABORTREF and
; IX to RS_ADDR-2 so that compiledWord re-pushes our safety net.
ld hl, ABORTREF
ld (IP), hl
ld ix, RS_ADDR-2
jp compiledWord
; (we don't have RECURSE here. Calling interpret makes us needlessly use our
; RS stack, but it can take it, can't it? )
; WORD COMPILE IN> @ C@ (to check if null) SKIP? (skip if not null) EXIT INTERPRET
.db 0b10 ; UNWORD .db 0b10 ; UNWORD
INTERPRET: INTERPRET:
.dw nativeWord
pop hl ; from WORD
ld a, (hl) ; special case: empty
or a
jp z, next
call find
jr nz, .maybeNum
; regular word
push de
jp EXECUTE+2
.maybeNum:
push hl ; --> lvl 1. save string addr
call parseLiteral
pop hl ; <-- lvl 1
jr nz, .undef
; a valid number in DE!
push de
jp next
.undef:
call printstr
jp abortUnknownWord
.db 0b10 ; UNWORD
MAINLOOP:
.dw compiledWord .dw compiledWord
.dw WORD .dw WORD
.dw COMPILE .dw INTERPRET
.dw INP .dw INP
.dw FETCH .dw FETCH
.dw CFETCH .dw CFETCH
.dw CSKIP .dw CSKIP
.dw EXIT .dw QUIT
.dw INTERPRET .dw MAINLOOP
.dw EXIT
msgOk: msgOk:
.db " ok", 0 .db " ok", 0

View File

@ -74,6 +74,10 @@ HLPointsBR:
push de push de
ld de, FBR ld de, FBR
call HLPointsDE call HLPointsDE
jr z, .end
ld de, BBR
call HLPointsDE
.end:
pop de pop de
ret ret
@ -105,121 +109,6 @@ compSkip:
inc hl \ inc hl inc hl \ inc hl
ret ret
; ***readLIT***
; The goal of this routine is to read a string literal following the currently
; executed words. For example, CREATE and DEFINE need this. Things are a little
; twisted, so bear with me while I explain how it works.
;
; When we call this routine, everything has been compiled. We're on an atom and
; we're executing it. Now, we're looking for a string literal or a word-with-a
; name that follows our readLIT caller. We could think that this word is
; right there on RS' TOS, but not always! You have to account for words wrapping
; the caller. For example, "VARIABLE" calls "CREATE". If you call
; "VARIABLE foo", if CREATE looks at what follows in RS' TOS, it will only find
; the "2" in "CREATE 2 ALLOT".
;
; In this case, we actually need to check in RS' *bottom of stack* for our
; answer. If that atom is a LIT, we're good. We make HL point to it and advance
; IP to byte following null-termination.
;
; If it isn't, things get interesting: If it's a word reference, then it's
; not an invalid literal. For example, one could want to redefine an existing
; word. So in that case, we'll copy the word's name on the pad (it might not be
; null-terminated) and set HL to point to it.
; How do we know that our reference is a word reference (it could be, for
; example, a NUMBER reference)? We check that its address is more than QUIT, the
; second word in our dict. We don't accept EXIT because it's the termination
; word. Yeah, it means that ";" can't be overridden...
; If name can't be read, we abort
;
; BOS vs TOS: What we cover so far is the "CREATE" and friends cases, where we
; want to read BOS. There are, however, cases where we want to read TOS, that is
; that we want to read the LIT right next to our atom. Example: "(". When
; processing comments, we are at compile time and want to read words from BOS,
; yes), however, in "("'s definition, there's "LIT@ )", which means "fetch LIT
; next to me and push this to stack". This LIT we want to fetch is *not* from
; BOS, it's from TOS.
;
; This is why we have readLITBOS and readLITTOS. readLIT uses HL and DE and is
; not used directly.
; Given a RS stack pointer HL, read LIT next to it (or abort) and set HL to
; point to its associated string. Set DE to there the RS stack pointer should
; point next.
readLIT:
call HLPointsLIT
jr nz, .notLIT
; RS BOS is a LIT, make HL point to string, then skip this RS compword.
inc hl \ inc hl ; HL now points to string itself
; HL has our its final value
ld d, h
ld e, l
call strskip
inc hl ; byte after word termination
ex de, hl
ret
.notLIT:
; Alright, not a literal, but is it a word?
call HLPointsUNWORD
jr z, .notWord
; Not a number, then it's a word. Copy word to pad and point to it.
push hl ; --> lvl 1. we need it to set DE later
call intoHL
or a ; clear carry
ld de, CODELINK_OFFSET
sbc hl, de
; That's our return value
push hl ; --> lvl 2
; HL now points to word offset, let'd copy it to pad
ex de, hl
call pad
ex de, hl
ld bc, NAMELEN
ldir
; null-terminate
xor a
ld (de), a
pop hl ; <-- lvl 2
pop de ; <-- lvl 1
; Advance IP by 2
inc de \ inc de
ret
.notWord:
ld hl, .msg
call printstr
jp abort
.msg:
.db "word expected", 0
readLITBOS:
; Before we start: is our RS empty? If IX == RS_ADDR, it is (it only has
; its safety net). When that happens, we actually want to run readLITTOS
push hl
push de
push ix \ pop hl
ld de, RS_ADDR
or a ; clear carry
sbc hl, de
pop de
pop hl
jr z, readLITTOS
push de
; Our bottom-of-stack is RS_ADDR+2 because RS_ADDR is occupied by our
; ABORTREF safety net.
ld hl, (RS_ADDR+2)
call readLIT
ld (RS_ADDR+2), de
pop de
ret
readLITTOS:
push de
ld hl, (IP)
call readLIT
ld (IP), de
pop de
ret
; Find the entry corresponding to word where (HL) points to and sets DE to ; Find the entry corresponding to word where (HL) points to and sets DE to
; point to that entry. ; point to that entry.
; Z if found, NZ if not. ; Z if found, NZ if not.
@ -271,7 +160,7 @@ wrCompHL:
; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT) ; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
; HL points to new (HERE) ; HL points to new (HERE)
entryhead: entryhead:
call readLITBOS call readword
ld de, (HERE) ld de, (HERE)
call strcpy call strcpy
ex de, hl ; (HERE) now in HL ex de, hl ; (HERE) now in HL
@ -343,4 +232,3 @@ fetchline:
call stdioReadLine call stdioReadLine
ld (INPUTPOS), hl ld (INPUTPOS), hl
ret ret