From 80ab3958233177af0924107c0aaf675adf7e1f35 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Sun, 15 Mar 2020 22:46:17 -0400 Subject: [PATCH] forth: Simplify execution model Change the mainloop so that words are executed immediately after they're read. This greatly simplifies execution model and allow the "DEFINE" word to become an IMMEDIATE and stop its "copy from compiled words" scheme. The downside to this is that flow control words no longer work when being used directly in the input buffer. They only work as part of a definition. It also broke "RECURSE", but I've replaced it with "BEGIN" and "AGAIN". Another effect of this change is that definitions can now span multiple lines. All in all, it feels good to get rid of that COMPBUF... --- apps/forth/core.fth | 7 +- apps/forth/dict.asm | 187 +++++++++++++++++++------------------- apps/forth/dictionary.txt | 16 +++- apps/forth/main.asm | 102 +++++++-------------- apps/forth/util.asm | 122 +------------------------ 5 files changed, 150 insertions(+), 284 deletions(-) diff --git a/apps/forth/core.fth b/apps/forth/core.fth index 2b8c7c2..5f72445 100644 --- a/apps/forth/core.fth +++ b/apps/forth/core.fth @@ -4,16 +4,15 @@ : ALLOT HERE +! ; : , H ! 2 ALLOT ; : C, H C! 1 ALLOT ; +: BEGIN H ; IMMEDIATE +: AGAIN ['] (bbr) , H -^ C, ; IMMEDIATE : NOT 1 SWAP SKIP? EXIT 0 * ; -: RECURSE R> R> 2 - >R >R EXIT ; -: ( LIT@ ) WORD SCMP NOT SKIP? RECURSE ; IMMEDIATE +: ( BEGIN LITS ) WORD SCMP NOT SKIP? AGAIN ; IMMEDIATE ( Hello, hello, krkrkrkr... do you hear me? Ah, voice at last! Some lines above need comments BTW: Forth lines limited to 64 cols because of default input buffer size in Collapse OS NOT: a bit convulted because we don't have IF yet - RECURSE: RS TOS is for RECURSE itself, then we have to dig - one more level to get to RECURSE's parent's caller. IF true, skip following (fbr). Also, push br cell ref H, to PS ) : IF ['] SKIP? , ['] (fbr) , H 1 ALLOT ; IMMEDIATE diff --git a/apps/forth/dict.asm b/apps/forth/dict.asm index febab0d..6127bc1 100644 --- a/apps/forth/dict.asm +++ b/apps/forth/dict.asm @@ -97,17 +97,14 @@ NUMBER: ; Similarly to numberWord, this is not a real word, but a string literal. ; Instead of being followed by a 2 bytes number, it's followed by a -; null-terminated string. This is not expected to be called in a regular -; context. Only words expecting those literals will look for them. This is why -; the litWord triggers abort. +; null-terminated string. When called, puts the string's address on PS litWord: ld hl, (IP) - call printstr ; let's print the word before abort. - ld hl, .msg - call printstr - jp abort -.msg: - .db "undefined word", 0 + push hl + call strskip + inc hl ; after null termination + ld (IP), hl + jp next .db 0b10 ; Flags LIT: @@ -142,11 +139,24 @@ quit: ABORT: .dw nativeWord abort: + ; flush rest of input + ld hl, (INPUTPOS) + xor a + ld (hl), a ; Reinitialize PS (RS is reinitialized in forthInterpret) ld sp, (INITIAL_SP) jp forthRdLineNoOk -ABORTREF: - .dw ABORT + +; prints msg in (HL) then aborts +abortMsg: + call printstr + jr abort + +abortUnknownWord: + ld hl, .msg + jr abortMsg +.msg: + .db "unknown word", 0 .db "BYE" .fill 4 @@ -220,7 +230,7 @@ EXECUTE: .db 1 ; IMMEDIATE COMPILE: .dw nativeWord - pop hl ; word addr + call readword call find jr nz, .maybeNum ex de, hl @@ -242,29 +252,11 @@ COMPILE: call .writeDE jp next .undef: - ; When encountering an undefined word during compilation, we spit a - ; reference to litWord, followed by the null-terminated word. - ; This way, if a preceding word expect a string literal, it will read it - ; by calling readLIT, and if it doesn't, the routine will be - ; called, triggering an abort. - ld de, LIT - call .writeDE - ld de, (HERE) - call strcpyM - ld (HERE), de - jp next + call printstr + jp abortUnknownWord .immed: - ; For this IMMEDIATE word to be compatible with regular execution model, - ; it needs to be compiled as an atom somewhere in memory. - ; For example, RECURSE backtracks in RS and steps back 2 bytes. This - ; can only work with our compiled atom being next to an EXIT atom. - ex de, hl ; atom to write in DE - ld hl, (OLDHERE) - push hl \ pop iy - call DEinHL - ld de, EXIT - call DEinHL - jp compiledWord + push hl + jp EXECUTE+2 .writeDE: push hl ld hl, (HERE) @@ -274,56 +266,49 @@ COMPILE: ret - .db ";" + .db ":" .fill 6 .dw COMPILE - .db 0 -ENDDEF: - .dw nativeWord - jp EXIT+2 - - .db ":" - .fill 6 - .dw ENDDEF - .db 0 + .db 1 ; IMMEDIATE DEFINE: .dw nativeWord call entryhead ld de, compiledWord call DEinHL - ; At this point, we've processed the name literal following the ':'. - ; What's next? We have, in IP, a pointer to words that *have already - ; been compiled by INTERPRET*. All those bytes will be copied as-is. - ; All we need to do is to know how many bytes to copy. To do so, we - ; skip compwords until EXIT is reached. - ex de, hl ; DE is our dest - ld (HERE), de ; update HERE - ld hl, (IP) + ld (HERE), hl .loop: - push de ; --> lvl 1 - ld de, ENDDEF - call HLPointsDE - pop de ; <-- lvl 1 - jr z, .loopend - call compSkip - jr .loop -.loopend: - ; skip EXIT - inc hl \ inc hl - ; We have out end offset. Let's get our offset - ld de, (IP) - or a ; clear carry - sbc hl, de - ; HL is our copy count. - ld b, h - ld c, l + ; did we reach ";"? + ld hl, (INPUTPOS) + ld a, (hl) + cp ';' + jr nz, .compile + inc hl + ld a, (hl) + cp ' '+1 + jr c, .loopend ; whitespace, we have semicol. end +.compile: ld hl, (IP) - ld de, (HERE) ; recall dest - ; copy! - ldir + call pushRS + ld hl, .retRef ld (IP), hl - ld (HERE), de + ld hl, COMPILE + push hl + jp EXECUTE+2 +.loopend: + ; Advance (INPUTPOS) to after semicol. HL is already there. + ld (INPUTPOS), hl + ; write EXIT and return + ld hl, (HERE) + ld de, EXIT + call DEinHL + ld (HERE), hl jp next +.retRef: + .dw $+2 + .dw $+2 + call popRS + ld (IP), hl + jr .loop .db "DOES>" @@ -359,10 +344,11 @@ IMMEDIATE: jp next ; ( n -- ) - .db "LITERAL" - .dw IMMEDIATE - .db 1 ; IMMEDIATE -LITERAL: + .db "LITN" + .fill 3 + .dw IMMEDIATE + .db 1 ; IMMEDIATE +LITN: .dw nativeWord ld hl, (HERE) ld de, NUMBER @@ -372,14 +358,28 @@ LITERAL: ld (HERE), hl jp next + .db "LITS" + .fill 3 + .dw LITN + .db 1 ; IMMEDIATE +LITS: + .dw nativeWord + ld hl, (HERE) + ld de, LIT + call DEinHL + ex de, hl ; (HERE) in DE + call readword + call strcpyM + ld (HERE), de + jp next .db "'" .fill 6 - .dw LITERAL + .dw LITS .db 0 APOS: .dw nativeWord - call readLITBOS + call readword call find jr nz, .notfound push de @@ -542,20 +542,10 @@ CFETCH: push hl jp next - .db "LIT@" - .fill 3 - .dw CFETCH - .db 0 -LITFETCH: - .dw nativeWord - call readLITTOS - push hl - jp next - ; ( a -- ) .db "DROP" .fill 3 - .dw LITFETCH + .dw CFETCH .db 0 DROP: .dw nativeWord @@ -818,6 +808,19 @@ FBR: pop de jp next -LATEST: - .dw FBR + .db "(bbr)" + .fill 2 + .dw FBR + .db 0 +BBR: + .dw nativeWord + ld hl, (IP) + ld d, 0 + ld e, (hl) + or a ; clear carry + sbc hl, de + ld (IP), hl + jp next +LATEST: + .dw BBR diff --git a/apps/forth/dictionary.txt b/apps/forth/dictionary.txt index 9ad8d51..c892bc7 100644 --- a/apps/forth/dictionary.txt +++ b/apps/forth/dictionary.txt @@ -42,22 +42,30 @@ directly, but as part of another word. ALLOT n -- Move HERE by n bytes C, b -- Write byte b in HERE and advance it. CREATE x -- Create cell named x. Doesn't allocate a PF. +COMPILE x -- Compile word x and write it to HERE CONSTANT x n -- Creates cell x that when called pushes its value DOES> -- See description at top of file IMMEDIATE -- Flag the latest defined word as immediate. -LITERAL n -- *I* Inserts number from TOS as a literal +LITN n -- *I* Inserts number from TOS as a literal VARIABLE c -- Creates cell x with 2 bytes allocation. *** Flow *** +Note about flow words: flow words can only be used in definitions. In the +INTERPRET loop, they don't have the desired effect because each word from the +input stream is executed immediately. In this context, branching doesn't work. + (fbr) -- Branches forward by the number specified in its atom's cell. -COMPILE a -- Compile string word at addr a and spit it to HERE. +(bbr) -- Branches backward by the number specified in its + atom's cell. +AGAIN I:a -- *I* Jump backwards to preceeding BEGIN. +BEGIN -- I:a *I* Marker for backward branching with AGAIN. ELSE I:a -- *I* Compiles a (fbr) and set branching cell at a. EXECUTE a -- Execute wordref at addr a IF -- I:a *I* Compiles a (fbr?) and pushes its cell's addr INTERPRET -- Get a line from stdin, compile it in tmp memory, then execute the compiled contents. -QUIT R:drop -- Return to interpreter promp immediately +QUIT R:drop -- Return to interpreter prompt immediately RECURSE R:I -- R:I-2 Run the current word again. SKIP? f -- If f is true, skip the execution of the next atom. Use this right before ";" and you're gonna have a @@ -108,7 +116,7 @@ CMP n1 n2 -- n Compare n1 and n2 and set n to -1, 0, or 1. NOT f -- f Push the logical opposite of f *** Strings *** -LIT@ x -- a Read following LIT and push its addr to a +LITS x -- a Read following LIT and push its addr to a SCMP a1 a2 -- n Compare strings a1 and a2. See CMP *** I/O *** diff --git a/apps/forth/main.asm b/apps/forth/main.asm index bb5b934..f6b79c0 100644 --- a/apps/forth/main.asm +++ b/apps/forth/main.asm @@ -19,18 +19,14 @@ .equ INITIAL_SP FORTH_RAMSTART ; wordref of the last entry of the dict. .equ CURRENT @+2 -; Pointer to the next free byte in dict. During compilation of input text, this -; temporarily points to the next free byte in COMPBUF. +; Pointer to the next free byte in dict. .equ HERE @+2 -; Used to hold HERE while we temporarily point it to COMPBUF -.equ OLDHERE @+2 ; Interpreter pointer. See Execution model comment below. .equ IP @+2 ; Pointer to where we currently are in the interpretation of the current line. .equ INPUTPOS @+2 ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE. -.equ COMPBUF @+2 -.equ FORTH_RAMEND @+0x40 +.equ FORTH_RAMEND @+2 ; (HERE) usually starts at RAMEND, but in certain situations, such as in stage0, ; (HERE) will begin at a strategic place. @@ -41,30 +37,11 @@ ; a general rule, we go like this: ; ; 1. read single word from line -; 2. compile word to atom -; 3. if immediate, execute atom -; 4. goto 1 until we exhaust words -; 5. Execute compiled atom list as if it was a regular compiledWord. -; -; Because the Parameter Stack uses SP, we can't just go around calling routines: -; This messes with the PS. This is why we almost always jump (unless our call -; doesn't involve Forth words in any way). -; -; This presents a challenge for our interpret loop because step 4, "goto 1" -; isn't obvious. To be able to do that, we must push a "return routine" to the -; Return Stack before step 3. -; -; HERE and IMMEDIATE: When compiling in step 2, we spit compiled atoms in -; (HERE) to simplify "," semantic in Forth (spitting, in all cases, is done in -; (HERE)). However, suring input line compilation, it isn't like during ":", we -; aren't creating a new entry. -; -; Compiling and executing from (HERE) would be dangerous because an -; entry-creation word, during runtime, could end up overwriting the atom list -; we're executing. This is why we have this list in COMPBUF. -; -; During IMMEDIATE mode, (HERE) is temporarily set to COMPBUF, and when we're -; done, we restore (HERE) for runtime. This way, everyone is happy. +; 2. Can we find the word in dict? +; 3. If yes, execute that word, goto 1 +; 4. Is it a number? +; 5. If yes, push that number to PS, goto 1 +; 6. Error: undefined word. ; ; EXECUTING A WORD ; @@ -113,54 +90,45 @@ forthRdLine: forthRdLineNoOk: ; Setup return stack. After INTERPRET, we run forthExecLine ld ix, RS_ADDR - ; We're about to compile the line and possibly execute IMMEDIATE words. - ; Let's save current (HERE) and temporarily set it to COMPBUF. - ld hl, (HERE) - ld (OLDHERE), hl - ld hl, COMPBUF - ld (HERE), hl - ld hl, .retRef - ld (IP), hl - ld hl, INTERPRET + ld hl, MAINLOOP push hl jp EXECUTE+2 -.retRef: - .dw $+2 - .dw forthExecLine -forthExecLine: - ld de, QUIT - ld hl, (HERE) - call DEinHL - ld (HERE), hl - ; Compilation done, let's restore (HERE) and execute! - ld hl, (OLDHERE) - ld (HERE), hl - ld iy, COMPBUF - ; before we execute, let's play with our RS a bit: compiledWord is - ; going to push (IP) on the RS, but we don't expect our compiled words - ; to ever return: it ends with QUIT. Let's set (IP) to ABORTREF and - ; IX to RS_ADDR-2 so that compiledWord re-pushes our safety net. - ld hl, ABORTREF - ld (IP), hl - ld ix, RS_ADDR-2 - jp compiledWord - -; (we don't have RECURSE here. Calling interpret makes us needlessly use our -; RS stack, but it can take it, can't it? ) -; WORD COMPILE IN> @ C@ (to check if null) SKIP? (skip if not null) EXIT INTERPRET .db 0b10 ; UNWORD INTERPRET: + .dw nativeWord + pop hl ; from WORD + ld a, (hl) ; special case: empty + or a + jp z, next + call find + jr nz, .maybeNum + ; regular word + push de + jp EXECUTE+2 +.maybeNum: + push hl ; --> lvl 1. save string addr + call parseLiteral + pop hl ; <-- lvl 1 + jr nz, .undef + ; a valid number in DE! + push de + jp next +.undef: + call printstr + jp abortUnknownWord + + .db 0b10 ; UNWORD +MAINLOOP: .dw compiledWord .dw WORD - .dw COMPILE + .dw INTERPRET .dw INP .dw FETCH .dw CFETCH .dw CSKIP - .dw EXIT - .dw INTERPRET - .dw EXIT + .dw QUIT + .dw MAINLOOP msgOk: .db " ok", 0 diff --git a/apps/forth/util.asm b/apps/forth/util.asm index b238cab..cb7e328 100644 --- a/apps/forth/util.asm +++ b/apps/forth/util.asm @@ -74,6 +74,10 @@ HLPointsBR: push de ld de, FBR call HLPointsDE + jr z, .end + ld de, BBR + call HLPointsDE +.end: pop de ret @@ -105,121 +109,6 @@ compSkip: inc hl \ inc hl ret -; ***readLIT*** -; The goal of this routine is to read a string literal following the currently -; executed words. For example, CREATE and DEFINE need this. Things are a little -; twisted, so bear with me while I explain how it works. -; -; When we call this routine, everything has been compiled. We're on an atom and -; we're executing it. Now, we're looking for a string literal or a word-with-a -; name that follows our readLIT caller. We could think that this word is -; right there on RS' TOS, but not always! You have to account for words wrapping -; the caller. For example, "VARIABLE" calls "CREATE". If you call -; "VARIABLE foo", if CREATE looks at what follows in RS' TOS, it will only find -; the "2" in "CREATE 2 ALLOT". -; -; In this case, we actually need to check in RS' *bottom of stack* for our -; answer. If that atom is a LIT, we're good. We make HL point to it and advance -; IP to byte following null-termination. -; -; If it isn't, things get interesting: If it's a word reference, then it's -; not an invalid literal. For example, one could want to redefine an existing -; word. So in that case, we'll copy the word's name on the pad (it might not be -; null-terminated) and set HL to point to it. -; How do we know that our reference is a word reference (it could be, for -; example, a NUMBER reference)? We check that its address is more than QUIT, the -; second word in our dict. We don't accept EXIT because it's the termination -; word. Yeah, it means that ";" can't be overridden... -; If name can't be read, we abort -; -; BOS vs TOS: What we cover so far is the "CREATE" and friends cases, where we -; want to read BOS. There are, however, cases where we want to read TOS, that is -; that we want to read the LIT right next to our atom. Example: "(". When -; processing comments, we are at compile time and want to read words from BOS, -; yes), however, in "("'s definition, there's "LIT@ )", which means "fetch LIT -; next to me and push this to stack". This LIT we want to fetch is *not* from -; BOS, it's from TOS. -; -; This is why we have readLITBOS and readLITTOS. readLIT uses HL and DE and is -; not used directly. - -; Given a RS stack pointer HL, read LIT next to it (or abort) and set HL to -; point to its associated string. Set DE to there the RS stack pointer should -; point next. -readLIT: - call HLPointsLIT - jr nz, .notLIT - ; RS BOS is a LIT, make HL point to string, then skip this RS compword. - inc hl \ inc hl ; HL now points to string itself - ; HL has our its final value - ld d, h - ld e, l - call strskip - inc hl ; byte after word termination - ex de, hl - ret -.notLIT: - ; Alright, not a literal, but is it a word? - call HLPointsUNWORD - jr z, .notWord - ; Not a number, then it's a word. Copy word to pad and point to it. - push hl ; --> lvl 1. we need it to set DE later - call intoHL - or a ; clear carry - ld de, CODELINK_OFFSET - sbc hl, de - ; That's our return value - push hl ; --> lvl 2 - ; HL now points to word offset, let'd copy it to pad - ex de, hl - call pad - ex de, hl - ld bc, NAMELEN - ldir - ; null-terminate - xor a - ld (de), a - pop hl ; <-- lvl 2 - pop de ; <-- lvl 1 - ; Advance IP by 2 - inc de \ inc de - ret -.notWord: - ld hl, .msg - call printstr - jp abort -.msg: - .db "word expected", 0 - -readLITBOS: - ; Before we start: is our RS empty? If IX == RS_ADDR, it is (it only has - ; its safety net). When that happens, we actually want to run readLITTOS - push hl - push de - push ix \ pop hl - ld de, RS_ADDR - or a ; clear carry - sbc hl, de - pop de - pop hl - jr z, readLITTOS - push de - ; Our bottom-of-stack is RS_ADDR+2 because RS_ADDR is occupied by our - ; ABORTREF safety net. - ld hl, (RS_ADDR+2) - call readLIT - ld (RS_ADDR+2), de - pop de - ret - -readLITTOS: - push de - ld hl, (IP) - call readLIT - ld (IP), de - pop de - ret - ; Find the entry corresponding to word where (HL) points to and sets DE to ; point to that entry. ; Z if found, NZ if not. @@ -271,7 +160,7 @@ wrCompHL: ; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT) ; HL points to new (HERE) entryhead: - call readLITBOS + call readword ld de, (HERE) call strcpy ex de, hl ; (HERE) now in HL @@ -343,4 +232,3 @@ fetchline: call stdioReadLine ld (INPUTPOS), hl ret -