From 03e529b762f6367ed995e189497bcf833fc3dc29 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Mon, 9 Mar 2020 08:49:51 -0400 Subject: [PATCH] forth: simplify execution model and handle literals better This scheme of "when we handle line-by-line, compile one word at a time then execute" so that we could allow words like "CREATE" to call "readword" before continuing was a bad scheme. It made things like branching outside of a colon definition impossible. This commit implement a new "litWord". When an undefined word is encountered at compile time, it is included as-is in a string literal word. It is at run time that we decide what to do with it. --- apps/forth/dict.asm | 115 ++++++++++++++++------------ apps/forth/dictionary.txt | 4 +- apps/forth/main.asm | 38 +--------- apps/forth/stack.asm | 15 ++-- apps/forth/util.asm | 156 ++++++++++++++++++++++++++++++++++---- 5 files changed, 222 insertions(+), 106 deletions(-) diff --git a/apps/forth/dict.asm b/apps/forth/dict.asm index 48eaf61..06c569e 100644 --- a/apps/forth/dict.asm +++ b/apps/forth/dict.asm @@ -71,10 +71,27 @@ numberWord: NUMBER: .dw numberWord +; Similarly to numberWord, this is not a real word, but a string literal. +; Instead of being followed by a 2 bytes number, it's followed by a +; null-terminated string. This is not expected to be called in a regular +; context. Only words expecting those literals will look for them. This is why +; the litWord triggers abort. +litWord: + call popRS + call intoHL + call printstr ; let's print the word before abort. + ld hl, .msg + call printstr + jp abort +.msg: + .db "undefined word", 0 +LIT: + .dw litWord ; ( R:I -- ) EXIT: - .db "EXIT", 0, 0, 0, 0 + .db ";" + .fill 7 .dw 0 .dw nativeWord ; When we call the EXIT word, we have to do a "double exit" because our current @@ -88,7 +105,7 @@ exit: call popRS ; We have a pointer to a word push hl \ pop iy - jr compiledWord + jp compiledWord ; ( R:I -- ) QUIT: @@ -96,31 +113,29 @@ QUIT: .dw EXIT .dw nativeWord quit: - ld hl, FLAGS - set FLAG_QUITTING, (hl) - jr exit + jp forthRdLine ABORT: .db "ABORT", 0, 0, 0 .dw QUIT .dw nativeWord abort: + ; Reinitialize PS (RS is reinitialized in forthInterpret ld sp, (INITIAL_SP) - ld hl, .msg - call printstr - call printcrlf - jr quit -.msg: - .db " err", 0 + jp forthRdLine BYE: .db "BYE" .fill 5 .dw ABORT .dw nativeWord - ld hl, FLAGS - set FLAG_ENDPGM, (hl) - jp exit + ; Goodbye Forth! Before we go, let's restore the stack + ld sp, (INITIAL_SP) + ; unwind stack underflow buffer + pop af \ pop af \ pop af + ; success + xor a + ret ; ( c -- ) EMIT: @@ -155,36 +170,44 @@ DEFINE: .dw EXECUTE .dw nativeWord call entryhead - jp nz, quit ld de, compiledWord ld (hl), e inc hl ld (hl), d inc hl - push hl \ pop iy + ; At this point, we've processed the name literal following the ':'. + ; What's next? We have, in IP, a pointer to words that *have already + ; been compiled by INTERPRET*. All those bytes will be copied as-is. + ; All we need to do is to know how many bytes to copy. To do so, we + ; skip compwords until EXIT is reached. + ld (HERE), hl ; where we write compwords. + ; Let's save old RS TOS + ld e, (ix) + ld d, (ix+1) .loop: - call readword - jr nz, .end - call .issemicol - jr z, .end - call compile - jp nz, quit + call RSIsEXIT + jr z, .loopend + call compSkip jr .loop -.end: - ; end chain with EXIT - ld hl, EXIT+CODELINK_OFFSET - call wrCompHL - ld (HERE), iy +.loopend: + ; At this point, RS' TOS points to EXIT compword. We'll copy it too. + ; We'll use LDIR. BC will be RSTOP-OLDRSTOP+2 + ld l, (ix) + ld h, (ix+1) + inc hl \ inc hl ; our +2 + or a ; clear carry + sbc hl, de + ld b, h + ld c, l + ; BC has proper count + ex de, hl ; HL is our source (old RS' TOS) + ld de, (HERE) ; and DE is our dest + ldir ; go! + ; HL has our new RS' TOS + ld (ix), l + ld (ix+1), h + ld (HERE), de ; update HERE jp exit -.issemicol: - ld a, (hl) - cp ';' - ret nz - inc hl - ld a, (hl) - dec hl - or a - ret DOES: .db "DOES>", 0, 0, 0 @@ -226,22 +249,17 @@ INTERPRET: .dw KEY .dw nativeWord interpret: - call readword - jp nz, quit ld iy, COMPBUF +.loop: + call readword + jr nz, .end call compile - jp nz, .notfound - ld hl, EXIT+CODELINK_OFFSET - ld (iy), l - ld (iy+1), h + jr .loop +.end: + ld hl, QUIT+CODELINK_OFFSET + call wrCompHL ld iy, COMPBUF jp compiledWord -.notfound: - ld hl, .msg - call printstr - jp quit -.msg: - .db "not found", 0 CREATE: .db "CREATE", 0, 0 @@ -454,4 +472,3 @@ CONSTANT: .dw DOES+CODELINK_OFFSET .dw FETCH+CODELINK_OFFSET .dw EXIT+CODELINK_OFFSET - diff --git a/apps/forth/dictionary.txt b/apps/forth/dictionary.txt index bc2e7c3..748c520 100644 --- a/apps/forth/dictionary.txt +++ b/apps/forth/dictionary.txt @@ -17,7 +17,8 @@ DOES>: Used inside a colon definition that itself uses CREATE, DOES> transforms *** Native Words *** -: x ... ; -- Define a new word +: x ... -- Define a new word +; R:I -- Exit a colon definition . n -- Print n in its decimal form @ a -- n Set n to value at address a ! n a -- Store n in address a @@ -30,7 +31,6 @@ DOES> -- See description at top of file DUP a -- a a EMIT c -- Spit char c to stdout EXECUTE a -- Execute word at addr a -EXIT R:I -- Exit a colon definition HERE -- a Push HERE's address QUIT R:drop -- Return to interpreter promp immediately KEY -- c Get char c from stdin diff --git a/apps/forth/main.asm b/apps/forth/main.asm index 807b38f..6abcb02 100644 --- a/apps/forth/main.asm +++ b/apps/forth/main.asm @@ -7,47 +7,17 @@ .equ NAMELEN 8 ; Offset of the code link relative to the beginning of the word .equ CODELINK_OFFSET 10 -; When set, the interpreter should abort parsing of current line and return to -; prompt. -.equ FLAG_QUITTING 0 -; When set, the interpreter should quit -.equ FLAG_ENDPGM 1 ; *** Variables *** .equ INITIAL_SP FORTH_RAMSTART .equ CURRENT @+2 .equ HERE @+2 .equ INPUTPOS @+2 -.equ FLAGS @+2 ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE. -.equ COMPBUF @+1 +.equ COMPBUF @+2 .equ FORTH_RAMEND @+0x40 ; *** Code *** -MAIN: - .dw compiledWord - .dw INTERPRET+CODELINK_OFFSET - .dw CHKEND - -; If FLAG_ENDPGM is set, stop the program, else, tweak the RS so that we loop. -CHKEND: - .dw nativeWord - ld hl, FLAGS - bit FLAG_ENDPGM, (hl) - jr nz, .endpgm - ; not quitting program, are we supposed to continue parsing line? - ld hl, FLAGS - bit FLAG_QUITTING, (hl) - jr nz, forthRdLine - ; Not quitting line either. - jr forthInterpret -.endpgm: - ld sp, (INITIAL_SP) - ; restore stack - pop af \ pop af \ pop af - xor a - ret - forthMain: ; STACK OVERFLOW PROTECTION: ; To avoid having to check for stack underflow after each pop operation @@ -62,16 +32,14 @@ forthMain: ld hl, FORTH_RAMEND ld (HERE), hl forthRdLine: - xor a - ld (FLAGS), a ld hl, msgOk call printstr call printcrlf call stdioReadLine ld (INPUTPOS), hl forthInterpret: - ld ix, RS_ADDR - ld iy, MAIN + ld ix, RS_ADDR-2 ; -2 because we inc-before-push + ld iy, INTERPRET+CODELINK_OFFSET jp executeCodeLink msgOk: .db " ok", 0 diff --git a/apps/forth/stack.asm b/apps/forth/stack.asm index bbebba2..a17b728 100644 --- a/apps/forth/stack.asm +++ b/apps/forth/stack.asm @@ -3,25 +3,26 @@ ; PS is the most frequently used. However, this causes a problem with routine ; calls: because in Forth, the stack isn't balanced within each call, our return ; offset, when placed by a CALL, messes everything up. This is one of the -; reasons why we need stack management routines below. +; reasons why we need stack management routines below. IX always points to RS' +; Top Of Stack (TOS) ; ; This return stack contain "Interpreter pointers", that is a pointer to the ; address of a word, as seen in a compiled list of words. ; Push value HL to RS pushRS: + inc ix + inc ix ld (ix), l - inc ix - ld (ix), h - inc ix + ld (ix+1), h ret ; Pop RS' TOS to HL popRS: - dec ix - ld h, (ix) - dec ix ld l, (ix) + ld h, (ix+1) + dec ix + dec ix ret ; Verifies that SP is within bounds. If it's not, call ABORT diff --git a/apps/forth/util.asm b/apps/forth/util.asm index 7f93211..d0e8e2e 100644 --- a/apps/forth/util.asm +++ b/apps/forth/util.asm @@ -43,6 +43,126 @@ readword: inc a ; unset Z ret +RSIsDE: + push hl + ld l, (ix) + ld h, (ix+1) + ld a, (hl) + cp e + jr nz, .end ; no + inc hl + ld a, (hl) + cp d ; Z has our answer +.end: + pop hl + ret + + +; Is RS' TOS pointing to a NUMBER word? +; Z if yes, NZ if no. +RSIsNUMBER: + push de + ld de, NUMBER + call RSIsDE + pop de + ret + +; Is RS' TOS pointing to a LIT word? +; Z if yes, NZ if no. +RSIsLIT: + push de + ld de, LIT + call RSIsDE + pop de + ret + +; Is RS' TOS pointing to EXIT? +; Z if yes, NZ if no. +RSIsEXIT: + push de + ld de, EXIT+CODELINK_OFFSET + call RSIsDE + pop de + ret + +; Skip the compword where RS' TOS is currently pointing. If it's a regular word, +; it's easy: we inc by 2. If it's a NUMBER, we inc by 4. If it's a LIT, we skip +; to after null-termination. +compSkip: + push hl + ld l, (ix) + ld h, (ix+1) + ; At the minimum, we skip by 2 + inc hl \ inc hl + call RSIsNUMBER + jr z, .isNum + call RSIsLIT + jr nz, .end ; A word + ; We have a literal + call strskip + inc hl ; byte after word termination + jr .end +.isNum: + ; skip by 4 + inc hl \ inc hl +.end: + ; HL is good, write it to RS + ld (ix), l + ld (ix+1), h + pop hl + ret + +; Checks RS' TOS and, if it points to a string literal (LIT), makes HL point +; to it and advance IP to byte following null-termination. +; If it doesn't, things get interesting: If it's a word reference, then it's +; not an invalid literal. For example, one could want to redefine an existing +; word. So in that case, we'll copy the word's name on the pad (it might not be +; null-terminated) and set HL to point to it. +; How do we know that our reference is a word reference (it could be, for +; example, a NUMBER reference)? We check that its address is more than QUIT, the +; second word in our dict. We don't accept EXIT because it's the termination +; word. Yeah, it means that ";" can't be overridden... +; If name can't be read, we abort +readCompWord: + ; In all cases, we want RS' TOS in HL. Let's get it now. + ld l, (ix) + ld h, (ix+1) + call RSIsLIT + jr nz, .notLIT + ; RS TOS is a LIT, make HL point to string, then skip this RS compword. + inc hl \ inc hl ; HL now points to string itself + jr compSkip +.notLIT: + ; Alright, not a literal, but is it a word? If it's not a number, then + ; it's a word. + call RSIsNUMBER + jr z, .notWord + ; Not a number, then it's a word. Copy word to pad and point to it. + call intoHL + or a ; clear carry + ld de, CODELINK_OFFSET + sbc hl, de + ; That's our return value + push hl ; --> lvl 1 + ; HL now points to word offset, let'd copy it to pad + ex de, hl + call pad + ex de, hl + ld bc, NAMELEN + ldir + ; null-terminate + xor a + ld (de), a + call compSkip + pop hl ; <-- lvl 1 + ret +.notWord: + ld hl, .msg + call printstr + jp abort +.msg: + .db "word expected", 0 + ; For DE pointing to a dict entry, set DE to point to the previous entry. ; Z is set if DE point to 0 (no entry). NZ if not. prev: @@ -84,38 +204,48 @@ wrCompHL: ; Compile word string at (HL) and write down its compiled version in IY, ; advancing IY to the byte next to the last written byte. -; Set Z on success, unset on failure. compile: call find jr nz, .maybeNum - ret nz ; DE is a word offset, we need a code link ld hl, CODELINK_OFFSET add hl, de xor a ; set Z jr wrCompHL .maybeNum: + push hl ; --> lvl 1. save string addr call parseLiteral - ret nz + jr nz, .undef + pop hl ; <-- lvl 1 ; a valid number! ld hl, NUMBER call wrCompHL ex de, hl ; number in HL jr wrCompHL - ret z - ; unknown name - ld hl, .msg - call printstr - jp abort -.msg: - .db "unknown name", 0 +.undef: + ; When encountering an undefined word during compilation, we spit a + ; reference to litWord, followed by the null-terminated word. + ; This way, if a preceding word expect a string literal, it will read it + ; by calling readCompWord, and if it doesn't, the routine will be + ; called, triggering an abort. + ld hl, LIT + call wrCompHL + pop hl ; <-- lvl 1. recall string addr +.writeLit: + ld a, (hl) + ld (iy), a + inc hl + inc iy + or a + jr nz, .writeLit + ret + ; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT) ; HL points to new (HERE) -; Set Z if name could be read, NZ if not entryhead: - call readword - ret nz + call readCompWord + call printstr ld de, (HERE) call strcpy ex de, hl ; (HERE) now in HL