From c3838714d5590960d3c97705cd6cd6bda193d13b Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Fri, 13 Mar 2020 16:01:09 -0400 Subject: [PATCH] forth: improve execution model My approach with RS was slightly wrong: RS' TOP was always containing current IP. It worked, but it was problematic when came the time to introduce RS-modifying words: it's impossible to modify RS in a word without immediately messing your flow. Therefore, what used to be RS' TOS has to be a variable that isn't changed midway by RS-modifying words. I guess that's why RS is called *return* stack... --- apps/forth/dict.asm | 180 +++++++++++++++++++++----------------------- apps/forth/main.asm | 45 ++++++++--- apps/forth/util.asm | 24 ++++-- emul/.gitignore | 1 + emul/Makefile | 3 + emul/forth/stage1.c | 16 +++- 6 files changed, 159 insertions(+), 110 deletions(-) diff --git a/apps/forth/dict.asm b/apps/forth/dict.asm index a4066f0..189f994 100644 --- a/apps/forth/dict.asm +++ b/apps/forth/dict.asm @@ -7,43 +7,64 @@ ; ; The code pointer point to "word routines". These routines expect to be called ; with IY pointing to the PF. They themselves are expected to end by jumping -; to the address at the top of the Return Stack. They will usually do so with -; "jp exit". +; to the address at (IP). They will usually do so with "jp next". ; ; That's for "regular" words (words that are part of the dict chain). There are ; also "special words", for example NUMBER, LIT, FBR, that have a slightly ; different structure. They're also a pointer to an executable, but as for the ; other fields, the only one they have is the "flags" field. +; This routine is jumped to at the end of every word. In it, we jump to current +; IP, but we also take care of increasing it my 2 before jumping +next: + ; Before we continue: are stacks within bounds? + call chkPS + ld de, (IP) + ld h, d + ld l, e + inc de \ inc de + ld (IP), de + ; HL is an atom list pointer. We need to go into it to have a wordref + ld e, (hl) + inc hl + ld d, (hl) + push de + jp EXECUTE+2 + + ; Execute a word containing native code at its PF address (PFA) nativeWord: jp (iy) -; Execute a list of atoms, which usually ends with EXIT. -; IY points to that list. +; Execute a list of atoms, which always end with EXIT. +; IY points to that list. What do we do: +; 1. Push current IP to RS +; 2. Set new IP to the second atom of the list +; 3. Execute the first atom of the list. compiledWord: + ld hl, (IP) + call pushRS push iy \ pop hl inc hl inc hl - ; HL points to next Interpreter pointer. - call pushRS + ld (IP), hl + ; IY still is our atom reference... ld l, (iy) ld h, (iy+1) - push hl \ pop iy - ; IY points to code link - jp executeCodeLink + push hl ; argument for EXECUTE + jp EXECUTE+2 ; Pushes the PFA directly cellWord: push iy - jp exit + jp next ; Pushes the address in the first word of the PF sysvarWord: ld l, (iy) ld h, (iy+1) push hl - jp exit + jp next ; The word was spawned from a definition word that has a DOES>. PFA+2 (right ; after the actual cell) is a link to the slot right after that DOES>. @@ -59,20 +80,16 @@ doesWord: ; This is not a word, but a number literal. This works a bit differently than ; others: PF means nothing and the actual number is placed next to the ; numberWord reference in the compiled word list. What we need to do to fetch -; that number is to play with the Return stack: We pop it, read the number, push -; it to the Parameter stack and then push an increase Interpreter Pointer back -; to RS. +; that number is to play with the IP. numberWord: - ld l, (ix) - ld h, (ix+1) + ld hl, (IP) ; (HL) is out number ld e, (hl) inc hl ld d, (hl) inc hl - ld (ix), l - ld (ix+1), h + ld (IP), hl ; advance IP by 2 push de - jp exit + jp next .db 0b10 ; Flags NUMBER: @@ -84,8 +101,7 @@ NUMBER: ; context. Only words expecting those literals will look for them. This is why ; the litWord triggers abort. litWord: - call popRS - call intoHL + ld hl, (IP) call printstr ; let's print the word before abort. ld hl, .msg call printstr @@ -97,24 +113,16 @@ litWord: LIT: .dw litWord +; Pop previous IP from Return stack and execute it. ; ( R:I -- ) .db ";" .fill 7 .dw 0 EXIT: .dw nativeWord -; When we call the EXIT word, we have to do a "double exit" because our current -; Interpreter pointer is pointing to the word *next* to our EXIT reference when, -; in fact, we want to continue processing the one above it. call popRS -exit: - ; Before we continue: is SP within bounds? - call chkPS - ; we're good - call popRS - ; We have a pointer to a word - push hl \ pop iy - jp compiledWord + ld (IP), hl + jp next ; ( R:I -- ) .db "QUIT" @@ -133,9 +141,9 @@ quit: ABORT: .dw nativeWord abort: - ; Reinitialize PS (RS is reinitialized in forthInterpret + ; Reinitialize PS (RS is reinitialized in forthInterpret) ld sp, (INITIAL_SP) - jp forthRdLine + jp forthRdLineNoOk ABORTREF: .dw ABORT @@ -163,7 +171,7 @@ EMIT: pop hl ld a, l call stdioPutC - jp exit + jp next ; ( c port -- ) .db "PC!" @@ -175,7 +183,7 @@ PSTORE: pop bc pop hl out (c), l - jp exit + jp next ; ( port -- c ) .db "PC@" @@ -188,7 +196,7 @@ PFETCH: ld h, 0 in l, (c) push hl - jp exit + jp next ; ( addr -- ) .db "EXECUTE" @@ -214,10 +222,7 @@ DEFINE: .dw nativeWord call entryhead ld de, compiledWord - ld (hl), e - inc hl - ld (hl), d - inc hl + call DEinHL ; At this point, we've processed the name literal following the ':'. ; What's next? We have, in IP, a pointer to words that *have already ; been compiled by INTERPRET*. All those bytes will be copied as-is. @@ -225,8 +230,7 @@ DEFINE: ; skip compwords until EXIT is reached. ex de, hl ; DE is our dest ld (HERE), de ; update HERE - ld l, (ix) - ld h, (ix+1) + ld hl, (IP) .loop: call HLPointsEXIT jr z, .loopend @@ -236,22 +240,19 @@ DEFINE: ; skip EXIT inc hl \ inc hl ; We have out end offset. Let's get our offset - ld e, (ix) - ld d, (ix+1) + ld de, (IP) or a ; clear carry sbc hl, de ; HL is our copy count. ld b, h ld c, l - ld l, (ix) - ld h, (ix+1) + ld hl, (IP) ld de, (HERE) ; recall dest ; copy! ldir - ld (ix), l - ld (ix+1), h + ld (IP), hl ld (HERE), de - jp exit + jp next .db "DOES>" @@ -264,18 +265,17 @@ DOES: ; need to do. ; 1. Change the code link to doesWord ; 2. Leave 2 bytes for regular cell variable. - ; 3. Get the Interpreter pointer from the stack and write this down to - ; entry PFA+2. - ; 3. exit. Because we've already popped RS, a regular exit will abort - ; colon definition, so we're good. + ; 3. Write down IP+2 to entry. + ; 3. exit. we're done here. ld iy, (CURRENT) ld hl, doesWord call wrCompHL inc iy \ inc iy ; cell variable space - call popRS + ld hl, (IP) + inc hl \ inc hl call wrCompHL ld (HERE), iy - jp exit + jp EXIT+2 .db "IMMEDIA" @@ -286,7 +286,7 @@ IMMEDIATE: ld hl, (CURRENT) dec hl set FLAG_IMMED, (hl) - jp exit + jp next ; ( n -- ) .db "LITERAL" @@ -300,7 +300,7 @@ LITERAL: pop de ; number from stack call DEinHL ld (HERE), hl - jp exit + jp next .db "'" @@ -313,7 +313,7 @@ APOS: call find jr nz, .notfound push de - jp exit + jp next .notfound: ld hl, .msg call printstr @@ -337,7 +337,7 @@ APOSI: pop de ; <-- lvl 1 call DEinHL ld (HERE), hl - jp exit + jp next .notfound: ld hl, .msg call printstr @@ -356,7 +356,7 @@ KEY: ld h, 0 ld l, a push hl - jp exit + jp next .db "CREATE" .fill 1 @@ -365,14 +365,13 @@ KEY: CREATE: .dw nativeWord call entryhead - jp nz, quit ld de, cellWord ld (hl), e inc hl ld (hl), d inc hl ld (HERE), hl - jp exit + jp next .db "HERE" .fill 3 @@ -403,7 +402,7 @@ DOT: call pad call fmtDecimalS call printstr - jp exit + jp next ; ( n a -- ) .db "!" @@ -416,7 +415,7 @@ STORE: pop hl ld (iy), l ld (iy+1), h - jp exit + jp next ; ( n a -- ) .db "C!" @@ -428,7 +427,7 @@ CSTORE: pop hl pop de ld (hl), e - jp exit + jp next ; ( a -- n ) .db "@" @@ -440,7 +439,7 @@ FETCH: pop hl call intoHL push hl - jp exit + jp next ; ( a -- c ) .db "C@" @@ -453,7 +452,7 @@ CFETCH: ld l, (hl) ld h, 0 push hl - jp exit + jp next ; ( -- a ) .db "LIT@" @@ -464,7 +463,7 @@ LITFETCH: .dw nativeWord call readLITTOS push hl - jp exit + jp next ; ( a b -- b a ) .db "SWAP" @@ -476,7 +475,7 @@ SWAP: pop hl ex (sp), hl push hl - jp exit + jp next ; ( a b c d -- c d a b ) .db "2SWAP" @@ -493,7 +492,7 @@ SWAP2: push de ; D push hl ; A push bc ; B - jp exit + jp next ; ( a -- a a ) .db "DUP" @@ -505,7 +504,7 @@ DUP: pop hl push hl push hl - jp exit + jp next ; ( a b -- a b a b ) .db "2DUP" @@ -520,7 +519,7 @@ DUP2: push hl push de push hl - jp exit + jp next ; ( a b -- a b a ) .db "OVER" @@ -534,7 +533,7 @@ OVER: push de push hl push de - jp exit + jp next ; ( a b c d -- a b c d a b ) .db "2OVER" @@ -553,7 +552,7 @@ OVER2: push hl ; D push iy ; A push bc ; B - jp exit + jp next ; ( a b -- c ) A + B .db "+" @@ -566,7 +565,7 @@ PLUS: pop de add hl, de push hl - jp exit + jp next ; ( a b -- c ) A - B .db "-" @@ -580,7 +579,7 @@ MINUS: or a ; reset carry sbc hl, de push hl - jp exit + jp next ; ( a b -- c ) A * B .db "*" @@ -593,7 +592,7 @@ MULT: pop bc call multDEBC push hl - jp exit + jp next ; ( a b -- c ) A / B .db "/" @@ -606,7 +605,7 @@ DIV: pop hl call divide push bc - jp exit + jp next ; ( a1 a2 -- b ) .db "SCMP" @@ -620,7 +619,7 @@ SCMP: call strcmp call flagsToBC push bc - jp exit + jp next ; ( n1 n2 -- f ) .db "CMP" @@ -635,7 +634,7 @@ CMP: sbc hl, de call flagsToBC push bc - jp exit + jp next ; This word's atom is followed by 1b *relative* offset (to the cell's addr) to ; where to branch to. For example, The branching cell of "IF THEN" would @@ -647,14 +646,12 @@ CMP: FBR: .dw nativeWord push de - ld l, (ix) - ld h, (ix+1) + ld hl, (IP) ld a, (hl) call addHL - ld (ix), l - ld (ix+1), h + ld (IP), hl pop de - jp exit + jp next ; Conditional branch, only branch if TOS is zero .db "(fbr?)" @@ -668,12 +665,10 @@ FBRC: or l jr z, FBR+2 ; skip next byte in RS - ld l, (ix) - ld h, (ix+1) + ld hl, (IP) inc hl - ld (ix), l - ld (ix+1), h - jp exit + ld (IP), hl + jp next .db "RECURSE" @@ -682,9 +677,8 @@ FBRC: RECURSE: .dw nativeWord call popRS - ld l, (ix) - ld h, (ix+1) dec hl \ dec hl + ld (IP), hl push hl \ pop iy jp compiledWord diff --git a/apps/forth/main.asm b/apps/forth/main.asm index a57a10a..3c758e9 100644 --- a/apps/forth/main.asm +++ b/apps/forth/main.asm @@ -20,6 +20,8 @@ .equ CURRENT @+2 .equ HERE @+2 .equ OLDHERE @+2 +; Interpreter pointer. See Execution model comment below. +.equ IP @+2 ; Pointer to where we currently are in the interpretation of the current line. .equ INPUTPOS @+2 ; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE. @@ -59,6 +61,23 @@ ; ; During IMMEDIATE mode, (HERE) is temporarily set to COMPBUF, and when we're ; done, we restore (HERE) for runtime. This way, everyone is happy. +; +; EXECUTING A WORD +; +; At it's core, executing a word is having the wordref in IY and call +; executeCodeLink. Then, we let the word do its things. Some words are special, +; but most of them are of the compiledWord type, and that's their execution that +; we describe here. +; +; First of all, at all time during execution, the Interpreter Pointer (IP) +; points to the wordref we're executing next. +; +; When we execute a compiledWord, the first thing we do is push IP to the Return +; Stack (RS). Therefore, RS' top of stack will contain a wordref to execute +; next, after we EXIT. +; +; At the end of every compiledWord is an EXIT. This pops RS, sets IP to it, and +; continues. ; *** Code *** forthMain: @@ -82,10 +101,14 @@ forthMain: forthRdLine: ld hl, msgOk call printstr +forthRdLineNoOk: call printcrlf call stdioReadLine - ld ix, RS_ADDR-2 ; -2 because we inc-before-push ld (INPUTPOS), hl + ; Setup return stack. As a safety net, we set its bottom to ABORTREF. + ld hl, ABORTREF + ld (RS_ADDR), hl + ld ix, RS_ADDR ; We're about to compile the line and possibly execute IMMEDIATE words. ; Let's save current (HERE) and temporarily set it to COMPBUF. ld hl, (HERE) @@ -128,17 +151,23 @@ forthInterpret: ld (HERE), de jr forthInterpret .immed: - push hl ; --> lvl 1 + push hl ; --> For EXECUTE ld hl, .retRef - call pushRS - pop iy ; <-- lvl 1 - jp executeCodeLink + ld (IP), hl + jp EXECUTE+2 .execute: ld de, QUIT call .writeDE ; Compilation done, let's restore (HERE) and execute! ld hl, (OLDHERE) ld (HERE), hl + ; before we execute, let's play with our RS a bit: compiledWord is + ; going to push (IP) on the RS, but we don't expect our compiled words + ; to ever return: it ends with QUIT. Let's set (IP) to ABORTREF and + ; IX to RS_ADDR-2 so that compiledWord re-pushes our safety net. + ld hl, ABORTREF + ld (IP), hl + ld ix, RS_ADDR-2 ld iy, COMPBUF jp compiledWord .writeDE: @@ -153,10 +182,8 @@ forthInterpret: ret .retRef: - .dw $+2 - .dw $+2 - call popRS - jr forthInterpret + .dw $+2 + .dw forthInterpret msgOk: .db " ok", 0 diff --git a/apps/forth/util.asm b/apps/forth/util.asm index de04b5e..0455141 100644 --- a/apps/forth/util.asm +++ b/apps/forth/util.asm @@ -202,20 +202,31 @@ readLIT: .db "word expected", 0 readLITBOS: + ; Before we start: is our RS empty? If IX == RS_ADDR, it is (it only has + ; its safety net). When that happens, we actually want to run readLITTOS + push hl push de - ld hl, (RS_ADDR) + push ix \ pop hl + ld de, RS_ADDR + or a ; clear carry + sbc hl, de + pop de + pop hl + jr z, readLITTOS + push de + ; Our bottom-of-stack is RS_ADDR+2 because RS_ADDR is occupied by our + ; ABORTREF safety net. + ld hl, (RS_ADDR+2) call readLIT - ld (RS_ADDR), de + ld (RS_ADDR+2), de pop de ret readLITTOS: push de - ld l, (ix) - ld h, (ix+1) + ld hl, (IP) call readLIT - ld (ix), e - ld (ix+1), d + ld (IP), de pop de ret @@ -284,7 +295,6 @@ entryhead: inc hl ld (CURRENT), hl ld (HERE), hl - xor a ; set Z ret ; Sets Z if wordref at HL is of the IMMEDIATE type diff --git a/emul/.gitignore b/emul/.gitignore index 88d5e2c..344593e 100644 --- a/emul/.gitignore +++ b/emul/.gitignore @@ -1,5 +1,6 @@ /shell/shell /forth/stage1 +/forth/stage1dbg /forth/forth /zasm/zasm /zasm/avra diff --git a/emul/Makefile b/emul/Makefile index 1ea7d1e..35b1e66 100644 --- a/emul/Makefile +++ b/emul/Makefile @@ -33,6 +33,9 @@ forth/forth0-bin.h: forth/forth0.bin forth/stage1: forth/stage1.c $(OBJS) forth/forth0-bin.h $(CC) forth/stage1.c $(OBJS) -o $@ +forth/stage1dbg: forth/stage1.c $(OBJS) forth/forth0-bin.h + $(CC) -DDEBUG forth/stage1.c $(OBJS) -o $@ + forth/core.bin: $(APPS)/forth/core.fth forth/stage1 ./forth/stage1 $(APPS)/forth/core.fth | tee $@ > /dev/null diff --git a/emul/forth/stage1.c b/emul/forth/stage1.c index a4c24c5..69f401f 100644 --- a/emul/forth/stage1.c +++ b/emul/forth/stage1.c @@ -20,6 +20,11 @@ directly follow executable's last byte so that we don't waste spce and also that wordref offsets correspond. */ +// When DEBUG is set, stage1 is a core-less forth that works interactively. +// Useful for... debugging! +// By the way: there's a double-echo in stagedbg. It's normal. Don't panic. + +//#define DEBUG // in sync with glue.asm #define RAMSTART 0x900 #define STDIO_PORT 0x00 @@ -44,11 +49,17 @@ static uint8_t iord_stdio() static void iowr_stdio(uint8_t val) { // we don't output stdout in stage0 +#ifdef DEBUG + // ... unless we're in DEBUG mode! + putchar(val); +#endif } int main(int argc, char *argv[]) { - bool tty = false; +#ifdef DEBUG + fp = stdin; +#else if (argc == 2) { fp = fopen(argv[1], "r"); if (fp == NULL) { @@ -59,6 +70,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "Usage: ./stage0 filename\n"); return 1; } +#endif Machine *m = emul_init(); m->ramstart = RAMSTART; m->iord[STDIO_PORT] = iord_stdio; @@ -74,6 +86,7 @@ int main(int argc, char *argv[]) fclose(fp); +#ifndef DEBUG // We're done, now let's spit dict data // let's start with LATEST spitting. putchar(m->mem[CURRENT]); @@ -82,6 +95,7 @@ int main(int argc, char *argv[]) for (int i=sizeof(KERNEL); imem[i]); } +#endif return 0; }