diff --git a/apps/forth/README.md b/apps/forth/README.md new file mode 100644 index 0000000..14d40c5 --- /dev/null +++ b/apps/forth/README.md @@ -0,0 +1,7 @@ +# Forth + +**WIP** A Forth interpreter. Far from complete, but you can do stuff like + + KEY EMIT KEY EMIT + +See dictionary.txt for a word reference. diff --git a/apps/forth/dict.asm b/apps/forth/dict.asm new file mode 100644 index 0000000..ccef6e4 --- /dev/null +++ b/apps/forth/dict.asm @@ -0,0 +1,110 @@ +; A dictionary entry has this structure: +; - 8b name (zero-padded) +; - 2b prev pointer +; - 2b code pointer +; - Parameter field area (PFA) +; +; The code pointer point to "word routines". These routines expect to be called +; with IY pointing to the PFA. They themselves are expected to end by jumping +; to the address at the top of the Return Stack. They will usually do so with +; "jp exit". + +; Execute a word containing native code at its PFA +nativeWord: + jp (iy) + +; Execute a compiled word containing a list of references to other words, +; usually ended by a reference to EXIT. +; A reference to a word in a compiledWord section is *not* a direct reference, +; but a word+CODELINK_OFFSET reference. Therefore, for a code link "link", +; (link) is the routine to call. +compiledWord: + push iy \ pop hl + inc hl + inc hl + ; HL points to next Interpreter pointer. + call pushRS + ld l, (iy) + ld h, (iy+1) + push hl \ pop iy + ; IY points to code link + jp executeCodeLink + +; ( R:I -- ) +EXIT: + .db "EXIT", 0, 0, 0, 0 + .dw 0 + .dw nativeWord +; When we call the EXIT word, we have to do a "double exit" because our current +; Interpreter pointer is pointing to the word *next* to our EXIT reference when, +; in fact, we want to continue processing the one above it. + call popRS +exit: + call popRS + ; We have a pointer to a word + push hl \ pop iy + jp compiledWord + +; ( c -- ) +EMIT: + .db "EMIT", 0, 0, 0, 0 + .dw EXIT + .dw nativeWord + pop hl + ld a, l + call stdioPutC + jp exit + +; ( addr -- ) +EXECUTE: + .db "EXECUTE", 0 + .dw EMIT + .dw nativeWord + pop iy ; Points to word_offset + ld de, CODELINK_OFFSET + add iy, de +executeCodeLink: + ld l, (iy) + ld h, (iy+1) + ; HL points to code pointer + inc iy + inc iy + ; IY points to PFA + jp (hl) ; go! + +; ( -- c ) +KEY: + .db "KEY", 0, 0, 0, 0, 0 + .dw EXECUTE + .dw nativeWord + call stdioGetC + ld h, 0 + ld l, a + push hl + jp exit + +INTERPRET: + .db "INTERPRE" + .dw KEY + .dw nativeWord + call pad + push hl \ pop iy + call stdioReadLine + ld (INPUTPOS), hl +.loop: + call readword + jp nz, .loopend + call compile + jr nz, .notfound + jr .loop +.loopend: + call compileExit + call pad + push hl \ pop iy + jp compiledWord +.notfound: + ld hl, .msg + call printstr + jp exit +.msg: + .db "not found", 0 diff --git a/apps/forth/dictionary.txt b/apps/forth/dictionary.txt new file mode 100644 index 0000000..2599b02 --- /dev/null +++ b/apps/forth/dictionary.txt @@ -0,0 +1,10 @@ +Stack notation: " -- ". Rightmost is top of stack + (TOS). For example, in "a b -- c d", b is TOS before, d is TOS + after. "R:" means that the Return Stack is modified. + +EMIT c -- Spit char c to stdout +EXECUTE a -- Execute word at addr a +EXIT R:I -- Exit a colon definition +KEY -- c Get char c from stdin +INTERPRET -- Get a line from stdin, compile it in tmp memory, + then execute the compiled contents. diff --git a/apps/forth/glue.asm b/apps/forth/glue.asm new file mode 100644 index 0000000..32bcded --- /dev/null +++ b/apps/forth/glue.asm @@ -0,0 +1,10 @@ +.inc "user.h" +jp forthMain + +.inc "core.asm" +.equ FORTH_RAMSTART RAMSTART +.inc "forth/main.asm" +.inc "forth/util.asm" +.inc "forth/stack.asm" +.inc "forth/dict.asm" +RAMSTART: diff --git a/apps/forth/main.asm b/apps/forth/main.asm new file mode 100644 index 0000000..331885b --- /dev/null +++ b/apps/forth/main.asm @@ -0,0 +1,36 @@ +; *** Const *** +; Base of the Return Stack +.equ RS_ADDR 0xf000 +; Number of bytes we keep as a padding between HERE and the scratchpad +.equ PADDING 0x20 +; Offset of the code link relative to the beginning of the word +.equ CODELINK_OFFSET 10 + +; *** Variables *** +.equ INITIAL_SP FORTH_RAMSTART +.equ CURRENT @+2 +.equ HERE @+2 +.equ INPUTPOS @+2 +.equ FORTH_RAMEND @+2 + +; *** Code *** +MAIN: + .dw compiledWord + .dw INTERPRET+CODELINK_OFFSET + .dw ENDPGM + +ENDPGM: + .dw nativeWord + ld sp, (INITIAL_SP) + xor a + ret + +forthMain: + ld (INITIAL_SP), sp + ld hl, INTERPRET ; last entry in hardcoded dict + ld (CURRENT), hl + ld hl, FORTH_RAMEND + ld (HERE), hl + ld ix, RS_ADDR + ld iy, MAIN + jp executeCodeLink diff --git a/apps/forth/stack.asm b/apps/forth/stack.asm new file mode 100644 index 0000000..ebad3d2 --- /dev/null +++ b/apps/forth/stack.asm @@ -0,0 +1,25 @@ +; The Parameter stack (PS) is maintained by SP and the Return stack (RS) is +; maintained by IX. This allows us to generally use push and pop freely because +; PS is the most frequently used. However, this causes a problem with routine +; calls: because in Forth, the stack isn't balanced within each call, our return +; offset, when placed by a CALL, messes everything up. This is one of the +; reasons why we need stack management routines below. +; +; This return stack contain "Interpreter pointers", that is a pointer to the +; address of a word, as seen in a compiled list of words. + +; Push value HL to RS +pushRS: + ld (ix), l + inc ix + ld (ix), h + inc ix + ret + +; Pop RS' TOS to HL +popRS: + dec ix + ld h, (ix) + dec ix + ld l, (ix) + ret diff --git a/apps/forth/util.asm b/apps/forth/util.asm new file mode 100644 index 0000000..daa78ba --- /dev/null +++ b/apps/forth/util.asm @@ -0,0 +1,101 @@ +; Return address of scratchpad in HL +pad: + ld hl, (HERE) + ld de, PADDING + add hl, de + ret + +; Read word from (INPUTPOS) and return, in HL, a null-terminated word. +; Advance (INPUTPOS) to the character following the whitespace ending the +; word. +; Z set of word was read, unset if end of line. +readword: + ld hl, (INPUTPOS) + ; skip leading whitespace + dec hl ; offset leading "inc hl" +.loop1: + inc hl + ld a, (hl) + or a + jr z, .empty + cp ' '+1 + jr c, .loop1 + push hl ; --> lvl 1. that's our result +.loop2: + inc hl + ld a, (hl) + ; special case: is A null? If yes, we will *not* inc A so that we don't + ; go over the bounds of our input string. + or a + jr z, .noinc + cp ' '+1 + jr nc, .loop2 + ; we've just read a whitespace, HL is pointing to it. Let's transform + ; it into a null-termination, inc HL, then set (INPUTPOS). + xor a + ld (hl), a + inc hl +.noinc: + ld (INPUTPOS), hl + pop hl ; <-- lvl 1. our result + ret ; Z set from XOR A +.empty: + ld (hl), a + inc a ; unset Z + ret + +; For DE pointing to a dict entry, set DE to point to the previous entry. +; Z is set if DE point to 0 (no entry). NZ if not. +prev: + push hl ; --> lvl 1 + ld hl, 8 ; prev field offset + add hl, de + ex de, hl + pop hl ; <-- lvl 1 + call intoDE + ; DE points to prev. Is it zero? + xor a + or d + or e + ; Z will be set if DE is zero + ret + +; Find the entry corresponding to word where (HL) points to and sets DE to +; point to that entry. +; Z if found, NZ if not. +find: + ld de, (CURRENT) +.inner: + ld a, 8 + call strncmp + ret z ; found + call prev + jr nz, .inner + ; Z set? end of dict unset Z + inc a + ret + +; Compile word string at (HL) and write down its compiled version in IY, +; advancing IY to the byte next to the last written byte. +; Set Z on success, unset on failure. +compile: + call find + ret nz + ; DE is a word offset, we need a code link + ld hl, CODELINK_OFFSET + add hl, de + ld (iy), l + inc iy + ld (iy), h + inc iy + xor a ; set Z + ret + +compileExit: + ld hl, EXIT+CODELINK_OFFSET + ld (iy), l + inc iy + ld (iy), h + inc iy + ret + diff --git a/emul/.gitignore b/emul/.gitignore index db7da81..9ee7d93 100644 --- a/emul/.gitignore +++ b/emul/.gitignore @@ -8,4 +8,5 @@ /cfsin/zasm /cfsin/ed /cfsin/basic +/cfsin/forth /cfsin/user.h diff --git a/emul/Makefile b/emul/Makefile index ef0fa05..5cd6d13 100644 --- a/emul/Makefile +++ b/emul/Makefile @@ -4,7 +4,7 @@ KERNEL = ../kernel APPS = ../apps ZASMBIN = zasm/zasm AVRABIN = zasm/avra -SHELLAPPS = zasm ed +SHELLAPPS = zasm ed forth SHELLTGTS = ${SHELLAPPS:%=cfsin/%} CFSIN_CONTENTS = $(SHELLTGTS) cfsin/user.h OBJS = emul.o libz80/libz80.o