1
0
mirror of https://github.com/hsoft/collapseos.git synced 2024-11-26 09:28:05 +11:00

Compare commits

...

4 Commits

Author SHA1 Message Date
Virgil Dupras
1df9c4fc1b forth: split forth source into multiple files 2020-03-17 21:44:32 -04:00
Virgil Dupras
9451c599e0 forth: Make (parse) indirect and Forth-ify (parsec) 2020-03-17 21:19:56 -04:00
Virgil Dupras
4212d5161f forth: Word-ify number parsing 2020-03-17 17:46:58 -04:00
Virgil Dupras
707f1dbae1 forth: Word-ify "[COMPILE]" 2020-03-17 17:29:03 -04:00
10 changed files with 249 additions and 284 deletions

View File

@ -2,7 +2,6 @@
: -^ SWAP - ;
: +! SWAP OVER @ + SWAP ! ;
: ALLOT HERE +! ;
: , H ! 2 ALLOT ;
: C, H C! 1 ALLOT ;
: BEGIN H ; IMMEDIATE
: COMPILE ' ['] LITN EXECUTE ['] , , ; IMMEDIATE
@ -49,49 +48,3 @@
: > CMP 1 = ;
: / /MOD SWAP DROP ;
: MOD /MOD DROP ;
( Format numbers )
( TODO FORGET this word )
: PUSHDGTS
999 SWAP ( stop indicator )
DUP 0 = IF '0' EXIT THEN ( 0 is a special case )
BEGIN
DUP 0 = IF DROP EXIT THEN
10 /MOD ( r q )
SWAP '0' + SWAP ( d q )
AGAIN
;
: . ( n -- )
( handle negative )
( that "0 1 -" thing is because we don't parse negative
number correctly yet. )
DUP 0 < IF '-' EMIT 0 1 - * THEN
PUSHDGTS
BEGIN
DUP '9' > IF DROP EXIT THEN ( stop indicator, we're done )
EMIT
AGAIN
;
: PUSHDGTS
999 SWAP ( stop indicator )
DUP 0 = IF '0' EXIT THEN ( 0 is a special case )
BEGIN
DUP 0 = IF DROP EXIT THEN
16 /MOD ( r q )
SWAP ( r q )
DUP 9 > IF 10 - 'a' +
ELSE '0' + THEN ( q d )
SWAP ( d q )
AGAIN
;
: .X ( n -- )
( For hex display, there are no negatives )
PUSHDGTS
BEGIN
DUP 'f' > IF DROP EXIT THEN ( stop indicator, we're done )
EMIT
AGAIN
;

View File

@ -119,8 +119,7 @@ LIT:
.db 0
EXIT:
.dw nativeWord
call popRS
ld (IP), hl
call popRSIP
jp next
; ( R:I -- )
@ -130,7 +129,6 @@ EXIT:
.db 0
QUIT:
.dw nativeWord
quit:
jp forthRdLine
.db "ABORT"
@ -283,9 +281,23 @@ PFETCH:
push hl
jp next
.db ","
.fill 6
.dw PFETCH
.db 0
WR:
.dw nativeWord
pop de
call chkPS
ld hl, (HERE)
call DEinHL
ld (HERE), hl
jp next
; ( addr -- )
.db "EXECUTE"
.dw PFETCH
.dw WR
.db 0
EXECUTE:
.dw nativeWord
@ -304,41 +316,34 @@ EXECUTE:
.dw EXECUTE
.db 1 ; IMMEDIATE
COMPILE:
.dw nativeWord
call readword
call find
jr nz, .maybeNum
ex de, hl
call HLisIMMED
jr z, .immed
ex de, hl
call .writeDE
jp next
.dw compiledWord
.dw FIND_
.dw CSKIP
.dw .maybeNum
.dw DUP
.dw ISIMMED
.dw CSKIP
.dw .word
; is immediate. just execute.
.dw EXECUTE
.dw EXIT
.db 0b10 ; UNWORD
.word:
.dw compiledWord
.dw WR
.dw R2P ; exit COMPILE
.dw DROP
.dw EXIT
.db 0b10 ; UNWORD
.maybeNum:
push hl ; --> lvl 1. save string addr
call parseLiteral
pop hl ; <-- lvl 1
jr nz, .undef
; a valid number in DE!
ex de, hl
ld de, NUMBER
call .writeDE
ex de, hl ; number in DE
call .writeDE
jp next
.undef:
call printstr
jp abortUnknownWord
.immed:
push hl
jp EXECUTE+2
.writeDE:
push hl
ld hl, (HERE)
call DEinHL
ld (HERE), hl
pop hl
ret
.dw compiledWord
.dw PARSEI
.dw LITN
.dw R2P ; exit COMPILE
.dw DROP
.dw EXIT
.db ":"
@ -381,8 +386,7 @@ DEFINE:
.retRef:
.dw $+2
.dw $+2
call popRS
ld (IP), hl
call popRSIP
jr .loop
@ -418,10 +422,28 @@ IMMEDIATE:
set FLAG_IMMED, (hl)
jp next
.db "IMMED?"
.fill 1
.dw IMMEDIATE
.db 0
ISIMMED:
.dw nativeWord
pop hl
call chkPS
dec hl
ld de, 0
bit FLAG_IMMED, (hl)
jr z, .notset
inc de
.notset:
push de
jp next
; ( n -- )
.db "LITN"
.fill 3
.dw IMMEDIATE
.dw ISIMMED
.db 1 ; IMMEDIATE
LITN:
.dw nativeWord
@ -524,9 +546,61 @@ WORD:
push hl
jp next
.db "(parsed"
.dw WORD
.db 0
PARSED:
.dw nativeWord
pop hl
call chkPS
call parseDecimal
jr z, .success
; error
ld de, 0
push de ; dummy
push de ; flag
jp next
.success:
push de
ld de, 1 ; flag
push de
jp next
.db "(parse)"
.dw PARSED
.db 0
PARSE:
.dw compiledWord
.dw PARSED
.dw CSKIP
.dw .error
; success, stack is already good, we can exit
.dw EXIT
.db 0b10 ; UNWORD
.error:
.dw compiledWord
.dw LIT
.db "unknown word", 0
.dw PRINT
.dw ABORT
; Indirect parse caller. Reads PARSEPTR and calls
.db 0b10 ; UNWORD
PARSEI:
.dw compiledWord
.dw PARSEPTR_
.dw FETCH
.dw EXECUTE
.dw EXIT
.db "CREATE"
.fill 1
.dw WORD
.dw PARSE
.db 0
CREATE:
.dw nativeWord
@ -554,9 +628,16 @@ CURRENT_:
.dw sysvarWord
.dw CURRENT
.db "(parse*"
.dw CURRENT_
.db 0
PARSEPTR_:
.dw sysvarWord
.dw PARSEPTR
.db "IN>"
.fill 4
.dw CURRENT_
.dw PARSEPTR_
.db 0
INP:
.dw sysvarWord

View File

@ -48,6 +48,7 @@ CREATE x -- Create cell named x. Doesn't allocate a PF.
COMPILE x -- Meta compiles. Kind of blows the mind. See below.
CONSTANT x n -- Creates cell x that when called pushes its value
DOES> -- See description at top of file
IMMED? a -- f Checks whether wordref at a is immediate.
IMMEDIATE -- Flag the latest defined word as immediate.
LITN n -- *I* Inserts number from TOS as a literal
VARIABLE c -- Creates cell x with 2 bytes allocation.
@ -150,6 +151,33 @@ wait until another line is entered.
KEY input, however, is direct. Regardless of the input buffer's state, KEY will
return the next typed key.
PARSING AND BOOTSTRAP: Parsing number literal is a very "core" activity of
Forth, and therefore generally seen as having to be implemented in native code.
However, Collapse OS' Forth supports many kinds of literals: decimal, hex, char,
binary. This incurs a significant complexity penalty.
What if we could implement those parsing routines in Forth? "But it's a core
routine!" you say. Yes, but here's the deal: at its native core, only decimal
parsing is supported. It lives in the "(parsed)" word. The interpreter's main
loop is initially set to simply call that word.
However, in core.fs, "(parsex)", "(parsec)" and "(parseb)" are implemented, in
Forth, then "(parse)", which goes through them all is defined. Then, "(parsef)",
which is the variable in which the interpreter's word pointer is set, is
updated to that new "(parse)" word.
This way, we have a full-featured (and extensible) parsing with a tiny native
core.
(parse) a -- n Parses string at a as a number and push the result
in n as well as whether parsing was a success in f
(false = failure, true = success)
(parse.) a -- n f Sub-parsing words. They all have the same signature.
Parses string at a as a number and push the result
in n as well as whether parsing was a success in f
(0 = failure, 1 = success)
(parse*) -- a Variable holding the current pointer for system
number parsing. By default, (parse).
(print) a -- Print string at addr a.
. n -- Print n in its decimal form
.X n -- Print n in its hexadecimal form. In hex, numbers

46
apps/forth/fmt.fs Normal file
View File

@ -0,0 +1,46 @@
( requires core, parse )
( TODO FORGET this word )
: PUSHDGTS
999 SWAP ( stop indicator )
DUP 0 = IF '0' EXIT THEN ( 0 is a special case )
BEGIN
DUP 0 = IF DROP EXIT THEN
10 /MOD ( r q )
SWAP '0' + SWAP ( d q )
AGAIN
;
: . ( n -- )
( handle negative )
( that "0 1 -" thing is because we don't parse negative
number correctly yet. )
DUP 0 < IF '-' EMIT 0 1 - * THEN
PUSHDGTS
BEGIN
DUP '9' > IF DROP EXIT THEN ( stop indicator, we're done )
EMIT
AGAIN
;
: PUSHDGTS
999 SWAP ( stop indicator )
DUP 0 = IF '0' EXIT THEN ( 0 is a special case )
BEGIN
DUP 0 = IF DROP EXIT THEN
16 /MOD ( r q )
SWAP ( r q )
DUP 9 > IF 10 - 'a' +
ELSE '0' + THEN ( q d )
SWAP ( d q )
AGAIN
;
: .X ( n -- )
( For hex display, there are no negatives )
PUSHDGTS
BEGIN
DUP 'f' > IF DROP EXIT THEN ( stop indicator, we're done )
EMIT
AGAIN
;

View File

@ -25,7 +25,11 @@
.equ IP @+2
; Pointer to where we currently are in the interpretation of the current line.
.equ INPUTPOS @+2
; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
; Pointer to the system's number parsing function. It points to then entry that
; had the "(parse)" name at startup. During stage0, it's out builtin PARSE,
; but at stage1, it becomes "(parse)" from core.fs. It can also be changed at
; runtime.
.equ PARSEPTR @+2
.equ FORTH_RAMEND @+2
; (HERE) usually starts at RAMEND, but in certain situations, such as in stage0,
@ -84,6 +88,10 @@ forthMain:
ld (INPUTPOS), hl
xor a
ld (hl), a
; Set up PARSEPTR
ld hl, PARSE-CODELINK_OFFSET
call find
ld (PARSEPTR), de
forthRdLine:
ld hl, msgOk
call printstr
@ -96,32 +104,24 @@ forthRdLineNoOk:
.db 0b10 ; UNWORD
INTERPRET:
.dw nativeWord
pop hl ; from WORD
ld a, (hl) ; special case: empty
or a
jp z, next
call find
jr nz, .maybeNum
; regular word
push de
jp EXECUTE+2
.dw compiledWord
.dw FIND_
.dw CSKIP
.dw .maybeNum
; It's a word, execute it
.dw EXECUTE
.dw EXIT
.maybeNum:
push hl ; --> lvl 1. save string addr
call parseLiteral
pop hl ; <-- lvl 1
jr nz, .undef
; a valid number in DE!
push de
jp next
.undef:
call printstr
jp abortUnknownWord
.dw compiledWord
.dw PARSEI
.dw R2P ; exit INTERPRET
.dw DROP
.dw EXIT
.db 0b10 ; UNWORD
MAINLOOP:
.dw compiledWord
.dw WORD
.dw INTERPRET
.dw INP
.dw FETCH

18
apps/forth/parse.fs Normal file
View File

@ -0,0 +1,18 @@
( requires core )
: (parsec) ( a -- n f )
( apostrophe is ASCII 39 )
DUP C@ 39 = NOT IF 0 EXIT THEN ( -- a 0 )
DUP 2 + C@ 39 = NOT IF 0 EXIT THEN ( -- a 0 )
( surrounded by apos, good, return )
1 + C@ 1 ( -- n 1 )
;
: (parse) ( a -- n )
(parsec) NOT SKIP? EXIT
(parsed) NOT SKIP? EXIT
( nothing works )
ABORT" unknown word! "
;
' (parse) (parse*) !

View File

@ -25,6 +25,11 @@ popRS:
dec ix
ret
popRSIP:
call popRS
ld (IP), hl
ret
; Skip the next two bytes in RS' TOS
skipRS:
push hl

View File

@ -115,25 +115,6 @@ multDEBC:
jr nz, .loop
ret
; Parse the hex char at A and extract it's 0-15 numerical value. Put the result
; in A.
;
; On success, the carry flag is reset. On error, it is set.
parseHex:
; First, let's see if we have an easy 0-9 case
add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff
sub 0xf6 ; maps to 0-9 and carries if not a digit
ret nc
and 0xdf ; converts lowercase to uppercase
add a, 0xe9 ; map 0x11-x017 onto 0xFA - 0xFF
sub 0xfa ; map onto 0-6
ret c
; we have an A-F digit
add a, 10 ; C is clear, map back to 0xA-0xF
ret
; Parse string at (HL) as a decimal value and return value in DE.
; Reads as many digits as it can and stop when:
; 1 - A non-digit character is read
@ -156,7 +137,6 @@ parseDecimal:
; During this routine, we switch between HL and its shadow. On one side,
; we have HL the string pointer, and on the other side, we have HL the
; numerical result. We also use EXX to preserve BC, saving us a push.
parseDecimalSkip: ; enter here to skip parsing the first digit
exx ; HL as a result
ld h, 0
ld l, a ; load first digit in without multiplying
@ -201,130 +181,6 @@ parseDecimalSkip: ; enter here to skip parsing the first digit
cp a ; ensure Z
ret
; Parse string at (HL) as a hexadecimal value without the "0x" prefix and
; return value in DE.
; HL is advanced to the character following the last successfully read char.
; Sets Z on success.
parseHexadecimal:
ld a, (hl)
call parseHex ; before "ret c" is "sub 0xfa" in parseHex
; so carry implies not zero
ret c ; we need at least one char
push bc
ld de, 0
ld b, d
ld c, d
; The idea here is that the 4 hex digits of the result can be represented "bdce",
; where each register holds a single digit. Then the result is simply
; e = (c << 4) | e, d = (b << 4) | d
; However, the actual string may be of any length, so when loading in the most
; significant digit, we don't know which digit of the result it actually represents
; To solve this, after a digit is loaded into a (and is checked for validity),
; all digits are moved along, with e taking the latest digit.
.loop:
dec b
inc b ; b should be 0, else we've overflowed
jr nz, .end ; Z already unset if overflow
ld b, d
ld d, c
ld c, e
ld e, a
inc hl
ld a, (hl)
call parseHex
jr nc, .loop
ld a, b
add a, a \ add a, a \ add a, a \ add a, a
or d
ld d, a
ld a, c
add a, a \ add a, a \ add a, a \ add a, a
or e
ld e, a
xor a ; ensure z
.end:
pop bc
ret
; Parse string at (HL) as a binary value (010101) without the "0b" prefix and
; return value in E. D is always zero.
; HL is advanced to the character following the last successfully read char.
; Sets Z on success.
parseBinaryLiteral:
ld de, 0
.loop:
ld a, (hl)
add a, 0xff-'1'
sub 0xff-1
jr c, .end
rlc e ; sets carry if overflow, and affects Z
ret c ; Z unset if carry set, since bit 0 of e must be set
add a, e
ld e, a
inc hl
jr .loop
.end:
; HL is properly set
xor a ; ensure Z
ret
; Parses the string at (HL) and returns the 16-bit value in DE. The string
; can be a decimal literal (1234), a hexadecimal literal (0x1234) or a char
; literal ('X').
; HL is advanced to the character following the last successfully read char.
;
; As soon as the number doesn't fit 16-bit any more, parsing stops and the
; number is invalid. If the number is valid, Z is set, otherwise, unset.
parseLiteral:
ld de, 0 ; pre-fill
ld a, (hl)
cp 0x27 ; apostrophe
jr z, .char
; inline parseDecimalDigit
add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff
sub 0xf6 ; maps to 0-9 and carries if not a digit
ret c
; a already parsed so skip first few instructions of parseDecimal
jp nz, parseDecimalSkip
; maybe hex, maybe binary
inc hl
ld a, (hl)
inc hl ; already place it for hex or bin
cp 'x'
jr z, parseHexadecimal
cp 'b'
jr z, parseBinaryLiteral
; nope, just a regular decimal
dec hl \ dec hl
jp parseDecimal
; Parse string at (HL) and, if it is a char literal, sets Z and return
; corresponding value in E. D is always zero.
; HL is advanced to the character following the last successfully read char.
;
; A valid char literal starts with ', ends with ' and has one character in the
; middle. No escape sequence are accepted, but ''' will return the apostrophe
; character.
.char:
inc hl
ld e, (hl) ; our result
inc hl
cp (hl)
; advance HL and return if good char
inc hl
ret z
; Z unset and there's an error
; In all error conditions, HL is advanced by 3. Rewind.
dec hl \ dec hl \ dec hl
; NZ already set
ret
; *** Forth-specific part ***
; Return address of scratchpad in HL
pad:
@ -519,14 +375,6 @@ HLisIMMED:
; We need an invert flag. We want to Z to be set when flag is non-zero.
jp toggleZ
; Sets Z if wordref at (HL) is of the IMMEDIATE type
HLPointsIMMED:
push hl
call intoHL
call HLisIMMED
pop hl
ret
; Sets Z if wordref at HL is of the UNWORD type
HLisUNWORD:
dec hl

View File

@ -6,6 +6,9 @@ ZASMBIN = zasm/zasm
AVRABIN = zasm/avra
SHELLAPPS = zasm ed
SHELLTGTS = ${SHELLAPPS:%=cfsin/%}
# Those Forth source files are in a particular order
FORTHSRCS = core.fs parse.fs fmt.fs
FORTHSRC_PATHS = ${FORTHSRCS:%=$(APPS)/forth/%}
CFSIN_CONTENTS = $(SHELLTGTS) cfsin/user.h
OBJS = emul.o libz80/libz80.o
SHELLOBJS = $(OBJS) $(CFSPACK_OBJ)
@ -36,8 +39,8 @@ forth/stage1: forth/stage1.c $(OBJS) forth/forth0-bin.h
forth/stage1dbg: forth/stage1.c $(OBJS) forth/forth0-bin.h
$(CC) -DDEBUG forth/stage1.c $(OBJS) -o $@
forth/core.bin: $(APPS)/forth/core.fs forth/stage1
./forth/stage1 $(APPS)/forth/core.fs | tee $@ > /dev/null
forth/core.bin: $(FORTHSRC_PATHS) forth/stage1
cat $(FORTHSRC_PATHS) | ./forth/stage1 | tee $@ > /dev/null
forth/forth1.bin: forth/glue1.asm forth/core.bin $(ZASMBIN)
$(ZASMBIN) $(KERNEL) $(APPS) forth/core.bin < forth/glue1.asm | tee $@ > /dev/null

View File

@ -35,11 +35,10 @@ that wordref offsets correspond.
#define CURRENT 0xe702
static int running;
static FILE *fp;
static uint8_t iord_stdio()
{
int c = getc(fp);
int c = getc(stdin);
if (c == EOF) {
running = 0;
}
@ -57,20 +56,6 @@ static void iowr_stdio(uint8_t val)
int main(int argc, char *argv[])
{
#ifdef DEBUG
fp = stdin;
#else
if (argc == 2) {
fp = fopen(argv[1], "r");
if (fp == NULL) {
fprintf(stderr, "Can't open %s\n", argv[1]);
return 1;
}
} else {
fprintf(stderr, "Usage: ./stage0 filename\n");
return 1;
}
#endif
Machine *m = emul_init();
m->ramstart = RAMSTART;
m->iord[STDIO_PORT] = iord_stdio;
@ -84,8 +69,6 @@ int main(int argc, char *argv[])
while (running && emul_step());
fclose(fp);
#ifndef DEBUG
// We're done, now let's spit dict data
// let's start with LATEST spitting.