forth: Word-ify number parsing

This commit is contained in:
Virgil Dupras 2020-03-17 17:46:58 -04:00
parent 707f1dbae1
commit 4212d5161f
4 changed files with 91 additions and 170 deletions

View File

@ -339,29 +339,13 @@ COMPILE:
.db 0b10 ; UNWORD
.maybeNum:
.dw compiledWord
.dw .parseNum
.dw PARSE
.dw LITN
.dw R2P ; exit COMPILE
.dw DROP
.dw EXIT
.db 0b10 ; UNWORD
.parseNum:
.dw nativeWord
pop hl ; string addr
push hl ; --> lvl 1. save string addr
call parseLiteral
pop hl ; <-- lvl 1
jr nz, .undef
; a valid number in DE!
push de
jp next
.undef:
call printstr
jp abortUnknownWord
.db ":"
.fill 6
.dw COMPILE
@ -562,9 +546,55 @@ WORD:
push hl
jp next
.db "(parsed"
.dw WORD
.db 0
PARSED:
.dw nativeWord
pop hl
call chkPS
; temporary: run parseCharLit in here until it's implemented in Forth
; core.fs needs char literal parsing.
call parseCharLit
jr z, .success
call parseDecimal
jr z, .success
; error
ld de, 0
push de ; dummy
push de ; flag
jp next
.success:
push de
ld de, 1 ; flag
push de
jp next
.db "(parse)"
.dw WORD
.db 0
PARSE:
.dw compiledWord
.dw PARSED
.dw CSKIP
.dw .error
; success, stack is already good, we can exit
.dw EXIT
.db 0b10 ; UNWORD
.error:
.dw compiledWord
.dw LIT
.db "unknown word", 0
.dw PRINT
.dw ABORT
.db "CREATE"
.fill 1
.dw WORD
.dw PARSE
.db 0
CREATE:
.dw nativeWord

View File

@ -151,6 +151,31 @@ wait until another line is entered.
KEY input, however, is direct. Regardless of the input buffer's state, KEY will
return the next typed key.
PARSING AND BOOTSTRAP: Parsing number literal is a very "core" activity of
Forth, and therefore generally seen as having to be implemented in native code.
However, Collapse OS' Forth supports many kinds of literals: decimal, hex, char,
binary. This incurs a significant complexity penalty.
What if we could implement those parsing routines in Forth? "But it's a core
routine!" you say. Yes, but here's the deal: at its native core, only decimal
parsing is supported. It lives in the "(parsed)" word. The interpreter's main
loop is initially set to simply call that word.
However, in core.fs, "(parsex)", "(parsec)" and "(parseb)" are implemented, in
Forth, then "(parse)", which goes through them all is defined. Then, "(parsef)",
which is the variable in which the interpreter's word pointer is set, is
updated to that new "(parse)" word.
This way, we have a full-featured (and extensible) parsing with a tiny native
core.
(parse) a -- n Parses string at a as a number and push the result
in n as well as whether parsing was a success in f
(false = failure, true = success)
(parse.) a -- n f Sub-parsing words. They all have the same signature.
Parses string at a as a number and push the result
in n as well as whether parsing was a success in f
(0 = failure, 1 = success)
(print) a -- Print string at addr a.
. n -- Print n in its decimal form
.X n -- Print n in its hexadecimal form. In hex, numbers

View File

@ -96,32 +96,24 @@ forthRdLineNoOk:
.db 0b10 ; UNWORD
INTERPRET:
.dw nativeWord
pop hl ; from WORD
ld a, (hl) ; special case: empty
or a
jp z, next
call find
jr nz, .maybeNum
; regular word
push de
jp EXECUTE+2
.dw compiledWord
.dw FIND_
.dw CSKIP
.dw .maybeNum
; It's a word, execute it
.dw EXECUTE
.dw EXIT
.maybeNum:
push hl ; --> lvl 1. save string addr
call parseLiteral
pop hl ; <-- lvl 1
jr nz, .undef
; a valid number in DE!
push de
jp next
.undef:
call printstr
jp abortUnknownWord
.dw compiledWord
.dw PARSE
.dw R2P ; exit INTERPRET
.dw DROP
.dw EXIT
.db 0b10 ; UNWORD
MAINLOOP:
.dw compiledWord
.dw WORD
.dw INTERPRET
.dw INP
.dw FETCH

View File

@ -115,25 +115,6 @@ multDEBC:
jr nz, .loop
ret
; Parse the hex char at A and extract it's 0-15 numerical value. Put the result
; in A.
;
; On success, the carry flag is reset. On error, it is set.
parseHex:
; First, let's see if we have an easy 0-9 case
add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff
sub 0xf6 ; maps to 0-9 and carries if not a digit
ret nc
and 0xdf ; converts lowercase to uppercase
add a, 0xe9 ; map 0x11-x017 onto 0xFA - 0xFF
sub 0xfa ; map onto 0-6
ret c
; we have an A-F digit
add a, 10 ; C is clear, map back to 0xA-0xF
ret
; Parse string at (HL) as a decimal value and return value in DE.
; Reads as many digits as it can and stop when:
; 1 - A non-digit character is read
@ -156,7 +137,6 @@ parseDecimal:
; During this routine, we switch between HL and its shadow. On one side,
; we have HL the string pointer, and on the other side, we have HL the
; numerical result. We also use EXX to preserve BC, saving us a push.
parseDecimalSkip: ; enter here to skip parsing the first digit
exx ; HL as a result
ld h, 0
ld l, a ; load first digit in without multiplying
@ -201,127 +181,21 @@ parseDecimalSkip: ; enter here to skip parsing the first digit
cp a ; ensure Z
ret
; Parse string at (HL) as a hexadecimal value without the "0x" prefix and
; return value in DE.
; HL is advanced to the character following the last successfully read char.
; Sets Z on success.
parseHexadecimal:
ld a, (hl)
call parseHex ; before "ret c" is "sub 0xfa" in parseHex
; so carry implies not zero
ret c ; we need at least one char
push bc
ld de, 0
ld b, d
ld c, d
; The idea here is that the 4 hex digits of the result can be represented "bdce",
; where each register holds a single digit. Then the result is simply
; e = (c << 4) | e, d = (b << 4) | d
; However, the actual string may be of any length, so when loading in the most
; significant digit, we don't know which digit of the result it actually represents
; To solve this, after a digit is loaded into a (and is checked for validity),
; all digits are moved along, with e taking the latest digit.
.loop:
dec b
inc b ; b should be 0, else we've overflowed
jr nz, .end ; Z already unset if overflow
ld b, d
ld d, c
ld c, e
ld e, a
inc hl
ld a, (hl)
call parseHex
jr nc, .loop
ld a, b
add a, a \ add a, a \ add a, a \ add a, a
or d
ld d, a
ld a, c
add a, a \ add a, a \ add a, a \ add a, a
or e
ld e, a
xor a ; ensure z
.end:
pop bc
ret
; Parse string at (HL) as a binary value (010101) without the "0b" prefix and
; return value in E. D is always zero.
; HL is advanced to the character following the last successfully read char.
; Sets Z on success.
parseBinaryLiteral:
ld de, 0
.loop:
ld a, (hl)
add a, 0xff-'1'
sub 0xff-1
jr c, .end
rlc e ; sets carry if overflow, and affects Z
ret c ; Z unset if carry set, since bit 0 of e must be set
add a, e
ld e, a
inc hl
jr .loop
.end:
; HL is properly set
xor a ; ensure Z
ret
; Parses the string at (HL) and returns the 16-bit value in DE. The string
; can be a decimal literal (1234), a hexadecimal literal (0x1234) or a char
; literal ('X').
; HL is advanced to the character following the last successfully read char.
;
; As soon as the number doesn't fit 16-bit any more, parsing stops and the
; number is invalid. If the number is valid, Z is set, otherwise, unset.
parseLiteral:
ld de, 0 ; pre-fill
parseCharLit:
ld a, (hl)
cp 0x27 ; apostrophe
jr z, .char
ret nz
; inline parseDecimalDigit
add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff
sub 0xf6 ; maps to 0-9 and carries if not a digit
ret c
; a already parsed so skip first few instructions of parseDecimal
jp nz, parseDecimalSkip
; maybe hex, maybe binary
inc hl
ld a, (hl)
inc hl ; already place it for hex or bin
cp 'x'
jr z, parseHexadecimal
cp 'b'
jr z, parseBinaryLiteral
; nope, just a regular decimal
dec hl \ dec hl
jp parseDecimal
; Parse string at (HL) and, if it is a char literal, sets Z and return
; corresponding value in E. D is always zero.
; HL is advanced to the character following the last successfully read char.
;
; A valid char literal starts with ', ends with ' and has one character in the
; middle. No escape sequence are accepted, but ''' will return the apostrophe
; character.
.char:
ld d, 0 ; preset
inc hl
ld e, (hl) ; our result
inc hl
cp (hl)
; advance HL and return if good char
inc hl
ret z
; Z unset and there's an error
; In all error conditions, HL is advanced by 3. Rewind.
dec hl \ dec hl \ dec hl
; In all error conditions, HL is advanced by 2. Rewind.
dec hl \ dec hl
; NZ already set
ret