mirror of
https://github.com/hsoft/collapseos.git
synced 2025-01-28 03:46:04 +11:00
forth: Word-ify number parsing
This commit is contained in:
parent
707f1dbae1
commit
4212d5161f
@ -339,29 +339,13 @@ COMPILE:
|
||||
.db 0b10 ; UNWORD
|
||||
.maybeNum:
|
||||
.dw compiledWord
|
||||
.dw .parseNum
|
||||
.dw PARSE
|
||||
.dw LITN
|
||||
.dw R2P ; exit COMPILE
|
||||
.dw DROP
|
||||
.dw EXIT
|
||||
|
||||
|
||||
.db 0b10 ; UNWORD
|
||||
.parseNum:
|
||||
.dw nativeWord
|
||||
pop hl ; string addr
|
||||
push hl ; --> lvl 1. save string addr
|
||||
call parseLiteral
|
||||
pop hl ; <-- lvl 1
|
||||
jr nz, .undef
|
||||
; a valid number in DE!
|
||||
push de
|
||||
jp next
|
||||
.undef:
|
||||
call printstr
|
||||
jp abortUnknownWord
|
||||
|
||||
|
||||
.db ":"
|
||||
.fill 6
|
||||
.dw COMPILE
|
||||
@ -562,9 +546,55 @@ WORD:
|
||||
push hl
|
||||
jp next
|
||||
|
||||
|
||||
.db "(parsed"
|
||||
.dw WORD
|
||||
.db 0
|
||||
PARSED:
|
||||
.dw nativeWord
|
||||
pop hl
|
||||
call chkPS
|
||||
; temporary: run parseCharLit in here until it's implemented in Forth
|
||||
; core.fs needs char literal parsing.
|
||||
call parseCharLit
|
||||
jr z, .success
|
||||
call parseDecimal
|
||||
jr z, .success
|
||||
; error
|
||||
ld de, 0
|
||||
push de ; dummy
|
||||
push de ; flag
|
||||
jp next
|
||||
.success:
|
||||
push de
|
||||
ld de, 1 ; flag
|
||||
push de
|
||||
jp next
|
||||
|
||||
|
||||
.db "(parse)"
|
||||
.dw WORD
|
||||
.db 0
|
||||
PARSE:
|
||||
.dw compiledWord
|
||||
.dw PARSED
|
||||
.dw CSKIP
|
||||
.dw .error
|
||||
; success, stack is already good, we can exit
|
||||
.dw EXIT
|
||||
|
||||
.db 0b10 ; UNWORD
|
||||
.error:
|
||||
.dw compiledWord
|
||||
.dw LIT
|
||||
.db "unknown word", 0
|
||||
.dw PRINT
|
||||
.dw ABORT
|
||||
|
||||
|
||||
.db "CREATE"
|
||||
.fill 1
|
||||
.dw WORD
|
||||
.dw PARSE
|
||||
.db 0
|
||||
CREATE:
|
||||
.dw nativeWord
|
||||
|
@ -151,6 +151,31 @@ wait until another line is entered.
|
||||
KEY input, however, is direct. Regardless of the input buffer's state, KEY will
|
||||
return the next typed key.
|
||||
|
||||
PARSING AND BOOTSTRAP: Parsing number literal is a very "core" activity of
|
||||
Forth, and therefore generally seen as having to be implemented in native code.
|
||||
However, Collapse OS' Forth supports many kinds of literals: decimal, hex, char,
|
||||
binary. This incurs a significant complexity penalty.
|
||||
|
||||
What if we could implement those parsing routines in Forth? "But it's a core
|
||||
routine!" you say. Yes, but here's the deal: at its native core, only decimal
|
||||
parsing is supported. It lives in the "(parsed)" word. The interpreter's main
|
||||
loop is initially set to simply call that word.
|
||||
|
||||
However, in core.fs, "(parsex)", "(parsec)" and "(parseb)" are implemented, in
|
||||
Forth, then "(parse)", which goes through them all is defined. Then, "(parsef)",
|
||||
which is the variable in which the interpreter's word pointer is set, is
|
||||
updated to that new "(parse)" word.
|
||||
|
||||
This way, we have a full-featured (and extensible) parsing with a tiny native
|
||||
core.
|
||||
|
||||
(parse) a -- n Parses string at a as a number and push the result
|
||||
in n as well as whether parsing was a success in f
|
||||
(false = failure, true = success)
|
||||
(parse.) a -- n f Sub-parsing words. They all have the same signature.
|
||||
Parses string at a as a number and push the result
|
||||
in n as well as whether parsing was a success in f
|
||||
(0 = failure, 1 = success)
|
||||
(print) a -- Print string at addr a.
|
||||
. n -- Print n in its decimal form
|
||||
.X n -- Print n in its hexadecimal form. In hex, numbers
|
||||
|
@ -96,32 +96,24 @@ forthRdLineNoOk:
|
||||
|
||||
.db 0b10 ; UNWORD
|
||||
INTERPRET:
|
||||
.dw nativeWord
|
||||
pop hl ; from WORD
|
||||
ld a, (hl) ; special case: empty
|
||||
or a
|
||||
jp z, next
|
||||
call find
|
||||
jr nz, .maybeNum
|
||||
; regular word
|
||||
push de
|
||||
jp EXECUTE+2
|
||||
.dw compiledWord
|
||||
.dw FIND_
|
||||
.dw CSKIP
|
||||
.dw .maybeNum
|
||||
; It's a word, execute it
|
||||
.dw EXECUTE
|
||||
.dw EXIT
|
||||
|
||||
.maybeNum:
|
||||
push hl ; --> lvl 1. save string addr
|
||||
call parseLiteral
|
||||
pop hl ; <-- lvl 1
|
||||
jr nz, .undef
|
||||
; a valid number in DE!
|
||||
push de
|
||||
jp next
|
||||
.undef:
|
||||
call printstr
|
||||
jp abortUnknownWord
|
||||
.dw compiledWord
|
||||
.dw PARSE
|
||||
.dw R2P ; exit INTERPRET
|
||||
.dw DROP
|
||||
.dw EXIT
|
||||
|
||||
.db 0b10 ; UNWORD
|
||||
MAINLOOP:
|
||||
.dw compiledWord
|
||||
.dw WORD
|
||||
.dw INTERPRET
|
||||
.dw INP
|
||||
.dw FETCH
|
||||
|
@ -115,25 +115,6 @@ multDEBC:
|
||||
jr nz, .loop
|
||||
ret
|
||||
|
||||
; Parse the hex char at A and extract it's 0-15 numerical value. Put the result
|
||||
; in A.
|
||||
;
|
||||
; On success, the carry flag is reset. On error, it is set.
|
||||
parseHex:
|
||||
; First, let's see if we have an easy 0-9 case
|
||||
|
||||
add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff
|
||||
sub 0xf6 ; maps to 0-9 and carries if not a digit
|
||||
ret nc
|
||||
|
||||
and 0xdf ; converts lowercase to uppercase
|
||||
add a, 0xe9 ; map 0x11-x017 onto 0xFA - 0xFF
|
||||
sub 0xfa ; map onto 0-6
|
||||
ret c
|
||||
; we have an A-F digit
|
||||
add a, 10 ; C is clear, map back to 0xA-0xF
|
||||
ret
|
||||
|
||||
; Parse string at (HL) as a decimal value and return value in DE.
|
||||
; Reads as many digits as it can and stop when:
|
||||
; 1 - A non-digit character is read
|
||||
@ -156,7 +137,6 @@ parseDecimal:
|
||||
; During this routine, we switch between HL and its shadow. On one side,
|
||||
; we have HL the string pointer, and on the other side, we have HL the
|
||||
; numerical result. We also use EXX to preserve BC, saving us a push.
|
||||
parseDecimalSkip: ; enter here to skip parsing the first digit
|
||||
exx ; HL as a result
|
||||
ld h, 0
|
||||
ld l, a ; load first digit in without multiplying
|
||||
@ -201,127 +181,21 @@ parseDecimalSkip: ; enter here to skip parsing the first digit
|
||||
cp a ; ensure Z
|
||||
ret
|
||||
|
||||
; Parse string at (HL) as a hexadecimal value without the "0x" prefix and
|
||||
; return value in DE.
|
||||
; HL is advanced to the character following the last successfully read char.
|
||||
; Sets Z on success.
|
||||
parseHexadecimal:
|
||||
ld a, (hl)
|
||||
call parseHex ; before "ret c" is "sub 0xfa" in parseHex
|
||||
; so carry implies not zero
|
||||
ret c ; we need at least one char
|
||||
push bc
|
||||
ld de, 0
|
||||
ld b, d
|
||||
ld c, d
|
||||
|
||||
; The idea here is that the 4 hex digits of the result can be represented "bdce",
|
||||
; where each register holds a single digit. Then the result is simply
|
||||
; e = (c << 4) | e, d = (b << 4) | d
|
||||
; However, the actual string may be of any length, so when loading in the most
|
||||
; significant digit, we don't know which digit of the result it actually represents
|
||||
; To solve this, after a digit is loaded into a (and is checked for validity),
|
||||
; all digits are moved along, with e taking the latest digit.
|
||||
.loop:
|
||||
dec b
|
||||
inc b ; b should be 0, else we've overflowed
|
||||
jr nz, .end ; Z already unset if overflow
|
||||
ld b, d
|
||||
ld d, c
|
||||
ld c, e
|
||||
ld e, a
|
||||
inc hl
|
||||
ld a, (hl)
|
||||
call parseHex
|
||||
jr nc, .loop
|
||||
ld a, b
|
||||
add a, a \ add a, a \ add a, a \ add a, a
|
||||
or d
|
||||
ld d, a
|
||||
|
||||
ld a, c
|
||||
add a, a \ add a, a \ add a, a \ add a, a
|
||||
or e
|
||||
ld e, a
|
||||
xor a ; ensure z
|
||||
|
||||
.end:
|
||||
pop bc
|
||||
ret
|
||||
|
||||
|
||||
; Parse string at (HL) as a binary value (010101) without the "0b" prefix and
|
||||
; return value in E. D is always zero.
|
||||
; HL is advanced to the character following the last successfully read char.
|
||||
; Sets Z on success.
|
||||
parseBinaryLiteral:
|
||||
ld de, 0
|
||||
.loop:
|
||||
ld a, (hl)
|
||||
add a, 0xff-'1'
|
||||
sub 0xff-1
|
||||
jr c, .end
|
||||
rlc e ; sets carry if overflow, and affects Z
|
||||
ret c ; Z unset if carry set, since bit 0 of e must be set
|
||||
add a, e
|
||||
ld e, a
|
||||
inc hl
|
||||
jr .loop
|
||||
.end:
|
||||
; HL is properly set
|
||||
xor a ; ensure Z
|
||||
ret
|
||||
|
||||
; Parses the string at (HL) and returns the 16-bit value in DE. The string
|
||||
; can be a decimal literal (1234), a hexadecimal literal (0x1234) or a char
|
||||
; literal ('X').
|
||||
; HL is advanced to the character following the last successfully read char.
|
||||
;
|
||||
; As soon as the number doesn't fit 16-bit any more, parsing stops and the
|
||||
; number is invalid. If the number is valid, Z is set, otherwise, unset.
|
||||
parseLiteral:
|
||||
ld de, 0 ; pre-fill
|
||||
parseCharLit:
|
||||
ld a, (hl)
|
||||
cp 0x27 ; apostrophe
|
||||
jr z, .char
|
||||
ret nz
|
||||
|
||||
; inline parseDecimalDigit
|
||||
add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff
|
||||
sub 0xf6 ; maps to 0-9 and carries if not a digit
|
||||
ret c
|
||||
; a already parsed so skip first few instructions of parseDecimal
|
||||
jp nz, parseDecimalSkip
|
||||
; maybe hex, maybe binary
|
||||
inc hl
|
||||
ld a, (hl)
|
||||
inc hl ; already place it for hex or bin
|
||||
cp 'x'
|
||||
jr z, parseHexadecimal
|
||||
cp 'b'
|
||||
jr z, parseBinaryLiteral
|
||||
; nope, just a regular decimal
|
||||
dec hl \ dec hl
|
||||
jp parseDecimal
|
||||
|
||||
; Parse string at (HL) and, if it is a char literal, sets Z and return
|
||||
; corresponding value in E. D is always zero.
|
||||
; HL is advanced to the character following the last successfully read char.
|
||||
;
|
||||
; A valid char literal starts with ', ends with ' and has one character in the
|
||||
; middle. No escape sequence are accepted, but ''' will return the apostrophe
|
||||
; character.
|
||||
.char:
|
||||
ld d, 0 ; preset
|
||||
inc hl
|
||||
ld e, (hl) ; our result
|
||||
inc hl
|
||||
cp (hl)
|
||||
; advance HL and return if good char
|
||||
inc hl
|
||||
ret z
|
||||
|
||||
; Z unset and there's an error
|
||||
; In all error conditions, HL is advanced by 3. Rewind.
|
||||
dec hl \ dec hl \ dec hl
|
||||
; In all error conditions, HL is advanced by 2. Rewind.
|
||||
dec hl \ dec hl
|
||||
; NZ already set
|
||||
ret
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user