mirror of
https://github.com/hsoft/collapseos.git
synced 2024-11-30 22:48:08 +11:00
forth: Word-ify number parsing
This commit is contained in:
parent
707f1dbae1
commit
4212d5161f
@ -339,29 +339,13 @@ COMPILE:
|
|||||||
.db 0b10 ; UNWORD
|
.db 0b10 ; UNWORD
|
||||||
.maybeNum:
|
.maybeNum:
|
||||||
.dw compiledWord
|
.dw compiledWord
|
||||||
.dw .parseNum
|
.dw PARSE
|
||||||
.dw LITN
|
.dw LITN
|
||||||
.dw R2P ; exit COMPILE
|
.dw R2P ; exit COMPILE
|
||||||
.dw DROP
|
.dw DROP
|
||||||
.dw EXIT
|
.dw EXIT
|
||||||
|
|
||||||
|
|
||||||
.db 0b10 ; UNWORD
|
|
||||||
.parseNum:
|
|
||||||
.dw nativeWord
|
|
||||||
pop hl ; string addr
|
|
||||||
push hl ; --> lvl 1. save string addr
|
|
||||||
call parseLiteral
|
|
||||||
pop hl ; <-- lvl 1
|
|
||||||
jr nz, .undef
|
|
||||||
; a valid number in DE!
|
|
||||||
push de
|
|
||||||
jp next
|
|
||||||
.undef:
|
|
||||||
call printstr
|
|
||||||
jp abortUnknownWord
|
|
||||||
|
|
||||||
|
|
||||||
.db ":"
|
.db ":"
|
||||||
.fill 6
|
.fill 6
|
||||||
.dw COMPILE
|
.dw COMPILE
|
||||||
@ -562,9 +546,55 @@ WORD:
|
|||||||
push hl
|
push hl
|
||||||
jp next
|
jp next
|
||||||
|
|
||||||
|
|
||||||
|
.db "(parsed"
|
||||||
|
.dw WORD
|
||||||
|
.db 0
|
||||||
|
PARSED:
|
||||||
|
.dw nativeWord
|
||||||
|
pop hl
|
||||||
|
call chkPS
|
||||||
|
; temporary: run parseCharLit in here until it's implemented in Forth
|
||||||
|
; core.fs needs char literal parsing.
|
||||||
|
call parseCharLit
|
||||||
|
jr z, .success
|
||||||
|
call parseDecimal
|
||||||
|
jr z, .success
|
||||||
|
; error
|
||||||
|
ld de, 0
|
||||||
|
push de ; dummy
|
||||||
|
push de ; flag
|
||||||
|
jp next
|
||||||
|
.success:
|
||||||
|
push de
|
||||||
|
ld de, 1 ; flag
|
||||||
|
push de
|
||||||
|
jp next
|
||||||
|
|
||||||
|
|
||||||
|
.db "(parse)"
|
||||||
|
.dw WORD
|
||||||
|
.db 0
|
||||||
|
PARSE:
|
||||||
|
.dw compiledWord
|
||||||
|
.dw PARSED
|
||||||
|
.dw CSKIP
|
||||||
|
.dw .error
|
||||||
|
; success, stack is already good, we can exit
|
||||||
|
.dw EXIT
|
||||||
|
|
||||||
|
.db 0b10 ; UNWORD
|
||||||
|
.error:
|
||||||
|
.dw compiledWord
|
||||||
|
.dw LIT
|
||||||
|
.db "unknown word", 0
|
||||||
|
.dw PRINT
|
||||||
|
.dw ABORT
|
||||||
|
|
||||||
|
|
||||||
.db "CREATE"
|
.db "CREATE"
|
||||||
.fill 1
|
.fill 1
|
||||||
.dw WORD
|
.dw PARSE
|
||||||
.db 0
|
.db 0
|
||||||
CREATE:
|
CREATE:
|
||||||
.dw nativeWord
|
.dw nativeWord
|
||||||
|
@ -151,6 +151,31 @@ wait until another line is entered.
|
|||||||
KEY input, however, is direct. Regardless of the input buffer's state, KEY will
|
KEY input, however, is direct. Regardless of the input buffer's state, KEY will
|
||||||
return the next typed key.
|
return the next typed key.
|
||||||
|
|
||||||
|
PARSING AND BOOTSTRAP: Parsing number literal is a very "core" activity of
|
||||||
|
Forth, and therefore generally seen as having to be implemented in native code.
|
||||||
|
However, Collapse OS' Forth supports many kinds of literals: decimal, hex, char,
|
||||||
|
binary. This incurs a significant complexity penalty.
|
||||||
|
|
||||||
|
What if we could implement those parsing routines in Forth? "But it's a core
|
||||||
|
routine!" you say. Yes, but here's the deal: at its native core, only decimal
|
||||||
|
parsing is supported. It lives in the "(parsed)" word. The interpreter's main
|
||||||
|
loop is initially set to simply call that word.
|
||||||
|
|
||||||
|
However, in core.fs, "(parsex)", "(parsec)" and "(parseb)" are implemented, in
|
||||||
|
Forth, then "(parse)", which goes through them all is defined. Then, "(parsef)",
|
||||||
|
which is the variable in which the interpreter's word pointer is set, is
|
||||||
|
updated to that new "(parse)" word.
|
||||||
|
|
||||||
|
This way, we have a full-featured (and extensible) parsing with a tiny native
|
||||||
|
core.
|
||||||
|
|
||||||
|
(parse) a -- n Parses string at a as a number and push the result
|
||||||
|
in n as well as whether parsing was a success in f
|
||||||
|
(false = failure, true = success)
|
||||||
|
(parse.) a -- n f Sub-parsing words. They all have the same signature.
|
||||||
|
Parses string at a as a number and push the result
|
||||||
|
in n as well as whether parsing was a success in f
|
||||||
|
(0 = failure, 1 = success)
|
||||||
(print) a -- Print string at addr a.
|
(print) a -- Print string at addr a.
|
||||||
. n -- Print n in its decimal form
|
. n -- Print n in its decimal form
|
||||||
.X n -- Print n in its hexadecimal form. In hex, numbers
|
.X n -- Print n in its hexadecimal form. In hex, numbers
|
||||||
|
@ -96,32 +96,24 @@ forthRdLineNoOk:
|
|||||||
|
|
||||||
.db 0b10 ; UNWORD
|
.db 0b10 ; UNWORD
|
||||||
INTERPRET:
|
INTERPRET:
|
||||||
.dw nativeWord
|
.dw compiledWord
|
||||||
pop hl ; from WORD
|
.dw FIND_
|
||||||
ld a, (hl) ; special case: empty
|
.dw CSKIP
|
||||||
or a
|
.dw .maybeNum
|
||||||
jp z, next
|
; It's a word, execute it
|
||||||
call find
|
.dw EXECUTE
|
||||||
jr nz, .maybeNum
|
.dw EXIT
|
||||||
; regular word
|
|
||||||
push de
|
|
||||||
jp EXECUTE+2
|
|
||||||
.maybeNum:
|
.maybeNum:
|
||||||
push hl ; --> lvl 1. save string addr
|
.dw compiledWord
|
||||||
call parseLiteral
|
.dw PARSE
|
||||||
pop hl ; <-- lvl 1
|
.dw R2P ; exit INTERPRET
|
||||||
jr nz, .undef
|
.dw DROP
|
||||||
; a valid number in DE!
|
.dw EXIT
|
||||||
push de
|
|
||||||
jp next
|
|
||||||
.undef:
|
|
||||||
call printstr
|
|
||||||
jp abortUnknownWord
|
|
||||||
|
|
||||||
.db 0b10 ; UNWORD
|
.db 0b10 ; UNWORD
|
||||||
MAINLOOP:
|
MAINLOOP:
|
||||||
.dw compiledWord
|
.dw compiledWord
|
||||||
.dw WORD
|
|
||||||
.dw INTERPRET
|
.dw INTERPRET
|
||||||
.dw INP
|
.dw INP
|
||||||
.dw FETCH
|
.dw FETCH
|
||||||
|
@ -115,25 +115,6 @@ multDEBC:
|
|||||||
jr nz, .loop
|
jr nz, .loop
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; Parse the hex char at A and extract it's 0-15 numerical value. Put the result
|
|
||||||
; in A.
|
|
||||||
;
|
|
||||||
; On success, the carry flag is reset. On error, it is set.
|
|
||||||
parseHex:
|
|
||||||
; First, let's see if we have an easy 0-9 case
|
|
||||||
|
|
||||||
add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff
|
|
||||||
sub 0xf6 ; maps to 0-9 and carries if not a digit
|
|
||||||
ret nc
|
|
||||||
|
|
||||||
and 0xdf ; converts lowercase to uppercase
|
|
||||||
add a, 0xe9 ; map 0x11-x017 onto 0xFA - 0xFF
|
|
||||||
sub 0xfa ; map onto 0-6
|
|
||||||
ret c
|
|
||||||
; we have an A-F digit
|
|
||||||
add a, 10 ; C is clear, map back to 0xA-0xF
|
|
||||||
ret
|
|
||||||
|
|
||||||
; Parse string at (HL) as a decimal value and return value in DE.
|
; Parse string at (HL) as a decimal value and return value in DE.
|
||||||
; Reads as many digits as it can and stop when:
|
; Reads as many digits as it can and stop when:
|
||||||
; 1 - A non-digit character is read
|
; 1 - A non-digit character is read
|
||||||
@ -156,7 +137,6 @@ parseDecimal:
|
|||||||
; During this routine, we switch between HL and its shadow. On one side,
|
; During this routine, we switch between HL and its shadow. On one side,
|
||||||
; we have HL the string pointer, and on the other side, we have HL the
|
; we have HL the string pointer, and on the other side, we have HL the
|
||||||
; numerical result. We also use EXX to preserve BC, saving us a push.
|
; numerical result. We also use EXX to preserve BC, saving us a push.
|
||||||
parseDecimalSkip: ; enter here to skip parsing the first digit
|
|
||||||
exx ; HL as a result
|
exx ; HL as a result
|
||||||
ld h, 0
|
ld h, 0
|
||||||
ld l, a ; load first digit in without multiplying
|
ld l, a ; load first digit in without multiplying
|
||||||
@ -201,127 +181,21 @@ parseDecimalSkip: ; enter here to skip parsing the first digit
|
|||||||
cp a ; ensure Z
|
cp a ; ensure Z
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; Parse string at (HL) as a hexadecimal value without the "0x" prefix and
|
parseCharLit:
|
||||||
; return value in DE.
|
|
||||||
; HL is advanced to the character following the last successfully read char.
|
|
||||||
; Sets Z on success.
|
|
||||||
parseHexadecimal:
|
|
||||||
ld a, (hl)
|
|
||||||
call parseHex ; before "ret c" is "sub 0xfa" in parseHex
|
|
||||||
; so carry implies not zero
|
|
||||||
ret c ; we need at least one char
|
|
||||||
push bc
|
|
||||||
ld de, 0
|
|
||||||
ld b, d
|
|
||||||
ld c, d
|
|
||||||
|
|
||||||
; The idea here is that the 4 hex digits of the result can be represented "bdce",
|
|
||||||
; where each register holds a single digit. Then the result is simply
|
|
||||||
; e = (c << 4) | e, d = (b << 4) | d
|
|
||||||
; However, the actual string may be of any length, so when loading in the most
|
|
||||||
; significant digit, we don't know which digit of the result it actually represents
|
|
||||||
; To solve this, after a digit is loaded into a (and is checked for validity),
|
|
||||||
; all digits are moved along, with e taking the latest digit.
|
|
||||||
.loop:
|
|
||||||
dec b
|
|
||||||
inc b ; b should be 0, else we've overflowed
|
|
||||||
jr nz, .end ; Z already unset if overflow
|
|
||||||
ld b, d
|
|
||||||
ld d, c
|
|
||||||
ld c, e
|
|
||||||
ld e, a
|
|
||||||
inc hl
|
|
||||||
ld a, (hl)
|
|
||||||
call parseHex
|
|
||||||
jr nc, .loop
|
|
||||||
ld a, b
|
|
||||||
add a, a \ add a, a \ add a, a \ add a, a
|
|
||||||
or d
|
|
||||||
ld d, a
|
|
||||||
|
|
||||||
ld a, c
|
|
||||||
add a, a \ add a, a \ add a, a \ add a, a
|
|
||||||
or e
|
|
||||||
ld e, a
|
|
||||||
xor a ; ensure z
|
|
||||||
|
|
||||||
.end:
|
|
||||||
pop bc
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
; Parse string at (HL) as a binary value (010101) without the "0b" prefix and
|
|
||||||
; return value in E. D is always zero.
|
|
||||||
; HL is advanced to the character following the last successfully read char.
|
|
||||||
; Sets Z on success.
|
|
||||||
parseBinaryLiteral:
|
|
||||||
ld de, 0
|
|
||||||
.loop:
|
|
||||||
ld a, (hl)
|
|
||||||
add a, 0xff-'1'
|
|
||||||
sub 0xff-1
|
|
||||||
jr c, .end
|
|
||||||
rlc e ; sets carry if overflow, and affects Z
|
|
||||||
ret c ; Z unset if carry set, since bit 0 of e must be set
|
|
||||||
add a, e
|
|
||||||
ld e, a
|
|
||||||
inc hl
|
|
||||||
jr .loop
|
|
||||||
.end:
|
|
||||||
; HL is properly set
|
|
||||||
xor a ; ensure Z
|
|
||||||
ret
|
|
||||||
|
|
||||||
; Parses the string at (HL) and returns the 16-bit value in DE. The string
|
|
||||||
; can be a decimal literal (1234), a hexadecimal literal (0x1234) or a char
|
|
||||||
; literal ('X').
|
|
||||||
; HL is advanced to the character following the last successfully read char.
|
|
||||||
;
|
|
||||||
; As soon as the number doesn't fit 16-bit any more, parsing stops and the
|
|
||||||
; number is invalid. If the number is valid, Z is set, otherwise, unset.
|
|
||||||
parseLiteral:
|
|
||||||
ld de, 0 ; pre-fill
|
|
||||||
ld a, (hl)
|
ld a, (hl)
|
||||||
cp 0x27 ; apostrophe
|
cp 0x27 ; apostrophe
|
||||||
jr z, .char
|
ret nz
|
||||||
|
|
||||||
; inline parseDecimalDigit
|
ld d, 0 ; preset
|
||||||
add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff
|
|
||||||
sub 0xf6 ; maps to 0-9 and carries if not a digit
|
|
||||||
ret c
|
|
||||||
; a already parsed so skip first few instructions of parseDecimal
|
|
||||||
jp nz, parseDecimalSkip
|
|
||||||
; maybe hex, maybe binary
|
|
||||||
inc hl
|
|
||||||
ld a, (hl)
|
|
||||||
inc hl ; already place it for hex or bin
|
|
||||||
cp 'x'
|
|
||||||
jr z, parseHexadecimal
|
|
||||||
cp 'b'
|
|
||||||
jr z, parseBinaryLiteral
|
|
||||||
; nope, just a regular decimal
|
|
||||||
dec hl \ dec hl
|
|
||||||
jp parseDecimal
|
|
||||||
|
|
||||||
; Parse string at (HL) and, if it is a char literal, sets Z and return
|
|
||||||
; corresponding value in E. D is always zero.
|
|
||||||
; HL is advanced to the character following the last successfully read char.
|
|
||||||
;
|
|
||||||
; A valid char literal starts with ', ends with ' and has one character in the
|
|
||||||
; middle. No escape sequence are accepted, but ''' will return the apostrophe
|
|
||||||
; character.
|
|
||||||
.char:
|
|
||||||
inc hl
|
inc hl
|
||||||
ld e, (hl) ; our result
|
ld e, (hl) ; our result
|
||||||
inc hl
|
inc hl
|
||||||
cp (hl)
|
cp (hl)
|
||||||
; advance HL and return if good char
|
|
||||||
inc hl
|
|
||||||
ret z
|
ret z
|
||||||
|
|
||||||
; Z unset and there's an error
|
; Z unset and there's an error
|
||||||
; In all error conditions, HL is advanced by 3. Rewind.
|
; In all error conditions, HL is advanced by 2. Rewind.
|
||||||
dec hl \ dec hl \ dec hl
|
dec hl \ dec hl
|
||||||
; NZ already set
|
; NZ already set
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user