From 4212d5161f13233e6f63b8ffc0f75b72118b7d72 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Tue, 17 Mar 2020 17:46:58 -0400 Subject: [PATCH] forth: Word-ify number parsing --- apps/forth/dict.asm | 66 +++++++++++++----- apps/forth/dictionary.txt | 25 +++++++ apps/forth/main.asm | 34 ++++------ apps/forth/util.asm | 136 ++------------------------------------ 4 files changed, 91 insertions(+), 170 deletions(-) diff --git a/apps/forth/dict.asm b/apps/forth/dict.asm index bf1eedf..e47b933 100644 --- a/apps/forth/dict.asm +++ b/apps/forth/dict.asm @@ -339,29 +339,13 @@ COMPILE: .db 0b10 ; UNWORD .maybeNum: .dw compiledWord - .dw .parseNum + .dw PARSE .dw LITN .dw R2P ; exit COMPILE .dw DROP .dw EXIT - .db 0b10 ; UNWORD -.parseNum: - .dw nativeWord - pop hl ; string addr - push hl ; --> lvl 1. save string addr - call parseLiteral - pop hl ; <-- lvl 1 - jr nz, .undef - ; a valid number in DE! - push de - jp next -.undef: - call printstr - jp abortUnknownWord - - .db ":" .fill 6 .dw COMPILE @@ -562,9 +546,55 @@ WORD: push hl jp next + + .db "(parsed" + .dw WORD + .db 0 +PARSED: + .dw nativeWord + pop hl + call chkPS + ; temporary: run parseCharLit in here until it's implemented in Forth + ; core.fs needs char literal parsing. + call parseCharLit + jr z, .success + call parseDecimal + jr z, .success + ; error + ld de, 0 + push de ; dummy + push de ; flag + jp next +.success: + push de + ld de, 1 ; flag + push de + jp next + + + .db "(parse)" + .dw WORD + .db 0 +PARSE: + .dw compiledWord + .dw PARSED + .dw CSKIP + .dw .error + ; success, stack is already good, we can exit + .dw EXIT + + .db 0b10 ; UNWORD +.error: + .dw compiledWord + .dw LIT + .db "unknown word", 0 + .dw PRINT + .dw ABORT + + .db "CREATE" .fill 1 - .dw WORD + .dw PARSE .db 0 CREATE: .dw nativeWord diff --git a/apps/forth/dictionary.txt b/apps/forth/dictionary.txt index c3d074e..cc3fb2d 100644 --- a/apps/forth/dictionary.txt +++ b/apps/forth/dictionary.txt @@ -151,6 +151,31 @@ wait until another line is entered. KEY input, however, is direct. Regardless of the input buffer's state, KEY will return the next typed key. +PARSING AND BOOTSTRAP: Parsing number literal is a very "core" activity of +Forth, and therefore generally seen as having to be implemented in native code. +However, Collapse OS' Forth supports many kinds of literals: decimal, hex, char, +binary. This incurs a significant complexity penalty. + +What if we could implement those parsing routines in Forth? "But it's a core +routine!" you say. Yes, but here's the deal: at its native core, only decimal +parsing is supported. It lives in the "(parsed)" word. The interpreter's main +loop is initially set to simply call that word. + +However, in core.fs, "(parsex)", "(parsec)" and "(parseb)" are implemented, in +Forth, then "(parse)", which goes through them all is defined. Then, "(parsef)", +which is the variable in which the interpreter's word pointer is set, is +updated to that new "(parse)" word. + +This way, we have a full-featured (and extensible) parsing with a tiny native +core. + +(parse) a -- n Parses string at a as a number and push the result + in n as well as whether parsing was a success in f + (false = failure, true = success) +(parse.) a -- n f Sub-parsing words. They all have the same signature. + Parses string at a as a number and push the result + in n as well as whether parsing was a success in f + (0 = failure, 1 = success) (print) a -- Print string at addr a. . n -- Print n in its decimal form .X n -- Print n in its hexadecimal form. In hex, numbers diff --git a/apps/forth/main.asm b/apps/forth/main.asm index f6b79c0..9c65c11 100644 --- a/apps/forth/main.asm +++ b/apps/forth/main.asm @@ -96,32 +96,24 @@ forthRdLineNoOk: .db 0b10 ; UNWORD INTERPRET: - .dw nativeWord - pop hl ; from WORD - ld a, (hl) ; special case: empty - or a - jp z, next - call find - jr nz, .maybeNum - ; regular word - push de - jp EXECUTE+2 + .dw compiledWord + .dw FIND_ + .dw CSKIP + .dw .maybeNum + ; It's a word, execute it + .dw EXECUTE + .dw EXIT + .maybeNum: - push hl ; --> lvl 1. save string addr - call parseLiteral - pop hl ; <-- lvl 1 - jr nz, .undef - ; a valid number in DE! - push de - jp next -.undef: - call printstr - jp abortUnknownWord + .dw compiledWord + .dw PARSE + .dw R2P ; exit INTERPRET + .dw DROP + .dw EXIT .db 0b10 ; UNWORD MAINLOOP: .dw compiledWord - .dw WORD .dw INTERPRET .dw INP .dw FETCH diff --git a/apps/forth/util.asm b/apps/forth/util.asm index 6d070b4..dbd980a 100644 --- a/apps/forth/util.asm +++ b/apps/forth/util.asm @@ -115,25 +115,6 @@ multDEBC: jr nz, .loop ret -; Parse the hex char at A and extract it's 0-15 numerical value. Put the result -; in A. -; -; On success, the carry flag is reset. On error, it is set. -parseHex: - ; First, let's see if we have an easy 0-9 case - - add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff - sub 0xf6 ; maps to 0-9 and carries if not a digit - ret nc - - and 0xdf ; converts lowercase to uppercase - add a, 0xe9 ; map 0x11-x017 onto 0xFA - 0xFF - sub 0xfa ; map onto 0-6 - ret c - ; we have an A-F digit - add a, 10 ; C is clear, map back to 0xA-0xF - ret - ; Parse string at (HL) as a decimal value and return value in DE. ; Reads as many digits as it can and stop when: ; 1 - A non-digit character is read @@ -156,7 +137,6 @@ parseDecimal: ; During this routine, we switch between HL and its shadow. On one side, ; we have HL the string pointer, and on the other side, we have HL the ; numerical result. We also use EXX to preserve BC, saving us a push. -parseDecimalSkip: ; enter here to skip parsing the first digit exx ; HL as a result ld h, 0 ld l, a ; load first digit in without multiplying @@ -201,127 +181,21 @@ parseDecimalSkip: ; enter here to skip parsing the first digit cp a ; ensure Z ret -; Parse string at (HL) as a hexadecimal value without the "0x" prefix and -; return value in DE. -; HL is advanced to the character following the last successfully read char. -; Sets Z on success. -parseHexadecimal: - ld a, (hl) - call parseHex ; before "ret c" is "sub 0xfa" in parseHex - ; so carry implies not zero - ret c ; we need at least one char - push bc - ld de, 0 - ld b, d - ld c, d - -; The idea here is that the 4 hex digits of the result can be represented "bdce", -; where each register holds a single digit. Then the result is simply -; e = (c << 4) | e, d = (b << 4) | d -; However, the actual string may be of any length, so when loading in the most -; significant digit, we don't know which digit of the result it actually represents -; To solve this, after a digit is loaded into a (and is checked for validity), -; all digits are moved along, with e taking the latest digit. -.loop: - dec b - inc b ; b should be 0, else we've overflowed - jr nz, .end ; Z already unset if overflow - ld b, d - ld d, c - ld c, e - ld e, a - inc hl - ld a, (hl) - call parseHex - jr nc, .loop - ld a, b - add a, a \ add a, a \ add a, a \ add a, a - or d - ld d, a - - ld a, c - add a, a \ add a, a \ add a, a \ add a, a - or e - ld e, a - xor a ; ensure z - -.end: - pop bc - ret - - -; Parse string at (HL) as a binary value (010101) without the "0b" prefix and -; return value in E. D is always zero. -; HL is advanced to the character following the last successfully read char. -; Sets Z on success. -parseBinaryLiteral: - ld de, 0 -.loop: - ld a, (hl) - add a, 0xff-'1' - sub 0xff-1 - jr c, .end - rlc e ; sets carry if overflow, and affects Z - ret c ; Z unset if carry set, since bit 0 of e must be set - add a, e - ld e, a - inc hl - jr .loop -.end: - ; HL is properly set - xor a ; ensure Z - ret - -; Parses the string at (HL) and returns the 16-bit value in DE. The string -; can be a decimal literal (1234), a hexadecimal literal (0x1234) or a char -; literal ('X'). -; HL is advanced to the character following the last successfully read char. -; -; As soon as the number doesn't fit 16-bit any more, parsing stops and the -; number is invalid. If the number is valid, Z is set, otherwise, unset. -parseLiteral: - ld de, 0 ; pre-fill +parseCharLit: ld a, (hl) cp 0x27 ; apostrophe - jr z, .char + ret nz - ; inline parseDecimalDigit - add a, 0xc6 ; maps '0'-'9' onto 0xf6-0xff - sub 0xf6 ; maps to 0-9 and carries if not a digit - ret c - ; a already parsed so skip first few instructions of parseDecimal - jp nz, parseDecimalSkip - ; maybe hex, maybe binary - inc hl - ld a, (hl) - inc hl ; already place it for hex or bin - cp 'x' - jr z, parseHexadecimal - cp 'b' - jr z, parseBinaryLiteral - ; nope, just a regular decimal - dec hl \ dec hl - jp parseDecimal - -; Parse string at (HL) and, if it is a char literal, sets Z and return -; corresponding value in E. D is always zero. -; HL is advanced to the character following the last successfully read char. -; -; A valid char literal starts with ', ends with ' and has one character in the -; middle. No escape sequence are accepted, but ''' will return the apostrophe -; character. -.char: + ld d, 0 ; preset inc hl ld e, (hl) ; our result inc hl cp (hl) - ; advance HL and return if good char - inc hl ret z ; Z unset and there's an error - ; In all error conditions, HL is advanced by 3. Rewind. - dec hl \ dec hl \ dec hl + ; In all error conditions, HL is advanced by 2. Rewind. + dec hl \ dec hl ; NZ already set ret