forth: Word-ify number parsing

2025-04-28 13:06:14 +10:00 · 2020-03-17 17:46:58 -04:00 · 2020-03-17 17:46:58 -04:00 · 4212d5161f
commit 4212d5161f
parent 707f1dbae1
4 changed files with 91 additions and 170 deletions
--- a/apps/forth/dict.asm
+++ b/apps/forth/dict.asm
@ -339,29 +339,13 @@ COMPILE:
 	.db	0b10		; UNWORD
 .maybeNum:
 	.dw	compiledWord
-	.dw	.parseNum
+	.dw	PARSE
 	.dw	LITN
 	.dw	R2P		; exit COMPILE
 	.dw	DROP
 	.dw	EXIT


-	.db	0b10		; UNWORD
-.parseNum:
-	.dw	nativeWord
-	pop	hl		; string addr
-	push	hl		; --> lvl 1. save string addr
-	call	parseLiteral
-	pop	hl		; <-- lvl 1
-	jr	nz, .undef
-	; a valid number in DE!
-	push	de
-	jp	next
-.undef:
-	call	printstr
-	jp	abortUnknownWord
-
-
 	.db	":"
 	.fill	6
 	.dw	COMPILE
@ -562,9 +546,55 @@ WORD:
 	push	hl
 	jp	next

+
+	.db	"(parsed"
+	.dw	WORD
+	.db	0
+PARSED:
+	.dw	nativeWord
+	pop	hl
+	call	chkPS
+	; temporary: run parseCharLit in here until it's implemented in Forth
+	; core.fs needs char literal parsing.
+	call	parseCharLit
+	jr	z, .success
+	call	parseDecimal
+	jr	z, .success
+	; error
+	ld	de, 0
+	push	de	; dummy
+	push	de	; flag
+	jp	next
+.success:
+	push	de
+	ld	de, 1		; flag
+	push	de
+	jp	next
+
+
+	.db	"(parse)"
+	.dw	WORD
+	.db	0
+PARSE:
+	.dw	compiledWord
+	.dw	PARSED
+	.dw	CSKIP
+	.dw	.error
+	; success, stack is already good, we can exit
+	.dw	EXIT
+
+	.db	0b10		; UNWORD
+.error:
+	.dw	compiledWord
+	.dw	LIT
+	.db	"unknown word", 0
+	.dw	PRINT
+	.dw	ABORT
+
+
 	.db "CREATE"
 	.fill 1
-	.dw WORD
+	.dw PARSE
 	.db 0
 CREATE:
 	.dw nativeWord
--- a/apps/forth/dictionary.txt
+++ b/apps/forth/dictionary.txt
@ -151,6 +151,31 @@ wait until another line is entered.
 KEY input, however, is direct. Regardless of the input buffer's state, KEY will
 return the next typed key.

+PARSING AND BOOTSTRAP: Parsing number literal is a very "core" activity of
+Forth, and therefore generally seen as having to be implemented in native code.
+However, Collapse OS' Forth supports many kinds of literals: decimal, hex, char,
+binary. This incurs a significant complexity penalty.
+
+What if we could implement those parsing routines in Forth? "But it's a core
+routine!" you say. Yes, but here's the deal: at its native core, only decimal
+parsing is supported. It lives in the "(parsed)" word. The interpreter's main
+loop is initially set to simply call that word.
+
+However, in core.fs, "(parsex)", "(parsec)" and "(parseb)" are implemented, in
+Forth, then "(parse)", which goes through them all is defined. Then, "(parsef)",
+which is the variable in which the interpreter's word pointer is set, is
+updated to that new "(parse)" word.
+
+This way, we have a full-featured (and extensible) parsing with a tiny native
+core.
+
+(parse)     a -- n          Parses string at a as a number and push the result
+                            in n as well as whether parsing was a success in f
+                            (false = failure, true = success)
+(parse.)    a -- n f        Sub-parsing words. They all have the same signature.
+                            Parses string at a as a number and push the result
+                            in n as well as whether parsing was a success in f
+                            (0 = failure, 1 = success)
 (print)     a --            Print string at addr a.
 .           n --            Print n in its decimal form
 .X          n --            Print n in its hexadecimal form. In hex, numbers
--- a/apps/forth/main.asm
+++ b/apps/forth/main.asm
@ -96,32 +96,24 @@ forthRdLineNoOk:

 	.db	0b10		; UNWORD
 INTERPRET:
-	.dw	nativeWord
-	pop	hl		; from WORD
-	ld	a, (hl)		; special case: empty
-	or	a
-	jp	z, next
-	call	find
-	jr	nz, .maybeNum
-	; regular word
-	push	de
-	jp	EXECUTE+2
+	.dw	compiledWord
+	.dw	FIND_
+	.dw	CSKIP
+	.dw	.maybeNum
+	; It's a word, execute it
+	.dw	EXECUTE
+	.dw	EXIT
+
 .maybeNum:
-	push	hl		; --> lvl 1. save string addr
-	call	parseLiteral
-	pop	hl		; <-- lvl 1
-	jr	nz, .undef
-	; a valid number in DE!
-	push	de
-	jp	next
-.undef:
-	call	printstr
-	jp	abortUnknownWord
+	.dw	compiledWord
+	.dw	PARSE
+	.dw	R2P		; exit INTERPRET
+	.dw	DROP
+	.dw	EXIT

 	.db	0b10		; UNWORD
 MAINLOOP:
 	.dw	compiledWord
-	.dw	WORD
 	.dw	INTERPRET
 	.dw	INP
 	.dw	FETCH
--- a/apps/forth/util.asm
+++ b/apps/forth/util.asm
@ -115,25 +115,6 @@ multDEBC:
 	jr	nz, .loop
 	ret

-; Parse the hex char at A and extract it's 0-15 numerical value. Put the result
-; in A.
-;
-; On success, the carry flag is reset. On error, it is set.
-parseHex:
-	; First, let's see if we have an easy 0-9 case
-
-	add 	a, 0xc6	; maps '0'-'9' onto 0xf6-0xff
-	sub 	0xf6	; maps to 0-9 and carries if not a digit
-	ret	nc
-
-	and 	0xdf		; converts lowercase to uppercase
-	add	a, 0xe9		; map 0x11-x017 onto 0xFA - 0xFF
-	sub 	0xfa		; map onto 0-6
-	ret 	c
-	; we have an A-F digit
-	add	a, 10		; C is clear, map back to 0xA-0xF
-	ret
-
 ; Parse string at (HL) as a decimal value and return value in DE.
 ; Reads as many digits as it can and stop when:
 ; 1 - A non-digit character is read
@ -156,7 +137,6 @@ parseDecimal:
 	; During this routine, we switch between HL and its shadow. On one side,
 	; we have HL the string pointer, and on the other side, we have HL the
 	; numerical result. We also use EXX to preserve BC, saving us a push.
-parseDecimalSkip:	; enter here to skip parsing the first digit
 	exx		; HL as a result
 	ld	h, 0
 	ld	l, a	; load first digit in without multiplying
@ -201,127 +181,21 @@ parseDecimalSkip:	; enter here to skip parsing the first digit
 	cp	a	; ensure Z
 	ret

-; Parse string at (HL) as a hexadecimal value without the "0x" prefix and
-; return value in DE.
-; HL is advanced to the character following the last successfully read char.
-; Sets Z on success.
-parseHexadecimal:
-	ld	a, (hl)
-	call	parseHex	; before "ret c" is "sub 0xfa" in parseHex
-				; so carry implies not zero
-	ret 	c	; we need at least one char
-	push	bc
-	ld	de, 0
-	ld	b, d
-	ld	c, d
-
-; The idea here is that the 4 hex digits of the result can be represented "bdce",
-; where each register holds a single digit. Then the result is simply
-; e = (c << 4) | e,	 d = (b << 4) | d
-; However, the actual string may be of any length, so when loading in the most
-; significant digit, we don't know which digit of the result it actually represents
-; To solve this, after a digit is loaded into a (and is checked for validity),
-; all digits are moved along, with e taking the latest digit.
-.loop:
-	dec 	b
-	inc 	b	; b should be 0, else we've overflowed
-	jr	nz, .end	; Z already unset if overflow
-	ld 	b, d
-	ld 	d, c
-	ld 	c, e
-	ld 	e, a
-	inc 	hl
-	ld 	a, (hl)
-	call 	parseHex
-	jr 	nc, .loop
-	ld 	a, b
-	add	a, a  \ add a, a \ add a, a \ add a, a
-	or 	d
-	ld 	d, a
-
-	ld 	a, c
-	add	a, a  \ add a, a \ add a, a \ add a, a
-	or 	e
-	ld 	e, a
-	xor	a	; ensure z
-
-.end:
-	pop	bc
-	ret
-
-
-; Parse string at (HL) as a binary value (010101) without the "0b" prefix and
-; return value in E. D is always zero.
-; HL is advanced to the character following the last successfully read char.
-; Sets Z on success.
-parseBinaryLiteral:
-	ld	de, 0
-.loop:
-	ld	a, (hl)
-	add	a, 0xff-'1'
-	sub	0xff-1
-	jr	c, .end
-	rlc	e	; sets carry if overflow, and affects Z
-	ret	c	; Z unset if carry set, since bit 0 of e must be set
-	add	a, e
-	ld	e, a
-	inc	hl
-	jr	.loop
-.end:
-	; HL is properly set
-	xor	a		; ensure Z
-	ret
-
-; Parses the string at (HL) and returns the 16-bit value in DE. The string
-; can be a decimal literal (1234), a hexadecimal literal (0x1234) or a char
-; literal ('X').
-; HL is advanced to the character following the last successfully read char.
-;
-; As soon as the number doesn't fit 16-bit any more, parsing stops and the
-; number is invalid. If the number is valid, Z is set, otherwise, unset.
-parseLiteral:
-	ld	de, 0		; pre-fill
+parseCharLit:
 	ld	a, (hl)
 	cp	0x27		; apostrophe
-	jr	z, .char
+	ret	nz

-	; inline parseDecimalDigit
-	add 	a, 0xc6	; maps '0'-'9' onto 0xf6-0xff
-	sub 	0xf6	; maps to 0-9 and carries if not a digit
-	ret	c
-	; a already parsed so skip first few instructions of parseDecimal
-	jp	nz, parseDecimalSkip
-	; maybe hex, maybe binary
-	inc	hl
-	ld	a, (hl)
-	inc	hl		; already place it for hex or bin
-	cp	'x'
-	jr	z, parseHexadecimal
-	cp	'b'
-	jr	z, parseBinaryLiteral
-	; nope, just a regular decimal
-	dec	hl \ dec hl
-	jp	parseDecimal
-
-; Parse string at (HL) and, if it is a char literal, sets Z and return
-; corresponding value in E. D is always zero.
-; HL is advanced to the character following the last successfully read char.
-;
-; A valid char literal starts with ', ends with ' and has one character in the
-; middle. No escape sequence are accepted, but ''' will return the apostrophe
-; character.
-.char:
+	ld	d, 0		; preset
 	inc	hl
 	ld	e, (hl)		; our result
 	inc	hl
 	cp	(hl)
-	; advance HL and return if good char
-	inc	hl
 	ret	z

 	; Z unset and there's an error
-	; In all error conditions, HL is advanced by 3. Rewind.
-	dec	hl \ dec hl \ dec hl
+	; In all error conditions, HL is advanced by 2. Rewind.
+	dec	hl \ dec hl
 	; NZ already set
 	ret