forth: Word-ify number parsing

2025-04-03 15:08:39 +11:00 · 2020-03-17 17:46:58 -04:00 · 2020-03-17 17:46:58 -04:00 · 4212d5161f
commit 4212d5161f
parent 707f1dbae1
4 changed files with 91 additions and 170 deletions
--- a/apps/forth/dict.asm
+++ b/apps/forth/dict.asm
@ -339,29 +339,13 @@ COMPILE:
 	.db	0b10		; UNWORD
 .maybeNum:
 	.dw	compiledWord
-	.dw	.parseNum
+	.dw	PARSE
 	.dw	LITN
 	.dw	R2P		; exit COMPILE
 	.dw	DROP
 	.dw	EXIT
 	.db	0b10		; UNWORD
 .parseNum:
 	.dw	nativeWord
 	pop	hl		; string addr
 	push	hl		; --> lvl 1. save string addr
 	call	parseLiteral
 	pop	hl		; <-- lvl 1
 	jr	nz, .undef
 	; a valid number in DE!
 	push	de
 	jp	next
 .undef:
 	call	printstr
 	jp	abortUnknownWord
 	.db	":"
 	.fill	6
 	.dw	COMPILE
@ -562,9 +546,55 @@ WORD:
 	push	hl
 	jp	next
 	.db	"(parsed"
 	.dw	WORD
 	.db	0
 PARSED:
 	.dw	nativeWord
 	pop	hl
 	call	chkPS
 	; temporary: run parseCharLit in here until it's implemented in Forth
 	; core.fs needs char literal parsing.
 	call	parseCharLit
 	jr	z, .success
 	call	parseDecimal
 	jr	z, .success
 	; error
 	ld	de, 0
 	push	de	; dummy
 	push	de	; flag
 	jp	next
 .success:
 	push	de
 	ld	de, 1		; flag
 	push	de
 	jp	next
 	.db	"(parse)"
 	.dw	WORD
 	.db	0
 PARSE:
 	.dw	compiledWord
 	.dw	PARSED
 	.dw	CSKIP
 	.dw	.error
 	; success, stack is already good, we can exit
 	.dw	EXIT
 	.db	0b10		; UNWORD
 .error:
 	.dw	compiledWord
 	.dw	LIT
 	.db	"unknown word", 0
 	.dw	PRINT
 	.dw	ABORT
 	.db "CREATE"
 	.fill 1
-	.dw WORD
+	.dw PARSE
 	.db 0
 CREATE:
 	.dw nativeWord
--- a/apps/forth/dictionary.txt
+++ b/apps/forth/dictionary.txt
@ -151,6 +151,31 @@ wait until another line is entered.
 KEY input, however, is direct. Regardless of the input buffer's state, KEY will
 return the next typed key.
 PARSING AND BOOTSTRAP: Parsing number literal is a very "core" activity of
 Forth, and therefore generally seen as having to be implemented in native code.
 However, Collapse OS' Forth supports many kinds of literals: decimal, hex, char,
 binary. This incurs a significant complexity penalty.
 What if we could implement those parsing routines in Forth? "But it's a core
 routine!" you say. Yes, but here's the deal: at its native core, only decimal
 parsing is supported. It lives in the "(parsed)" word. The interpreter's main
 loop is initially set to simply call that word.
 However, in core.fs, "(parsex)", "(parsec)" and "(parseb)" are implemented, in
 Forth, then "(parse)", which goes through them all is defined. Then, "(parsef)",
 which is the variable in which the interpreter's word pointer is set, is
 updated to that new "(parse)" word.
 This way, we have a full-featured (and extensible) parsing with a tiny native
 core.
 (parse)     a -- n          Parses string at a as a number and push the result
                            in n as well as whether parsing was a success in f
                            (false = failure, true = success)
 (parse.)    a -- n f        Sub-parsing words. They all have the same signature.
                            Parses string at a as a number and push the result
                            in n as well as whether parsing was a success in f
                            (0 = failure, 1 = success)
 (print)     a --            Print string at addr a.
 .           n --            Print n in its decimal form
 .X          n --            Print n in its hexadecimal form. In hex, numbers
--- a/apps/forth/main.asm
+++ b/apps/forth/main.asm
@ -96,32 +96,24 @@ forthRdLineNoOk:
 	.db	0b10		; UNWORD
 INTERPRET:
-	.dw	nativeWord
+	.dw	compiledWord
-	pop	hl		; from WORD
+	.dw	FIND_
-	ld	a, (hl)		; special case: empty
+	.dw	CSKIP
-	or	a
+	.dw	.maybeNum
-	jp	z, next
+	; It's a word, execute it
-	call	find
+	.dw	EXECUTE
-	jr	nz, .maybeNum
+	.dw	EXIT
-	; regular word
+
 	push	de
 	jp	EXECUTE+2
 .maybeNum:
-	push	hl		; --> lvl 1. save string addr
+	.dw	compiledWord
-	call	parseLiteral
+	.dw	PARSE
-	pop	hl		; <-- lvl 1
+	.dw	R2P		; exit INTERPRET
-	jr	nz, .undef
+	.dw	DROP
-	; a valid number in DE!
+	.dw	EXIT
 	push	de
 	jp	next
 .undef:
 	call	printstr
 	jp	abortUnknownWord
 	.db	0b10		; UNWORD
 MAINLOOP:
 	.dw	compiledWord
 	.dw	WORD
 	.dw	INTERPRET
 	.dw	INP
 	.dw	FETCH
--- a/apps/forth/util.asm
+++ b/apps/forth/util.asm
@ -115,25 +115,6 @@ multDEBC:
 	jr	nz, .loop
 	ret
 ; Parse the hex char at A and extract it's 0-15 numerical value. Put the result
 ; in A.
 ;
 ; On success, the carry flag is reset. On error, it is set.
 parseHex:
 	; First, let's see if we have an easy 0-9 case
 	add 	a, 0xc6	; maps '0'-'9' onto 0xf6-0xff
 	sub 	0xf6	; maps to 0-9 and carries if not a digit
 	ret	nc
 	and 	0xdf		; converts lowercase to uppercase
 	add	a, 0xe9		; map 0x11-x017 onto 0xFA - 0xFF
 	sub 	0xfa		; map onto 0-6
 	ret 	c
 	; we have an A-F digit
 	add	a, 10		; C is clear, map back to 0xA-0xF
 	ret
 ; Parse string at (HL) as a decimal value and return value in DE.
 ; Reads as many digits as it can and stop when:
 ; 1 - A non-digit character is read
@ -156,7 +137,6 @@ parseDecimal:
 	; During this routine, we switch between HL and its shadow. On one side,
 	; we have HL the string pointer, and on the other side, we have HL the
 	; numerical result. We also use EXX to preserve BC, saving us a push.
 parseDecimalSkip:	; enter here to skip parsing the first digit
 	exx		; HL as a result
 	ld	h, 0
 	ld	l, a	; load first digit in without multiplying
@ -201,127 +181,21 @@ parseDecimalSkip:	; enter here to skip parsing the first digit
 	cp	a	; ensure Z
 	ret
-; Parse string at (HL) as a hexadecimal value without the "0x" prefix and
+parseCharLit:
 ; return value in DE.
 ; HL is advanced to the character following the last successfully read char.
 ; Sets Z on success.
 parseHexadecimal:
 	ld	a, (hl)
 	call	parseHex	; before "ret c" is "sub 0xfa" in parseHex
 				; so carry implies not zero
 	ret 	c	; we need at least one char
 	push	bc
 	ld	de, 0
 	ld	b, d
 	ld	c, d
 ; The idea here is that the 4 hex digits of the result can be represented "bdce",
 ; where each register holds a single digit. Then the result is simply
 ; e = (c << 4) | e,	 d = (b << 4) | d
 ; However, the actual string may be of any length, so when loading in the most
 ; significant digit, we don't know which digit of the result it actually represents
 ; To solve this, after a digit is loaded into a (and is checked for validity),
 ; all digits are moved along, with e taking the latest digit.
 .loop:
 	dec 	b
 	inc 	b	; b should be 0, else we've overflowed
 	jr	nz, .end	; Z already unset if overflow
 	ld 	b, d
 	ld 	d, c
 	ld 	c, e
 	ld 	e, a
 	inc 	hl
 	ld 	a, (hl)
 	call 	parseHex
 	jr 	nc, .loop
 	ld 	a, b
 	add	a, a  \ add a, a \ add a, a \ add a, a
 	or 	d
 	ld 	d, a
 	ld 	a, c
 	add	a, a  \ add a, a \ add a, a \ add a, a
 	or 	e
 	ld 	e, a
 	xor	a	; ensure z
 .end:
 	pop	bc
 	ret
 ; Parse string at (HL) as a binary value (010101) without the "0b" prefix and
 ; return value in E. D is always zero.
 ; HL is advanced to the character following the last successfully read char.
 ; Sets Z on success.
 parseBinaryLiteral:
 	ld	de, 0
 .loop:
 	ld	a, (hl)
 	add	a, 0xff-'1'
 	sub	0xff-1
 	jr	c, .end
 	rlc	e	; sets carry if overflow, and affects Z
 	ret	c	; Z unset if carry set, since bit 0 of e must be set
 	add	a, e
 	ld	e, a
 	inc	hl
 	jr	.loop
 .end:
 	; HL is properly set
 	xor	a		; ensure Z
 	ret
 ; Parses the string at (HL) and returns the 16-bit value in DE. The string
 ; can be a decimal literal (1234), a hexadecimal literal (0x1234) or a char
 ; literal ('X').
 ; HL is advanced to the character following the last successfully read char.
 ;
 ; As soon as the number doesn't fit 16-bit any more, parsing stops and the
 ; number is invalid. If the number is valid, Z is set, otherwise, unset.
 parseLiteral:
 	ld	de, 0		; pre-fill
 	ld	a, (hl)
 	cp	0x27		; apostrophe
-	jr	z, .char
+	ret	nz
-	; inline parseDecimalDigit
+	ld	d, 0		; preset
 	add 	a, 0xc6	; maps '0'-'9' onto 0xf6-0xff
 	sub 	0xf6	; maps to 0-9 and carries if not a digit
 	ret	c
 	; a already parsed so skip first few instructions of parseDecimal
 	jp	nz, parseDecimalSkip
 	; maybe hex, maybe binary
 	inc	hl
 	ld	a, (hl)
 	inc	hl		; already place it for hex or bin
 	cp	'x'
 	jr	z, parseHexadecimal
 	cp	'b'
 	jr	z, parseBinaryLiteral
 	; nope, just a regular decimal
 	dec	hl \ dec hl
 	jp	parseDecimal
 ; Parse string at (HL) and, if it is a char literal, sets Z and return
 ; corresponding value in E. D is always zero.
 ; HL is advanced to the character following the last successfully read char.
 ;
 ; A valid char literal starts with ', ends with ' and has one character in the
 ; middle. No escape sequence are accepted, but ''' will return the apostrophe
 ; character.
 .char:
 	inc	hl
 	ld	e, (hl)		; our result
 	inc	hl
 	cp	(hl)
 	; advance HL and return if good char
 	inc	hl
 	ret	z
 	; Z unset and there's an error
-	; In all error conditions, HL is advanced by 3. Rewind.
+	; In all error conditions, HL is advanced by 2. Rewind.
-	dec	hl \ dec hl \ dec hl
+	dec	hl \ dec hl
 	; NZ already set
 	ret