zasm: big I/O overhaul

Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish.
2025-04-03 09:48:40 +11:00 · 2019-05-16 07:53:42 -04:00 · 2019-05-16 07:53:42 -04:00 · e9244b80ee
commit e9244b80ee
parent 34ff0a6c2a
5 changed files with 164 additions and 144 deletions
--- a/apps/zasm/directive.asm
+++ b/apps/zasm/directive.asm
@ -24,7 +24,6 @@ directiveHandlers:
 handleDB:
 	push	hl
 	call	toWord
 	call	readWord
 	ld	hl, scratchpad
 	call	parseLiteral
@ -36,7 +35,6 @@ handleDB:
 handleDW:
 	push	hl
 	call	toWord
 	call	readWord
 	ld	hl, scratchpad
 	call	parseExpr
@ -51,7 +49,9 @@ handleDW:
 handleEQU:
 	call	zasmIsFirstPass
 	jr	nz, .begin
-	; first pass? .equ are noops
+	; first pass? .equ are noops Consume args and return
 	call	readWord
 	call	readWord
 	xor	a
 	ret
 .begin:
@ -59,19 +59,14 @@ handleEQU:
 	push	de
 	push	bc
 	; Read our constant name
 	call	toWord
 	call	readWord
 	; We can't register our symbol yet: we don't have our value!
 	; Let's copy it over.
 	push	hl
 		ld	hl, scratchpad
 	ld	de, DIREC_SCRATCHPAD
 	ld	bc, SCRATCHPAD_SIZE
 	ldir
 	pop	hl
 	; Now, read the value associated to it
 	call	toWord
 	call	readWord
 	ld	hl, scratchpad
 	call	parseExpr
@ -102,7 +97,7 @@ getDirectiveID:
 	pop	bc
 	ret
-; Parse directive specified in A (D_* const) with args in (HL) and act in
+; Parse directive specified in A (D_* const) with args in I/O and act in
 ; an appropriate manner. If the directive results in writing data at its
 ; current location, that data is in (direcData) and A is the number of bytes
 ; in it.
--- a/apps/zasm/instr.asm
+++ b/apps/zasm/instr.asm
@ -708,16 +708,14 @@ getUpcode:
 	pop	ix
 	ret
-; Parse next argument in string (HL) and place it in (DE)
+; Parse next argument in I/O and place it in (DE)
 ; Sets Z on success, reset on error.
 processArg:
 	call	toWord
 	call	readWord
-	; Read word is in scratchpad, HL is properly advanced. Now, let's push
+	jr	nz, .noarg
 	; Read word is in (HL). Now, let's push
 	; that HL value and replace it with (scratchpad) so that we can parse
 	; that arg.
 	push	hl
 	ld	hl, scratchpad
 	call	parseArg
 	cp	0xff
@ -733,15 +731,18 @@ processArg:
 	ld	a, ixh
 	ld	(de), a
 	cp	a		; ensure Z is set
-	jr	.end
+	ret
 .error:
 	call	JUMP_UNSETZ
-.end:
+	ret
-	pop	hl
+.noarg:
 	xor	a
 	ld	(de), a
 	ret
-; Parse instruction specified in A (I_* const) with args in (HL) and write
+; Parse instruction specified in A (I_* const) with args in I/O and write
-; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in A.
+; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in
 ; A.
 parseInstruction:
 	push	bc
 	push	hl
--- a/apps/zasm/io.asm
+++ b/apps/zasm/io.asm
@ -1,5 +1,3 @@
 ; *** Consts ***
 .equ	IO_MAX_LINELEN	0xff
 ; *** Variables ***
 .equ	IO_IN_GETC	IO_RAMSTART
 .equ	IO_IN_PUTC	IO_IN_GETC+2
@ -9,14 +7,36 @@
 .equ	IO_OUT_PUTC	IO_OUT_GETC+2
 .equ	IO_OUT_SEEK	IO_OUT_PUTC+2
 .equ	IO_OUT_TELL	IO_OUT_SEEK+2
-.equ	IO_LINEBUF	IO_OUT_TELL+2
+; see ioPutBack below
-.equ	IO_RAMEND	IO_LINEBUF+IO_MAX_LINELEN+1
+.equ	IO_PUTBACK_BUF	IO_OUT_TELL+2
 .equ	IO_RAMEND	IO_PUTBACK_BUF+1
 ; *** Code ***
 ioInit:
 	xor	a
 	ld	(IO_PUTBACK_BUF), a
 	ret
 ioGetC:
 	ld	a, (IO_PUTBACK_BUF)
 	or	a		; cp 0
 	jr	nz, .getback
 	ld	ix, (IO_IN_GETC)
 	jp	(ix)
 .getback:
 	push	af
 	xor	a
 	ld	(IO_PUTBACK_BUF), a
 	pop	af
 	ret
 ; Put back non-zero character A into the "ioGetC stack". The next ioGetC call,
 ; instead of reading from IO_IN_GETC, will return that character. That's the
 ; easiest way I found to handle the readWord/gotoNextLine problem.
 ioPutBack:
 	ld	(IO_PUTBACK_BUF), a
 	ret
 ioPutC:
 	ld	ix, (IO_OUT_PUTC)
@ -26,54 +46,3 @@ ioSeek:
 	ld	ix, (IO_IN_SEEK)
 	jp	(ix)
 ; Sets Z is A is CR, LF, or null.
 isLineEnd:
 	or	a	; same as cp 0
 	ret	z
 	cp	0x0d
 	ret	z
 	cp	0x0a
 	ret	z
 	cp	'\'
 	ret
 ; Read a single line from ioGetCPtr and place it in IO_LINEBUF.
 ; Returns number of chars read in A. 0 means we're at the end of our input
 ; stream, which happens when GetC unsets Z. Make HL point to IO_LINEBUF.
 ; We ignore empty lines and pass through them like butter.
 ; A null char is written at the end of the line.
 ioReadLine:
 	push	bc
 	; consume ioGetC as long as it yields a line end char.
 .loop1:
 	call	ioGetC
 	jr	nz, .eof	; GetC unsets Z? We don't have a line to read,
 				; we have EOF.
 	call	isLineEnd
 	jr	z, .loop1
 	; A contains the first char of our line.
 	ld	c, 1
 	ld	(IO_LINEBUF), a
 	ld	hl, IO_LINEBUF+1
 .loop2:
 	call	ioGetC
 	call	isLineEnd
 	jr	z, .success	; We have end of line
 	ld	(hl), a
 	inc	hl
 	inc	c
 	jr	.loop2
 .success:
 	; write null char at HL before we return
 	xor	a
 	ld	(hl), a
 	ld	a, c
 	ld	hl, IO_LINEBUF
 	jr	.end
 .eof:
 	xor	a
 .end:
 	pop	bc
 	ret
--- a/apps/zasm/main.asm
+++ b/apps/zasm/main.asm
@ -67,6 +67,7 @@ zasmMain:
 	ld	de, IO_OUT_GETC
 	call	JUMP_BLKSEL
 	; Init modules
 	call	ioInit
 	call	symInit
 	; First pass
@ -105,17 +106,18 @@ zasmParseFile:
 	ld	(ZASM_PC), de
 .loop:
 	inc	de
 	call	ioReadLine
 	or	a		; is A 0?
 	ret	z		; We have EOF
 	call	parseLine
 	ret	nz		; error
 	ld	a, b		; TOK_*
 	cp	TOK_EOF
 	ret	z		; if EOF, return now with success
 	jr	.loop
-; Parse line in (HL), write the resulting opcode(s) through ioPutC and increases
+; Parse next token and accompanying args (when relevant) in I/O, write the
-; (ZASM_PC) by the number of bytes written. BC is set to the result of the call
+; resulting opcode(s) through ioPutC and increases (ZASM_PC) by the number of
-; to tokenize.
+; bytes written. BC is set to the result of the call to tokenize.
-; Sets Z if parse was successful, unset if there was an error or EOF.
+; Sets Z if parse was successful, unset if there was an error. EOF is not an
 ; error.
 parseLine:
 	call	tokenize
 	ld	a, b		; TOK_*
@ -124,22 +126,11 @@ parseLine:
 	cp	TOK_DIRECTIVE
 	jp	z, _parseDirec
 	cp	TOK_LABEL
-	jr	z, .label
+	jr	z, _parseLabel
-	cp	TOK_EMPTY
+	cp	TOK_EOF
-	ret			; Z is correct. If empty, Z is set and not an
+	ret			; Z is correct. If EOF, Z is set and not an
 				; error, otherwise, it means bad token and
 				; errors out.
 .label:
 	push	hl
 	call	_parseLabel
 	pop	hl
 	ret	nz		; error out
 	; We're finished here. However, because it's a label, it's possible that
 	; another logical line follows directly after the label. Let's parse
 	; this and propagate error.
 	call	parseLine
 	; Z has proper value
 	ret
 _parseInstr:
 	ld	a, c		; I_*
--- a/apps/zasm/tok.asm
+++ b/apps/zasm/tok.asm
@ -2,7 +2,7 @@
 TOK_INSTR	.equ	0x01
 TOK_DIRECTIVE	.equ	0x02
 TOK_LABEL	.equ	0x03
-TOK_EMPTY	.equ	0xfe	; not a bad token, just an empty line
+TOK_EOF		.equ	0xfe	; end of file
 TOK_BAD		.equ	0xff
 .equ	SCRATCHPAD_SIZE	0x20
@ -16,7 +16,17 @@ scratchpad:
 isLineEndOrComment:
 	cp	';'
 	ret	z
-	or	a	; cp 0
+	; continue to isLineEnd
 ; Sets Z is A is CR, LF, or null.
 isLineEnd:
 	or	a	; same as cp 0
 	ret	z
 	cp	0x0d
 	ret	z
 	cp	0x0a
 	ret	z
 	cp	'\'
 	ret
 ; Sets Z is A is ' ' '\t' or ','
@ -32,8 +42,7 @@ isSep:
 isSepOrLineEnd:
 	call	isSep
 	ret	z
-	call	isLineEndOrComment
+	jr	isLineEndOrComment
 	ret
 ; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
 ; Sets Z if yes, unset if no.
@ -63,48 +72,98 @@ isLabel:
 	pop	hl
 	ret
-; read word in (HL) and put it in (scratchpad), null terminated, for a maximum
+; Read ioGetC until a word starts, then read ioGetC as long as there is no
-; of SCRATCHPAD_SIZE-1 characters. As a result, A is the read length. HL is
+; separator and put that contents in (scratchpad), null terminated, for a
-; advanced to the next separator char.
+; maximum of SCRATCHPAD_SIZE-1 characters.
 ; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
 ; right there. If scratchpad is not big enough, we stop right there and error.
 ; HL points to scratchpad
 ; Sets Z if a word could be read, unsets if not.
 readWord:
 	push	bc
-	push	de
+	; Get to word
-	ld	de, scratchpad
+.loop1:
-	ld	b, SCRATCHPAD_SIZE-1
+	call	ioGetC
 .loop:
 	ld	a, (hl)
 	call	isSepOrLineEnd
 	jr	z, .success
 	ld	(de), a
 	inc	hl
 	inc	de
 	djnz	.loop
 .success:
 	xor	a
 	ld	(de), a
 	ld	a, SCRATCHPAD_SIZE-1
 	sub	a, b
 .end:
 	pop	de
 	pop	bc
 	ret
 ; (HL) being a string, advance it to the next non-sep character.
 ; Set Z if we could do it before the line ended, reset Z if we couldn't.
 toWord:
 .loop:
 	ld	a, (hl)
 	call	isLineEndOrComment
 	jr	z, .error
 	call	isSep
-	jr	nz, .success
+	jr	nz, .read
 	jr	.loop1
 .read:
 	ld	hl, scratchpad
 	ld	b, SCRATCHPAD_SIZE-1
 	; A contains the first letter to read
 .loop2:
 	ld	(hl), a
 	inc	hl
-	jr	.loop
+	call	ioGetC
 	call	isSepOrLineEnd
 	jr	z, .success
 	djnz	.loop2
 	; out of space. error.
 .error:
 	; We need to put the last char we've read back so that gotoNextLine
 	; behaves properly.
 	call	ioPutBack
 	call	JUMP_UNSETZ
-	ret
+	jr	.end
 .success:
-	xor	a	; ensure Z
+	call	ioPutBack
 	; null-terminate scratchpad
 	xor	a
 	ld	(hl), a
 	ld	hl, scratchpad
 .end:
 	pop	bc
 	ret
 ; Read ioGetC until we reach the beginning of next line, skipping comments if
 ; necessary. This skips all whitespace, \n, \r, comments until we reach the
 ; first non-comment character. Then, we put it back (ioPutBack) and return.
 ;
 ; If gotoNextLine encounters anything else than whitespace, comment or line
 ; separator, we error out (no putback)
 ; Sets Z if we reached a new line. Unset if EOF or error.
 gotoNextLine:
 .loop1:
 	; first loop is "strict", that is: we error out on non-whitespace.
 	call	ioGetC
 	call	isSepOrLineEnd
 	ret	nz		; error
 	or	a		; cp 0
 	jr	z, .eof
 	call	isLineEnd
 	jr	z, .loop3	; good!
 	cp	';'
 	jr	z, .loop2	; comment starting, go to "fast lane"
 	jr	.loop1
 .loop2:
 	; second loop is the "comment loop": anything is valid and we just run
 	; until EOL.
 	call	ioGetC
 	or	a		; cp 0
 	jr	z, .eof
 	cp	'\'		; special case: '\' doesn't count as a line end
 				; in a comment.
 	jr	z, .loop2
 	call	isLineEnd
 	jr	z, .loop3
 	jr	.loop2
 .loop3:
 	; Loop 3 happens after we reach our first line sep. This means that we
 	; wade through whitespace until we reach a non-whitespace character.
 	call	ioGetC
 	or	a		; cp 0
 	jr	z, .eof
 	cp	';'
 	jr	z, .loop2	; oh, another comment! go back to loop2!
 	call	isSepOrLineEnd
 	jr	z, .loop3
 	; Non-whitespace. That's our goal! Put it back
 	call	ioPutBack
 .eof:
 	cp	a		; ensure Z
 	ret
 ; Parse line in (HL) and read the next token in BC. The token is written on
@ -113,17 +172,27 @@ toWord:
 ; Advance HL to after the read word.
 ; If no token matches, TOK_BAD is written to B
 tokenize:
 	call	toWord
 	jr	nz, .emptyline
 	call	readWord
-	push	hl		; Save advanced HL for later
+	jr	z, .process	; read successful, process into token.
-	ld	hl, scratchpad
+	; Error. It could be EOL, EOF or scraptchpad size problem
 	; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
 	; that's obviously what we want to do. If it's EOF, we can check
 	; it after. If it's a scratchpad overrun, gotoNextLine handles it.
 	call	gotoNextLine
 	jr	nz, .error
 	or	a		; Are we EOF?
 	jr	nz, tokenize	; not EOF? then continue!
 	; We're EOF
 	ld	b, TOK_EOF
 	ret
 .process:
 	call	isLabel
 	jr	z, .label
 	call	getInstID
 	jr	z, .instr
 	call	getDirectiveID
 	jr	z, .direc
 .error:
 	; no match
 	ld	b, TOK_BAD
 	jr	.end
@ -137,9 +206,4 @@ tokenize:
 	ld	b, TOK_LABEL
 .end:
 	ld	c, a
 	pop	hl
 	ret
 .emptyline:
 	ld	b, TOK_EMPTY
 	; no HL to pop, we jumped before the push
 	ret