collapseos/apps/zasm/tok.asm

; *** Consts ***
; Token types, returned in B by tokenize.
TOK_INSTR	.equ	0x01	; word matched by getInstID
TOK_DIRECTIVE	.equ	0x02	; word matched by getDirectiveID
TOK_LABEL	.equ	0x03	; word ending with ':' (see isLabel)
TOK_EOF		.equ	0xfe	; end of file
TOK_BAD		.equ	0xff	; no match, or gotoNextLine reported an error

; Size, in bytes, of the scratchpad buffer (terminating null included).
.equ	SCRATCHPAD_SIZE	0x40
; *** Variables ***
; Buffer in which readWord stores the word it reads, null terminated.
scratchpad:
	.fill	SCRATCHPAD_SIZE

; *** Code ***

; Sets Z if A is ';' or anything isLineEnd accepts (CR, LF, '\' or null).
; Unsets Z otherwise. A is preserved.
isLineEndOrComment:
	cp	';'
	ret	z
	; not a comment marker: fall through to isLineEnd

; Sets Z if A is CR, LF, '\' or null. Unsets Z otherwise. A is preserved.
isLineEnd:
	or	a	; same as cp 0
	ret	z
	cp	0x0d	; CR
	ret	z
	cp	0x0a	; LF
	ret	z
	cp	'\'	; last test: its Z result is what we return
	ret

; Sets Z if A is ' ' or tab (0x09). Unsets Z otherwise. A is preserved.
; Note: ',' is NOT matched here; callers that care about commas test for
; them explicitly (see readWord and readComma).
isSep:
	cp	' '
	ret	z
	cp	0x09	; tab; last test: its Z result is what we return
	ret

; Sets Z if A is a separator (' ' or tab, per isSep) or ends the useful part
; of a line (';', CR, LF, '\' or null, per isLineEndOrComment).
; Unsets Z otherwise. A is preserved.
isSepOrLineEnd:
	call	isSep
	; Not a separator? Then the answer is whatever isLineEndOrComment says.
	jr	nz, isLineEndOrComment
	ret			; separator: Z is still set from isSep

; Tells whether the string at (HL) is a label, that is, whether its last
; character is ':'.
; Sets Z if it is, unsets Z if it isn't. HL is preserved in both cases.
;
; On a match, the trailing ':' is overwritten in place with a null char so that
; the caller is left with the bare label name. A bit dirty, but easy.
isLabel:
	push	hl		; restored on every exit path
	ld	a, ':'
	call	findchar	; presumably leaves HL on the first ':' when
				; there is one; we re-check (HL) ourselves
	ld	a, (hl)
	cp	':'
	jr	nz, .notLabel
	; Found a ':'. It only counts if it is the very last char.
	inc	hl
	ld	a, (hl)		; char following the ':'
	dec	hl		; back on the ':' (16-bit dec: flags untouched)
	or	a		; cp 0
	jr	nz, .notLabel	; something follows the ':'. no match.
	; Match. A is 0 and Z is set: use A to erase the ':'.
	ld	(hl), a
	pop	hl
	ret
.notLabel:
	call	unsetZ
	pop	hl
	ret

; Read ioGetC until a word starts, then keep reading as long as there is no
; separator and put that contents in (scratchpad), null terminated, for a
; maximum of SCRATCHPAD_SIZE-1 characters.
;
; A word that begins with '"' may contain spaces and tabs up to the closing
; '"'. A word that begins with a single quote takes the next char verbatim and
; requires a closing single quote right after it. In both cases, reading then
; continues normally until a separator, a line end or a ',' is met.
;
; If a line end, a comment or EOF is hit before a word could be read, we stop
; right there. If scratchpad is not big enough, we stop right there and error.
; In every case, the last char read is put back with ioPutBack.
;
; Sets Z if a word could be read, in which case HL points to scratchpad.
; Unsets Z if not. BC is preserved.
readWord:
	push	bc
	; Get to word
.loop1:
	call	ioGetC
	call	isLineEndOrComment
	jr	z, .error
	call	isSep
	jr	z, .loop1	; still in leading whitespace
	; A contains the first char of the word.
	ld	hl, scratchpad
	; B is our budget: the number of chars we can still store while leaving
	; room for the terminating null. Every byte stored in scratchpad has to
	; be charged against it.
	ld	b, SCRATCHPAD_SIZE-1
	; Are we opening a double quote?
	cp	'"'
	jr	z, .insideQuote
	; Are we opening a single quote?
	cp	0x27		; '
	jr	z, .singleQuote
.loop2:
	ld	(hl), a
	inc	hl
	call	ioGetC
	call	isSepOrLineEnd
	jr	z, .success
	cp	','
	jr	z, .success
	djnz	.loop2
	; out of space. error.
.error:
	; We need to put the last char we've read back so that gotoNextLine
	; behaves properly.
	call	ioPutBack
	call	unsetZ
	jr	.end
.success:
	call	ioPutBack
	; null-terminate scratchpad
	xor	a		; also sets Z; the ld below leaves flags alone
	ld	(hl), a
	ld	hl, scratchpad
.end:
	pop	bc
	ret
.insideQuote:
	; inside quotes, we accept literal whitespaces, but not line ends.
	ld	(hl), a
	inc	hl
	call	ioGetC
	cp	'"'
	jr	z, .closeQuote	; ending the quote ends the quoted part
	call	isLineEnd
	jr	z, .error	; ending the line without closing the quote,
				; nope.
	djnz	.insideQuote
	; out of space. error.
	jr	.error
.closeQuote:
	; .loop2 is about to store the closing quote. The char stored just
	; before it has not been charged against B yet: do it now, otherwise a
	; maximum-length quoted word would write its null one byte past the
	; end of scratchpad.
	djnz	.loop2
	; out of space. error.
	jr	.error
.singleQuote:
	; single quote is more straightforward: we have 3 chars and we put them
	; right in scratchpad
	ld	(hl), a		; opening quote
	call	ioGetC
	or	a
	jr	z, .error	; EOF right after the opening quote
	inc	hl
	ld	(hl), a		; the quoted char, whatever it is
	call	ioGetC
	cp	0x27		; '
	jr	nz, .error	; missing closing quote
	inc	hl
	; Two bytes were stored above without going through djnz. Charge them
	; so that .loop2 cannot run past the end of scratchpad. dec b changes
	; flags but not A, and .loop2 doesn't look at flags on entry.
	dec	b
	dec	b
	jr	.loop2		; .loop2 stores the closing quote (still in A)

; Consume the next char from I/O if, and only if, it is a comma.
; Z is set when a comma was consumed. Otherwise, the char is handed back to
; I/O through ioPutBack and Z is unset.
readComma:
	call	ioGetC
	cp	','
	ret	z		; got our comma
	call	ioPutBack	; not a comma: leave it for the next reader
	jp	unsetZ		; tail call: unsetZ's ret returns to our caller

; Read ioGetC until we reach the beginning of the next line that has content,
; skipping comments if necessary. This skips all whitespace, \n, \r and
; comments until we reach the first char that is none of those. Then, we put
; it back (ioPutBack) and return.
;
; Until the first line end or comment is met, only whitespace is tolerated: if
; anything else is encountered there, we error out right away, and the
; offending char is NOT put back.
;
; Sets Z on success, which includes reaching EOF. At EOF, A is 0; this is how
; tokenize tells EOF apart from "a new line was reached".
; NOTE(review): on the new-line path, A is expected to still hold the char
; that was put back (non-zero); this relies on ioPutBack preserving A.
; Unsets Z on error.
gotoNextLine:
.loop1:
	; first loop is "strict", that is: we error out on non-whitespace.
	call	ioGetC
	call	isSepOrLineEnd
	ret	nz		; error. Z is unset, which is our error signal
	or	a		; cp 0
	jr	z, .eof
	call	isLineEnd
	jr	z, .loop3	; good!
	cp	';'
	jr	z, .loop2	; comment starting, go to "fast lane"
	jr	.loop1		; space or tab: keep looking
.loop2:
	; second loop is the "comment loop": anything is valid and we just run
	; until EOL.
	call	ioGetC
	or	a		; cp 0
	jr	z, .eof
	cp	'\'		; special case: isLineEnd accepts '\', but it
				; doesn't count as a line end in a comment.
	jr	z, .loop2
	call	isLineEnd
	jr	z, .loop3
	jr	.loop2
.loop3:
	; Loop 3 happens after we reach our first line sep. This means that we
	; wade through whitespace until we reach a non-whitespace character.
	call	ioGetC
	or	a		; cp 0
	jr	z, .eof
	cp	';'
	jr	z, .loop2	; oh, another comment! go back to loop2!
	call	isSepOrLineEnd
	jr	z, .loop3	; whitespace or empty line: keep going
	; Non-whitespace. That's our goal! Put it back
	call	ioPutBack
.eof:
	cp	a		; ensure Z. A itself is left untouched
	ret

; Read the next word from I/O (through readWord) and return the matching
; token in BC. The token is written on two bytes (B and C). B is a token type
; (TOK_* constants) and C is an ID specific to that token type.
;
; Lines that yield no word (empty lines, comments) are skipped with
; gotoNextLine until a word is read or EOF is reached.
;
; B values:
;   TOK_LABEL      the word ends with ':' (isLabel strips that ':')
;   TOK_INSTR      getInstID set Z for the word
;   TOK_DIRECTIVE  getDirectiveID set Z for the word
;   TOK_BAD        nothing matched, or gotoNextLine reported an error
;   TOK_EOF        end of input. C is not written in this case.
; In all other cases, C receives whatever is in A at that point: presumably the
; ID returned by getInstID/getDirectiveID (defined outside of this file, to be
; confirmed there).
; When a word was read, HL points to scratchpad, where readWord stored it.
tokenize:
	call	readWord
	jr	z, .process	; read successful, process into token.
	; Error. It could be EOL, EOF or scratchpad size problem
	; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
	; that's obviously what we want to do. If it's EOF, we can check
	; it after. If it's a scratchpad overrun, gotoNextLine handles it.
	call	gotoNextLine
	jr	nz, .error
	or	a		; Are we EOF? gotoNextLine leaves A at 0 then
	jr	nz, tokenize	; not EOF? then continue!
	; We're EOF
	ld	b, TOK_EOF
	ret
.process:
	call	isLabel
	jr	z, .label
	call	getInstID
	jr	z, .instr
	call	getDirectiveID
	jr	z, .direc
.error:
	; no match
	ld	b, TOK_BAD
	jr	.end
.instr:
	ld	b, TOK_INSTR
	jr	.end
.direc:
	ld	b, TOK_DIRECTIVE
	jr	.end
.label:
	ld	b, TOK_LABEL
.end:
	ld	c, a		; ld b, n above doesn't touch A
	ret
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`; * Consts *`
			`TOK_INSTR .equ 0x01`
			`TOK_DIRECTIVE .equ 0x02`
			`TOK_LABEL .equ 0x03`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`TOK_EOF .equ 0xfe ; end of file`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`TOK_BAD .equ 0xff`

zasm: allow for whitespace inside string literals Also, increase scratchpad size. It wasn't big enough for some expressions in shell unit. 2019-05-18 06:44:08 +10:00			`.equ SCRATCHPAD_SIZE 0x40`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`; * Variables *`
			`scratchpad:`
			`.fill SCRATCHPAD_SIZE`

			`; * Code *`

			`; Sets Z is A is ';' or null.`
			`isLineEndOrComment:`
			`cp ';'`
			`ret z`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`; continue to isLineEnd`

			`; Sets Z is A is CR, LF, or null.`
			`isLineEnd:`
			`or a ; same as cp 0`
			`ret z`
			`cp 0x0d`
			`ret z`
			`cp 0x0a`
			`ret z`
			`cp '\'`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`ret`

			`; Sets Z is A is ' ' '\t' or ','`
			`isSep:`
			`cp ' '`
			`ret z`
			`cp 0x09`
			`ret`

			`; Sets Z is A is ' ', ',', ';', CR, LF, or null.`
			`isSepOrLineEnd:`
			`call isSep`
			`ret z`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`jr isLineEndOrComment`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00
			`; Checks whether string at (HL) is a label, that is, whether it ends with a ":"`
			`; Sets Z if yes, unset if no.`
			`;`
			`; If it's a label, we change the trailing ':' char with a null char. It's a bit`
			`; dirty, but it's the easiest way to proceed.`
			`isLabel:`
			`push hl`
			`ld a, ':'`
zasm: remove JUMP_ prefixes They serve no purpose and make the code less flexible. 2019-05-17 23:50:11 +10:00			`call findchar`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`ld a, (hl)`
			`cp ':'`
			`jr nz, .nomatch`
			`; We also have to check that it's our last char.`
			`inc hl`
			`ld a, (hl)`
			`or a ; cp 0`
			`jr nz, .nomatch ; not a null char following the :. no match.`
			`; We have a match!`
			`; Remove trailing ':'`
			`xor a ; Z is set`
zasm: don't match prefixes in symFind Only match when full names match. 2019-05-18 03:14:16 +10:00			`dec hl`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`ld (hl), a`
			`jr .end`
			`.nomatch:`
zasm: remove JUMP_ prefixes They serve no purpose and make the code less flexible. 2019-05-17 23:50:11 +10:00			`call unsetZ`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`.end:`
			`pop hl`
			`ret`

zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`; Read ioGetC until a word starts, then read ioGetC as long as there is no`
			`; separator and put that contents in (scratchpad), null terminated, for a`
			`; maximum of SCRATCHPAD_SIZE-1 characters.`
			`; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop`
			`; right there. If scratchpad is not big enough, we stop right there and error.`
			`; HL points to scratchpad`
			`; Sets Z if a word could be read, unsets if not.`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`readWord:`
			`push bc`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`; Get to word`
			`.loop1:`
			`call ioGetC`
			`call isLineEndOrComment`
			`jr z, .error`
			`call isSep`
			`jr nz, .read`
			`jr .loop1`
			`.read:`
			`ld hl, scratchpad`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`ld b, SCRATCHPAD_SIZE-1`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`; A contains the first letter to read`
zasm: allow single quote to contain whitespace 2019-05-18 07:22:16 +10:00			`; Are we opening a double quote?`
zasm: allow for whitespace inside string literals Also, increase scratchpad size. It wasn't big enough for some expressions in shell unit. 2019-05-18 06:44:08 +10:00			`cp '"'`
			`jr z, .insideQuote`
zasm: allow single quote to contain whitespace 2019-05-18 07:22:16 +10:00			`; Are we opening a single quote?`
			`cp 0x27 ; '`
			`jr z, .singleQuote`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`.loop2:`
			`ld (hl), a`
			`inc hl`
			`call ioGetC`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`call isSepOrLineEnd`
			`jr z, .success`
zasm: improve comma processing We don't treat "," exactly as a whitespace anymore. We have specific processing for it. 2019-05-18 04:34:38 +10:00			`cp ','`
			`jr z, .success`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`djnz .loop2`
			`; out of space. error.`
			`.error:`
			`; We need to put the last char we've read back so that gotoNextLine`
			`; behaves properly.`
			`call ioPutBack`
zasm: remove JUMP_ prefixes They serve no purpose and make the code less flexible. 2019-05-17 23:50:11 +10:00			`call unsetZ`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`jr .end`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`.success:`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`call ioPutBack`
			`; null-terminate scratchpad`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`xor a`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`ld (hl), a`
			`ld hl, scratchpad`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`.end:`
			`pop bc`
			`ret`
zasm: allow for whitespace inside string literals Also, increase scratchpad size. It wasn't big enough for some expressions in shell unit. 2019-05-18 06:44:08 +10:00			`.insideQuote:`
			`; inside quotes, we accept literal whitespaces, but not line ends.`
			`ld (hl), a`
			`inc hl`
			`call ioGetC`
			`cp '"'`
			`jr z, .loop2 ; ending the quote ends the word`
			`call isLineEnd`
			`jr z, .error ; ending the line without closing the quote,`
			`; nope.`
			`djnz .insideQuote`
			`; out of space. error.`
			`jr .error`
zasm: allow single quote to contain whitespace 2019-05-18 07:22:16 +10:00			`.singleQuote:`
			`; single quote is more straightforward: we have 3 chars and we put them`
			`; right in scratchpad`
			`ld (hl), a`
			`call ioGetC`
			`or a`
			`jr z, .error`
			`inc hl`
			`ld (hl), a`
			`call ioGetC`
			`cp 0x27 ; '`
			`jr nz, .error`
			`inc hl`
			`ld (hl), a`
			`jr .loop2`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00
zasm: improve comma processing We don't treat "," exactly as a whitespace anymore. We have specific processing for it. 2019-05-18 04:34:38 +10:00			`; Reads the next char in I/O. If it's a comma, Set Z and return. If it's not,`
			`; Put the read char back in I/O and unset Z.`
			`readComma:`
			`call ioGetC`
			`cp ','`
			`ret z`
			`call ioPutBack`
			`call unsetZ`
			`ret`

zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`; Read ioGetC until we reach the beginning of next line, skipping comments if`
			`; necessary. This skips all whitespace, \n, \r, comments until we reach the`
			`; first non-comment character. Then, we put it back (ioPutBack) and return.`
			`;`
			`; If gotoNextLine encounters anything else than whitespace, comment or line`
			`; separator, we error out (no putback)`

			`; Sets Z if we reached a new line. Unset if EOF or error.`
			`gotoNextLine:`
			`.loop1:`
			`; first loop is "strict", that is: we error out on non-whitespace.`
			`call ioGetC`
			`call isSepOrLineEnd`
			`ret nz ; error`
			`or a ; cp 0`
			`jr z, .eof`
			`call isLineEnd`
			`jr z, .loop3 ; good!`
			`cp ';'`
			`jr z, .loop2 ; comment starting, go to "fast lane"`
			`jr .loop1`
			`.loop2:`
			`; second loop is the "comment loop": anything is valid and we just run`
			`; until EOL.`
			`call ioGetC`
			`or a ; cp 0`
			`jr z, .eof`
			`cp '\' ; special case: '\' doesn't count as a line end`
			`; in a comment.`
			`jr z, .loop2`
			`call isLineEnd`
			`jr z, .loop3`
			`jr .loop2`
			`.loop3:`
			`; Loop 3 happens after we reach our first line sep. This means that we`
			`; wade through whitespace until we reach a non-whitespace character.`
			`call ioGetC`
			`or a ; cp 0`
			`jr z, .eof`
			`cp ';'`
			`jr z, .loop2 ; oh, another comment! go back to loop2!`
			`call isSepOrLineEnd`
			`jr z, .loop3`
			`; Non-whitespace. That's our goal! Put it back`
			`call ioPutBack`
			`.eof:`
			`cp a ; ensure Z`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`ret`

			`; Parse line in (HL) and read the next token in BC. The token is written on`
			`; two bytes (B and C). B is a token type (TOK_* constants) and C is an ID`
			`; specific to that token type.`
			`; Advance HL to after the read word.`
			`; If no token matches, TOK_BAD is written to B`
			`tokenize:`
			`call readWord`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`jr z, .process ; read successful, process into token.`
			`; Error. It could be EOL, EOF or scraptchpad size problem`
			`; Whatever it is, calling gotoNextLine is appropriate. If it's EOL`
			`; that's obviously what we want to do. If it's EOF, we can check`
			`; it after. If it's a scratchpad overrun, gotoNextLine handles it.`
			`call gotoNextLine`
			`jr nz, .error`
			`or a ; Are we EOF?`
			`jr nz, tokenize ; not EOF? then continue!`
			`; We're EOF`
			`ld b, TOK_EOF`
			`ret`
			`.process:`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`call isLabel`
			`jr z, .label`
			`call getInstID`
			`jr z, .instr`
			`call getDirectiveID`
			`jr z, .direc`
zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. 2019-05-16 21:53:42 +10:00			`.error:`
zasm: little code reorganisation 2019-05-11 11:19:34 +10:00			`; no match`
			`ld b, TOK_BAD`
			`jr .end`
			`.instr:`
			`ld b, TOK_INSTR`
			`jr .end`
			`.direc:`
			`ld b, TOK_DIRECTIVE`
			`jr .end`
			`.label:`
			`ld b, TOK_LABEL`
			`.end:`
			`ld c, a`
			`ret`