collapseos/apps/zasm/tok.asm

; *** Consts ***
; Token types, returned in B by tokenize.
TOK_INSTR	.equ	0x01	; word matched by getInstID
TOK_DIRECTIVE	.equ	0x02	; word matched by getDirectiveID
TOK_LABEL	.equ	0x03	; word matched by isLabel (ends with ':')
TOK_EOF		.equ	0xfe	; end of file
TOK_BAD		.equ	0xff	; no match, or gotoNextLine reported an error

; Size in bytes of scratchpad, terminating null included: readWord accepts
; words of at most SCRATCHPAD_SIZE-1 characters.
.equ	SCRATCHPAD_SIZE	0x20
; *** Variables ***
; Buffer in which readWord stores the word it reads, null terminated.
scratchpad:
	.fill	SCRATCHPAD_SIZE

; *** Code ***

; Sets Z if A is ';' (comment start) or any character accepted by isLineEnd
; (null, CR, LF or '\'). A is left untouched.
isLineEndOrComment:
	cp	';'
	ret	z
	; not a comment start: fall through into isLineEnd

; Sets Z if A is null, CR (0x0d), LF (0x0a) or '\'. A is left untouched.
isLineEnd:
	or	a	; same as cp 0
	ret	z
	cp	0x0a
	ret	z
	cp	0x0d
	ret	z
	cp	'\'
	ret

; Sets Z if A is a separator: space, tab (0x09) or ','.
; A is left untouched.
isSep:
	cp	0x09
	ret	z
	cp	' '
	ret	z
	cp	','
	ret

; Sets Z if A is a separator (' ', tab or ','), a comment start (';') or a
; line end (null, CR, LF or '\').
isSepOrLineEnd:
	call	isSep
	; Not a separator? Then isLineEndOrComment has the final say on Z.
	call	nz, isLineEndOrComment
	ret

; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
; Sets Z if yes, unset if no. HL is preserved.
;
; If it's a label, we change the trailing ':' char with a null char, so that
; the string at (HL) becomes the bare label name. It's a bit dirty, but it's
; the easiest way to proceed.
isLabel:
	push	hl
	ld	a, ':'
	; JUMP_FINDCHAR is defined elsewhere; presumably it leaves HL on the
	; first ':' found, or on the terminating null if there is none. The
	; check right below copes with both outcomes.
	call	JUMP_FINDCHAR
	ld	a, (hl)
	cp	':'
	jr	nz, .nomatch
	; We also have to check that it's our last char.
	inc	hl
	ld	a, (hl)
	or	a		; cp 0
	jr	nz, .nomatch	; not a null char following the :. no match.
	; We have a match!
	; Remove trailing ':'. HL is currently one past it (on the null), so
	; step back first; a 16-bit dec doesn't touch flags.
	dec	hl
	xor	a		; Z is set
	ld	(hl), a		; ld doesn't touch flags either: Z stays set
	jr	.end
.nomatch:
	call	JUMP_UNSETZ
.end:
	pop	hl
	ret

; Pull characters from ioGetC, skipping separators, until a word begins. Then
; copy that word into (scratchpad), null terminated, stopping at the first
; separator, line end or comment start. That terminating character is put back
; with ioPutBack.
;
; A word can hold at most SCRATCHPAD_SIZE-1 characters. A longer word is an
; error, and so is hitting a line end, a comment or EOF before any word
; started. In both error cases, the last character read is put back and
; scratchpad is not null terminated.
;
; Sets Z if a word could be read, in which case HL points to scratchpad.
; Unsets Z on error. BC is preserved.
readWord:
	push	bc
	; Skip leading separators
.skip:
	call	ioGetC
	call	isLineEndOrComment
	jr	z, .fail	; line is over before any word began
	call	isSep
	jr	z, .skip
	; A contains the first letter of the word
	ld	b, SCRATCHPAD_SIZE-1	; room left, keeping a byte for the null
	ld	hl, scratchpad
.store:
	ld	(hl), a
	inc	hl
	call	ioGetC
	call	isSepOrLineEnd
	jr	z, .gotWord
	djnz	.store
	; B ran out: the word doesn't fit in scratchpad. Fall through to error.
.fail:
	; Put the last char we've read back so that gotoNextLine behaves
	; properly.
	call	ioPutBack
	call	JUMP_UNSETZ
	jr	.done
.gotWord:
	call	ioPutBack
	; null-terminate scratchpad. xor also sets Z, our success flag.
	xor	a
	ld	(hl), a
	ld	hl, scratchpad
.done:
	pop	bc
	ret

; Consume characters from ioGetC until the beginning of the next line's
; content is reached: the rest of the current line, then any run of
; whitespace, line separators and comments, are skipped. The first character
; that is none of those is put back (ioPutBack) before returning.
;
; On the current line, only separators, a comment or a line end are
; tolerated. Anything else is an error: we return right away with Z unset and
; the offending character is NOT put back.
;
; A null character from ioGetC is treated as EOF.
;
; Sets Z if we reached a new line or EOF (on EOF, A is 0). Unset on error.
gotoNextLine:
.strict:
	; "strict" phase: we error out on anything that isn't a separator, a
	; comment start or a line end.
	call	ioGetC
	call	isSepOrLineEnd
	ret	nz		; error
	or	a		; cp 0
	jr	z, .eof
	call	isLineEnd
	jr	z, .blank	; good! current line is over
	cp	';'
	jr	nz, .strict	; plain separator, keep scanning
	; comment starting: fall through to the "fast lane"
.comment:
	; "comment" phase: anything is valid and we just run until EOL.
	call	ioGetC
	or	a		; cp 0
	jr	z, .eof
	cp	'\'		; special case: '\' doesn't count as a line end
				; in a comment.
	jr	z, .comment
	call	isLineEnd
	jr	nz, .comment
	; line end reached: fall through
.blank:
	; We're past our first line separator. Wade through whitespace and
	; empty lines until we reach a non-whitespace character.
	call	ioGetC
	or	a		; cp 0
	jr	z, .eof
	cp	';'
	jr	z, .comment	; oh, another comment! back to the comment phase
	call	isSepOrLineEnd
	jr	z, .blank
	; Non-whitespace. That's our goal! Put it back
	call	ioPutBack
.eof:
	cp	a		; ensure Z
	ret

; Read the next word from input (through readWord) and classify it into a
; token, returned in BC. B is a token type (TOK_* constants) and C is an ID
; specific to that token type.
;
; When readWord can't deliver a word, we move to the next line with
; gotoNextLine and try again, until a word is read, EOF is reached (B is
; TOK_EOF, C is not written) or gotoNextLine errors out (B is TOK_BAD).
;
; A word is tried, in order, against isLabel, getInstID and getDirectiveID.
; C receives whatever is in A when the matching routine returns (presumably
; the ID for getInstID and getDirectiveID, which are defined elsewhere).
; If no token matches, TOK_BAD is written to B.
;
; When a word was read, HL points to scratchpad, as set by readWord.
tokenize:
	call	readWord
	jr	z, .classify	; read successful, process into token.
	; No word. It could be EOL, EOF or a scratchpad size problem.
	; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
	; that's obviously what we want to do. If it's EOF, we can check
	; it after. If it's a scratchpad overrun, gotoNextLine handles it.
	call	gotoNextLine
	jr	nz, .bad
	or	a		; Are we EOF?
	jr	nz, tokenize	; not EOF? then continue!
	; We're EOF
	ld	b, TOK_EOF
	ret
.classify:
	call	isLabel
	jr	z, .label
	call	getInstID
	jr	z, .instr
	call	getDirectiveID
	jr	z, .directive
	; no match: fall through
.bad:
	ld	b, TOK_BAD
	jr	.done
.label:
	ld	b, TOK_LABEL
	jr	.done
.directive:
	ld	b, TOK_DIRECTIVE
	jr	.done
.instr:
	ld	b, TOK_INSTR
.done:
	ld	c, a
	ret