; tok
;
; Tokenizes an ASM source file into 1, 2 or 3-sized structures.
;
; *** Requirements ***
; JUMP_UPCASE
; JUMP_UNSETZ

; *** Consts ***
; Token type codes. tokenize returns one of these in B.
; Set when getInstID reports a match (Z set) for the word.
TOK_INSTR	.equ	0x01
; Set when getInstID fails but getDirectiveID reports a match.
TOK_DIRECTIVE	.equ	0x02
; Set when neither lookup reports a match.
TOK_BAD		.equ	0xff

; *** Code ***
; Read the next token of the line at (HL) and return it in BC.
;
; Leading separators are skipped, then up to 4 characters of the word are
; copied (upcased, null terminated) into scratchpad and looked up, first with
; getInstID and then with getDirectiveID.
;
; Out: B = token type (TOK_INSTR, TOK_DIRECTIVE or TOK_BAD when neither lookup
;          sets Z).
;      C = A as left by the last lookup routine that ran; on a match this is
;          the ID specific to the token type. On TOK_BAD its value is
;          presumably meaningless.
;      HL = position right after the characters that were read.
; DE is preserved. The result of toWord is not checked: on a blank or
; comment-only line readWord simply copies an empty word.
tokenize:
	push	de
	call	toWord
	ld	de, scratchpad
	ld	a, 4
	call	readWord
	push	hl		; keep the advanced line pointer for the caller
	ld	hl, scratchpad
	call	getInstID
	jr	nz, .notInstr
	ld	b, TOK_INSTR
	jr	.done
.notInstr:
	call	getDirectiveID
	; LD does not touch the flags, so Z still reflects getDirectiveID below.
	ld	b, TOK_DIRECTIVE
	jr	z, .done
	ld	b, TOK_BAD
.done:
	ld	c, a
	pop	hl		; advanced line pointer
	pop	de
	ret

; Sets Z if A is ';', CR, LF, or null. A is left unchanged.
isLineEndOrComment:
	cp	';'
	ret	z
	; Not a comment marker: deliberately fall through into isLineEnd, which
	; must therefore stay immediately below this routine.

; Sets Z if A is CR (0x0d), LF (0x0a), or null. A is left unchanged.
isLineEnd:
	or	a	; same as cp 0
	ret	z
	cp	0x0d
	ret	z
	cp	0x0a
	ret

; Sets Z if A is a word separator: ' ', tab (0x09) or ','.
; A is left unchanged; only the flags are modified.
isSep:
	cp	' '
	ret	z
	cp	0x09
	ret	z
	cp	','
	ret

; Sets Z if A ends a word: a separator (' ', tab, ','), a comment start (';'),
; CR, LF, or null. A is left unchanged.
isSepOrLineEnd:
	call	isSep
	ret	z
	; Not a separator: the verdict of isLineEndOrComment is our verdict, so
	; jump to it and let its RET return straight to our caller.
	jp	isLineEndOrComment

; Read the word at (HL) and copy it, upcased and null terminated, to (DE), for
; a maximum of A characters.
;
; In:  HL = source text
;      DE = destination buffer; must have room for A characters plus the
;           terminating null
;      A  = maximum number of characters to copy
; Out: A  = number of characters actually copied (0..maximum)
;      HL = first character that was not copied: the separator/line end that
;           stopped the word or, when the word is longer than the maximum, the
;           first character beyond the maximum.
; BC and DE are preserved.
; Relies on JUMP_UPCASE and isSepOrLineEnd leaving BC intact (the loop counter
; lives in B, the requested maximum in C).
readWord:
	push	bc
	push	de
	ld	b, a		; B = characters we may still copy (DJNZ counter)
	ld	c, a		; C = requested maximum, needed for the length
	or	a
	jr	z, .success	; maximum of 0: DJNZ would wrap and copy 256 chars
.loop:
	ld	a, (hl)
	call	isSepOrLineEnd
	jr	z, .success
	call	JUMP_UPCASE
	ld	(de), a
	inc	hl
	inc	de
	djnz	.loop
.success:
	xor	a
	ld	(de), a		; null terminator
	; length = maximum - remaining. This used to be a hard-coded "4 - B",
	; which was only right for callers passing A = 4.
	ld	a, c
	sub	a, b
	pop	de
	pop	bc
	ret

; Advance HL past any separators (' ', tab, ',') to the start of the next word
; on the current line.
; Success: HL points at the first non-separator character, A is 0 and Z is set.
; Failure: a ';', CR, LF or null was met first; HL is left pointing at it and
; we return through JUMP_UNSETZ, which is expected to clear Z.
toWord:
	jr	.check
.advance:
	inc	hl
.check:
	ld	a, (hl)
	call	isLineEndOrComment
	jr	z, .lineOver
	call	isSep
	jr	z, .advance
	; Neither line end nor separator: this is the start of a word.
	xor	a	; ensure Z
	ret
.lineOver:
	jp	JUMP_UNSETZ	; tail call; its RET returns to our caller

; Advance HL to the beginning of the next line, that is, right after the next
; LF (0x0a) or CR (0x0d), or a pair of them. If we reach null, either before a
; line end or directly after the first line-end character, we stop there and
; error out.
; Sets Z (with A = 0) on success; on error returns through JUMP_UNSETZ, which
; is expected to clear Z.
gotoNextLine:
.scan:
	ld	a, (hl)
	call	isLineEnd
	jr	z, .eol
	inc	hl
	jr	.scan
.eol:
	; (HL) is CR, LF or null
	or	a	; null?
	jr	z, .eof
	; The line end may be a two-character sequence such as CR followed by
	; LF. Any second line-end character is consumed, so LF LF skips an empty
	; line as well; this is of no real consequence in our context.
	inc	hl
	ld	a, (hl)
	call	isLineEnd
	jr	nz, .done
	or	a	; null?
	jr	z, .eof
	inc	hl	; skip the second line-end character
.done:
	xor	a	; ensure Z
	ret
.eof:
	jp	JUMP_UNSETZ	; tail call; its RET returns to our caller

; Starting with the line at (HL), skip lines until one contains something
; other than separators and comments.
; Sets Z when such a line is found, with HL at its first word (as left by
; toWord). Z is unset when gotoNextLine reports that the end of the input was
; reached first.
gotoNextNotBlankLine:
.retry:
	call	toWord
	ret	z	; found a word: this line is not blank
	; No word here: (HL) is at the end of the line or at the start of a
	; comment. Move on to the following line.
	call	gotoNextLine
	jr	z, .retry
	ret		; Z unset: no more lines