collapseos/apps/zasm/tok.asm

; tok
;
; Tokenizes an ASM source file into 1, 2 or 3-sized structures.
;
; *** Requirements ***
; JUMP_UPCASE
; JUMP_UNSETZ

; *** Consts ***
; Token types. tokenize stores one of these as the first byte of a token.
; The word was recognized by getInstID.
TOK_INSTR	.equ	0x01
; The word was recognized by getDirectiveID.
TOK_DIRECTIVE	.equ	0x02
; Neither lookup recognized the word.
TOK_BAD		.equ	0xff

; *** Code ***
; Read the next word of the line at (HL) and store the matching token at (DE).
;
; In:   HL = pointer into the source line
;       DE = output buffer. It first serves as scratch space for the word
;            (readWord is asked for at most 4 characters plus a null), then
;            receives the token.
; Out:  When getInstID or getDirectiveID returns with Z set:
;         (DE)   = TOK_INSTR or TOK_DIRECTIVE
;         (DE+1) = the value that lookup returned in A (the ID)
;         DE is left pointing at the ID byte.
;       Otherwise TOK_BAD is written to (DE) and DE is not advanced.
;       HL stays where readWord stopped, provided the two lookups preserve
;       HL and DE (the swap-back below relies on that).
; Uses: A, flags and the shadow AF (it holds the ID while A carries the type).
tokenize:
	xor	a
	ld	(de), a		; start from a cleared type byte
	call	toWord		; skip leading separators; its Z result is not checked
	ld	a, 4
	call	readWord	; word -> (DE), HL moved past the copied characters
	ex	hl, de		; HL = word for the lookups, DE = line pointer
	call	getInstID
	jr	z, .isInstr
	call	getDirectiveID
	jr	z, .isDirec
	; neither lookup matched
	ex	hl, de		; back to HL = line, DE = output
	ld	a, TOK_BAD
	ld	(de), a
	ret
.isInstr:
	ex	af, af'		; park the ID in the shadow A
	ld	a, TOK_INSTR
	jr	.store
.isDirec:
	ex	af, af'		; park the ID in the shadow A
	ld	a, TOK_DIRECTIVE
	; falls through to .store
.store:
	ex	hl, de		; back to HL = line, DE = output
	ld	(de), a		; type byte
	ex	af, af'		; bring the ID back into A
	inc	de
	ld	(de), a		; ID byte
	ret

; Sets Z if A is ';', CR, LF, or null. Resets Z otherwise.
; A itself is not modified; only the flags change.
isLineEndOrComment:
	cp	';'
	ret	z
	; Not a comment start: fall through into isLineEnd, which must stay
	; directly below this routine.

; Sets Z if A is CR (0x0d), LF (0x0a), or null. Resets Z otherwise.
; A itself is not modified; only the flags change.
isLineEnd:
	or	a	; same as cp 0
	ret	z
	cp	0x0d
	ret	z
	cp	0x0a
	ret

; Sets Z if A is a separator: ' ', tab (0x09) or ','. Resets Z otherwise.
; A itself is not modified; only the flags change.
isSep:
	cp	' '
	ret	z
	cp	0x09
	ret	z
	cp	','
	ret

; Sets Z if A ends a word: a separator (' ', tab or ','), a comment start
; (';'), CR, LF or null. Resets Z otherwise.
isSepOrLineEnd:
	call	isSep
	ret	z
	; Tail call: whatever isLineEndOrComment leaves in the flags is what
	; our caller gets back.
	jp	isLineEndOrComment

; Copy the word at (HL) into the buffer at (DE), null-terminated. Each
; character goes through JUMP_UPCASE (presumably upcasing it) before being
; stored.
;
; In:   HL = source text
;       DE = destination buffer; needs room for A characters plus the null
;       A  = maximum number of characters to copy (0 copies nothing)
; Out:  A  = number of characters copied
;       HL = first character that was not copied: the separator, line end or
;            ';' that ended the word or, if the maximum was reached first,
;            the remainder of the word
;       DE and BC are preserved.
readWord:
	push	bc
	push	de		; remember the start of the buffer
	ld	b, a		; B = characters still allowed
	or	a
	jr	z, .done	; max of 0: djnz would otherwise loop 256 times
.loop:
	ld	a, (hl)
	call	isSepOrLineEnd
	jr	z, .done
	call	JUMP_UPCASE
	ld	(de), a
	inc	hl
	inc	de
	djnz	.loop
.done:
	xor	a
	ld	(de), a		; null-terminate
	; Length = end pointer - start pointer. At most 255 characters are
	; copied, so the difference of the low bytes is enough, even when the
	; buffer crosses a 256-byte boundary.
	ld	a, e
	pop	de		; DE = start of the buffer again
	sub	a, e
	pop	bc
	ret

; Advance HL past separators (see isSep) to the first character of the next
; word on the current line.
;
; Out:  Z set   -> HL points at a character that is neither a separator nor a
;                  line end / comment start; A is 0.
;       Z reset -> a line end (CR, LF, null) or ';' was reached first; HL
;                  points at it.
toWord:
	jr	.check
.skip:
	inc	hl
.check:
	ld	a, (hl)
	call	isLineEndOrComment
	; Line is over before any word: leave through JUMP_UNSETZ, whose return
	; goes straight back to our caller.
	jp	z, JUMP_UNSETZ
	call	isSep
	jr	z, .skip
	xor	a	; ensure Z
	ret

; Advance HL to the beginning of the next line, that is, right after the next
; CR (0x0d) or LF (0x0a), or after two of them in a row.
;
; Out:  Z set   -> HL points at the first character of the next line.
;       Z reset -> a null was reached, either before any line end or right
;                  after the first one; HL points at that null.
gotoNextLine:
	jr	.scan
.advance:
	inc	hl
.scan:
	ld	a, (hl)
	call	isLineEnd
	jr	nz, .advance
	; (HL) is CR, LF or null
	or	a	; null?
	jp	z, JUMP_UNSETZ	; end of text: return to our caller with Z reset
	; The line end may be a two-character sequence such as CR followed by
	; LF. Any second line-end character is swallowed, so LF LF skips an
	; empty line as well; that is of no consequence here.
	inc	hl
	ld	a, (hl)
	call	isLineEnd
	jr	nz, .found
	or	a	; null?
	jp	z, JUMP_UNSETZ	; text ends right after the line end
	inc	hl	; skip the second line-end character
.found:
	xor	a	; ensure Z
	ret

; Starting at (HL), skip lines until one contains something other than
; separators and comments.
;
; Out:  Z set   -> HL points at the first word of a non-blank line (where
;                  toWord stopped).
;       Z reset -> gotoNextLine reported the end of the text.
gotoNextNotBlankLine:
.retry:
	call	toWord
	ret	z	; found a word: this line is not blank
	; (HL) is at the end of the line or at the start of a comment; move on
	; to the following line and look again.
	call	gotoNextLine
	jr	z, .retry
	ret		; Z reset: no more lines
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`; tok`
			`;`
			`; Tokenizes an ASM source file into 1, 2 or 3-sized structures.`
			`;`
			`; * Requirements *`
			`; JUMP_UPCASE`

zasm: code consolidation 2019-05-01 11:13:37 +10:00			`; * Consts *`
			`TOK_INSTR .equ 0x01`
zasm: creep in the notion of directive 2019-05-01 11:27:44 +10:00			`TOK_DIRECTIVE .equ 0x02`
zasm: code consolidation 2019-05-01 11:13:37 +10:00			`TOK_BAD .equ 0xff`

zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`; * Code *`
zasm: creep in the notion of directive 2019-05-01 11:27:44 +10:00			`; Parse line in (HL) and read the next token in (DE). The token is written on`
			`; two bytes. The first byte is a token type (TOK_* constants) and the second`
			`; byte is an ID specific to that token type.`
			`; If no token matches, TOK_BAD is written to (DE)`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`tokenize:`
			`xor a`
zasm: move token variables from tok.asm into main.asm 2019-05-01 07:04:42 +10:00			`ld (de), a`
zasm: allow leading whitespace in parsed lines 2019-05-01 06:08:21 +10:00			`call toWord`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`ld a, 4`
			`call readWord`
zasm: introduce the concept of instruction ID It will make tokenization cleaner and it also makes the instruction table significantly more compact. 2019-05-01 10:25:38 +10:00			`ex hl, de`
			`call getInstID`
zasm: creep in the notion of directive 2019-05-01 11:27:44 +10:00			`jr z, .instr`
			`call getDirectiveID`
			`jr z, .direc`
zasm: code consolidation 2019-05-01 11:13:37 +10:00			`; no match`
zasm: creep in the notion of directive 2019-05-01 11:27:44 +10:00			`ex hl, de ; swap it back`
zasm: code consolidation 2019-05-01 11:13:37 +10:00			`ld a, TOK_BAD`
zasm: creep in the notion of directive 2019-05-01 11:27:44 +10:00			`ld (de), a`
			`ret`
			`.instr:`
			`ex af, af'`
			`ld a, TOK_INSTR`
			`jr .end`
			`.direc:`
			`ex af, af'`
			`ld a, TOK_DIRECTIVE`
			`jr .end`
			`.end:`
			`ex hl, de ; swap it back`
			`ld (de), a`
			`ex af, af'`
			`inc de`
zasm: introduce the concept of instruction ID It will make tokenization cleaner and it also makes the instruction table significantly more compact. 2019-05-01 10:25:38 +10:00			`ld (de), a`
zasm: move token variables from tok.asm into main.asm 2019-05-01 07:04:42 +10:00			`ret`

zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`; Sets Z is A is ';', CR, LF, or null.`
zasm: assemble multiple lines at once 2019-05-01 05:51:39 +10:00			`isLineEndOrComment:`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`cp ';'`
			`ret z`
zasm: assemble multiple lines at once 2019-05-01 05:51:39 +10:00			`; Continues onto isLineEnd...`

			`; Sets Z is A is CR, LF, or null.`
			`isLineEnd:`
			`or a ; same as cp 0`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`ret z`
			`cp 0x0d`
			`ret z`
			`cp 0x0a`
			`ret`

zasm: assemble multiple lines at once 2019-05-01 05:51:39 +10:00			`; Sets Z is A is ' ' '\t' or ','`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`isSep:`
			`cp ' '`
			`ret z`
zasm: assemble multiple lines at once 2019-05-01 05:51:39 +10:00			`cp 0x09`
			`ret z`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`cp ','`
			`ret`

			`; Sets Z is A is ' ', ',', ';', CR, LF, or null.`
			`isSepOrLineEnd:`
			`call isSep`
			`ret z`
zasm: assemble multiple lines at once 2019-05-01 05:51:39 +10:00			`call isLineEndOrComment`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`ret`

			`; read word in (HL) and put it in (DE), null terminated, for a maximum of A`
			`; characters. As a result, A is the read length. HL is advanced to the next`
			`; separator char.`
			`readWord:`
			`push bc`
zasm: introduce the concept of instruction ID It will make tokenization cleaner and it also makes the instruction table significantly more compact. 2019-05-01 10:25:38 +10:00			`push de`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`ld b, a`
			`.loop:`
			`ld a, (hl)`
			`call isSepOrLineEnd`
			`jr z, .success`
			`call JUMP_UPCASE`
			`ld (de), a`
			`inc hl`
			`inc de`
			`djnz .loop`
			`.success:`
			`xor a`
			`ld (de), a`
			`ld a, 4`
			`sub a, b`
			`jr .end`
			`.error:`
			`xor a`
			`ld (de), a`
			`.end:`
zasm: introduce the concept of instruction ID It will make tokenization cleaner and it also makes the instruction table significantly more compact. 2019-05-01 10:25:38 +10:00			`pop de`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`pop bc`
			`ret`

			`; (HL) being a string, advance it to the next non-sep character.`
			`; Set Z if we could do it before the line ended, reset Z if we couldn't.`
			`toWord:`
			`.loop:`
			`ld a, (hl)`
zasm: assemble multiple lines at once 2019-05-01 05:51:39 +10:00			`call isLineEndOrComment`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`jr z, .error`
			`call isSep`
			`jr nz, .success`
			`inc hl`
			`jr .loop`
			`.error:`
zasm: allow leading whitespace in parsed lines 2019-05-01 06:08:21 +10:00			`call JUMP_UNSETZ`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`ret`
			`.success:`
zasm: allow leading whitespace in parsed lines 2019-05-01 06:08:21 +10:00			`xor a ; ensure Z`
zasm: extract tok.asm from zasm.asm 2019-04-21 23:25:52 +10:00			`ret`

zasm: assemble multiple lines at once 2019-05-01 05:51:39 +10:00			`; Advance HL to the beginning of the next line, that is, right after the next`
			`; 0x10 or 0x13 or both. If we reach null, we stop and error out.`
			`; Sets Z on success, unsets it on error.`
			`gotoNextLine:`
			`dec hl ; a bit weird, but makes the looping easier`
			`.loop:`
			`inc hl`
			`ld a, (hl)`
			`call isLineEnd`
			`jr nz, .loop`
			`; (HL) is 0x10, 0x13 or 0`
			`or a ; is 0?`
			`jr z, .error`
			`; we might have 0x13 followed by 0x10, let's account for this.`
			`; Yes, 0x10 followed by 0x10 will make us skip two lines, but this is of`
			`; no real consequence in our context.`
			`inc hl`
			`ld a, (hl)`
			`call isLineEnd`
			`jr nz, .success`
			`or a ; is 0?`
			`jr z, .error`
			`; There was another line sep. Skip this char`
			`inc hl`
			`; Continue on to .success`
			`.success:`
			`xor a ; ensure Z`
			`ret`
			`.error:`
			`call JUMP_UNSETZ`
			`ret`

zasm: allow blank lines in asm source code 2019-05-01 06:24:45 +10:00			`; Repeatedly calls gotoNextLine until the line in (HL) points to a line that`
			`; isn't blank or 100% comment. Sets Z if we reach a line, Unset Z if we reach`
			`; EOF`
			`gotoNextNotBlankLine:`
			`call toWord`
			`ret z ; Z set? we have a not-blank line`
			`; Z not set? (HL) is at the end of the line or at the beginning of`
			`; comments.`
			`call gotoNextLine`
			`ret nz`
			`jr gotoNextNotBlankLine`