2019-04-21 23:25:52 +10:00
|
|
|
; tok
|
|
|
|
;
|
|
|
|
; Tokenizes an ASM source file into 1, 2 or 3-sized structures.
|
|
|
|
;
|
|
|
|
; *** Requirements ***
|
|
|
|
; JUMP_UPCASE
; JUMP_UNSETZ
|
|
|
|
|
2019-05-01 11:13:37 +10:00
|
|
|
; *** Consts ***
; Token types. tokenize writes one of these as the first byte of a token.
TOK_INSTR	.equ	0x01	; word matched by getInstID
TOK_DIRECTIVE	.equ	0x02	; word matched by getDirectiveID
TOK_BAD		.equ	0xff	; word matched by neither
|
|
|
|
|
2019-04-21 23:25:52 +10:00
|
|
|
; *** Code ***
|
2019-05-01 11:27:44 +10:00
|
|
|
; Read the next token from the line at (HL) and store it at (DE).
;
; A token is two bytes: a type (one of the TOK_* constants) followed by an ID
; specific to that type. The ID is whatever getInstID or getDirectiveID left
; in A. If the word matches neither, only the type byte is written, as
; TOK_BAD.
;
; The buffer at (DE) doubles as scratch space for the word being looked up:
; readWord stores up to 4 characters plus a null terminator there, so it has
; to be at least 5 bytes long.
;
; On return, HL is where readWord stopped in the line. On a match, Z is set,
; A holds the ID and DE points to the ID byte. Without a match, Z is reset and
; A is TOK_BAD. A match clobbers the shadow AF'.
; This relies on getInstID and getDirectiveID preserving HL and DE.
tokenize:
	xor	a
	ld	(de), a		; start from an empty token
	call	toWord		; skip leading separators (result not checked)
	ld	a, 4		; longest word we look up
	call	readWord
	ex	de, hl		; HL = word just read, DE = line position
	call	getInstID
	jr	z, .isInstr
	call	getDirectiveID
	jr	z, .isDirec
	; Neither an instruction nor a directive.
	ex	de, hl		; HL = line position, DE = token again
	ld	a, TOK_BAD
	ld	(de), a
	ret
.isDirec:
	ex	af, af'		; stash the ID (and Z) in AF'
	ld	a, TOK_DIRECTIVE
	jr	.store
.isInstr:
	ex	af, af'		; stash the ID (and Z) in AF'
	ld	a, TOK_INSTR
	; Continues onto .store
.store:
	ex	de, hl		; HL = line position, DE = token again
	ld	(de), a		; first byte: token type
	ex	af, af'		; bring the ID (and Z) back
	inc	de
	ld	(de), a		; second byte: ID
	ret
|
|
|
|
|
2019-04-21 23:25:52 +10:00
|
|
|
; Sets Z if A is the comment character ';' or a line end (CR, LF or null).
; A is left untouched.
isLineEndOrComment:
	cp	';'
	ret	z
	; Not a comment: fall through into isLineEnd, which has to stay
	; directly below this routine.
|
|
|
|
|
|
|
|
; Sets Z if A is a line end: null, CR (0x0d) or LF (0x0a).
; A is left untouched.
isLineEnd:
	or	a		; null? (cheaper than cp 0)
	ret	z
	cp	0x0d		; CR?
	ret	z
	cp	0x0a		; LF? This last compare is the result.
	ret
|
|
|
|
|
2019-05-01 05:51:39 +10:00
|
|
|
; Sets Z if A is a separator: space, tab (0x09) or comma.
; A is left untouched.
isSep:
	cp	' '
	ret	z
	cp	0x09		; tab
	ret	z
	cp	','		; this last compare is the result
	ret
|
|
|
|
|
|
|
|
; Sets Z if A cannot be part of a word: a separator (' ', tab, ','), the
; comment character ';' or a line end (CR, LF, null).
isSepOrLineEnd:
	call	isSep
	ret	z
	; Not a separator: isLineEndOrComment's verdict is ours, so jump to it
	; and let it return straight to our caller.
	jp	isLineEndOrComment
|
|
|
|
|
|
|
|
; Read the word at (HL) into (DE), null terminated, copying at most A
; characters. Each character goes through JUMP_UPCASE before being stored.
; The buffer at (DE) must have room for A+1 bytes.
;
; Copying stops at the first separator, comment or line-end character, or
; once A characters have been copied, whichever comes first. HL is left on the
; character where copying stopped, which is a separator only if the word was
; not longer than A characters.
;
; Returns the number of characters copied in A. BC and DE are preserved.
; If A is 0 on entry, nothing is copied and only the terminator is written.
readWord:
	push	bc
	push	de
	push	af		; remember the maximum for the length computation
	ld	b, a		; B = characters we may still copy
	or	a
	jr	z, .done	; max of 0: djnz would otherwise loop 256 times
.loop:
	ld	a, (hl)
	call	isSepOrLineEnd
	jr	z, .done
	call	JUMP_UPCASE
	ld	(de), a
	inc	hl
	inc	de
	djnz	.loop
.done:
	xor	a
	ld	(de), a		; null-terminate
	pop	af		; A = maximum passed by the caller
	sub	a, b		; minus what was left = characters copied
	pop	de
	pop	bc
	ret
|
|
|
|
|
|
|
|
; Advance HL past separators (' ', tab, ',') until it points to the first
; character of a word.
; Sets Z if a word was found; A is then 0. If a line end (CR, LF, null) or a
; comment (';') comes first, HL stops on that character and the result is
; whatever JUMP_UNSETZ returns, which is expected to be Z reset.
toWord:
	ld	a, (hl)
	call	isLineEndOrComment
	jr	z, .lineOver
	call	isSep
	jr	nz, .found
	inc	hl		; separator: skip it and look again
	jr	toWord
.found:
	xor	a		; ensure Z
	ret
.lineOver:
	jp	JUMP_UNSETZ	; tail call, returns straight to our caller
|
|
|
|
|
2019-05-01 05:51:39 +10:00
|
|
|
; Advance HL to the beginning of the next line, that is, right after the next
; CR (0x0d), LF (0x0a) or CR LF pair.
; If null is reached, either before any line break or as the character right
; after a lone CR or LF, HL stops on it and we error out.
; Sets Z on success; on error the result is whatever JUMP_UNSETZ returns,
; which is expected to be Z reset. A is clobbered.
gotoNextLine:
	dec	hl		; a bit weird, but makes the looping easier
.loop:
	inc	hl
	ld	a, (hl)
	call	isLineEnd
	jr	nz, .loop
	; A is CR, LF or null
	or	a		; is 0?
	jr	z, .error
	; Step over the line break. Only a CR immediately followed by an LF
	; counts as a single break; any other second line-end character (LF LF,
	; CR CR, LF CR) starts a blank line of its own and is left for the
	; caller to see.
	inc	hl
	cp	0x0d		; Z if the break we found was a CR...
	ld	a, (hl)		; ...LD leaves that flag untouched
	jr	nz, .checkEof	; it was an LF: nothing to pair up
	cp	0x0a
	jr	nz, .checkEof	; lone CR
	inc	hl		; CR LF pair: skip the LF as well
	jr	.success
.checkEof:
	or	a		; is 0?
	jr	z, .error
	; Continue on to .success
.success:
	xor	a		; ensure Z
	ret
.error:
	jp	JUMP_UNSETZ	; tail call, returns straight to our caller
|
|
|
|
|
2019-05-01 06:24:45 +10:00
|
|
|
; Starting with the line at (HL), move forward one line at a time until
; reaching a line that holds something other than separators and comments.
; Sets Z when such a line is found; HL then points to its first word, as left
; by toWord. Returns with gotoNextLine's error result (Z unset) if the end of
; the input is reached first.
gotoNextNotBlankLine:
	call	toWord
	ret	z		; found a word: this line is not blank
	; HL is on a line end or on the start of a comment: try the next line.
	call	gotoNextLine
	jr	z, gotoNextNotBlankLine
	ret			; no next line, Z is unset
|
|
|
|
|