collapseos/apps/zasm/tok.asm

255 lines
5.3 KiB
NASM
Raw Normal View History

; *** Consts ***
; Token types. tokenize returns one of these in B.
TOK_INSTR	.equ	0x01
TOK_DIRECTIVE	.equ	0x02
TOK_LABEL	.equ	0x03
TOK_EOF		.equ	0xfe	; end of file
TOK_BAD		.equ	0xff	; nothing matched, or read error
; Size, in bytes, of the scratchpad buffer filled by readWord (including the
; terminating null).
.equ	SCRATCHPAD_SIZE	0x40
; *** Variables ***
; Buffer of SCRATCHPAD_SIZE bytes into which readWord copies the word it reads,
; null-terminated. It is reserved in place with .fill, so whatever is assembled
; right after it directly follows the buffer in memory.
scratchpad:
.fill SCRATCHPAD_SIZE
; *** Code ***
; Sets Z if A is ';' (comment start) or a line end as defined by isLineEnd
; below (null, CR, LF or '\'). Z is unset otherwise. A is preserved.
isLineEndOrComment:
	cp	';'
	ret	z
	; Not a comment start: fall through to isLineEnd.

; Sets Z if A is null, CR (0x0d), LF (0x0a) or '\'. Z is unset otherwise.
; '\' is treated as a line end here; gotoNextLine special-cases it inside
; comments. A is preserved (only compares are performed).
isLineEnd:
	or	a		; same as cp 0
	ret	z
	cp	0x0d
	ret	z
	cp	0x0a
	ret	z
	cp	'\'
	ret
; Sets Z if A is a space (' ') or a tab (0x09). Z is unset otherwise.
; Note that ',' is NOT tested here: callers that treat a comma as a separator
; have to check for it themselves.
; A is preserved (only compares are performed).
isSep:
cp ' '
ret z
cp 0x09
ret
; Sets Z if A is a separator (' ' or tab, see isSep), a comment start (';') or
; a line end (null, CR, LF or '\'). Z is unset otherwise.
; ',' is not part of this set.
isSepOrLineEnd:
call isSep
ret z
; Not a separator: the answer is whatever isLineEndOrComment says. Jumping
; (instead of calling) lets its ret return straight to our caller.
jr isLineEndOrComment
; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
; Sets Z if yes, unset if no.
;
; If it's a label, we change the trailing ':' char with a null char. It's a bit
; dirty, but it's the easiest way to proceed.
;
; HL is preserved. A is destroyed (it is 0 when a label was matched).
isLabel:
	push	hl
	ld	a, ':'
	call	findchar
	; NOTE(review): this assumes findchar leaves HL either on the first ':'
	; or on the string's terminating null -- confirm against findchar.
	ld	a, (hl)
	cp	':'
	jr	nz, .nomatch
	; We also have to check that it's our last char.
	inc	hl
	ld	a, (hl)
	or	a		; cp 0
	jr	nz, .nomatch	; not a null char following the :. no match.
	; We have a match!
	; Remove trailing ':'
	xor	a		; A = 0 and Z is set
	dec	hl		; back on the ':'. 16-bit dec leaves flags alone
	ld	(hl), a
	jr	.end
.nomatch:
	call	unsetZ
.end:
	pop	hl		; pop doesn't touch flags: Z survives
	ret
; Read ioGetC until a word starts, then read ioGetC as long as there is no
; separator and put that contents in (scratchpad), null terminated, for a
; maximum of SCRATCHPAD_SIZE-1 characters.
; A word ends on a separator (space, tab), a ',', a comment start or a line end.
; The terminating character is always put back with ioPutBack.
; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
; right there. If scratchpad is not big enough, we stop right there and error.
;
; Quoting:
; - A word starting with '"' accepts literal separators until the closing '"'.
;   A line end before the closing quote is an error.
; - A word starting with "'" is a 3 chars literal ('x'). Anything else than a
;   closing quote in 3rd position (or EOF in 2nd position) is an error.
;
; On success, Z is set and HL points to scratchpad. On error, Z is unset and
; HL is undefined. BC is preserved.
;
; Invariant while filling: B is the number of character slots left, that is,
; B == SCRATCHPAD_SIZE-1 - (HL - scratchpad) each time a char is about to be
; stored. Every stored char must be charged to B, otherwise the terminating
; null can land past the end of scratchpad and corrupt whatever follows it.
readWord:
	push	bc
	; Get to word
.loop1:
	call	ioGetC
	call	isLineEndOrComment
	jr	z, .error	; nothing to read on this line
	call	isSep
	jr	z, .loop1	; leading whitespace, keep going
	; A contains the first letter to read
	ld	hl, scratchpad
	ld	b, SCRATCHPAD_SIZE-1
	; Are we opening a double quote?
	cp	'"'
	jr	z, .insideQuote
	; Are we opening a single quote?
	cp	0x27		; '
	jr	z, .singleQuote
.loop2:
	ld	(hl), a
	inc	hl
	call	ioGetC
	call	isSepOrLineEnd
	jr	z, .success
	cp	','
	jr	z, .success
	djnz	.loop2
	; out of space. error.
.error:
	; We need to put the last char we've read back so that gotoNextLine
	; behaves properly.
	call	ioPutBack
	call	unsetZ
	jr	.end
.success:
	call	ioPutBack
	; null-terminate scratchpad
	xor	a		; also sets Z; nothing below touches flags
	ld	(hl), a
	ld	hl, scratchpad
.end:
	pop	bc
	ret
.insideQuote:
	; inside quotes, we accept literal whitespaces, but not line ends.
	ld	(hl), a
	inc	hl
	call	ioGetC
	cp	'"'
	jr	z, .closeQuote	; ending the quote ends the word
	call	isLineEnd
	jr	z, .error	; ending the line without closing the quote,
				; nope.
	djnz	.insideQuote
	; out of space. error.
	jr	.error
.closeQuote:
	; The char stored by the last .insideQuote pass hasn't been charged to
	; B yet. Do it now, before .loop2 stores the closing quote. If that was
	; the last slot, there's no room left for the quote plus the null.
	; djnz touches neither A (still '"') nor flags.
	djnz	.loop2
	jr	.error
.singleQuote:
	; single quote is more straightforward: we have 3 chars and we put them
	; right in scratchpad
	ld	(hl), a		; opening quote
	call	ioGetC
	or	a
	jr	z, .error	; EOF right after the opening quote
	inc	hl
	ld	(hl), a		; the quoted char
	call	ioGetC
	cp	0x27		; '
	jr	nz, .error
	inc	hl
	; Two slots were filled above without going through djnz. Charge them
	; to B so that .loop2's bound stays in sync with HL.
	dec	b
	dec	b
	jr	.loop2		; .loop2 stores the closing quote and goes on
; Consume one char from I/O and tell whether it was a comma.
; Comma: it stays consumed and we return with Z set.
; Anything else: the char is handed back through ioPutBack and we return with
; whatever flags unsetZ yields (Z unset).
readComma:
	call	ioGetC
	cp	','
	ret	z		; got our comma
	call	ioPutBack
	jp	unsetZ		; tail call: unsetZ's ret returns to our caller
; Read ioGetC until we reach the beginning of next line, skipping comments if
; necessary. This skips all whitespace, \n, \r, comments until we reach the
; first non-comment character. Then, we put it back (ioPutBack) and return.
;
; If gotoNextLine encounters anything else than whitespace, comment or line
; separator, we error out (no putback)
;
; Returns:
; - Z unset: error (unexpected char before the end of the line).
; - Z set, A == 0: EOF was reached.
; - Z set otherwise: we're at the beginning of a new line. tokenize tests A
;   for non-zero in that case, which assumes ioPutBack preserves A -- confirm
;   against ioPutBack.
gotoNextLine:
.loop1:
	; first loop is "strict", that is: we error out on non-whitespace.
	call	ioGetC
	call	isSepOrLineEnd
	ret	nz		; error
	or	a		; cp 0
	jr	z, .eof
	call	isLineEnd
	jr	z, .loop3	; good!
	cp	';'
	jr	nz, .loop1	; plain whitespace, keep scanning
	; comment starting, fall through to the "fast lane"
.loop2:
	; second loop is the "comment loop": anything is valid and we just run
	; until EOL.
	call	ioGetC
	or	a		; cp 0
	jr	z, .eof
	cp	'\'		; special case: '\' doesn't count as a line end
				; in a comment.
	jr	z, .loop2
	call	isLineEnd
	jr	nz, .loop2	; still inside the comment
	; line end reached, fall through
.loop3:
	; Loop 3 happens after we reach our first line sep. This means that we
	; wade through whitespace until we reach a non-whitespace character.
	call	ioGetC
	or	a		; cp 0
	jr	z, .eof
	cp	';'
	jr	z, .loop2	; oh, another comment! go back to loop2!
	call	isSepOrLineEnd
	jr	z, .loop3
	; Non-whitespace. That's our goal! Put it back
	call	ioPutBack
.eof:
	cp	a		; ensure Z, without changing A
	ret
; Read the next word from I/O (through readWord) and classify it as a token,
; returned in BC. The token is written on two bytes (B and C). B is a token
; type (TOK_* constants) and C is an ID specific to that token type.
;
; Empty lines and comments are skipped: when readWord can't read a word, we
; move to the next line with gotoNextLine and try again.
;
; B is set to:
; - TOK_LABEL if the word ends with ':' (see isLabel)
; - TOK_INSTR if getInstID sets Z
; - TOK_DIRECTIVE if getDirectiveID sets Z
; - TOK_EOF when the end of input is reached (C is left untouched)
; - TOK_BAD if nothing matches or if gotoNextLine reports an error
; For every type but TOK_EOF, C receives whatever is in A at that point.
; NOTE(review): for TOK_INSTR/TOK_DIRECTIVE this presumably is the ID returned
; in A by getInstID/getDirectiveID -- confirm against those routines.
;
; When a word was read, HL points to scratchpad, which holds it.
tokenize:
	call	readWord
	jr	z, .process	; read successful, process into token.
	; Error. It could be EOL, EOF or scratchpad size problem
	; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
	; that's obviously what we want to do. If it's EOF, we can check
	; it after. If it's a scratchpad overrun, gotoNextLine handles it.
	call	gotoNextLine
	jr	nz, .error
	or	a		; Are we EOF? (gotoNextLine leaves A == 0 then)
	jr	nz, tokenize	; not EOF? then continue!
	; We're EOF
	ld	b, TOK_EOF
	ret
.process:
	call	isLabel
	jr	z, .label
	call	getInstID
	jr	z, .instr
	call	getDirectiveID
	jr	z, .direc
.error:
	; no match
	ld	b, TOK_BAD
	jr	.end
.instr:
	ld	b, TOK_INSTR
	jr	.end
.direc:
	ld	b, TOK_DIRECTIVE
	jr	.end
.label:
	ld	b, TOK_LABEL
.end:
	ld	c, a
	ret