1
0
mirror of https://github.com/hsoft/collapseos.git synced 2024-11-28 03:28:07 +11:00
collapseos/apps/zasm/tok.asm

251 lines
5.5 KiB
NASM
Raw Normal View History

2019-05-11 11:19:34 +10:00
; *** Consts ***
2019-05-19 10:31:52 +10:00
.equ TOK_INSTR 0x01
.equ TOK_DIRECTIVE 0x02
.equ TOK_LABEL 0x03
.equ TOK_EOF 0xfe ; end of file
.equ TOK_BAD 0xff
2019-05-11 11:19:34 +10:00
.equ SCRATCHPAD_SIZE 0x40
2019-05-11 11:19:34 +10:00
; *** Variables ***
2019-05-19 10:31:52 +10:00
.equ scratchpad TOK_RAMSTART
.equ TOK_RAMEND scratchpad+SCRATCHPAD_SIZE
2019-05-11 11:19:34 +10:00
; *** Code ***
; Sets Z is A is ';' or null.
isLineEndOrComment:
2019-05-19 10:31:52 +10:00
cp 0x3b ; ';'
2019-05-11 11:19:34 +10:00
ret z
; continue to isLineEnd
; Sets Z is A is CR, LF, or null.
isLineEnd:
or a ; same as cp 0
ret z
cp CR
ret z
cp LF
ret z
cp '\'
2019-05-11 11:19:34 +10:00
ret
; Sets Z is A is ' ', ',', ';', CR, LF, or null.
isSepOrLineEnd:
call isSep
ret z
jr isLineEndOrComment
2019-05-11 11:19:34 +10:00
; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
; Sets Z if yes, unset if no.
;
; If it's a label, we change the trailing ':' char with a null char. It's a bit
; dirty, but it's the easiest way to proceed.
isLabel:
push hl
ld a, ':'
call findchar
2019-05-11 11:19:34 +10:00
ld a, (hl)
cp ':'
jr nz, .nomatch
; We also have to check that it's our last char.
inc hl
ld a, (hl)
or a ; cp 0
jr nz, .nomatch ; not a null char following the :. no match.
; We have a match!
; Remove trailing ':'
xor a ; Z is set
dec hl
2019-05-11 11:19:34 +10:00
ld (hl), a
jr .end
.nomatch:
call unsetZ
2019-05-11 11:19:34 +10:00
.end:
pop hl
ret
; Read I/O as long as it's whitespace. When it's not, stop and return the last
; read char in A
_eatWhitespace:
call ioGetB
call isSep
ret nz
jr _eatWhitespace
; Read ioGetB until a word starts, then read ioGetB as long as there is no
; separator and put that contents in (scratchpad), null terminated, for a
; maximum of SCRATCHPAD_SIZE-1 characters.
; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
; right there. If scratchpad is not big enough, we stop right there and error.
; HL points to scratchpad
; Sets Z if a word could be read, unsets if not.
2019-05-11 11:19:34 +10:00
readWord:
push bc
; Get to word
call _eatWhitespace
call isLineEndOrComment
jr z, .error
ld hl, scratchpad
2019-05-11 11:19:34 +10:00
ld b, SCRATCHPAD_SIZE-1
; A contains the first letter to read
; Are we opening a double quote?
cp '"'
jr z, .insideQuote
; Are we opening a single quote?
cp 0x27 ; '
jr z, .singleQuote
.loop:
ld (hl), a
inc hl
call ioGetB
2019-05-11 11:19:34 +10:00
call isSepOrLineEnd
jr z, .success
cp ','
jr z, .success
djnz .loop
; out of space. error.
.error:
; We need to put the last char we've read back so that gotoNextLine
; behaves properly.
call ioPutBack
call unsetZ
jr .end
2019-05-11 11:19:34 +10:00
.success:
call ioPutBack
; null-terminate scratchpad
2019-05-11 11:19:34 +10:00
xor a
ld (hl), a
ld hl, scratchpad
2019-05-11 11:19:34 +10:00
.end:
pop bc
ret
.insideQuote:
; inside quotes, we accept literal whitespaces, but not line ends.
ld (hl), a
inc hl
call ioGetB
cp '"'
jr z, .loop ; ending the quote ends the word
call isLineEnd
jr z, .error ; ending the line without closing the quote,
; nope.
djnz .insideQuote
; out of space. error.
jr .error
.singleQuote:
; single quote is more straightforward: we have 3 chars and we put them
; right in scratchpad
ld (hl), a
call ioGetB
or a
jr z, .error
inc hl
ld (hl), a
call ioGetB
cp 0x27 ; '
jr nz, .error
inc hl
ld (hl), a
jr .loop
2019-05-11 11:19:34 +10:00
; Reads the next char in I/O. If it's a comma, Set Z and return. If it's not,
; Put the read char back in I/O and unset Z.
readComma:
call _eatWhitespace
cp ','
ret z
call ioPutBack
call unsetZ
ret
; Read ioGetB until we reach the beginning of next line, skipping comments if
; necessary. This skips all whitespace, \n, \r, comments until we reach the
; first non-comment character. Then, we put it back (ioPutBack) and return.
;
; If gotoNextLine encounters anything else than whitespace, comment or line
; separator, we error out (no putback)
; Sets Z if we reached a new line. Unset if EOF or error.
gotoNextLine:
.loop1:
; first loop is "strict", that is: we error out on non-whitespace.
call ioGetB
call isSepOrLineEnd
ret nz ; error
or a ; cp 0
jr z, .eof
call isLineEnd
jr z, .loop3 ; good!
cp 0x3b ; ';'
jr z, .loop2 ; comment starting, go to "fast lane"
jr .loop1
.loop2:
; second loop is the "comment loop": anything is valid and we just run
; until EOL.
call ioGetB
or a ; cp 0
jr z, .eof
cp '\' ; special case: '\' doesn't count as a line end
; in a comment.
jr z, .loop2
call isLineEnd
jr z, .loop3
jr .loop2
.loop3:
; Loop 3 happens after we reach our first line sep. This means that we
; wade through whitespace until we reach a non-whitespace character.
call ioGetB
or a ; cp 0
jr z, .eof
cp 0x3b ; ';'
jr z, .loop2 ; oh, another comment! go back to loop2!
call isSepOrLineEnd
jr z, .loop3
; Non-whitespace. That's our goal! Put it back
call ioPutBack
.eof:
cp a ; ensure Z
2019-05-11 11:19:34 +10:00
ret
; Parse line in (HL) and read the next token in BC. The token is written on
; two bytes (B and C). B is a token type (TOK_* constants) and C is an ID
; specific to that token type.
; Advance HL to after the read word.
; If no token matches, TOK_BAD is written to B
tokenize:
call readWord
jr z, .process ; read successful, process into token.
; Error. It could be EOL, EOF or scraptchpad size problem
; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
; that's obviously what we want to do. If it's EOF, we can check
; it after. If it's a scratchpad overrun, gotoNextLine handles it.
call gotoNextLine
jr nz, .error
or a ; Are we EOF?
jr nz, tokenize ; not EOF? then continue!
; We're EOF
ld b, TOK_EOF
ret
.process:
2019-05-11 11:19:34 +10:00
call isLabel
jr z, .label
call getInstID
jr z, .instr
call getDirectiveID
jr z, .direc
.error:
2019-05-11 11:19:34 +10:00
; no match
ld b, TOK_BAD
jr .end
.instr:
ld b, TOK_INSTR
jr .end
.direc:
ld b, TOK_DIRECTIVE
jr .end
.label:
ld b, TOK_LABEL
.end:
ld c, a
ret