1
0
mirror of https://github.com/hsoft/collapseos.git synced 2024-11-17 09:48:05 +11:00
collapseos/apps/zasm/tok.asm
Virgil Dupras e9244b80ee zasm: big I/O overhaul
Instead of buffering input in memory one line at a time, we go in "just
in time" mode and always read contents directly from I/O, without
buffering.

It forces us to implement a `ioPutback` scheme, but on the other hand it
greatly simplifies cases where multiple tokens are on the same line
(when a label is directly followed by an instruction).

The end result feels much more solid and less hackish.
2019-05-16 07:53:42 -04:00

210 lines
4.5 KiB
NASM

; *** Consts ***
TOK_INSTR .equ 0x01
TOK_DIRECTIVE .equ 0x02
TOK_LABEL .equ 0x03
TOK_EOF .equ 0xfe ; end of file
TOK_BAD .equ 0xff
.equ SCRATCHPAD_SIZE 0x20
; *** Variables ***
scratchpad:
.fill SCRATCHPAD_SIZE
; *** Code ***
; Sets Z is A is ';' or null.
isLineEndOrComment:
cp ';'
ret z
; continue to isLineEnd
; Sets Z is A is CR, LF, or null.
isLineEnd:
or a ; same as cp 0
ret z
cp 0x0d
ret z
cp 0x0a
ret z
cp '\'
ret
; Sets Z is A is ' ' '\t' or ','
isSep:
cp ' '
ret z
cp 0x09
ret z
cp ','
ret
; Sets Z is A is ' ', ',', ';', CR, LF, or null.
isSepOrLineEnd:
call isSep
ret z
jr isLineEndOrComment
; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
; Sets Z if yes, unset if no.
;
; If it's a label, we change the trailing ':' char with a null char. It's a bit
; dirty, but it's the easiest way to proceed.
isLabel:
push hl
ld a, ':'
call JUMP_FINDCHAR
ld a, (hl)
cp ':'
jr nz, .nomatch
; We also have to check that it's our last char.
inc hl
ld a, (hl)
or a ; cp 0
jr nz, .nomatch ; not a null char following the :. no match.
; We have a match!
; Remove trailing ':'
xor a ; Z is set
ld (hl), a
jr .end
.nomatch:
call JUMP_UNSETZ
.end:
pop hl
ret
; Read ioGetC until a word starts, then read ioGetC as long as there is no
; separator and put that contents in (scratchpad), null terminated, for a
; maximum of SCRATCHPAD_SIZE-1 characters.
; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
; right there. If scratchpad is not big enough, we stop right there and error.
; HL points to scratchpad
; Sets Z if a word could be read, unsets if not.
readWord:
push bc
; Get to word
.loop1:
call ioGetC
call isLineEndOrComment
jr z, .error
call isSep
jr nz, .read
jr .loop1
.read:
ld hl, scratchpad
ld b, SCRATCHPAD_SIZE-1
; A contains the first letter to read
.loop2:
ld (hl), a
inc hl
call ioGetC
call isSepOrLineEnd
jr z, .success
djnz .loop2
; out of space. error.
.error:
; We need to put the last char we've read back so that gotoNextLine
; behaves properly.
call ioPutBack
call JUMP_UNSETZ
jr .end
.success:
call ioPutBack
; null-terminate scratchpad
xor a
ld (hl), a
ld hl, scratchpad
.end:
pop bc
ret
; Read ioGetC until we reach the beginning of next line, skipping comments if
; necessary. This skips all whitespace, \n, \r, comments until we reach the
; first non-comment character. Then, we put it back (ioPutBack) and return.
;
; If gotoNextLine encounters anything else than whitespace, comment or line
; separator, we error out (no putback)
; Sets Z if we reached a new line. Unset if EOF or error.
gotoNextLine:
.loop1:
; first loop is "strict", that is: we error out on non-whitespace.
call ioGetC
call isSepOrLineEnd
ret nz ; error
or a ; cp 0
jr z, .eof
call isLineEnd
jr z, .loop3 ; good!
cp ';'
jr z, .loop2 ; comment starting, go to "fast lane"
jr .loop1
.loop2:
; second loop is the "comment loop": anything is valid and we just run
; until EOL.
call ioGetC
or a ; cp 0
jr z, .eof
cp '\' ; special case: '\' doesn't count as a line end
; in a comment.
jr z, .loop2
call isLineEnd
jr z, .loop3
jr .loop2
.loop3:
; Loop 3 happens after we reach our first line sep. This means that we
; wade through whitespace until we reach a non-whitespace character.
call ioGetC
or a ; cp 0
jr z, .eof
cp ';'
jr z, .loop2 ; oh, another comment! go back to loop2!
call isSepOrLineEnd
jr z, .loop3
; Non-whitespace. That's our goal! Put it back
call ioPutBack
.eof:
cp a ; ensure Z
ret
; Parse line in (HL) and read the next token in BC. The token is written on
; two bytes (B and C). B is a token type (TOK_* constants) and C is an ID
; specific to that token type.
; Advance HL to after the read word.
; If no token matches, TOK_BAD is written to B
tokenize:
call readWord
jr z, .process ; read successful, process into token.
; Error. It could be EOL, EOF or scraptchpad size problem
; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
; that's obviously what we want to do. If it's EOF, we can check
; it after. If it's a scratchpad overrun, gotoNextLine handles it.
call gotoNextLine
jr nz, .error
or a ; Are we EOF?
jr nz, tokenize ; not EOF? then continue!
; We're EOF
ld b, TOK_EOF
ret
.process:
call isLabel
jr z, .label
call getInstID
jr z, .instr
call getDirectiveID
jr z, .direc
.error:
; no match
ld b, TOK_BAD
jr .end
.instr:
ld b, TOK_INSTR
jr .end
.direc:
ld b, TOK_DIRECTIVE
jr .end
.label:
ld b, TOK_LABEL
.end:
ld c, a
ret