mirror of
https://github.com/hsoft/collapseos.git
synced 2024-11-14 14:58:05 +11:00
e9244b80ee
Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish.
210 lines
4.5 KiB
NASM
210 lines
4.5 KiB
NASM
; *** Consts ***

; Token types. tokenize hands one of these back in B.
TOK_INSTR	.equ	0x01
TOK_DIRECTIVE	.equ	0x02
TOK_LABEL	.equ	0x03
TOK_EOF		.equ	0xfe	; end of file
TOK_BAD		.equ	0xff

; Size of the scratchpad buffer that readWord fills. One slot is used by the
; null terminator, so a word holds at most SCRATCHPAD_SIZE-1 characters.
.equ	SCRATCHPAD_SIZE	0x20
|
|
; *** Variables ***

; Word buffer written by readWord (null-terminated on success) and edited in
; place by isLabel.
scratchpad:
	.fill	SCRATCHPAD_SIZE
|
|
|
|
; *** Code ***
|
|
|
|
; Sets Z if A is ';' (comment start) or any character isLineEnd accepts
; (null, CR, LF or '\'). Z is reset otherwise.
; A is preserved; the other flags are clobbered.
isLineEndOrComment:
	cp	';'
	ret	z
	; not a comment start: fall through into isLineEnd

; Sets Z if A is null, CR (0x0d), LF (0x0a) or '\'. Z is reset otherwise.
; A is preserved; the other flags are clobbered.
isLineEnd:
	cp	0x0a		; LF
	ret	z
	cp	0x0d		; CR
	ret	z
	or	a		; null? (same as cp 0)
	ret	z
	cp	'\'		; last test: its Z result is the return value
	ret
|
|
|
|
; Sets Z if A is a separator: space, tab (0x09) or ','. Z is reset otherwise.
; A is preserved; the other flags are clobbered.
isSep:
	cp	0x09		; tab
	ret	z
	cp	' '
	ret	z
	cp	','		; last test: its Z result is the return value
	ret
|
|
|
|
; Sets Z if A is anything that ends a word: a separator (space, tab, ','),
; a comment start (';') or a line end (null, CR, LF, '\').
; Z is reset otherwise. A is preserved.
isSepOrLineEnd:
	call	isSep
	; Not a separator: let isLineEndOrComment decide and return for us.
	jr	nz, isLineEndOrComment
	ret			; separator, Z already set
|
|
|
|
; Checks whether the null-terminated string at (HL) is a label, that is,
; whether it ends with a ':'.
;
; In:    HL = pointer to the string
; Out:   Z set if the string is a label, reset if not. HL is preserved.
; Clobb: A (0 on a match)
;
; If it's a label, the trailing ':' is replaced in place with a null char so
; that the string is left holding the bare label name. It's a bit dirty, but
; it's the easiest way to proceed.
isLabel:
	push	hl
	ld	a, ':'
	; NOTE(review): JUMP_FINDCHAR is defined elsewhere. The check below relies
	; on it leaving HL on the first ':' when one exists.
	call	JUMP_FINDCHAR
	ld	a, (hl)
	cp	':'
	jr	nz, .nomatch
	; We also have to check that it's our last char.
	inc	hl
	ld	a, (hl)
	or	a		; cp 0
	jr	nz, .nomatch	; not a null char following the :. no match.
	; We have a match! HL was advanced onto the terminator for the check
	; above, so step back onto the ':' before overwriting it.
	dec	hl		; 16-bit dec leaves the flags alone
	xor	a		; A = 0 and Z is set
	ld	(hl), a		; ':' -> null
	jr	.end
.nomatch:
	; JUMP_UNSETZ (defined elsewhere) is relied upon to reset Z.
	call	JUMP_UNSETZ
.end:
	pop	hl		; pop does not touch the flags
	ret
|
|
|
|
; Reads the next word from I/O into scratchpad.
;
; Characters are pulled with ioGetC. Leading separators (see isSep) are
; skipped, then characters are copied to scratchpad until a separator, comment
; start or line end shows up (see isSepOrLineEnd). The word is null-terminated
; and holds at most SCRATCHPAD_SIZE-1 characters.
;
; Whatever the outcome, ioPutBack is called for the last character read, so
; the character that stopped us is not lost to the caller.
;
; Out:   Z set if a word was read, with HL pointing to scratchpad.
;        Z reset if a line end, comment or EOF came before any word, or if the
;        word did not fit in scratchpad. HL is not meaningful in that case.
; BC is preserved. A is clobbered.
readWord:
	push	bc
	; Skip separators until the first character of a word.
.skipSeps:
	call	ioGetC
	call	isLineEndOrComment
	jr	z, .fail	; nothing to read on this line
	call	isSep
	jr	z, .skipSeps
	; A holds the first character of the word.
	ld	hl, scratchpad
	ld	b, SCRATCHPAD_SIZE-1	; characters we may still store
.store:
	ld	(hl), a
	inc	hl
	call	ioGetC
	call	isSepOrLineEnd
	jr	z, .gotWord
	djnz	.store
	; Out of space: fall through to the failure path.
.fail:
	; Put the last char we've read back so that gotoNextLine behaves
	; properly.
	call	ioPutBack
	call	JUMP_UNSETZ
	pop	bc		; pop leaves the flags alone
	ret
.gotWord:
	call	ioPutBack
	xor	a		; A = 0; also sets Z, our success flag
	ld	(hl), a		; null-terminate the word
	ld	hl, scratchpad
	pop	bc
	ret
|
|
|
|
; Consumes input (ioGetC) up to the beginning of the next line that has
; content, skipping whitespace, CR, LF and comments along the way. The first
; character of that content is handed back with ioPutBack before returning.
;
; Until the first line end or comment is seen, only separators are tolerated:
; anything else is an error and we return right away, without putback.
;
; Out:   Z set when a new line was reached or when EOF was hit. On EOF, A is
;        0; tokenize tests A after the call to tell the two apart.
;        Z reset on error.
gotoNextLine:
.strict:
	; "Strict" phase: we error out on non-whitespace.
	call	ioGetC
	call	isSepOrLineEnd
	ret	nz		; error
	or	a		; cp 0
	jr	z, .done	; EOF
	call	isLineEnd
	jr	z, .blank	; good, the line is over
	cp	';'
	jr	nz, .strict	; plain separator, keep looking
	; ';' starts a comment: fall through to the "fast lane".
.comment:
	; Comment phase: anything is valid and we just run until EOL.
	call	ioGetC
	or	a		; cp 0
	jr	z, .done	; EOF
	cp	'\'		; special case: '\' doesn't count as a line end
				; in a comment.
	jr	z, .comment
	call	isLineEnd
	jr	nz, .comment
	; End of the comment line: fall through.
.blank:
	; We are past our first line end. Wade through whitespace and further
	; line ends until we reach a non-whitespace character.
	call	ioGetC
	or	a		; cp 0
	jr	z, .done	; EOF
	cp	';'
	jr	z, .comment	; oh, another comment!
	call	isSepOrLineEnd
	jr	z, .blank
	; Non-whitespace. That's our goal! Put it back.
	call	ioPutBack
.done:
	cp	a		; ensure Z
	ret
|
|
|
|
; Reads the next word from I/O (through readWord) and turns it into a token,
; returned in BC: B is the token type (TOK_* constants) and C is an ID specific
; to that token type.
;
; When no word can be read on the current line, gotoNextLine is called and we
; try again, until a word, EOF or an error shows up.
;
; Out:   B = TOK_LABEL, TOK_INSTR, TOK_DIRECTIVE, TOK_BAD or TOK_EOF.
;        C = A as left by the routine that settled the token type (presumably
;        the ID returned by getInstID / getDirectiveID; isLabel leaves 0).
;        C is left untouched when B is TOK_EOF.
;        After a word was read, HL points to scratchpad (see readWord).
tokenize:
	call	readWord
	jr	nz, .noWord
	; We have a word in scratchpad. Try each token kind in turn.
	call	isLabel
	jr	z, .label
	call	getInstID
	jr	z, .instr
	call	getDirectiveID
	jr	z, .direc
.bad:
	; no match
	ld	b, TOK_BAD
	jr	.setId
.label:
	ld	b, TOK_LABEL
	jr	.setId
.instr:
	ld	b, TOK_INSTR
	jr	.setId
.direc:
	ld	b, TOK_DIRECTIVE
.setId:
	ld	c, a
	ret
.noWord:
	; readWord failed. It could be EOL, EOF or a scratchpad size problem.
	; Whatever it is, calling gotoNextLine is appropriate. If it's EOL,
	; that's obviously what we want to do. If it's EOF, we check for it right
	; after. If it's a scratchpad overrun, gotoNextLine errors out.
	call	gotoNextLine
	jr	nz, .bad
	or	a		; Are we EOF?
	jr	nz, tokenize	; not EOF? then continue!
	; We're EOF
	ld	b, TOK_EOF
	ret
|