; *** Consts ***
.equ	TOK_INSTR	0x01
.equ	TOK_DIRECTIVE	0x02
.equ	TOK_LABEL	0x03
.equ	TOK_EOF		0xfe	; end of file
.equ	TOK_BAD		0xff

.equ	SCRATCHPAD_SIZE	0x40

; *** Variables ***
; Buffer of SCRATCHPAD_SIZE bytes in which readWord stores the word it reads.
.equ	scratchpad	TOK_RAMSTART
.equ	TOK_RAMEND	scratchpad+SCRATCHPAD_SIZE

; *** Code ***

; Sets Z if A is ';' or any character accepted by isLineEnd (null, CR, LF or
; '\'). A is preserved.
isLineEndOrComment:
	cp	0x3b		; ';'
	ret	z
	; not a comment marker: fall through to isLineEnd

; Sets Z if A is null, CR, LF or '\'. A is preserved.
isLineEnd:
	or	a		; same as cp 0
	ret	z
	cp	CR
	ret	z
	cp	LF
	ret	z
	cp	'\'
	ret

; Sets Z if A is whitespace (as decided by isWS) or any character accepted by
; isLineEndOrComment (';', null, CR, LF, '\').
; NOTE(review): ',' is not tested in this routine; readWord checks for it
; separately. Whether isWS accepts ',' can't be told from this file -- confirm.
isSepOrLineEnd:
	call	isWS
	ret	z
	jr	isLineEndOrComment

; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
; Sets Z if yes, unset if no. HL is preserved.
;
; If it's a label, we replace the trailing ':' char with a null char. It's a
; bit dirty, but it's the easiest way to proceed. In that case, A is 0 on
; return.
isLabel:
	push	hl
	ld	a, ':'
	call	findchar
	; We don't look at findchar's flags: we check the char HL now points to
	; ourselves.
	ld	a, (hl)
	cp	':'
	jr	nz, .nomatch
	; We also have to check that it's our last char.
	inc	hl
	ld	a, (hl)
	or	a		; cp 0
	jr	nz, .nomatch	; not a null char following the ':'. no match.
	; We have a match!
	; Remove trailing ':'
	xor	a		; Z is set
	dec	hl
	ld	(hl), a		; ld doesn't touch flags: Z stays set
	jr	.end
.nomatch:
	call	unsetZ
.end:
	pop	hl
	ret

; Read I/O as long as it's whitespace. When it's not, stop and return the last
; read char in A. Returns with Z unset (we leave through "ret nz").
_eatWhitespace:
	call	ioGetB
	call	isWS
	ret	nz
	jr	_eatWhitespace

; Read ioGetB until a word starts, then read ioGetB as long as there is no
; separator and put that contents in (scratchpad), null terminated, for a
; maximum of SCRATCHPAD_SIZE-1 characters.
;
; A word ends at the first character accepted by isSepOrLineEnd or at a ','.
; That character is put back (ioPutBack) so that the next read sees it.
;
; A word starting with '"' may contain whitespace up to the closing '"', but
; not line ends. A word starting with a single quote is read as 3 chars (quote,
; char, quote). In both cases, quotes are stored in scratchpad along with the
; contents and reading then continues as for a regular word.
;
; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
; right there. If scratchpad is not big enough, we stop right there and error.
; HL points to scratchpad
; Sets Z if a word could be read, unsets if not.
readWord:
	push	bc
	; Get to word
	call	_eatWhitespace
	call	isLineEndOrComment
	jr	z, .error
	ld	hl, scratchpad
	; B is the number of chars we can still store while leaving room for
	; the null terminator.
	ld	b, SCRATCHPAD_SIZE-1
	; A contains the first letter to read
	; Are we opening a double quote?
	cp	'"'
	jr	z, .insideQuote
	; Are we opening a single quote?
	cp	0x27		; '
	jr	z, .singleQuote
.loop:
	ld	(hl), a
	inc	hl
	call	ioGetB
	call	isSepOrLineEnd
	jr	z, .success
	cp	','
	jr	z, .success
	djnz	.loop
	; out of space. error.
.error:
	; We need to put the last char we've read back so that gotoNextLine
	; behaves properly.
	call	ioPutBack
	call	unsetZ
	jr	.end
.success:
	call	ioPutBack
	; null-terminate scratchpad
	xor	a		; also sets Z, our success indicator
	ld	(hl), a
	ld	hl, scratchpad
.end:
	pop	bc
	ret
.insideQuote:
	; inside quotes, we accept literal whitespaces, but not line ends.
	ld	(hl), a
	inc	hl
	call	ioGetB
	cp	'"'
	jr	z, .closeQuote	; ending the quote ends the word
	call	isLineEnd
	jr	z, .error	; ending the line without closing the quote,
				; nope.
	djnz	.insideQuote
	; out of space. error.
	jr	.error
.closeQuote:
	; The char we've just stored has to be counted before we join the
	; regular loop. Jumping straight to .loop would skip a djnz and let a
	; scratchpad-filling quoted word write its null terminator one byte
	; past the end of scratchpad.
	djnz	.loop
	; out of space. error.
	jr	.error
.singleQuote:
	; single quote is more straightforward: we have 3 chars and we put them
	; right in scratchpad
	ld	(hl), a
	call	ioGetB
	or	a
	jr	z, .error
	inc	hl
	ld	(hl), a
	call	ioGetB
	cp	0x27		; '
	jr	nz, .error
	inc	hl
	; 2 chars are already stored. Take them off our budget so that chars
	; following the closing quote can't overrun scratchpad. The closing
	; quote, still in A, is stored by .loop.
	ld	b, SCRATCHPAD_SIZE-3
	jr	.loop

; Skips whitespace, then reads the next char in I/O. If it's a comma, Set Z and
; return. If it's not, put the read char back in I/O and unset Z.
readComma:
	call	_eatWhitespace
	cp	','
	ret	z
	call	ioPutBack
	jp	unsetZ

; Read ioGetB until we reach the beginning of next line, skipping comments if
; necessary. This skips all whitespace, \n, \r, comments until we reach the
; first non-comment character. Then, we put it back (ioPutBack) and return.
;
; If gotoNextLine encounters anything else than whitespace, comment or line
; separator, we error out (no putback)
; Sets Z if we reached a new line or EOF. Unset on error.
; On EOF, A is 0. Callers tell EOF from a new line by checking A.
; NOTE(review): on a new line, this relies on ioPutBack leaving the char that
; was put back (never null) in A -- confirm.
gotoNextLine:
.loop1:
	; first loop is "strict", that is: we error out on non-whitespace.
	call	ioGetB
	call	isSepOrLineEnd
	ret	nz		; error
	or	a		; cp 0
	jr	z, .eof
	call	isLineEnd
	jr	z, .loop3	; good!
	cp	0x3b		; ';'
	jr	z, .loop2	; comment starting, go to "fast lane"
	jr	.loop1
.loop2:
	; second loop is the "comment loop": anything is valid and we just run
	; until EOL.
	call	ioGetB
	or	a		; cp 0
	jr	z, .eof
	cp	'\'		; special case: '\' doesn't count as a line end
				; in a comment.
	jr	z, .loop2
	call	isLineEnd
	jr	z, .loop3
	jr	.loop2
.loop3:
	; Loop 3 happens after we reach our first line sep. This means that we
	; wade through whitespace until we reach a non-whitespace character.
	call	ioGetB
	or	a		; cp 0
	jr	z, .eof
	cp	0x3b		; ';'
	jr	z, .loop2	; oh, another comment! go back to loop2!
	call	isSepOrLineEnd
	jr	z, .loop3
	; Non-whitespace. That's our goal! Put it back
	call	ioPutBack
.eof:
	cp	a		; ensure Z. A is left untouched.
	ret

; Read the next word from I/O (through readWord) and return the matching token
; in BC. The token is written on two bytes (B and C). B is a token type (TOK_*
; constants) and C is an ID specific to that token type.
; Empty lines and comments are skipped until a word is read or EOF is reached.
; If no token matches, TOK_BAD is written to B
;
; C is a copy of whatever is in A when we return: for TOK_INSTR and
; TOK_DIRECTIVE, what getInstID/getDirectiveID left there and for TOK_LABEL, 0
; (see isLabel). C is not written at all when B is TOK_EOF.
; NOTE(review): presumably getInstID and getDirectiveID return their ID in A --
; confirm against their definitions.
tokenize:
	call	readWord
	jr	z, .process	; read successful, process into token.
	; Error. It could be EOL, EOF or scratchpad size problem
	; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
	; that's obviously what we want to do. If it's EOF, we can check
	; it after. If it's a scratchpad overrun, gotoNextLine handles it.
	call	gotoNextLine
	jr	nz, .error
	or	a		; Are we EOF?
	jr	nz, tokenize	; not EOF? then continue!
	; We're EOF
	ld	b, TOK_EOF
	ret
.process:
	call	isLabel
	jr	z, .label
	call	getInstID
	jr	z, .instr
	call	getDirectiveID
	jr	z, .direc
.error:
	; no match
	ld	b, TOK_BAD
	jr	.end
.instr:
	ld	b, TOK_INSTR
	jr	.end
.direc:
	ld	b, TOK_DIRECTIVE
	jr	.end
.label:
	ld	b, TOK_LABEL
.end:
	ld	c, a
	ret