From b87feac785899d6f5ba72615ce490b2f37bfc448 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Thu, 9 May 2019 21:21:08 -0400 Subject: [PATCH] zasm: add support for labels! --- apps/zasm/instr.asm | 40 ++++----- apps/zasm/literal.asm | 7 +- apps/zasm/main.asm | 39 ++++++-- apps/zasm/parse.asm | 34 +++++++ apps/zasm/symbol.asm | 184 ++++++++++++++++++++++++++++++++++++++ apps/zasm/tests/test1.asm | 4 + apps/zasm/util.asm | 31 +++++++ tools/emul/zasm_glue.asm | 2 + tools/emul/zasm_user.asm | 12 ++- 9 files changed, 319 insertions(+), 34 deletions(-) create mode 100644 apps/zasm/symbol.asm diff --git a/apps/zasm/instr.asm b/apps/zasm/instr.asm index ecbcc9e..b56cec2 100644 --- a/apps/zasm/instr.asm +++ b/apps/zasm/instr.asm @@ -115,27 +115,23 @@ parseIXY: pop hl ret -; Returns length of string at (HL) in A. -strlen: - push bc - push hl - ld bc, 0 - ld a, 0 ; look for null char -.loop: - cpi - jp z, .found - jr .loop -.found: - ; How many char do we have? the (NEG BC)-1, which started at 0 and - ; decreased at each CPI call. In this routine, we stay in the 8-bit - ; realm, so C only. - ld a, c - neg - dec a - pop hl - pop bc +; Parse string in (HL) and return its numerical value whether its a number +; literal or a symbol. Returns value in IX. +; Sets Z if number or symbol is valid, unset otherwise. +parseNumberOrSymbol: + call parseNumber + ret z + ; Not a number. Try symbol + push de + call symGetVal + jr nz, .notfound ; Z already unset + ; Found! value in DE. We need it in IX + ld ixh, d + ld ixl, e + ; Z already set +.notfound: + pop de ret - ; find argspec for string at (HL). Returns matching argspec in A. ; Return value 0xff holds a special meaning: arg is not empty, but doesn't match ; any argspec (A == 0 means arg is empty). A return value of 0xff means an @@ -174,7 +170,7 @@ parseArg: call enterParens jr z, .withParens ; (HL) has no parens - call parseNumber + call parseNumberOrSymbol jr nz, .nomatch ; We have a proper number in no parens. Number in IX. ld a, 'N' @@ -198,7 +194,7 @@ parseArg: .notY: ld c, 'x' .parseNumberInParens: - call parseNumber + call parseNumberOrSymbol jr nz, .nomatch ; We have a proper number in parens. Number in IX ld a, c ; M, x, or y diff --git a/apps/zasm/literal.asm b/apps/zasm/literal.asm index 13f6569..54acbce 100644 --- a/apps/zasm/literal.asm +++ b/apps/zasm/literal.asm @@ -6,9 +6,10 @@ parseDecimal: ; First, let's see if we have an easy 0-9 case cp '0' ret c ; if < '0', we have a problem - cp '9'+1 - ; We are in the 0-9 range - sub a, '0' ; C is clear + sub a, '0' ; our value now is valid if it's < 10 + cp 10 ; on success, C is set, which is the opposite + ; of what we want + ccf ; invert C flag ret ; Parses the string at (HL) and returns the 16-bit value in IX. diff --git a/apps/zasm/main.asm b/apps/zasm/main.asm index f589884..1eb2f3f 100644 --- a/apps/zasm/main.asm +++ b/apps/zasm/main.asm @@ -2,9 +2,12 @@ ; blockdev ; JUMP_STRNCMP ; JUMP_ADDDE +; JUMP_ADDHL ; JUMP_UPCASE ; JUMP_UNSETZ ; JUMP_INTODE +; JUMP_FINDCHAR +; RAMSTART (where we put our variables in RAM) ; *** Code *** ; Read file through GetC routine pointer at HL and outputs its upcodes through @@ -12,6 +15,8 @@ main: ld (ioGetCPtr), hl ld (ioPutCPtr), de + ld hl, 0 + ld (curOutputOffset), hl .loop: call ioReadLine or a ; is A 0? @@ -28,10 +33,21 @@ main: #include "literal.asm" #include "instr.asm" #include "directive.asm" +.equ SYM_RAMSTART RAMSTART +#include "symbol.asm" -; Parse line in (HL), write the resulting opcode(s) in (DE) and returns the -; number of written bytes in IXL. Advances HL where tokenization stopped and DE -; to where we should write the next upcode. +; Increase (curOutputOffset) by A +incOutputOffset: + push de + ld de, (curOutputOffset) + call JUMP_ADDDE + ld (curOutputOffset), de + pop de + ret + +; Parse line in (HL), write the resulting opcode(s) in (DE) and increases +; (curOutputOffset) by the number of bytes written. Advances HL where +; tokenization stopped and DE to where we should write the next upcode. ; Sets Z if parse was successful, unset if there was an error or EOF. parseLine: push bc @@ -42,6 +58,8 @@ parseLine: jr z, .instr cp TOK_DIRECTIVE jr z, .direc + cp TOK_LABEL + jr z, .label cp TOK_EMPTY jr z, .success ; empty line? do nothing but don't error out. jr .error ; token not supported @@ -50,6 +68,7 @@ parseLine: call parseInstruction or a ; is zero? jr z, .error + call incOutputOffset ld b, a ld hl, instrUpcode .loopInstr: @@ -61,6 +80,7 @@ parseLine: .direc: ld a, c ; D_* call parseDirective + call incOutputOffset ld b, a ld hl, direcData .loopDirec: @@ -68,15 +88,24 @@ parseLine: call ioPutC inc hl djnz .loopDirec + jr .success +.label: + ; The string in (scratchpad) is a label with its trailing ':' removed. + ld hl, scratchpad + ld de, (curOutputOffset) + call symRegister + jr .success .success: - ld ixl, a xor a ; ensure Z jr .end .error: - xor ixl call JUMP_UNSETZ .end: pop bc ret +; *** Variables *** +; The offset where we currently are with regards to outputting opcodes +curOutputOffset: + .fill 2 diff --git a/apps/zasm/parse.asm b/apps/zasm/parse.asm index 3f131a9..02e15c8 100644 --- a/apps/zasm/parse.asm +++ b/apps/zasm/parse.asm @@ -1,6 +1,7 @@ ; *** Consts *** TOK_INSTR .equ 0x01 TOK_DIRECTIVE .equ 0x02 +TOK_LABEL .equ 0x03 TOK_EMPTY .equ 0xfe ; not a bad token, just an empty line TOK_BAD .equ 0xff @@ -34,6 +35,34 @@ isSepOrLineEnd: call isLineEndOrComment ret +; Checks whether string at (HL) is a label, that is, whether it ends with a ":" +; Sets Z if yes, unset if no. +; +; If it's a label, we change the trailing ':' char with a null char. It's a bit +; dirty, but it's the easiest way to proceed. +isLabel: + push hl + ld a, ':' + call JUMP_FINDCHAR + ld a, (hl) + cp ':' + jr nz, .nomatch + ; We also have to check that it's our last char. + inc hl + ld a, (hl) + or a ; cp 0 + jr nz, .nomatch ; not a null char following the :. no match. + ; We have a match! + ; Remove trailing ':' + xor a ; Z is set + ld (hl), a + jr .end +.nomatch: + call JUMP_UNSETZ +.end: + pop hl + ret + ; read word in (HL) and put it in (scratchpad), null terminated, for a maximum ; of SCRATCHPAD_SIZE-1 characters. As a result, A is the read length. HL is ; advanced to the next separator char. @@ -90,6 +119,8 @@ tokenize: call readWord push hl ; Save advanced HL for later ld hl, scratchpad + call isLabel + jr z, .label call getInstID jr z, .instr call getDirectiveID @@ -102,6 +133,9 @@ tokenize: jr .end .direc: ld b, TOK_DIRECTIVE + jr .end +.label: + ld b, TOK_LABEL .end: ld c, a pop hl diff --git a/apps/zasm/symbol.asm b/apps/zasm/symbol.asm new file mode 100644 index 0000000..881e2a4 --- /dev/null +++ b/apps/zasm/symbol.asm @@ -0,0 +1,184 @@ +; Manages both constants and labels within a same namespace and registry. + +; *** Constants *** +; Duplicate symbol in registry +.equ SYM_ERR_DUPLICATE 0x01 +; Symbol registry buffer is full +.equ SYM_ERR_FULLBUF 0x02 + +; Maximum number of symbols we can have in the registry +.equ SYM_MAXCOUNT 0x100 + +; Size of the symbol name buffer size. This is a pool. There is no maximum name +; length for a single symbol, just a maximum size for the whole pool. +.equ SYM_BUFSIZE 0x1000 + +; *** Variables *** +; Each symbol is mapped to a word value saved here. +.equ SYM_VALUES SYM_RAMSTART + +; A list of symbol names separated by null characters. When we encounter a +; symbol name and want to get its value, we search the name here, retrieve the +; index of the name, then go get the value at that index in SYM_VALUES. +.equ SYM_NAMES SYM_VALUES+(SYM_MAXCOUNT*2) + +.equ SYM_RAMEND SYM_NAMES+SYM_BUFSIZE + +; *** Code *** + +; Place HL at the end of SYM_NAMES end (that is, at the point where we have two +; consecutive null chars. We return the index of that new name in A. +; If we're within bounds, Z is set, otherwise unset. +symNamesEnd: + push bc + push de + + ld b, 0 + ld hl, SYM_NAMES + ld de, SYM_NAMES+SYM_BUFSIZE +.loop: + ld a, (hl) + or a ; cp 0 + jr z, .success ; We've reached the end, Z is set, all good + xor a + call JUMP_FINDCHAR ; find next null char + ; go to the char after it. + inc hl + ; Are we out of bounds? + call cpHLDE + jr nc, .outOfBounds ; HL >= DE + djnz .loop + ; exhausted djnz? out of bounds +.outOfBounds: + call JUMP_UNSETZ + jr .end +.success: + ; Our index is 0 - B (if B is, for example 0xfd, A is 0x3) + xor a + sub b + cp a ; ensure Z +.end: + pop de + pop bc + ret + +; Register label in (HL) (minus the ending ":") into the symbol registry and +; set its value in that registry to DE. +; If successful, Z is set and A is the symbol index. Otherwise, Z is unset and +; A is an error code (SYM_ERR_*). +symRegister: + push hl + push bc + push de + + ; First, let's get our strlen + call strlen + ld c, a ; save that strlen for later + + ex hl, de ; symbol to add is now in DE + call symNamesEnd + jr nz, .error + ; A is our index. Save it + ex af, af' + ; Is our new name going to make us go out of bounds? + push hl + push de + ld de, SYM_NAMES+SYM_BUFSIZE + ld a, c + call JUMP_ADDHL + call cpHLDE + pop de + pop hl + jr nc, .error ; HL >= DE + + ; HL point to where we want to add the string + ex hl, de ; symbol to add in HL, dest in DE + ; Copy HL into DE until we reach null char + ; C already have our strlen (minus null char). Let's prepare BC for + ; a LDIR. + inc c ; include null char + ld b, 0 + ldir ; copy C chars from HL to DE + + ; I'd say we're pretty good just about now. What we need to do is to + ; save the value in our original DE that is just on top of the stack + ; into the proper index in SYM_VALUES. Our index, remember, is + ; currently in A'. + ex af, af' + pop de + push de ; push it right back to avoid stack imbalance + ld hl, SYM_VALUES + call JUMP_ADDHL + call JUMP_ADDHL ; twice because our values are words + + ; Everything is set! DE is our value HL points to the proper index in + ; SYM_VALUES. Let's just write it (little endian). + ld (hl), e + inc hl + ld (hl), d +.error: + ; Z already unset + pop de + pop bc + pop hl + ret + +; Find name (HL) in SYM_NAMES and returns matching index in A. +; If we find something, Z is set, otherwise unset. +symFind: + push hl + push bc + push de + + ; First, what's our strlen? + call strlen + ld c, a ; let's save that + + ex hl, de ; it's easier if HL is haystack and DE is + ; needle. + ld b, 0 + ld hl, SYM_NAMES +.loop: + ld a, (hl) + or a ; cp 0 + jr z, .nomatch + ld a, c + call JUMP_STRNCMP + jr z, .match + ; ok, next! + xor a + call JUMP_FINDCHAR ; find next null char + ; go to the char after it. + inc hl + djnz .loop + ; exhausted djnz? no match +.nomatch: + call JUMP_UNSETZ + jr .end +.match: + ; Our index is 0 - B (if B is, for example 0xfd, A is 0x3) + xor a + sub b + cp a ; ensure Z +.end: + pop de + pop bc + pop hl + ret + +; Return value associated with symbol string in (HL) into DE. +; Sets Z on success, unset on error. +symGetVal: + call symFind + ret nz ; not found + ; our index is in A. Let's fetch the proper value + push hl + ld hl, SYM_VALUES + call JUMP_ADDHL + call JUMP_ADDHL ; twice because our values are words + ld e, (hl) + inc hl + ld d, (hl) + pop hl + cp a ; ensure Z + ret diff --git a/apps/zasm/tests/test1.asm b/apps/zasm/tests/test1.asm index c0a802f..db6c334 100644 --- a/apps/zasm/tests/test1.asm +++ b/apps/zasm/tests/test1.asm @@ -1,7 +1,11 @@ ; comment add a, b ; comment +label1: inc a ; comment ; comment .db 42 +label2: .dw 42 .dw 3742 + ld a, (label1) + ld hl, label2 diff --git a/apps/zasm/util.asm b/apps/zasm/util.asm index 16b0bca..b0c598d 100644 --- a/apps/zasm/util.asm +++ b/apps/zasm/util.asm @@ -12,6 +12,37 @@ callHL: jp (hl) ret +; Compare HL with DE and sets Z and C in the same way as a regular cp X where +; HL is A and DE is X. +cpHLDE: + ld a, h + cp d + ret nz ; if not equal, flags are correct + ld a, l + cp e + ret ; flags are correct + +; Returns length of string at (HL) in A. +strlen: + push bc + push hl + ld bc, 0 + ld a, 0 ; look for null char +.loop: + cpi + jp z, .found + jr .loop +.found: + ; How many char do we have? the (NEG BC)-1, which started at 0 and + ; decreased at each CPI call. In this routine, we stay in the 8-bit + ; realm, so C only. + ld a, c + neg + dec a + pop hl + pop bc + ret + ; If string at (HL) starts with ( and ends with ), "enter" into the parens ; (advance HL and put a null char at the end of the string) and set Z. ; Otherwise, do nothing and reset Z. diff --git a/tools/emul/zasm_glue.asm b/tools/emul/zasm_glue.asm index 9f4d34f..d273cca 100644 --- a/tools/emul/zasm_glue.asm +++ b/tools/emul/zasm_glue.asm @@ -7,9 +7,11 @@ jr init ; 2 bytes ; *** JUMP TABLE *** jp strncmp jp addDE +jp addHL jp upcase jp unsetZ jp intoDE +jp findchar init: di diff --git a/tools/emul/zasm_user.asm b/tools/emul/zasm_user.asm index 570904b..871cd47 100644 --- a/tools/emul/zasm_user.asm +++ b/tools/emul/zasm_user.asm @@ -1,9 +1,13 @@ ; *** JUMP TABLE *** JUMP_STRNCMP .equ 0x02 JUMP_ADDDE .equ 0x05 -JUMP_UPCASE .equ 0x08 -JUMP_UNSETZ .equ 0x0b -JUMP_INTODE .equ 0x0e +JUMP_ADDHL .equ 0x08 +JUMP_UPCASE .equ 0x0b +JUMP_UNSETZ .equ 0x0e +JUMP_INTODE .equ 0x11 +JUMP_FINDCHAR .equ 0x14 -.org 0x4000 +.equ USER_CODE 0x4000 +.equ RAMSTART 0x6000 +.org USER_CODE #include "main.asm"