mirror of
https://github.com/hsoft/collapseos.git
synced 2025-04-05 08:58:39 +11:00
Major parsing optimisations
Totally reworked both parseDecimal and parseDecimalDigit. parseDecimalDigit no longer exists, as it could be replaced by an inline alternative in the 4 places it appeared. This saves one byte overall: the inline version is 4 bytes, 1 byte more than a call, so the 4 call sites grow by 4 bytes in total, while removing the function saved 5 bytes. The digit check has been reduced from between 35 and 52 cycles (35 on error, so we'd expect 52 cycles to be more common unless someone's really bad at programming) to 14 cycles, so it is 2-3 times faster. parseDecimal has been reduced by a byte, and its main loop is now just about twice as fast, but with increased overhead. To put this into perspective, if we ignore error cases: for decimals of length 1 it'll be 1.20x faster; for length 2, 1.41x faster; for length 3, 1.51x faster; for length 4, 1.57x faster; and for length 5 and above, at least 1.48x faster (even faster if there are leading zeroes or the input is not the worst-case scenario). I believe there is still room for improvement, since the first iteration can nearly be replaced with "ld l, c" (because 0*10=0), but when I tried this I could either add a zero check into the main loop, adding around 40 cycles and 10 bytes, or add 20 bytes to the overhead, and I don't think either of those options is worth it.
This commit is contained in:
parent
6bc516b2e7
commit
9797405789
@ -1,5 +1,5 @@
|
|||||||
; *** Requirements ***
|
; *** Requirements ***
|
||||||
; unsetZ
|
; None
|
||||||
;
|
;
|
||||||
; *** Code ***
|
; *** Code ***
|
||||||
|
|
||||||
@ -7,58 +7,72 @@
|
|||||||
; result in A.
|
; result in A.
|
||||||
;
|
;
|
||||||
; On success, the carry flag is reset. On error, it is set.
|
; On success, the carry flag is reset. On error, it is set.
|
||||||
parseDecimalDigit:
|
; Also, zero flag set if '0'
|
||||||
; First, let's see if we have an easy 0-9 case
|
; parseDecimalDigit has been replaced with the following code inline:
|
||||||
cp '0'
|
; add a, 0xc6 ; Maps '0'-'9' onto 0xf6-0xff
|
||||||
ret c ; if < '0', we have a problem
|
; sub 0xf6 ; Anything but 0xf6-0xff carries
|
||||||
sub '0' ; our value now is valid if it's < 10
|
; Maps 0xf6-0xff onto 0-9
|
||||||
cp 10 ; on success, C is set, which is the opposite
|
|
||||||
; of what we want
|
|
||||||
ccf ; invert C flag
|
|
||||||
ret
|
|
||||||
|
|
||||||
; Parse string at (HL) as a decimal value and return value in IX under the
|
; Parse string at (HL) as a decimal value and return value in IX under the
|
||||||
; same conditions as parseLiteral.
|
; same conditions as parseLiteral.
|
||||||
; Sets Z on success, unset on error.
|
; Sets Z on success, unset on error.
|
||||||
|
|
||||||
|
; 55 bytes, 32 cycles in first loop
|
||||||
|
; 90 cycles overhead + up to 69 cycles if length >= 5
|
||||||
|
; 140 cycles in loop
|
||||||
parseDecimal:
|
parseDecimal:
|
||||||
push hl
|
push hl
|
||||||
push de
|
|
||||||
|
|
||||||
ld ix, 0
|
.skip: ; Skips leading zeroes
|
||||||
.loop:
|
|
||||||
ld a, (hl)
|
ld a, (hl)
|
||||||
or a
|
|
||||||
jr z, .end ; success!
|
|
||||||
call parseDecimalDigit
|
|
||||||
jr c, .error
|
|
||||||
|
|
||||||
; Now, let's add A to IX. First, multiply by 10.
|
|
||||||
push ix \ pop de
|
|
||||||
add ix, ix ; x2
|
|
||||||
jr c, .error
|
|
||||||
add ix, ix ; x4
|
|
||||||
jr c, .error
|
|
||||||
add ix, ix ; x8
|
|
||||||
jr c, .error
|
|
||||||
add ix, de ; x9
|
|
||||||
jr c, .error
|
|
||||||
add ix, de ; x10
|
|
||||||
jr c, .error
|
|
||||||
ld d, 0
|
|
||||||
ld e, a
|
|
||||||
add ix, de
|
|
||||||
jr c, .error
|
|
||||||
|
|
||||||
inc hl
|
inc hl
|
||||||
jr .loop
|
cp '0'
|
||||||
|
jr z, .skip
|
||||||
|
|
||||||
cp a ; ensure Z
|
exx ; preserve bc, hl, de
|
||||||
jr .end
|
ld hl, 0
|
||||||
|
ld b, 5 ; Carries can only occur for decimals >=5 in length
|
||||||
|
jr .start
|
||||||
|
|
||||||
|
.loop:
|
||||||
|
ld c, a ; c holds current digit
|
||||||
|
exx ;swap hl back in to get address
|
||||||
|
ld a, (hl) ; a checks if following digit is null at end of loop
|
||||||
|
inc hl
|
||||||
|
exx
|
||||||
|
add hl, hl ; x2
|
||||||
|
ld d, h
|
||||||
|
ld e, l ; de is x2
|
||||||
|
add hl, hl ; x4
|
||||||
|
add hl, hl ; x8
|
||||||
|
add hl, de ; x10
|
||||||
|
ld d, 0
|
||||||
|
ld e, c
|
||||||
|
add hl, de
|
||||||
|
jr c, .error ; if hl was 0x1999, it may carry here
|
||||||
|
; This check could be taken outside the loop, but at the cost of 6 bytes
|
||||||
|
|
||||||
|
.start:
|
||||||
|
add a, 0xc6 ; converts '0'-'9' to 0-9
|
||||||
|
sub 0xf6 ; carries if out of range
|
||||||
|
jr c, .error
|
||||||
|
|
||||||
|
djnz .loop
|
||||||
|
|
||||||
|
|
||||||
|
inc b ; so loop only executes once more
|
||||||
|
; only numbers >0x1999 can carry when multiplied by 10.
|
||||||
|
ld de, 0xE666
|
||||||
|
ex de, hl
|
||||||
|
add hl, de
|
||||||
|
ex de, hl
|
||||||
|
jr nc, .loop ; if it doesn't carry, it's small enough
|
||||||
.error:
|
.error:
|
||||||
call unsetZ
|
sub 0xd0 ; if a is null, set Z
|
||||||
|
; a is checked for null before any errors
|
||||||
.end:
|
.end:
|
||||||
pop de
|
push hl
|
||||||
pop hl
|
pop ix
|
||||||
|
exx ; restore original de and bc
|
||||||
|
pop hl
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user