From d0c5d3a741dd2df51c7d07cc4de4057b8808d5f1 Mon Sep 17 00:00:00 2001
From: Virgil Dupras <hsoft@hardcoded.net>
Date: Wed, 1 Apr 2020 22:19:37 -0400
Subject: [PATCH] Forth self-bootstraps!!!

This commit cuts Forth's dependency on zasm. It now self-assembles
its own boot-binary! Things are a bit messy now, I'll clean up
later.
---
 emul/Makefile         |   7 +-
 emul/forth/boot.bin   | Bin 0 -> 451 bytes
 emul/forth/conf.fs    |   2 +
 emul/forth/stage0.asm |   3 -
 emul/forth/z80c.bin   | Bin 1748 -> 1748 bytes
 forth/boot.fs         | 384 +++++++++++++++++++++++++++++++++++++++
 forth/forth.asm       | 414 ------------------------------------------
 forth/z80a.fs         |  37 +++-
 8 files changed, 421 insertions(+), 426 deletions(-)
 create mode 100644 emul/forth/boot.bin
 create mode 100644 emul/forth/conf.fs
 delete mode 100644 emul/forth/stage0.asm
 create mode 100644 forth/boot.fs
 delete mode 100644 forth/forth.asm

diff --git a/emul/Makefile b/emul/Makefile
index 0ebdaa1..cde198c 100644
--- a/emul/Makefile
+++ b/emul/Makefile
@@ -29,8 +29,8 @@ shell/shell: shell/shell.c $(SHELLOBJS) shell/shell-bin.h
 
 # z80c.bin is not in the prerequisites because its a bootstrap binary that
 # should be updated manually through make fbootstrap.
-forth/forth0.bin: forth/stage0.asm $(ZASMBIN)
-	$(ZASMBIN) $(KERNEL) ../forth < forth/stage0.asm | cat - forth/z80c.bin | tee $@ > /dev/null
+forth/forth0.bin:
+	cat forth/boot.bin forth/z80c.bin > $@
 
 forth/forth0-bin.h: forth/forth0.bin
 	./bin2c.sh KERNEL < forth/forth0.bin | tee $@ > /dev/null
@@ -39,7 +39,7 @@ forth/stage1: forth/stage.c $(OBJS) forth/forth0-bin.h
 	$(CC) forth/stage.c $(OBJS) -o $@
 
 forth/stage1dbg: forth/stage.c $(OBJS) forth/forth0-bin.h 
-	$(CC) -DDEBUG forth/stage.c $(OBJS) -o $@
+	$(CC) -DDEBUG -DBOOT forth/stage.c $(OBJS) -o $@
 
 forth/core.bin: $(FORTHSRC_PATHS) forth/stage1
 	cat $(FORTHSRC_PATHS) | ./forth/stage1 | tee $@ > /dev/null
@@ -103,6 +103,7 @@ updatebootstrap: $(ZASMBIN)
 # words and they write to HERE at initialization.
 .PHONY: fbootstrap
 fbootstrap: forth/stage2
+	cat ./forth/conf.fs ../forth/boot.fs | ./forth/stage2 > forth/boot.bin
 	cat ../forth/dummy.fs ../forth/z80c.fs forth/emul.fs ../forth/icore.fs ../forth/dummy.fs | ./forth/stage2 | tee forth/z80c.bin > /dev/null
 
 .PHONY: clean
diff --git a/emul/forth/boot.bin b/emul/forth/boot.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c5e2c3681a863a47dd6af48f0e59a1e665a236e5
GIT binary patch
literal 451
zcmX|-IY<LR6o%j1#CSx+JDOEUvdx+WK|4VakDyk87)1~if~HYZcp!>kWn*V&Ym+tv
z5wQ_u6il%Y!Bn!~6r&?yAY~8+olV5|y8q`L|3|Vw9-&s`49O0XP9%dUzkno>jNv6_
z5qKLX%}CA>Dy@8&H}Mwk<r<HzEF=NE0kFFv;^QRft}#=)L!GV|_3<@h+3K%5=A_bl
zpdkR%9)yz{^IiFdQ8!~Nv6-c0YycVqAYET(K&DX|()gN=$CEJf{Ui2(J7K6qn;@d`
zh?j@2!1h3iT)-1UfSkx5iP;HrG;WDijkA&-{5ZV}`E`9tw}ST}t-bm}&0ePKda0})
zqsP~#M<mKL^qL!|4+XX2*;DoV3yz`s0Gxd+I2mx#=va<bFb~q(heHrH@$V>$0=HD%
zid|al`*axXfXb~%6#L#(0DB(As*k6(y1K+-4TSrsifng)WV9B%9+D|tGR00ZnbBn0
g!z3kbfAm~FCFQ9>mYa)<{6A(zdOfw7*%-lwUqq;}CIA2c

literal 0
HcmV?d00001

diff --git a/emul/forth/conf.fs b/emul/forth/conf.fs
new file mode 100644
index 0000000..0c5aeeb
--- /dev/null
+++ b/emul/forth/conf.fs
@@ -0,0 +1,2 @@
+0xe800 CONSTANT RAMSTART
+0xf000 CONSTANT RS_ADDR
diff --git a/emul/forth/stage0.asm b/emul/forth/stage0.asm
deleted file mode 100644
index 6870f0a..0000000
--- a/emul/forth/stage0.asm
+++ /dev/null
@@ -1,3 +0,0 @@
-.equ	RAMSTART	0xe800
-.equ	STDIO_PORT	0x00
-.inc "forth.asm"
diff --git a/emul/forth/z80c.bin b/emul/forth/z80c.bin
index d141bf0a15e6b4bf7b15fa18e7ae3673ca5c0e74..107bc6bc37e5366437554d331b05a6df82d6e2a6 100644
GIT binary patch
delta 15
Wcmcb@dxdv{3p10P#AY|<K4t(a6$F0(

delta 15
Wcmcb@dxdv{3p3MX@y%|`earwYLIm>w

diff --git a/forth/boot.fs b/forth/boot.fs
new file mode 100644
index 0000000..15e0d3b
--- /dev/null
+++ b/forth/boot.fs
@@ -0,0 +1,384 @@
+( Configuration words: RAMSTART, RS_ADDR )
+H@ 256 /MOD 2 PC! 2 PC!
+
+( STABLE ABI
+  Those jumps below are supposed to stay at these offsets,
+  always. If they change bootstrap binaries have to be
+  adjusted because they rely on them. Those entries are
+  referenced directly by their offset in Forth code with a
+  comment indicating what that number refers to.
+)
+
+H@ ORG !
+
+0 JPnn,           ( 00, main )
+0 JPnn,           ( 03, find )
+NOP, NOP,         ( 06, unused )
+NOP, NOP,         ( 08, LATEST )
+NOP,              ( 0a, unused )
+0 JPnn,           ( 0b, cellWord )
+0 JPnn,           ( 0e, compiledWord )
+0 JPnn,           ( 11, pushRS )
+0 JPnn,           ( 14, popRS )
+JP(IY), NOP,      ( 17, nativeWord )
+0 JPnn,           ( 1a, next )
+0 JPnn,           ( 1d, chkPS )
+NOP, NOP,         ( 20, numberWord )
+NOP, NOP,         ( 22, litWord )
+RAMSTART ,        ( 24, INITIAL_SP )
+RAMSTART 0x0e + , ( 26, WORDBUF )
+0 JPnn,           ( 28, flagsToBC )
+0 JPnn,           ( 2b, doesWord )
+RS_ADDR ,         ( 2e, RS_ADDR )
+RAMSTART 0x0c + , ( 30, CINPTR )
+RAMSTART 0x2e + , ( 32, SYSVNXT )
+RAMSTART 0x08 + , ( 34, FLAGS )
+RAMSTART 0x0a + , ( 36, PARSEPTR )
+RAMSTART 0x04 + , ( 38, HERE )
+RAMSTART 0x02 + , ( 3a, CURRENT )
+
+( BOOT DICT
+  There are only 5 words in the boot dict, but these words'
+  offset need to be stable, so they're part of the "stable
+  ABI"
+)
+'E' A, 'X' A, 'I' A, 'T' A,
+0 A,,   ( prev )
+4 A,
+L1 BSET ( EXIT )
+    0x17 A,,         ( nativeWord )
+    0x14 CALLnn,     ( popRS )
+    RAMSTART 0x06 + LD(nn)HL, ( RAMSTART+0x06 == IP )
+    JPNEXT,
+
+NOP, NOP, NOP,   ( unused )
+
+'(' A, 'b' A, 'r' A, ')' A,
+PC L1 @ - A,, ( prev )
+4 A,
+L1 BSET ( BR )
+    0x17 A,,         ( nativeWord )
+L2 BSET ( used in CBR )
+    RAMSTART 0x06 + LDHL(nn), ( RAMSTART+0x06 == IP )
+    E (HL) LDrr,
+    HL INCss,
+    D (HL) LDrr,
+    HL DECss,
+    DE ADDHLss,
+    RAMSTART 0x06 + LD(nn)HL, ( RAMSTART+0x06 == IP )
+    JPNEXT,
+
+'(' A, '?' A, 'b' A, 'r' A, ')' A,
+PC L1 @ - A,, ( prev )
+5 A,
+L1 BSET ( CBR )
+    0x17 A,,         ( nativeWord )
+    HL POPqq,
+    chkPS,
+    A H LDrr,
+    L ORr,
+    JRZ, L2 BWR ( BR + 2. False, branch )
+    ( True, skip next 2 bytes and don't branch )
+    RAMSTART 0x06 + LDHL(nn), ( RAMSTART+0x06 == IP )
+    HL INCss,
+    HL INCss,
+    RAMSTART 0x06 + LD(nn)HL, ( RAMSTART+0x06 == IP )
+    JPNEXT,
+
+'E' A, 'X' A, 'E' A, 'C' A, 'U' A, 'T' A, 'E' A,
+PC L1 @ - A,, ( prev )
+7 A,
+L2 BSET ( used frequently below )
+    0x17 A,,         ( nativeWord )
+    IY POPqq,        ( is a wordref )
+    chkPS,
+    L 0 IY+ LDrIXY,
+    H 1 IY+ LDrIXY,
+    ( HL points to code pointer )
+    IY INCss,
+    IY INCss,
+    ( IY points to PFA )
+    JP(HL),
+
+( END OF STABLE ABI )
+
+( Name of BOOT word )
+L1 BSET
+'B' A, 'O' A, 'O' A, 'T' A, 0 A,
+
+PC ORG @ 1 + ! ( main )
+( STACK OVERFLOW PROTECTION:
+  To avoid having to check for stack underflow after each pop
+  operation (which can end up being prohibitive in terms of
+  costs), we give ourselves a nice 6 bytes buffer. 6 bytes
+  because we seldom have words requiring more than 3 items
+  from the stack. Then, at each "exit" call we check for
+  stack underflow.
+)
+    SP 0xfffa LDddnn,
+    0x24 @ SP LD(nn)dd, ( 24 == INITIAL_SP )
+    IX RS_ADDR LDddnn,
+( LATEST is a label to the latest entry of the dict. It is
+  written at offset 0x08 by the process or person building
+  Forth. )
+    0x08 LDHL(nn),
+    0x3a @ LD(nn)HL,    ( 3a == CURRENT )
+    0x38 @ LD(nn)HL,    ( 38 == HERE )
+    HL L1 @ LDddnn,
+    0x03 CALLnn,        ( 03 == find )
+    DE PUSHqq,
+    L2 @ 2 + JPnn,
+
+PC ORG @ 4 + ! ( find )
+( Find the entry corresponding to word where (HL) points to
+  and sets DE to point to that entry. Z if found, NZ if not.
+)
+
+    BC PUSHqq,
+    HL PUSHqq,
+	( First, figure out string len )
+    BC 0 LDddnn,
+    A XORr,
+    CPIR,
+	( C has our length, negative, -1 )
+    A C LDrr,
+    NEG,
+    A DECr,
+	( special case. zero len? we never find anything. )
+    JRZ, L1 FWR ( fail )
+
+    C A LDrr, ( C holds our length )
+( Let's do something weird: We'll hold HL by the *tail*.
+  Because of our dict structure and because we know our
+  lengths, it's easier to compare starting from the end.
+  Currently, after CPIR, HL points to char after null. Let's
+  adjust. Because the compare loop pre-decrements, instead
+  of DECing HL twice, we DEC it once. )
+    HL DECss,
+    DE 0x3a @ LDdd(nn),   ( 3a == CURRENT )
+L3 BSET ( inner )
+    ( DE is a wordref, first step, do our len correspond? )
+    HL PUSHqq,          ( --> lvl 1 )
+    DE PUSHqq,          ( --> lvl 2 )
+    DE DECss,
+    LDA(DE),
+    0x7f ANDn,          ( remove IMMEDIATE flag )
+    C CPr,
+    JRNZ, L4 FWR ( loopend )
+    ( match, let's compare the string then )
+    DE DECss, ( Skip prev field. One less because we )
+    DE DECss, ( pre-decrement )
+    B C LDrr, ( loop C times )
+L5 BSET ( loop )
+    ( pre-decrement for easier Z matching )
+    DE DECss,
+    HL DECss,
+    LDA(DE),
+    (HL) CPr,
+    JRNZ, L6 FWR ( loopend )
+    DJNZ, L5 BWR ( loop )
+L4 FSET L6 FSET ( loopend )
+( At this point, Z is set if we have a match. In all cases,
+  we want to pop HL and DE )
+    DE POPqq,           ( <-- lvl 2 )
+    HL POPqq,           ( <-- lvl 1 )
+    JRZ, L4 FWR ( end, match? we're done! )
+    ( no match, go to prev and continue )
+    HL PUSHqq,          ( --> lvl 1 )
+    DE DECss,
+    DE DECss,
+    DE DECss,           ( prev field )
+    DE PUSHqq,          ( --> lvl 2 )
+    EXDEHL,
+    E (HL) LDrr,
+    HL INCss,
+    D (HL) LDrr,
+    ( DE conains prev offset )
+    HL POPqq,           ( <-- lvl 2 )
+    ( HL is prev field's addr. Is offset zero? )
+    A D LDrr,
+    E ORr,
+    JRZ, L6 FWR ( noprev )
+    ( get absolute addr from offset )
+    ( carry cleared from "or e" )
+    DE SBCHLss,
+    EXDEHL,             ( result in DE )
+L6 FSET ( noprev )
+    HL POPqq,           ( <-- lvl 1 )
+    JRNZ, L3 BWR ( inner, try to match again )
+    ( Z set? end of dict, unset Z )
+L1 FSET ( fail )
+    A XORr,
+    A INCr,
+L4 FSET ( end )
+    HL POPqq,
+    BC POPqq,
+    RET,
+
+PC ORG @ 0x29 + ! ( flagsToBC )
+    BC 0 LDddnn,
+    CZ RETcc, ( equal )
+    BC INCss,
+    CM RETcc, ( > )
+    ( < )
+    BC DECss,
+    BC DECss,
+    RET,
+
+PC ORG @ 0x12 + ! ( pushRS )
+    IX INCss,
+    IX INCss,
+    0 IX+ L LDIXYr,
+    1 IX+ H LDIXYr,
+    RET,
+
+PC ORG @ 0x15 + ! ( popRS )
+    L 0 IX+ LDrIXY,
+    H 1 IX+ LDrIXY,
+    IX DECss,
+    IX DECss,
+    RET,
+
+'(' A, 'u' A, 'f' A, 'l' A, 'w' A, ')' A, 0 A,
+L1 BSET ( abortUnderflow )
+    HL PC 7 - LDddnn,
+    0x03 CALLnn, ( find )
+    DE PUSHqq,
+    L2 @ 2 + JPnn, ( EXECUTE, skip nativeWord )
+
+
+PC ORG @ 0x1e + ! ( chkPS )
+    HL PUSHqq,
+    0x24 @ LDHL(nn), ( 24 == INITIAL_SP )
+( We have the return address for this very call on the stack
+  and protected registers. Let's compensate )
+    HL DECss,
+    HL DECss,
+    HL DECss,
+    HL DECss,
+    A ORr,           ( clear carry )
+    SP SBCHLss,
+    HL POPqq,
+    CNC RETcc,      ( INITIAL_SP >= SP? good )
+    JR, L1 BWR ( abortUnderflow )
+
+L3 BSET ( chkRS )
+    IX PUSHqq, HL POPqq,
+    DE RS_ADDR LDddnn,
+    A ORr,           ( clear carry )
+    DE SBCHLss,
+    CNC RETcc,      ( IX >= RS_ADDR? good )
+    JR, L1 BWR ( abortUnderflow )
+
+
+PC ORG @ 0x1b + ! ( next )
+( This routine is jumped to at the end of every word. In it,
+  we jump to current IP, but we also take care of increasing
+  it by 2 before jumping. )
+	( Before we continue: are stacks within bounds? )
+    0x1d CALLnn, ( chkPS )
+    L3 @ CALLnn, ( chkRS )
+    DE RAMSTART 0x06 + LDdd(nn), ( RAMSTART+0x06 == IP )
+    H D LDrr,
+    L E LDrr,
+    DE INCss,
+    DE INCss,
+    RAMSTART 0x06 + DE LD(nn)dd, ( RAMSTART+0x06 == IP )
+	( HL is an atom list pointer. We need to go into it to
+      have a wordref )
+    E (HL) LDrr,
+    HL INCss,
+    D (HL) LDrr,
+    DE PUSHqq,
+    L2 @ 2 + JPnn, ( EXECUTE, skip nativeWord )
+
+( WORD ROUTINES )
+
+PC ORG @ 0x0f + ! ( compiledWord )
+( Execute a list of atoms, which always end with EXIT.
+  IY points to that list. What do we do:
+  1. Push current IP to RS
+  2. Set new IP to the second atom of the list
+  3. Execute the first atom of the list. )
+    RAMSTART 0x06 + LDHL(nn), ( RAMSTART+0x06 == IP )
+    0x11 CALLnn,     ( 11 == pushRS )
+    IY PUSHqq, HL POPqq,
+    HL INCss,
+    HL INCss,
+    RAMSTART 0x06 + LD(nn)HL, ( RAMSTART+0x06 == IP )
+	( IY still is our atom reference )
+    L 0 IY+ LDrIXY,
+    H 1 IY+ LDrIXY,
+    HL PUSHqq,      ( arg for EXECUTE )
+    L2 @ 2 + JPnn, ( EXECUTE, skip nativeWord )
+
+PC ORG @ 0x0c + ! ( cellWord )
+( Pushes the PFA directly )
+    IY PUSHqq,
+    JPNEXT,
+
+PC ORG @ 0x2c + ! ( doesWord )
+( The word was spawned from a definition word that has a
+  DOES>. PFA+2 (right after the actual cell) is a link to the
+  slot right after that DOES>. Therefore, what we need to do
+  push the cell addr like a regular cell, then follow the
+  linkfrom the PFA, and then continue as a regular
+  compiledWord.
+)
+    IY PUSHqq, ( like a regular cell )
+    L 2 IY+ LDrIXY,
+    H 3 IY+ LDrIXY,
+    HL PUSHqq, IY POPqq,
+    0x0e JPnn, ( 0e == compiledWord )
+
+
+PC ORG @ 0x20 + ! ( numberWord )
+( This is not a word, but a number literal. This works a bit
+  differently than others: PF means nothing and the actual
+  number is placed next to the numberWord reference in the
+  compiled word list. What we need to do to fetch that number
+  is to play with the IP.
+)
+    RAMSTART 0x06 + LDHL(nn), ( RAMSTART+0x06 == IP )
+    E (HL) LDrr,
+    HL INCss,
+    D (HL) LDrr,
+    HL INCss,
+    RAMSTART 0x06 + LD(nn)HL, ( RAMSTART+0x06 == IP )
+    DE PUSHqq,
+    JPNEXT,
+
+PC ORG @ 0x22 + ! ( litWord )
+( Similarly to numberWord, this is not a real word, but a
+  string literal. Instead of being followed by a 2 bytes
+  number, it's followed by a null-terminated string. When
+  called, puts the string's address on PS )
+    RAMSTART 0x06 + LDHL(nn), ( RAMSTART+0x06 == IP )
+    HL PUSHqq,
+    ( skip to null char )
+    A XORr, ( look for null )
+    B A LDrr,
+    C A LDrr,
+    CPIR,
+	( CPIR advances HL regardless of comparison, so goes one
+      char after NULL. This is good, because that's what we
+      want... )
+    RAMSTART 0x06 + LD(nn)HL, ( RAMSTART+0x06 == IP )
+    JPNEXT,
+
+( filler )
+NOP, NOP, NOP, NOP, NOP, NOP,
+
+( DICT HOOK )
+( This dummy dictionary entry serves two purposes:
+  1. Allow binary grafting. Because each binary dict always
+     end with a dummy entry, we always have a predictable
+     prev offset for the grafter's first entry.
+  2. Tell icore's "_c" routine where the boot binary ends.
+     See comment there.
+)
+'_' A, 'b' A, 'e' A, 'n' A, 'd' A,
+PC L2 @ - A,, ( prev )
+5 A,
+
+H@ 256 /MOD 2 PC! 2 PC!
diff --git a/forth/forth.asm b/forth/forth.asm
deleted file mode 100644
index 1148482..0000000
--- a/forth/forth.asm
+++ /dev/null
@@ -1,414 +0,0 @@
-; Collapse OS Forth's boot binary
-
-; *** Const ***
-; Base of the Return Stack
-.equ	RS_ADDR		0xf000
-; Buffer where WORD copies its read word to.
-.equ	WORD_BUFSIZE		0x20
-; Allocated space for sysvars (see comment above SYSVCNT)
-.equ	SYSV_BUFSIZE		0x10
-
-; *** Variables ***
-.equ	INITIAL_SP	RAMSTART
-; wordref of the last entry of the dict.
-.equ	CURRENT		@+2
-; Pointer to the next free byte in dict.
-.equ	HERE		@+2
-; Interpreter pointer. See Execution model comment below.
-.equ	IP		@+2
-; Global flags
-; Bit 0: whether the interpreter is executing a word (as opposed to parsing)
-.equ	FLAGS		@+2
-; Pointer to the system's number parsing function. It points to then entry that
-; had the "(parse)" name at startup. During stage0, it's out builtin PARSE,
-; but at stage1, it becomes "(parse)" from core.fs. It can also be changed at
-; runtime.
-.equ	PARSEPTR	@+2
-; Pointer to the word executed by "C<". During stage0, this points to KEY.
-; However, KEY ain't very interactive. This is why we implement a readline
-; interface in Forth, which we plug in during init. If "(c<)" exists in the
-; dict, CINPTR is set to it. Otherwise, we set KEY
-.equ	CINPTR		@+2
-.equ	WORDBUF		@+2
-; Sys Vars are variables with their value living in the system RAM segment. We
-; need this mechanisms for core Forth source needing variables. Because core
-; Forth source is pre-compiled, it needs to be able to live in ROM, which means
-; that we can't compile a regular variable in it. SYSVNXT points to the next
-; free space in SYSVBUF. Then, at the word level, it's a regular sysvarWord.
-.equ	SYSVNXT		@+WORD_BUFSIZE
-.equ	RAMEND		@+SYSV_BUFSIZE+2
-
-; *** Stable ABI ***
-; Those jumps below are supposed to stay at these offsets, always. If they
-; change bootstrap binaries have to be adjusted because they rely on them.
-; Those entries are referenced directly by their offset in Forth code with a
-; comment indicating what that number refers to.
-;
-; We're at 0 here
-	jp	forthMain
-; 3
-	jp	find
-	nop \ nop	; unused
-; 8
-	nop \ nop	; Placeholder for LATEST
-	nop		; unused
-; 11
-	jp	cellWord
-	jp	compiledWord
-	jp	pushRS
-	jp	popRS
-; 23
-	jp	(iy)	; nativeWord. why use a jump when the real deal is
-	nop		; more compact?
-	jp	next
-	jp	chkPS
-; 32
-	.dw	numberWord
-	.dw	litWord
-	.dw	INITIAL_SP
-	.dw	WORDBUF
-	jp	flagsToBC
-	jp	doesWord
-; 46
-	.dw	RS_ADDR
-	.dw	CINPTR
-	.dw	SYSVNXT
-	.dw	FLAGS
-; 54
-	.dw	PARSEPTR
-	.dw	HERE
-	.dw	CURRENT
-
-; *** Boot dict ***
-; There are only 5 words in the boot dict, but these words' offset need to be
-; stable, so they're part of the "stable ABI"
-
-; Pop previous IP from Return stack and execute it.
-; ( R:I -- )
-	.db	"EXIT"
-	.dw	0
-	.db	4
-EXIT:
-	.dw	23
-	call	popRS
-	ld	(IP), hl
-	jp	next
-
-.fill 3
-
-	.db	"(br)"
-	.dw	$-EXIT
-	.db	4
-BR:
-	.dw	23
-	ld	hl, (IP)
-	ld	e, (hl)
-	inc	hl
-	ld	d, (hl)
-	dec	hl
-	add	hl, de
-	ld	(IP), hl
-	jp	next
-
-	.db	"(?br)"
-	.dw	$-BR
-	.db	5
-CBR:
-	.dw	23
-	pop	hl
-	call	chkPS
-	ld	a, h
-	or	l
-	jr	z, BR+2		; False, branch
-	; True, skip next 2 bytes and don't branch
-	ld	hl, (IP)
-	inc	hl
-	inc	hl
-	ld	(IP), hl
-	jp	next
-
-	.db "EXECUTE"
-	.dw $-CBR
-	.db 7
-EXECUTE:
-	.dw 23
-	pop	iy	; is a wordref
-	call	chkPS
-	ld	l, (iy)
-	ld	h, (iy+1)
-	; HL points to code pointer
-	inc	iy
-	inc	iy
-	; IY points to PFA
-	jp	(hl)	; go!
-
-; Offset: 00a1
-.out $
-; *** End of stable ABI ***
-
-forthMain:
-	; STACK OVERFLOW PROTECTION:
-	; To avoid having to check for stack underflow after each pop operation
-	; (which can end up being prohibitive in terms of costs), we give
-	; ourselves a nice 6 bytes buffer. 6 bytes because we seldom have words
-	; requiring more than 3 items from the stack. Then, at each "exit" call
-	; we check for stack underflow.
-	ld	sp, 0xfffa
-	ld	(INITIAL_SP), sp
-	ld	ix, RS_ADDR
-	; LATEST is a label to the latest entry of the dict. It is written at
-        ; offset 0x08 by the process or person building Forth.
-	ld	hl, (0x08)
-	ld	(CURRENT), hl
-	; For now, we'll always make HERE start right after LATEST. This will
-	; not work on ROM-based system, but I'll adjust later.
-	ld	(HERE), hl
-	ld	hl, .bootName
-	call	find
-	push	de
-	jp	EXECUTE+2
-
-.bootName:
-	.db	"BOOT", 0
-
-; Find the entry corresponding to word where (HL) points to and sets DE to
-; point to that entry.
-; Z if found, NZ if not.
-find:
-	push	bc
-	push	hl
-	; First, figure out string len
-	ld	bc, 0
-	xor	a
-	cpir
-	; C has our length, negative, -1
-	ld	a, c
-	neg
-	dec	a
-	; special case. zero len? we never find anything.
-	jr	z, .fail
-	ld	c, a		; C holds our length
-	; Let's do something weird: We'll hold HL by the *tail*. Because of our
-	; dict structure and because we know our lengths, it's easier to
-	; compare starting from the end. Currently, after CPIR, HL points to
-	; char after null. Let's adjust
-	; Because the compare loop pre-decrements, instead of DECing HL twice,
-	; we DEC it once.
-	dec	hl
-	ld	de, (CURRENT)
-.inner:
-	; DE is a wordref. First step, do our len correspond?
-	push	hl		; --> lvl 1
-	push	de		; --> lvl 2
-	dec	de
-	ld	a, (de)
-	and	0x7f		; remove IMMEDIATE flag
-	cp	c
-	jr	nz, .loopend
-	; match, let's compare the string then
-	dec	de \ dec de	; skip prev field. One less because we
-				; pre-decrement
-	ld	b, c		; loop C times
-.loop:
-	; pre-decrement for easier Z matching
-	dec	de
-	dec	hl
-	ld	a, (de)
-	cp	(hl)
-	jr	nz, .loopend
-	djnz	.loop
-.loopend:
-	; At this point, Z is set if we have a match. In all cases, we want
-	; to pop HL and DE
-	pop	de		; <-- lvl 2
-	pop	hl		; <-- lvl 1
-	jr	z, .end		; match? we're done!
-	; no match, go to prev and continue
-	push	hl			; --> lvl 1
-	dec	de \ dec de \ dec de	; prev field
-	push	de			; --> lvl 2
-	ex 	de, hl
-	ld 	e, (hl)
-	inc 	hl
-	ld 	d, (hl)
-	; DE contains prev offset
-	pop	hl			; <-- lvl 2
-	; HL is prev field's addr
-	; Is offset zero?
-	ld	a, d
-	or	e
-	jr	z, .noprev		; no prev entry
-	; get absolute addr from offset
-	; carry cleared from "or e"
-	sbc	hl, de
-	ex	de, hl			; result in DE
-.noprev:
-	pop	hl			; <-- lvl 1
-	jr	nz, .inner		; try to match again
-	; Z set? end of dict unset Z
-.fail:
-	xor	a
-	inc	a
-.end:
-	pop	hl
-	pop	bc
-	ret
-
-; Checks flags Z and S and sets BC to 0 if Z, 1 if C and -1 otherwise
-flagsToBC:
-	ld	bc, 0
-	ret	z	; equal
-	inc	bc
-	ret	m	; >
-	; <
-	dec	bc
-	dec	bc
-	ret
-
-; Push value HL to RS
-pushRS:
-	inc	ix
-	inc	ix
-	ld	(ix), l
-	ld	(ix+1), h
-	ret
-
-; Pop RS' TOS to HL
-popRS:
-	ld	l, (ix)
-	ld	h, (ix+1)
-	dec ix
-	dec ix
-	ret
-
-; Verifies that SP and RS are within bounds. If it's not, call ABORT
-chkRS:
-	push	ix \ pop hl
-	push	de		; --> lvl 1
-	ld	de, RS_ADDR
-	or	a		; clear carry
-	sbc	hl, de
-	pop	de		; <-- lvl 1
-	jp	c, abortUnderflow
-	ret
-
-chkPS:
-	push	hl
-	ld	hl, (INITIAL_SP)
-	; We have the return address for this very call on the stack and
-	; protected registers. Let's compensate
-	dec	hl \ dec hl
-	dec	hl \ dec hl
-	or	a		; clear carry
-	sbc	hl, sp
-	pop	hl
-	ret	nc		; (INITIAL_SP) >= SP? good
-	jp	abortUnderflow
-
-abortUnderflow:
-	ld	hl, .name
-	call	find
-	push	de
-	jp	EXECUTE+2
-.name:
-	.db "(uflw)", 0
-
-; This routine is jumped to at the end of every word. In it, we jump to current
-; IP, but we also take care of increasing it my 2 before jumping
-next:
-	; Before we continue: are stacks within bounds?
-	call	chkPS
-	call	chkRS
-	ld	de, (IP)
-	ld	h, d
-	ld	l, e
-	inc	de \ inc de
-	ld	(IP), de
-	; HL is an atom list pointer. We need to go into it to have a wordref
-	ld	e, (hl)
-	inc	hl
-	ld	d, (hl)
-	push	de
-	jp	EXECUTE+2
-
-
-; *** Word routines ***
-
-; Execute a list of atoms, which always end with EXIT.
-; IY points to that list. What do we do:
-; 1. Push current IP to RS
-; 2. Set new IP to the second atom of the list
-; 3. Execute the first atom of the list.
-compiledWord:
-	ld	hl, (IP)
-	call	pushRS
-	push	iy \ pop hl
-	inc	hl
-	inc	hl
-	ld	(IP), hl
-	; IY still is our atom reference...
-	ld	l, (iy)
-	ld	h, (iy+1)
-	push	hl	; argument for EXECUTE
-	jp	EXECUTE+2
-
-; Pushes the PFA directly
-cellWord:
-	push	iy
-	jp	next
-
-; The word was spawned from a definition word that has a DOES>. PFA+2 (right
-; after the actual cell) is a link to the slot right after that DOES>.
-; Therefore, what we need to do push the cell addr like a regular cell, then
-; follow the link from the PFA, and then continue as a regular compiledWord.
-doesWord:
-	push	iy	; like a regular cell
-	ld	l, (iy+2)
-	ld	h, (iy+3)
-	push	hl \ pop iy
-	jr	compiledWord
-
-; This is not a word, but a number literal. This works a bit differently than
-; others: PF means nothing and the actual number is placed next to the
-; numberWord reference in the compiled word list. What we need to do to fetch
-; that number is to play with the IP.
-numberWord:
-	ld	hl, (IP)	; (HL) is out number
-	ld	e, (hl)
-	inc	hl
-	ld	d, (hl)
-	inc	hl
-	ld	(IP), hl	; advance IP by 2
-	push	de
-	jp	next
-
-; Similarly to numberWord, this is not a real word, but a string literal.
-; Instead of being followed by a 2 bytes number, it's followed by a
-; null-terminated string. When called, puts the string's address on PS
-litWord:
-	ld	hl, (IP)
-	push	hl
-	; Skip to null char
-	xor	a	; look for null char
-	ld	b, a
-	ld	c, a
-	cpir
-	; CPIR advances HL regardless of comparison, so goes one char after
-	; NULL. This is good, because that's what we want...
-	ld	(IP), hl
-	jp	next
-
-.fill 3
-; *** Dict hook ***
-; This dummy dictionary entry serves two purposes:
-; 1. Allow binary grafting. Because each binary dict always end with a dummy
-;    entry, we always have a predictable prev offset for the grafter's first
-;    entry.
-; 2. Tell icore's "_c" routine where the boot binary ends. See comment there.
-
-	.db	"_bend"
-	.dw	$-EXECUTE
-	.db	5
-
-; Offset: 01c3
-.out $
diff --git a/forth/z80a.fs b/forth/z80a.fs
index 463f8a2..48e2fbf 100644
--- a/forth/z80a.fs
+++ b/forth/z80a.fs
@@ -5,8 +5,15 @@
     256 /MOD SWAP
 ;
 
-( A, spits an assembled byte, A,, spits an assembled word )
-( To debug, change C, to .X )
+
+( H@ offset at which we consider our PC 0. Used to compute
+  PC. To have a proper PC, call  "H@ ORG !" at the beginning
+  of your assembly process. )
+(sysv) ORG
+: PC H@ ORG @ - ;
+
+( A, spits an assembled byte, A,, spits an assembled word
+  Both increase PC. To debug, change C, to .X )
 : A, C, ;
 : A,, SPLITB A, A, ;
 
@@ -19,12 +26,14 @@
 
   To avoid using dict memory in compilation targets, we
   pre-declare label variables here, which means we have a
-  limited number of it. For now, 4 ought to be enough. )
+  limited number of it. For now, 6 ought to be enough. )
 
 (sysv) L1
 (sysv) L2
 (sysv) L3
 (sysv) L4
+(sysv) L5
+(sysv) L6
 
 ( There are 2 label types: backward and forward. For each
   type, there are two actions: set and write. Setting a label
@@ -49,11 +58,17 @@
   another byte before writing the offset.
 )
 
-: BSET H@ SWAP ! ;
-: BWR @ H@ - 1 - A, ;
+: BSET PC SWAP ! ;
+: BWR @ PC - 1 - A, ;
 ( same as BSET, but we need to write a placeholder )
 : FWR BSET 0 A, ;
-: FSET @ DUP H@ -^ 1 - SWAP C! ;
+: FSET
+    @ DUP PC        ( l l pc )
+    -^ 1 -          ( l off )
+    ( warning: l is a PC offset, not a mem addr! )
+    SWAP ORG @ +    ( off addr )
+    C!
+;
 
 
 ( "r" register constants )
@@ -105,6 +120,7 @@
 0xe9 OP1 JP(HL),
 0x12 OP1 LD(DE)A,
 0x1a OP1 LDA(DE),
+0x00 OP1 NOP,
 0xc9 OP1 RET,
 0x17 OP1 RLA,
 0x07 OP1 RLCA,
@@ -199,6 +215,10 @@
     LDIXYr,
 ;
 
+: OP2 CREATE , DOES> @ 256 /MOD A, A, ;
+0xedb1 OP2 CPIR,
+0xed44 OP2 NEG,
+
 ( n -- )
 : OP2n
     CREATE C,
@@ -208,6 +228,8 @@
 0xd3 OP2n OUTnA,
 0xdb OP2n INAn,
 0xc6 OP2n ADDn,
+0xe6 OP2n ANDn,
+0xf6 OP2n Orn,
 0xd6 OP2n SUBn,
 
 ( r n -- )
@@ -316,6 +338,9 @@
     A,,
 ;
 
+: JP(IX), IX DROP JP(HL), ;
+: JP(IY), IY DROP JP(HL), ;
+
 ( 26 == next )
 : JPNEXT, 26 JPnn, ;