Browse code

Added libraries for math32

neauoire authored on 07/02/2022 23:52:22
Showing 4 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,137 @@
1
+( mandelbrot )
2
+
3
+%+  { ADD }  %-  { SUB }  %*  { MUL }  %/  { DIV }
4
+%<  { LTH }  %>  { GTH }  %=  { EQU }  %!  { NEQ }
5
+%++ { ADD2 } %-- { SUB2 } %** { MUL2 } %// { DIV2 }
6
+%<< { LTH2 } %>> { GTH2 } %== { EQU2 } %!! { NEQ2 }
7
+%AUTO-X    { #01 .Screen/auto DEO }
8
+%NEXT-LINE { #0000 .Screen/x DEO2 .Screen/y DEI2k INC2 ROT DEO2 }
9
+
10
+%XMIN { #de69 } ( -8601 )
11
+%XMAX { #0b33 } ( 2867 )
12
+%YMIN { #ecc7 } ( -4915 )
13
+%YMAX { #1333 } ( 4915 )
14
+%MAXI { #20 } ( 32 )
15
+%DX { XMAX XMIN -- #004f // } ( (XMAX-XMIN)/79 )
16
+%DY { YMAX YMIN -- #0018 // } ( (YMAX-YMIN)/24 )
17
+%X { .x LDZ2 }   %Y { .y LDZ2 }
18
+%X2 { .x2 LDZ2 } %Y2 { .y2 LDZ2 }
19
+
20
+%GTS2 { #8000 ++ SWP2 #8000 ++ << }
21
+
22
+%HALT   { #010f DEO }
23
+%EMIT   { #18 DEO }
24
+%PRINT  { ;print-str JSR2 #0a EMIT }
25
+%DEBUG  { ;print-hex/byte JSR2 #0a EMIT }
26
+%DEBUG2 { ;print-hex JSR2 #0a EMIT }
27
+
28
+|00 @System     &vector $2 &wst      $1 &rst    $1 &eaddr  $2 &ecode  $1 &pad     $1 &r       $2 &g      $2 &b     $2 &debug  $1 &halt $1
29
+|20 @Screen     &vector $2 &width    $2 &height $2 &auto   $1 &pad    $1 &x       $2 &y       $2 &addr   $2 &pixel $1 &sprite $1
30
+
31
+|0000 ( zero-page )
32
+
33
+@x  $2 @y  $2
34
+@x2 $2 @y2 $2
35
+
36
+|0100 ( -> )
37
+
38
+	( theme ) 
39
+	#048c .System/r DEO2 
40
+	#048c .System/g DEO2 
41
+	#048c .System/b DEO2
42
+
43
+	#0280 .Screen/width DEO2 ( 640 )
44
+	#01e0 .Screen/height DEO2 ( 480 )
45
+
46
+	#0000 .Screen/x DEO2
47
+	#0000 .Screen/y DEO2
48
+
49
+	AUTO-X
50
+	;draw-mandel JSR2
51
+
52
+BRK
53
+
54
+@draw-mandel ( -- )
55
+
56
+	YMAX YMIN
57
+	&ver
58
+		DUP2 ,&y STR2
59
+		XMAX XMIN
60
+		&hor
61
+			DUP2 ,&x STR2
62
+			#0000 DUP2 DUP2 DUP2 .x STZ2 .y STZ2 .x2 STZ2 .y2 STZ2
63
+			MAXI #00
64
+			&loop
65
+				X Y ;smul2 JSR2 #0b SFT2 [ LIT2 &y $2 ] ++ .y STZ2
66
+				X2 Y2 -- [ LIT2 &x $2 ] ++ .x STZ2
67
+				X X ;smul2 JSR2 #0c SFT2 .x2 STZ2
68
+				Y Y ;smul2 JSR2 #0c SFT2 .y2 STZ2
69
+				X2 Y2 ++ >> #4000 ,&end JCN
70
+				INC GTHk ,&loop JCN
71
+				&end
72
+			NIP POP #03 .Screen/pixel DEO
73
+			DX ++ OVR2 OVR2 GTS2 ;&hor JCN2
74
+		POP2 POP2
75
+		NEXT-LINE
76
+		DY ++ OVR2 OVR2 GTS2 ;&ver JCN2
77
+	POP2 POP2
78
+
79
+JMP2r
80
+
81
+@print-hex ( value* -- )
82
+
83
+	SWP ,&byte JSR 
84
+	&byte ( byte -- )
85
+		STHk #04 SFT ,&parse JSR #18 DEO
86
+		STHr #0f AND ,&parse JSR #18 DEO
87
+	JMP2r
88
+	&parse ( byte -- char ) DUP #09 GTH ,&above JCN #30 ADD JMP2r 
89
+	&above #57 ADD JMP2r
90
+
91
+JMP2r
92
+
93
+@smul2 ( a* b* -- c* )
94
+
95
+	OVR2 POP #80 AND #07 SFT STH 
96
+	OVR #80 AND #07 SFT STHr ADD #01 AND ,&sign STR
97
+	#10 SFT2 #01 SFT2
98
+	SWP2
99
+	#10 SFT2 #01 SFT2
100
+	MUL2
101
+	,&sign LDR ,&flip JCN
102
+		JMP2r
103
+		&flip
104
+	#0000 SWP2 --
105
+
106
+JMP2r
107
+	&sign $1
108
+
109
+@sprites
110
+	0000 0000 0000 0000 0000 0000 0000 0000
111
+	0000 0018 1800 0000 0000 0000 0000 0000
112
+	0000 183c 3c18 0000 0000 0000 0000 0000
113
+	0018 3c7e 7e3c 1800 0000 0000 0000 0000
114
+	183c 7eff ff7e 3c18 0000 0000 0000 0000
115
+	3c7e ffff ffff 7e3c 0000 0000 0000 0000
116
+	7eff ffff ffff ff7e 0000 0000 0000 0000
117
+	ffff ffff ffff ffff 0000 0000 0000 0000
118
+	ffff ffe7 e7ff ffff 0000 0018 1800 0000
119
+	ffff e7c3 c3e7 ffff 0000 183c 3c18 0000
120
+	ffe7 c381 81c3 e7ff 0018 3c7e 7e3c 1800
121
+	e7c3 8100 0081 c3e7 183c 7eff ff7e 3c18
122
+	c381 0000 0000 81c3 3c7e ffff ffff 7e3c
123
+	8100 0000 0000 0081 7eff ffff ffff ff7e
124
+	0000 0000 0000 0000 ffff ffff ffff ffff
125
+	0000 0018 1800 0000 ffff ffff ffff ffff
126
+	0000 183c 3c18 0000 ffff ffff ffff ffff
127
+	0018 3c7e 7e3c 1800 ffff ffff ffff ffff
128
+	183c 7eff ff7e 3c18 ffff ffff ffff ffff
129
+	3c7e ffff ffff 7e3c ffff ffff ffff ffff
130
+	7eff ffff ffff ff7e ffff ffff ffff ffff
131
+	ffff ffff ffff ffff ffff ffff ffff ffff
132
+	ffff ffe7 e7ff ffff ffff ffe7 e7ff ffff
133
+	ffff e7c3 c3e7 ffff ffff e7c3 c3e7 ffff
134
+	ffe7 c381 81c3 e7ff ffe7 c381 81c3 e7ff
135
+	e7c3 8100 0081 c3e7 e7c3 8100 0081 c3e7
136
+	c381 0000 0000 81c3 c381 0000 0000 81c3
137
+	8100 0000 0000 0081 8100 0000 0000 0081
0 138
new file mode 100644
... ...
@@ -0,0 +1,243 @@
1
+%BYE { #01 .System/halt DEO BRK }
2
+%DEBUG { #ab .System/debug DEO }
3
+%IN-RANGE { ROT INCk SWP SUB2 GTH }
4
+%MOD { DIVk MUL SUB }
5
+%MOD2 { DIV2k MUL2 SUB2 }
6
+%NL { #0a .Console/write DEO }
7
+%SP { #20 .Console/write DEO }
8
+
9
+@print-string ( string* -- )
10
+	LDAk ,&not-end JCN
11
+	POP2 JMP2r
12
+	&not-end
13
+	LDAk .Console/write DEO
14
+	INC2
15
+	,print-string JMP
16
+
17
+@print-short-decimal ( short* -- )
18
+	#03e8 DIV2k
19
+		DUP ,print-byte-decimal/second JSR
20
+		MUL2 SUB2
21
+	#0064 DIV2k
22
+		DUP ,print-byte-decimal/third JSR
23
+		MUL2 SUB2
24
+	NIP ,print-byte-decimal/second JMP
25
+
26
+@print-byte-decimal ( byte -- )
27
+	#64 DIVk DUP #30 ADD .Console/write DEO MUL SUB
28
+	&second
29
+	#0a DIVk DUP #30 ADD .Console/write DEO MUL SUB
30
+	&third
31
+	             #30 ADD .Console/write DEO
32
+	JMP2r
33
+
34
+@print-32z-hex ( 32-zp -- )
35
+	#00 SWP
36
+	,print-32-hex JMP
37
+
38
+@print-64z-hex ( 64-zp -- )
39
+	#00 SWP
40
+	( fall through )
41
+
42
+@print-64-hex ( 64-ptr* -- )
43
+	DUP2 #0004 ADD2 SWP2 ( lo32-ptr* hi32-ptr* )
44
+	,print-32-hex JSR
45
+	( fall through )
46
+
47
+@print-32-hex ( 32-ptr* -- )
48
+	INC2k INC2 SWP2 ( lo-ptr* hi-ptr* )
49
+	LDA2 ,print-short-hex JSR
50
+	LDA2 ( fall through )
51
+
52
+@print-short-hex ( short* -- )
53
+	SWP ,print-byte-hex JSR
54
+	( fall through )
55
+
56
+@print-byte-hex ( byte -- )
57
+	DUP #04 SFT ,print-nibble-hex JSR
58
+	#0f AND ( fall through )
59
+
60
+@print-nibble-hex ( nibble -- )
61
+	#30 ADD DUP #39 GTH #07 MUL ADD .Console/write DEO
62
+	JMP2r
63
+
64
+@next-input-byte ( -- number 00
65
+                   OR 01 at end of file )
66
+	,next-input-short JSR ,&eof JCN
67
+	NIP #00
68
+	JMP2r
69
+
70
+	&eof
71
+	#01
72
+	JMP2r
73
+
74
+@next-input-short ( -- number* 00
75
+                    OR 01 at end of file )
76
+	LIT2 &ptr :heap
77
+	LIT2r 0000
78
+	&ffwd
79
+	LDAk #3039 IN-RANGE ,&number JCN
80
+	INC2k SWP2 LDA ,&ffwd JCN
81
+	( eof )
82
+	POP2 POP2r
83
+	;heap ,&ptr STR2
84
+	#01 JMP2r
85
+
86
+	&number
87
+	LIT2r 000a MUL2r
88
+	LDAk #30 SUB #00 STH STH ADD2r
89
+	INC2
90
+	LDAk #3039 IN-RANGE ,&number JCN
91
+
92
+	,&ptr STR2
93
+	STH2r #00
94
+	JMP2r
95
+
96
+@add64 ( dest-ptr* src-ptr* -- carry )
97
+	OVR2 #0004 ADD2 OVR2 #0004 ADD2
98
+	,add32 JSR
99
+	( fall through )
100
+
101
+@adc32 ( dest-ptr* src-ptr* carry -- carry )
102
+	STH
103
+	OVR2 #0002 ADD2 OVR2 #0002 ADD2
104
+	STHr ,adc16 JSR
105
+	,adc16 JMP ( tail call )
106
+
107
+@add64z ( dest-zp src-zp -- carry )
108
+	OVR #04 ADD OVR #04 ADD
109
+	,add32z JSR
110
+	( fall through )
111
+
112
+@adc32z ( dest-zp src-zp carry -- carry )
113
+	STH
114
+	OVR #02 ADD OVR #02 ADD
115
+	STHr ,adc16z JSR
116
+	,adc16z JMP ( tail call )
117
+
118
+@add32z-short ( dest-zp src* -- carry )
119
+	#00 SWP SWP2 ROT
120
+	( fall through )
121
+
122
+@add32-short ( dest-ptr* src* -- carry )
123
+	,&short STR2
124
+	;&src ,add32 JMP ( tail call )
125
+
126
+	&src 0000 &short 0000
127
+
128
+@add32 ( dest-ptr* src-ptr* -- carry )
129
+	OVR2 #0002 ADD2 OVR2 #0002 ADD2
130
+	,add16 JSR
131
+	( fall through )
132
+
133
+@adc16 ( dest-ptr* src-ptr* carry -- carry )
134
+	#00 EQU ,add16 JCN
135
+	OVR2 ;&one ,add16 JSR STH
136
+	,add16 JSR
137
+	STHr ORA
138
+	JMP2r
139
+
140
+	&one 0001
141
+
142
+@add16 ( dest-ptr* src-ptr* -- carry )
143
+	OVR2 LDA2 DUP2 ROT2 LDA2 ( dest-ptr* dest* dest* src* )
144
+	ADD2 GTH2k STH NIP2 ( dest-ptr* sum* / carry )
145
+	SWP2 STA2 STHr ( carry )
146
+	JMP2r
147
+
148
+@add32z ( dest-zp src-zp -- carry )
149
+	OVR #02 ADD OVR #02 ADD
150
+	,add16z JSR
151
+	( fall through )
152
+
153
+@adc16z ( dest-zp src-zp carry -- carry )
154
+	#00 EQU ,add16z JCN
155
+	OVR #00 SWP ;adc16/one ,add16 JSR STH
156
+	,add16z JSR
157
+	STHr ORA
158
+	JMP2r
159
+
160
+@add16z ( dest-zp src-zp -- carry )
161
+	OVR LDZ2 ROT LDZ2 OVR2 ( dest-zp dest* src* dest* )
162
+	ADD2 GTH2k STH NIP2 ( dest-zp sum* / carry )
163
+	ROT STZ2 STHr ( carry )
164
+	JMP2r
165
+
166
+@gth64 ( left-ptr* right-ptr* -- 01 if left > right
167
+                              OR 00 otherwise )
168
+	OVR2 OVR2 ,gth32 JSR ,&greater JCN
169
+	OVR2 OVR2 SWP2 ,gth32 JSR ,&less JCN
170
+	#0004 ADD2 SWP2 #0004 ADD2 SWP2 ,gth32 JMP ( tail call )
171
+
172
+	&greater POP2 POP2 #01 JMP2r
173
+	&less    POP2 POP2 #00 JMP2r
174
+
175
+@gth32z ( left-zp* right-zp* -- 01 if left > right
176
+                             OR 00 otherwise )
177
+	#00 ROT ROT #00 SWP
178
+	( fall through )
179
+
180
+@gth32 ( left-ptr* right-ptr* -- 01 if left > right
181
+                              OR 00 otherwise )
182
+	OVR2 LDA2 OVR2 LDA2 ( left-ptr* right-ptr* left* right* )
183
+	EQU2k ,&lo JCN
184
+	GTH2 NIP2 NIP NIP
185
+	JMP2r
186
+
187
+	&lo
188
+	POP2 POP2
189
+	INC2 INC2 LDA2 SWP2 INC2 INC2 LDA2 ( right-lo* left-lo* )
190
+	LTH2
191
+	JMP2r
192
+
193
+@add32z-short-short-mul ( dest-zp a* b* -- carry )
194
+	STH2 STH2 #00 SWP STH2r STH2r
195
+	( fall through )
196
+
197
+@add32-short-short-mul ( dest-ptr* a* b* -- carry )
198
+	LITr 00 STH LITr 00 STH ( dest-ptr* a* / blo* bhi* )
199
+	#00 ROT ROT #00 SWP ( dest-ptr* ahi* alo* / blo* bhi* )
200
+	STH2kr OVR2 MUL2 ,&alo-bhi STR2
201
+	OVR2 STH2r MUL2 ,&ahi-bhi STR2 ( dest-ptr ahi* alo* / blo* )
202
+	STH2kr MUL2 ,&alo-blo STR2 ( dest-ptr* ahi* / blo* )
203
+	STH2r MUL2 ,&ahi-blo STR2 ( dest-ptr* )
204
+	DUP2 ;&sum1 ;add32 JSR2 STH
205
+	DUP2 ;&sum2 ;add32 JSR2 STH
206
+	     ;&sum3 ;add32 JSR2
207
+	STH2r ORA ORA
208
+	JMP2r
209
+
210
+	&sum1 &ahi-bhi 0000 &alo-blo 0000
211
+	&sum2 00 &ahi-blo 0000 00
212
+	&sum3 00 &alo-bhi 0000 00
213
+
214
+@zero64 ( ptr* -- )
215
+	#08 ,zero JMP ( tail call )
216
+
217
+@zero32z ( zp -- )
218
+	#00 SWP
219
+	( fall through )
220
+
221
+@zero32 ( ptr* -- )
222
+	#04
223
+	( fall through )
224
+
225
+@zero ( ptr* len -- )
226
+	#00 SWP ADD2k NIP2 SWP2
227
+	&loop
228
+	DUP2 #00 ROT ROT STA
229
+	INC2
230
+	GTH2k ,&loop JCN
231
+	POP2 POP2
232
+	JMP2r
233
+
234
+@is-nonzero64 ( ptr* -- flag )
235
+	DUP2 ,is-nonzero32 JSR STH
236
+	#0004 ADD2 ,is-nonzero32 JSR STHr ORA
237
+	JMP2r
238
+
239
+@is-nonzero32 ( ptr* -- flag )
240
+	LDA2k ORA STH
241
+	INC2 INC2 LDA2 ORA STHr ORA
242
+	JMP2r
243
+
0 244
new file mode 100644
... ...
@@ -0,0 +1,435 @@
1
+( math32.tal )
2
+( )
3
+( This library supports arithmetic on 32-bit unsigned integers, )
4
+( also known as long values. )
5
+( )
6
+( 32-bit long values are represented by two 16-bit short values: )
7
+( )
8
+(      decimal  hexadecimal  uxn literals )
9
+(            0   0x00000000   #0000 #0000 )
10
+(            1   0x00000001   #0000 #0001 )
11
+(         4660   0x00001234   #0000 #1234 )
12
+(        65535   0x0000ffff   #0000 #ffff )
13
+(        65536   0x00010000   #0001 #0000 )
14
+(     16777215   0x00ffffff   #00ff #ffff )
15
+(   4294967295   0xffffffff   #ffff #ffff )
16
+( )
17
+( The most significant 16-bit, the "high bits", are stored first. )
18
+( We document long values as x** -- equivalent to xhi* xlo*. )
19
+( )
20
+( Operations supported: )
21
+( )
22
+(   NAME            STACK EFFECT        DEFINITION       )
23
+(   add32           x** y** -> z**      x + y            )
24
+(   sub32           x** y** -> z**      x - y            )
25
+(   mul16           x*  y*  -> z**      x * y            )
26
+(   mul32           x** y** -> z**      x * y            )
27
+(   div32           x** y** -> q**      x / y            )
28
+(   mod32           x** y** -> r**      x % y            )
29
+(   divmod32        x** y** -> q** r**  x / y, x % y     )
30
+(   gcd32           x** y** -> z**      gcd(x, y)        )
31
+(   negate32        x**     -> z**      -x               )
32
+(   lshift32        x** n^  -> z**      x<<n             )
33
+(   rshift32        x** n^  -> z**      x>>n             )
34
+(   and32           x** y** -> z**      x & y            )
35
+(   or32            x** y** -> z**      x | y            )
36
+(   xor32           x** y** -> z**      x ^ y            )
37
+(   complement32    x**     -> z**      ~x               )
38
+(   eq32            x** y** -> bool^    x == y           )
39
+(   ne32            x** y** -> bool^    x != y           )
40
+(   is-zero32       x**     -> bool^    x == 0           )
41
+(   non-zero32      x**     -> bool^    x != 0           )
42
+(   lt32            x** y** -> bool^    x < y            )
43
+(   gt32            x** y** -> bool^    x > y            )
44
+(   lteq32          x** y** -> bool^    x <= y           )
45
+(   gteq32          x** y** -> bool^    x >= y           )
46
+(   bitcount8       x^      -> bool^    floor(log2(x))+1 )
47
+(   bitcount16      x*      -> bool^    floor(log2(x))+1 )
48
+(   bitcount32      x**     -> bool^    floor(log2(x))+1 )
49
+( )
50
+( In addition to the code this file uses 44 bytes of registers )
51
+( to store temporary state: )
52
+( )
53
+(   - shared memory, 16 bytes )
54
+(   - mul32 memory, 12 bytes )
55
+(   - _divmod32 memory, 16 bytes )
56
+
57
+%DEBUG { #ff #0e DEO }
58
+%RTN  { JMP2r }
59
+%TOR { ROT ROT } ( a b c -> c a b )
60
+%COMPLEMENT32 { SWP2 #ffff EOR2 SWP2 #ffff EOR2 }
61
+%DUP4 { OVR2 OVR2 }
62
+%POP4 { POP2 POP2 }
63
+
64
+( bitcount: number of bits needed to represent number )
65
+( equivalent to floor[log2[x]] + 1 )
66
+
67
+@bitcount8 ( x^ -> n^ )
68
+    #00 SWP ( n x )
69
+    &loop
70
+    DUP #00 EQU ( n x x=0 )
71
+    ,&done JCN ( n x )
72
+    #01 SFT ( n x>>1 )
73
+    SWP INC SWP ( n+1 x>>1 )
74
+    ,&loop JMP
75
+    &done
76
+    POP ( n )
77
+    RTN
78
+
79
+@bitcount16 ( x* -> n^ )
80
+    SWP ( xlo xhi )
81
+    ;bitcount8 JSR2 ( xlo nhi )
82
+    DUP #00 NEQ ( xlo nhi nhi!=0 )
83
+    ,&hi-set JCN ( xlo nhi )
84
+    SWP ;bitcount8 JSR2 ADD ( nhi+nlo )
85
+    RTN 
86
+    &hi-set
87
+    SWP POP #08 ADD ( nhi+8 )
88
+    RTN
89
+
90
+@bitcount32 ( x** -> n^ )
91
+    SWP2 ( xlo* xhi* )
92
+    ;bitcount16 JSR2 ( xlo* nhi )
93
+    DUP #00 NEQ ( xlo* nhi nhi!=0 )
94
+    ,&hi-set JCN ( xlo* nhi )
95
+    TOR ;bitcount16 JSR2 ADD RTN ( nhi+nlo )
96
+    &hi-set
97
+    TOR POP2 #10 ADD ( nhi+16 )    
98
+    RTN
99
+
100
+( equality )
101
+
102
+( x == y )
103
+@eq32 ( xhi* xlo* yhi* ylo* -> bool^ )
104
+    ROT2 EQU2 STH
105
+    EQU2 STHr AND RTN
106
+
107
+( x != y )
108
+@ne32 ( xhi* xlo* yhi* ylo* -> bool^ )
109
+    ROT2 NEQ2 STH
110
+    NEQ2 STHr ORA RTN
111
+
112
+( x == 0 )
113
+@is-zero32 ( x** -> bool^ )
114
+    ORA2 #0000 EQU2 RTN
115
+
116
+( x != 0 )
117
+@non-zero32 ( x** -> bool^ )
118
+    ORA2 #0000 NEQ2 RTN
119
+
120
+( comparisons )
121
+
122
+( x < y )
123
+@lt32 ( x** y** -> bool^ )
124
+    ROT2 SWP2 ( xhi yhi xlo ylo )
125
+    LTH2 ,&lt-lo JCN ( xhi yhi )
126
+    LTH2 RTN
127
+    &lt-lo
128
+    GTH2 #00 EQU RTN
129
+
130
+( x <= y )
131
+@lteq32 ( x** y** -> bool^ )
132
+    ROT2 SWP2 ( xhi yhi xlo ylo )
133
+    GTH2 ,&gt-lo JCN ( xhi yhi )
134
+    GTH2 #00 EQU RTN
135
+    &gt-lo
136
+    LTH2 RTN
137
+
138
+( x > y )
139
+@gt32 ( x** y** -> bool^ )
140
+    ROT2 SWP2 ( xhi yhi xlo ylo )
141
+    GTH2 ,&gt-lo JCN ( xhi yhi )
142
+    GTH2 RTN
143
+    &gt-lo
144
+    LTH2 #00 EQU RTN
145
+
146
+( x > y )
147
+@gteq32 ( x** y** -> bool^ )
148
+    ROT2 SWP2 ( xhi yhi xlo ylo )
149
+    LTH2 ,&lt-lo JCN ( xhi yhi )
150
+    LTH2 #00 EQU RTN
151
+    &lt-lo
152
+    GTH2 RTN
153
+
154
+( bitwise operations )
155
+
156
+( x & y )
157
+@and32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
158
+    ROT2 AND2 STH2 AND2 STH2r RTN
159
+
160
+( x | y )
161
+@or32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
162
+    ROT2 ORA2 STH2 ORA2 STH2r RTN
163
+
164
+( x ^ y )
165
+@xor32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
166
+    ROT2 EOR2 STH2 EOR2 STH2r RTN
167
+
168
+( ~x )
169
+@complement32 ( x** -> ~x** )
170
+    COMPLEMENT32 RTN
171
+
172
+( temporary registers )
173
+( shared by most operations, except mul32 and div32 )
174
+[ @x0 $1 @x1 $1 @x2 $1 @x3 $1
175
+  @y0 $1 @y1 $1 @y2 $1 @y3 $1
176
+  @z0 $1 @z1 $1 @z2 $1 @z3 $1
177
+  @w0 $1 @w1 $1 @w2 $2 ]
178
+
179
+( bit shifting )
180
+
181
+( x >> n )
182
+@rshift32 ( x** n^ -> x<<n )
183
+    DUP #08 LTH ;rshift32-0 JCN2 ( x n )
184
+    DUP #10 LTH ;rshift32-1 JCN2 ( x n )
185
+    DUP #18 LTH ;rshift32-2 JCN2 ( x n )
186
+    ;rshift32-3 JMP2 ( x n )
187
+    RTN
188
+
189
+( shift right by 0-7 bits )
190
+@rshift32-0 ( x** n^ -> x<<n )
191
+        STHk  SFT                      ;z3 STA  ( write z3 )
192
+    #00 STHkr SFT2 #00 ;z3 LDA ORA2 ;z2 STA2 ( write z2,z3 )
193
+    #00 STHkr SFT2 #00 ;z2 LDA ORA2 ;z1 STA2 ( write z1,z2 )
194
+    #00 STHr  SFT2 #00 ;z1 LDA ORA2             ( compute z0,z1 )
195
+    ;z2 LDA2
196
+    RTN
197
+
198
+( shift right by 8-15 bits )
199
+@rshift32-1 ( x** n^ -> x<<n )
200
+    #08 SUB STH POP 
201
+        STHkr SFT                      ;z3 STA  ( write z3 )
202
+    #00 STHkr SFT2 #00 ;z3 LDA ORA2 ;z2 STA2 ( write z2,z3 )
203
+    #00 STHr  SFT2 #00 ;z2 LDA ORA2             ( compute z1,z2 )
204
+    #00 TOR ;z3 LDA
205
+    RTN
206
+
207
+( shift right by 16-23 bits )
208
+@rshift32-2 ( x** n^ -> x<<n )
209
+    #10 SUB STH POP2
210
+        STHkr SFT                      ;z3 STA ( write z3 )
211
+    #00 STHr  SFT2 #00 ;z3 LDA ORA2            ( compute z2,z3 )
212
+    #0000 SWP2
213
+    RTN
214
+
215
+( shift right by 16-23 bits )
216
+@rshift32-3 ( x** n^ -> x<<n )
217
+    #18 SUB STH POP2 POP ( x0 )
218
+    #00 SWP #0000 SWP2 ( 00 00 00 x0 )
219
+    STHr SFT
220
+    RTN
221
+
222
+( x << n )
223
+@lshift32 ( x** n^ -> x<<n )
224
+    DUP #08 LTH ;lshift32-0 JCN2 ( x n )
225
+    DUP #10 LTH ;lshift32-1 JCN2 ( x n )
226
+    DUP #18 LTH ;lshift32-2 JCN2 ( x n )
227
+    ;lshift32-3 JMP2 ( x n )
228
+    RTN
229
+
230
+( shift left by 0-7 bits )
231
+@lshift32-0 ( x** n^ -> x<<n )
232
+    #40 SFT STH ( stash n<<4 )
233
+    #00 SWP STHkr SFT2                     ;z2 STA2 ( store z2,z3 )
234
+    #00 SWP STHkr SFT2 #00 ;z2 LDA ORA2 ;z1 STA2 ( store z1,z2 )
235
+    #00 SWP STHkr SFT2 #00 ;z1 LDA ORA2 ;z0 STA2 ( store z0,z1 )
236
+            STHr  SFT      ;z0 LDA ORA              ( calculate z0 )
237
+    ;z1 LDA ;z2 LDA2
238
+    RTN
239
+
240
+( shift left by 8-15 bits )
241
+@lshift32-1 ( x** n^ -> x<<n )
242
+    #08 SUB #40 SFT STH ( stash [n-8]<<4 )
243
+    #00 SWP STHkr SFT2                     ;z1 STA2 ( store z1,z2 )
244
+    #00 SWP STHkr SFT2 #00 ;z1 LDA ORA2 ;z0 STA2 ( store z0,z1 )
245
+            STHr  SFT      ;z0 LDA ORA              ( calculate z0 )
246
+    SWP POP ( x0 unused )
247
+    ;z1 LDA2 #00
248
+    RTN
249
+
250
+( shift left by 16-23 bits )
251
+@lshift32-2 ( x** n^ -> x<<n )
252
+    #10 SUB #40 SFT STH ( stash [n-16]<<4 )
253
+    #00 SWP STHkr SFT2                ;z0 STA2 ( store z0,z1 )
254
+            STHr  SFT  ;z0 LDA ORA             ( calculate z0 )
255
+    STH POP2 STHr
256
+    ;z1 LDA #0000
257
+    RTN
258
+
259
+( shift left by 24-31 bits )
260
+@lshift32-3 ( x** n^ -> x<<n )
261
+    #18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 )
262
+    SFT ( x0 x1 x2 x3<<r )
263
+    SWP2 POP2 SWP POP #0000 #00
264
+    RTN
265
+
266
+( arithmetic )
267
+
268
+( x + y )
269
+@add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* )
270
+    ;y2 STA2 ;y0 STA2 ( save ylo, yhi )
271
+    ;x2 STA2 ;x0 STA2 ( save xlo, xhi )
272
+    #0000 #0000 ;z0 STA2 ;z2 STA2 ( reset zhi, zlo )
273
+
274
+    ( x3 + y3 => z2z3 )
275
+    #00 ;x3 LDA #00 ;y3 LDA ADD2 ;z2 STA2
276
+
277
+    ( x2 + y2 + z2 => z1z2 )
278
+    #00 ;x2 LDA ;z1 LDA2 ADD2 ;z1 STA2
279
+    #00 ;y2 LDA ;z1 LDA2 ADD2 ;z1 STA2
280
+
281
+    ( x1 + y1 + z1 => z0z1 )
282
+    #00 ;x1 LDA ;z0 LDA2 ADD2 ;z0 STA2
283
+    #00 ;y1 LDA ;z0 LDA2 ADD2 ;z0 STA2
284
+
285
+    ( x0 + y0 + z0 => z0 )
286
+    ;x0 LDA ;z0 LDA ADD ;z0 STA
287
+    ;y0 LDA ;z0 LDA ADD ;z0 STA
288
+
289
+    ( load zhi,zlo )
290
+    ;z0 LDA2 ;z2 LDA2
291
+    RTN
292
+
293
+( -x )
294
+@negate32 ( x** -> -x** )
295
+    COMPLEMENT32
296
+    INC2 ( ~xhi -xlo )
297
+    DUP2 #0000 NEQ2 ( ~xhi -xlo non-zero? )
298
+    ,&done JCN ( xlo non-zero => don't inc hi )
299
+    SWP2 INC2 SWP2 ( -xhi -xlo )
300
+    &done
301
+    RTN
302
+
303
+( x - y )
304
+@sub32 ( x** y** -> z** )
305
+    ;negate32 JSR2 ;add32 JSR2 RTN
306
+
307
+( 16-bit multiplication )
308
+@mul16 ( x* y* -> z** )
309
+    ;y1 STA ;y0 STA ( save ylo, yhi )
310
+    ;x1 STA ;x0 STA ( save xlo, xhi )
311
+    #0000 #00 ;z1 STA2 ;z3 STA ( reset z1,z2,z3 )
312
+    #0000 #00 ;w0 STA2 ;w2 STA ( reset w0,w1,w2 )
313
+
314
+    ( x1 * y1 => z1z2 )
315
+    #00 ;x1 LDA #00 ;y1 LDA MUL2 ;z2 STA2
316
+
317
+    ( x0 * y1 => z0z1 )
318
+    #00 ;x0 LDA #00 ;y1 LDA MUL2 ;z1 LDA2 ADD2 ;z1 STA2
319
+
320
+    ( x1 * y0 => w1w2 )
321
+    #00 ;x1 LDA #00 ;y0 LDA MUL2 ;w1 STA2
322
+
323
+    ( x0 * y0 => w0w1 )
324
+    #00 ;x0 LDA #00 ;y0 LDA MUL2 ;w0 LDA2 ADD2 ;w0 STA2
325
+
326
+    ( add z and a<<8 )
327
+    #00 ;z1 LDA2 ;z3 LDA
328
+    ;w0 LDA2 ;w2 LDA #00
329
+    ;add32 JSR2
330
+    RTN
331
+
332
+( x * y )
333
+@mul32 ( x** y** -> z** ) 
334
+    ,&y1 STR2 ,&y0 STR2 ( save ylo, yhi )
335
+    ,&x1 STR2 ,&x0 STR2 ( save xlo, xhi )
336
+    ,&y1 LDR2 ,&x1 LDR2 ;mul16 JSR2 ( [x1*y1] )
337
+    ,&z1 STR2 ,&z0 STR2 ( sum = x1*y1, save zlo, zhi )
338
+    ,&y1 LDR2 ,&x0 LDR2 MUL2 ( [x0*y1]<<16 )
339
+    ,&y0 LDR2 ,&x1 LDR2 MUL2 ( [x1*y0]<<16 )
340
+    ( [x0*y0]<<32 will completely overflow )
341
+    ADD2 ,&z0 LDR2 ADD2 ( sum += x0*y1<<16 + x1*y0<<16 )
342
+    ,&z1 LDR2
343
+    RTN
344
+[ &x0 $2 &x1 $2
345
+  &y0 $2 &y1 $2
346
+  &z0 $2 &z1 $2 ]
347
+
348
+@div32 ( x** y** -> q** )
349
+    ;_divmod32 JSR2
350
+    ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2
351
+    RTN
352
+
353
+@mod32 ( x** y** -> r** )
354
+    ;_divmod32 JSR2
355
+    ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2
356
+    RTN
357
+
358
+@divmod32 ( x** y** -> q** r** )
359
+    ;_divmod32 JSR2
360
+    ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2
361
+    ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2
362
+    RTN
363
+
364
+( calculate and store x / y and x % y )
365
+@_divmod32 ( x** y** -> )
366
+    ( store y and x for repeated use )
367
+    ,&div1 STR2 ,&div0 STR2 ( y -> div )
368
+    ,&rem1 STR2 ,&rem0 STR2 ( x -> rem )
369
+
370
+    ( if x < y then the answer is 0 )
371
+    ,&rem0 LDR2 ,&rem1 LDR2
372
+    ,&div0 LDR2 ,&div1 LDR2
373
+    ;lt32 JSR2 ,&is-zero JCN ,&not-zero JMP
374
+    &is-zero
375
+    #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 RTN
376
+
377
+    ( x >= y so the answer is >= 1 )
378
+    &not-zero
379
+    #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 ( 0 -> quo )
380
+
381
+    ( bitcount[x] - bitcount[y] determines the largest multiple of y to try )
382
+    ,&rem0 LDR2 ,&rem1 LDR2 ;bitcount32 JSR2 ( rbits^ )
383
+    ,&div0 LDR2 ,&div1 LDR2 ;bitcount32 JSR2 ( rbits^ dbits^ )
384
+    SUB ( shift=rbits-dits )
385
+    #00 DUP2 ( shift 0 shift 0 )
386
+
387
+    ( 1<<shift -> cur )
388
+    #0000 #0001 ROT2 POP
389
+    ;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2
390
+    
391
+    ( div<<shift -> div )
392
+    ,&div0 LDR2 ,&div1 LDR2 ROT2 POP
393
+    ;lshift32 JSR2 ,&div1 STR2 ,&div0 STR2 
394
+
395
+    ,&loop JMP
396
+
397
+    [ &div0 $2 &div1 $2
398
+      &rem0 $2 &rem1 $2
399
+      &quo0 $2 &quo1 $2
400
+      &cur0 $2 &cur1 $2 ]
401
+
402
+    &loop
403
+    ( if rem >= the current divisor, we can subtract it and add to quotient )
404
+    ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 ;lt32 JSR2 ( is rem < div? )
405
+    ,&rem-lt JCN ( if rem < div skip this iteration )
406
+
407
+    ( since rem >= div, we have found a multiple of y that divides x )
408
+    ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 ;sub32 JSR2 ,&rem1 STR2 ,&rem0 STR2 ( rem -= div )
409
+    ,&quo0 LDR2 ,&quo1 LDR2 ,&cur0 LDR2 ,&cur1 LDR2 ;add32 JSR2 ,&quo1 STR2 ,&quo0 STR2 ( quo += cur )
410
+
411
+    &rem-lt
412
+    ,&div0 LDR2 ,&div1 LDR2 #01 ;rshift32 JSR2 ,&div1 STR2 ,&div0 STR2 ( div >>= 1 )
413
+    ,&cur0 LDR2 ,&cur1 LDR2 #01 ;rshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2 ( cur >>= 1 )
414
+    ,&cur0 LDR2 ,&cur1 LDR2 ;non-zero32 JSR2 ,&loop JCN ( if cur>0, loop. else we're done )
415
+    RTN
416
+
417
+( greatest common divisor - euclidean algorithm )
418
+@gcd32 ( x** y** -> z** )
419
+    &loop ( x y )
420
+    DUP4 ( x y y )
421
+    ;is-zero32 JSR2 ( x y y=0? )
422
+    ,&done JCN ( x y )
423
+    DUP4 ( x y y )
424
+    STH2 STH2 ( x y [y] )
425
+    ;mod32 JSR2 ( r=x%y [y] )
426
+    STH2r ( rhi rlo yhi [ylo] )
427
+    ROT2 ( rlo yhi rhi [ylo] )
428
+    ROT2 ( yhi rhi rlo [ylo] )
429
+    STH2r ( yhi rhi rlo ylo )
430
+    ROT2 ( yhi rlo ylo rhi )
431
+    ROT2 ( yhi ylo rhi rlo )
432
+    ,&loop JMP
433
+    &done
434
+    POP4 ( x )
435
+    RTN
0 436
new file mode 100644
1 437
Binary files /dev/null and b/untitled.chr differ