Browse code

Progress on asma

Andrew Alderwick authored on 15/05/2021 17:08:08
Showing 3 changed files
... ...
@@ -200,7 +200,8 @@ do
200 200
   _with_0:write('(          automatically generated code below          )\n')
201 201
   _with_0:write('(          see etc/asma.moon for instructions          )\n')
202 202
   _with_0:write('\n(')
203
-  _with_0:write(fmt('label', 'less than', 'greater than', 'key', 'data )'))
203
+  _with_0:write(fmt('label', 'less', 'greater', 'key', 'binary'))
204
+  _with_0:write(fmt('', 'than', 'than', 'string', 'data )'))
204 205
   _with_0:write('\n')
205 206
   for name, tree in spairs(trees) do
206 207
     _with_0:write(('@%s\n'):format(name))
... ...
@@ -229,7 +230,43 @@ do
229 230
     end
230 231
     _with_0:write('\n')
231 232
   end
232
-  _with_0:write('@asma-heap\n\n')
233
+  _with_0:write([[(
234
+	Heap, a large temporary area for keeping track of labels. More complex
235
+	programs need more of this space. If there's insufficient space then the
236
+	assembly process will fail, but having extra space above what the most
237
+	complex program needs provides no benefit.
238
+
239
+	This heap, and the buffers below, are free to be used to hold temporary
240
+	data between assembly runs, and do not need to be initialized with any
241
+	particular contents to use the assembler.
242
+)
243
+
244
+@asma-heap
245
+
246
+|ff00 &end
247
+
248
+(
249
+	Buffer for use with loading source code.
250
+	The minimum size is the length of the longest token plus one, which is
251
+	0x21 to keep the same capability of the C assembler.
252
+	Larger sizes are more efficient, provided there is enough
253
+	heap space to keep track of all the labels.
254
+)
255
+
256
+@asma-read-buffer
257
+
258
+|ff80 &end
259
+
260
+(
261
+	Buffer for use with writing output.
262
+	The minimum size is 1, and larger sizes are more efficient.
263
+)
264
+
265
+@asma-write-buffer
266
+
267
+|ffff &end
268
+
269
+]])
233 270
   _with_0:close()
234 271
 end
235 272
 return os.execute('mv projects/software/asma.usm.tmp projects/software/asma.usm')
... ...
@@ -141,7 +141,8 @@ with assert io.open 'projects/software/asma.usm.tmp', 'w'
141 141
 	\write '(          automatically generated code below          )\n'
142 142
 	\write '(          see etc/asma.moon for instructions          )\n'
143 143
 	\write '\n('
144
-	\write fmt 'label', 'less than', 'greater than', 'key', 'data )'
144
+	\write fmt 'label', 'less', 'greater', 'key', 'binary'
145
+	\write fmt '', 'than', 'than', 'string', 'data )'
145 146
 	\write '\n'
146 147
 	for name, tree in spairs trees
147 148
 		\write '@%s\n'\format name
... ...
@@ -163,7 +164,43 @@ with assert io.open 'projects/software/asma.usm.tmp', 'w'
163 164
 				''
164 165
 			\write fmt label, lefts[k] or ' $2', rights[k] or ' $2', unpack v
165 166
 		\write '\n'
166
-	\write '@asma-heap\n\n'
167
+	\write [[(
168
+	Heap, a large temporary area for keeping track of labels. More complex
169
+	programs need more of this space. If there's insufficient space then the
170
+	assembly process will fail, but having extra space above what the most
171
+	complex program needs provides no benefit.
172
+
173
+	This heap, and the buffers below, are free to be used to hold temporary
174
+	data between assembly runs, and do not need to be initialized with any
175
+	particular contents to use the assembler.
176
+)
177
+
178
+@asma-heap
179
+
180
+|ff00 &end
181
+
182
+(
183
+	Buffer for use with loading source code.
184
+	The minimum size is the length of the longest token plus one, which is
185
+	0x21 to keep the same capability of the C assembler.
186
+	Larger sizes are more efficient, provided there is enough
187
+	heap space to keep track of all the labels.
188
+)
189
+
190
+@asma-read-buffer
191
+
192
+|ff80 &end
193
+
194
+(
195
+	Buffer for use with writing output.
196
+	The minimum size is 1, and larger sizes are more efficient.
197
+)
198
+
199
+@asma-write-buffer
200
+
201
+|ffff &end
202
+
203
+]]
167 204
 	\close!
168 205
 os.execute 'mv projects/software/asma.usm.tmp projects/software/asma.usm'
169 206
 
... ...
@@ -8,6 +8,16 @@
8 8
 |0100
9 9
 	;reset JMP2
10 10
 
11
+(
12
+	Asma's public interface.
13
+	These are the routines that programs bundling Asma into bigger projects are
14
+	expected to call.
15
+)
16
+
17
+(
18
+	Common macros for use later on.
19
+)
20
+
11 21
 %asma-IF-ERROR { ;asma/error LDA2 ORA }
12 22
 %asma-LOG { #01 }
13 23
 (
... ...
@@ -19,6 +29,14 @@
19 29
 %asma-DEO2 { asma-LOG NEQ JMP DEO2k POP POP2 }
20 30
 %asma-DEO { asma-LOG NEQ JMP DEOk POP2 }
21 31
 
32
+(
33
+	Debugging routines. These all output extra information to the Console.
34
+	These can be stripped out to save space, once the references to them are
35
+	removed. Look for the word DEBUG later on to find these references: the
36
+	lines that contain that word can be deleted to strip out the functionality
37
+	cleanly.
38
+)
39
+
22 40
 @asma-dump-sublabels ( incoming-ptr* -- )
23 41
 	LDA2
24 42
 	ORAk ,&valid-incoming-ptr JCN
... ...
@@ -82,11 +100,10 @@
82 100
 	;asma-trees/labels ;asma-dump-labels JSR2
83 101
 	;asma/line LDA2 .Console/short #04 asma-DEO2
84 102
 	;&lines .Console/string #04 asma-DEO2
85
-	#0000 DIV
86 103
 	BRK
87 104
 
88 105
 	&filename
89
-		( "test.usm 00 )
106
+		"test.usm 00
90 107
 		"projects/examples/gui/label.usm 00
91 108
 
92 109
 	&lines [ 20 "lines 20 "in 20 "total. 0a 00 ]
... ...
@@ -175,9 +192,21 @@
175 192
 	POP POP2 POP2
176 193
 	JMP2r
177 194
 
178
-@asma [ &pass $1 &state $1 &line $2 &token $2 &orig-token $2 &heap $2 &addr $2 &written-addr $2 &scope-addr $2 &error $2 ]
195
+@asma [ &pass $1 &state $1 &line $2 &token $2 &orig-token $2 &heap $2 &addr $2 &written-addr $2 &flush-fn $2 &scope-addr $2 &error $2 ]
179 196
 @asma-trees [ &labels $2 &macros $2 &opcodes $2 &scope $2 ]
180 197
 
198
+(
199
+	The main routine to assemble a single token.
200
+	asma/state contains several meaningful bits:
201
+	0x02 we are in a comment,
202
+	0x04 we are in a macro body, and
203
+	0x08 we are in a macro body that we are ignoring
204
+	   (because the macro was already defined in a previous pass).
205
+	Since 0x08 never appears without 0x04, the lowest set bit of asma/state is
206
+	always 0x02 or 0x04, or 0x00 when no bit is set, which is very handy for
207
+	use with jump tables. The lowest set bit is found with #00 (n) SUBk AND.
208
+)
209
+
181 210
 @asma-assemble-token ( string-ptr* -- )
182 211
 	DUP2 .Console/string #02 asma-DEO2
183 212
 	#0a .Console/char #02 asma-DEO
... ...
@@ -195,8 +224,6 @@
195 224
 	LITr 00 STH2 ( / end* char end* 00 end* )
196 225
 	STAr ( / end* char end* )
197 226
 
198
-	( find lowest set bit of assembler/state
199
-	  in C, this would be i & -i )
200 227
 	#00 ;asma/state LDA SUBk AND ( tree-offset* / end* )
201 228
 	DUP2 ;&first-char-trees ADD2 ( tree-offset* incoming-ptr* / end* )
202 229
 	;asma-traverse-tree JSR2
... ...
@@ -213,7 +240,7 @@
213 240
 
214 241
 	&not-found ( tree-offset* dummy* / end* )
215 242
 	POP2 POP2r
216
-	;&first-char-dispatch ADD2 LDA2
243
+	;&body-routines ADD2 LDA2
217 244
 	JMP2 ( tail call )
218 245
 
219 246
 	&first-char-trees
... ...
@@ -221,7 +248,7 @@
221 248
 		:asma-first-char-comment/_entry
222 249
 		:asma-first-char-macro/_entry
223 250
 
224
-	&first-char-dispatch
251
+	&body-routines
225 252
 		:asma-normal-body
226 253
 		:asma-ignore
227 254
 		:asma-macro-body
... ...
@@ -421,7 +448,12 @@
421 448
 	POP2r ROT ROT POP2
422 449
 	JMP2r
423 450
 
424
-( actions based on first character )
451
+(
452
+	First character routines.
453
+	The following routines (that don't have a FORTH-like signature) are called
454
+	to deal with tokens that begin with particular first characters, or (for
455
+	-body routines) tokens that fail to match any first character in their tree.
456
+)
425 457
 
426 458
 %asma-STATE-SET { ;asma/state LDA ORA ;asma/state STA }
427 459
 %asma-STATE-CLEAR { #ff EOR ;asma/state LDA AND ;asma/state STA }
... ...
@@ -457,7 +489,9 @@
457 489
 	JMP2r
458 490
 
459 491
 @asma-macro-body
492
+	;asma/state LDA #08 AND ,&skip JCN
460 493
 	;asma/token LDA2 ;asma-append-heap-string JSR2
494
+	&skip
461 495
 	JMP2r
462 496
 
463 497
 @asma-macro-end
... ...
@@ -673,7 +707,7 @@
673 707
 	;asma-msg-label ;asma/error STA2
674 708
 	JMP2r
675 709
 
676
-( messages )
710
+( Error messages )
677 711
 
678 712
 @asma-msg-hex       "Invalid 20 "hexadecimal 00
679 713
 @asma-msg-zero-page "Address 20 "not 20 "in 20 "zero 20 "page 00
... ...
@@ -688,7 +722,8 @@
688 722
 (          automatically generated code below          )
689 723
 (          see etc/asma.moon for instructions          )
690 724
 
691
-(	label       less than  greater than key            data )
725
+(	label       less       greater      key            binary
726
+	            than       than         string         data )
692 727
 
693 728
 @asma-first-char-comment
694 729
 	&_entry      $2         $2          ') 00          :asma-comment-end
... ...
@@ -838,5 +873,39 @@
838 873
 	&EOR         $2         $2          "EOR 00
839 874
 	&SFT         $2         $2          "SFT 00
840 875
 
876
+(
877
+	Heap, a large temporary area for keeping track of labels. More complex
878
+	programs need more of this space. If there's insufficient space then the
879
+	assembly process will fail, but having extra space above what the most
880
+	complex program needs provides no benefit.
881
+
882
+	This heap, and the buffers below, are free to be used to hold temporary
883
+	data between assembly runs, and do not need to be initialized with any
884
+	particular contents to use the assembler.
885
+)
886
+
841 887
 @asma-heap
842 888
 
889
+|ff00 &end
890
+
891
+(
892
+	Buffer for use with loading source code.
893
+	The minimum size is the length of the longest token plus one, which is
894
+	0x21 to keep the same capability of the C assembler.
895
+	Larger sizes are more efficient, provided there is enough
896
+	heap space to keep track of all the labels.
897
+)
898
+
899
+@asma-read-buffer
900
+
901
+|ff80 &end
902
+
903
+(
904
+	Buffer for use with writing output.
905
+	The minimum size is 1, and larger sizes are more efficient.
906
+)
907
+
908
+@asma-write-buffer
909
+
910
+|ffff &end
911
+