Browse code

(mandelbrot) Improved speed

Devine Lu Linvega authored on 05/03/2023 19:06:45
Showing 1 changed files
... ...
@@ -1,4 +1,6 @@
1
-( mandelbrot )
1
+( mandelbrot.tal )
2
+( )
3
+( by alderwick and d_m )
2 4
 
3 5
 %WIDTH { #02a0 }
4 6
 %HEIGHT { #0200 }
... ...
@@ -17,88 +19,106 @@
17 19
 	#0ff0 .System/g DEO2
18 20
 	#00ff .System/b DEO2
19 21
 
20
-	WIDTH .Screen/width DEO2 ( 640 )
21
-	HEIGHT .Screen/height DEO2 ( 480 )
22
+	( size )
23
+	WIDTH  .Screen/width  DEO2
24
+	HEIGHT .Screen/height DEO2
22 25
 
26
+	( run )
23 27
 	draw-mandel
28
+	BRK
24 29
 
25
-BRK
26
-
27
-@draw-mandel ( -- )
28
-
29
-	XMAX XMIN SUB2 WIDTH DIV2 ;&dx STA2
30
-	YMAX YMIN SUB2 HEIGHT DIV2 ;&dy STA2
31
-	[ LIT2 01 -Screen/auto ] DEO
32
-	YMAX YMIN
33
-	&ver
34
-		DUP2 ,&y STR2
35
-		XMAX XMIN
36
-		&hor
37
-			DUP2 ,&x STR2
38
-			#0000
39
-				DUP2 ,&x1 STR2
40
-				DUP2 ,&y1 STR2
41
-				DUP2 ,&x2 STR2
42
-				,&y2 STR2
43
-			( pixel )
44
-			#2000
45
-			&loop
46
-				[ LIT2 &x1 $2 ] [ LIT2 &y1 $2 ] smul2 DUP2 ADD2
47
-					[ LIT2 &y $2 ] ADD2 ,&y1 STR2
48
-				[ LIT2 &x2 $2 ] [ LIT2 &y2 $2 ] SUB2
49
-					[ LIT2 &x $2 ] ADD2 ,&x1 STR2
50
-				,&x1 LDR2 DUP2 smul2
51
-					DUP2 ,&x2 STR2
52
-				,&y1 LDR2 DUP2 smul2
53
-					DUP2 ,&y2 STR2
54
-				ADD2 #4000 GTH2 ?&end
55
-				INC GTHk ?&loop
56
-				&end
57
-			NIP .Screen/pixel DEO
58
-			( done. )
59
-			[ LIT2 &dx $2 ] ADD2 OVR2 #8000 ADD2 OVR2 #8000 ADD2 SWP2 LTH2 ?&hor
60
-		POP2 POP2
61
-		#0000 .Screen/x DEO2
62
-		.Screen/y DEI2k INC2 ROT DEO2
63
-		[ LIT2 &dy $2 ] ADD2 OVR2 #8000 ADD2 OVR2 #8000 ADD2 SWP2 LTH2 ?&ver
64
-	POP2 POP2
65
-
66
-JMP2r
67
-
68
-@smul2 ( a* b* -- c* )
69
-	LITr 00
70
-	DUP2 #8000 LTH2 ?&b-positive
71
-	INCr DUP2k EOR2 SWP2 SUB2
72
-	&b-positive
73
-	SWP2
74
-	DUP2 #8000 LTH2 ?&a-positive
75
-	INCr DUP2k EOR2 SWP2 SUB2
76
-	&a-positive
77
-	( ahi alo bhi blo )
78
-	LITr 00 STH ( ahi alo bhi / blo* )
79
-	OVRr STH ( ahi alo / blo* bhi* )
80
-	OVRr STH ( ahi / blo* bhi* alo* )
81
-	OVRr STH ( asign / blo* bhi* alo* ahi* )
82
-	ROT2r MUL2kr STH2r ( asign ahi-bhi* / blo* alo* ahi* bhi* )
83
-	ROT2r MUL2kr STH2r ( asign ahi-bhi* alo-bhi* / blo* ahi* bhi* alo* )
84
-	NIP2r ( asign ahi-bhi* alo-bhi* / blo* ahi* alo* )
85
-	ROT2r MUL2kr STH2r ( asign ahi-bhi* alo-bhi* alo-blo* / ahi* alo* blo* )
86
-	ROT2r MUL2r STH2r POP2r ( asign ahi-bhi* alo-bhi* alo-blo* ahi-blo* )
87
-	SWP2 ( asign ahi-bhi* alo-bhi* ahi-blo* alo-blo* )
88
-	( 32-bit result is [ r3 r2 r1 r0 ] )
89
-	POP #00 SWP ( asign ahi-bhi* alo-bhi* ahi-blo* r21* )
90
-	( r21 max is 00fe, ahi-blo max is 7e81, max sum is 7f7f )
91
-	ADD2 ( asign ahi-bhi* alo-bhi* r21'* )
92
-	( r21' max is 7f7f, alo-bhi max is 7e81, max sum is fe00 )
93
-	ADD2 ( asign ahi-bhi* r21"* )
94
-	( The result we want is bits 27-12 due to the fixed point representation we use. )
95
-	#04 SFT2 SWP2 #07ff min #40 SFT2 ADD2
96
-	( saturate to +/-7.fff )
97
-	#7fff min
98
-	STHr #01 NEQ ?&result-positive
99
-	DUP2k EOR2 SWP2 SUB2
100
-	&result-positive
101
-JMP2r
102
-
103
-@min ( x* y* -- min* )
104
-	GTH2k [ JMP SWP2 NIP2 ] JMP2r
30
+( draw the mandelbrot set using 4.12 fixed point numbers )
31
+@draw-mandel ( -> )
32
+	XMAX XMIN SUB2 WIDTH DIV2 ,&dx STR2  ( ; &dx<-{xmax-min}/width )
33
+	YMAX YMIN SUB2 HEIGHT DIV2 ,&dy STR2 ( ; &dy<-{ymax-ymin}/height )
34
+	[ LIT2 01 -Screen/auto ] DEO         ( ; auto<-1 )
35
+	LIT2r 8000                           ( [8000] )
36
+	YMAX YMIN                            ( ymax* ymin* [8000] )
37
+	&yloop                               ( ymax* y* [8000] )
38
+		XMAX XMIN                        ( ymax* y* xmax* xmin* [8000] )
39
+		&xloop                           ( ymax* y* xmax* x* [8000] )
40
+			ROT2k evaluate               ( ymax* y* xmax* x* xmax* count^ [8000] )
41
+			.Screen/pixel DEO POP2       ( ymax* y* xmax* x* [8000] )
42
+			[ LIT2 &dx $2 ] ADD2         ( ymax* y* xmax* x+dx* [8000] )
43
+			OVR2 STH2kr ADD2             ( ymax* y* xmax* x+dx* 8000+xmax* [8000] )
44
+			OVR2 STH2kr ADD2             ( ymax* y* xmax* x+dx* 8000+xmax* 8000+x+dx* [8000] )
45
+			GTH2 ?&xloop                 ( ymax* y* xmax* x+dx* [8000] )
46
+		POP2 POP2                        ( ymax* y* [8000] )
47
+		#0000 .Screen/x DEO2             ( ymax* y* [8000] ; sc/x<-0 )
48
+		.Screen/y DEI2k                  ( ymax* y* d^ sy* [8000] )
49
+		INC2 ROT DEO2                    ( ymax* y* [8000] ; sc/y<-sy+1 )
50
+		[ LIT2 &dy $2 ] ADD2             ( ymax* y+dy* [8000] )
51
+		OVR2 STH2kr ADD2                 ( ymax* y+dy* 8000+ymax* [8000] )
52
+		OVR2 STH2kr ADD2                 ( ymax* y+dy* 8000+ymax* 8000+y+dy* [8000] )
53
+		GTH2 ?&yloop                     ( ymax* y+dy* [8000] )
54
+	POP2 POP2 POP2r JMP2r                ( )
55
+
56
+@evaluate ( x* y* -> count^ )
57
+	#0000 DUP2 ,&x1 STR2         ( x* y* ; x1<-0 )
58
+		  DUP2 ,&y1 STR2         ( x* y* ; y1<-0 )
59
+		  DUP2 ,&x2 STR2         ( x* y* ; x2<-0 )
60
+			   ,&y2 STR2         ( x* y* ; y2<-0 )
61
+	LIT2r 2000                   ( x* y* [20 00] )
62
+	&loop                        ( x* y* [20 n^] )
63
+		[ LIT2 &x1 $2 ]          ( x* y* x1* [20 n^] )
64
+		[ LIT2 &y1 $2 ]          ( x* y* x1* y1* [20 n^] )
65
+		smul2 DUP2 ADD2          ( x* y* 2x1y1* [20 n^] )
66
+		OVR2 ADD2 ,&y1 STR2      ( x* y* [20 n^] ; y1<-2x1y1+y* )
67
+		SWP2 [ LIT2 &x2 $2 ]     ( y* x* x2* [20 n^] )
68
+		[ LIT2 &y2 $2 ] SUB2     ( y* x* x2-y2* [20 n^] )
69
+		OVR2 ADD2 ,&x1 STR2 SWP2 ( x* y* [20 n^] ; x1<-x2-y2+x* )
70
+		,&x1 LDR2 square         ( x* y* x1^2* [20 n^] )
71
+		DUP2 ,&x2 STR2           ( x* y* x1^2* [20 n^] ; x2<-x1^2* )
72
+		,&y1 LDR2 square         ( x* y* x1^2* y1^2* [20 n^] )
73
+		DUP2 ,&y2 STR2           ( x* y* x1^2* y1^2* [20 n^] ; y2<-y1^2* )
74
+		ADD2 #4000 GTH2 ?&end    ( x* y* [20 n^] )
75
+		INCr GTHkr STHr ?&loop   ( x* y* [20 n+1*] )
76
+	&end                         ( x* y* [20 count^] )
77
+	POP2 POP2 NIPr STHr JMP2r    ( count^ )
78
+
79
+( multiply two signed 4.12 fixed point numbers )
80
+@smul2 ( a* b* -> ab* )
81
+	LIT2r 0001 DUP2 #8000 LTH2 ?&bpos negate SWPr ( a* |b|* [sign*] )
82
+	&bpos SWP2 DUP2 #8000 LTH2 ?&apos negate SWPr ( |b|* |a|* [sign*] )
83
+	&apos smul2-pos STHr ?&abpos negate           ( ab* [scrap^] )
84
+	&abpos POPr JMP2r                             ( ab* )
85
+
86
+( multiply two non-negative fixed point numbers )
87
+( )
88
+( a * b = {a0/16 + a1/4096} * {b0/16 + b1/4096} )
89
+(       = a0b0/256 + a1b0/65536 + a0b1/65536 + a1b1/16777216 )
90
+(       = x + y + z + 0 ; the last term is too small to represent, i.e. zero )
91
+( )
92
+( x = a0b0 << 4 )
93
+( y = a1b0 >> 4 )
94
+( z = a0b1 >> 4 )
95
+@smul2-pos ( a* b* -> ab* )
96
+	aerate ROT2 aerate           ( b0* b1* a0* a1* )
97
+	STH2 ROT2k                   ( b0* b1* a0* b1* a0* b0* [a1*] )
98
+	STH2 MUL2r                   ( b0* b1* a0* b1* a0* [a1b0*] )
99
+	MUL2 STH2 ADD2r              ( b0* b1* a0* [a1b0+a0b1*] )
100
+	NIP2 MUL2 #07ff min #40 SFT2 ( a0b0* [y+z*] )
101
+	STH2r #04 SFT2 ADD2          ( x* [y+z*] )
102
+	#7fff !min                   ( ab* )
103
+
104
+( equivalent to DUP2 smul2 but faster )
105
+@square ( a* -> aa* )
106
+	DUP2 #8000 LTH2 ?&pos negate &pos
107
+
108
+( >> )
109
+
110
+( equivalent to DUP2 smul2-pos but faster )
111
+@square-pos ( a* -> aa* )
112
+	aerate                       ( 00 ahi^ 00 alo^ )
113
+	OVR2 MUL2 #03 SFT2 SWP2      ( yz* ahi* )
114
+	DUP2 MUL2 #07ff min #40 SFT2 ( x* yz* )
115
+	ADD2 #7fff !min              ( aa* )
116
+
117
+( convert each byte of a a short into a short )
118
+@aerate ( x* -> 00 xhi^ 00 xlo^ ) SWP #0000 ROT SWP2 SWP JMP2r
119
+
120
+( negate a fixed point number. doesn't work for #8000 )
121
+@negate ( x* -> -x* ) DUP2k EOR2 SWP2 SUB2 JMP2r
122
+
123
+( return the minimum of two non-negative numbers. )
124
+@min ( x* y* ) GTH2k [ JMP SWP2 ] NIP2 JMP2r