Browse code

ppu_blit: tiny bit faster by doing fewer memory reads in the inner loop

Sigrid Solveig Haflínudóttir authored on 25/12/2021 17:56:36
Showing 1 changed files
... ...
@@ -103,13 +103,11 @@ ppu_write(Ppu *p, Layer *layer, Uint16 x, Uint16 y, Uint8 color)
103 103
 void
104 104
 ppu_blit(Ppu *p, Layer *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy, Uint8 twobpp)
105 105
 {
106
-	Uint8 opaque = blending[4][color];
107
-	Uint16 v, h;
108
-	for(v = 0; v < 8; ++v)
109
-		for(h = 0; h < 8; ++h) {
110
-			Uint8 ch = (sprite[v + 0] >> (7 - h)) & 0x1;
111
-			if(twobpp)
112
-				ch |= ((sprite[v + 8] >> (7 - h)) & 0x1) << 1;
106
+	int v, h, opaque = blending[4][color];
107
+	for(v = 0; v < 8; ++v) {
108
+		int c = sprite[v] | (twobpp ? sprite[v + 8] : 0) << 8;
109
+		for(h = 7; h >= 0; --h, c >>= 1) {
110
+			Uint8 ch = (c & 1) | ((c >> 7) & 2);
113 111
 			if(opaque || ch)
114 112
 				ppu_write(p,
115 113
 					layer,
... ...
@@ -117,6 +115,7 @@ ppu_blit(Ppu *p, Layer *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, U
117 115
 					y + (flipy ? 7 - v : v),
118 116
 					blending[ch][color]);
119 117
 		}
118
+	}
120 119
 }
121 120
 
122 121
 void