1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,28 @@ |
1 |
+#include <arm_neon.h> |
|
2 |
+#include "ppu.h" |
|
3 |
+ |
|
4 |
/*
 * Composite the foreground and background layers of p into screen,
 * 16 pixels per iteration, using NEON.
 *
 * For every pixel the foreground byte is used as the color index unless
 * it is zero, in which case the background byte is used instead.  The
 * index then selects one byte from each of the four palette byte planes,
 * and the four bytes are stored interleaved as one 32-bit screen pixel.
 *
 * screen, p->fg.pixels, p->bg.pixels and p->palette are all promised to
 * the compiler to be 16-byte aligned.  There is no scalar tail: the loop
 * always handles a full group of 16 pixels, so a pixel count that is not
 * a multiple of 16 makes the last iteration run past the end.
 */
void
ppu_redraw(Ppu *p, Uint32 *screen)
{
	/* FIXME(sigrid): do this better */
	Uint8 *colors = __builtin_assume_aligned((Uint8*)p->palette, 16);
	Uint8 *front = __builtin_assume_aligned(p->fg.pixels, 16);
	Uint8 *back = __builtin_assume_aligned(p->bg.pixels, 16);
	Uint32 *out = __builtin_assume_aligned(screen, 16);
	/* 64 palette bytes, de-interleaved into four 16-entry byte tables */
	uint8x16x4_t planes = vld4q_u8(colors);
	uint8x16x4_t quad;
	uint8x16_t top, idx;
	int off, c;

	off = 0;
	while(off < p->width * p->height) {
		top = vld1q_u8(front + off);
		/* lanes whose foreground byte is 0 take the background byte */
		idx = vbslq_u8(vceqzq_u8(top), vld1q_u8(back + off), top);
		/* one table lookup per byte plane, re-interleaved on store */
		for(c = 0; c < 4; c++)
			quad.val[c] = vqtbl1q_u8(planes.val[c], idx);
		vst4q_u8((uint8_t*)(out + off), quad);
		off += 16;
	}
}