| 1 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,28 @@ |
| 1 |
+#include <arm_neon.h> |
|
| 2 |
+#include "ppu.h" |
|
| 3 |
+ |
|
| 4 |
/*
 * ppu_redraw: composite the two pixel layers of `p` into `screen`,
 * 16 pixels per iteration, using AArch64 NEON.
 *
 * For every pixel the foreground index (p->fg.pixels) is used when it is
 * non-zero, otherwise the background index (p->bg.pixels) is used. The
 * chosen index selects a 4-byte entry from p->palette, which is written
 * to the matching 4-byte slot of `screen`.
 *
 * Requirements inherited from the code below (the caller must uphold them):
 *  - `screen`, p->fg.pixels, p->bg.pixels and p->palette are 16-byte
 *    aligned (promised to the compiler via __builtin_assume_aligned);
 *  - p->palette provides at least 64 readable bytes (16 entries of 4 bytes);
 *  - `screen` has room for p->width * p->height 32-bit pixels.
 *
 * Indices >= 16 produce an all-zero pixel (TBL returns 0 for out-of-range
 * lanes); the scalar tail mirrors that.
 */
void
ppu_redraw(Ppu *p, Uint32 *screen)
{
	/* FIXME(sigrid): do this better */
	/*
	 * Names for the four byte planes produced by vld4q_u8. They are just
	 * plane numbers 0..3; which colour channel each byte really holds
	 * depends on how p->palette is packed -- TODO confirm.
	 */
	enum { R, G, B, A };
	Uint32 *rgba = __builtin_assume_aligned(screen, 16);
	Uint8 *fg = __builtin_assume_aligned(p->fg.pixels, 16);
	Uint8 *bg = __builtin_assume_aligned(p->bg.pixels, 16);
	Uint8 *palette = __builtin_assume_aligned((Uint8*)p->palette, 16);
	/* De-interleave 16 four-byte palette entries into four 16-byte tables. */
	uint8x16x4_t pal = vld4q_u8(palette);
	/* Pixel count is loop-invariant; compute it once. */
	int npixels = p->width * p->height;
	int i;

	/* Vector body: only whole groups of 16 pixels, so no access runs past the buffers. */
	for(i = 0; i + 16 <= npixels; i += 16, fg += 16, bg += 16, rgba += 16) {
		uint8x16_t fg8 = vld1q_u8(fg);
		uint8x16_t bg8 = vld1q_u8(bg);
		/* Lanes are 0xff where the foreground index is 0, else 0x00. */
		uint8x16_t bgmask = vceqzq_u8(fg8);
		/* Bitwise select: background where the mask is set, foreground elsewhere. */
		uint8x16_t px8 = vbslq_u8(bgmask, bg8, fg8);
		/* One table lookup per byte plane turns indices into colour bytes. */
		uint8x16x4_t px = {{
			vqtbl1q_u8(pal.val[R], px8),
			vqtbl1q_u8(pal.val[G], px8),
			vqtbl1q_u8(pal.val[B], px8),
			vqtbl1q_u8(pal.val[A], px8),
		}};
		/* Re-interleave the planes into 16 four-byte pixels. */
		vst4q_u8((uint8_t*)rgba, px);
	}

	/*
	 * Scalar tail for the remaining (npixels % 16) pixels. Produces the
	 * same bytes the vector path would, without touching memory beyond
	 * the end of the layers or the screen.
	 */
	for(; i < npixels; i++, fg++, bg++, rgba++) {
		Uint8 idx = *fg ? *fg : *bg;
		Uint8 *out = (Uint8*)rgba;
		int c;

		for(c = 0; c < 4; c++)
			out[c] = idx < 16 ? palette[idx * 4 + c] : 0;
	}
}