Browse code

ppu_aarch64: initial version of ppu_redraw using Neon

Sigrid Solveig Haflínudóttir authored on 26/12/2021 06:32:35
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,28 @@
1
+#include <arm_neon.h>
2
+#include "ppu.h"
3
+
4
/*
 * Composite the foreground and background layers of p into screen using
 * AArch64 Neon, 16 pixels per iteration.
 *
 * For every pixel the foreground value is used unless it is zero, in which
 * case the background value is used instead. The chosen value indexes the
 * first 64 bytes at p->palette, treated as 16 entries of 4 bytes each, and
 * the 4 bytes of the selected entry are written to the matching 32-bit
 * element of screen. An index of 16 or more yields an all-zero pixel.
 *
 * screen, p->fg.pixels and p->bg.pixels are promised to the compiler to be
 * 16-byte aligned; they must hold at least p->width * p->height elements.
 * Pixel counts that are not a multiple of 16 are finished by a scalar tail,
 * so nothing is read or written past the end of the buffers.
 */
void
ppu_redraw(Ppu *p, Uint32 *screen)
{
	/* FIXME(sigrid): do this better */
	Uint32 *rgba = __builtin_assume_aligned(screen, 16);
	Uint8 *fg = __builtin_assume_aligned(p->fg.pixels, 16);
	Uint8 *bg = __builtin_assume_aligned(p->bg.pixels, 16);
	/* loaded only once, so no alignment promise is made for the palette */
	Uint8 *palette = (Uint8*)p->palette;
	/* de-interleave: pal.val[c] holds byte c of each of the 16 entries */
	uint8x16x4_t pal = vld4q_u8(palette);
	enum { R, G, B, A };
	/* read once: the byte stores below may alias *p as far as the compiler knows */
	int total = p->width * p->height;
	int vecend = total - total % 16;
	int i, c;

	for(i = 0; i < vecend; i += 16, fg += 16, bg += 16, rgba += 16) {
		uint8x16_t fg8 = vld1q_u8(fg);
		uint8x16_t bg8 = vld1q_u8(bg);
		/* all-ones lanes where the foreground pixel is zero */
		uint8x16_t bgmask = vceqzq_u8(fg8);
		/* take bg where the mask is set, fg everywhere else */
		uint8x16_t px8 = vbslq_u8(bgmask, bg8, fg8);
		uint8x16x4_t px = {
			vqtbl1q_u8(pal.val[R], px8),
			vqtbl1q_u8(pal.val[G], px8),
			vqtbl1q_u8(pal.val[B], px8),
			vqtbl1q_u8(pal.val[A], px8),
		};
		/* re-interleave the four byte planes into 16 packed pixels */
		vst4q_u8((uint8_t*)rgba, px);
	}

	/* scalar tail, byte-for-byte the same as the vector path */
	for(; i < total; i++, fg++, bg++, rgba++) {
		Uint8 px = *fg ? *fg : *bg;
		uint8_t *out = (uint8_t*)rgba;
		for(c = 0; c < 4; c++)
			out[c] = px < 16 ? palette[px * 4 + c] : 0; /* vqtbl1q_u8 gives 0 for out-of-range indices */
	}
}