Browse code

ppu: change the API to signal when redraw is required

To lower CPU load in idle mode (no changes on screen),
make ppu_pixel return non-zero when a change has been
made, so that a redraw is requested only when a pixel
actually changes.

CPU load: 25% → 4% (piano.rom) after this change on Linux amd64,
ThinkPad X220.

Sigrid Solveig Haflínudóttir authored on 19/09/2021 21:30:53
Showing 3 changed files
... ...
@@ -31,54 +31,63 @@ ppu_clear(Ppu *p)
31 31
 	}
32 32
 }
33 33
 
34
-void
34
+int
35 35
 ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color)
36 36
 {
37
-	int row = (y % 8) + ((x / 8 + y / 8 * p->width / 8) * 16), col = x % 8;
37
+	int row = (y % 8) + ((x / 8 + y / 8 * p->width / 8) * 16), col = x % 8, ret;
38
+	Uint8 w;
38 39
 	if(x >= p->width || y >= p->height)
39
-		return;
40
+		return 0;
41
+	w = layer[row];
40 42
 	if(color == 0 || color == 2)
41 43
 		layer[row] &= ~(1UL << (7 - col));
42 44
 	else
43 45
 		layer[row] |= 1UL << (7 - col);
46
+	ret = w ^ layer[row];
47
+	w = layer[row + 8];
44 48
 	if(color == 0 || color == 1)
45 49
 		layer[row + 8] &= ~(1UL << (7 - col));
46 50
 	else
47 51
 		layer[row + 8] |= 1UL << (7 - col);
52
+	return ret | (w ^ layer[row + 8]);
48 53
 }
49 54
 
50
-void
55
+int
51 56
 ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy)
52 57
 {
53 58
 	Uint16 v, h;
59
+	int ret = 0;
54 60
 	for(v = 0; v < 8; v++)
55 61
 		for(h = 0; h < 8; h++) {
56 62
 			Uint8 ch1 = (sprite[v] >> (7 - h)) & 0x1;
57 63
 			if(ch1 || blending[4][color])
58
-				ppu_pixel(p,
64
+				ret |= ppu_pixel(p,
59 65
 					layer,
60 66
 					x + (flipx ? 7 - h : h),
61 67
 					y + (flipy ? 7 - v : v),
62 68
 					blending[ch1][color]);
63 69
 		}
70
+	return ret;
64 71
 }
65 72
 
66
-void
73
+int
67 74
 ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy)
68 75
 {
69 76
 	Uint16 v, h;
77
+	int ret = 0;
70 78
 	for(v = 0; v < 8; v++)
71 79
 		for(h = 0; h < 8; h++) {
72 80
 			Uint8 ch1 = ((sprite[v] >> (7 - h)) & 0x1);
73 81
 			Uint8 ch2 = ((sprite[v + 8] >> (7 - h)) & 0x1);
74 82
 			Uint8 ch = ch1 + ch2 * 2;
75 83
 			if(ch || blending[4][color])
76
-				ppu_pixel(p,
84
+				ret |= ppu_pixel(p,
77 85
 					layer,
78 86
 					x + (flipx ? 7 - h : h),
79 87
 					y + (flipy ? 7 - v : v),
80 88
 					blending[ch][color]);
81 89
 		}
90
+	return ret;
82 91
 }
83 92
 
84 93
 /* output */
... ...
@@ -24,6 +24,6 @@ typedef struct Ppu {
24 24
 
25 25
 int ppu_init(Ppu *p, Uint8 hor, Uint8 ver);
26 26
 int ppu_resize(Ppu *p, Uint8 hor, Uint8 ver);
27
-void ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color);
28
-void ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy);
29
-void ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy);
27
+int ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color);
28
+int ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy);
29
+int ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy);
... ...
@@ -353,25 +353,23 @@ screen_talk(Device *d, Uint8 b0, Uint8 w)
353 353
 		Uint16 x = peek16(d->dat, 0x8);
354 354
 		Uint16 y = peek16(d->dat, 0xa);
355 355
 		Uint8 layer = d->dat[0xe] & 0x40;
356
-		ppu_pixel(&ppu, layer ? ppu.fg : ppu.bg, x, y, d->dat[0xe] & 0x3);
356
+		reqdraw |= ppu_pixel(&ppu, layer ? ppu.fg : ppu.bg, x, y, d->dat[0xe] & 0x3);
357 357
 		if(d->dat[0x6] & 0x01) poke16(d->dat, 0x8, x + 1); /* auto x+1 */
358 358
 		if(d->dat[0x6] & 0x02) poke16(d->dat, 0xa, y + 1); /* auto y+1 */
359
-		reqdraw = 1;
360 359
 	} else if(b0 == 0xf) {
361 360
 		Uint16 x = peek16(d->dat, 0x8);
362 361
 		Uint16 y = peek16(d->dat, 0xa);
363 362
 		Uint8 layer = d->dat[0xf] & 0x40;
364 363
 		Uint8 *addr = &d->mem[peek16(d->dat, 0xc)];
365 364
 		if(d->dat[0xf] & 0x80) {
366
-			ppu_2bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20);
365
+			reqdraw |= ppu_2bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20);
367 366
 			if(d->dat[0x6] & 0x04) poke16(d->dat, 0xc, peek16(d->dat, 0xc) + 16); /* auto addr+16 */
368 367
 		} else {
369
-			ppu_1bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20);
368
+			reqdraw |= ppu_1bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20);
370 369
 			if(d->dat[0x6] & 0x04) poke16(d->dat, 0xc, peek16(d->dat, 0xc) + 8); /* auto addr+8 */
371 370
 		}
372 371
 		if(d->dat[0x6] & 0x01) poke16(d->dat, 0x8, x + 8); /* auto x+8 */
373 372
 		if(d->dat[0x6] & 0x02) poke16(d->dat, 0xa, y + 8); /* auto y+8 */
374
-		reqdraw = 1;
375 373
 	}
376 374
 	return 1;
377 375
 }