To lower CPU load in idle mode (no changes on screen),
make ppu_pixel return non-zero only when a pixel has actually
changed, so that a redraw is requested only when needed.
CPU load: 25% → 4% (piano.rom) after this change on Linux amd64,
ThinkPad X220.
| ... | ... |
@@ -31,54 +31,63 @@ ppu_clear(Ppu *p) |
| 31 | 31 |
} |
| 32 | 32 |
} |
| 33 | 33 |
|
| 34 |
-void |
|
| 34 |
+int |
|
| 35 | 35 |
ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color) |
| 36 | 36 |
{
|
| 37 |
- int row = (y % 8) + ((x / 8 + y / 8 * p->width / 8) * 16), col = x % 8; |
|
| 37 |
+ int row = (y % 8) + ((x / 8 + y / 8 * p->width / 8) * 16), col = x % 8, ret; |
|
| 38 |
+ Uint8 w; |
|
| 38 | 39 |
if(x >= p->width || y >= p->height) |
| 39 |
- return; |
|
| 40 |
+ return 0; |
|
| 41 |
+ w = layer[row]; |
|
| 40 | 42 |
if(color == 0 || color == 2) |
| 41 | 43 |
layer[row] &= ~(1UL << (7 - col)); |
| 42 | 44 |
else |
| 43 | 45 |
layer[row] |= 1UL << (7 - col); |
| 46 |
+ ret = w ^ layer[row]; |
|
| 47 |
+ w = layer[row + 8]; |
|
| 44 | 48 |
if(color == 0 || color == 1) |
| 45 | 49 |
layer[row + 8] &= ~(1UL << (7 - col)); |
| 46 | 50 |
else |
| 47 | 51 |
layer[row + 8] |= 1UL << (7 - col); |
| 52 |
+ return ret | (w ^ layer[row + 8]); |
|
| 48 | 53 |
} |
| 49 | 54 |
|
| 50 |
-void |
|
| 55 |
+int |
|
| 51 | 56 |
ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy) |
| 52 | 57 |
{
|
| 53 | 58 |
Uint16 v, h; |
| 59 |
+ int ret = 0; |
|
| 54 | 60 |
for(v = 0; v < 8; v++) |
| 55 | 61 |
for(h = 0; h < 8; h++) {
|
| 56 | 62 |
Uint8 ch1 = (sprite[v] >> (7 - h)) & 0x1; |
| 57 | 63 |
if(ch1 || blending[4][color]) |
| 58 |
- ppu_pixel(p, |
|
| 64 |
+ ret |= ppu_pixel(p, |
|
| 59 | 65 |
layer, |
| 60 | 66 |
x + (flipx ? 7 - h : h), |
| 61 | 67 |
y + (flipy ? 7 - v : v), |
| 62 | 68 |
blending[ch1][color]); |
| 63 | 69 |
} |
| 70 |
+ return ret; |
|
| 64 | 71 |
} |
| 65 | 72 |
|
| 66 |
-void |
|
| 73 |
+int |
|
| 67 | 74 |
ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy) |
| 68 | 75 |
{
|
| 69 | 76 |
Uint16 v, h; |
| 77 |
+ int ret = 0; |
|
| 70 | 78 |
for(v = 0; v < 8; v++) |
| 71 | 79 |
for(h = 0; h < 8; h++) {
|
| 72 | 80 |
Uint8 ch1 = ((sprite[v] >> (7 - h)) & 0x1); |
| 73 | 81 |
Uint8 ch2 = ((sprite[v + 8] >> (7 - h)) & 0x1); |
| 74 | 82 |
Uint8 ch = ch1 + ch2 * 2; |
| 75 | 83 |
if(ch || blending[4][color]) |
| 76 |
- ppu_pixel(p, |
|
| 84 |
+ ret |= ppu_pixel(p, |
|
| 77 | 85 |
layer, |
| 78 | 86 |
x + (flipx ? 7 - h : h), |
| 79 | 87 |
y + (flipy ? 7 - v : v), |
| 80 | 88 |
blending[ch][color]); |
| 81 | 89 |
} |
| 90 |
+ return ret; |
|
| 82 | 91 |
} |
| 83 | 92 |
|
| 84 | 93 |
/* output */ |
| ... | ... |
@@ -24,6 +24,6 @@ typedef struct Ppu {
|
| 24 | 24 |
|
| 25 | 25 |
int ppu_init(Ppu *p, Uint8 hor, Uint8 ver); |
| 26 | 26 |
int ppu_resize(Ppu *p, Uint8 hor, Uint8 ver); |
| 27 |
-void ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color); |
|
| 28 |
-void ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy); |
|
| 29 |
-void ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy); |
|
| 27 |
+int ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color); |
|
| 28 |
+int ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy); |
|
| 29 |
+int ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy); |
| ... | ... |
@@ -353,25 +353,23 @@ screen_talk(Device *d, Uint8 b0, Uint8 w) |
| 353 | 353 |
Uint16 x = peek16(d->dat, 0x8); |
| 354 | 354 |
Uint16 y = peek16(d->dat, 0xa); |
| 355 | 355 |
Uint8 layer = d->dat[0xe] & 0x40; |
| 356 |
- ppu_pixel(&ppu, layer ? ppu.fg : ppu.bg, x, y, d->dat[0xe] & 0x3); |
|
| 356 |
+ reqdraw |= ppu_pixel(&ppu, layer ? ppu.fg : ppu.bg, x, y, d->dat[0xe] & 0x3); |
|
| 357 | 357 |
if(d->dat[0x6] & 0x01) poke16(d->dat, 0x8, x + 1); /* auto x+1 */ |
| 358 | 358 |
if(d->dat[0x6] & 0x02) poke16(d->dat, 0xa, y + 1); /* auto y+1 */ |
| 359 |
- reqdraw = 1; |
|
| 360 | 359 |
} else if(b0 == 0xf) {
|
| 361 | 360 |
Uint16 x = peek16(d->dat, 0x8); |
| 362 | 361 |
Uint16 y = peek16(d->dat, 0xa); |
| 363 | 362 |
Uint8 layer = d->dat[0xf] & 0x40; |
| 364 | 363 |
Uint8 *addr = &d->mem[peek16(d->dat, 0xc)]; |
| 365 | 364 |
if(d->dat[0xf] & 0x80) {
|
| 366 |
- ppu_2bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20); |
|
| 365 |
+ reqdraw |= ppu_2bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20); |
|
| 367 | 366 |
if(d->dat[0x6] & 0x04) poke16(d->dat, 0xc, peek16(d->dat, 0xc) + 16); /* auto addr+16 */ |
| 368 | 367 |
} else {
|
| 369 |
- ppu_1bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20); |
|
| 368 |
+ reqdraw |= ppu_1bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20); |
|
| 370 | 369 |
if(d->dat[0x6] & 0x04) poke16(d->dat, 0xc, peek16(d->dat, 0xc) + 8); /* auto addr+8 */ |
| 371 | 370 |
} |
| 372 | 371 |
if(d->dat[0x6] & 0x01) poke16(d->dat, 0x8, x + 8); /* auto x+8 */ |
| 373 | 372 |
if(d->dat[0x6] & 0x02) poke16(d->dat, 0xa, y + 8); /* auto y+8 */ |
| 374 |
- reqdraw = 1; |
|
| 375 | 373 |
} |
| 376 | 374 |
return 1; |
| 377 | 375 |
} |