Skip to content

Commit 2c8a8cb

Browse files
committed
Optimize PPU rendering using cached bg tiles
1 parent 869e85d commit 2c8a8cb

File tree

8 files changed

+214
-66
lines changed

8 files changed

+214
-66
lines changed

3ds/source/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ bool handle_inputs(void) {
6565
}
6666

6767
int main(int argc, char* argv[]) {
68-
gfxInitDefault();
68+
gfxInit(GSP_BGR8_OES, GSP_BGR8_OES, false);
6969
consoleInit(GFX_BOTTOM, NULL);
7070
gfxSetDoubleBuffering(GFX_TOP, false);
7171
gfxSwapBuffers();

out/lib/ppu.c

Lines changed: 96 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010

1111
uint8_t chr_rom[8192];
1212
uint8_t nametable[2048];
13-
uint8_t palette_table[32];
13+
Palette palette;
14+
1415
uint8_t oam[256];
1516

1617
uint16_t ppu_v;
@@ -28,7 +29,9 @@ uint16_t vram_addr;
2829
uint8_t vram_internal_buffer;
2930
uint8_t oam_dma;
3031

31-
uint8_t frame[SCREEN_WIDTH * SCREEN_HEIGHT * 3];
32+
#define FRAME_BUFFER_SIZE (SCREEN_WIDTH * SCREEN_HEIGHT * 3)
33+
34+
uint8_t frame[FRAME_BUFFER_SIZE];
3235
bool opaque_bg_mask[SCREEN_WIDTH * SCREEN_HEIGHT];
3336

3437
// 64 RGB colors
@@ -48,6 +51,17 @@ uint8_t COLOR_PALETTE[] = {
4851
0x99, 0xFF, 0xFC, 0xDD, 0xDD, 0xDD, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
4952
};
5053

54+
// Cached RGB rendering of one 8x8 background tile for one background palette.
// Filled in by get_cached_background_tile() and copied to the frame by draw_background_tile().
typedef struct {
55+
bool is_ready; // true once the tile has been rendered into this entry; set to false by ppu_init()
56+
uint8_t pixels[8 * 8 * 3]; // 3 bytes (R, G, B read from COLOR_PALETTE) per pixel, at (tile_y * 8 + tile_x) * 3
57+
bool opaque_mask[8 * 8]; // per pixel: true when the pixel's 2-bit color index is non-zero
58+
uint32_t palette_mask; // 0xff in byte k for every color index k (0-3) that occurs in this tile
59+
uint32_t palette; // palette.u32[] entry at fill time, masked by palette_mask; compared to detect palette changes
60+
uint8_t universal_color; // palette.u8[0] at fill time; the entry is rebuilt when the current value differs
61+
} Tile;
62+
63+
Tile bg_cached_tiles[256][4]; // 256 tiles, 4 palettes
64+
5165
// Set every byte of `frame` to zero (SCREEN_WIDTH * SCREEN_HEIGHT pixels, 3 bytes each).
// NOTE(review): the byte count below is the same expression as FRAME_BUFFER_SIZE,
// which sizes `frame`; the two must be kept in sync by hand.
void clear_frame(void) {
5266
memset(frame, 0, SCREEN_WIDTH * SCREEN_HEIGHT * 3);
5367
}
@@ -60,6 +74,13 @@ void ppu_init(uint8_t* chr) {
6074
// copy CHR ROM
6175
memcpy(chr_rom, chr, 8192);
6276
clear_frame();
77+
78+
// init tiles
79+
for (int i = 0; i < 256; i++) {
80+
for (int j = 0; j < 4; j++) {
81+
bg_cached_tiles[i][j].is_ready = false;
82+
}
83+
}
6384
}
6485

6586
bool status_clear = false;
@@ -78,7 +99,7 @@ uint8_t ppu_read_register(uint16_t addr) {
7899
// as some loops wait for a flag to be set or cleared
79100

80101
status_clear = !status_clear;
81-
102+
82103
ppu_w = 0;
83104
// vblank and sprite 0 hit
84105
return status_clear ? 0 : 0b11000000;
@@ -156,17 +177,17 @@ uint8_t ppu_read(uint16_t addr) {
156177
if (addr < 0x2000) {
157178
return chr_rom[addr];
158179
}
159-
180+
160181
if (addr < 0x3f00) {
161182
return nametable[addr - 0x2000];
162183
}
163184

164185
if (addr == 0x3f10 || addr == 0x3f14 || addr == 0x3f18 || addr == 0x3f1c) {
165-
return palette_table[addr - 0x3f10];
186+
return palette.u8[addr - 0x3f10];
166187
}
167-
188+
168189
if (addr < 0x4000) {
169-
return palette_table[(addr - 0x3f00) & 31];
190+
return palette.u8[(addr - 0x3f00) & 31];
170191
}
171192

172193
return 0;
@@ -176,9 +197,9 @@ void ppu_write(uint16_t addr, uint8_t value) {
176197
if (addr >= 0x2000 && addr < 0x3f00) {
177198
nametable[addr - 0x2000] = value;
178199
} else if (addr == 0x3f10 || addr == 0x3f14 || addr == 0x3f18 || addr == 0x3f1c) {
179-
palette_table[addr - 0x3f10] = value;
200+
palette.u8[addr - 0x3f10] = value;
180201
} else if (addr < 0x4000) {
181-
palette_table[(addr - 0x3f00) & 31] = value;
202+
palette.u8[(addr - 0x3f00) & 31] = value;
182203
}
183204
}
184205

@@ -207,16 +228,20 @@ size_t get_background_palette_index(size_t tile_col, size_t tile_row, size_t nam
207228
return ((attr_table_byte >> shift) & 0b11) * BYTES_PER_PALETTE;
208229
}
209230

210-
void draw_background_tile(
211-
size_t n,
212-
size_t x,
213-
size_t y,
214-
size_t bank_offset,
215-
size_t palette_idx,
216-
int shift_x,
217-
int min_x,
218-
int max_x
219-
) {
231+
Tile* get_cached_background_tile(size_t n, size_t bank_offset, size_t palette_offset) {
232+
size_t palette_idx = palette_offset / 4;
233+
Tile *tile = &bg_cached_tiles[n][palette_idx];
234+
235+
if (
236+
tile->is_ready &&
237+
tile->universal_color == palette.u8[0] &&
238+
tile->palette == (palette.u32[palette_idx] & tile->palette_mask)
239+
) {
240+
return tile;
241+
}
242+
243+
uint32_t palette_mask = 0;
244+
220245
for (size_t tile_y = 0; tile_y < 8; tile_y++) {
221246
uint8_t plane1 = chr_rom[bank_offset + n * 16 + tile_y];
222247
uint8_t plane2 = chr_rom[bank_offset + n * 16 + tile_y + 8];
@@ -225,29 +250,71 @@ void draw_background_tile(
225250
uint8_t bit0 = plane1 & 1;
226251
uint8_t bit1 = plane2 & 1;
227252
uint8_t color_index = (uint8_t)((bit1 << 1) | bit0);
228-
253+
229254
plane1 >>= 1;
230255
plane2 >>= 1;
231256

232-
uint8_t palette_offset;
257+
uint8_t palette_idx;
233258
bool is_universal_bg_color = color_index == 0;
259+
palette_mask |= (uint32_t)(0xff << (color_index * 8));
234260

235261
if (is_universal_bg_color) {
236-
palette_offset = palette_table[0];
262+
palette_idx = palette.u8[0];
237263
} else {
238-
palette_offset = palette_table[palette_idx + color_index];
264+
palette_idx = palette.u8[palette_offset + color_index];
239265
}
240266

267+
size_t palette_offset = palette_idx * 3;
268+
269+
uint8_t r = COLOR_PALETTE[palette_offset];
270+
uint8_t g = COLOR_PALETTE[palette_offset + 1];
271+
uint8_t b = COLOR_PALETTE[palette_offset + 2];
272+
273+
size_t tile_offset = (tile_y * 8 + tile_x);
274+
size_t tile_offset_times_3 = tile_offset * 3;
275+
276+
tile->pixels[tile_offset_times_3] = r;
277+
tile->pixels[tile_offset_times_3 + 1] = g;
278+
tile->pixels[tile_offset_times_3 + 2] = b;
279+
tile->opaque_mask[tile_offset] = !is_universal_bg_color;
280+
}
281+
}
282+
283+
tile->palette_mask = palette_mask;
284+
tile->palette = palette.u32[palette_idx] & palette_mask;
285+
tile->universal_color = palette.u8[0];
286+
tile->is_ready = true;
287+
288+
return tile;
289+
}
290+
291+
void draw_background_tile(
292+
size_t n,
293+
size_t x,
294+
size_t y,
295+
size_t bank_offset,
296+
size_t palette_idx,
297+
int shift_x,
298+
int min_x,
299+
int max_x
300+
) {
301+
if ((int)(x + 7) < min_x || (int)x >= (max_x + 7)) {
302+
return;
303+
}
304+
305+
Tile *tile = get_cached_background_tile(n, bank_offset, palette_idx);
306+
307+
for (size_t tile_y = 0; tile_y < 8; tile_y++) {
308+
for (size_t tile_x = 0; tile_x < 8; tile_x++) {
309+
size_t tile_offset = (tile_y * 8 + tile_x);
241310
int nametable_x = (int)x + ((int)(7 - (int)tile_x));
242311

243312
if (nametable_x >= min_x && nametable_x < max_x) {
244313
size_t screen_x = (size_t)(shift_x + nametable_x);
245314
size_t screen_y = y + tile_y;
246-
set_pixel(screen_x, screen_y, palette_offset);
247315

248-
if (!is_universal_bg_color && screen_x >= 0 && screen_x < SCREEN_WIDTH) {
249-
opaque_bg_mask[screen_y * SCREEN_WIDTH + screen_x] = true;
250-
}
316+
memcpy(&frame[(screen_y * SCREEN_WIDTH + screen_x) * 3], &tile->pixels[tile_offset * 3], 3);
317+
opaque_bg_mask[screen_y * SCREEN_WIDTH + screen_x] = tile->opaque_mask[tile_offset];
251318
}
252319
}
253320
}
@@ -329,12 +396,12 @@ void draw_sprite_tile(
329396
uint8_t bit0 = plane1 & 1;
330397
uint8_t bit1 = plane2 & 1;
331398
uint8_t color_index = (uint8_t)((bit1 << 1) | bit0);
332-
399+
333400
plane1 >>= 1;
334401
plane2 >>= 1;
335402

336403
if (color_index != 0) {
337-
uint8_t palette_offset = palette_table[palette_idx + color_index - 1];
404+
uint8_t palette_offset = palette.u8[palette_idx + color_index - 1];
338405
uint8_t flipped_x = (uint8_t)(flip_x ? tile_x : 7 - tile_x);
339406
uint8_t flipped_y = (uint8_t)(flip_y ? 7 - tile_y : tile_y);
340407
size_t screen_x = x + flipped_x;

out/lib/ppu.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,16 @@
1313

1414
extern uint8_t chr_rom[CHR_ROM_SIZE]; // 1 page of CHR ROM (8KB)
1515
extern uint8_t nametable[NAMETABLE_SIZE]; // 2KB of nametable RAM
16-
extern uint8_t palette_table[PALETTE_SIZE]; // 32 bytes of palette RAM
1716
extern uint8_t oam[OAM_SIZE]; // 256 bytes of OAM RAM
1817

18+
// 32 bytes of palette RAM
19+
// Palette RAM exposed through two overlapping views of the same storage.
// NOTE(review): u32 has 4 entries (16 bytes); if PALETTE_SIZE is 32, as the comment
// above states, the second half of u8 has no u32 alias -- confirm this is intended.
// NOTE(review): which byte of u8 lands in which bits of a u32 entry depends on host
// byte order; the 0xff << (color_index * 8) mask in ppu.c appears to assume
// little-endian -- TODO confirm for all targets.
typedef union {
20+
uint8_t u8[PALETTE_SIZE]; // byte view; ppu_read()/ppu_write() index it with (addr - 0x3f00) & 31
21+
uint32_t u32[4]; // 4-byte view; get_cached_background_tile() indexes it with palette_offset / 4
22+
} Palette;
23+
24+
extern Palette palette;
25+
1926
extern uint16_t ppu_v; // current VRAM address
2027
extern uint8_t ppu_w; // write toggle (1 bit)
2128
extern uint8_t ppu_f; // even/odd frame flag (1 bit)

out/lib/state.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ void save_state(uint8_t *dest) {
1313
}
1414

1515
for (size_t i = 0; i < PALETTE_SIZE; i++) {
16-
dest[SAVE_STATE_PALETTE_OFFSET + i] = palette_table[i];
16+
dest[SAVE_STATE_PALETTE_OFFSET + i] = palette.u8[i];
1717
}
1818

1919
for (size_t i = 0; i < OAM_SIZE; i++) {
@@ -24,7 +24,7 @@ void save_state(uint8_t *dest) {
2424
void load_state(uint8_t *state) {
2525
memcpy(ram, state, RAM_SIZE);
2626
memcpy(nametable, state + SAVE_STATE_NAMETABLE_OFFSET, NAMETABLE_SIZE);
27-
memcpy(palette_table, state + SAVE_STATE_PALETTE_OFFSET, PALETTE_SIZE);
27+
memcpy(palette.u8, state + SAVE_STATE_PALETTE_OFFSET, PALETTE_SIZE);
2828
memcpy(oam, state + SAVE_STATE_OAM_OFFSET, OAM_SIZE);
2929
memset(audio_buffer, 0, AUDIO_BUFFER_SIZE); // clear audio buffer
3030
}

0 commit comments

Comments
 (0)