From ec91c68690014125ce11c3ea3ba5388f50329831 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Tue, 14 May 2019 19:56:32 +0800 Subject: [PATCH] sfc: performance: Async render_scanline --- Makefile | 2 +- sfc/alt/ppu-performance/mmio/mmio.cpp | 113 ++++++++++++++++++---------------- sfc/alt/ppu-performance/ppu.cpp | 41 +++++++++++- sfc/alt/ppu-performance/ppu.hpp | 8 +++ target-libretro/Makefile | 2 +- 5 files changed, 111 insertions(+), 55 deletions(-) diff --git a/Makefile b/Makefile index e208c156..10737acf 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ sfc_lagfix := 1 ifeq ($(DEBUG), 1) flags := -I. -Ilibco -O0 -g else - flags := -I. -Ilibco -O3 -fomit-frame-pointer + flags := -I. -Ilibco -Ofast -ffast-math -fomit-frame-pointer endif cflags := -std=gnu99 -xc diff --git a/sfc/alt/ppu-performance/mmio/mmio.cpp b/sfc/alt/ppu-performance/mmio/mmio.cpp index 46244e23..28748c6b 100644 --- a/sfc/alt/ppu-performance/mmio/mmio.cpp +++ b/sfc/alt/ppu-performance/mmio/mmio.cpp @@ -281,12 +281,14 @@ uint8 PPU::mmio_read(unsigned addr) { void PPU::mmio_write(unsigned addr, uint8 data) { cpu.synchronize_ppu(); + pthread_mutex_lock(&render_mutex); + switch(addr & 0xffff) { case 0x2100: { //INIDISP if(regs.display_disable && cpu.vcounter() == display.height) sprite.address_reset(); regs.display_disable = data & 0x80; regs.display_brightness = data & 0x0f; - return; + break; } case 0x2101: { //OBSEL @@ -294,20 +296,20 @@ void PPU::mmio_write(unsigned addr, uint8 data) { sprite.regs.nameselect = (data >> 3) & 3; sprite.regs.tiledata_addr = (data & 3) << 14; sprite.list_valid = false; - return; + break; } case 0x2102: { //OAMADDL regs.oam_baseaddr = (regs.oam_baseaddr & 0x0100) | (data << 0); sprite.address_reset(); - return; + break; } case 0x2103: { //OAMADDH regs.oam_priority = data & 0x80; regs.oam_baseaddr = ((data & 1) << 8) | (regs.oam_baseaddr & 0x00ff); sprite.address_reset(); - return; + break; } case 0x2104: { //OAMDATA @@ -320,7 +322,7 @@ void 
PPU::mmio_write(unsigned addr, uint8 data) { } regs.oam_addr = (regs.oam_addr + 1) & 0x03ff; sprite.set_first(); - return; + break; } case 0x2105: { //BGMODE @@ -331,7 +333,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { regs.bg3_priority = data & 0x08; regs.bgmode = data & 0x07; mmio_update_video_mode(); - return; + break; } case 0x2106: { //MOSAIC @@ -340,43 +342,43 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg3.regs.mosaic = (data & 0x04 ? mosaic_size : 0); bg2.regs.mosaic = (data & 0x02 ? mosaic_size : 0); bg1.regs.mosaic = (data & 0x01 ? mosaic_size : 0); - return; + break; } case 0x2107: { //BG1SC bg1.regs.screen_addr = (data & 0x7c) << 9; bg1.regs.screen_size = data & 3; - return; + break; } case 0x2108: { //BG2SC bg2.regs.screen_addr = (data & 0x7c) << 9; bg2.regs.screen_size = data & 3; - return; + break; } case 0x2109: { //BG3SC bg3.regs.screen_addr = (data & 0x7c) << 9; bg3.regs.screen_size = data & 3; - return; + break; } case 0x210a: { //BG4SC bg4.regs.screen_addr = (data & 0x7c) << 9; bg4.regs.screen_size = data & 3; - return; + break; } case 0x210b: { //BG12NBA bg1.regs.tiledata_addr = (data & 0x07) << 13; bg2.regs.tiledata_addr = (data & 0x70) << 9; - return; + break; } case 0x210c: { //BG34NBA bg3.regs.tiledata_addr = (data & 0x07) << 13; bg4.regs.tiledata_addr = (data & 0x70) << 9; - return; + break; } case 0x210d: { //BG1HOFS @@ -385,7 +387,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg1.regs.hoffset = (data << 8) | (regs.bgofs_latchdata & ~7) | ((bg1.regs.hoffset >> 8) & 7); regs.bgofs_latchdata = data; - return; + break; } case 0x210e: { //BG1VOFS @@ -394,43 +396,43 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg1.regs.voffset = (data << 8) | regs.bgofs_latchdata; regs.bgofs_latchdata = data; - return; + break; } case 0x210f: { //BG2HOFS bg2.regs.hoffset = (data << 8) | (regs.bgofs_latchdata & ~7) | ((bg2.regs.hoffset >> 8) & 7); regs.bgofs_latchdata = data; - return; + break; } case 0x2110: { //BG2VOFS 
bg2.regs.voffset = (data << 8) | regs.bgofs_latchdata; regs.bgofs_latchdata = data; - return; + break; } case 0x2111: { //BG3HOFS bg3.regs.hoffset = (data << 8) | (regs.bgofs_latchdata & ~7) | ((bg3.regs.hoffset >> 8) & 7); regs.bgofs_latchdata = data; - return; + break; } case 0x2112: { //BG3VOFS bg3.regs.voffset = (data << 8) | regs.bgofs_latchdata; regs.bgofs_latchdata = data; - return; + break; } case 0x2113: { //BG4HOFS bg4.regs.hoffset = (data << 8) | (regs.bgofs_latchdata & ~7) | ((bg4.regs.hoffset >> 8) & 7); regs.bgofs_latchdata = data; - return; + break; } case 0x2114: { //BG4VOFS bg4.regs.voffset = (data << 8) | regs.bgofs_latchdata; regs.bgofs_latchdata = data; - return; + break; } case 0x2115: { //VMAIN @@ -442,7 +444,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { case 2: regs.vram_incsize = 128; break; case 3: regs.vram_incsize = 128; break; } - return; + break; } case 0x2116: { //VMADDL @@ -450,7 +452,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { uint16 addr = get_vram_addr(); regs.vram_readbuffer = vram_read(addr + 0) << 0; regs.vram_readbuffer |= vram_read(addr + 1) << 8; - return; + break; } case 0x2117: { //VMADDH @@ -458,67 +460,67 @@ void PPU::mmio_write(unsigned addr, uint8 data) { uint16 addr = get_vram_addr(); regs.vram_readbuffer = vram_read(addr + 0) << 0; regs.vram_readbuffer |= vram_read(addr + 1) << 8; - return; + break; } case 0x2118: { //VMDATAL vram_write(get_vram_addr() + 0, data); if(regs.vram_incmode == 0) regs.vram_addr += regs.vram_incsize; - return; + break; } case 0x2119: { //VMDATAH vram_write(get_vram_addr() + 1, data); if(regs.vram_incmode == 1) regs.vram_addr += regs.vram_incsize; - return; + break; } case 0x211a: { //M7SEL regs.mode7_repeat = (data >> 6) & 3; regs.mode7_vflip = data & 0x02; regs.mode7_hflip = data & 0x01; - return; + break; } case 0x211b: { //M7A regs.m7a = (data << 8) | regs.mode7_latchdata; regs.mode7_latchdata = data; - return; + break; } case 0x211c: { //M7B regs.m7b = (data << 8) | 
regs.mode7_latchdata; regs.mode7_latchdata = data; - return; + break; } case 0x211d: { //M7C regs.m7c = (data << 8) | regs.mode7_latchdata; regs.mode7_latchdata = data; - return; + break; } case 0x211e: { //M7D regs.m7d = (data << 8) | regs.mode7_latchdata; regs.mode7_latchdata = data; - return; + break; } case 0x211f: { //M7X regs.m7x = (data << 8) | regs.mode7_latchdata; regs.mode7_latchdata = data; - return; + break; } case 0x2120: { //M7Y regs.m7y = (data << 8) | regs.mode7_latchdata; regs.mode7_latchdata = data; - return; + break; } case 0x2121: { //CGADD regs.cgram_addr = data << 1; - return; + break; } case 0x2122: { //CGDATA @@ -529,7 +531,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { cgram_write((regs.cgram_addr & ~1) + 1, data & 0x7f); } regs.cgram_addr = (regs.cgram_addr + 1) & 0x01ff; - return; + break; } case 0x2123: { //W12SEL @@ -541,7 +543,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg1.window.two_invert = data & 0x04; bg1.window.one_enable = data & 0x02; bg1.window.one_invert = data & 0x01; - return; + break; } case 0x2124: { //W34SEL @@ -553,7 +555,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg3.window.two_invert = data & 0x04; bg3.window.one_enable = data & 0x02; bg3.window.one_invert = data & 0x01; - return; + break; } case 0x2125: { //WOBJSEL @@ -565,27 +567,27 @@ void PPU::mmio_write(unsigned addr, uint8 data) { sprite.window.two_invert = data & 0x04; sprite.window.one_enable = data & 0x02; sprite.window.one_invert = data & 0x01; - return; + break; } case 0x2126: { //WH0 regs.window_one_left = data; - return; + break; } case 0x2127: { //WH1 regs.window_one_right = data; - return; + break; } case 0x2128: { //WH2 regs.window_two_left = data; - return; + break; } case 0x2129: { //WH3 regs.window_two_right = data; - return; + break; } case 0x212a: { //WBGLOG @@ -593,13 +595,13 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg3.window.mask = (data >> 4) & 3; bg2.window.mask = (data >> 2) & 3; bg1.window.mask = 
(data >> 0) & 3; - return; + break; } case 0x212b: { //WOBJLOG screen.window.mask = (data >> 2) & 3; sprite.window.mask = (data >> 0) & 3; - return; + break; } case 0x212c: { //TM @@ -608,7 +610,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg3.regs.main_enable = data & 0x04; bg2.regs.main_enable = data & 0x02; bg1.regs.main_enable = data & 0x01; - return; + break; } case 0x212d: { //TS @@ -617,7 +619,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg3.regs.sub_enable = data & 0x04; bg2.regs.sub_enable = data & 0x02; bg1.regs.sub_enable = data & 0x01; - return; + break; } case 0x212e: { //TMW @@ -626,7 +628,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg3.window.main_enable = data & 0x04; bg2.window.main_enable = data & 0x02; bg1.window.main_enable = data & 0x01; - return; + break; } case 0x212f: { //TSW @@ -635,7 +637,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { bg3.window.sub_enable = data & 0x04; bg2.window.sub_enable = data & 0x02; bg1.window.sub_enable = data & 0x01; - return; + break; } case 0x2130: { //CGWSEL @@ -643,7 +645,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { screen.window.sub_mask = (data >> 4) & 3; screen.regs.addsub_mode = data & 0x02; screen.regs.direct_color = data & 0x01; - return; + break; } case 0x2131: { //CGADDSUB @@ -656,7 +658,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { screen.regs.color_enable[2] = data & 0x04; screen.regs.color_enable[1] = data & 0x02; screen.regs.color_enable[0] = data & 0x01; - return; + break; } case 0x2132: { //COLDATA @@ -664,7 +666,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) { if(data & 0x40) screen.regs.color_g = data & 0x1f; if(data & 0x20) screen.regs.color_r = data & 0x1f; screen.regs.color = (screen.regs.color_b << 10) | (screen.regs.color_g << 5) | (screen.regs.color_r << 0); - return; + break; } case 0x2133: { //SETINI @@ -675,12 +677,17 @@ void PPU::mmio_write(unsigned addr, uint8 data) { regs.interlace = data & 0x01; 
mmio_update_video_mode(); sprite.list_valid = false; - return; + break; } } + + pthread_mutex_unlock(&render_mutex); } void PPU::mmio_reset() { + + pthread_mutex_lock(&render_mutex); + //internal regs.ppu1_mdr = 0; regs.ppu2_mdr = 0; @@ -886,6 +893,8 @@ void PPU::mmio_reset() { sprite.regs.range_over = 0; mmio_update_video_mode(); + + pthread_mutex_unlock(&render_mutex); } #endif diff --git a/sfc/alt/ppu-performance/ppu.cpp b/sfc/alt/ppu-performance/ppu.cpp index b598cc70..c3cee939 100644 --- a/sfc/alt/ppu-performance/ppu.cpp +++ b/sfc/alt/ppu-performance/ppu.cpp @@ -27,6 +27,25 @@ void PPU::synchronize_cpu() { void PPU::Enter() { ppu.enter(); } +void *PPU::render_thread_fn(void *arg) { + PPU *ppu = (SuperFamicom::PPU *)arg; + + while(true) { + pthread_mutex_lock(&ppu->render_mutex); + while (!ppu->render_busy) + pthread_cond_wait(&ppu->render_cond, &ppu->render_mutex); + pthread_mutex_unlock(&ppu->render_mutex); + + ppu->render_scanline(); + ppu->render_busy = 0; + + pthread_mutex_lock(&ppu->render_mutex); + pthread_cond_signal(&ppu->render_cond); + pthread_mutex_unlock(&ppu->render_mutex); + } + return NULL; +} + void PPU::enter() { while(true) { if(scheduler.sync == Scheduler::SynchronizeMode::All) { @@ -36,7 +55,15 @@ void PPU::enter() { scanline(); if(vcounter() < display.height && vcounter()) { add_clocks(512); - render_scanline(); + + pthread_mutex_lock(&render_mutex); + while (render_busy) + pthread_cond_wait(&render_cond, &render_mutex); + + render_busy = 1; + pthread_cond_signal(&render_cond); + pthread_mutex_unlock(&render_mutex); + add_clocks(lineclocks() - 512); } else { add_clocks(lineclocks()); @@ -52,11 +79,17 @@ void PPU::add_clocks(unsigned clocks) { void PPU::render_scanline() { if(display.framecounter) return; //skip this frame? 
+ + pthread_mutex_lock(&render_mutex); bg1.scanline(); bg2.scanline(); bg3.scanline(); bg4.scanline(); + pthread_mutex_unlock(&render_mutex); + if(regs.display_disable) return screen.render_black(); + + pthread_mutex_lock(&render_mutex); screen.scanline(); bg1.render(); bg2.render(); @@ -64,6 +97,7 @@ void PPU::render_scanline() { bg4.render(); sprite.render(); screen.render(); + pthread_mutex_unlock(&render_mutex); } void PPU::scanline() { @@ -141,6 +175,11 @@ screen(*this) { display.height = 224; display.frameskip = 0; display.framecounter = 0; + + render_busy = 0; + pthread_cond_init(&render_cond, NULL); + pthread_mutex_init(&render_mutex, NULL); + pthread_create(&render_thread, NULL, render_thread_fn, this); } PPU::~PPU() { diff --git a/sfc/alt/ppu-performance/ppu.hpp b/sfc/alt/ppu-performance/ppu.hpp index b3427e29..516209a1 100644 --- a/sfc/alt/ppu-performance/ppu.hpp +++ b/sfc/alt/ppu-performance/ppu.hpp @@ -1,3 +1,5 @@ +#include <pthread.h> + struct PPU : Thread, public PPUcounter { uint8 vram[64 * 1024]; uint8 oam[544]; @@ -58,6 +60,12 @@ private: void add_clocks(unsigned clocks); void render_scanline(); + pthread_t render_thread; + volatile int render_busy; + pthread_cond_t render_cond; + pthread_mutex_t render_mutex; + static void *render_thread_fn(void *arg); + friend class PPU::Cache; friend class PPU::Background; friend class PPU::Sprite; diff --git a/target-libretro/Makefile b/target-libretro/Makefile index 997cd48b..56e316bf 100644 --- a/target-libretro/Makefile +++ b/target-libretro/Makefile @@ -46,7 +46,7 @@ obj/libretro-$(profile).o: $(ui)/libretro.cpp $(ui)/* #targets build: $(objects) ifeq ($(platform),linux) - $(compiler) -o out/bsnes_$(profile)_libretro.so -shared $(objects) -ldl -Wl,--no-undefined -Wl,--version-script=$(ui)/link.T + $(compiler) -o out/bsnes_$(profile)_libretro.so -shared $(objects) -ldl -Wl,--no-undefined -Wl,--version-script=$(ui)/link.T -lpthread else ifneq (,$(findstring ios,$(platform))) ifeq ($(platform),ios-arm64) $(compiler) -o 
out/bsnes_$(profile)_libretro_ios.dylib -dynamiclib $(objects) -isysroot $(IOSSDK) -arch arm64 -- 2.11.0