diff --git a/gfx/drivers/ctr_gfx.c b/gfx/drivers/ctr_gfx.c index 5f1e205ae8..09caa53539 100644 --- a/gfx/drivers/ctr_gfx.c +++ b/gfx/drivers/ctr_gfx.c @@ -202,7 +202,7 @@ static void* ctr_init(const video_info_t* video, CTRGU_ATTRIBFMT(GPU_SHORT, 2) << 4, sizeof(ctr_vertex_t)); GPUCMD_Finalize(); - GPUCMD_FlushAndRun(NULL); + ctrGuFlushAndRun(true); gspWaitForEvent(GSPEVENT_P3D, false); if (input && input_data) @@ -214,7 +214,7 @@ static void* ctr_init(const video_info_t* video, return ctr; } -//#define gspWaitForEvent(...) + static bool ctr_frame(void* data, const void* frame, unsigned width, unsigned height, unsigned pitch, const char* msg) { @@ -249,17 +249,17 @@ static bool ctr_frame(void* data, const void* frame, } frames++; - currentTick = osGetTime(); + currentTick = svcGetSystemTick(); uint32_t diff = currentTick - lastTick; - if(diff > 1000) + if(diff > CTR_CPU_TICKS_PER_SECOND) { - fps = (float)frames * (1000.0 / diff); + fps = (float)frames * ((float) CTR_CPU_TICKS_PER_SECOND / (float) diff); lastTick = currentTick; frames = 0; } printf("fps: %8.4f frames: %i\r", fps, total_frames++); - fflush(stdout); +// fflush(stdout); /* enable this to profile the core without video output */ #if 0 @@ -267,60 +267,83 @@ static bool ctr_frame(void* data, const void* frame, goto end; #endif + svcWaitSynchronization(gspEvents[GSPEVENT_P3D], 20000000); + svcClearEvent(gspEvents[GSPEVENT_P3D]); + svcWaitSynchronization(gspEvents[GSPEVENT_PPF], 20000000); + svcClearEvent(gspEvents[GSPEVENT_PPF]); + + gfxSwapBuffersGpu(); + + if (ctr->vsync) + gspWaitForEvent(GSPEVENT_VBlank0, true); + + ctrGuSetMemoryFill(true, (u32*)CTR_GPU_FRAMEBUFFER, 0x00000000, + (u32*)(CTR_GPU_FRAMEBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)), + 0x201, (u32*)CTR_GPU_DEPTHBUFFER, 0x00000000, + (u32*)(CTR_GPU_DEPTHBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)), + 0x201); + GPUCMD_SetBufferOffset(0); + if (width > ctr->texture_width) + width = ctr->texture_width; + if (height > ctr->texture_height) + height = ctr->texture_height; + if(frame) { - int i; - uint16_t* dst = (uint16_t*)ctr->texture_linear; - const uint8_t* src = frame; - if (width > ctr->texture_width) - width = ctr->texture_width; - if (height > ctr->texture_height) - height = ctr->texture_height; - for (i = 0; i < height; i++) + if(((((u32)(frame)) >= 0x14000000 && ((u32)(frame)) < 0x1c000000)) /* frame in linear memory */ + && !((u32)frame & 0x7F) /* 128-byte aligned */ + && !((pitch) & 0xF)) /* 16-byte aligned */ { - memcpy(dst, src, width * sizeof(uint16_t)); - dst += ctr->texture_width; - src += pitch; + /* can copy the buffer directly with the GPU */ + ctrGuCopyImage(false, frame, pitch / 2, height, CTRGU_RGB565, false, + ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true); } - GSPGPU_FlushDataCache(NULL, ctr->texture_linear, - ctr->texture_width * ctr->texture_height * sizeof(uint16_t)); + else + { + int i; + uint16_t* dst = (uint16_t*)ctr->texture_linear; + const uint8_t* src = frame; + for (i = 0; i < height; i++) + { + memcpy(dst, src, width * sizeof(uint16_t)); + dst += ctr->texture_width; + src += pitch; + } + GSPGPU_FlushDataCache(NULL, ctr->texture_linear, + ctr->texture_width * ctr->texture_height * sizeof(uint16_t)); - ctrGuCopyImage(ctr->texture_linear, ctr->texture_width, ctr->menu.texture_height, CTRGU_RGB565, false, - ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true); - - gspWaitForEvent(GSPEVENT_PPF, false); - - - ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->texture_swizzled), ctr->texture_width, ctr->texture_height, - GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) | - GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE), - GPU_RGB565); - - ctr->frame_coords->u = width; - ctr->frame_coords->v = height; - GSPGPU_FlushDataCache(NULL, (u8*)ctr->frame_coords, sizeof(ctr_vertex_t)); - - ctrGuSetAttributeBuffersAddress(VIRT_TO_PHYS(ctr->frame_coords)); - ctrGuSetVertexShaderFloatUniform(0, (float*)&ctr->scale_vector, 1); - GPU_DrawArray(GPU_UNKPRIM, 1); + ctrGuCopyImage(false, ctr->texture_linear, ctr->texture_width, ctr->menu.texture_height, CTRGU_RGB565, false, + ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true); + } } + + ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->texture_swizzled), ctr->texture_width, ctr->texture_height, + GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) | + GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE), + GPU_RGB565); + + ctr->frame_coords->u = width; + ctr->frame_coords->v = height; + GSPGPU_FlushDataCache(NULL, (u8*)ctr->frame_coords, sizeof(ctr_vertex_t)); + + ctrGuSetAttributeBuffersAddress(VIRT_TO_PHYS(ctr->frame_coords)); + ctrGuSetVertexShaderFloatUniform(0, (float*)&ctr->scale_vector, 1); + GPU_DrawArray(GPU_UNKPRIM, 1); + if (ctr->menu_texture_enable) { GSPGPU_FlushDataCache(NULL, ctr->menu.texture_linear, ctr->menu.texture_width * ctr->menu.texture_height * sizeof(uint16_t)); - ctrGuCopyImage(ctr->menu.texture_linear, ctr->menu.texture_width, ctr->menu.texture_height, CTRGU_RGBA4444,false, + ctrGuCopyImage(false, ctr->menu.texture_linear, ctr->menu.texture_width, ctr->menu.texture_height, CTRGU_RGBA4444,false, ctr->menu.texture_swizzled, ctr->menu.texture_width, CTRGU_RGBA4444, true); - gspWaitForEvent(GSPEVENT_PPF, false); - - ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->menu.texture_swizzled), ctr->menu.texture_width, ctr->menu.texture_height, GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) | GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE), @@ -334,27 +357,14 @@ static bool ctr_frame(void* data, const void* frame, GPU_FinishDrawing(); GPUCMD_Finalize(); - GPUCMD_FlushAndRun(NULL); - gspWaitForEvent(GSPEVENT_P3D, false); + ctrGuFlushAndRun(true); - ctrGuDisplayTransfer(CTR_GPU_FRAMEBUFFER, 240,400, CTRGU_RGBA8, + ctrGuDisplayTransfer(true, CTR_GPU_FRAMEBUFFER, 240,400, CTRGU_RGBA8, gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 240,400,CTRGU_RGB8, CTRGU_MULTISAMPLE_NONE); - gspWaitForEvent(GSPEVENT_PPF, false); - - GX_SetMemoryFill(NULL, (u32*)CTR_GPU_FRAMEBUFFER, 0x00000000, - (u32*)(CTR_GPU_FRAMEBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)), - 0x201, (u32*)CTR_GPU_DEPTHBUFFER, 0x00000000, - (u32*)(CTR_GPU_DEPTHBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)), - 0x201); - - gspWaitForEvent(GSPEVENT_PSC0, false); - gfxSwapBuffersGpu(); - -// if (ctr->vsync) -// gspWaitForEvent(GSPEVENT_VBlank0, true); end: +// gspWaitForEvent(GSPEVENT_VBlank0, true); RARCH_PERFORMANCE_STOP(ctrframe_f); return true; } diff --git a/gfx/drivers/ctr_gu.h b/gfx/drivers/ctr_gu.h index 75fa426090..629f5537e4 100644 --- a/gfx/drivers/ctr_gu.h +++ b/gfx/drivers/ctr_gu.h @@ -48,36 +48,13 @@ #define CTRGU_MULTISAMPLE_2x1 (1 << 24) #define CTRGU_MULTISAMPLE_2x2 (2 << 24) -typedef struct -{ - uint32_t buffer[8]; -} gtrgu_gx_command_t; +#define CTR_CPU_TICKS_PER_SECOND 268123480 -__attribute__((always_inline)) -static INLINE int ctrGuWriteDisplayTransferCommand(gtrgu_gx_command_t* command, - void* src, int src_w, int src_h, - void* dst, int dst_w, int dst_h, - uint32_t flags) -{ - command->buffer[0] = 0x03; //CommandID - command->buffer[1] = (uint32_t)src; - command->buffer[2] = (uint32_t)dst; - command->buffer[3] = CTRGU_SIZE(src_w, src_h); - command->buffer[4] = CTRGU_SIZE(dst_w, dst_h); - command->buffer[5] = flags; - command->buffer[6] = 0x0; - command->buffer[7] = 0x0; - - return 0; -} - -__attribute__((always_inline)) -static INLINE int ctrGuSubmitGxCommand(u32* gxbuf, gtrgu_gx_command_t* command) -{ - if(!gxbuf) gxbuf = gxCmdBuf; - - return GSPGPU_SubmitGxCommand(gxbuf, (u32*)command, NULL); -} +extern Handle gspEvents[GSPEVENT_MAX]; +extern u32* gpuCmdBuf; +extern u32 gpuCmdBufOffset; +extern u32 __linear_heap_size; +extern u32* __linear_heap; __attribute__((always_inline)) static INLINE void ctrGuSetTexture(GPU_TEXUNIT unit, u32* data, @@ -108,14 +85,68 @@ static INLINE void ctrGuSetTexture(GPU_TEXUNIT unit, u32* data, } } +__attribute__((always_inline)) +static INLINE Result ctrGuSetCommandList_First(bool queued, u32* buf0a, u32 buf0s, u32* buf1a, u32 buf1s, u32* buf2a, u32 buf2s) +{ + u32 gxCommand[0x8]; + gxCommand[0]=0x05 | (queued? 0x01000000 : 0x0); //CommandID + gxCommand[1]=(u32)buf0a; //buf0 address + gxCommand[2]=(u32)buf0s; //buf0 size + gxCommand[3]=(u32)buf1a; //buf1 address + gxCommand[4]=(u32)buf1s; //buf1 size + gxCommand[5]=(u32)buf2a; //buf2 address + gxCommand[6]=(u32)buf2s; //buf2 size + gxCommand[7]=0x0; + + return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL); +} + +__attribute__((always_inline)) +static INLINE Result ctrGuSetCommandList_Last(bool queued, u32* buf0a, u32 buf0s, u8 flags) +{ + u32 gxCommand[0x8]; + gxCommand[0]=0x01 | (queued? 0x01000000 : 0x0); //CommandID + gxCommand[1]=(u32)buf0a; //buf0 address + gxCommand[2]=(u32)buf0s; //buf0 size + gxCommand[3]=flags&1; //written to GSP module state + gxCommand[4]=gxCommand[5]=gxCommand[6]=0x0; + gxCommand[7]=(flags>>1)&1; //when non-zero, call svcFlushProcessDataCache() with the specified buffer + + return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL); +} + +__attribute__((always_inline)) +static INLINE void ctrGuFlushAndRun(bool queued) +{ + //take advantage of GX_SetCommandList_First to flush gsp heap + ctrGuSetCommandList_First(queued, gpuCmdBuf, gpuCmdBufOffset*4, __linear_heap, __linear_heap_size, NULL, 0); + ctrGuSetCommandList_Last(queued, gpuCmdBuf, gpuCmdBufOffset*4, 0x0); +} + +__attribute__((always_inline)) +static INLINE Result ctrGuSetMemoryFill(bool queued, u32* buf0a, u32 buf0v, u32* buf0e, u16 width0, u32* buf1a, u32 buf1v, u32* buf1e, u16 width1) +{ + u32 gxCommand[0x8]; + gxCommand[0]=0x02 | (queued? 0x01000000 : 0x0); //CommandID + gxCommand[1]=(u32)buf0a; //buf0 address + gxCommand[2]=buf0v; //buf0 value + gxCommand[3]=(u32)buf0e; //buf0 end addr + gxCommand[4]=(u32)buf1a; //buf1 address + gxCommand[5]=buf1v; //buf1 value + gxCommand[6]=(u32)buf1e; //buf1 end addr + gxCommand[7]=(width0)|(width1<<16); + + return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL); +} __attribute__((always_inline)) static INLINE Result ctrGuCopyImage - (void* src, int src_w, int src_h, int src_fmt, bool src_is_tiled, - void* dst, int dst_w, int dst_fmt, bool dst_is_tiled) + (bool queued, + const void* src, int src_w, int src_h, int src_fmt, bool src_is_tiled, + void* dst, int dst_w, int dst_fmt, bool dst_is_tiled) { u32 gxCommand[0x8]; - gxCommand[0]=0x03; //CommandID + gxCommand[0]=0x03 | (queued? 0x01000000 : 0x0); //CommandID gxCommand[1]=(u32)src; gxCommand[2]=(u32)dst; gxCommand[3]=dst_w&0xFF8; @@ -133,11 +164,12 @@ static INLINE Result ctrGuCopyImage __attribute__((always_inline)) static INLINE Result ctrGuDisplayTransfer - (void* src, int src_w, int src_h, int src_fmt, + (bool queued, + void* src, int src_w, int src_h, int src_fmt, void* dst, int dst_w, int dst_h, int dst_fmt, int multisample_lvl) { u32 gxCommand[0x8]; - gxCommand[0]=0x03; //CommandID + gxCommand[0]=0x03 | (queued? 0x01000000 : 0x0); //CommandID gxCommand[1]=(u32)src; gxCommand[2]=(u32)dst; gxCommand[3]=CTRGU_SIZE(dst_w, dst_h);