mirror of
https://github.com/libretro/RetroArch
synced 2025-04-24 15:02:35 +00:00
(3DS) video driver: performance improvements.
This commit is contained in:
parent
64e3e40bb6
commit
e754c328b0
@ -202,7 +202,7 @@ static void* ctr_init(const video_info_t* video,
|
|||||||
CTRGU_ATTRIBFMT(GPU_SHORT, 2) << 4,
|
CTRGU_ATTRIBFMT(GPU_SHORT, 2) << 4,
|
||||||
sizeof(ctr_vertex_t));
|
sizeof(ctr_vertex_t));
|
||||||
GPUCMD_Finalize();
|
GPUCMD_Finalize();
|
||||||
GPUCMD_FlushAndRun(NULL);
|
ctrGuFlushAndRun(true);
|
||||||
gspWaitForEvent(GSPEVENT_P3D, false);
|
gspWaitForEvent(GSPEVENT_P3D, false);
|
||||||
|
|
||||||
if (input && input_data)
|
if (input && input_data)
|
||||||
@ -214,7 +214,7 @@ static void* ctr_init(const video_info_t* video,
|
|||||||
|
|
||||||
return ctr;
|
return ctr;
|
||||||
}
|
}
|
||||||
//#define gspWaitForEvent(...)
|
|
||||||
static bool ctr_frame(void* data, const void* frame,
|
static bool ctr_frame(void* data, const void* frame,
|
||||||
unsigned width, unsigned height, unsigned pitch, const char* msg)
|
unsigned width, unsigned height, unsigned pitch, const char* msg)
|
||||||
{
|
{
|
||||||
@ -249,17 +249,17 @@ static bool ctr_frame(void* data, const void* frame,
|
|||||||
}
|
}
|
||||||
|
|
||||||
frames++;
|
frames++;
|
||||||
currentTick = osGetTime();
|
currentTick = svcGetSystemTick();
|
||||||
uint32_t diff = currentTick - lastTick;
|
uint32_t diff = currentTick - lastTick;
|
||||||
if(diff > 1000)
|
if(diff > CTR_CPU_TICKS_PER_SECOND)
|
||||||
{
|
{
|
||||||
fps = (float)frames * (1000.0 / diff);
|
fps = (float)frames * ((float) CTR_CPU_TICKS_PER_SECOND / (float) diff);
|
||||||
lastTick = currentTick;
|
lastTick = currentTick;
|
||||||
frames = 0;
|
frames = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("fps: %8.4f frames: %i\r", fps, total_frames++);
|
printf("fps: %8.4f frames: %i\r", fps, total_frames++);
|
||||||
fflush(stdout);
|
// fflush(stdout);
|
||||||
|
|
||||||
/* enable this to profile the core without video output */
|
/* enable this to profile the core without video output */
|
||||||
#if 0
|
#if 0
|
||||||
@ -267,17 +267,44 @@ static bool ctr_frame(void* data, const void* frame,
|
|||||||
goto end;
|
goto end;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
svcWaitSynchronization(gspEvents[GSPEVENT_P3D], 20000000);
|
||||||
|
svcClearEvent(gspEvents[GSPEVENT_P3D]);
|
||||||
|
svcWaitSynchronization(gspEvents[GSPEVENT_PPF], 20000000);
|
||||||
|
svcClearEvent(gspEvents[GSPEVENT_PPF]);
|
||||||
|
|
||||||
|
gfxSwapBuffersGpu();
|
||||||
|
|
||||||
|
if (ctr->vsync)
|
||||||
|
gspWaitForEvent(GSPEVENT_VBlank0, true);
|
||||||
|
|
||||||
|
ctrGuSetMemoryFill(true, (u32*)CTR_GPU_FRAMEBUFFER, 0x00000000,
|
||||||
|
(u32*)(CTR_GPU_FRAMEBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)),
|
||||||
|
0x201, (u32*)CTR_GPU_DEPTHBUFFER, 0x00000000,
|
||||||
|
(u32*)(CTR_GPU_DEPTHBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)),
|
||||||
|
0x201);
|
||||||
|
|
||||||
GPUCMD_SetBufferOffset(0);
|
GPUCMD_SetBufferOffset(0);
|
||||||
|
|
||||||
if(frame)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
uint16_t* dst = (uint16_t*)ctr->texture_linear;
|
|
||||||
const uint8_t* src = frame;
|
|
||||||
if (width > ctr->texture_width)
|
if (width > ctr->texture_width)
|
||||||
width = ctr->texture_width;
|
width = ctr->texture_width;
|
||||||
if (height > ctr->texture_height)
|
if (height > ctr->texture_height)
|
||||||
height = ctr->texture_height;
|
height = ctr->texture_height;
|
||||||
|
|
||||||
|
if(frame)
|
||||||
|
{
|
||||||
|
if(((((u32)(frame)) >= 0x14000000 && ((u32)(frame)) < 0x1c000000)) /* frame in linear memory */
|
||||||
|
&& !((u32)frame & 0x7F) /* 128-byte aligned */
|
||||||
|
&& !((pitch) & 0xF)) /* 16-byte aligned */
|
||||||
|
{
|
||||||
|
/* can copy the buffer directly with the GPU */
|
||||||
|
ctrGuCopyImage(false, frame, pitch / 2, height, CTRGU_RGB565, false,
|
||||||
|
ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
uint16_t* dst = (uint16_t*)ctr->texture_linear;
|
||||||
|
const uint8_t* src = frame;
|
||||||
for (i = 0; i < height; i++)
|
for (i = 0; i < height; i++)
|
||||||
{
|
{
|
||||||
memcpy(dst, src, width * sizeof(uint16_t));
|
memcpy(dst, src, width * sizeof(uint16_t));
|
||||||
@ -287,10 +314,12 @@ static bool ctr_frame(void* data, const void* frame,
|
|||||||
GSPGPU_FlushDataCache(NULL, ctr->texture_linear,
|
GSPGPU_FlushDataCache(NULL, ctr->texture_linear,
|
||||||
ctr->texture_width * ctr->texture_height * sizeof(uint16_t));
|
ctr->texture_width * ctr->texture_height * sizeof(uint16_t));
|
||||||
|
|
||||||
ctrGuCopyImage(ctr->texture_linear, ctr->texture_width, ctr->menu.texture_height, CTRGU_RGB565, false,
|
ctrGuCopyImage(false, ctr->texture_linear, ctr->texture_width, ctr->menu.texture_height, CTRGU_RGB565, false,
|
||||||
ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true);
|
ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true);
|
||||||
|
|
||||||
gspWaitForEvent(GSPEVENT_PPF, false);
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->texture_swizzled), ctr->texture_width, ctr->texture_height,
|
ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->texture_swizzled), ctr->texture_width, ctr->texture_height,
|
||||||
@ -306,21 +335,15 @@ static bool ctr_frame(void* data, const void* frame,
|
|||||||
ctrGuSetVertexShaderFloatUniform(0, (float*)&ctr->scale_vector, 1);
|
ctrGuSetVertexShaderFloatUniform(0, (float*)&ctr->scale_vector, 1);
|
||||||
GPU_DrawArray(GPU_UNKPRIM, 1);
|
GPU_DrawArray(GPU_UNKPRIM, 1);
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ctr->menu_texture_enable)
|
if (ctr->menu_texture_enable)
|
||||||
{
|
{
|
||||||
|
|
||||||
GSPGPU_FlushDataCache(NULL, ctr->menu.texture_linear,
|
GSPGPU_FlushDataCache(NULL, ctr->menu.texture_linear,
|
||||||
ctr->menu.texture_width * ctr->menu.texture_height * sizeof(uint16_t));
|
ctr->menu.texture_width * ctr->menu.texture_height * sizeof(uint16_t));
|
||||||
|
|
||||||
ctrGuCopyImage(ctr->menu.texture_linear, ctr->menu.texture_width, ctr->menu.texture_height, CTRGU_RGBA4444,false,
|
ctrGuCopyImage(false, ctr->menu.texture_linear, ctr->menu.texture_width, ctr->menu.texture_height, CTRGU_RGBA4444,false,
|
||||||
ctr->menu.texture_swizzled, ctr->menu.texture_width, CTRGU_RGBA4444, true);
|
ctr->menu.texture_swizzled, ctr->menu.texture_width, CTRGU_RGBA4444, true);
|
||||||
|
|
||||||
gspWaitForEvent(GSPEVENT_PPF, false);
|
|
||||||
|
|
||||||
|
|
||||||
ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->menu.texture_swizzled), ctr->menu.texture_width, ctr->menu.texture_height,
|
ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->menu.texture_swizzled), ctr->menu.texture_width, ctr->menu.texture_height,
|
||||||
GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) |
|
GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) |
|
||||||
GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE),
|
GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE),
|
||||||
@ -334,27 +357,14 @@ static bool ctr_frame(void* data, const void* frame,
|
|||||||
|
|
||||||
GPU_FinishDrawing();
|
GPU_FinishDrawing();
|
||||||
GPUCMD_Finalize();
|
GPUCMD_Finalize();
|
||||||
GPUCMD_FlushAndRun(NULL);
|
ctrGuFlushAndRun(true);
|
||||||
gspWaitForEvent(GSPEVENT_P3D, false);
|
|
||||||
|
|
||||||
ctrGuDisplayTransfer(CTR_GPU_FRAMEBUFFER, 240,400, CTRGU_RGBA8,
|
ctrGuDisplayTransfer(true, CTR_GPU_FRAMEBUFFER, 240,400, CTRGU_RGBA8,
|
||||||
gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 240,400,CTRGU_RGB8, CTRGU_MULTISAMPLE_NONE);
|
gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 240,400,CTRGU_RGB8, CTRGU_MULTISAMPLE_NONE);
|
||||||
|
|
||||||
gspWaitForEvent(GSPEVENT_PPF, false);
|
|
||||||
|
|
||||||
GX_SetMemoryFill(NULL, (u32*)CTR_GPU_FRAMEBUFFER, 0x00000000,
|
|
||||||
(u32*)(CTR_GPU_FRAMEBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)),
|
|
||||||
0x201, (u32*)CTR_GPU_DEPTHBUFFER, 0x00000000,
|
|
||||||
(u32*)(CTR_GPU_DEPTHBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)),
|
|
||||||
0x201);
|
|
||||||
|
|
||||||
gspWaitForEvent(GSPEVENT_PSC0, false);
|
|
||||||
gfxSwapBuffersGpu();
|
|
||||||
|
|
||||||
// if (ctr->vsync)
|
|
||||||
// gspWaitForEvent(GSPEVENT_VBlank0, true);
|
|
||||||
|
|
||||||
end:
|
end:
|
||||||
|
// gspWaitForEvent(GSPEVENT_VBlank0, true);
|
||||||
RARCH_PERFORMANCE_STOP(ctrframe_f);
|
RARCH_PERFORMANCE_STOP(ctrframe_f);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -48,36 +48,13 @@
|
|||||||
#define CTRGU_MULTISAMPLE_2x1 (1 << 24)
|
#define CTRGU_MULTISAMPLE_2x1 (1 << 24)
|
||||||
#define CTRGU_MULTISAMPLE_2x2 (2 << 24)
|
#define CTRGU_MULTISAMPLE_2x2 (2 << 24)
|
||||||
|
|
||||||
typedef struct
|
#define CTR_CPU_TICKS_PER_SECOND 268123480
|
||||||
{
|
|
||||||
uint32_t buffer[8];
|
|
||||||
} gtrgu_gx_command_t;
|
|
||||||
|
|
||||||
__attribute__((always_inline))
|
extern Handle gspEvents[GSPEVENT_MAX];
|
||||||
static INLINE int ctrGuWriteDisplayTransferCommand(gtrgu_gx_command_t* command,
|
extern u32* gpuCmdBuf;
|
||||||
void* src, int src_w, int src_h,
|
extern u32 gpuCmdBufOffset;
|
||||||
void* dst, int dst_w, int dst_h,
|
extern u32 __linear_heap_size;
|
||||||
uint32_t flags)
|
extern u32* __linear_heap;
|
||||||
{
|
|
||||||
command->buffer[0] = 0x03; //CommandID
|
|
||||||
command->buffer[1] = (uint32_t)src;
|
|
||||||
command->buffer[2] = (uint32_t)dst;
|
|
||||||
command->buffer[3] = CTRGU_SIZE(src_w, src_h);
|
|
||||||
command->buffer[4] = CTRGU_SIZE(dst_w, dst_h);
|
|
||||||
command->buffer[5] = flags;
|
|
||||||
command->buffer[6] = 0x0;
|
|
||||||
command->buffer[7] = 0x0;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
__attribute__((always_inline))
|
|
||||||
static INLINE int ctrGuSubmitGxCommand(u32* gxbuf, gtrgu_gx_command_t* command)
|
|
||||||
{
|
|
||||||
if(!gxbuf) gxbuf = gxCmdBuf;
|
|
||||||
|
|
||||||
return GSPGPU_SubmitGxCommand(gxbuf, (u32*)command, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
static INLINE void ctrGuSetTexture(GPU_TEXUNIT unit, u32* data,
|
static INLINE void ctrGuSetTexture(GPU_TEXUNIT unit, u32* data,
|
||||||
@ -108,14 +85,68 @@ static INLINE void ctrGuSetTexture(GPU_TEXUNIT unit, u32* data,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static INLINE Result ctrGuSetCommandList_First(bool queued, u32* buf0a, u32 buf0s, u32* buf1a, u32 buf1s, u32* buf2a, u32 buf2s)
|
||||||
|
{
|
||||||
|
u32 gxCommand[0x8];
|
||||||
|
gxCommand[0]=0x05 | (queued? 0x01000000 : 0x0); //CommandID
|
||||||
|
gxCommand[1]=(u32)buf0a; //buf0 address
|
||||||
|
gxCommand[2]=(u32)buf0s; //buf0 size
|
||||||
|
gxCommand[3]=(u32)buf1a; //buf1 address
|
||||||
|
gxCommand[4]=(u32)buf1s; //buf1 size
|
||||||
|
gxCommand[5]=(u32)buf2a; //buf2 address
|
||||||
|
gxCommand[6]=(u32)buf2s; //buf2 size
|
||||||
|
gxCommand[7]=0x0;
|
||||||
|
|
||||||
|
return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static INLINE Result ctrGuSetCommandList_Last(bool queued, u32* buf0a, u32 buf0s, u8 flags)
|
||||||
|
{
|
||||||
|
u32 gxCommand[0x8];
|
||||||
|
gxCommand[0]=0x01 | (queued? 0x01000000 : 0x0); //CommandID
|
||||||
|
gxCommand[1]=(u32)buf0a; //buf0 address
|
||||||
|
gxCommand[2]=(u32)buf0s; //buf0 size
|
||||||
|
gxCommand[3]=flags&1; //written to GSP module state
|
||||||
|
gxCommand[4]=gxCommand[5]=gxCommand[6]=0x0;
|
||||||
|
gxCommand[7]=(flags>>1)&1; //when non-zero, call svcFlushProcessDataCache() with the specified buffer
|
||||||
|
|
||||||
|
return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static INLINE void ctrGuFlushAndRun(bool queued)
|
||||||
|
{
|
||||||
|
//take advantage of GX_SetCommandList_First to flush gsp heap
|
||||||
|
ctrGuSetCommandList_First(queued, gpuCmdBuf, gpuCmdBufOffset*4, __linear_heap, __linear_heap_size, NULL, 0);
|
||||||
|
ctrGuSetCommandList_Last(queued, gpuCmdBuf, gpuCmdBufOffset*4, 0x0);
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static INLINE Result ctrGuSetMemoryFill(bool queued, u32* buf0a, u32 buf0v, u32* buf0e, u16 width0, u32* buf1a, u32 buf1v, u32* buf1e, u16 width1)
|
||||||
|
{
|
||||||
|
u32 gxCommand[0x8];
|
||||||
|
gxCommand[0]=0x02 | (queued? 0x01000000 : 0x0); //CommandID
|
||||||
|
gxCommand[1]=(u32)buf0a; //buf0 address
|
||||||
|
gxCommand[2]=buf0v; //buf0 value
|
||||||
|
gxCommand[3]=(u32)buf0e; //buf0 end addr
|
||||||
|
gxCommand[4]=(u32)buf1a; //buf1 address
|
||||||
|
gxCommand[5]=buf1v; //buf1 value
|
||||||
|
gxCommand[6]=(u32)buf1e; //buf1 end addr
|
||||||
|
gxCommand[7]=(width0)|(width1<<16);
|
||||||
|
|
||||||
|
return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
static INLINE Result ctrGuCopyImage
|
static INLINE Result ctrGuCopyImage
|
||||||
(void* src, int src_w, int src_h, int src_fmt, bool src_is_tiled,
|
(bool queued,
|
||||||
|
const void* src, int src_w, int src_h, int src_fmt, bool src_is_tiled,
|
||||||
void* dst, int dst_w, int dst_fmt, bool dst_is_tiled)
|
void* dst, int dst_w, int dst_fmt, bool dst_is_tiled)
|
||||||
{
|
{
|
||||||
u32 gxCommand[0x8];
|
u32 gxCommand[0x8];
|
||||||
gxCommand[0]=0x03; //CommandID
|
gxCommand[0]=0x03 | (queued? 0x01000000 : 0x0); //CommandID
|
||||||
gxCommand[1]=(u32)src;
|
gxCommand[1]=(u32)src;
|
||||||
gxCommand[2]=(u32)dst;
|
gxCommand[2]=(u32)dst;
|
||||||
gxCommand[3]=dst_w&0xFF8;
|
gxCommand[3]=dst_w&0xFF8;
|
||||||
@ -133,11 +164,12 @@ static INLINE Result ctrGuCopyImage
|
|||||||
|
|
||||||
__attribute__((always_inline))
|
__attribute__((always_inline))
|
||||||
static INLINE Result ctrGuDisplayTransfer
|
static INLINE Result ctrGuDisplayTransfer
|
||||||
(void* src, int src_w, int src_h, int src_fmt,
|
(bool queued,
|
||||||
|
void* src, int src_w, int src_h, int src_fmt,
|
||||||
void* dst, int dst_w, int dst_h, int dst_fmt, int multisample_lvl)
|
void* dst, int dst_w, int dst_h, int dst_fmt, int multisample_lvl)
|
||||||
{
|
{
|
||||||
u32 gxCommand[0x8];
|
u32 gxCommand[0x8];
|
||||||
gxCommand[0]=0x03; //CommandID
|
gxCommand[0]=0x03 | (queued? 0x01000000 : 0x0); //CommandID
|
||||||
gxCommand[1]=(u32)src;
|
gxCommand[1]=(u32)src;
|
||||||
gxCommand[2]=(u32)dst;
|
gxCommand[2]=(u32)dst;
|
||||||
gxCommand[3]=CTRGU_SIZE(dst_w, dst_h);
|
gxCommand[3]=CTRGU_SIZE(dst_w, dst_h);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user