diff --git a/Makefile.ctr b/Makefile.ctr index 55f413a4ad..ea29f89935 100644 --- a/Makefile.ctr +++ b/Makefile.ctr @@ -1,6 +1,6 @@ TARGET := retroarch_3ds OBJS := -OBJS += gfx/drivers/ctr_blit.o griffin/griffin.o +OBJS += gfx/drivers/ctr_sprite.o griffin/griffin.o #NO_SMDH = 1 DEBUG = 0 @@ -136,9 +136,11 @@ LD := $(CXX) 3dsxtool $< $@ $(_3DSXFLAGS) #--------------------------------------------------------------------------------- -%.elf: +%.elf: .FORCE @echo linking $(notdir $@) $(LD) $(LDFLAGS) $(OBJS) $(LIBDIRS) $(LIBS) -o $@ $(NM) -CSn $@ > $(notdir $*.lst) +.PHONY: .FORCE + diff --git a/gfx/drivers/ctr_blit.shader b/gfx/drivers/ctr_blit.shader deleted file mode 100644 index cfba558434..0000000000 --- a/gfx/drivers/ctr_blit.shader +++ /dev/null @@ -1,33 +0,0 @@ - .const c20, 0.0, 1.0, 0.0, 1.0 - -; .in v0, vertex -; .in v1, texCoord - - .out o0, result.position, 0xF - .out o1, result.color, 0xF - .out o2, result.texcoord0, 0x3 - -; .uniform c0, c0, vertexScale -; .uniform c1, c1, textureScale - - .vsh vmain, end_vmain - -;code - vmain: - mul r0, c0, v0 (0x0) - add o0, c20, r0 (0x1) - mov o0, c20 (0x2) - mov o1, c20 (0x3) - mul r0, c1, v1 (0x4) - add o2, c20, r0 (0x5) - nop - end - end_vmain: - -;operand descriptors - .opdesc xyz_, xyzw, yxzw ; 0x0 - .opdesc xyz_, yyzw, xyzw ; 0x1 - .opdesc ___w, xyzw, xyzw ; 0x2 - .opdesc xyzw, wwww, wwww ; 0x3 - .opdesc xyzw, xyzw, xyzw ; 0x4 - .opdesc xyzw, xyzw, xyzw ; 0x5 diff --git a/gfx/drivers/ctr_gfx.c b/gfx/drivers/ctr_gfx.c index 9e5cfdc79a..d98ba8b97d 100644 --- a/gfx/drivers/ctr_gfx.c +++ b/gfx/drivers/ctr_gfx.c @@ -18,7 +18,7 @@ #include #include #include "ctr_gu.h" -#include "ctr_blit_shader_shbin.h" +#include "ctr_sprite_shader_shbin.h" #include "../../general.h" #include "../../driver.h" @@ -26,15 +26,26 @@ #include "../video_monitor.h" #include "retroarch.h" +#include "performance.h" +#include "retro_inline.h" #define CTR_TOP_FRAMEBUFFER_WIDTH 400 #define CTR_TOP_FRAMEBUFFER_HEIGHT 240 #define CTR_GPU_FRAMEBUFFER ((void*)0x1F119400) #define CTR_GPU_DEPTHBUFFER ((void*)0x1F370800) + typedef struct { - s16 x, y, z; + float v; + float u; + float y; + float x; +} ctr_scale_vector_t; + +typedef struct +{ + s16 x0, y0, x1, y1; s16 u, v; } ctr_vertex_t; @@ -48,7 +59,7 @@ typedef struct ctr_video void* texture_swizzled; int texture_width; int texture_height; - float texture_scale[4]; + ctr_scale_vector_t scale_vector; ctr_vertex_t* frame_coords; }menu; @@ -59,8 +70,7 @@ typedef struct ctr_video int texture_width; int texture_height; - float vertex_scale[4]; - float texture_scale[4]; + ctr_scale_vector_t scale_vector; ctr_vertex_t* frame_coords; DVLB_s* dvlb; @@ -73,49 +83,14 @@ typedef struct ctr_video unsigned rotation; } ctr_video_t; - -#define PRINTFPOS(X,Y) "\x1b["#X";"#Y"H" -#define PRINTFPOS_STR(X,Y) "\x1b["X";"Y"H" - -static void ctr_set_frame_coords(ctr_vertex_t* v, int x0, int y0, int x1, int y1, int w, int h) +static INLINE void ctr_set_scale_vector(ctr_scale_vector_t* vec, + int viewport_width, int viewport_height, + int texture_width, int texture_height) { - v[0].x = x0; - v[0].y = y0; - v[0].z = -1; - v[0].u = 0; - v[0].v = 0; - - v[1].x = x1; - v[1].y = y0; - v[1].z = -1; - v[1].u = w; - v[1].v = 0; - - v[2].x = x1; - v[2].y = y1; - v[2].z = -1; - v[2].u = w; - v[2].v = h; - - v[3].x = x0; - v[3].y = y0; - v[3].z = -1; - v[3].u = 0; - v[3].v = 0; - - v[4].x = x1; - v[4].y = y1; - v[4].z = -1; - v[4].u = w; - v[4].v = h; - - v[5].x = x0; - v[5].y = y1; - v[5].z = -1; - v[5].u = 0; - v[5].v = h; - - + vec->x = -2.0 / viewport_width; + vec->y = -2.0 / viewport_height; + vec->u = 1.0 / texture_width; + vec->v = -1.0 / texture_height; } static void* ctr_init(const video_info_t* video, @@ -143,9 +118,14 @@ static void* ctr_init(const video_info_t* video, ctr->texture_swizzled = linearMemAlign(ctr->texture_width * ctr->texture_height * sizeof(uint32_t), 128); - ctr->frame_coords = linearAlloc(6 * sizeof(ctr_vertex_t)); - ctr_set_frame_coords(ctr->frame_coords, 0, 0, CTR_TOP_FRAMEBUFFER_WIDTH, CTR_TOP_FRAMEBUFFER_HEIGHT, - CTR_TOP_FRAMEBUFFER_WIDTH, CTR_TOP_FRAMEBUFFER_HEIGHT); + ctr->frame_coords = linearAlloc(sizeof(ctr_vertex_t)); + ctr->frame_coords->x0 = 0; + ctr->frame_coords->y0 = 0; + ctr->frame_coords->x1 = CTR_TOP_FRAMEBUFFER_WIDTH; + ctr->frame_coords->y1 = CTR_TOP_FRAMEBUFFER_HEIGHT; + ctr->frame_coords->u = CTR_TOP_FRAMEBUFFER_WIDTH; + ctr->frame_coords->v = CTR_TOP_FRAMEBUFFER_HEIGHT; + GSPGPU_FlushDataCache(NULL, (u8*)ctr->frame_coords, sizeof(ctr_vertex_t)); ctr->menu.texture_width = 512; ctr->menu.texture_height = 512; @@ -154,97 +134,27 @@ static void* ctr_init(const video_info_t* video, ctr->menu.texture_swizzled = linearMemAlign(ctr->texture_width * ctr->texture_height * sizeof(uint16_t), 128); - ctr->menu.frame_coords = linearAlloc(6 * sizeof(ctr_vertex_t)); - ctr_set_frame_coords(ctr->menu.frame_coords, 40, 0, CTR_TOP_FRAMEBUFFER_WIDTH - 40, CTR_TOP_FRAMEBUFFER_HEIGHT , - CTR_TOP_FRAMEBUFFER_WIDTH - 80, CTR_TOP_FRAMEBUFFER_HEIGHT); + ctr->menu.frame_coords = linearAlloc(sizeof(ctr_vertex_t)); + ctr->menu.frame_coords->x0 = 40; + ctr->menu.frame_coords->y0 = 0; + ctr->menu.frame_coords->x1 = CTR_TOP_FRAMEBUFFER_WIDTH - 40; + ctr->menu.frame_coords->y1 = CTR_TOP_FRAMEBUFFER_HEIGHT; + ctr->menu.frame_coords->u = CTR_TOP_FRAMEBUFFER_WIDTH - 80; + ctr->menu.frame_coords->v = CTR_TOP_FRAMEBUFFER_HEIGHT; + GSPGPU_FlushDataCache(NULL, (u8*)ctr->menu.frame_coords, sizeof(ctr_vertex_t)); + ctr_set_scale_vector(&ctr->scale_vector, + CTR_TOP_FRAMEBUFFER_WIDTH, CTR_TOP_FRAMEBUFFER_HEIGHT, + ctr->texture_width, ctr->texture_height); + ctr_set_scale_vector(&ctr->menu.scale_vector, + CTR_TOP_FRAMEBUFFER_WIDTH, CTR_TOP_FRAMEBUFFER_HEIGHT, + ctr->menu.texture_width, ctr->menu.texture_height); - ctr->vertex_scale[0] = 1.0; - ctr->vertex_scale[1] = 1.0; - ctr->vertex_scale[2] = -2.0 / CTR_TOP_FRAMEBUFFER_WIDTH; - ctr->vertex_scale[3] = -2.0 / CTR_TOP_FRAMEBUFFER_HEIGHT; - - ctr->texture_scale[0] = 1.0; - ctr->texture_scale[1] = 1.0; - ctr->texture_scale[2] = -1.0 / ctr->texture_height; - ctr->texture_scale[3] = 1.0 / ctr->texture_width; - - ctr->menu.texture_scale[0] = 1.0; - ctr->menu.texture_scale[1] = 1.0; - ctr->menu.texture_scale[2] = -1.0 / ctr->texture_height; - ctr->menu.texture_scale[3] = 1.0 / ctr->texture_width; - - - ctr->dvlb = DVLB_ParseFile((u32*)ctr_blit_shader_shbin, ctr_blit_shader_shbin_size); - shaderProgramInit(&ctr->shader); - shaderProgramSetVsh(&ctr->shader, &ctr->dvlb->DVLE[0]); + ctr->dvlb = DVLB_ParseFile((u32*)ctr_sprite_shader_shbin, ctr_sprite_shader_shbin_size); + ctrGuSetVshGsh(&ctr->shader, ctr->dvlb, 2, 2); shaderProgramUse(&ctr->shader); - - GPUCMD_Finalize(); - GPUCMD_FlushAndRun(NULL); - gspWaitForEvent(GSPEVENT_P3D, false); - - - if (input && input_data) - { - ctrinput = input_ctr.init(); - *input = ctrinput ? &input_ctr : NULL; - *input_data = ctrinput; - } - - return ctr; -} - -static bool ctr_frame(void* data, const void* frame, - unsigned width, unsigned height, unsigned pitch, const char* msg) -{ - ctr_video_t* ctr = (ctr_video_t*)data; - settings_t* settings = config_get_ptr(); - - static uint64_t currentTick,lastTick; - static float fps = 0.0; - static int total_frames = 0; - static int frames = 0; - - if (!width || !height) - { - gspWaitForEvent(GSPEVENT_VBlank0, true); - return true; - } - - if(!aptMainLoop()) - { - rarch_main_command(RARCH_CMD_QUIT); - return true; - } - - extern bool select_pressed; - if (select_pressed) - { - rarch_main_command(RARCH_CMD_QUIT); - return true; - } - - frames++; - currentTick = osGetTime(); - uint32_t diff = currentTick - lastTick; - if(diff > 1000) - { - fps = (float)frames * (1000.0 / diff); - lastTick = currentTick; - frames = 0; - } - - printf("fps: %8.4f frames: %i\r", fps, total_frames++); - fflush(stdout); - - GPUCMD_SetBufferOffset(0); - ctrGuSetVertexShaderFloatUniform(0, ctr->vertex_scale, 1); - - - GPU_SetViewport(VIRT_TO_PHYS(CTR_GPU_DEPTHBUFFER), VIRT_TO_PHYS(CTR_GPU_FRAMEBUFFER), 0, 0, CTR_TOP_FRAMEBUFFER_HEIGHT, CTR_TOP_FRAMEBUFFER_WIDTH); @@ -286,6 +196,78 @@ static bool ctr_frame(void* data, const void* frame, GPU_SetTexEnv(4, GPU_PREVIOUS,GPU_PREVIOUS, 0, 0, 0, 0, 0); GPU_SetTexEnv(5, GPU_PREVIOUS,GPU_PREVIOUS, 0, 0, 0, 0, 0); + ctrGuSetAttributeBuffers(2, + VIRT_TO_PHYS(ctr->menu.frame_coords), + CTRGU_ATTRIBFMT(GPU_SHORT, 4) << 0 | + CTRGU_ATTRIBFMT(GPU_SHORT, 2) << 4, + sizeof(ctr_vertex_t)); + GPUCMD_Finalize(); + GPUCMD_FlushAndRun(NULL); + gspWaitForEvent(GSPEVENT_P3D, false); + + if (input && input_data) + { + ctrinput = input_ctr.init(); + *input = ctrinput ? &input_ctr : NULL; + *input_data = ctrinput; + } + + return ctr; +} +//#define gspWaitForEvent(...) +static bool ctr_frame(void* data, const void* frame, + unsigned width, unsigned height, unsigned pitch, const char* msg) +{ + ctr_video_t* ctr = (ctr_video_t*)data; + settings_t* settings = config_get_ptr(); + + static uint64_t currentTick,lastTick; + static float fps = 0.0; + static int total_frames = 0; + static int frames = 0; + + RARCH_PERFORMANCE_INIT(ctrframe_f); + RARCH_PERFORMANCE_START(ctrframe_f); + + if (!width || !height) + { + gspWaitForEvent(GSPEVENT_VBlank0, true); + goto end; + } + + if(!aptMainLoop()) + { + rarch_main_command(RARCH_CMD_QUIT); + goto end; + } + + extern bool select_pressed; + if (select_pressed) + { + rarch_main_command(RARCH_CMD_QUIT); + goto end; + } + + frames++; + currentTick = osGetTime(); + uint32_t diff = currentTick - lastTick; + if(diff > 1000) + { + fps = (float)frames * (1000.0 / diff); + lastTick = currentTick; + frames = 0; + } + + printf("fps: %8.4f frames: %i\r", fps, total_frames++); + fflush(stdout); + + /* enable this to profile the core without video output */ +#if 0 + if (!ctr->menu_texture_enable) + goto end; +#endif + + GPUCMD_SetBufferOffset(0); if(frame) { @@ -316,20 +298,13 @@ static bool ctr_frame(void* data, const void* frame, GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE), GPU_RGB565); + ctr->frame_coords->u = width; + ctr->frame_coords->v = height; + GSPGPU_FlushDataCache(NULL, (u8*)ctr->frame_coords, sizeof(ctr_vertex_t)); - ctr_set_frame_coords(ctr->frame_coords, 0, 0, CTR_TOP_FRAMEBUFFER_WIDTH, CTR_TOP_FRAMEBUFFER_HEIGHT, - width, height); - GSPGPU_FlushDataCache(NULL, (u8*)ctr->frame_coords, - 6 * sizeof(ctr_vertex_t)); - ctrGuSetAttributeBuffers(2, - VIRT_TO_PHYS(ctr->frame_coords), - CTRGU_ATTRIBFMT(GPU_SHORT, 3) << 0 | - CTRGU_ATTRIBFMT(GPU_SHORT, 2) << 4, - sizeof(ctr_vertex_t)); - - ctrGuSetVertexShaderFloatUniform(1, ctr->texture_scale, 1); - - GPU_DrawArray(GPU_TRIANGLES, 6); + ctrGuSetAttributeBuffersAddress(VIRT_TO_PHYS(ctr->frame_coords)); + ctrGuSetVertexShaderFloatUniform(0, (float*)&ctr->scale_vector, 1); + GPU_DrawArray(GPU_UNKPRIM, 1); } @@ -352,24 +327,12 @@ static bool ctr_frame(void* data, const void* frame, GPU_RGBA4); - GSPGPU_FlushDataCache(NULL, (u8*)ctr->menu.frame_coords, - 6 * sizeof(ctr_vertex_t)); - ctrGuSetAttributeBuffers(2, - VIRT_TO_PHYS(ctr->menu.frame_coords), - CTRGU_ATTRIBFMT(GPU_SHORT, 3) << 0 | - CTRGU_ATTRIBFMT(GPU_SHORT, 2) << 4, - sizeof(ctr_vertex_t)); - - ctrGuSetVertexShaderFloatUniform(1, ctr->menu.texture_scale, 1); - - GPU_DrawArray(GPU_TRIANGLES, 6); - - + ctrGuSetAttributeBuffersAddress(VIRT_TO_PHYS(ctr->menu.frame_coords)); + ctrGuSetVertexShaderFloatUniform(1, (float*)&ctr->menu.scale_vector, 1); + GPU_DrawArray(GPU_UNKPRIM, 1); } GPU_FinishDrawing(); - - GPUCMD_Finalize(); GPUCMD_FlushAndRun(NULL); gspWaitForEvent(GSPEVENT_P3D, false); @@ -391,6 +354,8 @@ static bool ctr_frame(void* data, const void* frame, // if (ctr->vsync) // gspWaitForEvent(GSPEVENT_VBlank0, true); +end: + RARCH_PERFORMANCE_STOP(ctrframe_f); return true; } @@ -471,6 +436,14 @@ static void ctr_set_texture_frame(void* data, const void* frame, bool rgb32, dst += ctr->menu.texture_width; src += width; } + + ctr->menu.frame_coords->x0 = (CTR_TOP_FRAMEBUFFER_WIDTH - width) / 2; + ctr->menu.frame_coords->y0 = (CTR_TOP_FRAMEBUFFER_HEIGHT - height) / 2; + ctr->menu.frame_coords->x1 = ctr->menu.frame_coords->x0 + width; + ctr->menu.frame_coords->y1 = ctr->menu.frame_coords->y0 + height; + ctr->menu.frame_coords->u = width; + ctr->menu.frame_coords->v = height; + GSPGPU_FlushDataCache(NULL, (u8*)ctr->menu.frame_coords, sizeof(ctr_vertex_t)); } static void ctr_set_texture_enable(void* data, bool state, bool full_screen) diff --git a/gfx/drivers/ctr_gu.h b/gfx/drivers/ctr_gu.h index b8f777a56e..75fa426090 100644 --- a/gfx/drivers/ctr_gu.h +++ b/gfx/drivers/ctr_gu.h @@ -13,11 +13,14 @@ * If not, see . */ +/* this file contains mostly modified functions from the ctrulib sdk */ + #ifndef CTR_GU_H #define CTR_GU_H #include <3ds.h> #include +#include #include #define VIRT_TO_PHYS(vaddr) \ @@ -176,4 +179,20 @@ static INLINE void ctrGuSetAttributeBuffers(u32 total_attributes, GPUCMD_AddIncrementalWrites(GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xBA98}), 2); } +__attribute__((always_inline)) +static INLINE void ctrGuSetAttributeBuffersAddress(u32* baseAddress) +{ + GPUCMD_AddWrite(GPUREG_ATTRIBBUFFERS_LOC, ((u32)baseAddress)>>3); +} + +__attribute__((always_inline)) +static INLINE void ctrGuSetVshGsh(shaderProgram_s* sp, DVLB_s* dvlb, u32 vsh_output_count, u32 gsh_input_count) +{ + dvlb->DVLE[0].outmapData[0] = vsh_output_count; + dvlb->DVLE[0].outmapMask = (1 << vsh_output_count) - 1; + shaderProgramInit(sp); + shaderProgramSetVsh(sp, &dvlb->DVLE[0]); + shaderProgramSetGsh(sp, &dvlb->DVLE[1], gsh_input_count); +} + #endif // CTR_GU_H diff --git a/gfx/drivers/ctr_sprite.shader b/gfx/drivers/ctr_sprite.shader new file mode 100644 index 0000000000..75b2a361f2 --- /dev/null +++ b/gfx/drivers/ctr_sprite.shader @@ -0,0 +1,86 @@ + .const c20, 1.0, 1.0, 1.0, 1.0 + .const c21, 0.0, 0.0, 0.0, 0.0 + .const c22, 0.0, 1.0, 0.0, 1.0 + .const c23, 0.0, 0.0, -1.0, 1.0 + + +; .in v0, x0,y0,x1,y1 +; .in v1, tex_w,tex_h + + .out o0, result.position, 0xF + .out o1, result.color, 0xF + .out o2, result.texcoord0, 0x3 + +; .uniform c0, c0, scale_vector + + .vsh main_vsh, endmain_vsh + .gsh main_gsh, endmain_gsh + + main_vsh: + mul r0, c0, v0 (0x1) + add o0, c20, r0 (0x0) + mul r1, c0, v1 (0x2) + mov r1, c21 (0x3) + add o1, c22, r1 (0x0) + nop + end + endmain_vsh: + + main_gsh: + setemit vtx0, false, false + mov o0, v0 (0x3) + + mov o0, c23 (0x2) + mov o1, c20 (0x0) + + mov o2, v1 (0x0) + emit + + setemit vtx1, false, false + mov o0, v0 (0x4) + mov o0, v0 (0x8) + + mov o0, c23 (0x2) + mov o1, c20 (0x0) + + mov o2, v1 (0x7) + mov o2, v1 (0x9) + emit + + setemit vtx2, true, true + mov o0, v0 (0x6) + + mov o0, c23 (0x2) + mov o1, c20 (0x0) + + mov o2, v1 (0xB) + emit + + setemit vtx1, true, false + mov o0, v0 (0x7) + mov o0, v0 (0x5) + + mov o0, c23 (0x2) + mov o1, c20 (0x0) + + mov o2, v1 (0x4) + mov o2, v1 (0xA) + emit + + nop + end + endmain_gsh: + +; operand descriptors + .opdesc xyzw, xyzw, xyzw ; 0x0 + .opdesc xyzw, yxyx, yxwz ; 0x1 + .opdesc __zw, zwzw, xyxy ; 0x2 + .opdesc xy__, xyzw, xyzw ; 0x3 + .opdesc x___, xyzw, xyzw ; 0x4 + .opdesc _y__, xyzw, xyzw ; 0x5 + .opdesc xy__, zwzw, zwzw ; 0x6 + .opdesc x___, zwzw, zwzw ; 0x7 + .opdesc _y__, zwzw, zwzw ; 0x8 + .opdesc _yzw, xyxy, xyxy ; 0x9 + .opdesc _yzw, zwzw, zwzw ; 0xA + .opdesc xyzw, zwzw, zwzw ; 0xB