diff --git a/Makefile.wiiu b/Makefile.wiiu index 5a87329243..c8babcf725 100644 --- a/Makefile.wiiu +++ b/Makefile.wiiu @@ -66,7 +66,9 @@ else DEFINES += -DHAVE_FILTERS_BUILTIN OBJ += wiiu/system/missing_libc_functions.o + OBJ += wiiu/shader_utils.o OBJ += wiiu/tex_shader.o + OBJ += wiiu/sprite_shader.o ifeq ($(GRIFFIN_BUILD), 1) OBJ += griffin/griffin.o diff --git a/gfx/common/gx2_common.h b/gfx/common/gx2_common.h index c218818290..9402d72a2a 100644 --- a/gfx/common/gx2_common.h +++ b/gfx/common/gx2_common.h @@ -1,6 +1,7 @@ #include #include "wiiu/tex_shader.h" +#include "wiiu/sprite_shader.h" #undef _X #undef _B @@ -21,8 +22,6 @@ #define COLOR_ARGB(r, g, b, a) (((u32)(a) << 24) | ((u32)(r) << 16) | ((u32)(g) << 8) | ((u32)(b) << 0)) #define COLOR_RGBA(r, g, b, a) (((u32)(r) << 24) | ((u32)(g) << 16) | ((u32)(b) << 8) | ((u32)(a) << 0)) -//#define GX2_CAN_ACCESS_DATA_SECTION - typedef struct { int width; @@ -33,20 +32,19 @@ typedef struct struct gx2_overlay_data { GX2Texture tex; - tex_shader_vertex_t v[4]; + sprite_vertex_t v; float alpha_mod; }; typedef struct { - tex_shader_t* shader; struct { GX2Texture texture; int width; int height; bool enable; - tex_shader_vertex_t* v; + sprite_vertex_t* v; } menu; #ifdef HAVE_OVERLAY @@ -60,12 +58,19 @@ typedef struct GX2Sampler sampler_linear; GX2Texture texture; tex_shader_vertex_t* v; + GX2_vec2* ubo_vp; + GX2_vec2* ubo_tex; + void* input_ring_buffer; + u32 input_ring_buffer_size; + void* output_ring_buffer; + u32 output_ring_buffer_size; + int width; int height; struct { - tex_shader_vertex_t* v; + sprite_vertex_t* v; int size; int current; } vertex_cache; diff --git a/gfx/drivers/wiiu_gfx.c b/gfx/drivers/wiiu_gfx.c index b6c5c4ad84..6946e5f2dc 100644 --- a/gfx/drivers/wiiu_gfx.c +++ b/gfx/drivers/wiiu_gfx.c @@ -145,7 +145,6 @@ static void wiiu_gfx_update_viewport(wiiu_video_t* wiiu) wiiu->vp.height = height; } - float scale_w = wiiu->color_buffer.surface.width / wiiu->render_mode.width; float scale_h = 
wiiu->color_buffer.surface.height / wiiu->render_mode.height; wiiu_set_position(wiiu->v, &wiiu->color_buffer, @@ -154,6 +153,8 @@ static void wiiu_gfx_update_viewport(wiiu_video_t* wiiu) (wiiu->vp.x + wiiu->vp.width) * scale_w, (wiiu->vp.y + wiiu->vp.height) * scale_h); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v)); + wiiu->should_resize = false; } @@ -275,70 +276,25 @@ static void* wiiu_gfx_init(const video_info_t* video, GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD, GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD); GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE); -#ifdef GX2_CAN_ACCESS_DATA_SECTION - wiiu->shader = &tex_shader; -#else - /* Initialize shader */ - wiiu->shader = MEM2_alloc(sizeof(tex_shader), 0x1000); - memcpy(wiiu->shader, &tex_shader, sizeof(tex_shader)); - GX2Invalidate(GX2_INVALIDATE_MODE_CPU, wiiu->shader, sizeof(tex_shader)); + GX2InitShader(&tex_shader); + GX2InitShader(&sprite_shader); + GX2SetShader(&tex_shader); - wiiu->shader->vs.program = MEM2_alloc(wiiu->shader->vs.size, GX2_SHADER_ALIGNMENT); - memcpy(wiiu->shader->vs.program, tex_shader.vs.program, wiiu->shader->vs.size); - GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->vs.program, wiiu->shader->vs.size); - wiiu->shader->vs.attribVars = MEM2_alloc(wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar), - GX2_SHADER_ALIGNMENT); - memcpy(wiiu->shader->vs.attribVars, tex_shader.vs.attribVars , - wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar)); + wiiu->ubo_vp = MEM1_alloc(sizeof(*wiiu->ubo_vp), GX2_UNIFORM_BLOCK_ALIGNMENT); + wiiu->ubo_vp->width = wiiu->color_buffer.surface.width; + wiiu->ubo_vp->height = wiiu->color_buffer.surface.height; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ubo_vp, sizeof(*wiiu->ubo_vp)); - wiiu->shader->ps.program = 
MEM2_alloc(wiiu->shader->ps.size, GX2_SHADER_ALIGNMENT); - memcpy(wiiu->shader->ps.program, tex_shader.ps.program, wiiu->shader->ps.size); - GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->ps.program, wiiu->shader->ps.size); - wiiu->shader->ps.samplerVars = MEM2_alloc(wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar), - GX2_SHADER_ALIGNMENT); - memcpy(wiiu->shader->ps.samplerVars, tex_shader.ps.samplerVars, - wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar)); + wiiu->ubo_tex = MEM1_alloc(sizeof(*wiiu->ubo_tex), GX2_UNIFORM_BLOCK_ALIGNMENT); + wiiu->ubo_tex->width = 1.0; + wiiu->ubo_tex->height = 1.0; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ubo_tex, sizeof(*wiiu->ubo_tex)); -#endif - wiiu->shader->fs.size = GX2CalcFetchShaderSizeEx(sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream), - GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); - wiiu->shader->fs.program = MEM2_alloc(wiiu->shader->fs.size, GX2_SHADER_ALIGNMENT); - GX2InitFetchShaderEx(&wiiu->shader->fs, (uint8_t*)wiiu->shader->fs.program, - sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream), - (GX2AttribStream*)&wiiu->shader->attribute_stream, - GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); - GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->fs.program, wiiu->shader->fs.size); - GX2SetVertexShader(&wiiu->shader->vs); - GX2SetPixelShader(&wiiu->shader->ps); - GX2SetFetchShader(&wiiu->shader->fs); - - wiiu->v = MEM2_alloc(4 * sizeof(*wiiu->v), GX2_VERTEX_BUFFER_ALIGNMENT); - wiiu_set_position(wiiu->v, &wiiu->color_buffer, 0, 0, - wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height); - wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0, - wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation); - - wiiu->v[0].color = 0xFFFFFFFF; - wiiu->v[1].color = 0xFFFFFFFF; - wiiu->v[2].color = 0xFFFFFFFF; - wiiu->v[3].color = 0xFFFFFFFF; - - 
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v)); - - GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v); - - wiiu->menu.v = MEM2_alloc(4 * sizeof(*wiiu->menu.v), GX2_VERTEX_BUFFER_ALIGNMENT); - wiiu_set_position(wiiu->menu.v, &wiiu->color_buffer, 0, 0, - wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height); - wiiu_set_tex_coords(wiiu->menu.v, &wiiu->menu.texture, 0, 0, - wiiu->menu.texture.surface.width, wiiu->menu.texture.surface.height, 0); - - wiiu->menu.v[0].color = 0xFFFFFF80; - wiiu->menu.v[1].color = 0xFFFFFF80; - wiiu->menu.v[2].color = 0xFFFFFF80; - wiiu->menu.v[3].color = 0xFFFFFF80; - GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v)); + wiiu->input_ring_buffer_size = GX2CalcGeometryShaderInputRingBufferSize(sprite_shader.vs.ringItemSize); + wiiu->output_ring_buffer_size = GX2CalcGeometryShaderOutputRingBufferSize(sprite_shader.gs.ringItemSize); + wiiu->input_ring_buffer = MEM1_alloc(wiiu->input_ring_buffer_size, 0x1000); + wiiu->output_ring_buffer = MEM1_alloc(wiiu->output_ring_buffer_size, 0x1000); /* Initialize frame texture */ memset(&wiiu->texture, 0, sizeof(GX2Texture)); @@ -389,6 +345,28 @@ static void* wiiu_gfx_init(const video_info_t* video, GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image, wiiu->menu.texture.surface.imageSize); + wiiu->v = MEM2_alloc(4 * sizeof(*wiiu->v), GX2_VERTEX_BUFFER_ALIGNMENT); + + wiiu_set_position(wiiu->v, &wiiu->color_buffer, 0, 0, + wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height); + wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0, + wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation); + + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v)); + GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v); + + wiiu->menu.v = MEM2_alloc(4 * sizeof(*wiiu->menu.v), 
GX2_VERTEX_BUFFER_ALIGNMENT); + wiiu->menu.v->pos.x = 0.0f; + wiiu->menu.v->pos.y = 0.0f; + wiiu->menu.v->pos.width = wiiu->color_buffer.surface.width; + wiiu->menu.v->pos.height = wiiu->color_buffer.surface.height; + wiiu->menu.v->coord.u = 0.0f; + wiiu->menu.v->coord.v = 0.0f; + wiiu->menu.v->coord.width = 1.0f; + wiiu->menu.v->coord.height = 1.0f; + wiiu->menu.v->color = 0xFFFFFF80; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v)); + wiiu->vertex_cache.size = 0x1000; wiiu->vertex_cache.current = 0; wiiu->vertex_cache.v = MEM2_alloc(wiiu->vertex_cache.size @@ -399,8 +377,8 @@ static void* wiiu_gfx_init(const video_info_t* video, GX2InitSampler(&wiiu->sampler_linear, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR); /* set Texture and Sampler */ - GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location); - GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location); + GX2SetPixelTexture(&wiiu->texture, tex_shader.ps.samplerVars[0].location); + GX2SetPixelSampler(&wiiu->sampler_linear, tex_shader.ps.samplerVars[0].location); /* clear leftover image */ GX2ClearColor(&wiiu->color_buffer, 0.0f, 0.0f, 0.0f, 1.0f); @@ -450,15 +428,11 @@ static void gx2_overlay_tex_geom(void *data, unsigned image, if (!o) return; - o->v[0].coord.u = x; - o->v[0].coord.v = y; - o->v[1].coord.u = x + w; - o->v[1].coord.v = y; - o->v[2].coord.u = x + w; - o->v[2].coord.v = y + h; - o->v[3].coord.u = x ; - o->v[3].coord.v = y + h; - GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v, sizeof(o->v)); + o->v.coord.u = x; + o->v.coord.v = y; + o->v.coord.width = w; + o->v.coord.height = h; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v, sizeof(o->v)); } static void gx2_overlay_vertex_geom(void *data, unsigned image, @@ -467,15 +441,6 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image, wiiu_video_t *gx2 = (wiiu_video_t*)data; struct gx2_overlay_data *o = NULL; - /* Flipped, 
so we preserve top-down semantics. */ - y = 1.0f - y; - h = -h; - - /* expand from 0 - 1 to -1 - 1 */ - x = (x * 2.0f) - 1.0f; - y = (y * 2.0f) - 1.0f; - w = (w * 2.0f); - h = (h * 2.0f); if (gx2) o = (struct gx2_overlay_data*)&gx2->overlay[image]; @@ -483,19 +448,12 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image, if (!o) return; - o->v[0].pos.x = x; - o->v[0].pos.y = y; + o->v.pos.x = x * gx2->color_buffer.surface.width; + o->v.pos.y = y * gx2->color_buffer.surface.height; + o->v.pos.width = w * gx2->color_buffer.surface.width; + o->v.pos.height = h * gx2->color_buffer.surface.height; - o->v[1].pos.x = x + w; - o->v[1].pos.y = y; - - o->v[2].pos.x = x + w; - o->v[2].pos.y = y + h; - - o->v[3].pos.x = x ; - o->v[3].pos.y = y + h; - - GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v,sizeof(o->v)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v,sizeof(o->v)); } static void gx2_free_overlay(wiiu_video_t *gx2) @@ -555,13 +513,9 @@ static bool gx2_overlay_load(void *data, gx2_overlay_tex_geom(gx2, i, 0, 0, 1, 1); gx2_overlay_vertex_geom(gx2, i, 0, 0, 1, 1); gx2->overlay[i].alpha_mod = 1.0f; - gx2->overlay[i].v[0].color = 0xFFFFFFFF; - gx2->overlay[i].v[1].color = 0xFFFFFFFF; - gx2->overlay[i].v[2].color = 0xFFFFFFFF; - gx2->overlay[i].v[3].color = 0xFFFFFFFF; + gx2->overlay[i].v.color = 0xFFFFFFFF; - - GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v,sizeof(o->v)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v,sizeof(o->v)); } @@ -588,11 +542,8 @@ static void gx2_overlay_set_alpha(void *data, unsigned image, float mod) if (gx2) { gx2->overlay[image].alpha_mod = mod; - gx2->overlay[image].v[0].color = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod); - gx2->overlay[image].v[1].color = gx2->overlay[image].v[0].color; - gx2->overlay[image].v[2].color = gx2->overlay[image].v[0].color; - gx2->overlay[image].v[3].color = gx2->overlay[image].v[0].color; - 
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, gx2->overlay[image].v, sizeof(gx2->overlay[image].v)); + gx2->overlay[image].v.color = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &gx2->overlay[image].v, sizeof(gx2->overlay[image].v)); } } @@ -604,12 +555,12 @@ static void gx2_render_overlay(void *data) for (i = 0; i < gx2->overlays; i++){ - GX2SetAttribBuffer(0, sizeof(gx2->overlay[i].v), sizeof(*gx2->overlay[i].v), gx2->overlay[i].v); + GX2SetAttribBuffer(0, sizeof(gx2->overlay[i].v), sizeof(gx2->overlay[i].v), &gx2->overlay[i].v); - GX2SetPixelTexture(&gx2->overlay[i].tex, gx2->shader->sampler.location); - GX2SetPixelSampler(&gx2->sampler_linear, gx2->shader->sampler.location); + GX2SetPixelTexture(&gx2->overlay[i].tex, sprite_shader.ps.samplerVars[0].location); + GX2SetPixelSampler(&gx2->sampler_linear, sprite_shader.ps.samplerVars[0].location); - GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1); + GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, 0, 1); } @@ -657,31 +608,26 @@ static void wiiu_gfx_free(void* data) GX2SetTVEnable(GX2_DISABLE); GX2SetDRCEnable(GX2_DISABLE); + GX2DestroyShader(&tex_shader); + GX2DestroyShader(&sprite_shader); + MEM2_free(wiiu->ctx_state); MEM2_free(wiiu->cmd_buffer); MEM2_free(wiiu->texture.surface.image); MEM2_free(wiiu->menu.texture.surface.image); + MEM2_free(wiiu->v); + MEM2_free(wiiu->menu.v); MEM2_free(wiiu->vertex_cache.v); MEM1_free(wiiu->color_buffer.surface.image); + MEM1_free(wiiu->ubo_vp); + MEM1_free(wiiu->ubo_tex); + MEM1_free(wiiu->input_ring_buffer); + MEM1_free(wiiu->output_ring_buffer); MEMBucket_free(wiiu->tv_scan_buffer); MEMBucket_free(wiiu->drc_scan_buffer); - MEM2_free(wiiu->shader->fs.program); -#ifndef GX2_CAN_ACCESS_DATA_SECTION - MEM2_free(wiiu->shader->vs.program); - MEM2_free(wiiu->shader->vs.attribVars); - - MEM2_free(wiiu->shader->ps.program); - MEM2_free(wiiu->shader->ps.samplerVars); - - - MEM2_free(wiiu->shader); -#endif - 
MEM2_free(wiiu->v); - MEM2_free(wiiu->menu.v); - free(wiiu); } @@ -794,17 +740,27 @@ static bool wiiu_gfx_frame(void* data, const void* frame, GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->texture.surface.image, wiiu->texture.surface.imageSize); + wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0, width, height, wiiu->rotation); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v)); } - GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v); + GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_REGISTER); + GX2SetShader(&tex_shader); - GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location); - GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest, - wiiu->shader->sampler.location); + GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v); + GX2SetPixelTexture(&wiiu->texture, tex_shader.ps.samplerVars[0].location); + GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest, tex_shader.ps.samplerVars[0].location); GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1); + GX2SetShaderMode(GX2_SHADER_MODE_GEOMETRY_SHADER); + GX2SetShader(&sprite_shader); + GX2SetGeometryShaderInputRingBuffer(wiiu->input_ring_buffer, wiiu->input_ring_buffer_size); + GX2SetGeometryShaderOutputRingBuffer(wiiu->output_ring_buffer, wiiu->output_ring_buffer_size); + GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[0].offset, sprite_shader.vs.uniformBlocks[0].size, wiiu->ubo_vp); + GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, wiiu->ubo_tex); + #ifdef HAVE_OVERLAY if (wiiu->overlay_enable) gx2_render_overlay(wiiu); @@ -814,16 +770,16 @@ static bool wiiu_gfx_frame(void* data, const void* frame, { GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->menu.v), sizeof(*wiiu->menu.v), wiiu->menu.v); - GX2SetPixelTexture(&wiiu->menu.texture, wiiu->shader->sampler.location); - GX2SetPixelSampler(&wiiu->sampler_linear, 
wiiu->shader->sampler.location); + GX2SetPixelTexture(&wiiu->menu.texture, sprite_shader.ps.samplerVars[0].location); + GX2SetPixelSampler(&wiiu->sampler_linear, sprite_shader.ps.samplerVars[0].location); - GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1); + GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, 0, 1); } wiiu->vertex_cache.current = 0; GX2SetAttribBuffer(0, wiiu->vertex_cache.size * sizeof(*wiiu->vertex_cache.v), sizeof(*wiiu->vertex_cache.v), wiiu->vertex_cache.v); - GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location); + GX2SetPixelSampler(&wiiu->sampler_linear, sprite_shader.ps.samplerVars[0].location); wiiu->render_msg_enabled = true; @@ -837,7 +793,6 @@ static bool wiiu_gfx_frame(void* data, const void* frame, GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->vertex_cache.v, wiiu->vertex_cache.current * sizeof(*wiiu->vertex_cache.v)); - if (wiiu->menu.enable) GX2DrawDone(); @@ -895,7 +850,10 @@ static void wiiu_gfx_set_rotation(void* data, { wiiu_video_t* wiiu = (wiiu_video_t*) data; if(wiiu) + { wiiu->rotation = rotation; + wiiu->should_resize = true; + } } static void wiiu_gfx_viewport_info(void* data, @@ -1010,7 +968,16 @@ static void wiiu_gfx_set_texture_frame(void* data, const void* frame, bool rgb32 GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image, wiiu->menu.texture.surface.imageSize); - wiiu_set_tex_coords(wiiu->menu.v, &wiiu->menu.texture, 0, 0, width, height, 0); + wiiu->menu.v->pos.x = 0.0f; + wiiu->menu.v->pos.y = 0.0f; + wiiu->menu.v->pos.width = width; + wiiu->menu.v->pos.height = height; + wiiu->menu.v->coord.u = 0.0f; + wiiu->menu.v->coord.v = 0.0f; + wiiu->menu.v->coord.width = (float)width / wiiu->texture.surface.width; + wiiu->menu.v->coord.height = (float)height / wiiu->texture.surface.height; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v)); + } static void wiiu_gfx_set_texture_enable(void* data, bool state, bool full_screen) 
diff --git a/gfx/drivers_font/wiiu_font.c b/gfx/drivers_font/wiiu_font.c index 7ace22c766..50d1266174 100644 --- a/gfx/drivers_font/wiiu_font.c +++ b/gfx/drivers_font/wiiu_font.c @@ -32,6 +32,7 @@ typedef struct { GX2Texture texture; + GX2_vec2* ubo_tex; const font_renderer_driver_t* font_driver; void* font_data; struct font_atlas* atlas; @@ -79,6 +80,13 @@ static void* wiiu_font_init_font(void* data, const char* font_path, font->atlas->dirty = false; + font->ubo_tex = MEM1_alloc(sizeof(*font->ubo_tex), GX2_UNIFORM_BLOCK_ALIGNMENT); + font->ubo_tex->width = font->texture.surface.width; + font->ubo_tex->height = font->texture.surface.height; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, font->ubo_tex, + sizeof(*font->ubo_tex)); + + return font; } @@ -93,6 +101,7 @@ static void wiiu_font_free_font(void* data, bool is_threaded) font->font_driver->free(font->font_data); MEM1_free(font->texture.surface.image); + MEM1_free(font->ubo_tex); free(font); } @@ -142,9 +151,7 @@ static void wiiu_font_render_line( unsigned width = video_info->width; unsigned height = video_info->height; int x = roundf(pos_x * width); - int y = roundf((1.0f - pos_y) * height); - int delta_x = 0; - int delta_y = 0; + int y = roundf((1.0 - pos_y) * height); if(wiiu->vertex_cache.current + (msg_len * 4) > wiiu->vertex_cache.size) return; @@ -160,11 +167,10 @@ static void wiiu_font_render_line( break; } - tex_shader_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current; + sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current; for (i = 0; i < msg_len; i++) { - int off_x, off_y, tex_x, tex_y, width, height; const char* msg_tmp = &msg[i]; unsigned code = utf8_walk(&msg_tmp); unsigned skip = msg_tmp - &msg[i]; @@ -181,50 +187,22 @@ static void wiiu_font_render_line( if (!glyph) continue; - off_x = glyph->draw_offset_x; - off_y = glyph->draw_offset_y; - tex_x = glyph->atlas_offset_x; - tex_y = glyph->atlas_offset_y; - width = glyph->width; - height = glyph->height; + 
v->pos.x = x + glyph->draw_offset_x * scale; + v->pos.y = y + glyph->draw_offset_y * scale; + v->pos.width = glyph->width * scale; + v->pos.height = glyph->height * scale; + v->coord.u = glyph->atlas_offset_x; + v->coord.v = glyph->atlas_offset_y; + v->coord.width = glyph->width; + v->coord.height = glyph->height; - float x0 = x + off_x + delta_x * scale; - float y0 = y + off_y + delta_y * scale + height * scale; - float u0 = tex_x; - float v0 = tex_y; - float x1 = x0 + width * scale; - float y1 = y0 - height * scale; - float u1 = u0 + width; - float v1 = v0 + height; + v->color = color; - v[0].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f; - v[0].pos.y = (-2.0f * y0 / wiiu->color_buffer.surface.height) + 1.0f; - v[1].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; - v[1].pos.y = (-2.0f * y0 / wiiu->color_buffer.surface.height) + 1.0f; - v[2].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; - v[2].pos.y = (-2.0f * y1 / wiiu->color_buffer.surface.height) + 1.0f; - v[3].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;; - v[3].pos.y = (-2.0f * y1 / wiiu->color_buffer.surface.height) + 1.0f; + v++; - v[0].coord.u = u0 / font->texture.surface.width; - v[0].coord.v = v1 / font->texture.surface.height; - v[1].coord.u = u1 / font->texture.surface.width; - v[1].coord.v = v1 / font->texture.surface.height; - v[2].coord.u = u1 / font->texture.surface.width; - v[2].coord.v = v0 / font->texture.surface.height; - v[3].coord.u = u0 / font->texture.surface.width; - v[3].coord.v = v0 / font->texture.surface.height; - - v[0].color = color; - v[1].color = color; - v[2].color = color; - v[3].color = color; - - v += 4; - - delta_x += glyph->advance_x; - delta_y += glyph->advance_y; + x += glyph->advance_x * scale; + y += glyph->advance_y * scale; } int count = v - wiiu->vertex_cache.v - wiiu->vertex_cache.current; @@ -247,14 +225,12 @@ static void wiiu_font_render_line( } -#if 0 - printf("%s\n", msg); - DEBUG_VAR(color); 
-#endif + GX2SetPixelTexture(&font->texture, sprite_shader.ps.samplerVars[0].location); + GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, font->ubo_tex); - GX2SetPixelTexture(&font->texture, wiiu->shader->sampler.location); + GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, count, wiiu->vertex_cache.current, 1); - GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, count, wiiu->vertex_cache.current, 1); + GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, wiiu->ubo_tex); wiiu->vertex_cache.current = v - wiiu->vertex_cache.v; } diff --git a/menu/drivers_display/menu_display_wiiu.c b/menu/drivers_display/menu_display_wiiu.c index 11c4329d59..6fd911e50a 100644 --- a/menu/drivers_display/menu_display_wiiu.c +++ b/menu/drivers_display/menu_display_wiiu.c @@ -77,68 +77,48 @@ static void menu_display_wiiu_draw(void *data) if (wiiu->vertex_cache.current + 4 > wiiu->vertex_cache.size) return; - tex_shader_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current; - - float x0 = draw->x; - float y0 = draw->y; - float x1 = x0 + draw->width; - float y1 = y0 + draw->height; + sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current; if(draw->coords->vertex && draw->coords->vertices == 4) { - for(int i = 0; i < 4; i++) - { - v[i].pos.x = draw->coords->vertex[i << 1] * 2.0f - 1.0f; - v[i].pos.y = draw->coords->vertex[(i << 1) + 1] * 2.0f - 1.0f; - } + v->pos.x = MIN(MIN(MIN(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]); + v->pos.y = 1.0 - MAX(MAX(MAX(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), draw->coords->vertex[7]); + v->pos.width = MAX(MAX(MAX(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]) - v->pos.x; + v->pos.height = 1.0 - MIN(MIN(MIN(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), 
draw->coords->vertex[7]) - v->pos.y; + v->pos.x *= wiiu->color_buffer.surface.width; + v->pos.y *= wiiu->color_buffer.surface.height; + v->pos.width *= wiiu->color_buffer.surface.width; + v->pos.height *= wiiu->color_buffer.surface.height; } else { - v[0].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f; - v[0].pos.y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f; - v[1].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; - v[1].pos.y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f; - v[2].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; - v[2].pos.y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f; - v[3].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;; - v[3].pos.y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f; + v->pos.x = draw->x; + v->pos.y = wiiu->color_buffer.surface.height - draw->y - draw->height; + v->pos.width = draw->width; + v->pos.height = draw->height; } if(draw->coords->tex_coord && draw->coords->vertices == 4) { - for(int i = 0; i < 4; i++) - { - v[i].coord.u = draw->coords->tex_coord[i << 1]; - v[i].coord.v = draw->coords->tex_coord[(i << 1) + 1]; - } + v->coord.u = MIN(MIN(MIN(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]); + v->coord.v = MIN(MIN(MIN(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]); + v->coord.width = MAX(MAX(MAX(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]) - v->coord.u; + v->coord.height = MAX(MAX(MAX(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]) - v->coord.v; } else { - v[0].coord.u = 0.0f; - v[0].coord.v = 1.0f; - v[1].coord.u = 1.0f; - v[1].coord.v = 1.0f; - v[2].coord.u = 1.0f; - v[2].coord.v = 0.0f; - v[3].coord.u = 0.0f; - v[3].coord.v = 0.0f; + v->coord.u = 0.0f; + 
v->coord.v = 0.0f; + v->coord.width = 1.0f; + v->coord.height = 1.0f; } - v[0].color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1], + v->color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1], 0xFF * draw->coords->color[2], 0xFF * draw->coords->color[3]); - v[1].color = v[0].color; - v[2].color = v[0].color; - v[3].color = v[0].color; -// printf("color : %f, %f, %f, %f --> 0x%08X\n", draw->coords->color[0], draw->coords->color[1], draw->coords->color[2], draw->coords->color[3], col[0]); + GX2SetPixelTexture(texture, tex_shader.ps.samplerVars[0].location); - GX2SetPixelTexture(texture, wiiu->shader->sampler.location); - - - if(draw->coords->vertex && draw->coords->vertices == 4) - GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, wiiu->vertex_cache.current, 1); - else - GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, wiiu->vertex_cache.current, 1); + GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, wiiu->vertex_cache.current, 1); #if 0 printf("(%i,%i,%i,%i) , (%i,%i)\n", (int)draw->x, @@ -146,7 +126,7 @@ static void menu_display_wiiu_draw(void *data) texture->surface.width, texture->surface.height); #endif - wiiu->vertex_cache.current += 4; + wiiu->vertex_cache.current ++; } diff --git a/wiiu/gx2_shader_inl.h b/wiiu/gx2_shader_inl.h index aaf6056927..857104bf5a 100644 --- a/wiiu/gx2_shader_inl.h +++ b/wiiu/gx2_shader_inl.h @@ -17,19 +17,21 @@ #define GX2_SHADER_INL_H #ifdef MSB_FIRST -#define to_LE(x) __builtin_bswap32(x) +#define to_QWORD(w0, w1) (((u64)(w0) << 32ull) | (w1)) +#define to_LE(x) (__builtin_bswap32(x)) #else -#define to_LE(x) x +#define to_QWORD(w0, w1) (((u64)(w1) << 32ull) | (w0)) +#define to_LE(x) (x) #endif /* CF */ -#define CF_WORD0(addr) to_LE(addr) +#define CF_DWORD0(addr) to_LE(addr) -#define CF_WORD1(popCount, cfConst, cond, count, callCount, inst) \ +#define CF_DWORD1(popCount, cfConst, cond, count, callCount, inst) \ to_LE(popCount | (cfConst << 3) | (cond << 8) | (count << 10) | (callCount << 13) | (inst << 
23) | (1 << 31)) #define CF_ALU_WORD0(addr, kcacheBank0, kcacheBank1, kcacheMode0) \ - to_LE(addr | (kcacheBank0 << 16) | (kcacheBank1 << 20) | (kcacheMode0 << 22)) + to_LE(addr | (kcacheBank0 << 22) | (kcacheBank1 << 26) | (kcacheMode0 << 30)) #define CF_ALU_WORD1(kcacheMode1, kcacheAddr0, kcacheAddr1, count, altConst, inst) \ to_LE(kcacheMode1 | (kcacheAddr0 << 2) | (kcacheAddr1 << 10) | (count << 18) | (altConst << 25) | (inst << 26) | (1 << 31)) @@ -39,26 +41,49 @@ #define CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, validPixelMode, inst) \ to_LE(srcSelX | (srcSelY << 3) | (srcSelZ << 6) | (srcSelW << 9) | (validPixelMode << 22) | (inst << 23) | (1 << 31)) -#define NO_BARRIER & to_LE(~(1 << 31)) -#define END_OF_PROGRAM | to_LE(1 << 21) -#define VALID_PIX | to_LE(1 << 22) -#define WHOLE_QUAD_MODE | to_LE(1 << 30) +#define CF_ALLOC_EXPORT_WORD0(arrayBase, type, dstReg, dstRel, indexGpr, elemSize) \ + to_LE(arrayBase | (type << 13) | (dstReg << 15) | (dstRel << 22) | (indexGpr << 23) | (elemSize << 30)) -#define ALU_LAST to_LE(1 << 31) | +#define CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, inst) \ + to_LE(arraySize | (writeMask << 12) | (inst << 23) | (1 << 31)) + + +#define ALU_SRC_KCACHE0_BASE 0x80 +#define ALU_SRC_KCACHE1_BASE 0xA0 +#define CF_KCACHE_BANK_LOCK_1 0x1 +#define CB1 0x1 +#define CB2 0x2 +#define _0_15 CF_KCACHE_BANK_LOCK_1 + +#define KC0(x) (x + ALU_SRC_KCACHE0_BASE) +#define KC1(x) (x + ALU_SRC_KCACHE1_BASE) + +#define NO_BARRIER & ~to_QWORD(0,to_LE(1 << 31)) +#define END_OF_PROGRAM | to_QWORD(0,to_LE(1 << 21)) +#define VALID_PIX | to_QWORD(0,to_LE(1 << 22)) +#define WHOLE_QUAD_MODE | to_QWORD(0,to_LE(1 << 30)) +#define BURSTCNT(x) | to_QWORD(0,to_LE(x << 17)) +#define WRITE(x) (x >> 2) +#define ARRAY_SIZE(x) x +#define ELEM_SIZE(x) x +#define KCACHE0(bank, mode) | to_QWORD(CF_ALU_WORD0(0, bank, 0, mode), 0) +#define KCACHE1(bank, mode) | to_QWORD(CF_ALU_WORD0(0, 0, bank, 0), CF_ALU_WORD1(mode,0, 0, 0, 0, 0)) + +#define ALU_LAST | 
to_QWORD(to_LE(1ull << 31), 0) /* ALU */ #define ALU_WORD0(src0Sel, src0Rel, src0Chan, src0Neg, src1Sel, src1Rel, src1Chan, src1Neg, indexMode, predSel) \ - to_LE(src0Sel | (src0Rel << 9) | (src0Chan << 10) | (src0Neg << 12) | (src1Sel << 13) | (src1Rel << 22) \ - | (src1Chan << 23) | (src1Neg << 25) | (indexMode << 26) | (predSel << 29)) + to_LE(src0Sel | ((src0Rel) << 9) | ((src0Chan) << 10) | ((src0Neg) << 12) | ((src1Sel) << 13) | ((src1Rel) << 22) \ + | ((src1Chan) << 23) | ((src1Neg) << 25) | ((indexMode) << 26) | ((predSel) << 29)) #define ALU_WORD1_OP2(src0Abs, src1Abs, updateExecuteMask, updatePred, writeMask, omod, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \ to_LE(src0Abs | (src1Abs << 1) | (updateExecuteMask << 2) | (updatePred << 3) | (writeMask << 4) | (omod << 5) | (inst << 7) | \ - (encoding << 15) | (bankSwizzle << 18) | (dstGpr << 21) | (dstRel << 28) | (dstChan << 29) | (clamp << 31)) + (encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31)) #define ALU_WORD1_OP3(src2Sel, src2Rel, src2Chan, src2Neg, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \ to_LE(src2Sel | (src2Rel << 9) | (src2Chan << 10) | (src2Neg << 12) | (inst << 13) | \ - (encoding << 15) | (bankSwizzle << 18) | (dstGpr << 21) | (dstRel << 28) | (dstChan << 29) | (clamp << 31) + (encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31) /* TEX */ #define TEX_WORD0(inst, bcFracMode, fetchWholeQuad, resourceID, srcReg, srcRel, altConst) \ @@ -71,17 +96,28 @@ #define TEX_WORD2(offsetX, offsetY, offsetZ, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW) \ to_LE(offsetX | (offsetY << 5) | (offsetZ << 10) | (samplerID << 15) | (srcSelX << 20) | (srcSelY << 23) | (srcSelZ << 26) | (srcSelW << 29)) +#define VTX_WORD0(inst, type, buffer_id, srcReg, srcSelX, mega) \ + to_LE(inst | (type << 5) | (buffer_id << 8) | (srcReg << 16) | (srcSelX << 
24) | (mega << 26)) -#define _X 0 -#define _Y 1 -#define _Z 2 -#define _W 3 +#define VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW) \ + to_LE(dstReg | (dstSelX << 9) | (dstSelY << 12) | (dstSelZ << 15) | (dstSelW << 18) | (1 << 21)) + +#define VTX_WORD2(offset, ismega) \ + to_LE(offset| (ismega << 19)) + +#define _x 0 +#define _y 1 +#define _z 2 +#define _w 3 #define _0 4 #define _1 5 +#define _xyzw 0b1111 +#define _xy__ 0b0011 + #define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3)) -#define ALU_LITERAL(v) to_LE(v) +#define ALU_LITERAL(v) to_QWORD(to_LE(v), 0) /* SRCx_SEL special constants */ #define ALU_SRC_1_DBL_L 0xF4 @@ -97,6 +133,49 @@ #define ALU_SRC_PV 0xFE #define ALU_SRC_PS 0xFF +#define _NEG | (1 << 12) + +#define ALU_OMOD_OFF 0x0 +#define ALU_OMOD_M2 0x1 +#define ALU_OMOD_M4 0x2 +#define ALU_OMOD_D2 0x3 + +#define ALU_VEC_012 0x0 +#define ALU_VEC_021 0x1 +#define ALU_VEC_120 0x2 +#define ALU_VEC_102 0x3 +#define ALU_VEC_201 0x4 +#define ALU_VEC_210 0x5 +#define VEC_012 | to_QWORD(0, to_LE(ALU_VEC_012 << 18)) +#define VEC_021 | to_QWORD(0, to_LE(ALU_VEC_021 << 18)) +#define VEC_120 | to_QWORD(0, to_LE(ALU_VEC_120 << 18)) +#define VEC_102 | to_QWORD(0, to_LE(ALU_VEC_102 << 18)) +#define VEC_201 | to_QWORD(0, to_LE(ALU_VEC_201 << 18)) +#define VEC_210 | to_QWORD(0, to_LE(ALU_VEC_210 << 18)) + +#define VALID_PIX | to_QWORD(0,to_LE(1 << 22)) + +#define ALU_SCL_210 0x0 +#define ALU_SCL_122 0x1 +#define ALU_SCL_212 0x2 +#define ALU_SCL_221 0x3 + +#define SCL_210 | to_QWORD(0, to_LE(ALU_SCL_210 << 18)) +#define SCL_122 | to_QWORD(0, to_LE(ALU_SCL_122 << 18)) +#define SCL_212 | to_QWORD(0, to_LE(ALU_SCL_212 << 18)) +#define SCL_221 | to_QWORD(0, to_LE(ALU_SCL_221 << 18)) + + +#define FETCH_TYPE(x) x +#define MINI(x) ((x) - 1) +#define MEGA(x) (MINI(x) | 0x80000000) +#define OFFSET(x) x + +#define VERTEX_DATA 0 +#define INSTANCE_DATA 1 +#define NO_INDEX_OFFSET 2 + + /* CF defines */ #define CF_COND_ACTIVE 0x0 #define 
CF_COND_FALSE 0x1 @@ -109,13 +188,18 @@ /* instructions */ /* CF */ -#define CF_INST_TEX 0x01 -#define CF_INST_CALL_FS 0x13 +#define CF_INST_TEX 0x01 +#define CF_INST_VTX 0x02 +#define CF_INST_ALU 0x08 +#define CF_INST_CALL_FS 0x13 +#define CF_INST_EMIT_VERTEX 0x15 +#define CF_INST_MEM_RING 0x26 /* ALU */ -#define ALU_INST_ALU 0x8 -#define OP2_INST_MUL 0x1 -#define OP2_INST_MOV 0x19 - +#define OP2_INST_ADD 0x0 +#define OP2_INST_MUL 0x1 +#define OP2_INST_MUL_IEEE 0x2 +#define OP2_INST_MOV 0x19 +#define OP2_INST_RECIP_IEEE 0x66 /* EXP */ #define CF_INST_EXP 0x27 #define CF_INST_EXP_DONE 0x28 @@ -123,6 +207,9 @@ /* TEX */ #define TEX_INST_SAMPLE 0x10 +/* VTX */ +#define VTX_INST_FETCH 0x0 + /* EXPORT_TYPE */ #define EXPORT_TYPE_PIXEL 0x0 #define EXPORT_TYPE_POS 0x1 @@ -142,11 +229,34 @@ #define PIX0 PIX(0) /* registers */ +#define __ (0x80) /* invalid register (write mask off) */ #define _R(x) x #define _R0 _R(0x0) #define _R1 _R(0x1) #define _R2 _R(0x2) #define _R3 _R(0x3) +#define _R4 _R(0x4) +#define _R5 _R(0x5) +#define _R6 _R(0x6) +#define _R7 _R(0x7) +#define _R8 _R(0x8) +#define _R9 _R(0x9) +#define _R10 _R(0xA) +#define _R11 _R(0xB) +#define _R12 _R(0xC) +#define _R13 _R(0xD) +#define _R14 _R(0xE) +#define _R15 _R(0xF) + +#define _R120 _R(0x78) +#define _R121 _R(0x79) +#define _R122 _R(0x7A) +#define _R123 _R(0x7B) +#define _R124 _R(0x7C) +#define _R125 _R(0x7D) +#define _R126 _R(0x7E) +#define _R127 _R(0x7F) + /* texture */ #define _t(x) x @@ -156,28 +266,61 @@ #define _s(x) x #define _s0 _s(0x0) -#define CALL_FS CF_WORD0(0), CF_WORD1(0,0,0,0,0,CF_INST_CALL_FS) +#define _b(x) x -#define TEX(addr, cnt) CF_WORD0(addr), CF_WORD1(0x0, 0x0, CF_COND_ACTIVE, 0x0, (cnt - 1), CF_INST_TEX) -#define ALU(addr, cnt) CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, (cnt - 1), 0x0, ALU_INST_ALU) +#define CALL_FS to_QWORD(CF_DWORD0(0), CF_DWORD1(0,0,0,0,0,CF_INST_CALL_FS)) -#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW)
CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \ - CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE) +#define TEX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, (cnt - 1), 0x0, CF_INST_TEX)) +#define VTX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, (cnt - 1), 0x0, CF_INST_VTX)) -#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \ - CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP) +#define ALU(addr, cnt) to_QWORD(CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, (cnt - 1), 0x0, CF_INST_ALU)) -#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, ALU_SRC_0, 0x0, 0x0, 0x0, 0x0, 0x0), \ - ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MOV, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0) +#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \ + CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE)) -#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \ - ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MUL, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0) +#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \ + CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP)) + +#define MEM_RING(arrayBase, dstReg, writeMask, arraySize, elemSize) \ + to_QWORD(CF_ALLOC_EXPORT_WORD0(arrayBase, 0x00, dstReg, 0x00, 0x00, elemSize), \ + CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, CF_INST_MEM_RING)) + +#define EMIT_VERTEX to_QWORD(0, CF_DWORD1(0, 0, 0, 0, 0, CF_INST_EMIT_VERTEX)) + +#define ALU_OP2(inst, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, omod) \ + 
to_QWORD(ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \ + ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, (((dstGpr&__) >> 7) ^ 0x1), omod, inst, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0)) + +#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) + +#define ALU_MOV_x2(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M2) + +#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ + ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) + +#define ALU_MUL_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ + ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) + +#define ALU_ADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ + ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) + +#define ALU_ADD_x2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ + ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M2) + +#define ALU_RECIP_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_RECIP_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) #define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\ - TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \ - TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED), \ - TEX_WORD2(0x0, 0x0, 0x0, samplerID, _X, _Y, _0, _X) + to_QWORD(TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \ + TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \ + to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, 
samplerID, _x, _y, _0, _x), 0x00000000) + +#define VTX_FETCH(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, buffer_id, type, mega, offset) \ + to_QWORD(VTX_WORD0(VTX_INST_FETCH, type, buffer_id, srcReg, srcSelX, mega), VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW)) , \ + to_QWORD(VTX_WORD2(offset, (mega >> 31)), 0x00000000) #define _x2(v) v, v #define _x4(v) _x2(v), _x2(v) diff --git a/wiiu/include/wiiu/gx2/common.h b/wiiu/include/wiiu/gx2/common.h index f160d7b14b..74aae866f5 100644 --- a/wiiu/include/wiiu/gx2/common.h +++ b/wiiu/include/wiiu/gx2/common.h @@ -3,6 +3,7 @@ #define GX2_SCAN_BUFFER_ALIGNMENT 0x1000 #define GX2_SHADER_ALIGNMENT 0x100 #define GX2_CONTEXT_STATE_ALIGNMENT 0x100 +#define GX2_UNIFORM_BLOCK_ALIGNMENT 0x100 #define GX2_DISPLAY_LIST_ALIGNMENT 0x20 #define GX2_VERTEX_BUFFER_ALIGNMENT 0x40 #define GX2_INDEX_BUFFER_ALIGNMENT 0x20 diff --git a/wiiu/include/wiiu/gx2/enum.h b/wiiu/include/wiiu/gx2/enum.h index 05d71379ee..ea719268a5 100644 --- a/wiiu/include/wiiu/gx2/enum.h +++ b/wiiu/include/wiiu/gx2/enum.h @@ -175,18 +175,19 @@ typedef enum GX2IndexType typedef enum GX2InvalidateMode { - GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER = 1 << 0, - GX2_INVALIDATE_MODE_TEXTURE = 1 << 1, - GX2_INVALIDATE_MODE_UNIFORM_BLOCK = 1 << 2, - GX2_INVALIDATE_MODE_SHADER = 1 << 3, - GX2_INVALIDATE_MODE_COLOR_BUFFER = 1 << 4, - GX2_INVALIDATE_MODE_DEPTH_BUFFER = 1 << 5, - GX2_INVALIDATE_MODE_CPU = 1 << 6, - GX2_INVALIDATE_MODE_STREAM_OUT_BUFFER = 1 << 7, - GX2_INVALIDATE_MODE_EXPORT_BUFFER = 1 << 8, - GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER= GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER, - GX2_INVALIDATE_MODE_CPU_TEXTURE = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_TEXTURE, - GX2_INVALIDATE_MODE_CPU_SHADER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_SHADER, + GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER = 1 << 0, + GX2_INVALIDATE_MODE_TEXTURE = 1 << 1, + GX2_INVALIDATE_MODE_UNIFORM_BLOCK = 1 << 2, + GX2_INVALIDATE_MODE_SHADER = 1 << 
3, + GX2_INVALIDATE_MODE_COLOR_BUFFER = 1 << 4, + GX2_INVALIDATE_MODE_DEPTH_BUFFER = 1 << 5, + GX2_INVALIDATE_MODE_CPU = 1 << 6, + GX2_INVALIDATE_MODE_STREAM_OUT_BUFFER = 1 << 7, + GX2_INVALIDATE_MODE_EXPORT_BUFFER = 1 << 8, + GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER, + GX2_INVALIDATE_MODE_CPU_TEXTURE = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_TEXTURE, + GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_UNIFORM_BLOCK, + GX2_INVALIDATE_MODE_CPU_SHADER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_SHADER, } GX2InvalidateMode; typedef enum GX2InitAttributes @@ -220,6 +221,7 @@ typedef enum GX2LogicOp typedef enum GX2PrimitiveMode { + GX2_PRIMITIVE_MODE_POINTS = 1, GX2_PRIMITIVE_MODE_LINES = 2, GX2_PRIMITIVE_MODE_LINE_STRIP = 3, GX2_PRIMITIVE_MODE_TRIANGLES = 4, diff --git a/wiiu/include/wiiu/gx2/shaders.h b/wiiu/include/wiiu/gx2/shaders.h index cfafc3ad30..ceb883e7af 100644 --- a/wiiu/include/wiiu/gx2/shaders.h +++ b/wiiu/include/wiiu/gx2/shaders.h @@ -69,93 +69,102 @@ typedef struct GX2AttribVar typedef struct GX2VertexShader { - struct + union { struct { - unsigned :2; - bool prime_cache_on_const :1; - bool prime_cache_enable :1; - bool uncached_first_inst :1; - unsigned fetch_cache_lines :3; - bool prime_cache_on_draw :1; - bool prime_cache_pgm_en :1; - bool dx10_clamp :1; - unsigned :5; - unsigned stack_size :8; - unsigned num_gprs :8; - }sq_pgm_resources_vs; + struct + { + unsigned : 2; + bool prime_cache_on_const : 1; + bool prime_cache_enable : 1; + bool uncached_first_inst : 1; + unsigned fetch_cache_lines : 3; + bool prime_cache_on_draw : 1; + bool prime_cache_pgm_en : 1; + bool dx10_clamp : 1; + unsigned : 5; + unsigned stack_size : 8; + unsigned num_gprs : 8; + } sq_pgm_resources_vs; - bool vgt_primitiveid_en; + struct + { + unsigned : 31; + unsigned enable: 1; + } vgt_primitiveid_en; - struct - { - unsigned :18; - unsigned vs_out_fog_vec_addr : 5; - 
bool vs_exports_fog : 1; - unsigned :2; - unsigned vs_export_count :5; - bool vs_per_component : 1; - }spi_vs_out_config; - uint32_t num_spi_vs_out_id; - struct - { - uint8_t semantic_3; - uint8_t semantic_2; - uint8_t semantic_1; - uint8_t semantic_0; - }spi_vs_out_id[10]; - struct - { - bool clip_dist_ena_7 :1; - bool clip_dist_ena_6 :1; - bool clip_dist_ena_5 :1; - bool clip_dist_ena_4 :1; - bool clip_dist_ena_3 :1; - bool clip_dist_ena_2 :1; - bool clip_dist_ena_1 :1; - bool clip_dist_ena_0 :1; - bool cull_dist_ena_7 :1; - bool cull_dist_ena_6 :1; - bool cull_dist_ena_5 :1; - bool cull_dist_ena_0 :1; - bool cull_dist_ena_4 :1; - bool cull_dist_ena_3 :1; - bool cull_dist_ena_2 :1; - bool cull_dist_ena_1 :1; - bool vs_out_misc_side_bus_ena :1; - bool vs_out_ccdist1_vec_ena :1; - bool vs_out_ccdist0_vec_ena :1; - bool vs_out_misc_vec_ena :1; - bool use_vtx_kill_flag :1; - bool use_vtx_viewport_indx :1; - bool use_vtx_render_target_indx :1; - bool use_vtx_edge_flag :1; - unsigned :6; - bool use_vtx_point_size :1; - bool use_vtx_gs_cut_flag :1; - }pa_cl_vs_out_cntl; - uint32_t sq_vtx_semantic_clear; - uint32_t num_sq_vtx_semantic; - uint32_t sq_vtx_semantic[32]; /* 8 bit */ - struct - { - bool buffer_3_en :1; - bool buffer_2_en :1; - bool buffer_1_en :1; - bool buffer_0_en :1; - }vgt_strmout_buffer_en; - struct - { - unsigned :24; - unsigned vtx_reuse_depth :8; - }vgt_vertex_reuse_block_cntl; - struct - { - unsigned :24; - unsigned reuse_depth :8; - }vgt_hos_reuse_depth; + struct + { + unsigned : 18; + unsigned vs_out_fog_vec_addr : 5; + bool vs_exports_fog : 1; + unsigned : 2; + unsigned vs_export_count : 5; + bool vs_per_component : 1; + } spi_vs_out_config; + + uint32_t num_spi_vs_out_id; + struct + { + uint8_t semantic_3; + uint8_t semantic_2; + uint8_t semantic_1; + uint8_t semantic_0; + } spi_vs_out_id[10]; + struct + { + bool clip_dist_ena_7 : 1; + bool clip_dist_ena_6 : 1; + bool clip_dist_ena_5 : 1; + bool clip_dist_ena_4 : 1; + bool clip_dist_ena_3 : 1; + 
bool clip_dist_ena_2 : 1; + bool clip_dist_ena_1 : 1; + bool clip_dist_ena_0 : 1; + bool cull_dist_ena_7 : 1; + bool cull_dist_ena_6 : 1; + bool cull_dist_ena_5 : 1; + bool cull_dist_ena_0 : 1; + bool cull_dist_ena_4 : 1; + bool cull_dist_ena_3 : 1; + bool cull_dist_ena_2 : 1; + bool cull_dist_ena_1 : 1; + bool vs_out_misc_side_bus_ena : 1; + bool vs_out_ccdist1_vec_ena : 1; + bool vs_out_ccdist0_vec_ena : 1; + bool vs_out_misc_vec_ena : 1; + bool use_vtx_kill_flag : 1; + bool use_vtx_viewport_indx : 1; + bool use_vtx_render_target_indx : 1; + bool use_vtx_edge_flag : 1; + unsigned : 6; + bool use_vtx_point_size : 1; + bool use_vtx_gs_cut_flag : 1; + } pa_cl_vs_out_cntl; + uint32_t sq_vtx_semantic_clear; + uint32_t num_sq_vtx_semantic; + uint32_t sq_vtx_semantic[32]; /* 8 bit */ + struct + { + bool buffer_3_en : 1; + bool buffer_2_en : 1; + bool buffer_1_en : 1; + bool buffer_0_en : 1; + } vgt_strmout_buffer_en; + struct + { + unsigned : 24; + unsigned vtx_reuse_depth : 8; + } vgt_vertex_reuse_block_cntl; + struct + { + unsigned : 24; + unsigned reuse_depth : 8; + } vgt_hos_reuse_depth; + }; + u32 vals[52]; } regs; uint32_t size; @@ -180,7 +189,7 @@ typedef struct GX2VertexShader uint32_t attribVarCount; GX2AttribVar *attribVars; - uint32_t ringItemsize; + uint32_t ringItemSize; BOOL hasStreamOut; uint32_t streamOutStride[4]; @@ -188,129 +197,136 @@ typedef struct GX2VertexShader GX2RBuffer gx2rBuffer; } GX2VertexShader; -typedef enum { +typedef enum +{ spi_baryc_cntl_centroids_only = 0, spi_baryc_cntl_centers_only = 1, spi_baryc_cntl_centroids_and_centers = 2, -}spi_baryc_cntl; +} spi_baryc_cntl; -typedef enum { +typedef enum +{ db_z_order_late_z = 0, db_z_order_early_z_then_late_z = 1, db_z_order_re_z = 2, db_z_order_early_z_then_re_z = 3, -}db_z_order; +} db_z_order; typedef struct GX2PixelShader { - struct + union { struct { - unsigned :2; - bool prime_cache_on_const :1; - bool prime_cache_enable :1; - bool uncached_first_inst :1; - unsigned fetch_cache_lines 
:3; - bool prime_cache_on_draw :1; - bool prime_cache_pgm_en :1; - bool dx10_clamp :1; - unsigned :5; - unsigned stack_size :8; - unsigned num_gprs :8; - }sq_pgm_resources_ps; + struct + { + unsigned : 2; + bool prime_cache_on_const : 1; + bool prime_cache_enable : 1; + bool uncached_first_inst : 1; + unsigned fetch_cache_lines : 3; + bool prime_cache_on_draw : 1; + bool prime_cache_pgm_en : 1; + bool dx10_clamp : 1; + unsigned : 5; + unsigned stack_size : 8; + unsigned num_gprs : 8; + } sq_pgm_resources_ps; - struct - { - unsigned :24; - unsigned export_mode :5; - }sq_pgm_exports_ps; + struct + { + unsigned : 27; + unsigned export_mode : 5; + } sq_pgm_exports_ps; - struct - { - bool baryc_at_sample_ena :1; - bool position_sample :1; - bool linear_gradient_ena :1; - bool persp_gradient_ena :1; - spi_baryc_cntl baryc_sample_cntl :2; - unsigned param_gen_addr : 7; - unsigned param_gen :4; - unsigned position_addr :5; - bool position_centroid :1; - bool position_ena :1; - unsigned :2; - unsigned num_interp :6; - }spi_ps_in_control_0; + struct + { + bool baryc_at_sample_ena : 1; + bool position_sample : 1; + bool linear_gradient_ena : 1; + bool persp_gradient_ena : 1; + spi_baryc_cntl baryc_sample_cntl : 2; + unsigned param_gen_addr : 7; + unsigned param_gen : 4; + unsigned position_addr : 5; + bool position_centroid : 1; + bool position_ena : 1; + unsigned : 2; + unsigned num_interp : 6; + } spi_ps_in_control_0; - struct - { - unsigned :1; - bool position_ulc :1; - unsigned fixed_pt_position_addr :5; - bool fixed_pt_position_ena :1; - unsigned fog_addr :7; - unsigned front_face_addr :5; - bool front_face_all_bits :1; - unsigned front_face_chan :2; - bool front_face_ena :1; - unsigned gen_index_pix_addr :7; - bool gen_index_pix :1; - }spi_ps_in_control_1; + struct + { + unsigned : 1; + bool position_ulc : 1; + unsigned fixed_pt_position_addr : 5; + bool fixed_pt_position_ena : 1; + unsigned fog_addr : 7; + unsigned front_face_addr : 5; + bool front_face_all_bits : 1; + 
unsigned front_face_chan : 2; + bool front_face_ena : 1; + unsigned gen_index_pix_addr : 7; + bool gen_index_pix : 1; + } spi_ps_in_control_1; - uint32_t num_spi_ps_input_cntl; + uint32_t num_spi_ps_input_cntl; - struct - { - unsigned :13; - bool sel_sample :1; - bool pt_sprite_tex :1; - unsigned cyl_wrap :4; - bool sel_linear :1; - bool sel_centroid :1; - bool flat_shade :1; - unsigned default_val :2; - unsigned semantic :8; - }spi_ps_input_cntls[32]; + struct + { + unsigned : 13; + bool sel_sample : 1; + bool pt_sprite_tex : 1; + unsigned cyl_wrap : 4; + bool sel_linear : 1; + bool sel_centroid : 1; + bool flat_shade : 1; + unsigned default_val : 2; + unsigned semantic : 8; + } spi_ps_input_cntls[32]; - struct - { - unsigned output7_enable :4; - unsigned output6_enable :4; - unsigned output5_enable :4; - unsigned output4_enable :4; - unsigned output3_enable :4; - unsigned output2_enable :4; - unsigned output1_enable :4; - unsigned output0_enable :4; - }cb_shader_mask; - struct { - unsigned :24; - bool rt7_enable :1; - bool rt6_enable :1; - bool rt5_enable :1; - bool rt4_enable :1; - bool rt3_enable :1; - bool rt2_enable :1; - bool rt1_enable :1; - bool rt0_enable :1; - }cb_shader_control; - struct - { - unsigned :19; - bool alpha_to_mask_disable :1; - bool exec_on_noop :1; - bool exec_on_hier_fail :1; - bool dual_export_enable :1; - bool mask_export_enable :1; - bool coverage_to_mask_enable :1; - bool kill_enable :1; - db_z_order z_order :2; - unsigned :2; - bool z_export_enable :1; - bool stencil_ref_export_enable :1; - } db_shader_control; + struct + { + unsigned output7_enable : 4; + unsigned output6_enable : 4; + unsigned output5_enable : 4; + unsigned output4_enable : 4; + unsigned output3_enable : 4; + unsigned output2_enable : 4; + unsigned output1_enable : 4; + unsigned output0_enable : 4; + } cb_shader_mask; + struct + { + unsigned : 24; + bool rt7_enable : 1; + bool rt6_enable : 1; + bool rt5_enable : 1; + bool rt4_enable : 1; + bool rt3_enable : 1; + 
bool rt2_enable : 1; + bool rt1_enable : 1; + bool rt0_enable : 1; + } cb_shader_control; + struct + { + unsigned : 19; + bool alpha_to_mask_disable : 1; + bool exec_on_noop : 1; + bool exec_on_hier_fail : 1; + bool dual_export_enable : 1; + bool mask_export_enable : 1; + bool coverage_to_mask_enable : 1; + bool kill_enable : 1; + db_z_order z_order : 2; + unsigned : 2; + bool z_export_enable : 1; + bool stencil_ref_export_enable : 1; + } db_shader_control; - bool spi_input_z; + bool spi_input_z; + }; + u32 vals[41]; } regs; uint32_t size; @@ -335,26 +351,148 @@ typedef struct GX2PixelShader GX2RBuffer gx2rBuffer; } GX2PixelShader; +typedef enum +{ + VGT_GS_OUT_PRIMITIVE_TYPE_POINTLIST = 0, + VGT_GS_OUT_PRIMITIVE_TYPE_LINESTRIP = 1, + VGT_GS_OUT_PRIMITIVE_TYPE_TRISTRIP = 2, + VGT_GS_OUT_PRIMITIVE_TYPE_MAX_ENUM = 0xFFFFFFFF +} vgt_gs_out_primitive_type; + +typedef enum +{ + VGT_GS_ENABLE_MODE_OFF = 0, + VGT_GS_ENABLE_MODE_SCENARIO_A = 1, + VGT_GS_ENABLE_MODE_SCENARIO_B = 2, + VGT_GS_ENABLE_MODE_SCENARIO_G = 3, +} vgt_gs_enable_mode; + +typedef enum +{ + VGT_GS_CUT_MODE_1024 = 0, + VGT_GS_CUT_MODE_512 = 1, + VGT_GS_CUT_MODE_256 = 2, + VGT_GS_CUT_MODE_128 = 3, +} vgt_gs_cut_mode; + typedef struct GX2GeometryShader { - struct + union { - uint32_t sq_pgm_resources_gs; - uint32_t vgt_gs_out_prim_type; - uint32_t vgt_gs_mode; - uint32_t pa_cl_vs_out_cntl; - uint32_t sq_pgm_resources_vs; - uint32_t sq_gs_vert_itemsize; - uint32_t spi_vs_out_config; - uint32_t num_spi_vs_out_id; - uint32_t spi_vs_out_id[10]; - uint32_t vgt_strmout_buffer_en; - } regs; + struct + { + struct + { + unsigned : 2; + bool prime_cache_on_const : 1; + bool prime_cache_enable : 1; + bool uncached_first_inst : 1; + unsigned fetch_cache_lines : 3; + bool prime_cache_on_draw : 1; + bool prime_cache_pgm_en : 1; + bool dx10_clamp : 1; + unsigned : 5; + unsigned stack_size : 8; + unsigned num_gprs : 8; + } sq_pgm_resources_gs; + vgt_gs_out_primitive_type vgt_gs_out_prim_type; + struct + { + unsigned : 14; 
+ bool partial_thd_at_eoi : 1; + bool element_info_en : 1; + bool fast_compute_mode : 1; + bool compute_mode : 1; + unsigned : 2; + bool gs_c_pack_en : 1; + unsigned : 2; + bool mode_hi : 1; + unsigned : 3; + vgt_gs_cut_mode cut_mode : 2; + bool es_passthru : 1; + vgt_gs_enable_mode mode : 2; + } vgt_gs_mode; + struct + { + bool clip_dist_ena_7 : 1; + bool clip_dist_ena_6 : 1; + bool clip_dist_ena_5 : 1; + bool clip_dist_ena_4 : 1; + bool clip_dist_ena_3 : 1; + bool clip_dist_ena_2 : 1; + bool clip_dist_ena_1 : 1; + bool clip_dist_ena_0 : 1; + bool cull_dist_ena_7 : 1; + bool cull_dist_ena_6 : 1; + bool cull_dist_ena_5 : 1; + bool cull_dist_ena_0 : 1; + bool cull_dist_ena_4 : 1; + bool cull_dist_ena_3 : 1; + bool cull_dist_ena_2 : 1; + bool cull_dist_ena_1 : 1; + bool vs_out_misc_side_bus_ena : 1; + bool vs_out_ccdist1_vec_ena : 1; + bool vs_out_ccdist0_vec_ena : 1; + bool vs_out_misc_vec_ena : 1; + bool use_vtx_kill_flag : 1; + bool use_vtx_viewport_indx : 1; + bool use_vtx_render_target_indx : 1; + bool use_vtx_edge_flag : 1; + unsigned : 6; + bool use_vtx_point_size : 1; + bool use_vtx_gs_cut_flag : 1; + } pa_cl_vs_out_cntl; + struct + { + unsigned : 2; + bool prime_cache_on_const : 1; + bool prime_cache_enable : 1; + bool uncached_first_inst : 1; + unsigned fetch_cache_lines : 3; + bool prime_cache_on_draw : 1; + bool prime_cache_pgm_en : 1; + bool dx10_clamp : 1; + unsigned : 5; + unsigned stack_size : 8; + unsigned num_gprs : 8; + } sq_pgm_resources_vs; + uint32_t sq_gs_vert_itemsize; /* 15-bit */ + + struct + { + unsigned : 18; + unsigned vs_out_fog_vec_addr : 5; + bool vs_exports_fog : 1; + unsigned : 2; + unsigned vs_export_count : 5; + bool vs_per_component : 1; + } spi_vs_out_config; + + uint32_t num_spi_vs_out_id; + + struct + { + uint8_t semantic_3; + uint8_t semantic_2; + uint8_t semantic_1; + uint8_t semantic_0; + } spi_vs_out_id[10]; + + struct + { + bool buffer_3_en : 1; + bool buffer_2_en : 1; + bool buffer_1_en : 1; + bool buffer_0_en : 1; + } 
vgt_strmout_buffer_en; + }; + u32 vals[19]; + } regs; uint32_t size; uint8_t *program; - uint32_t vertexProgramSize; - uint8_t *vertexProgram; + uint32_t copyProgramSize; + uint8_t *copyProgram; GX2ShaderMode mode; uint32_t uniformBlockCount; @@ -419,6 +557,14 @@ void GX2SetShaderModeEx(GX2ShaderMode mode, uint32_t numGsGpr, uint32_t numGsStackEntries, uint32_t numPsGpr, uint32_t numPsStackEntries); +static inline void GX2SetShaderMode(GX2ShaderMode mode) +{ + if (mode == GX2_SHADER_MODE_GEOMETRY_SHADER) + GX2SetShaderModeEx(mode, 44, 32, 64, 48, 76, 176); + else + GX2SetShaderModeEx(mode, 48, 64, 0, 0, 200, 192); +} + void GX2SetStreamOutEnable(BOOL enable); void GX2SetGeometryShaderInputRingBuffer(void *buffer, uint32_t size); void GX2SetGeometryShaderOutputRingBuffer(void *buffer, uint32_t size); diff --git a/wiiu/include/wiiu/types.h b/wiiu/include/wiiu/types.h index d84180142e..3927c5bb52 100644 --- a/wiiu/include/wiiu/types.h +++ b/wiiu/include/wiiu/types.h @@ -37,3 +37,5 @@ typedef double f64; typedef volatile float vf32; typedef volatile double vf64; + +#define countof(array) (sizeof(array) / sizeof(*array)) diff --git a/wiiu/system/imports.h b/wiiu/system/imports.h index 12acb41878..df746ff3a3 100644 --- a/wiiu/system/imports.h +++ b/wiiu/system/imports.h @@ -170,6 +170,15 @@ IMPORT(GX2InitFetchShaderEx); IMPORT(GX2SetFetchShader); IMPORT(GX2SetVertexShader); IMPORT(GX2SetPixelShader); +IMPORT(GX2SetGeometryShader); +IMPORT(GX2SetGeometryUniformBlock); +IMPORT(GX2SetVertexUniformBlock); +IMPORT(GX2SetPixelUniformBlock); +IMPORT(GX2CalcGeometryShaderInputRingBufferSize); +IMPORT(GX2CalcGeometryShaderOutputRingBufferSize); +IMPORT(GX2SetGeometryShaderInputRingBuffer); +IMPORT(GX2SetGeometryShaderOutputRingBuffer); +IMPORT(GX2SetShaderModeEx); IMPORT(GX2SetAttribBuffer); IMPORT(GX2InitTextureRegs); IMPORT(GX2InitSampler); diff --git a/wiiu/tex_shader.c b/wiiu/tex_shader.c index 4675f6ec86..152acfc511 100644 --- a/wiiu/tex_shader.c +++ b/wiiu/tex_shader.c @@ 
-16,125 +16,67 @@ #include #include #include +#include #include "tex_shader.h" #include "gx2_shader_inl.h" -/******************************************************* - * Vertex Shader GLSL source: - ******************************************************* - - attribute vec2 position; - attribute vec2 tex_coord_in; - attribute vec4 color_in; - varying vec2 tex_coord; - varying vec4 color; - void main() - { - gl_Position = vec4(position, 0.0, 1.0); - tex_coord = tex_coord_in; - color = color_in; - } - - ****************************************************** - * assembly: - ****************************************************** - 00 CALL_FS NO_BARRIER - 01 ALU: ADDR(32) CNT(5) - 0 x: MOV R3.x, R3.x - y: MOV R3.y, R3.y - z: MOV R2.z, 0.0f - w: MOV R2.w, (0x3F800000, 1.0f).x - 02 EXP_DONE: POS0, R2 - 03 EXP: PARAM0, R1 NO_BARRIER - 04 EXP_DONE: PARAM1, R3.xyzz NO_BARRIER - END_OF_PROGRAM - ****************************************************** - */ - __attribute__((aligned(GX2_SHADER_ALIGNMENT))) static struct { - u32 cf[32 * 2]; /* first ADDR() * 2 */ - u32 alu[5 * 2]; /* alu CNT() * 2 */ + u64 cf[16]; } vs_program = { { CALL_FS NO_BARRIER, - ALU(32, 5), - EXP_DONE(POS0, _R2, _X, _Y, _Z, _W), - EXP(PARAM0, _R1, _X, _Y, _Z, _W) NO_BARRIER, - EXP_DONE(PARAM1, _R3, _X, _Y, _Z, _Z) NO_BARRIER + EXP_DONE(POS0, _R1, _x, _y, _0, _1), + EXP_DONE(PARAM0, _R2, _x, _y, _0, _0) NO_BARRIER END_OF_PROGRAM - }, - { - ALU_MOV(_R3,_X, _R3,_X), - ALU_MOV(_R3,_Y, _R3,_Y), - ALU_MOV(_R2,_Z, ALU_SRC_0,_X), - ALU_LAST - ALU_MOV(_R2,_W, ALU_SRC_LITERAL,_X), ALU_LITERAL(0x3F800000) } }; -/******************************************************* - * Pixel Shader GLSL source: - ******************************************************* - - varying vec2 tex_coord; - varying vec4 color; - uniform sampler2D s; - void main() - { - gl_FragColor = texture2D(s, tex_coord) * color; - } - - ****************************************************** - * assembly: - 
****************************************************** - - 00 TEX: ADDR(48) CNT(1) VALID_PIX - 0 SAMPLE R1, R1.xy0x, t0, s0 - 01 ALU: ADDR(32) CNT(4) - 1 x: MUL R0.x, R0.x, R1.x - y: MUL R0.y, R0.y, R1.y - z: MUL R0.z, R0.z, R1.z - w: MUL R0.w, R0.w, R1.w - 02 EXP_DONE: PIX0, R0 - END_OF_PROGRAM - - ******************************************************* - */ - __attribute__((aligned(GX2_SHADER_ALIGNMENT))) static struct { - u32 cf[32 * 2]; /* first ADDR() * 2 */ - u32 alu[(48-32) * 2]; /* (tex ADDR() - alu ADDR()) * 2 */ - u32 tex[1 * 3]; /* tex CNT() * 3 */ -} ps_program = + u64 cf[16]; + u64 tex[1 * 2]; +} +ps_program = { { - TEX(48, 1) VALID_PIX, - ALU(32, 4), - EXP_DONE(PIX0, _R0, _X, _Y, _Z, _W) + TEX(16, 1) VALID_PIX, + EXP_DONE(PIX0, _R0, _x, _y, _z, _w) END_OF_PROGRAM }, { - ALU_MUL(_R0,_X, _R0,_X, _R1,_X), - ALU_MUL(_R0,_Y, _R0,_Y, _R1,_Y), - ALU_MUL(_R0,_Z, _R0,_Z, _R1,_Z), - ALU_LAST - ALU_MUL(_R0,_W, _R0,_W, _R1,_W), - }, - { - TEX_SAMPLE(_R1,_X,_Y,_Z,_W, _R1,_X,_Y,_0,_X, _t0, _s0) + TEX_SAMPLE(_R0,_x,_y,_z,_w, _R0,_x,_y,_0,_0, _t0, _s0) } }; -tex_shader_t tex_shader = +static GX2AttribVar attributes[] = +{ + { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0}, + { "tex_coord", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1}, +}; + +static GX2AttribStream attribute_stream[] = +{ + {0, 0, offsetof(tex_shader_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32, + GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT}, + {1, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32, + GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT}, +}; + +static GX2SamplerVar samplers[] = +{ + { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 }, +}; + + +GX2Shader tex_shader = { { { - .sq_pgm_resources_vs.num_gprs = 4, + .sq_pgm_resources_vs.num_gprs = 3, .sq_pgm_resources_vs.stack_size = 1, .spi_vs_out_config.vs_export_count = 1, .num_spi_vs_out_id = 1, @@ -150,10 +92,10 @@ tex_shader_t tex_shader = {.semantic_0 = 
0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, }, - .sq_vtx_semantic_clear = ~0x7, - .num_sq_vtx_semantic = 3, + .sq_vtx_semantic_clear = ~0x3, + .num_sq_vtx_semantic = 2, { - 0, 1, 2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0, 1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, .vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE, @@ -162,11 +104,11 @@ tex_shader_t tex_shader = .size = sizeof(vs_program), .program = (uint8_t*)&vs_program, .mode = GX2_SHADER_MODE_UNIFORM_REGISTER, - .attribVarCount = sizeof(tex_shader.attributes) / sizeof(GX2AttribVar), (GX2AttribVar*) &tex_shader.attributes, + .attribVarCount = countof(attributes), attributes, }, { { - .sq_pgm_resources_ps.num_gprs = 2, + .sq_pgm_resources_ps.num_gprs = 1, .sq_pgm_exports_ps.export_mode = 0x2, .spi_ps_in_control_0.num_interp = 2, .spi_ps_in_control_0.persp_gradient_ena = 1, @@ -179,28 +121,7 @@ tex_shader_t tex_shader = .size = sizeof(ps_program), .program = (uint8_t*)&ps_program, .mode = GX2_SHADER_MODE_UNIFORM_REGISTER, - .samplerVarCount = 1, - .samplerVars = (GX2SamplerVar*) &tex_shader.sampler, - }, - .sampler = { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 }, - .attributes = { - .color = { "color_in", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 0}, - .position = { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1}, - .tex_coord = { "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 2}, - }, - .attribute_stream = { - .color = { - 0, 0, offsetof(tex_shader_vertex_t, color), GX2_ATTRIB_FORMAT_UNORM_8_8_8_8, - GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _Z, _W), GX2_ENDIAN_SWAP_DEFAULT - }, - .position = { - 1, 0, offsetof(tex_shader_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32, - GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), 
GX2_ENDIAN_SWAP_DEFAULT - }, - .tex_coord = { - 2, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32, - GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT - } - }, - {}, + .samplerVarCount = countof(samplers), samplers, + }, + .attribute_stream = attribute_stream, }; diff --git a/wiiu/tex_shader.h b/wiiu/tex_shader.h index cc23499f9e..d9a80292af 100644 --- a/wiiu/tex_shader.h +++ b/wiiu/tex_shader.h @@ -15,32 +15,13 @@ #ifndef TEX_SHADER_H #define TEX_SHADER_H -#include + +#include #ifdef __cplusplus extern "C" { #endif -typedef struct __attribute__((aligned(GX2_VERTEX_BUFFER_ALIGNMENT))) -{ - GX2VertexShader vs; - GX2PixelShader ps; - GX2SamplerVar sampler; - struct - { - GX2AttribVar color; - GX2AttribVar position; - GX2AttribVar tex_coord; - } attributes; - struct - { - GX2AttribStream color; - GX2AttribStream position; - GX2AttribStream tex_coord; - } attribute_stream; - GX2FetchShader fs; -}tex_shader_t; - typedef struct { struct @@ -54,11 +35,9 @@ typedef struct float u; float v; }coord; - - u32 color; }tex_shader_vertex_t; -extern tex_shader_t tex_shader; +extern GX2Shader tex_shader; #ifdef __cplusplus }