(WIIU) add a sprite shader.

This commit is contained in:
aliaspider 2018-01-04 17:38:04 +01:00
parent e63697dca7
commit a7632620da
13 changed files with 760 additions and 627 deletions

View File

@ -66,7 +66,9 @@ else
DEFINES += -DHAVE_FILTERS_BUILTIN
OBJ += wiiu/system/missing_libc_functions.o
OBJ += wiiu/shader_utils.o
OBJ += wiiu/tex_shader.o
OBJ += wiiu/sprite_shader.o
ifeq ($(GRIFFIN_BUILD), 1)
OBJ += griffin/griffin.o

View File

@ -1,6 +1,7 @@
#include <wiiu/gx2.h>
#include "wiiu/tex_shader.h"
#include "wiiu/sprite_shader.h"
#undef _X
#undef _B
@ -21,8 +22,6 @@
#define COLOR_ARGB(r, g, b, a) (((u32)(a) << 24) | ((u32)(r) << 16) | ((u32)(g) << 8) | ((u32)(b) << 0))
#define COLOR_RGBA(r, g, b, a) (((u32)(r) << 24) | ((u32)(g) << 16) | ((u32)(b) << 8) | ((u32)(a) << 0))
//#define GX2_CAN_ACCESS_DATA_SECTION
typedef struct
{
int width;
@ -33,20 +32,19 @@ typedef struct
struct gx2_overlay_data
{
GX2Texture tex;
tex_shader_vertex_t v[4];
sprite_vertex_t v;
float alpha_mod;
};
typedef struct
{
tex_shader_t* shader;
struct
{
GX2Texture texture;
int width;
int height;
bool enable;
tex_shader_vertex_t* v;
sprite_vertex_t* v;
} menu;
#ifdef HAVE_OVERLAY
@ -60,12 +58,19 @@ typedef struct
GX2Sampler sampler_linear;
GX2Texture texture;
tex_shader_vertex_t* v;
GX2_vec2* ubo_vp;
GX2_vec2* ubo_tex;
void* input_ring_buffer;
u32 input_ring_buffer_size;
void* output_ring_buffer;
u32 output_ring_buffer_size;
int width;
int height;
struct
{
tex_shader_vertex_t* v;
sprite_vertex_t* v;
int size;
int current;
} vertex_cache;

View File

@ -145,7 +145,6 @@ static void wiiu_gfx_update_viewport(wiiu_video_t* wiiu)
wiiu->vp.height = height;
}
float scale_w = wiiu->color_buffer.surface.width / wiiu->render_mode.width;
float scale_h = wiiu->color_buffer.surface.height / wiiu->render_mode.height;
wiiu_set_position(wiiu->v, &wiiu->color_buffer,
@ -154,6 +153,8 @@ static void wiiu_gfx_update_viewport(wiiu_video_t* wiiu)
(wiiu->vp.x + wiiu->vp.width) * scale_w,
(wiiu->vp.y + wiiu->vp.height) * scale_h);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
wiiu->should_resize = false;
}
@ -275,70 +276,25 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD,
GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD);
GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
#ifdef GX2_CAN_ACCESS_DATA_SECTION
wiiu->shader = &tex_shader;
#else
/* Initialize shader */
wiiu->shader = MEM2_alloc(sizeof(tex_shader), 0x1000);
memcpy(wiiu->shader, &tex_shader, sizeof(tex_shader));
GX2Invalidate(GX2_INVALIDATE_MODE_CPU, wiiu->shader, sizeof(tex_shader));
GX2InitShader(&tex_shader);
GX2InitShader(&sprite_shader);
GX2SetShader(&tex_shader);
wiiu->shader->vs.program = MEM2_alloc(wiiu->shader->vs.size, GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->vs.program, tex_shader.vs.program, wiiu->shader->vs.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->vs.program, wiiu->shader->vs.size);
wiiu->shader->vs.attribVars = MEM2_alloc(wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar),
GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->vs.attribVars, tex_shader.vs.attribVars ,
wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar));
wiiu->ubo_vp = MEM1_alloc(sizeof(*wiiu->ubo_vp), GX2_UNIFORM_BLOCK_ALIGNMENT);
wiiu->ubo_vp->width = wiiu->color_buffer.surface.width;
wiiu->ubo_vp->height = wiiu->color_buffer.surface.height;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ubo_vp, sizeof(*wiiu->ubo_vp));
wiiu->shader->ps.program = MEM2_alloc(wiiu->shader->ps.size, GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->ps.program, tex_shader.ps.program, wiiu->shader->ps.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->ps.program, wiiu->shader->ps.size);
wiiu->shader->ps.samplerVars = MEM2_alloc(wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar),
GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->ps.samplerVars, tex_shader.ps.samplerVars,
wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar));
wiiu->ubo_tex = MEM1_alloc(sizeof(*wiiu->ubo_tex), GX2_UNIFORM_BLOCK_ALIGNMENT);
wiiu->ubo_tex->width = 1.0;
wiiu->ubo_tex->height = 1.0;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ubo_tex, sizeof(*wiiu->ubo_tex));
#endif
wiiu->shader->fs.size = GX2CalcFetchShaderSizeEx(sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream),
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
wiiu->shader->fs.program = MEM2_alloc(wiiu->shader->fs.size, GX2_SHADER_ALIGNMENT);
GX2InitFetchShaderEx(&wiiu->shader->fs, (uint8_t*)wiiu->shader->fs.program,
sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream),
(GX2AttribStream*)&wiiu->shader->attribute_stream,
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->fs.program, wiiu->shader->fs.size);
GX2SetVertexShader(&wiiu->shader->vs);
GX2SetPixelShader(&wiiu->shader->ps);
GX2SetFetchShader(&wiiu->shader->fs);
wiiu->v = MEM2_alloc(4 * sizeof(*wiiu->v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0,
wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation);
wiiu->v[0].color = 0xFFFFFFFF;
wiiu->v[1].color = 0xFFFFFFFF;
wiiu->v[2].color = 0xFFFFFFFF;
wiiu->v[3].color = 0xFFFFFFFF;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
wiiu->menu.v = MEM2_alloc(4 * sizeof(*wiiu->menu.v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->menu.v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->menu.v, &wiiu->menu.texture, 0, 0,
wiiu->menu.texture.surface.width, wiiu->menu.texture.surface.height, 0);
wiiu->menu.v[0].color = 0xFFFFFF80;
wiiu->menu.v[1].color = 0xFFFFFF80;
wiiu->menu.v[2].color = 0xFFFFFF80;
wiiu->menu.v[3].color = 0xFFFFFF80;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
wiiu->input_ring_buffer_size = GX2CalcGeometryShaderInputRingBufferSize(sprite_shader.vs.ringItemSize);
wiiu->output_ring_buffer_size = GX2CalcGeometryShaderOutputRingBufferSize(sprite_shader.gs.ringItemSize);
wiiu->input_ring_buffer = MEM1_alloc(wiiu->input_ring_buffer_size, 0x1000);
wiiu->output_ring_buffer = MEM1_alloc(wiiu->output_ring_buffer_size, 0x1000);
/* Initialize frame texture */
memset(&wiiu->texture, 0, sizeof(GX2Texture));
@ -389,6 +345,28 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image,
wiiu->menu.texture.surface.imageSize);
wiiu->v = MEM2_alloc(4 * sizeof(*wiiu->v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0,
wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
wiiu->menu.v = MEM2_alloc(4 * sizeof(*wiiu->menu.v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu->menu.v->pos.x = 0.0f;
wiiu->menu.v->pos.y = 0.0f;
wiiu->menu.v->pos.width = wiiu->color_buffer.surface.width;
wiiu->menu.v->pos.height = wiiu->color_buffer.surface.height;
wiiu->menu.v->coord.u = 0.0f;
wiiu->menu.v->coord.v = 0.0f;
wiiu->menu.v->coord.width = 1.0f;
wiiu->menu.v->coord.height = 1.0f;
wiiu->menu.v->color = 0xFFFFFF80;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
wiiu->vertex_cache.size = 0x1000;
wiiu->vertex_cache.current = 0;
wiiu->vertex_cache.v = MEM2_alloc(wiiu->vertex_cache.size
@ -399,8 +377,8 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2InitSampler(&wiiu->sampler_linear, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR);
/* set Texture and Sampler */
GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
GX2SetPixelTexture(&wiiu->texture, tex_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&wiiu->sampler_linear, tex_shader.ps.samplerVars[0].location);
/* clear leftover image */
GX2ClearColor(&wiiu->color_buffer, 0.0f, 0.0f, 0.0f, 1.0f);
@ -450,15 +428,11 @@ static void gx2_overlay_tex_geom(void *data, unsigned image,
if (!o)
return;
o->v[0].coord.u = x;
o->v[0].coord.v = y;
o->v[1].coord.u = x + w;
o->v[1].coord.v = y;
o->v[2].coord.u = x + w;
o->v[2].coord.v = y + h;
o->v[3].coord.u = x ;
o->v[3].coord.v = y + h;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v, sizeof(o->v));
o->v.coord.u = x;
o->v.coord.v = y;
o->v.coord.width = w;
o->v.coord.height = h;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v, sizeof(o->v));
}
static void gx2_overlay_vertex_geom(void *data, unsigned image,
@ -467,15 +441,6 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image,
wiiu_video_t *gx2 = (wiiu_video_t*)data;
struct gx2_overlay_data *o = NULL;
/* Flipped, so we preserve top-down semantics. */
y = 1.0f - y;
h = -h;
/* expand from 0 - 1 to -1 - 1 */
x = (x * 2.0f) - 1.0f;
y = (y * 2.0f) - 1.0f;
w = (w * 2.0f);
h = (h * 2.0f);
if (gx2)
o = (struct gx2_overlay_data*)&gx2->overlay[image];
@ -483,19 +448,12 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image,
if (!o)
return;
o->v[0].pos.x = x;
o->v[0].pos.y = y;
o->v.pos.x = x * gx2->color_buffer.surface.width;
o->v.pos.y = y * gx2->color_buffer.surface.height;
o->v.pos.width = w * gx2->color_buffer.surface.width;
o->v.pos.height = h * gx2->color_buffer.surface.height;
o->v[1].pos.x = x + w;
o->v[1].pos.y = y;
o->v[2].pos.x = x + w;
o->v[2].pos.y = y + h;
o->v[3].pos.x = x ;
o->v[3].pos.y = y + h;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v,sizeof(o->v));
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v,sizeof(o->v));
}
static void gx2_free_overlay(wiiu_video_t *gx2)
@ -555,13 +513,9 @@ static bool gx2_overlay_load(void *data,
gx2_overlay_tex_geom(gx2, i, 0, 0, 1, 1);
gx2_overlay_vertex_geom(gx2, i, 0, 0, 1, 1);
gx2->overlay[i].alpha_mod = 1.0f;
gx2->overlay[i].v[0].color = 0xFFFFFFFF;
gx2->overlay[i].v[1].color = 0xFFFFFFFF;
gx2->overlay[i].v[2].color = 0xFFFFFFFF;
gx2->overlay[i].v[3].color = 0xFFFFFFFF;
gx2->overlay[i].v.color = 0xFFFFFFFF;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v,sizeof(o->v));
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v,sizeof(o->v));
}
@ -588,11 +542,8 @@ static void gx2_overlay_set_alpha(void *data, unsigned image, float mod)
if (gx2)
{
gx2->overlay[image].alpha_mod = mod;
gx2->overlay[image].v[0].color = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod);
gx2->overlay[image].v[1].color = gx2->overlay[image].v[0].color;
gx2->overlay[image].v[2].color = gx2->overlay[image].v[0].color;
gx2->overlay[image].v[3].color = gx2->overlay[image].v[0].color;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, gx2->overlay[image].v, sizeof(gx2->overlay[image].v));
gx2->overlay[image].v.color = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &gx2->overlay[image].v, sizeof(gx2->overlay[image].v));
}
}
@ -604,12 +555,12 @@ static void gx2_render_overlay(void *data)
for (i = 0; i < gx2->overlays; i++){
GX2SetAttribBuffer(0, sizeof(gx2->overlay[i].v), sizeof(*gx2->overlay[i].v), gx2->overlay[i].v);
GX2SetAttribBuffer(0, sizeof(gx2->overlay[i].v), sizeof(gx2->overlay[i].v), &gx2->overlay[i].v);
GX2SetPixelTexture(&gx2->overlay[i].tex, gx2->shader->sampler.location);
GX2SetPixelSampler(&gx2->sampler_linear, gx2->shader->sampler.location);
GX2SetPixelTexture(&gx2->overlay[i].tex, sprite_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&gx2->sampler_linear, sprite_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, 0, 1);
}
@ -657,31 +608,26 @@ static void wiiu_gfx_free(void* data)
GX2SetTVEnable(GX2_DISABLE);
GX2SetDRCEnable(GX2_DISABLE);
GX2DestroyShader(&tex_shader);
GX2DestroyShader(&sprite_shader);
MEM2_free(wiiu->ctx_state);
MEM2_free(wiiu->cmd_buffer);
MEM2_free(wiiu->texture.surface.image);
MEM2_free(wiiu->menu.texture.surface.image);
MEM2_free(wiiu->v);
MEM2_free(wiiu->menu.v);
MEM2_free(wiiu->vertex_cache.v);
MEM1_free(wiiu->color_buffer.surface.image);
MEM1_free(wiiu->ubo_vp);
MEM1_free(wiiu->ubo_tex);
MEM1_free(wiiu->input_ring_buffer);
MEM1_free(wiiu->output_ring_buffer);
MEMBucket_free(wiiu->tv_scan_buffer);
MEMBucket_free(wiiu->drc_scan_buffer);
MEM2_free(wiiu->shader->fs.program);
#ifndef GX2_CAN_ACCESS_DATA_SECTION
MEM2_free(wiiu->shader->vs.program);
MEM2_free(wiiu->shader->vs.attribVars);
MEM2_free(wiiu->shader->ps.program);
MEM2_free(wiiu->shader->ps.samplerVars);
MEM2_free(wiiu->shader);
#endif
MEM2_free(wiiu->v);
MEM2_free(wiiu->menu.v);
free(wiiu);
}
@ -794,17 +740,27 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->texture.surface.image,
wiiu->texture.surface.imageSize);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0, width, height, wiiu->rotation);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
}
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_REGISTER);
GX2SetShader(&tex_shader);
GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest,
wiiu->shader->sampler.location);
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
GX2SetPixelTexture(&wiiu->texture, tex_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest, tex_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
GX2SetShaderMode(GX2_SHADER_MODE_GEOMETRY_SHADER);
GX2SetShader(&sprite_shader);
GX2SetGeometryShaderInputRingBuffer(wiiu->input_ring_buffer, wiiu->input_ring_buffer_size);
GX2SetGeometryShaderOutputRingBuffer(wiiu->output_ring_buffer, wiiu->output_ring_buffer_size);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[0].offset, sprite_shader.vs.uniformBlocks[0].size, wiiu->ubo_vp);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, wiiu->ubo_tex);
#ifdef HAVE_OVERLAY
if (wiiu->overlay_enable)
gx2_render_overlay(wiiu);
@ -814,16 +770,16 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
{
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->menu.v), sizeof(*wiiu->menu.v), wiiu->menu.v);
GX2SetPixelTexture(&wiiu->menu.texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
GX2SetPixelTexture(&wiiu->menu.texture, sprite_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&wiiu->sampler_linear, sprite_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, 0, 1);
}
wiiu->vertex_cache.current = 0;
GX2SetAttribBuffer(0, wiiu->vertex_cache.size * sizeof(*wiiu->vertex_cache.v),
sizeof(*wiiu->vertex_cache.v), wiiu->vertex_cache.v);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, sprite_shader.ps.samplerVars[0].location);
wiiu->render_msg_enabled = true;
@ -837,7 +793,6 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER,
wiiu->vertex_cache.v, wiiu->vertex_cache.current * sizeof(*wiiu->vertex_cache.v));
if (wiiu->menu.enable)
GX2DrawDone();
@ -895,7 +850,10 @@ static void wiiu_gfx_set_rotation(void* data,
{
wiiu_video_t* wiiu = (wiiu_video_t*) data;
if(wiiu)
{
wiiu->rotation = rotation;
wiiu->should_resize = true;
}
}
static void wiiu_gfx_viewport_info(void* data,
@ -1010,7 +968,16 @@ static void wiiu_gfx_set_texture_frame(void* data, const void* frame, bool rgb32
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image,
wiiu->menu.texture.surface.imageSize);
wiiu_set_tex_coords(wiiu->menu.v, &wiiu->menu.texture, 0, 0, width, height, 0);
wiiu->menu.v->pos.x = 0.0f;
wiiu->menu.v->pos.y = 0.0f;
wiiu->menu.v->pos.width = width;
wiiu->menu.v->pos.height = height;
wiiu->menu.v->coord.u = 0.0f;
wiiu->menu.v->coord.v = 0.0f;
wiiu->menu.v->coord.width = (float)width / wiiu->texture.surface.width;
wiiu->menu.v->coord.height = (float)height / wiiu->texture.surface.height;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
}
static void wiiu_gfx_set_texture_enable(void* data, bool state, bool full_screen)

View File

@ -32,6 +32,7 @@
typedef struct
{
GX2Texture texture;
GX2_vec2* ubo_tex;
const font_renderer_driver_t* font_driver;
void* font_data;
struct font_atlas* atlas;
@ -79,6 +80,13 @@ static void* wiiu_font_init_font(void* data, const char* font_path,
font->atlas->dirty = false;
font->ubo_tex = MEM1_alloc(sizeof(*font->ubo_tex), GX2_UNIFORM_BLOCK_ALIGNMENT);
font->ubo_tex->width = font->texture.surface.width;
font->ubo_tex->height = font->texture.surface.height;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, font->ubo_tex,
sizeof(*font->ubo_tex));
return font;
}
@ -93,6 +101,7 @@ static void wiiu_font_free_font(void* data, bool is_threaded)
font->font_driver->free(font->font_data);
MEM1_free(font->texture.surface.image);
MEM1_free(font->ubo_tex);
free(font);
}
@ -142,9 +151,7 @@ static void wiiu_font_render_line(
unsigned width = video_info->width;
unsigned height = video_info->height;
int x = roundf(pos_x * width);
int y = roundf((1.0f - pos_y) * height);
int delta_x = 0;
int delta_y = 0;
int y = roundf((1.0 - pos_y) * height);
if(wiiu->vertex_cache.current + (msg_len * 4) > wiiu->vertex_cache.size)
return;
@ -160,11 +167,10 @@ static void wiiu_font_render_line(
break;
}
tex_shader_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
for (i = 0; i < msg_len; i++)
{
int off_x, off_y, tex_x, tex_y, width, height;
const char* msg_tmp = &msg[i];
unsigned code = utf8_walk(&msg_tmp);
unsigned skip = msg_tmp - &msg[i];
@ -181,50 +187,22 @@ static void wiiu_font_render_line(
if (!glyph)
continue;
off_x = glyph->draw_offset_x;
off_y = glyph->draw_offset_y;
tex_x = glyph->atlas_offset_x;
tex_y = glyph->atlas_offset_y;
width = glyph->width;
height = glyph->height;
v->pos.x = x + glyph->draw_offset_x * scale;
v->pos.y = y + glyph->draw_offset_y * scale;
v->pos.width = glyph->width * scale;
v->pos.height = glyph->height * scale;
v->coord.u = glyph->atlas_offset_x;
v->coord.v = glyph->atlas_offset_y;
v->coord.width = glyph->width;
v->coord.height = glyph->height;
float x0 = x + off_x + delta_x * scale;
float y0 = y + off_y + delta_y * scale + height * scale;
float u0 = tex_x;
float v0 = tex_y;
float x1 = x0 + width * scale;
float y1 = y0 - height * scale;
float u1 = u0 + width;
float v1 = v0 + height;
v->color = color;
v[0].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;
v[0].pos.y = (-2.0f * y0 / wiiu->color_buffer.surface.height) + 1.0f;
v[1].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[1].pos.y = (-2.0f * y0 / wiiu->color_buffer.surface.height) + 1.0f;
v[2].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[2].pos.y = (-2.0f * y1 / wiiu->color_buffer.surface.height) + 1.0f;
v[3].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;;
v[3].pos.y = (-2.0f * y1 / wiiu->color_buffer.surface.height) + 1.0f;
v++;
v[0].coord.u = u0 / font->texture.surface.width;
v[0].coord.v = v1 / font->texture.surface.height;
v[1].coord.u = u1 / font->texture.surface.width;
v[1].coord.v = v1 / font->texture.surface.height;
v[2].coord.u = u1 / font->texture.surface.width;
v[2].coord.v = v0 / font->texture.surface.height;
v[3].coord.u = u0 / font->texture.surface.width;
v[3].coord.v = v0 / font->texture.surface.height;
v[0].color = color;
v[1].color = color;
v[2].color = color;
v[3].color = color;
v += 4;
delta_x += glyph->advance_x;
delta_y += glyph->advance_y;
x += glyph->advance_x * scale;
y += glyph->advance_y * scale;
}
int count = v - wiiu->vertex_cache.v - wiiu->vertex_cache.current;
@ -247,14 +225,12 @@ static void wiiu_font_render_line(
}
#if 0
printf("%s\n", msg);
DEBUG_VAR(color);
#endif
GX2SetPixelTexture(&font->texture, sprite_shader.ps.samplerVars[0].location);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, font->ubo_tex);
GX2SetPixelTexture(&font->texture, wiiu->shader->sampler.location);
GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, count, wiiu->vertex_cache.current, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, count, wiiu->vertex_cache.current, 1);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, wiiu->ubo_tex);
wiiu->vertex_cache.current = v - wiiu->vertex_cache.v;
}

View File

@ -77,68 +77,48 @@ static void menu_display_wiiu_draw(void *data)
if (wiiu->vertex_cache.current + 4 > wiiu->vertex_cache.size)
return;
tex_shader_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
float x0 = draw->x;
float y0 = draw->y;
float x1 = x0 + draw->width;
float y1 = y0 + draw->height;
sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
if(draw->coords->vertex && draw->coords->vertices == 4)
{
for(int i = 0; i < 4; i++)
{
v[i].pos.x = draw->coords->vertex[i << 1] * 2.0f - 1.0f;
v[i].pos.y = draw->coords->vertex[(i << 1) + 1] * 2.0f - 1.0f;
}
v->pos.x = MIN(MIN(MIN(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]);
v->pos.y = 1.0 - MAX(MAX(MAX(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), draw->coords->vertex[7]);
v->pos.width = MAX(MAX(MAX(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]) - v->pos.x;
v->pos.height = 1.0 - MIN(MIN(MIN(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), draw->coords->vertex[7]) - v->pos.y;
v->pos.x *= wiiu->color_buffer.surface.width;
v->pos.y *= wiiu->color_buffer.surface.height;
v->pos.width *= wiiu->color_buffer.surface.width;
v->pos.height *= wiiu->color_buffer.surface.height;
}
else
{
v[0].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;
v[0].pos.y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f;
v[1].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[1].pos.y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f;
v[2].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[2].pos.y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f;
v[3].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;;
v[3].pos.y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f;
v->pos.x = draw->x;
v->pos.y = wiiu->color_buffer.surface.height - draw->y - draw->height;
v->pos.width = draw->width;
v->pos.height = draw->height;
}
if(draw->coords->tex_coord && draw->coords->vertices == 4)
{
for(int i = 0; i < 4; i++)
{
v[i].coord.u = draw->coords->tex_coord[i << 1];
v[i].coord.v = draw->coords->tex_coord[(i << 1) + 1];
}
v->coord.u = MIN(MIN(MIN(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]);
v->coord.v = MIN(MIN(MIN(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]);
v->coord.width = MAX(MAX(MAX(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]) - v->coord.u;
v->coord.height = MAX(MAX(MAX(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]) - v->coord.v;
}
else
{
v[0].coord.u = 0.0f;
v[0].coord.v = 1.0f;
v[1].coord.u = 1.0f;
v[1].coord.v = 1.0f;
v[2].coord.u = 1.0f;
v[2].coord.v = 0.0f;
v[3].coord.u = 0.0f;
v[3].coord.v = 0.0f;
v->coord.u = 0.0f;
v->coord.v = 0.0f;
v->coord.width = 1.0f;
v->coord.height = 1.0f;
}
v[0].color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1],
v->color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1],
0xFF * draw->coords->color[2], 0xFF * draw->coords->color[3]);
v[1].color = v[0].color;
v[2].color = v[0].color;
v[3].color = v[0].color;
// printf("color : %f, %f, %f, %f --> 0x%08X\n", draw->coords->color[0], draw->coords->color[1], draw->coords->color[2], draw->coords->color[3], col[0]);
GX2SetPixelTexture(texture, tex_shader.ps.samplerVars[0].location);
GX2SetPixelTexture(texture, wiiu->shader->sampler.location);
if(draw->coords->vertex && draw->coords->vertices == 4)
GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, wiiu->vertex_cache.current, 1);
else
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, wiiu->vertex_cache.current, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, wiiu->vertex_cache.current, 1);
#if 0
printf("(%i,%i,%i,%i) , (%i,%i)\n", (int)draw->x,
@ -146,7 +126,7 @@ static void menu_display_wiiu_draw(void *data)
texture->surface.width, texture->surface.height);
#endif
wiiu->vertex_cache.current += 4;
wiiu->vertex_cache.current ++;
}

View File

@ -17,19 +17,21 @@
#define GX2_SHADER_INL_H
#ifdef MSB_FIRST
#define to_LE(x) __builtin_bswap32(x)
#define to_QWORD(w0, w1) (((u64)(w0) << 32ull) | (w1))
#define to_LE(x) (__builtin_bswap32(x))
#else
#define to_LE(x) x
#define to_QWORD(w0, w1) (((u64)(w1) << 32ull) | (w0))
#define to_LE(x) (x)
#endif
/* CF */
#define CF_WORD0(addr) to_LE(addr)
#define CF_DWORD0(addr) to_LE(addr)
#define CF_WORD1(popCount, cfConst, cond, count, callCount, inst) \
#define CF_DWORD1(popCount, cfConst, cond, count, callCount, inst) \
to_LE(popCount | (cfConst << 3) | (cond << 8) | (count << 10) | (callCount << 13) | (inst << 23) | (1 << 31))
#define CF_ALU_WORD0(addr, kcacheBank0, kcacheBank1, kcacheMode0) \
to_LE(addr | (kcacheBank0 << 16) | (kcacheBank1 << 20) | (kcacheMode0 << 22))
to_LE(addr | (kcacheBank0 << 22) | (kcacheBank1 << 26) | (kcacheMode0 << 30))
#define CF_ALU_WORD1(kcacheMode1, kcacheAddr0, kcacheAddr1, count, altConst, inst) \
to_LE(kcacheMode1 | (kcacheAddr0 << 2) | (kcacheAddr1 << 10) | (count << 18) | (altConst << 25) | (inst << 26) | (1 << 31))
@ -39,26 +41,49 @@
#define CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, validPixelMode, inst) \
to_LE(srcSelX | (srcSelY << 3) | (srcSelZ << 6) | (srcSelW << 9) | (validPixelMode << 22) | (inst << 23) | (1 << 31))
#define NO_BARRIER & to_LE(~(1 << 31))
#define END_OF_PROGRAM | to_LE(1 << 21)
#define VALID_PIX | to_LE(1 << 22)
#define WHOLE_QUAD_MODE | to_LE(1 << 30)
#define CF_ALLOC_EXPORT_WORD0(arrayBase, type, dstReg, dstRel, indexGpr, elemSize) \
to_LE(arrayBase | (type << 13) | (dstReg << 15) | (dstRel << 22) | (indexGpr << 23) | (elemSize << 30))
#define ALU_LAST to_LE(1 << 31) |
#define CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, inst) \
to_LE(arraySize | (writeMask << 12) | (inst << 23) | (1 << 31))
#define ALU_SRC_KCACHE0_BASE 0x80
#define ALU_SRC_KCACHE1_BASE 0xA0
#define CF_KCACHE_BANK_LOCK_1 0x1
#define CB1 0x1
#define CB2 0x2
#define _0_15 CF_KCACHE_BANK_LOCK_1
/* Offset a constant index by the KCACHE0 / KCACHE1 source-select base.
 * The argument is parenthesized so that expression arguments (conditionals,
 * bitwise ops, ...) are evaluated as a unit before the base is added. */
#define KC0(x) ((x) + ALU_SRC_KCACHE0_BASE)
#define KC1(x) ((x) + ALU_SRC_KCACHE1_BASE)
#define NO_BARRIER & ~to_QWORD(0,to_LE(1 << 31))
#define END_OF_PROGRAM | to_QWORD(0,to_LE(1 << 21))
#define VALID_PIX | to_QWORD(0,to_LE(1 << 22))
#define WHOLE_QUAD_MODE | to_QWORD(0,to_LE(1 << 30))
#define BURSTCNT(x) | to_QWORD(0,to_LE(x << 17))
#define WRITE(x) (x >> 2)
#define ARRAY_SIZE(x) x
#define ELEM_SIZE(x) x
#define KCACHE0(bank, mode) | to_QWORD(CF_ALU_WORD0(0, bank, 0, mode), 0)
#define KCACHE1(bank, mode) | to_QWORD(CF_ALU_WORD0(0, 0, bank, 0), CF_ALU_WORD1(mode,0, 0, 0, 0, 0))
#define ALU_LAST | to_QWORD(to_LE(1ull << 31), 0)
/* ALU */
#define ALU_WORD0(src0Sel, src0Rel, src0Chan, src0Neg, src1Sel, src1Rel, src1Chan, src1Neg, indexMode, predSel) \
to_LE(src0Sel | (src0Rel << 9) | (src0Chan << 10) | (src0Neg << 12) | (src1Sel << 13) | (src1Rel << 22) \
| (src1Chan << 23) | (src1Neg << 25) | (indexMode << 26) | (predSel << 29))
to_LE(src0Sel | ((src0Rel) << 9) | ((src0Chan) << 10) | ((src0Neg) << 12) | ((src1Sel) << 13) | ((src1Rel) << 22) \
| ((src1Chan) << 23) | ((src1Neg) << 25) | ((indexMode) << 26) | ((predSel) << 29))
#define ALU_WORD1_OP2(src0Abs, src1Abs, updateExecuteMask, updatePred, writeMask, omod, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
to_LE(src0Abs | (src1Abs << 1) | (updateExecuteMask << 2) | (updatePred << 3) | (writeMask << 4) | (omod << 5) | (inst << 7) | \
(encoding << 15) | (bankSwizzle << 18) | (dstGpr << 21) | (dstRel << 28) | (dstChan << 29) | (clamp << 31))
(encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31))
/* Packs the ALU_WORD1_OP3 fields into one value and passes it through to_LE().
 * dstGpr is masked to 7 bits and dstChan to 2 bits before shifting so that
 * out-of-range values cannot spill into the neighbouring fields.
 * The to_LE( call is now properly closed; previously the final ')' was
 * missing, so any use of this macro failed to compile. */
#define ALU_WORD1_OP3(src2Sel, src2Rel, src2Chan, src2Neg, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
   to_LE(src2Sel | (src2Rel << 9) | (src2Chan << 10) | (src2Neg << 12) | (inst << 13) | \
   (encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31))
/* TEX */
#define TEX_WORD0(inst, bcFracMode, fetchWholeQuad, resourceID, srcReg, srcRel, altConst) \
@ -71,17 +96,28 @@
#define TEX_WORD2(offsetX, offsetY, offsetZ, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW) \
to_LE(offsetX | (offsetY << 5) | (offsetZ << 10) | (samplerID << 15) | (srcSelX << 20) | (srcSelY << 23) | (srcSelZ << 26) | (srcSelW << 29))
#define VTX_WORD0(inst, type, buffer_id, srcReg, srcSelX, mega) \
to_LE(inst | (type << 5) | (buffer_id << 8) | (srcReg << 16) | (srcSelX << 24) | (mega << 26))
#define _X 0
#define _Y 1
#define _Z 2
#define _W 3
#define VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW) \
to_LE(dstReg | (dstSelX << 9) | (dstSelY << 12) | (dstSelZ << 15) | (dstSelW << 18) | (1 << 21))
#define VTX_WORD2(offset, ismega) \
to_LE(offset| (ismega << 19))
#define _x 0
#define _y 1
#define _z 2
#define _w 3
#define _0 4
#define _1 5
#define _xyzw 0b1111
#define _xy__ 0b0011
#define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3))
#define ALU_LITERAL(v) to_LE(v)
#define ALU_LITERAL(v) to_QWORD(to_LE(v), 0)
/* SRCx_SEL special constants */
#define ALU_SRC_1_DBL_L 0xF4
@ -97,6 +133,49 @@
#define ALU_SRC_PV 0xFE
#define ALU_SRC_PS 0xFF
/* Suffix macro: expands to `| (1 << 12)`, so `X _NEG` ORs bit 12 into X.
 * NOTE(review): presumably the source-negate flag -- confirm against the
 * word layout it is combined with. */
#define _NEG | (1 << 12)
/* Output-modifier codes, passed as the `omod` argument of ALU_OP2.
 * ALU_OMOD_M2 is what the *_x2 instruction macros use; M4 / D2 presumably
 * mean multiply-by-4 / divide-by-2 -- TODO confirm. */
#define ALU_OMOD_OFF 0x0
#define ALU_OMOD_M2 0x1
#define ALU_OMOD_M4 0x2
#define ALU_OMOD_D2 0x3
/* Raw codes consumed by the VEC_* suffix macros below. */
#define ALU_VEC_012 0x0
#define ALU_VEC_021 0x1
#define ALU_VEC_120 0x2
#define ALU_VEC_102 0x3
#define ALU_VEC_201 0x4
#define ALU_VEC_210 0x5
/* Suffix macros: written directly after an instruction macro, each ORs the
 * matching ALU_VEC_* code, shifted left by 18, into the value built from the
 * second to_QWORD() argument. */
#define VEC_012 | to_QWORD(0, to_LE(ALU_VEC_012 << 18))
#define VEC_021 | to_QWORD(0, to_LE(ALU_VEC_021 << 18))
#define VEC_120 | to_QWORD(0, to_LE(ALU_VEC_120 << 18))
#define VEC_102 | to_QWORD(0, to_LE(ALU_VEC_102 << 18))
#define VEC_201 | to_QWORD(0, to_LE(ALU_VEC_201 << 18))
#define VEC_210 | to_QWORD(0, to_LE(ALU_VEC_210 << 18))
/* Suffix macro: ORs (1 << 22) into the second to_QWORD() argument; used
 * after TEX(), e.g. `TEX(16, 1) VALID_PIX`. */
#define VALID_PIX | to_QWORD(0,to_LE(1 << 22))
/* Raw codes consumed by the SCL_* suffix macros below. */
#define ALU_SCL_210 0x0
#define ALU_SCL_122 0x1
#define ALU_SCL_212 0x2
#define ALU_SCL_221 0x3
/* Suffix macros, same mechanism as VEC_*: the ALU_SCL_* code is shifted
 * left by 18 in the second to_QWORD() argument. */
#define SCL_210 | to_QWORD(0, to_LE(ALU_SCL_210 << 18))
#define SCL_122 | to_QWORD(0, to_LE(ALU_SCL_122 << 18))
#define SCL_212 | to_QWORD(0, to_LE(ALU_SCL_212 << 18))
#define SCL_221 | to_QWORD(0, to_LE(ALU_SCL_221 << 18))
/* Identity macro; presumably only labels the `type` argument at VTX_FETCH
 * call sites. */
#define FETCH_TYPE(x) x
/* MINI(x) is x - 1. MEGA(x) is the same value with bit 31 set; VTX_FETCH
 * reads that bit back (mega >> 31) to fill the ismega field of VTX_WORD2. */
#define MINI(x) ((x) - 1)
#define MEGA(x) (MINI(x) | 0x80000000)
/* Identity macro; presumably only labels the `offset` argument at VTX_FETCH
 * call sites. */
#define OFFSET(x) x
/* NOTE(review): presumably the values accepted by FETCH_TYPE() -- confirm. */
#define VERTEX_DATA 0
#define INSTANCE_DATA 1
#define NO_INDEX_OFFSET 2
/* CF defines */
#define CF_COND_ACTIVE 0x0
#define CF_COND_FALSE 0x1
@ -109,13 +188,18 @@
/* instructions */
/* CF */
#define CF_INST_TEX 0x01
#define CF_INST_CALL_FS 0x13
#define CF_INST_TEX 0x01
#define CF_INST_VTX 0x02
#define CF_INST_ALU 0x08
#define CF_INST_CALL_FS 0x13
#define CF_INST_EMIT_VERTEX 0x15
#define CF_INST_MEM_RING 0x26
/* ALU */
#define ALU_INST_ALU 0x8
#define OP2_INST_MUL 0x1
#define OP2_INST_MOV 0x19
#define OP2_INST_ADD 0x0
#define OP2_INST_MUL 0x1
#define OP2_INST_MUL_IEEE 0x2
#define OP2_INST_MOV 0x19
#define OP2_INST_RECIP_IEEE 0x66
/* EXP */
#define CF_INST_EXP 0x27
#define CF_INST_EXP_DONE 0x28
@ -123,6 +207,9 @@
/* TEX */
#define TEX_INST_SAMPLE 0x10
/* VTX */
#define VTX_INST_FETCH 0x0
/* EXPORT_TYPE */
#define EXPORT_TYPE_PIXEL 0x0
#define EXPORT_TYPE_POS 0x1
@ -142,11 +229,34 @@
#define PIX0 PIX(0)
/* registers */
/* `__` has only bit 7 set (0x80). ALU_OP2 tests that bit in its dstGpr
 * argument and passes 0 instead of 1 for the corresponding ALU_WORD1_OP2
 * field when it is set. */
#define __ (0x80) /* invalid register (write mask off) */
/* Identity wrapper so register numbers read as _R(n) at call sites. */
#define _R(x) x
/* Registers 0..15. */
#define _R0 _R(0x0)
#define _R1 _R(0x1)
#define _R2 _R(0x2)
#define _R3 _R(0x3)
#define _R4 _R(0x4)
#define _R5 _R(0x5)
#define _R6 _R(0x6)
#define _R7 _R(0x7)
#define _R8 _R(0x8)
#define _R9 _R(0x9)
#define _R10 _R(0xA)
#define _R11 _R(0xB)
#define _R12 _R(0xC)
#define _R13 _R(0xD)
#define _R14 _R(0xE)
#define _R15 _R(0xF)
/* Registers 120..127 (0x78..0x7F), the highest numbers that stay below the
 * 0x80 bit used by `__`. */
#define _R120 _R(0x78)
#define _R121 _R(0x79)
#define _R122 _R(0x7A)
#define _R123 _R(0x7B)
#define _R124 _R(0x7C)
#define _R125 _R(0x7D)
#define _R126 _R(0x7E)
#define _R127 _R(0x7F)
/* texture */
#define _t(x) x
@ -156,28 +266,61 @@
#define _s(x) x
#define _s0 _s(0x0)
#define CALL_FS CF_WORD0(0), CF_WORD1(0,0,0,0,0,CF_INST_CALL_FS)
#define _b(x) x
#define TEX(addr, cnt) CF_WORD0(addr), CF_WORD1(0x0, 0x0, CF_COND_ACTIVE, 0x0, (cnt - 1), CF_INST_TEX)
#define ALU(addr, cnt) CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, (cnt - 1), 0x0, ALU_INST_ALU)
#define CALL_FS to_QWORD(CF_DWORD0(0), CF_DWORD1(0,0,0,0,0,CF_INST_CALL_FS))
#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE)
#define TEX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, (cnt - 1), 0x0, CF_INST_TEX))
#define VTX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, (cnt - 1), 0x0, CF_INST_VTX))
#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP)
#define ALU(addr, cnt) to_QWORD(CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, (cnt - 1), 0x0, CF_INST_ALU))
#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, ALU_SRC_0, 0x0, 0x0, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MOV, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0)
#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE))
#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MUL, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0)
#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP))
/* Control-flow instruction with opcode CF_INST_MEM_RING, assembled as one
 * QWORD from CF_ALLOC_EXPORT_WORD0 (arrayBase, dstReg, elemSize) and
 * CF_ALLOC_EXPORT_WORD1_BUF (arraySize, writeMask). */
#define MEM_RING(arrayBase, dstReg, writeMask, arraySize, elemSize) \
to_QWORD(CF_ALLOC_EXPORT_WORD0(arrayBase, 0x00, dstReg, 0x00, 0x00, elemSize), \
CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, CF_INST_MEM_RING))
/* Control-flow instruction with opcode CF_INST_EMIT_VERTEX; the first dword
 * is 0 and every other CF_DWORD1 field is 0. */
#define EMIT_VERTEX to_QWORD(0, CF_DWORD1(0, 0, 0, 0, 0, CF_INST_EMIT_VERTEX))
/* Generic two-source ALU instruction, assembled as one QWORD from ALU_WORD0
 * (source selectors/channels) and ALU_WORD1_OP2 (opcode, output modifier,
 * destination). The fifth ALU_WORD1_OP2 argument is 0 when dstGpr has bit 7
 * set (i.e. the `__` pseudo register) and 1 otherwise -- presumably the
 * write-mask field. */
#define ALU_OP2(inst, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, omod) \
to_QWORD(ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, (((dstGpr&__) >> 7) ^ 0x1), omod, inst, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0))
/* OP2_INST_MOV with the second source fixed to ALU_SRC_0, no output modifier. */
#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)
/* Same as ALU_MOV but with output modifier ALU_OMOD_M2. */
#define ALU_MOV_x2(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M2)
/* OP2_INST_MUL of src0 and src1, no output modifier. */
#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
/* OP2_INST_MUL_IEEE of src0 and src1, no output modifier. */
#define ALU_MUL_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
/* OP2_INST_ADD of src0 and src1, no output modifier. */
#define ALU_ADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
/* Same as ALU_ADD but with output modifier ALU_OMOD_M2. */
#define ALU_ADD_x2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M2)
/* OP2_INST_RECIP_IEEE of src0; the second source is fixed to ALU_SRC_0. */
#define ALU_RECIP_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_RECIP_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)
#define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\
TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED), \
TEX_WORD2(0x0, 0x0, 0x0, samplerID, _X, _Y, _0, _X)
to_QWORD(TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _0, _x), 0x00000000)
#define VTX_FETCH(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, buffer_id, type, mega, offset) \
to_QWORD(VTX_WORD0(VTX_INST_FETCH, type, buffer_id, srcReg, srcSelX, mega), VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW)) , \
to_QWORD(VTX_WORD2(offset, (mega >> 31)), 0x00000000)
/* Initializer-list helpers: _x2(v) expands to two comma-separated copies of
 * v, _x4(v) to four. v is pasted textually, so it is evaluated once per
 * copy. */
#define _x2(v) v, v
#define _x4(v) v, v, v, v

View File

@ -3,6 +3,7 @@
/* Required alignment for the various GX2 resources.
 * GX2_SHADER_ALIGNMENT is passed to __attribute__((aligned(...))) on shader
 * program data, so it is a byte count; the other constants are presumably
 * byte counts as well -- TODO confirm. */
#define GX2_SCAN_BUFFER_ALIGNMENT 0x1000
#define GX2_SHADER_ALIGNMENT 0x100
#define GX2_CONTEXT_STATE_ALIGNMENT 0x100
#define GX2_UNIFORM_BLOCK_ALIGNMENT 0x100
#define GX2_DISPLAY_LIST_ALIGNMENT 0x20
#define GX2_VERTEX_BUFFER_ALIGNMENT 0x40
#define GX2_INDEX_BUFFER_ALIGNMENT 0x20

View File

@ -175,18 +175,19 @@ typedef enum GX2IndexType
typedef enum GX2InvalidateMode
{
GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER = 1 << 0,
GX2_INVALIDATE_MODE_TEXTURE = 1 << 1,
GX2_INVALIDATE_MODE_UNIFORM_BLOCK = 1 << 2,
GX2_INVALIDATE_MODE_SHADER = 1 << 3,
GX2_INVALIDATE_MODE_COLOR_BUFFER = 1 << 4,
GX2_INVALIDATE_MODE_DEPTH_BUFFER = 1 << 5,
GX2_INVALIDATE_MODE_CPU = 1 << 6,
GX2_INVALIDATE_MODE_STREAM_OUT_BUFFER = 1 << 7,
GX2_INVALIDATE_MODE_EXPORT_BUFFER = 1 << 8,
GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER= GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER,
GX2_INVALIDATE_MODE_CPU_TEXTURE = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_TEXTURE,
GX2_INVALIDATE_MODE_CPU_SHADER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_SHADER,
GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER = 1 << 0,
GX2_INVALIDATE_MODE_TEXTURE = 1 << 1,
GX2_INVALIDATE_MODE_UNIFORM_BLOCK = 1 << 2,
GX2_INVALIDATE_MODE_SHADER = 1 << 3,
GX2_INVALIDATE_MODE_COLOR_BUFFER = 1 << 4,
GX2_INVALIDATE_MODE_DEPTH_BUFFER = 1 << 5,
GX2_INVALIDATE_MODE_CPU = 1 << 6,
GX2_INVALIDATE_MODE_STREAM_OUT_BUFFER = 1 << 7,
GX2_INVALIDATE_MODE_EXPORT_BUFFER = 1 << 8,
GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER,
GX2_INVALIDATE_MODE_CPU_TEXTURE = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_TEXTURE,
GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_UNIFORM_BLOCK,
GX2_INVALIDATE_MODE_CPU_SHADER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_SHADER,
} GX2InvalidateMode;
typedef enum GX2InitAttributes
@ -220,6 +221,7 @@ typedef enum GX2LogicOp
typedef enum GX2PrimitiveMode
{
GX2_PRIMITIVE_MODE_POINTS = 1,
GX2_PRIMITIVE_MODE_LINES = 2,
GX2_PRIMITIVE_MODE_LINE_STRIP = 3,
GX2_PRIMITIVE_MODE_TRIANGLES = 4,

View File

@ -69,93 +69,102 @@ typedef struct GX2AttribVar
typedef struct GX2VertexShader
{
struct
union
{
struct
{
unsigned :2;
bool prime_cache_on_const :1;
bool prime_cache_enable :1;
bool uncached_first_inst :1;
unsigned fetch_cache_lines :3;
bool prime_cache_on_draw :1;
bool prime_cache_pgm_en :1;
bool dx10_clamp :1;
unsigned :5;
unsigned stack_size :8;
unsigned num_gprs :8;
}sq_pgm_resources_vs;
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_vs;
bool vgt_primitiveid_en;
struct
{
unsigned : 31;
unsigned enable: 1;
} vgt_primitiveid_en;
struct
{
unsigned :18;
unsigned vs_out_fog_vec_addr : 5;
bool vs_exports_fog : 1;
unsigned :2;
unsigned vs_export_count :5;
bool vs_per_component : 1;
}spi_vs_out_config;
uint32_t num_spi_vs_out_id;
struct
{
uint8_t semantic_3;
uint8_t semantic_2;
uint8_t semantic_1;
uint8_t semantic_0;
}spi_vs_out_id[10];
struct
{
bool clip_dist_ena_7 :1;
bool clip_dist_ena_6 :1;
bool clip_dist_ena_5 :1;
bool clip_dist_ena_4 :1;
bool clip_dist_ena_3 :1;
bool clip_dist_ena_2 :1;
bool clip_dist_ena_1 :1;
bool clip_dist_ena_0 :1;
bool cull_dist_ena_7 :1;
bool cull_dist_ena_6 :1;
bool cull_dist_ena_5 :1;
bool cull_dist_ena_0 :1;
bool cull_dist_ena_4 :1;
bool cull_dist_ena_3 :1;
bool cull_dist_ena_2 :1;
bool cull_dist_ena_1 :1;
bool vs_out_misc_side_bus_ena :1;
bool vs_out_ccdist1_vec_ena :1;
bool vs_out_ccdist0_vec_ena :1;
bool vs_out_misc_vec_ena :1;
bool use_vtx_kill_flag :1;
bool use_vtx_viewport_indx :1;
bool use_vtx_render_target_indx :1;
bool use_vtx_edge_flag :1;
unsigned :6;
bool use_vtx_point_size :1;
bool use_vtx_gs_cut_flag :1;
}pa_cl_vs_out_cntl;
uint32_t sq_vtx_semantic_clear;
uint32_t num_sq_vtx_semantic;
uint32_t sq_vtx_semantic[32]; /* 8 bit */
struct
{
bool buffer_3_en :1;
bool buffer_2_en :1;
bool buffer_1_en :1;
bool buffer_0_en :1;
}vgt_strmout_buffer_en;
struct
{
unsigned :24;
unsigned vtx_reuse_depth :8;
}vgt_vertex_reuse_block_cntl;
struct
{
unsigned :24;
unsigned reuse_depth :8;
}vgt_hos_reuse_depth;
struct
{
unsigned : 18;
unsigned vs_out_fog_vec_addr : 5;
bool vs_exports_fog : 1;
unsigned : 2;
unsigned vs_export_count : 5;
bool vs_per_component : 1;
} spi_vs_out_config;
uint32_t num_spi_vs_out_id;
struct
{
uint8_t semantic_3;
uint8_t semantic_2;
uint8_t semantic_1;
uint8_t semantic_0;
} spi_vs_out_id[10];
struct
{
bool clip_dist_ena_7 : 1;
bool clip_dist_ena_6 : 1;
bool clip_dist_ena_5 : 1;
bool clip_dist_ena_4 : 1;
bool clip_dist_ena_3 : 1;
bool clip_dist_ena_2 : 1;
bool clip_dist_ena_1 : 1;
bool clip_dist_ena_0 : 1;
bool cull_dist_ena_7 : 1;
bool cull_dist_ena_6 : 1;
bool cull_dist_ena_5 : 1;
bool cull_dist_ena_0 : 1;
bool cull_dist_ena_4 : 1;
bool cull_dist_ena_3 : 1;
bool cull_dist_ena_2 : 1;
bool cull_dist_ena_1 : 1;
bool vs_out_misc_side_bus_ena : 1;
bool vs_out_ccdist1_vec_ena : 1;
bool vs_out_ccdist0_vec_ena : 1;
bool vs_out_misc_vec_ena : 1;
bool use_vtx_kill_flag : 1;
bool use_vtx_viewport_indx : 1;
bool use_vtx_render_target_indx : 1;
bool use_vtx_edge_flag : 1;
unsigned : 6;
bool use_vtx_point_size : 1;
bool use_vtx_gs_cut_flag : 1;
} pa_cl_vs_out_cntl;
uint32_t sq_vtx_semantic_clear;
uint32_t num_sq_vtx_semantic;
uint32_t sq_vtx_semantic[32]; /* 8 bit */
struct
{
bool buffer_3_en : 1;
bool buffer_2_en : 1;
bool buffer_1_en : 1;
bool buffer_0_en : 1;
} vgt_strmout_buffer_en;
struct
{
unsigned : 24;
unsigned vtx_reuse_depth : 8;
} vgt_vertex_reuse_block_cntl;
struct
{
unsigned : 24;
unsigned reuse_depth : 8;
} vgt_hos_reuse_depth;
};
u32 vals[52];
} regs;
uint32_t size;
@ -180,7 +189,7 @@ typedef struct GX2VertexShader
uint32_t attribVarCount;
GX2AttribVar *attribVars;
uint32_t ringItemsize;
uint32_t ringItemSize;
BOOL hasStreamOut;
uint32_t streamOutStride[4];
@ -188,129 +197,136 @@ typedef struct GX2VertexShader
GX2RBuffer gx2rBuffer;
} GX2VertexShader;
typedef enum {
typedef enum
{
spi_baryc_cntl_centroids_only = 0,
spi_baryc_cntl_centers_only = 1,
spi_baryc_cntl_centroids_and_centers = 2,
}spi_baryc_cntl;
} spi_baryc_cntl;
typedef enum {
typedef enum
{
db_z_order_late_z = 0,
db_z_order_early_z_then_late_z = 1,
db_z_order_re_z = 2,
db_z_order_early_z_then_re_z = 3,
}db_z_order;
} db_z_order;
typedef struct GX2PixelShader
{
struct
union
{
struct
{
unsigned :2;
bool prime_cache_on_const :1;
bool prime_cache_enable :1;
bool uncached_first_inst :1;
unsigned fetch_cache_lines :3;
bool prime_cache_on_draw :1;
bool prime_cache_pgm_en :1;
bool dx10_clamp :1;
unsigned :5;
unsigned stack_size :8;
unsigned num_gprs :8;
}sq_pgm_resources_ps;
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_ps;
struct
{
unsigned :24;
unsigned export_mode :5;
}sq_pgm_exports_ps;
struct
{
unsigned : 27;
unsigned export_mode : 5;
} sq_pgm_exports_ps;
struct
{
bool baryc_at_sample_ena :1;
bool position_sample :1;
bool linear_gradient_ena :1;
bool persp_gradient_ena :1;
spi_baryc_cntl baryc_sample_cntl :2;
unsigned param_gen_addr : 7;
unsigned param_gen :4;
unsigned position_addr :5;
bool position_centroid :1;
bool position_ena :1;
unsigned :2;
unsigned num_interp :6;
}spi_ps_in_control_0;
struct
{
bool baryc_at_sample_ena : 1;
bool position_sample : 1;
bool linear_gradient_ena : 1;
bool persp_gradient_ena : 1;
spi_baryc_cntl baryc_sample_cntl : 2;
unsigned param_gen_addr : 7;
unsigned param_gen : 4;
unsigned position_addr : 5;
bool position_centroid : 1;
bool position_ena : 1;
unsigned : 2;
unsigned num_interp : 6;
} spi_ps_in_control_0;
struct
{
unsigned :1;
bool position_ulc :1;
unsigned fixed_pt_position_addr :5;
bool fixed_pt_position_ena :1;
unsigned fog_addr :7;
unsigned front_face_addr :5;
bool front_face_all_bits :1;
unsigned front_face_chan :2;
bool front_face_ena :1;
unsigned gen_index_pix_addr :7;
bool gen_index_pix :1;
}spi_ps_in_control_1;
struct
{
unsigned : 1;
bool position_ulc : 1;
unsigned fixed_pt_position_addr : 5;
bool fixed_pt_position_ena : 1;
unsigned fog_addr : 7;
unsigned front_face_addr : 5;
bool front_face_all_bits : 1;
unsigned front_face_chan : 2;
bool front_face_ena : 1;
unsigned gen_index_pix_addr : 7;
bool gen_index_pix : 1;
} spi_ps_in_control_1;
uint32_t num_spi_ps_input_cntl;
uint32_t num_spi_ps_input_cntl;
struct
{
unsigned :13;
bool sel_sample :1;
bool pt_sprite_tex :1;
unsigned cyl_wrap :4;
bool sel_linear :1;
bool sel_centroid :1;
bool flat_shade :1;
unsigned default_val :2;
unsigned semantic :8;
}spi_ps_input_cntls[32];
struct
{
unsigned : 13;
bool sel_sample : 1;
bool pt_sprite_tex : 1;
unsigned cyl_wrap : 4;
bool sel_linear : 1;
bool sel_centroid : 1;
bool flat_shade : 1;
unsigned default_val : 2;
unsigned semantic : 8;
} spi_ps_input_cntls[32];
struct
{
unsigned output7_enable :4;
unsigned output6_enable :4;
unsigned output5_enable :4;
unsigned output4_enable :4;
unsigned output3_enable :4;
unsigned output2_enable :4;
unsigned output1_enable :4;
unsigned output0_enable :4;
}cb_shader_mask;
struct {
unsigned :24;
bool rt7_enable :1;
bool rt6_enable :1;
bool rt5_enable :1;
bool rt4_enable :1;
bool rt3_enable :1;
bool rt2_enable :1;
bool rt1_enable :1;
bool rt0_enable :1;
}cb_shader_control;
struct
{
unsigned :19;
bool alpha_to_mask_disable :1;
bool exec_on_noop :1;
bool exec_on_hier_fail :1;
bool dual_export_enable :1;
bool mask_export_enable :1;
bool coverage_to_mask_enable :1;
bool kill_enable :1;
db_z_order z_order :2;
unsigned :2;
bool z_export_enable :1;
bool stencil_ref_export_enable :1;
} db_shader_control;
struct
{
unsigned output7_enable : 4;
unsigned output6_enable : 4;
unsigned output5_enable : 4;
unsigned output4_enable : 4;
unsigned output3_enable : 4;
unsigned output2_enable : 4;
unsigned output1_enable : 4;
unsigned output0_enable : 4;
} cb_shader_mask;
struct
{
unsigned : 24;
bool rt7_enable : 1;
bool rt6_enable : 1;
bool rt5_enable : 1;
bool rt4_enable : 1;
bool rt3_enable : 1;
bool rt2_enable : 1;
bool rt1_enable : 1;
bool rt0_enable : 1;
} cb_shader_control;
struct
{
unsigned : 19;
bool alpha_to_mask_disable : 1;
bool exec_on_noop : 1;
bool exec_on_hier_fail : 1;
bool dual_export_enable : 1;
bool mask_export_enable : 1;
bool coverage_to_mask_enable : 1;
bool kill_enable : 1;
db_z_order z_order : 2;
unsigned : 2;
bool z_export_enable : 1;
bool stencil_ref_export_enable : 1;
} db_shader_control;
bool spi_input_z;
bool spi_input_z;
};
u32 vals[41];
} regs;
uint32_t size;
@ -335,26 +351,148 @@ typedef struct GX2PixelShader
GX2RBuffer gx2rBuffer;
} GX2PixelShader;
/* Values for the vgt_gs_out_prim_type word of the GX2GeometryShader
 * register block. */
typedef enum
{
   VGT_GS_OUT_PRIMITIVE_TYPE_POINTLIST = 0x0,
   VGT_GS_OUT_PRIMITIVE_TYPE_LINESTRIP = 0x1,
   VGT_GS_OUT_PRIMITIVE_TYPE_TRISTRIP  = 0x2,
   /* Not a primitive type. Presumably present only to force the enum to be
    * 32 bits wide, since it is stored directly as a register word. */
   VGT_GS_OUT_PRIMITIVE_TYPE_MAX_ENUM  = 0xFFFFFFFF
} vgt_gs_out_primitive_type;
/* Values for the 2-bit `mode` field of the vgt_gs_mode register word. */
typedef enum
{
   VGT_GS_ENABLE_MODE_OFF        = 0x0,
   VGT_GS_ENABLE_MODE_SCENARIO_A = 0x1,
   VGT_GS_ENABLE_MODE_SCENARIO_B = 0x2,
   VGT_GS_ENABLE_MODE_SCENARIO_G = 0x3,
} vgt_gs_enable_mode;
/* Values for the 2-bit `cut_mode` field of the vgt_gs_mode register word.
 * The numeric suffix halves as the encoded value increases. */
typedef enum
{
   VGT_GS_CUT_MODE_1024 = 0x0,
   VGT_GS_CUT_MODE_512  = 0x1,
   VGT_GS_CUT_MODE_256  = 0x2,
   VGT_GS_CUT_MODE_128  = 0x3,
} vgt_gs_cut_mode;
typedef struct GX2GeometryShader
{
struct
union
{
uint32_t sq_pgm_resources_gs;
uint32_t vgt_gs_out_prim_type;
uint32_t vgt_gs_mode;
uint32_t pa_cl_vs_out_cntl;
uint32_t sq_pgm_resources_vs;
uint32_t sq_gs_vert_itemsize;
uint32_t spi_vs_out_config;
uint32_t num_spi_vs_out_id;
uint32_t spi_vs_out_id[10];
uint32_t vgt_strmout_buffer_en;
} regs;
struct
{
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_gs;
vgt_gs_out_primitive_type vgt_gs_out_prim_type;
struct
{
unsigned : 14;
bool partial_thd_at_eoi : 1;
bool element_info_en : 1;
bool fast_compute_mode : 1;
bool compute_mode : 1;
unsigned : 2;
bool gs_c_pack_en : 1;
unsigned : 2;
bool mode_hi : 1;
unsigned : 3;
vgt_gs_cut_mode cut_mode : 2;
bool es_passthru : 1;
vgt_gs_enable_mode mode : 2;
} vgt_gs_mode;
struct
{
bool clip_dist_ena_7 : 1;
bool clip_dist_ena_6 : 1;
bool clip_dist_ena_5 : 1;
bool clip_dist_ena_4 : 1;
bool clip_dist_ena_3 : 1;
bool clip_dist_ena_2 : 1;
bool clip_dist_ena_1 : 1;
bool clip_dist_ena_0 : 1;
bool cull_dist_ena_7 : 1;
bool cull_dist_ena_6 : 1;
bool cull_dist_ena_5 : 1;
bool cull_dist_ena_0 : 1;
bool cull_dist_ena_4 : 1;
bool cull_dist_ena_3 : 1;
bool cull_dist_ena_2 : 1;
bool cull_dist_ena_1 : 1;
bool vs_out_misc_side_bus_ena : 1;
bool vs_out_ccdist1_vec_ena : 1;
bool vs_out_ccdist0_vec_ena : 1;
bool vs_out_misc_vec_ena : 1;
bool use_vtx_kill_flag : 1;
bool use_vtx_viewport_indx : 1;
bool use_vtx_render_target_indx : 1;
bool use_vtx_edge_flag : 1;
unsigned : 6;
bool use_vtx_point_size : 1;
bool use_vtx_gs_cut_flag : 1;
} pa_cl_vs_out_cntl;
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_vs;
uint32_t sq_gs_vert_itemsize; /* 15-bit */
struct
{
unsigned : 18;
unsigned vs_out_fog_vec_addr : 5;
bool vs_exports_fog : 1;
unsigned : 2;
unsigned vs_export_count : 5;
bool vs_per_component : 1;
} spi_vs_out_config;
uint32_t num_spi_vs_out_id;
struct
{
uint8_t semantic_3;
uint8_t semantic_2;
uint8_t semantic_1;
uint8_t semantic_0;
} spi_vs_out_id[10];
struct
{
bool buffer_3_en : 1;
bool buffer_2_en : 1;
bool buffer_1_en : 1;
bool buffer_0_en : 1;
} vgt_strmout_buffer_en;
};
u32 vals[19];
} regs;
uint32_t size;
uint8_t *program;
uint32_t vertexProgramSize;
uint8_t *vertexProgram;
uint32_t copyProgramSize;
uint8_t *copyProgram;
GX2ShaderMode mode;
uint32_t uniformBlockCount;
@ -419,6 +557,14 @@ void GX2SetShaderModeEx(GX2ShaderMode mode,
uint32_t numGsGpr, uint32_t numGsStackEntries,
uint32_t numPsGpr, uint32_t numPsStackEntries);
/* Convenience wrapper around GX2SetShaderModeEx() that supplies one of two
 * fixed sets of GPR / stack-entry counts.
 *
 * mode: shader mode to select. GX2_SHADER_MODE_GEOMETRY_SHADER picks the set
 *       with non-zero geometry-shader counts (64 GPRs, 48 stack entries);
 *       every other mode picks the set whose geometry-shader counts are 0.
 */
static inline void GX2SetShaderMode(GX2ShaderMode mode)
{
   switch (mode)
   {
   case GX2_SHADER_MODE_GEOMETRY_SHADER:
      GX2SetShaderModeEx(mode, 44, 32, 64, 48, 76, 176);
      break;

   default:
      GX2SetShaderModeEx(mode, 48, 64, 0, 0, 200, 192);
      break;
   }
}
void GX2SetStreamOutEnable(BOOL enable);
void GX2SetGeometryShaderInputRingBuffer(void *buffer, uint32_t size);
void GX2SetGeometryShaderOutputRingBuffer(void *buffer, uint32_t size);

View File

@ -37,3 +37,5 @@ typedef double f64;
typedef volatile float vf32;
typedef volatile double vf64;
/* Number of elements in `array`. Only meaningful for real arrays: when
 * handed a pointer it yields sizeof(pointer) / sizeof(element), which is not
 * an element count. */
#define countof(array) (sizeof(array) / sizeof((array)[0]))

View File

@ -170,6 +170,15 @@ IMPORT(GX2InitFetchShaderEx);
IMPORT(GX2SetFetchShader);
IMPORT(GX2SetVertexShader);
IMPORT(GX2SetPixelShader);
IMPORT(GX2SetGeometryShader);
IMPORT(GX2SetGeometryUniformBlock);
IMPORT(GX2SetVertexUniformBlock);
IMPORT(GX2SetPixelUniformBlock);
IMPORT(GX2CalcGeometryShaderInputRingBufferSize);
IMPORT(GX2CalcGeometryShaderOutputRingBufferSize);
IMPORT(GX2SetGeometryShaderInputRingBuffer);
IMPORT(GX2SetGeometryShaderOutputRingBuffer);
IMPORT(GX2SetShaderModeEx);
IMPORT(GX2SetAttribBuffer);
IMPORT(GX2InitTextureRegs);
IMPORT(GX2InitSampler);

View File

@ -16,125 +16,67 @@
#include <stddef.h>
#include <malloc.h>
#include <string.h>
#include <wiiu/gx2/common.h>
#include "tex_shader.h"
#include "gx2_shader_inl.h"
/*******************************************************
* Vertex Shader GLSL source:
*******************************************************
attribute vec2 position;
attribute vec2 tex_coord_in;
attribute vec4 color_in;
varying vec2 tex_coord;
varying vec4 color;
void main()
{
gl_Position = vec4(position, 0.0, 1.0);
tex_coord = tex_coord_in;
color = color_in;
}
******************************************************
* assembly:
******************************************************
00 CALL_FS NO_BARRIER
01 ALU: ADDR(32) CNT(5)
0 x: MOV R3.x, R3.x
y: MOV R3.y, R3.y
z: MOV R2.z, 0.0f
w: MOV R2.w, (0x3F800000, 1.0f).x
02 EXP_DONE: POS0, R2
03 EXP: PARAM0, R1 NO_BARRIER
04 EXP_DONE: PARAM1, R3.xyzz NO_BARRIER
END_OF_PROGRAM
******************************************************
*/
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u32 cf[32 * 2]; /* first ADDR() * 2 */
u32 alu[5 * 2]; /* alu CNT() * 2 */
u64 cf[16];
} vs_program =
{
{
CALL_FS NO_BARRIER,
ALU(32, 5),
EXP_DONE(POS0, _R2, _X, _Y, _Z, _W),
EXP(PARAM0, _R1, _X, _Y, _Z, _W) NO_BARRIER,
EXP_DONE(PARAM1, _R3, _X, _Y, _Z, _Z) NO_BARRIER
EXP_DONE(POS0, _R1, _x, _y, _0, _1),
EXP_DONE(PARAM0, _R2, _x, _y, _0, _0) NO_BARRIER
END_OF_PROGRAM
},
{
ALU_MOV(_R3,_X, _R3,_X),
ALU_MOV(_R3,_Y, _R3,_Y),
ALU_MOV(_R2,_Z, ALU_SRC_0,_X),
ALU_LAST
ALU_MOV(_R2,_W, ALU_SRC_LITERAL,_X), ALU_LITERAL(0x3F800000)
}
};
/*******************************************************
* Pixel Shader GLSL source:
*******************************************************
varying vec2 tex_coord;
varying vec4 color;
uniform sampler2D s;
void main()
{
gl_FragColor = texture2D(s, tex_coord) * color;
}
******************************************************
* assembly:
******************************************************
00 TEX: ADDR(48) CNT(1) VALID_PIX
0 SAMPLE R1, R1.xy0x, t0, s0
01 ALU: ADDR(32) CNT(4)
1 x: MUL R0.x, R0.x, R1.x
y: MUL R0.y, R0.y, R1.y
z: MUL R0.z, R0.z, R1.z
w: MUL R0.w, R0.w, R1.w
02 EXP_DONE: PIX0, R0
END_OF_PROGRAM
*******************************************************
*/
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u32 cf[32 * 2]; /* first ADDR() * 2 */
u32 alu[(48-32) * 2]; /* (tex ADDR() - alu ADDR()) * 2 */
u32 tex[1 * 3]; /* tex CNT() * 3 */
} ps_program =
u64 cf[16];
u64 tex[1 * 2];
}
ps_program =
{
{
TEX(48, 1) VALID_PIX,
ALU(32, 4),
EXP_DONE(PIX0, _R0, _X, _Y, _Z, _W)
TEX(16, 1) VALID_PIX,
EXP_DONE(PIX0, _R0, _x, _y, _z, _w)
END_OF_PROGRAM
},
{
ALU_MUL(_R0,_X, _R0,_X, _R1,_X),
ALU_MUL(_R0,_Y, _R0,_Y, _R1,_Y),
ALU_MUL(_R0,_Z, _R0,_Z, _R1,_Z),
ALU_LAST
ALU_MUL(_R0,_W, _R0,_W, _R1,_W),
},
{
TEX_SAMPLE(_R1,_X,_Y,_Z,_W, _R1,_X,_Y,_0,_X, _t0, _s0)
TEX_SAMPLE(_R0,_x,_y,_z,_w, _R0,_x,_y,_0,_0, _t0, _s0)
}
};
tex_shader_t tex_shader =
static GX2AttribVar attributes[] =
{
{ "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0},
{ "tex_coord", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1},
};
static GX2AttribStream attribute_stream[] =
{
{0, 0, offsetof(tex_shader_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT},
{1, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT},
};
static GX2SamplerVar samplers[] =
{
{ "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
};
GX2Shader tex_shader =
{
{
{
.sq_pgm_resources_vs.num_gprs = 4,
.sq_pgm_resources_vs.num_gprs = 3,
.sq_pgm_resources_vs.stack_size = 1,
.spi_vs_out_config.vs_export_count = 1,
.num_spi_vs_out_id = 1,
@ -150,10 +92,10 @@ tex_shader_t tex_shader =
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
},
.sq_vtx_semantic_clear = ~0x7,
.num_sq_vtx_semantic = 3,
.sq_vtx_semantic_clear = ~0x3,
.num_sq_vtx_semantic = 2,
{
0, 1, 2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0, 1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
},
.vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE,
@ -162,11 +104,11 @@ tex_shader_t tex_shader =
.size = sizeof(vs_program),
.program = (uint8_t*)&vs_program,
.mode = GX2_SHADER_MODE_UNIFORM_REGISTER,
.attribVarCount = sizeof(tex_shader.attributes) / sizeof(GX2AttribVar), (GX2AttribVar*) &tex_shader.attributes,
.attribVarCount = countof(attributes), attributes,
},
{
{
.sq_pgm_resources_ps.num_gprs = 2,
.sq_pgm_resources_ps.num_gprs = 1,
.sq_pgm_exports_ps.export_mode = 0x2,
.spi_ps_in_control_0.num_interp = 2,
.spi_ps_in_control_0.persp_gradient_ena = 1,
@ -179,28 +121,7 @@ tex_shader_t tex_shader =
.size = sizeof(ps_program),
.program = (uint8_t*)&ps_program,
.mode = GX2_SHADER_MODE_UNIFORM_REGISTER,
.samplerVarCount = 1,
.samplerVars = (GX2SamplerVar*) &tex_shader.sampler,
},
.sampler = { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
.attributes = {
.color = { "color_in", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 0},
.position = { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1},
.tex_coord = { "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 2},
},
.attribute_stream = {
.color = {
0, 0, offsetof(tex_shader_vertex_t, color), GX2_ATTRIB_FORMAT_UNORM_8_8_8_8,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _Z, _W), GX2_ENDIAN_SWAP_DEFAULT
},
.position = {
1, 0, offsetof(tex_shader_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT
},
.tex_coord = {
2, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT
}
},
{},
.samplerVarCount = countof(samplers), samplers,
},
.attribute_stream = attribute_stream,
};

View File

@ -15,32 +15,13 @@
#ifndef TEX_SHADER_H
#define TEX_SHADER_H
#include <wiiu/gx2.h>
#include <wiiu/shader_utils.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct __attribute__((aligned(GX2_VERTEX_BUFFER_ALIGNMENT)))
{
GX2VertexShader vs;
GX2PixelShader ps;
GX2SamplerVar sampler;
struct
{
GX2AttribVar color;
GX2AttribVar position;
GX2AttribVar tex_coord;
} attributes;
struct
{
GX2AttribStream color;
GX2AttribStream position;
GX2AttribStream tex_coord;
} attribute_stream;
GX2FetchShader fs;
}tex_shader_t;
typedef struct
{
struct
@ -54,11 +35,9 @@ typedef struct
float u;
float v;
}coord;
u32 color;
}tex_shader_vertex_t;
extern tex_shader_t tex_shader;
extern GX2Shader tex_shader;
#ifdef __cplusplus
}