Merge pull request #6042 from aliaspider/master

(WIIU) add a sprite shader.
This commit is contained in:
Twinaphex 2018-01-04 18:00:18 +01:00 committed by GitHub
commit c04b555f04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 1369 additions and 632 deletions

View File

@ -66,7 +66,9 @@ else
DEFINES += -DHAVE_FILTERS_BUILTIN DEFINES += -DHAVE_FILTERS_BUILTIN
OBJ += wiiu/system/missing_libc_functions.o OBJ += wiiu/system/missing_libc_functions.o
OBJ += wiiu/shader_utils.o
OBJ += wiiu/tex_shader.o OBJ += wiiu/tex_shader.o
OBJ += wiiu/sprite_shader.o
ifeq ($(GRIFFIN_BUILD), 1) ifeq ($(GRIFFIN_BUILD), 1)
OBJ += griffin/griffin.o OBJ += griffin/griffin.o

View File

@ -124,14 +124,14 @@ struct command
#endif #endif
}; };
#ifdef HAVE_CHEEVOS #if defined(HAVE_COMMAND) && defined(HAVE_CHEEVOS)
static bool command_read_ram(const char *arg); static bool command_read_ram(const char *arg);
static bool command_write_ram(const char *arg); static bool command_write_ram(const char *arg);
#endif #endif
static const struct cmd_action_map action_map[] = { static const struct cmd_action_map action_map[] = {
{ "SET_SHADER", command_set_shader, "<shader path>" }, { "SET_SHADER", command_set_shader, "<shader path>" },
#ifdef HAVE_CHEEVOS #if defined(HAVE_COMMAND) && defined(HAVE_CHEEVOS)
{ "READ_CORE_RAM", command_read_ram, "<address> <number of bytes>" }, { "READ_CORE_RAM", command_read_ram, "<address> <number of bytes>" },
{ "WRITE_CORE_RAM", command_write_ram, "<address> <byte1> <byte2> ..." }, { "WRITE_CORE_RAM", command_write_ram, "<address> <byte1> <byte2> ..." },
#endif #endif

View File

@ -351,16 +351,17 @@ static ssize_t wiiu_log_write(struct _reent *r, void *fd, const char *ptr, size_
wiiu_log_lock = 1; wiiu_log_lock = 1;
int ret; int ret;
int remaining = len;
while (len > 0) while (remaining > 0)
{ {
int block = len < 1400 ? len : 1400; // take max 1400 bytes per UDP packet int block = remaining < 1400 ? remaining : 1400; // take max 1400 bytes per UDP packet
ret = send(wiiu_log_socket, ptr, block, 0); ret = send(wiiu_log_socket, ptr, block, 0);
if (ret < 0) if (ret < 0)
break; break;
len -= ret; remaining -= ret;
ptr += ret; ptr += ret;
} }

View File

@ -1,6 +1,7 @@
#include <wiiu/gx2.h> #include <wiiu/gx2.h>
#include "wiiu/tex_shader.h" #include "wiiu/tex_shader.h"
#include "wiiu/sprite_shader.h"
#undef _X #undef _X
#undef _B #undef _B
@ -21,8 +22,6 @@
#define COLOR_ARGB(r, g, b, a) (((u32)(a) << 24) | ((u32)(r) << 16) | ((u32)(g) << 8) | ((u32)(b) << 0)) #define COLOR_ARGB(r, g, b, a) (((u32)(a) << 24) | ((u32)(r) << 16) | ((u32)(g) << 8) | ((u32)(b) << 0))
#define COLOR_RGBA(r, g, b, a) (((u32)(r) << 24) | ((u32)(g) << 16) | ((u32)(b) << 8) | ((u32)(a) << 0)) #define COLOR_RGBA(r, g, b, a) (((u32)(r) << 24) | ((u32)(g) << 16) | ((u32)(b) << 8) | ((u32)(a) << 0))
//#define GX2_CAN_ACCESS_DATA_SECTION
typedef struct typedef struct
{ {
int width; int width;
@ -33,20 +32,19 @@ typedef struct
struct gx2_overlay_data struct gx2_overlay_data
{ {
GX2Texture tex; GX2Texture tex;
tex_shader_vertex_t v[4]; sprite_vertex_t v;
float alpha_mod; float alpha_mod;
}; };
typedef struct typedef struct
{ {
tex_shader_t* shader;
struct struct
{ {
GX2Texture texture; GX2Texture texture;
int width; int width;
int height; int height;
bool enable; bool enable;
tex_shader_vertex_t* v; sprite_vertex_t* v;
} menu; } menu;
#ifdef HAVE_OVERLAY #ifdef HAVE_OVERLAY
@ -60,12 +58,19 @@ typedef struct
GX2Sampler sampler_linear; GX2Sampler sampler_linear;
GX2Texture texture; GX2Texture texture;
tex_shader_vertex_t* v; tex_shader_vertex_t* v;
GX2_vec2* ubo_vp;
GX2_vec2* ubo_tex;
void* input_ring_buffer;
u32 input_ring_buffer_size;
void* output_ring_buffer;
u32 output_ring_buffer_size;
int width; int width;
int height; int height;
struct struct
{ {
tex_shader_vertex_t* v; sprite_vertex_t* v;
int size; int size;
int current; int current;
} vertex_cache; } vertex_cache;

View File

@ -145,7 +145,6 @@ static void wiiu_gfx_update_viewport(wiiu_video_t* wiiu)
wiiu->vp.height = height; wiiu->vp.height = height;
} }
float scale_w = wiiu->color_buffer.surface.width / wiiu->render_mode.width; float scale_w = wiiu->color_buffer.surface.width / wiiu->render_mode.width;
float scale_h = wiiu->color_buffer.surface.height / wiiu->render_mode.height; float scale_h = wiiu->color_buffer.surface.height / wiiu->render_mode.height;
wiiu_set_position(wiiu->v, &wiiu->color_buffer, wiiu_set_position(wiiu->v, &wiiu->color_buffer,
@ -154,6 +153,8 @@ static void wiiu_gfx_update_viewport(wiiu_video_t* wiiu)
(wiiu->vp.x + wiiu->vp.width) * scale_w, (wiiu->vp.x + wiiu->vp.width) * scale_w,
(wiiu->vp.y + wiiu->vp.height) * scale_h); (wiiu->vp.y + wiiu->vp.height) * scale_h);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
wiiu->should_resize = false; wiiu->should_resize = false;
} }
@ -275,70 +276,25 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD, GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD,
GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD); GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD);
GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE); GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
#ifdef GX2_CAN_ACCESS_DATA_SECTION
wiiu->shader = &tex_shader;
#else
/* Initialize shader */ GX2InitShader(&tex_shader);
wiiu->shader = MEM2_alloc(sizeof(tex_shader), 0x1000); GX2InitShader(&sprite_shader);
memcpy(wiiu->shader, &tex_shader, sizeof(tex_shader)); GX2SetShader(&tex_shader);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU, wiiu->shader, sizeof(tex_shader));
wiiu->shader->vs.program = MEM2_alloc(wiiu->shader->vs.size, GX2_SHADER_ALIGNMENT); wiiu->ubo_vp = MEM1_alloc(sizeof(*wiiu->ubo_vp), GX2_UNIFORM_BLOCK_ALIGNMENT);
memcpy(wiiu->shader->vs.program, tex_shader.vs.program, wiiu->shader->vs.size); wiiu->ubo_vp->width = wiiu->color_buffer.surface.width;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->vs.program, wiiu->shader->vs.size); wiiu->ubo_vp->height = wiiu->color_buffer.surface.height;
wiiu->shader->vs.attribVars = MEM2_alloc(wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar), GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ubo_vp, sizeof(*wiiu->ubo_vp));
GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->vs.attribVars, tex_shader.vs.attribVars ,
wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar));
wiiu->shader->ps.program = MEM2_alloc(wiiu->shader->ps.size, GX2_SHADER_ALIGNMENT); wiiu->ubo_tex = MEM1_alloc(sizeof(*wiiu->ubo_tex), GX2_UNIFORM_BLOCK_ALIGNMENT);
memcpy(wiiu->shader->ps.program, tex_shader.ps.program, wiiu->shader->ps.size); wiiu->ubo_tex->width = 1.0;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->ps.program, wiiu->shader->ps.size); wiiu->ubo_tex->height = 1.0;
wiiu->shader->ps.samplerVars = MEM2_alloc(wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar), GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ubo_tex, sizeof(*wiiu->ubo_tex));
GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->ps.samplerVars, tex_shader.ps.samplerVars,
wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar));
#endif wiiu->input_ring_buffer_size = GX2CalcGeometryShaderInputRingBufferSize(sprite_shader.vs.ringItemSize);
wiiu->shader->fs.size = GX2CalcFetchShaderSizeEx(sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream), wiiu->output_ring_buffer_size = GX2CalcGeometryShaderOutputRingBufferSize(sprite_shader.gs.ringItemSize);
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE); wiiu->input_ring_buffer = MEM1_alloc(wiiu->input_ring_buffer_size, 0x1000);
wiiu->shader->fs.program = MEM2_alloc(wiiu->shader->fs.size, GX2_SHADER_ALIGNMENT); wiiu->output_ring_buffer = MEM1_alloc(wiiu->output_ring_buffer_size, 0x1000);
GX2InitFetchShaderEx(&wiiu->shader->fs, (uint8_t*)wiiu->shader->fs.program,
sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream),
(GX2AttribStream*)&wiiu->shader->attribute_stream,
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->fs.program, wiiu->shader->fs.size);
GX2SetVertexShader(&wiiu->shader->vs);
GX2SetPixelShader(&wiiu->shader->ps);
GX2SetFetchShader(&wiiu->shader->fs);
wiiu->v = MEM2_alloc(4 * sizeof(*wiiu->v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0,
wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation);
wiiu->v[0].color = 0xFFFFFFFF;
wiiu->v[1].color = 0xFFFFFFFF;
wiiu->v[2].color = 0xFFFFFFFF;
wiiu->v[3].color = 0xFFFFFFFF;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
wiiu->menu.v = MEM2_alloc(4 * sizeof(*wiiu->menu.v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->menu.v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->menu.v, &wiiu->menu.texture, 0, 0,
wiiu->menu.texture.surface.width, wiiu->menu.texture.surface.height, 0);
wiiu->menu.v[0].color = 0xFFFFFF80;
wiiu->menu.v[1].color = 0xFFFFFF80;
wiiu->menu.v[2].color = 0xFFFFFF80;
wiiu->menu.v[3].color = 0xFFFFFF80;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
/* Initialize frame texture */ /* Initialize frame texture */
memset(&wiiu->texture, 0, sizeof(GX2Texture)); memset(&wiiu->texture, 0, sizeof(GX2Texture));
@ -389,6 +345,28 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image, GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image,
wiiu->menu.texture.surface.imageSize); wiiu->menu.texture.surface.imageSize);
wiiu->v = MEM2_alloc(4 * sizeof(*wiiu->v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0,
wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
wiiu->menu.v = MEM2_alloc(4 * sizeof(*wiiu->menu.v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu->menu.v->pos.x = 0.0f;
wiiu->menu.v->pos.y = 0.0f;
wiiu->menu.v->pos.width = wiiu->color_buffer.surface.width;
wiiu->menu.v->pos.height = wiiu->color_buffer.surface.height;
wiiu->menu.v->coord.u = 0.0f;
wiiu->menu.v->coord.v = 0.0f;
wiiu->menu.v->coord.width = 1.0f;
wiiu->menu.v->coord.height = 1.0f;
wiiu->menu.v->color = 0xFFFFFF80;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
wiiu->vertex_cache.size = 0x1000; wiiu->vertex_cache.size = 0x1000;
wiiu->vertex_cache.current = 0; wiiu->vertex_cache.current = 0;
wiiu->vertex_cache.v = MEM2_alloc(wiiu->vertex_cache.size wiiu->vertex_cache.v = MEM2_alloc(wiiu->vertex_cache.size
@ -399,8 +377,8 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2InitSampler(&wiiu->sampler_linear, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR); GX2InitSampler(&wiiu->sampler_linear, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR);
/* set Texture and Sampler */ /* set Texture and Sampler */
GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location); GX2SetPixelTexture(&wiiu->texture, tex_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location); GX2SetPixelSampler(&wiiu->sampler_linear, tex_shader.ps.samplerVars[0].location);
/* clear leftover image */ /* clear leftover image */
GX2ClearColor(&wiiu->color_buffer, 0.0f, 0.0f, 0.0f, 1.0f); GX2ClearColor(&wiiu->color_buffer, 0.0f, 0.0f, 0.0f, 1.0f);
@ -450,15 +428,11 @@ static void gx2_overlay_tex_geom(void *data, unsigned image,
if (!o) if (!o)
return; return;
o->v[0].coord.u = x; o->v.coord.u = x;
o->v[0].coord.v = y; o->v.coord.v = y;
o->v[1].coord.u = x + w; o->v.coord.width = w;
o->v[1].coord.v = y; o->v.coord.height = h;
o->v[2].coord.u = x + w; GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v, sizeof(o->v));
o->v[2].coord.v = y + h;
o->v[3].coord.u = x ;
o->v[3].coord.v = y + h;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v, sizeof(o->v));
} }
static void gx2_overlay_vertex_geom(void *data, unsigned image, static void gx2_overlay_vertex_geom(void *data, unsigned image,
@ -467,15 +441,6 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image,
wiiu_video_t *gx2 = (wiiu_video_t*)data; wiiu_video_t *gx2 = (wiiu_video_t*)data;
struct gx2_overlay_data *o = NULL; struct gx2_overlay_data *o = NULL;
/* Flipped, so we preserve top-down semantics. */
y = 1.0f - y;
h = -h;
/* expand from 0 - 1 to -1 - 1 */
x = (x * 2.0f) - 1.0f;
y = (y * 2.0f) - 1.0f;
w = (w * 2.0f);
h = (h * 2.0f);
if (gx2) if (gx2)
o = (struct gx2_overlay_data*)&gx2->overlay[image]; o = (struct gx2_overlay_data*)&gx2->overlay[image];
@ -483,19 +448,12 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image,
if (!o) if (!o)
return; return;
o->v[0].pos.x = x; o->v.pos.x = x * gx2->color_buffer.surface.width;
o->v[0].pos.y = y; o->v.pos.y = y * gx2->color_buffer.surface.height;
o->v.pos.width = w * gx2->color_buffer.surface.width;
o->v.pos.height = h * gx2->color_buffer.surface.height;
o->v[1].pos.x = x + w; GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v,sizeof(o->v));
o->v[1].pos.y = y;
o->v[2].pos.x = x + w;
o->v[2].pos.y = y + h;
o->v[3].pos.x = x ;
o->v[3].pos.y = y + h;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v,sizeof(o->v));
} }
static void gx2_free_overlay(wiiu_video_t *gx2) static void gx2_free_overlay(wiiu_video_t *gx2)
@ -555,13 +513,9 @@ static bool gx2_overlay_load(void *data,
gx2_overlay_tex_geom(gx2, i, 0, 0, 1, 1); gx2_overlay_tex_geom(gx2, i, 0, 0, 1, 1);
gx2_overlay_vertex_geom(gx2, i, 0, 0, 1, 1); gx2_overlay_vertex_geom(gx2, i, 0, 0, 1, 1);
gx2->overlay[i].alpha_mod = 1.0f; gx2->overlay[i].alpha_mod = 1.0f;
gx2->overlay[i].v[0].color = 0xFFFFFFFF; gx2->overlay[i].v.color = 0xFFFFFFFF;
gx2->overlay[i].v[1].color = 0xFFFFFFFF;
gx2->overlay[i].v[2].color = 0xFFFFFFFF;
gx2->overlay[i].v[3].color = 0xFFFFFFFF;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v,sizeof(o->v));
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v,sizeof(o->v));
} }
@ -588,11 +542,8 @@ static void gx2_overlay_set_alpha(void *data, unsigned image, float mod)
if (gx2) if (gx2)
{ {
gx2->overlay[image].alpha_mod = mod; gx2->overlay[image].alpha_mod = mod;
gx2->overlay[image].v[0].color = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod); gx2->overlay[image].v.color = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod);
gx2->overlay[image].v[1].color = gx2->overlay[image].v[0].color; GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &gx2->overlay[image].v, sizeof(gx2->overlay[image].v));
gx2->overlay[image].v[2].color = gx2->overlay[image].v[0].color;
gx2->overlay[image].v[3].color = gx2->overlay[image].v[0].color;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, gx2->overlay[image].v, sizeof(gx2->overlay[image].v));
} }
} }
@ -604,12 +555,12 @@ static void gx2_render_overlay(void *data)
for (i = 0; i < gx2->overlays; i++){ for (i = 0; i < gx2->overlays; i++){
GX2SetAttribBuffer(0, sizeof(gx2->overlay[i].v), sizeof(*gx2->overlay[i].v), gx2->overlay[i].v); GX2SetAttribBuffer(0, sizeof(gx2->overlay[i].v), sizeof(gx2->overlay[i].v), &gx2->overlay[i].v);
GX2SetPixelTexture(&gx2->overlay[i].tex, gx2->shader->sampler.location); GX2SetPixelTexture(&gx2->overlay[i].tex, sprite_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&gx2->sampler_linear, gx2->shader->sampler.location); GX2SetPixelSampler(&gx2->sampler_linear, sprite_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1); GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, 0, 1);
} }
@ -657,31 +608,26 @@ static void wiiu_gfx_free(void* data)
GX2SetTVEnable(GX2_DISABLE); GX2SetTVEnable(GX2_DISABLE);
GX2SetDRCEnable(GX2_DISABLE); GX2SetDRCEnable(GX2_DISABLE);
GX2DestroyShader(&tex_shader);
GX2DestroyShader(&sprite_shader);
MEM2_free(wiiu->ctx_state); MEM2_free(wiiu->ctx_state);
MEM2_free(wiiu->cmd_buffer); MEM2_free(wiiu->cmd_buffer);
MEM2_free(wiiu->texture.surface.image); MEM2_free(wiiu->texture.surface.image);
MEM2_free(wiiu->menu.texture.surface.image); MEM2_free(wiiu->menu.texture.surface.image);
MEM2_free(wiiu->v);
MEM2_free(wiiu->menu.v);
MEM2_free(wiiu->vertex_cache.v); MEM2_free(wiiu->vertex_cache.v);
MEM1_free(wiiu->color_buffer.surface.image); MEM1_free(wiiu->color_buffer.surface.image);
MEM1_free(wiiu->ubo_vp);
MEM1_free(wiiu->ubo_tex);
MEM1_free(wiiu->input_ring_buffer);
MEM1_free(wiiu->output_ring_buffer);
MEMBucket_free(wiiu->tv_scan_buffer); MEMBucket_free(wiiu->tv_scan_buffer);
MEMBucket_free(wiiu->drc_scan_buffer); MEMBucket_free(wiiu->drc_scan_buffer);
MEM2_free(wiiu->shader->fs.program);
#ifndef GX2_CAN_ACCESS_DATA_SECTION
MEM2_free(wiiu->shader->vs.program);
MEM2_free(wiiu->shader->vs.attribVars);
MEM2_free(wiiu->shader->ps.program);
MEM2_free(wiiu->shader->ps.samplerVars);
MEM2_free(wiiu->shader);
#endif
MEM2_free(wiiu->v);
MEM2_free(wiiu->menu.v);
free(wiiu); free(wiiu);
} }
@ -794,17 +740,27 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->texture.surface.image, GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->texture.surface.image,
wiiu->texture.surface.imageSize); wiiu->texture.surface.imageSize);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0, width, height, wiiu->rotation); wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0, width, height, wiiu->rotation);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
} }
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v); GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_REGISTER);
GX2SetShader(&tex_shader);
GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location); GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest, GX2SetPixelTexture(&wiiu->texture, tex_shader.ps.samplerVars[0].location);
wiiu->shader->sampler.location); GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest, tex_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1); GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
GX2SetShaderMode(GX2_SHADER_MODE_GEOMETRY_SHADER);
GX2SetShader(&sprite_shader);
GX2SetGeometryShaderInputRingBuffer(wiiu->input_ring_buffer, wiiu->input_ring_buffer_size);
GX2SetGeometryShaderOutputRingBuffer(wiiu->output_ring_buffer, wiiu->output_ring_buffer_size);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[0].offset, sprite_shader.vs.uniformBlocks[0].size, wiiu->ubo_vp);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, wiiu->ubo_tex);
#ifdef HAVE_OVERLAY #ifdef HAVE_OVERLAY
if (wiiu->overlay_enable) if (wiiu->overlay_enable)
gx2_render_overlay(wiiu); gx2_render_overlay(wiiu);
@ -814,16 +770,16 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
{ {
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->menu.v), sizeof(*wiiu->menu.v), wiiu->menu.v); GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->menu.v), sizeof(*wiiu->menu.v), wiiu->menu.v);
GX2SetPixelTexture(&wiiu->menu.texture, wiiu->shader->sampler.location); GX2SetPixelTexture(&wiiu->menu.texture, sprite_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location); GX2SetPixelSampler(&wiiu->sampler_linear, sprite_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1); GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, 0, 1);
} }
wiiu->vertex_cache.current = 0; wiiu->vertex_cache.current = 0;
GX2SetAttribBuffer(0, wiiu->vertex_cache.size * sizeof(*wiiu->vertex_cache.v), GX2SetAttribBuffer(0, wiiu->vertex_cache.size * sizeof(*wiiu->vertex_cache.v),
sizeof(*wiiu->vertex_cache.v), wiiu->vertex_cache.v); sizeof(*wiiu->vertex_cache.v), wiiu->vertex_cache.v);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location); GX2SetPixelSampler(&wiiu->sampler_linear, sprite_shader.ps.samplerVars[0].location);
wiiu->render_msg_enabled = true; wiiu->render_msg_enabled = true;
@ -837,7 +793,6 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER,
wiiu->vertex_cache.v, wiiu->vertex_cache.current * sizeof(*wiiu->vertex_cache.v)); wiiu->vertex_cache.v, wiiu->vertex_cache.current * sizeof(*wiiu->vertex_cache.v));
if (wiiu->menu.enable) if (wiiu->menu.enable)
GX2DrawDone(); GX2DrawDone();
@ -895,7 +850,10 @@ static void wiiu_gfx_set_rotation(void* data,
{ {
wiiu_video_t* wiiu = (wiiu_video_t*) data; wiiu_video_t* wiiu = (wiiu_video_t*) data;
if(wiiu) if(wiiu)
{
wiiu->rotation = rotation; wiiu->rotation = rotation;
wiiu->should_resize = true;
}
} }
static void wiiu_gfx_viewport_info(void* data, static void wiiu_gfx_viewport_info(void* data,
@ -1010,7 +968,16 @@ static void wiiu_gfx_set_texture_frame(void* data, const void* frame, bool rgb32
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image, GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image,
wiiu->menu.texture.surface.imageSize); wiiu->menu.texture.surface.imageSize);
wiiu_set_tex_coords(wiiu->menu.v, &wiiu->menu.texture, 0, 0, width, height, 0); wiiu->menu.v->pos.x = 0.0f;
wiiu->menu.v->pos.y = 0.0f;
wiiu->menu.v->pos.width = width;
wiiu->menu.v->pos.height = height;
wiiu->menu.v->coord.u = 0.0f;
wiiu->menu.v->coord.v = 0.0f;
wiiu->menu.v->coord.width = (float)width / wiiu->texture.surface.width;
wiiu->menu.v->coord.height = (float)height / wiiu->texture.surface.height;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
} }
static void wiiu_gfx_set_texture_enable(void* data, bool state, bool full_screen) static void wiiu_gfx_set_texture_enable(void* data, bool state, bool full_screen)

View File

@ -32,6 +32,7 @@
typedef struct typedef struct
{ {
GX2Texture texture; GX2Texture texture;
GX2_vec2* ubo_tex;
const font_renderer_driver_t* font_driver; const font_renderer_driver_t* font_driver;
void* font_data; void* font_data;
struct font_atlas* atlas; struct font_atlas* atlas;
@ -79,6 +80,13 @@ static void* wiiu_font_init_font(void* data, const char* font_path,
font->atlas->dirty = false; font->atlas->dirty = false;
font->ubo_tex = MEM1_alloc(sizeof(*font->ubo_tex), GX2_UNIFORM_BLOCK_ALIGNMENT);
font->ubo_tex->width = font->texture.surface.width;
font->ubo_tex->height = font->texture.surface.height;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, font->ubo_tex,
sizeof(*font->ubo_tex));
return font; return font;
} }
@ -93,6 +101,7 @@ static void wiiu_font_free_font(void* data, bool is_threaded)
font->font_driver->free(font->font_data); font->font_driver->free(font->font_data);
MEM1_free(font->texture.surface.image); MEM1_free(font->texture.surface.image);
MEM1_free(font->ubo_tex);
free(font); free(font);
} }
@ -142,9 +151,7 @@ static void wiiu_font_render_line(
unsigned width = video_info->width; unsigned width = video_info->width;
unsigned height = video_info->height; unsigned height = video_info->height;
int x = roundf(pos_x * width); int x = roundf(pos_x * width);
int y = roundf((1.0f - pos_y) * height); int y = roundf((1.0 - pos_y) * height);
int delta_x = 0;
int delta_y = 0;
if(wiiu->vertex_cache.current + (msg_len * 4) > wiiu->vertex_cache.size) if(wiiu->vertex_cache.current + (msg_len * 4) > wiiu->vertex_cache.size)
return; return;
@ -160,11 +167,10 @@ static void wiiu_font_render_line(
break; break;
} }
tex_shader_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current; sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
for (i = 0; i < msg_len; i++) for (i = 0; i < msg_len; i++)
{ {
int off_x, off_y, tex_x, tex_y, width, height;
const char* msg_tmp = &msg[i]; const char* msg_tmp = &msg[i];
unsigned code = utf8_walk(&msg_tmp); unsigned code = utf8_walk(&msg_tmp);
unsigned skip = msg_tmp - &msg[i]; unsigned skip = msg_tmp - &msg[i];
@ -181,50 +187,22 @@ static void wiiu_font_render_line(
if (!glyph) if (!glyph)
continue; continue;
off_x = glyph->draw_offset_x; v->pos.x = x + glyph->draw_offset_x * scale;
off_y = glyph->draw_offset_y; v->pos.y = y + glyph->draw_offset_y * scale;
tex_x = glyph->atlas_offset_x; v->pos.width = glyph->width * scale;
tex_y = glyph->atlas_offset_y; v->pos.height = glyph->height * scale;
width = glyph->width;
height = glyph->height;
v->coord.u = glyph->atlas_offset_x;
v->coord.v = glyph->atlas_offset_y;
v->coord.width = glyph->width;
v->coord.height = glyph->height;
float x0 = x + off_x + delta_x * scale; v->color = color;
float y0 = y + off_y + delta_y * scale + height * scale;
float u0 = tex_x;
float v0 = tex_y;
float x1 = x0 + width * scale;
float y1 = y0 - height * scale;
float u1 = u0 + width;
float v1 = v0 + height;
v[0].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f; v++;
v[0].pos.y = (-2.0f * y0 / wiiu->color_buffer.surface.height) + 1.0f;
v[1].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[1].pos.y = (-2.0f * y0 / wiiu->color_buffer.surface.height) + 1.0f;
v[2].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[2].pos.y = (-2.0f * y1 / wiiu->color_buffer.surface.height) + 1.0f;
v[3].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;;
v[3].pos.y = (-2.0f * y1 / wiiu->color_buffer.surface.height) + 1.0f;
v[0].coord.u = u0 / font->texture.surface.width; x += glyph->advance_x * scale;
v[0].coord.v = v1 / font->texture.surface.height; y += glyph->advance_y * scale;
v[1].coord.u = u1 / font->texture.surface.width;
v[1].coord.v = v1 / font->texture.surface.height;
v[2].coord.u = u1 / font->texture.surface.width;
v[2].coord.v = v0 / font->texture.surface.height;
v[3].coord.u = u0 / font->texture.surface.width;
v[3].coord.v = v0 / font->texture.surface.height;
v[0].color = color;
v[1].color = color;
v[2].color = color;
v[3].color = color;
v += 4;
delta_x += glyph->advance_x;
delta_y += glyph->advance_y;
} }
int count = v - wiiu->vertex_cache.v - wiiu->vertex_cache.current; int count = v - wiiu->vertex_cache.v - wiiu->vertex_cache.current;
@ -247,14 +225,12 @@ static void wiiu_font_render_line(
} }
#if 0 GX2SetPixelTexture(&font->texture, sprite_shader.ps.samplerVars[0].location);
printf("%s\n", msg); GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, font->ubo_tex);
DEBUG_VAR(color);
#endif
GX2SetPixelTexture(&font->texture, wiiu->shader->sampler.location); GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, count, wiiu->vertex_cache.current, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, count, wiiu->vertex_cache.current, 1); GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, wiiu->ubo_tex);
wiiu->vertex_cache.current = v - wiiu->vertex_cache.v; wiiu->vertex_cache.current = v - wiiu->vertex_cache.v;
} }

View File

@ -77,68 +77,48 @@ static void menu_display_wiiu_draw(void *data)
if (wiiu->vertex_cache.current + 4 > wiiu->vertex_cache.size) if (wiiu->vertex_cache.current + 4 > wiiu->vertex_cache.size)
return; return;
tex_shader_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current; sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
float x0 = draw->x;
float y0 = draw->y;
float x1 = x0 + draw->width;
float y1 = y0 + draw->height;
if(draw->coords->vertex && draw->coords->vertices == 4) if(draw->coords->vertex && draw->coords->vertices == 4)
{ {
for(int i = 0; i < 4; i++) v->pos.x = MIN(MIN(MIN(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]);
{ v->pos.y = 1.0 - MAX(MAX(MAX(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), draw->coords->vertex[7]);
v[i].pos.x = draw->coords->vertex[i << 1] * 2.0f - 1.0f; v->pos.width = MAX(MAX(MAX(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]) - v->pos.x;
v[i].pos.y = draw->coords->vertex[(i << 1) + 1] * 2.0f - 1.0f; v->pos.height = 1.0 - MIN(MIN(MIN(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), draw->coords->vertex[7]) - v->pos.y;
} v->pos.x *= wiiu->color_buffer.surface.width;
v->pos.y *= wiiu->color_buffer.surface.height;
v->pos.width *= wiiu->color_buffer.surface.width;
v->pos.height *= wiiu->color_buffer.surface.height;
} }
else else
{ {
v[0].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f; v->pos.x = draw->x;
v[0].pos.y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f; v->pos.y = wiiu->color_buffer.surface.height - draw->y - draw->height;
v[1].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;; v->pos.width = draw->width;
v[1].pos.y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f; v->pos.height = draw->height;
v[2].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[2].pos.y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f;
v[3].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;;
v[3].pos.y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f;
} }
if(draw->coords->tex_coord && draw->coords->vertices == 4) if(draw->coords->tex_coord && draw->coords->vertices == 4)
{ {
for(int i = 0; i < 4; i++) v->coord.u = MIN(MIN(MIN(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]);
{ v->coord.v = MIN(MIN(MIN(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]);
v[i].coord.u = draw->coords->tex_coord[i << 1]; v->coord.width = MAX(MAX(MAX(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]) - v->coord.u;
v[i].coord.v = draw->coords->tex_coord[(i << 1) + 1]; v->coord.height = MAX(MAX(MAX(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]) - v->coord.v;
}
} }
else else
{ {
v[0].coord.u = 0.0f; v->coord.u = 0.0f;
v[0].coord.v = 1.0f; v->coord.v = 0.0f;
v[1].coord.u = 1.0f; v->coord.width = 1.0f;
v[1].coord.v = 1.0f; v->coord.height = 1.0f;
v[2].coord.u = 1.0f;
v[2].coord.v = 0.0f;
v[3].coord.u = 0.0f;
v[3].coord.v = 0.0f;
} }
v[0].color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1], v->color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1],
0xFF * draw->coords->color[2], 0xFF * draw->coords->color[3]); 0xFF * draw->coords->color[2], 0xFF * draw->coords->color[3]);
v[1].color = v[0].color;
v[2].color = v[0].color;
v[3].color = v[0].color;
// printf("color : %f, %f, %f, %f --> 0x%08X\n", draw->coords->color[0], draw->coords->color[1], draw->coords->color[2], draw->coords->color[3], col[0]); GX2SetPixelTexture(texture, tex_shader.ps.samplerVars[0].location);
GX2SetPixelTexture(texture, wiiu->shader->sampler.location); GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, wiiu->vertex_cache.current, 1);
if(draw->coords->vertex && draw->coords->vertices == 4)
GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, wiiu->vertex_cache.current, 1);
else
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, wiiu->vertex_cache.current, 1);
#if 0 #if 0
printf("(%i,%i,%i,%i) , (%i,%i)\n", (int)draw->x, printf("(%i,%i,%i,%i) , (%i,%i)\n", (int)draw->x,
@ -146,7 +126,7 @@ static void menu_display_wiiu_draw(void *data)
texture->surface.width, texture->surface.height); texture->surface.width, texture->surface.height);
#endif #endif
wiiu->vertex_cache.current += 4; wiiu->vertex_cache.current ++;
} }

View File

@ -17,19 +17,21 @@
#define GX2_SHADER_INL_H #define GX2_SHADER_INL_H
#ifdef MSB_FIRST #ifdef MSB_FIRST
#define to_LE(x) __builtin_bswap32(x) #define to_QWORD(w0, w1) (((u64)(w0) << 32ull) | (w1))
#define to_LE(x) (__builtin_bswap32(x))
#else #else
#define to_LE(x) x #define to_QWORD(w0, w1) (((u64)(w1) << 32ull) | (w0))
#define to_LE(x) (x)
#endif #endif
/* CF */ /* CF */
#define CF_WORD0(addr) to_LE(addr) #define CF_DWORD0(addr) to_LE(addr)
#define CF_WORD1(popCount, cfConst, cond, count, callCount, inst) \ #define CF_DWORD1(popCount, cfConst, cond, count, callCount, inst) \
to_LE(popCount | (cfConst << 3) | (cond << 8) | (count << 10) | (callCount << 13) | (inst << 23) | (1 << 31)) to_LE(popCount | (cfConst << 3) | (cond << 8) | (count << 10) | (callCount << 13) | (inst << 23) | (1 << 31))
#define CF_ALU_WORD0(addr, kcacheBank0, kcacheBank1, kcacheMode0) \ #define CF_ALU_WORD0(addr, kcacheBank0, kcacheBank1, kcacheMode0) \
to_LE(addr | (kcacheBank0 << 16) | (kcacheBank1 << 20) | (kcacheMode0 << 22)) to_LE(addr | (kcacheBank0 << 22) | (kcacheBank1 << 26) | (kcacheMode0 << 30))
#define CF_ALU_WORD1(kcacheMode1, kcacheAddr0, kcacheAddr1, count, altConst, inst) \ #define CF_ALU_WORD1(kcacheMode1, kcacheAddr0, kcacheAddr1, count, altConst, inst) \
to_LE(kcacheMode1 | (kcacheAddr0 << 2) | (kcacheAddr1 << 10) | (count << 18) | (altConst << 25) | (inst << 26) | (1 << 31)) to_LE(kcacheMode1 | (kcacheAddr0 << 2) | (kcacheAddr1 << 10) | (count << 18) | (altConst << 25) | (inst << 26) | (1 << 31))
@ -39,26 +41,49 @@
#define CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, validPixelMode, inst) \ #define CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, validPixelMode, inst) \
to_LE(srcSelX | (srcSelY << 3) | (srcSelZ << 6) | (srcSelW << 9) | (validPixelMode << 22) | (inst << 23) | (1 << 31)) to_LE(srcSelX | (srcSelY << 3) | (srcSelZ << 6) | (srcSelW << 9) | (validPixelMode << 22) | (inst << 23) | (1 << 31))
#define NO_BARRIER & to_LE(~(1 << 31)) #define CF_ALLOC_EXPORT_WORD0(arrayBase, type, dstReg, dstRel, indexGpr, elemSize) \
#define END_OF_PROGRAM | to_LE(1 << 21) to_LE(arrayBase | (type << 13) | (dstReg << 15) | (dstRel << 22) | (indexGpr << 23) | (elemSize << 30))
#define VALID_PIX | to_LE(1 << 22)
#define WHOLE_QUAD_MODE | to_LE(1 << 30)
#define ALU_LAST to_LE(1 << 31) | #define CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, inst) \
to_LE(arraySize | (writeMask << 12) | (inst << 23) | (1 << 31))
/* Kcache (constant cache) helpers.
 * KC0()/KC1() add the matching base value to an index; the result is
 * presumably meant to be used as an ALU SRCx_SEL value - confirm. */
#define ALU_SRC_KCACHE0_BASE  0x80
#define ALU_SRC_KCACHE1_BASE  0xA0
#define CF_KCACHE_BANK_LOCK_1 0x1
/* NOTE(review): CB1/CB2 look like bank ids and _0_15 like a lock mode
 * intended as arguments for KCACHE0()/KCACHE1() - confirm against callers. */
#define CB1 0x1
#define CB2 0x2
#define _0_15 CF_KCACHE_BANK_LOCK_1
/* The argument is parenthesized so that expression arguments
 * (e.g. a & b) are evaluated before the base is added. */
#define KC0(x) ((x) + ALU_SRC_KCACHE0_BASE)
#define KC1(x) ((x) + ALU_SRC_KCACHE1_BASE)
#define NO_BARRIER & ~to_QWORD(0,to_LE(1 << 31))
#define END_OF_PROGRAM | to_QWORD(0,to_LE(1 << 21))
#define VALID_PIX | to_QWORD(0,to_LE(1 << 22))
#define WHOLE_QUAD_MODE | to_QWORD(0,to_LE(1 << 30))
#define BURSTCNT(x) | to_QWORD(0,to_LE(x << 17))
/* Argument wrappers used to label values at macro call sites.
 * WRITE() shifts its argument right by two; the other two pass the value
 * through unchanged. Arguments and expansions are parenthesized so that
 * expression arguments and surrounding operators cannot regroup them. */
#define WRITE(x)      ((x) >> 2)
#define ARRAY_SIZE(x) (x)
#define ELEM_SIZE(x)  (x)
#define KCACHE0(bank, mode) | to_QWORD(CF_ALU_WORD0(0, bank, 0, mode), 0)
#define KCACHE1(bank, mode) | to_QWORD(CF_ALU_WORD0(0, 0, bank, 0), CF_ALU_WORD1(mode,0, 0, 0, 0, 0))
#define ALU_LAST | to_QWORD(to_LE(1ull << 31), 0)
/* ALU */ /* ALU */
#define ALU_WORD0(src0Sel, src0Rel, src0Chan, src0Neg, src1Sel, src1Rel, src1Chan, src1Neg, indexMode, predSel) \ #define ALU_WORD0(src0Sel, src0Rel, src0Chan, src0Neg, src1Sel, src1Rel, src1Chan, src1Neg, indexMode, predSel) \
to_LE(src0Sel | (src0Rel << 9) | (src0Chan << 10) | (src0Neg << 12) | (src1Sel << 13) | (src1Rel << 22) \ to_LE(src0Sel | ((src0Rel) << 9) | ((src0Chan) << 10) | ((src0Neg) << 12) | ((src1Sel) << 13) | ((src1Rel) << 22) \
| (src1Chan << 23) | (src1Neg << 25) | (indexMode << 26) | (predSel << 29)) | ((src1Chan) << 23) | ((src1Neg) << 25) | ((indexMode) << 26) | ((predSel) << 29))
#define ALU_WORD1_OP2(src0Abs, src1Abs, updateExecuteMask, updatePred, writeMask, omod, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \ #define ALU_WORD1_OP2(src0Abs, src1Abs, updateExecuteMask, updatePred, writeMask, omod, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
to_LE(src0Abs | (src1Abs << 1) | (updateExecuteMask << 2) | (updatePred << 3) | (writeMask << 4) | (omod << 5) | (inst << 7) | \ to_LE(src0Abs | (src1Abs << 1) | (updateExecuteMask << 2) | (updatePred << 3) | (writeMask << 4) | (omod << 5) | (inst << 7) | \
(encoding << 15) | (bankSwizzle << 18) | (dstGpr << 21) | (dstRel << 28) | (dstChan << 29) | (clamp << 31)) (encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31))
#define ALU_WORD1_OP3(src2Sel, src2Rel, src2Chan, src2Neg, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \ #define ALU_WORD1_OP3(src2Sel, src2Rel, src2Chan, src2Neg, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
to_LE(src2Sel | (src2Rel << 9) | (src2Chan << 10) | (src2Neg << 12) | (inst << 13) | \ to_LE(src2Sel | (src2Rel << 9) | (src2Chan << 10) | (src2Neg << 12) | (inst << 13) | \
(encoding << 15) | (bankSwizzle << 18) | (dstGpr << 21) | (dstRel << 28) | (dstChan << 29) | (clamp << 31) (encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31)
/* TEX */ /* TEX */
#define TEX_WORD0(inst, bcFracMode, fetchWholeQuad, resourceID, srcReg, srcRel, altConst) \ #define TEX_WORD0(inst, bcFracMode, fetchWholeQuad, resourceID, srcReg, srcRel, altConst) \
@ -71,17 +96,28 @@
#define TEX_WORD2(offsetX, offsetY, offsetZ, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW) \ #define TEX_WORD2(offsetX, offsetY, offsetZ, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW) \
to_LE(offsetX | (offsetY << 5) | (offsetZ << 10) | (samplerID << 15) | (srcSelX << 20) | (srcSelY << 23) | (srcSelZ << 26) | (srcSelW << 29)) to_LE(offsetX | (offsetY << 5) | (offsetZ << 10) | (samplerID << 15) | (srcSelX << 20) | (srcSelY << 23) | (srcSelZ << 26) | (srcSelW << 29))
#define VTX_WORD0(inst, type, buffer_id, srcReg, srcSelX, mega) \
to_LE(inst | (type << 5) | (buffer_id << 8) | (srcReg << 16) | (srcSelX << 24) | (mega << 26))
#define _X 0 #define VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW) \
#define _Y 1 to_LE(dstReg | (dstSelX << 9) | (dstSelY << 12) | (dstSelZ << 15) | (dstSelW << 18) | (1 << 21))
#define _Z 2
#define _W 3 #define VTX_WORD2(offset, ismega) \
to_LE(offset| (ismega << 19))
#define _x 0
#define _y 1
#define _z 2
#define _w 3
#define _0 4 #define _0 4
#define _1 5 #define _1 5
#define _xyzw 0b1111
#define _xy__ 0b0011
#define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3)) #define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3))
#define ALU_LITERAL(v) to_LE(v) #define ALU_LITERAL(v) to_QWORD(to_LE(v), 0)
/* SRCx_SEL special constants */ /* SRCx_SEL special constants */
#define ALU_SRC_1_DBL_L 0xF4 #define ALU_SRC_1_DBL_L 0xF4
@ -97,6 +133,49 @@
#define ALU_SRC_PV 0xFE #define ALU_SRC_PV 0xFE
#define ALU_SRC_PS 0xFF #define ALU_SRC_PS 0xFF
#define _NEG | (1 << 12)
#define ALU_OMOD_OFF 0x0
#define ALU_OMOD_M2 0x1
#define ALU_OMOD_M4 0x2
#define ALU_OMOD_D2 0x3
#define ALU_VEC_012 0x0
#define ALU_VEC_021 0x1
#define ALU_VEC_120 0x2
#define ALU_VEC_102 0x3
#define ALU_VEC_201 0x4
#define ALU_VEC_210 0x5
#define VEC_012 | to_QWORD(0, to_LE(ALU_VEC_012 << 18))
#define VEC_021 | to_QWORD(0, to_LE(ALU_VEC_021 << 18))
#define VEC_120 | to_QWORD(0, to_LE(ALU_VEC_120 << 18))
#define VEC_102 | to_QWORD(0, to_LE(ALU_VEC_102 << 18))
#define VEC_201 | to_QWORD(0, to_LE(ALU_VEC_201 << 18))
#define VEC_210 | to_QWORD(0, to_LE(ALU_VEC_210 << 18))
#define VALID_PIX | to_QWORD(0,to_LE(1 << 22))
#define ALU_SCL_210 0x0
#define ALU_SCL_122 0x1
#define ALU_SCL_212 0x2
#define ALU_SCL_221 0x3
#define SCL_210 | to_QWORD(0, to_LE(ALU_SCL_210 << 18))
#define SCL_122 | to_QWORD(0, to_LE(ALU_SCL_122 << 18))
#define SCL_212 | to_QWORD(0, to_LE(ALU_SCL_212 << 18))
#define SCL_221 | to_QWORD(0, to_LE(ALU_SCL_221 << 18))
#define FETCH_TYPE(x) x
#define MINI(x) ((x) - 1)
#define MEGA(x) (MINI(x) | 0x80000000)
#define OFFSET(x) x
#define VERTEX_DATA 0
#define INSTANCE_DATA 1
#define NO_INDEX_OFFSET 2
/* CF defines */ /* CF defines */
#define CF_COND_ACTIVE 0x0 #define CF_COND_ACTIVE 0x0
#define CF_COND_FALSE 0x1 #define CF_COND_FALSE 0x1
@ -109,13 +188,18 @@
/* instructions */ /* instructions */
/* CF */ /* CF */
#define CF_INST_TEX 0x01 #define CF_INST_TEX 0x01
#define CF_INST_CALL_FS 0x13 #define CF_INST_VTX 0x02
#define CF_INST_ALU 0x08
#define CF_INST_CALL_FS 0x13
#define CF_INST_EMIT_VERTEX 0x15
#define CF_INST_MEM_RING 0x26
/* ALU */ /* ALU */
#define ALU_INST_ALU 0x8 #define OP2_INST_ADD 0x0
#define OP2_INST_MUL 0x1 #define OP2_INST_MUL 0x1
#define OP2_INST_MOV 0x19 #define OP2_INST_MUL_IEEE 0x2
#define OP2_INST_MOV 0x19
#define OP2_INST_RECIP_IEEE 0x66
/* EXP */ /* EXP */
#define CF_INST_EXP 0x27 #define CF_INST_EXP 0x27
#define CF_INST_EXP_DONE 0x28 #define CF_INST_EXP_DONE 0x28
@ -123,6 +207,9 @@
/* TEX */ /* TEX */
#define TEX_INST_SAMPLE 0x10 #define TEX_INST_SAMPLE 0x10
/* VTX */
#define VTX_INST_FETCH 0x0
/* EXPORT_TYPE */ /* EXPORT_TYPE */
#define EXPORT_TYPE_PIXEL 0x0 #define EXPORT_TYPE_PIXEL 0x0
#define EXPORT_TYPE_POS 0x1 #define EXPORT_TYPE_POS 0x1
@ -142,11 +229,34 @@
#define PIX0 PIX(0) #define PIX0 PIX(0)
/* registers */ /* registers */
#define __ (0x80) /* invalid register (write mask off) */
#define _R(x) x #define _R(x) x
#define _R0 _R(0x0) #define _R0 _R(0x0)
#define _R1 _R(0x1) #define _R1 _R(0x1)
#define _R2 _R(0x2) #define _R2 _R(0x2)
#define _R3 _R(0x3) #define _R3 _R(0x3)
#define _R4 _R(0x4)
#define _R5 _R(0x5)
#define _R6 _R(0x6)
#define _R7 _R(0x7)
#define _R8 _R(0x8)
#define _R9 _R(0x9)
#define _R10 _R(0xA)
#define _R11 _R(0xB)
#define _R12 _R(0xC)
#define _R13 _R(0xD)
#define _R14 _R(0xE)
#define _R15 _R(0xF)
#define _R120 _R(0x78)
#define _R121 _R(0x79)
#define _R122 _R(0x7A)
#define _R123 _R(0x7B)
#define _R124 _R(0x7C)
#define _R125 _R(0x7D)
#define _R126 _R(0x7E)
#define _R127 _R(0x7F)
/* texture */ /* texture */
#define _t(x) x #define _t(x) x
@ -156,28 +266,61 @@
#define _s(x) x #define _s(x) x
#define _s0 _s(0x0) #define _s0 _s(0x0)
#define CALL_FS CF_WORD0(0), CF_WORD1(0,0,0,0,0,CF_INST_CALL_FS) #define _b(x) x
#define TEX(addr, cnt) CF_WORD0(addr), CF_WORD1(0x0, 0x0, CF_COND_ACTIVE, 0x0, (cnt - 1), CF_INST_TEX)
#define ALU(addr, cnt) CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, (cnt - 1), 0x0, ALU_INST_ALU) #define CALL_FS to_QWORD(CF_DWORD0(0), CF_DWORD1(0,0,0,0,0,CF_INST_CALL_FS))
#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \ #define TEX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, (cnt - 1), 0x0, CF_INST_TEX))
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE) #define VTX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, (cnt - 1), 0x0, CF_INST_VTX))
#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \ #define ALU(addr, cnt) to_QWORD(CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, (cnt - 1), 0x0, CF_INST_ALU))
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP)
#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, ALU_SRC_0, 0x0, 0x0, 0x0, 0x0, 0x0), \ #define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MOV, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0) CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE))
#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \ #define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MUL, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0) CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP))
#define MEM_RING(arrayBase, dstReg, writeMask, arraySize, elemSize) \
to_QWORD(CF_ALLOC_EXPORT_WORD0(arrayBase, 0x00, dstReg, 0x00, 0x00, elemSize), \
CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, CF_INST_MEM_RING))
#define EMIT_VERTEX to_QWORD(0, CF_DWORD1(0, 0, 0, 0, 0, CF_INST_EMIT_VERTEX))
#define ALU_OP2(inst, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, omod) \
to_QWORD(ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, (((dstGpr&__) >> 7) ^ 0x1), omod, inst, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0))
#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)
#define ALU_MOV_x2(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M2)
#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
#define ALU_MUL_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
#define ALU_ADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
#define ALU_ADD_x2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M2)
#define ALU_RECIP_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_RECIP_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)
#define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\ #define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\
TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \ to_QWORD(TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED), \ TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
TEX_WORD2(0x0, 0x0, 0x0, samplerID, _X, _Y, _0, _X) to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _0, _x), 0x00000000)
#define VTX_FETCH(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, buffer_id, type, mega, offset) \
to_QWORD(VTX_WORD0(VTX_INST_FETCH, type, buffer_id, srcReg, srcSelX, mega), VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW)) , \
to_QWORD(VTX_WORD2(offset, (mega >> 31)), 0x00000000)
#define _x2(v) v, v #define _x2(v) v, v
#define _x4(v) _x2(v), _x2(v) #define _x4(v) _x2(v), _x2(v)

View File

@ -3,6 +3,7 @@
#define GX2_SCAN_BUFFER_ALIGNMENT 0x1000 #define GX2_SCAN_BUFFER_ALIGNMENT 0x1000
#define GX2_SHADER_ALIGNMENT 0x100 #define GX2_SHADER_ALIGNMENT 0x100
#define GX2_CONTEXT_STATE_ALIGNMENT 0x100 #define GX2_CONTEXT_STATE_ALIGNMENT 0x100
#define GX2_UNIFORM_BLOCK_ALIGNMENT 0x100
#define GX2_DISPLAY_LIST_ALIGNMENT 0x20 #define GX2_DISPLAY_LIST_ALIGNMENT 0x20
#define GX2_VERTEX_BUFFER_ALIGNMENT 0x40 #define GX2_VERTEX_BUFFER_ALIGNMENT 0x40
#define GX2_INDEX_BUFFER_ALIGNMENT 0x20 #define GX2_INDEX_BUFFER_ALIGNMENT 0x20

View File

@ -175,18 +175,19 @@ typedef enum GX2IndexType
typedef enum GX2InvalidateMode typedef enum GX2InvalidateMode
{ {
GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER = 1 << 0, GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER = 1 << 0,
GX2_INVALIDATE_MODE_TEXTURE = 1 << 1, GX2_INVALIDATE_MODE_TEXTURE = 1 << 1,
GX2_INVALIDATE_MODE_UNIFORM_BLOCK = 1 << 2, GX2_INVALIDATE_MODE_UNIFORM_BLOCK = 1 << 2,
GX2_INVALIDATE_MODE_SHADER = 1 << 3, GX2_INVALIDATE_MODE_SHADER = 1 << 3,
GX2_INVALIDATE_MODE_COLOR_BUFFER = 1 << 4, GX2_INVALIDATE_MODE_COLOR_BUFFER = 1 << 4,
GX2_INVALIDATE_MODE_DEPTH_BUFFER = 1 << 5, GX2_INVALIDATE_MODE_DEPTH_BUFFER = 1 << 5,
GX2_INVALIDATE_MODE_CPU = 1 << 6, GX2_INVALIDATE_MODE_CPU = 1 << 6,
GX2_INVALIDATE_MODE_STREAM_OUT_BUFFER = 1 << 7, GX2_INVALIDATE_MODE_STREAM_OUT_BUFFER = 1 << 7,
GX2_INVALIDATE_MODE_EXPORT_BUFFER = 1 << 8, GX2_INVALIDATE_MODE_EXPORT_BUFFER = 1 << 8,
GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER= GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER, GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER,
GX2_INVALIDATE_MODE_CPU_TEXTURE = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_TEXTURE, GX2_INVALIDATE_MODE_CPU_TEXTURE = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_TEXTURE,
GX2_INVALIDATE_MODE_CPU_SHADER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_SHADER, GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_UNIFORM_BLOCK,
GX2_INVALIDATE_MODE_CPU_SHADER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_SHADER,
} GX2InvalidateMode; } GX2InvalidateMode;
typedef enum GX2InitAttributes typedef enum GX2InitAttributes
@ -220,6 +221,7 @@ typedef enum GX2LogicOp
typedef enum GX2PrimitiveMode typedef enum GX2PrimitiveMode
{ {
GX2_PRIMITIVE_MODE_POINTS = 1,
GX2_PRIMITIVE_MODE_LINES = 2, GX2_PRIMITIVE_MODE_LINES = 2,
GX2_PRIMITIVE_MODE_LINE_STRIP = 3, GX2_PRIMITIVE_MODE_LINE_STRIP = 3,
GX2_PRIMITIVE_MODE_TRIANGLES = 4, GX2_PRIMITIVE_MODE_TRIANGLES = 4,

View File

@ -69,93 +69,102 @@ typedef struct GX2AttribVar
typedef struct GX2VertexShader typedef struct GX2VertexShader
{ {
struct union
{ {
struct struct
{ {
unsigned :2; struct
bool prime_cache_on_const :1; {
bool prime_cache_enable :1; unsigned : 2;
bool uncached_first_inst :1; bool prime_cache_on_const : 1;
unsigned fetch_cache_lines :3; bool prime_cache_enable : 1;
bool prime_cache_on_draw :1; bool uncached_first_inst : 1;
bool prime_cache_pgm_en :1; unsigned fetch_cache_lines : 3;
bool dx10_clamp :1; bool prime_cache_on_draw : 1;
unsigned :5; bool prime_cache_pgm_en : 1;
unsigned stack_size :8; bool dx10_clamp : 1;
unsigned num_gprs :8; unsigned : 5;
}sq_pgm_resources_vs; unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_vs;
bool vgt_primitiveid_en; struct
{
unsigned : 31;
unsigned enable: 1;
} vgt_primitiveid_en;
struct
{
unsigned :18;
unsigned vs_out_fog_vec_addr : 5;
bool vs_exports_fog : 1;
unsigned :2;
unsigned vs_export_count :5;
bool vs_per_component : 1;
}spi_vs_out_config;
uint32_t num_spi_vs_out_id; struct
struct {
{ unsigned : 18;
uint8_t semantic_3; unsigned vs_out_fog_vec_addr : 5;
uint8_t semantic_2; bool vs_exports_fog : 1;
uint8_t semantic_1; unsigned : 2;
uint8_t semantic_0; unsigned vs_export_count : 5;
}spi_vs_out_id[10]; bool vs_per_component : 1;
struct } spi_vs_out_config;
{
bool clip_dist_ena_7 :1; uint32_t num_spi_vs_out_id;
bool clip_dist_ena_6 :1; struct
bool clip_dist_ena_5 :1; {
bool clip_dist_ena_4 :1; uint8_t semantic_3;
bool clip_dist_ena_3 :1; uint8_t semantic_2;
bool clip_dist_ena_2 :1; uint8_t semantic_1;
bool clip_dist_ena_1 :1; uint8_t semantic_0;
bool clip_dist_ena_0 :1; } spi_vs_out_id[10];
bool cull_dist_ena_7 :1; struct
bool cull_dist_ena_6 :1; {
bool cull_dist_ena_5 :1; bool clip_dist_ena_7 : 1;
bool cull_dist_ena_0 :1; bool clip_dist_ena_6 : 1;
bool cull_dist_ena_4 :1; bool clip_dist_ena_5 : 1;
bool cull_dist_ena_3 :1; bool clip_dist_ena_4 : 1;
bool cull_dist_ena_2 :1; bool clip_dist_ena_3 : 1;
bool cull_dist_ena_1 :1; bool clip_dist_ena_2 : 1;
bool vs_out_misc_side_bus_ena :1; bool clip_dist_ena_1 : 1;
bool vs_out_ccdist1_vec_ena :1; bool clip_dist_ena_0 : 1;
bool vs_out_ccdist0_vec_ena :1; bool cull_dist_ena_7 : 1;
bool vs_out_misc_vec_ena :1; bool cull_dist_ena_6 : 1;
bool use_vtx_kill_flag :1; bool cull_dist_ena_5 : 1;
bool use_vtx_viewport_indx :1; bool cull_dist_ena_0 : 1;
bool use_vtx_render_target_indx :1; bool cull_dist_ena_4 : 1;
bool use_vtx_edge_flag :1; bool cull_dist_ena_3 : 1;
unsigned :6; bool cull_dist_ena_2 : 1;
bool use_vtx_point_size :1; bool cull_dist_ena_1 : 1;
bool use_vtx_gs_cut_flag :1; bool vs_out_misc_side_bus_ena : 1;
}pa_cl_vs_out_cntl; bool vs_out_ccdist1_vec_ena : 1;
uint32_t sq_vtx_semantic_clear; bool vs_out_ccdist0_vec_ena : 1;
uint32_t num_sq_vtx_semantic; bool vs_out_misc_vec_ena : 1;
uint32_t sq_vtx_semantic[32]; /* 8 bit */ bool use_vtx_kill_flag : 1;
struct bool use_vtx_viewport_indx : 1;
{ bool use_vtx_render_target_indx : 1;
bool buffer_3_en :1; bool use_vtx_edge_flag : 1;
bool buffer_2_en :1; unsigned : 6;
bool buffer_1_en :1; bool use_vtx_point_size : 1;
bool buffer_0_en :1; bool use_vtx_gs_cut_flag : 1;
}vgt_strmout_buffer_en; } pa_cl_vs_out_cntl;
struct uint32_t sq_vtx_semantic_clear;
{ uint32_t num_sq_vtx_semantic;
unsigned :24; uint32_t sq_vtx_semantic[32]; /* 8 bit */
unsigned vtx_reuse_depth :8; struct
}vgt_vertex_reuse_block_cntl; {
struct bool buffer_3_en : 1;
{ bool buffer_2_en : 1;
unsigned :24; bool buffer_1_en : 1;
unsigned reuse_depth :8; bool buffer_0_en : 1;
}vgt_hos_reuse_depth; } vgt_strmout_buffer_en;
struct
{
unsigned : 24;
unsigned vtx_reuse_depth : 8;
} vgt_vertex_reuse_block_cntl;
struct
{
unsigned : 24;
unsigned reuse_depth : 8;
} vgt_hos_reuse_depth;
};
u32 vals[52];
} regs; } regs;
uint32_t size; uint32_t size;
@ -180,7 +189,7 @@ typedef struct GX2VertexShader
uint32_t attribVarCount; uint32_t attribVarCount;
GX2AttribVar *attribVars; GX2AttribVar *attribVars;
uint32_t ringItemsize; uint32_t ringItemSize;
BOOL hasStreamOut; BOOL hasStreamOut;
uint32_t streamOutStride[4]; uint32_t streamOutStride[4];
@ -188,129 +197,136 @@ typedef struct GX2VertexShader
GX2RBuffer gx2rBuffer; GX2RBuffer gx2rBuffer;
} GX2VertexShader; } GX2VertexShader;
typedef enum { typedef enum
{
spi_baryc_cntl_centroids_only = 0, spi_baryc_cntl_centroids_only = 0,
spi_baryc_cntl_centers_only = 1, spi_baryc_cntl_centers_only = 1,
spi_baryc_cntl_centroids_and_centers = 2, spi_baryc_cntl_centroids_and_centers = 2,
}spi_baryc_cntl; } spi_baryc_cntl;
typedef enum { typedef enum
{
db_z_order_late_z = 0, db_z_order_late_z = 0,
db_z_order_early_z_then_late_z = 1, db_z_order_early_z_then_late_z = 1,
db_z_order_re_z = 2, db_z_order_re_z = 2,
db_z_order_early_z_then_re_z = 3, db_z_order_early_z_then_re_z = 3,
}db_z_order; } db_z_order;
typedef struct GX2PixelShader typedef struct GX2PixelShader
{ {
struct union
{ {
struct struct
{ {
unsigned :2; struct
bool prime_cache_on_const :1; {
bool prime_cache_enable :1; unsigned : 2;
bool uncached_first_inst :1; bool prime_cache_on_const : 1;
unsigned fetch_cache_lines :3; bool prime_cache_enable : 1;
bool prime_cache_on_draw :1; bool uncached_first_inst : 1;
bool prime_cache_pgm_en :1; unsigned fetch_cache_lines : 3;
bool dx10_clamp :1; bool prime_cache_on_draw : 1;
unsigned :5; bool prime_cache_pgm_en : 1;
unsigned stack_size :8; bool dx10_clamp : 1;
unsigned num_gprs :8; unsigned : 5;
}sq_pgm_resources_ps; unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_ps;
struct struct
{ {
unsigned :24; unsigned : 27;
unsigned export_mode :5; unsigned export_mode : 5;
}sq_pgm_exports_ps; } sq_pgm_exports_ps;
struct struct
{ {
bool baryc_at_sample_ena :1; bool baryc_at_sample_ena : 1;
bool position_sample :1; bool position_sample : 1;
bool linear_gradient_ena :1; bool linear_gradient_ena : 1;
bool persp_gradient_ena :1; bool persp_gradient_ena : 1;
spi_baryc_cntl baryc_sample_cntl :2; spi_baryc_cntl baryc_sample_cntl : 2;
unsigned param_gen_addr : 7; unsigned param_gen_addr : 7;
unsigned param_gen :4; unsigned param_gen : 4;
unsigned position_addr :5; unsigned position_addr : 5;
bool position_centroid :1; bool position_centroid : 1;
bool position_ena :1; bool position_ena : 1;
unsigned :2; unsigned : 2;
unsigned num_interp :6; unsigned num_interp : 6;
}spi_ps_in_control_0; } spi_ps_in_control_0;
struct struct
{ {
unsigned :1; unsigned : 1;
bool position_ulc :1; bool position_ulc : 1;
unsigned fixed_pt_position_addr :5; unsigned fixed_pt_position_addr : 5;
bool fixed_pt_position_ena :1; bool fixed_pt_position_ena : 1;
unsigned fog_addr :7; unsigned fog_addr : 7;
unsigned front_face_addr :5; unsigned front_face_addr : 5;
bool front_face_all_bits :1; bool front_face_all_bits : 1;
unsigned front_face_chan :2; unsigned front_face_chan : 2;
bool front_face_ena :1; bool front_face_ena : 1;
unsigned gen_index_pix_addr :7; unsigned gen_index_pix_addr : 7;
bool gen_index_pix :1; bool gen_index_pix : 1;
}spi_ps_in_control_1; } spi_ps_in_control_1;
uint32_t num_spi_ps_input_cntl; uint32_t num_spi_ps_input_cntl;
struct struct
{ {
unsigned :13; unsigned : 13;
bool sel_sample :1; bool sel_sample : 1;
bool pt_sprite_tex :1; bool pt_sprite_tex : 1;
unsigned cyl_wrap :4; unsigned cyl_wrap : 4;
bool sel_linear :1; bool sel_linear : 1;
bool sel_centroid :1; bool sel_centroid : 1;
bool flat_shade :1; bool flat_shade : 1;
unsigned default_val :2; unsigned default_val : 2;
unsigned semantic :8; unsigned semantic : 8;
}spi_ps_input_cntls[32]; } spi_ps_input_cntls[32];
struct struct
{ {
unsigned output7_enable :4; unsigned output7_enable : 4;
unsigned output6_enable :4; unsigned output6_enable : 4;
unsigned output5_enable :4; unsigned output5_enable : 4;
unsigned output4_enable :4; unsigned output4_enable : 4;
unsigned output3_enable :4; unsigned output3_enable : 4;
unsigned output2_enable :4; unsigned output2_enable : 4;
unsigned output1_enable :4; unsigned output1_enable : 4;
unsigned output0_enable :4; unsigned output0_enable : 4;
}cb_shader_mask; } cb_shader_mask;
struct { struct
unsigned :24; {
bool rt7_enable :1; unsigned : 24;
bool rt6_enable :1; bool rt7_enable : 1;
bool rt5_enable :1; bool rt6_enable : 1;
bool rt4_enable :1; bool rt5_enable : 1;
bool rt3_enable :1; bool rt4_enable : 1;
bool rt2_enable :1; bool rt3_enable : 1;
bool rt1_enable :1; bool rt2_enable : 1;
bool rt0_enable :1; bool rt1_enable : 1;
}cb_shader_control; bool rt0_enable : 1;
struct } cb_shader_control;
{ struct
unsigned :19; {
bool alpha_to_mask_disable :1; unsigned : 19;
bool exec_on_noop :1; bool alpha_to_mask_disable : 1;
bool exec_on_hier_fail :1; bool exec_on_noop : 1;
bool dual_export_enable :1; bool exec_on_hier_fail : 1;
bool mask_export_enable :1; bool dual_export_enable : 1;
bool coverage_to_mask_enable :1; bool mask_export_enable : 1;
bool kill_enable :1; bool coverage_to_mask_enable : 1;
db_z_order z_order :2; bool kill_enable : 1;
unsigned :2; db_z_order z_order : 2;
bool z_export_enable :1; unsigned : 2;
bool stencil_ref_export_enable :1; bool z_export_enable : 1;
} db_shader_control; bool stencil_ref_export_enable : 1;
} db_shader_control;
bool spi_input_z; bool spi_input_z;
};
u32 vals[41];
} regs; } regs;
uint32_t size; uint32_t size;
@ -335,26 +351,148 @@ typedef struct GX2PixelShader
GX2RBuffer gx2rBuffer; GX2RBuffer gx2rBuffer;
} GX2PixelShader; } GX2PixelShader;
/* Values for the vgt_gs_out_prim_type register word of a geometry shader.
 * NOTE(review): the MAX_ENUM entry presumably exists only to force the
 * enum to a full 32-bit width - confirm. */
typedef enum
{
   VGT_GS_OUT_PRIMITIVE_TYPE_POINTLIST = 0,
   VGT_GS_OUT_PRIMITIVE_TYPE_LINESTRIP = 1,
   VGT_GS_OUT_PRIMITIVE_TYPE_TRISTRIP  = 2,

   VGT_GS_OUT_PRIMITIVE_TYPE_MAX_ENUM  = 0xFFFFFFFF
} vgt_gs_out_primitive_type;
/* Values of the 2-bit "mode" field of the vgt_gs_mode register. */
typedef enum
{
   VGT_GS_ENABLE_MODE_OFF        = 0,
   VGT_GS_ENABLE_MODE_SCENARIO_A = 1,
   VGT_GS_ENABLE_MODE_SCENARIO_B = 2,
   VGT_GS_ENABLE_MODE_SCENARIO_G = 3,
} vgt_gs_enable_mode;
/* Values of the 2-bit "cut_mode" field of the vgt_gs_mode register. */
typedef enum
{
   VGT_GS_CUT_MODE_1024 = 0,
   VGT_GS_CUT_MODE_512  = 1,
   VGT_GS_CUT_MODE_256  = 2,
   VGT_GS_CUT_MODE_128  = 3,
} vgt_gs_cut_mode;
typedef struct GX2GeometryShader typedef struct GX2GeometryShader
{ {
struct union
{ {
uint32_t sq_pgm_resources_gs; struct
uint32_t vgt_gs_out_prim_type; {
uint32_t vgt_gs_mode; struct
uint32_t pa_cl_vs_out_cntl; {
uint32_t sq_pgm_resources_vs; unsigned : 2;
uint32_t sq_gs_vert_itemsize; bool prime_cache_on_const : 1;
uint32_t spi_vs_out_config; bool prime_cache_enable : 1;
uint32_t num_spi_vs_out_id; bool uncached_first_inst : 1;
uint32_t spi_vs_out_id[10]; unsigned fetch_cache_lines : 3;
uint32_t vgt_strmout_buffer_en; bool prime_cache_on_draw : 1;
} regs; bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_gs;
vgt_gs_out_primitive_type vgt_gs_out_prim_type;
struct
{
unsigned : 14;
bool partial_thd_at_eoi : 1;
bool element_info_en : 1;
bool fast_compute_mode : 1;
bool compute_mode : 1;
unsigned : 2;
bool gs_c_pack_en : 1;
unsigned : 2;
bool mode_hi : 1;
unsigned : 3;
vgt_gs_cut_mode cut_mode : 2;
bool es_passthru : 1;
vgt_gs_enable_mode mode : 2;
} vgt_gs_mode;
struct
{
bool clip_dist_ena_7 : 1;
bool clip_dist_ena_6 : 1;
bool clip_dist_ena_5 : 1;
bool clip_dist_ena_4 : 1;
bool clip_dist_ena_3 : 1;
bool clip_dist_ena_2 : 1;
bool clip_dist_ena_1 : 1;
bool clip_dist_ena_0 : 1;
bool cull_dist_ena_7 : 1;
bool cull_dist_ena_6 : 1;
bool cull_dist_ena_5 : 1;
bool cull_dist_ena_0 : 1;
bool cull_dist_ena_4 : 1;
bool cull_dist_ena_3 : 1;
bool cull_dist_ena_2 : 1;
bool cull_dist_ena_1 : 1;
bool vs_out_misc_side_bus_ena : 1;
bool vs_out_ccdist1_vec_ena : 1;
bool vs_out_ccdist0_vec_ena : 1;
bool vs_out_misc_vec_ena : 1;
bool use_vtx_kill_flag : 1;
bool use_vtx_viewport_indx : 1;
bool use_vtx_render_target_indx : 1;
bool use_vtx_edge_flag : 1;
unsigned : 6;
bool use_vtx_point_size : 1;
bool use_vtx_gs_cut_flag : 1;
} pa_cl_vs_out_cntl;
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_vs;
uint32_t sq_gs_vert_itemsize; /* 15-bit */
struct
{
unsigned : 18;
unsigned vs_out_fog_vec_addr : 5;
bool vs_exports_fog : 1;
unsigned : 2;
unsigned vs_export_count : 5;
bool vs_per_component : 1;
} spi_vs_out_config;
uint32_t num_spi_vs_out_id;
struct
{
uint8_t semantic_3;
uint8_t semantic_2;
uint8_t semantic_1;
uint8_t semantic_0;
} spi_vs_out_id[10];
struct
{
bool buffer_3_en : 1;
bool buffer_2_en : 1;
bool buffer_1_en : 1;
bool buffer_0_en : 1;
} vgt_strmout_buffer_en;
};
u32 vals[19];
} regs;
uint32_t size; uint32_t size;
uint8_t *program; uint8_t *program;
uint32_t vertexProgramSize; uint32_t copyProgramSize;
uint8_t *vertexProgram; uint8_t *copyProgram;
GX2ShaderMode mode; GX2ShaderMode mode;
uint32_t uniformBlockCount; uint32_t uniformBlockCount;
@ -419,6 +557,14 @@ void GX2SetShaderModeEx(GX2ShaderMode mode,
uint32_t numGsGpr, uint32_t numGsStackEntries, uint32_t numGsGpr, uint32_t numGsStackEntries,
uint32_t numPsGpr, uint32_t numPsStackEntries); uint32_t numPsGpr, uint32_t numPsStackEntries);
/* Convenience wrapper around GX2SetShaderModeEx() that fills in a fixed
 * set of resource counts for the requested mode.
 *
 * The last four arguments are numGsGpr, numGsStackEntries, numPsGpr and
 * numPsStackEntries; the two before them are presumably the vertex shader
 * equivalents (TODO confirm against the GX2SetShaderModeEx prototype). */
static inline void GX2SetShaderMode(GX2ShaderMode mode)
{
   if (mode != GX2_SHADER_MODE_GEOMETRY_SHADER)
   {
      /* no geometry stage: its GPR and stack counts are zero */
      GX2SetShaderModeEx(mode, 48, 64, 0, 0, 200, 192);
      return;
   }

   GX2SetShaderModeEx(mode, 44, 32, 64, 48, 76, 176);
}
void GX2SetStreamOutEnable(BOOL enable); void GX2SetStreamOutEnable(BOOL enable);
void GX2SetGeometryShaderInputRingBuffer(void *buffer, uint32_t size); void GX2SetGeometryShaderInputRingBuffer(void *buffer, uint32_t size);
void GX2SetGeometryShaderOutputRingBuffer(void *buffer, uint32_t size); void GX2SetGeometryShaderOutputRingBuffer(void *buffer, uint32_t size);

View File

@ -37,3 +37,5 @@ typedef double f64;
typedef volatile float vf32; typedef volatile float vf32;
typedef volatile double vf64; typedef volatile double vf64;
/* Number of elements in a true array (not a pointer). */
#define countof(array) (sizeof(array) / sizeof((array)[0]))

14
wiiu/run.sh Normal file
View File

@ -0,0 +1,14 @@
#!/bin/sh
# Sends an rpx with wiiload, then runs netcat listening on port 4405,
# printing a timestamp before and after the listening session.
#
# usage: run.sh <rpx>

# "$1" is quoted so that an empty argument and paths containing spaces are
# both handled correctly by the test and by wiiload.
if [ -z "$1" ] ; then
   echo
   echo "usage: $0 <rpx>"
   echo
   exit 0
fi

wiiload "$1"

echo ===== START: $(date) =====
netcat -p 4405 -l
echo ===== END: $(date) =====

151
wiiu/shader_utils.c Normal file
View File

@ -0,0 +1,151 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <wiiu/gx2.h>
#include <wiiu/system/memory.h>
#include <wiiu/shader_utils.h>
#include <wiiu/wiiu_dbg.h>
/* Workaround for elf builds: their data section sits below 0x10000000, where
 * the GX2 hardware cannot read it. The shader programs are therefore copied
 * elsewhere, and this record remembers the original pointers so they can be
 * put back when the shader is destroyed. */
#ifndef GX2_CAN_ACCESS_DATA_SECTION
typedef struct
{
   void *vs_program;      /* original vertex shader program        */
   void *ps_program;      /* original pixel shader program         */
   void *gs_program;      /* original geometry shader program      */
   void *gs_copy_program; /* original geometry shader copy program */
} org_programs_t;
#endif
void GX2InitShader(GX2Shader* shader)
{
if (shader->fs.program)
return;
shader->fs.size = GX2CalcFetchShaderSizeEx(shader->vs.attribVarCount,
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
#ifdef GX2_CAN_ACCESS_DATA_SECTION
shader->fs.program = MEM2_alloc(shader->fs.size, GX2_SHADER_ALIGNMENT);
#else
shader->fs.program = MEM2_alloc(shader->fs.size + sizeof(org_programs_t), GX2_SHADER_ALIGNMENT);
#endif
GX2InitFetchShaderEx(&shader->fs, (uint8_t*)shader->fs.program,
shader->vs.attribVarCount,
shader->attribute_stream,
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->fs.program, shader->fs.size);
#ifndef GX2_CAN_ACCESS_DATA_SECTION
org_programs_t* org = (org_programs_t*)(shader->fs.program + shader->fs.size);
org->vs_program = shader->vs.program;
org->ps_program = shader->ps.program;
org->gs_program = shader->gs.program;
org->gs_copy_program = shader->gs.copyProgram;
shader->vs.program = MEM2_alloc(shader->vs.size, GX2_SHADER_ALIGNMENT);
memcpy(shader->vs.program, org->vs_program, shader->vs.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->vs.program, shader->vs.size);
shader->ps.program = MEM2_alloc(shader->ps.size, GX2_SHADER_ALIGNMENT);
memcpy(shader->ps.program, org->ps_program, shader->ps.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->ps.program, shader->ps.size);
if(org->gs_program)
{
shader->gs.program = MEM2_alloc(shader->gs.size, GX2_SHADER_ALIGNMENT);
memcpy(shader->gs.program, org->gs_program, shader->gs.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->gs.program, shader->gs.size);
shader->gs.copyProgram = MEM2_alloc(shader->gs.copyProgramSize, GX2_SHADER_ALIGNMENT);
memcpy(shader->gs.copyProgram, org->gs_copy_program, shader->gs.copyProgramSize);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->gs.copyProgram, shader->gs.copyProgramSize);
}
#endif
}
/* Releases what GX2InitShader() set up for `shader`: frees the fetch shader
 * and, unless GX2_CAN_ACCESS_DATA_SECTION is defined, frees the MEM2 program
 * copies and restores the original program pointers.
 *
 * Safe to call on a shader that was never initialised or was already
 * destroyed: in that state the program pointers are the originals, which must
 * not be freed, and there is no saved record to read back. */
void GX2DestroyShader(GX2Shader* shader)
{
   if (!shader->fs.program)
      return;

#ifndef GX2_CAN_ACCESS_DATA_SECTION
   {
      /* the saved pointers live directly behind the fetch shader (byte offset) */
      org_programs_t* org = (org_programs_t*)((uint8_t*)shader->fs.program + shader->fs.size);

      MEM2_free(shader->vs.program);
      MEM2_free(shader->ps.program);

      /* geometry programs were only copied when an original one existed */
      if (org->gs_program)
      {
         MEM2_free(shader->gs.program);
         MEM2_free(shader->gs.copyProgram);
      }

      shader->vs.program     = org->vs_program;
      shader->ps.program     = org->ps_program;
      shader->gs.program     = org->gs_program;
      shader->gs.copyProgram = org->gs_copy_program;
   }
#endif

   MEM2_free(shader->fs.program);
   shader->fs.program = NULL;
}
/* Makes `shader` current: sets its vertex, pixel and fetch shaders, and the
 * geometry shader as well when the shader carries a geometry program. */
void GX2SetShader(GX2Shader* shader)
{
   GX2SetVertexShader(&shader->vs);
   GX2SetPixelShader(&shader->ps);
   GX2SetFetchShader(&shader->fs);

   /* geometry stage is optional */
   if (!shader->gs.program)
      return;

   GX2SetGeometryShader(&shader->gs);
}
/* Debug helper: prints `shader` side by side with the reference `org`
 * (four 32-bit words of each per row), then lists every word that differs.
 *
 * shader, shader_size : program under test and its size in bytes
 * org, org_size       : reference program and its size in bytes
 * name                : label printed at the top of the report
 *
 * Neither buffer is read past its own size: words missing from a row are
 * printed as "----------", and the word-by-word comparison covers only the
 * range both buffers share (a size difference is reported separately). */
void check_shader_verbose(u32* shader, u32 shader_size, u32* org, u32 org_size, const char* name)
{
   const u32 shader_words = shader_size / 4;
   const u32 org_words    = org_size / 4;
   const u32 common_words = shader_words < org_words ? shader_words : org_words;

   printf("%s :\n", name);
   DEBUG_VAR(shader_size);
   DEBUG_VAR(org_size);

   if (shader_size != org_size)
      printf("size mismatch : 0x%08X should be 0x%08X\n", (unsigned)shader_size, (unsigned)org_size);

   for (u32 row = 0; row < shader_words; row += 4)
   {
      /* columns 0-3 come from `shader`, columns 4-7 from `org` */
      for (u32 col = 0; col < 8; col++)
      {
         const u32* words = col < 4 ? shader : org;
         const u32  limit = col < 4 ? shader_words : org_words;
         const u32  idx   = row + (col & 3);

         if (idx < limit)
            printf("0x%08X", (unsigned)words[idx]);
         else
            printf("----------");

         printf("%c", col == 7 ? '\n' : ' ');
      }
   }

   for (u32 i = 0; i < common_words; i++)
   {
      if (shader[i] != org[i])
      {
         printf("%i(%X): 0x%08X(0x%08X) should be 0x%08X(0x%08X) \n", (int)i, (unsigned)i,
               (unsigned)shader[i], (unsigned)__builtin_bswap32(shader[i]),
               (unsigned)org[i], (unsigned)__builtin_bswap32(org[i]));
      }
   }
}
/* Debug helper: compares `shader_` against the reference `org_` as arrays of
 * 32-bit words and prints a one-line verdict ("no errors") or one line per
 * problem found.
 *
 * shader_, shader_size : program under test and its size in bytes
 * org_, org_size       : reference program and its size in bytes
 * name                 : label printed in front of the report
 *
 * Only the range both buffers share is compared word by word, so the shorter
 * buffer is never read past its end; a size difference is reported on its
 * own line. */
void check_shader(const void* shader_, u32 shader_size, const void* org_, u32 org_size, const char* name)
{
   const u32* shader      = (const u32*)shader_;
   const u32* org         = (const u32*)org_;
   const u32  shader_words = shader_size / 4;
   const u32  org_words    = org_size / 4;
   const u32  common_words = shader_words < org_words ? shader_words : org_words;
   bool different          = false;

   printf("%-20s : ", name);

   if (shader_size != org_size)
   {
      different = true;
      printf("\nsize mismatch : 0x%08X should be 0x%08X", (unsigned)shader_size, (unsigned)org_size);
   }

   for (u32 i = 0; i < common_words; i++)
   {
      if (shader[i] != org[i])
      {
         different = true;
         printf("\n%i(%X): 0x%08X(0x%08X) should be 0x%08X(0x%08X)", (int)i, (unsigned)i,
               (unsigned)shader[i], (unsigned)__builtin_bswap32(shader[i]),
               (unsigned)org[i], (unsigned)__builtin_bswap32(org[i]));
      }
   }

   if (!different)
      printf("no errors");

   printf("\n");
}

67
wiiu/shader_utils.h Normal file
View File

@ -0,0 +1,67 @@
#pragma once
#include <wiiu/gx2/shaders.h>
/* incompatible with elf builds */
//#define GX2_CAN_ACCESS_DATA_SECTION
#ifdef __cplusplus
extern "C" {
#endif
/* Two-component float vector, aligned to 16 bytes. The two anonymous structs
 * overlay the same storage, so the components can be addressed either as
 * x/y or as width/height. Both views request little-endian scalar storage
 * through the scalar_storage_order attribute. */
typedef union
__attribute__((aligned (16)))
{
/* positional view */
struct __attribute__((scalar_storage_order ("little-endian")))
{
float x;
float y;
};
/* size view (aliases x/y) */
struct __attribute__((scalar_storage_order ("little-endian")))
{
float width;
float height;
};
}GX2_vec2;
/* Four-component float vector, aligned to 16 bytes, with little-endian scalar
 * storage requested through the scalar_storage_order attribute. The first two
 * components are always x/y; the last two can be addressed either as z/w or
 * as width/height through the anonymous union. */
typedef struct
__attribute__((aligned (16)))
__attribute__((scalar_storage_order ("little-endian")))
{
float x;
float y;
union
{
/* third and fourth component as z/w */
struct __attribute__((scalar_storage_order ("little-endian")))
{
float z;
float w;
};
/* third and fourth component as width/height (aliases z/w) */
struct __attribute__((scalar_storage_order ("little-endian")))
{
float width;
float height;
};
};
}GX2_vec4;
/* Bundle of everything that makes up one shader: the vertex, pixel and
 * geometry stages, the fetch shader, and the attribute stream description
 * the fetch shader is built from. */
typedef struct
{
GX2VertexShader vs;
GX2PixelShader ps;
GX2GeometryShader gs; /* optional: GX2SetShader() only sets it when gs.program is non-NULL */
GX2FetchShader fs; /* allocated by GX2InitShader(), released by GX2DestroyShader() */
GX2AttribStream* attribute_stream; /* input to GX2InitFetchShaderEx() in GX2InitShader() */
}GX2Shader;
/* Allocates and builds the fetch shader of `shader`; returns immediately when
 * shader->fs.program is already set. */
void GX2InitShader(GX2Shader* shader);
/* Frees the fetch shader of `shader` and resets shader->fs.program to NULL. */
void GX2DestroyShader(GX2Shader* shader);
/* Sets the vertex, pixel and fetch shaders of `shader`, plus the geometry
 * shader when shader->gs.program is non-NULL. */
void GX2SetShader(GX2Shader* shader);
/* Debug helper: compares two shader programs as 32-bit words (sizes are in
 * bytes) and prints either "no errors" or the differences, labelled `name`. */
void check_shader(const void* shader_, u32 shader_size, const void* org_, u32 org_size, const char* name);
/* Debug helper: like check_shader(), but also prints a side-by-side dump of
 * both programs before listing the differences. */
void check_shader_verbose(u32* shader, u32 shader_size, u32* org, u32 org_size, const char* name);
#ifdef __cplusplus
}
#endif

319
wiiu/sprite_shader.c Normal file
View File

@ -0,0 +1,319 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2014-2016 - Ali Bouhlel
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include <stddef.h>
#include <malloc.h>
#include <string.h>
#include <wiiu/gx2/common.h>
#include "sprite_shader.h"
#include "gx2_shader_inl.h"
/* Vertex shader program of the sprite shader, written out instruction by
 * instruction with macros (presumably the ones from gx2_shader_inl.h).
 * Layout: 32 u64 control-flow slots followed by 26 u64 ALU slots; the ALU
 * clause is referenced from the control flow as ALU(32, 26), i.e. it starts
 * at slot 32 and is 26 slots long. The results are written out with MEM_RING
 * rather than exported directly. */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u64 cf[32];
u64 alu[26];
} vs_program =
{
/* control flow */
{
CALL_FS NO_BARRIER,
ALU(32, 26) KCACHE0(CB1, _0_15) KCACHE1(CB2, _0_15),
MEM_RING(WRITE( 0), _R1, _xyzw, ARRAY_SIZE(1), ELEM_SIZE(3)) BURSTCNT(1),
MEM_RING(WRITE(32), _R0, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER
END_OF_PROGRAM
},
/* ALU clause; each ALU_LAST closes one instruction group */
{
ALU_MOV_x2(_R127,_x, _R3,_y), //@64
ALU_MOV_x2(__,_y, _R3,_x),
ALU_MOV_x2(_R127,_z, _R3,_w),
ALU_MOV_x2(__,_w, _R3,_z), //@70
ALU_RECIP_IEEE(__,__, KC0(0), _x) SCL_210
ALU_LAST,
ALU_MUL_IEEE(_R0,_z, ALU_SRC_PV, _w, ALU_SRC_PS, _x),
ALU_MUL_IEEE(__,_w, ALU_SRC_PV,_y, ALU_SRC_PS,_x),
ALU_RECIP_IEEE(__,_z, KC0(0),_y) SCL_210
ALU_LAST,
ALU_ADD(_R0,_x, ALU_SRC_PV,_w, ALU_SRC_1 _NEG,_x), //@80
ALU_MUL_IEEE(__,_z, _R127,_x, ALU_SRC_PS,_x),
ALU_MUL_IEEE(_R0,_w, _R127,_z, ALU_SRC_PS,_x),
ALU_RECIP_IEEE(__,__, KC1(0),_x) SCL_210
ALU_LAST,
ALU_MUL_IEEE(_R3,_x, _R2,_x, ALU_SRC_PS,_x),
ALU_ADD(_R0,_y, ALU_SRC_PV _NEG,_z, ALU_SRC_1,_x), //@90
ALU_MUL_IEEE(_R3,_z, _R2,_z, ALU_SRC_PS,_x),
ALU_RECIP_IEEE(__,__, KC1(0),_y) SCL_210
ALU_LAST,
ALU_MUL_IEEE(_R3,_y, _R2,_y, ALU_SRC_PS,_x),
ALU_MUL_IEEE(_R3,_w, _R2,_w, ALU_SRC_PS,_x)
ALU_LAST,
ALU_MOV(_R1,_x, _R1,_x), //@100
ALU_MOV(_R1,_y, _R1,_y),
ALU_MOV(_R1,_z, _R1,_z),
ALU_MOV(_R1,_w, _R1,_w)
ALU_LAST,
ALU_MOV(_R2,_x, _R3,_x),
ALU_MOV(_R2,_y, _R3,_y),
ALU_MOV(_R2,_z, _R3,_z),
ALU_MOV(_R2,_w, _R3,_w)
ALU_LAST,
}
};
/* Pixel shader program of the sprite shader. Layout in u64 slots: control
 * flow at 0, ALU clause at 32, texture clause at 48 (matching the TEX(48, 1)
 * and ALU(32, 4) references in the control flow). Going by the macro names,
 * it samples texture t0 with sampler s0 into R1, multiplies R0 by R1
 * component-wise and exports R0 to PIX0. */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u64 cf[32]; // @0
u64 alu[16]; // @32
u64 tex[1 * 2]; // @48
} ps_program =
{
/* control flow */
{
TEX(48, 1) VALID_PIX,
ALU(32, 4),
EXP_DONE(PIX0, _R0, _x, _y, _z, _w)
END_OF_PROGRAM
},
/* ALU clause: only 4 of the 16 slots are used */
{
ALU_MUL(_R0,_x, _R0,_x, _R1,_x),
ALU_MUL(_R0,_y, _R0,_y, _R1,_y),
ALU_MUL(_R0,_z, _R0,_z, _R1,_z),
ALU_MUL(_R0,_w, _R0,_w, _R1,_w)
ALU_LAST,
},
/* texture clause */
{
TEX_SAMPLE(_R1,_x,_y,_z,_w, _R1,_x,_y,_0,_x, _t0, _s0)
}
};
/* Geometry shader program of the sprite shader. Layout in u64 slots: control
 * flow at 0, ALU clause at 32 (48 slots reserved, 33 used per ALU(32, 33),
 * one of which is a literal), fetch clause at 80 (TEX(80, 3)).
 * The control flow issues EMIT_VERTEX four times, each preceded by three
 * MEM_RING writes, so four vertices are emitted per invocation. */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u64 cf[32]; // @0
u64 alu[80-32]; // @32
u64 tex[3 * 2]; // @80
} gs_program =
{
/* control flow */
{
TEX(80, 3),
MEM_RING(WRITE( 0), _R7, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)),
ALU(32, 33),
MEM_RING(WRITE( 16), _R2, _xy__, ARRAY_SIZE(0), ELEM_SIZE(3)),
MEM_RING(WRITE( 32), _R3, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
EMIT_VERTEX,
MEM_RING(WRITE( 48), _R7, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE( 64), _R4, _xy__, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE( 80), _R0, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
EMIT_VERTEX,
MEM_RING(WRITE( 96), _R7, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE(112), _R5, _xy__, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE(128), _R6, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
EMIT_VERTEX,
MEM_RING(WRITE(144), _R7, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE(160), _R8, _xy__, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE(176), _R9, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
EMIT_VERTEX
END_OF_PROGRAM
},
/* ALU clause; each ALU_LAST closes one instruction group */
{
ALU_MOV(_R127,_x, _R1,_z),
ALU_MOV(__,_y, ALU_SRC_0,_x),
ALU_MOV(_R3,_z, ALU_SRC_0,_x),
ALU_MOV(_R127,_w, ALU_SRC_0,_x),
ALU_MOV(_R3,_w, ALU_SRC_LITERAL,_x)
ALU_LAST,
/* 0x3F800000 is the bit pattern of 1.0f */
ALU_LITERAL(0x3F800000),
ALU_ADD(_R3,_x, _R1,_x, ALU_SRC_PV,_x),
ALU_ADD(_R3,_y, _R1,_y, ALU_SRC_PV,_y),
ALU_MOV(__,_z, _R0,_z),
ALU_MOV(__,_w, ALU_SRC_0,_x),
ALU_ADD(_R4,_x, _R0,_x, ALU_SRC_0,_x)
ALU_LAST,
ALU_ADD(_R2,_x, _R0,_x, ALU_SRC_PV,_z),
ALU_ADD(_R2,_y, _R0,_y, ALU_SRC_PV,_w),
ALU_MOV(_R127,_z, _R1 _NEG,_w),
ALU_MOV(_R126,_w, _R1 _NEG,_w),
ALU_ADD(_R4,_y, _R0,_y, ALU_SRC_0,_x)
ALU_LAST,
ALU_ADD(_R5,_x, _R0,_x, _R0,_z),
ALU_ADD(_R5,_y, _R0,_y, _R0,_w),
ALU_MOV(__,_z, _R0,_w),
ALU_ADD(_R8,_x, _R127,_w, _R0,_x)
ALU_LAST,
ALU_ADD(_R0,_x, _R1,_x, ALU_SRC_0,_x),
ALU_ADD(_R8,_y, ALU_SRC_PV,_z, _R0,_y),
ALU_MOV(_R0,_z, _R3,_z),
ALU_MOV(_R0,_w, _R3,_w),
ALU_ADD(_R0,_y, _R1,_y, ALU_SRC_0,_x)
ALU_LAST,
ALU_ADD(_R6,_x, _R1,_x, _R127,_x) VEC_021,
ALU_ADD(_R6,_y, _R1,_y, _R127,_z),
ALU_MOV(_R6,_z, _R3,_z),
ALU_MOV(_R6,_w, _R3,_w),
ALU_ADD(_R9,_x, _R127,_w, _R1,_x)
ALU_LAST,
ALU_ADD(_R9,_y, _R126,_w, _R1,_y),
ALU_MOV(_R9,_z, _R3,_z),
ALU_MOV(_R9,_w, _R3,_w) VEC_120
ALU_LAST,
},
/* fetch clause: three vertex fetches into R7, R1 and R0 */
{
VTX_FETCH(_R7,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(16), OFFSET(0)), // @160
VTX_FETCH(_R1,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(16), OFFSET(32)),
VTX_FETCH(_R0,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(16), OFFSET(16)),
}
};
/* Copy program that accompanies the geometry shader (installed as
 * sprite_shader.gs.copyProgram). Layout in u64 slots: control flow at 0,
 * vertex-fetch clause at 16 (VTX(16, 3)). It fetches three values into
 * R1..R3 and exports R1 as POS0 and, with BURSTCNT(1), R2 onwards as
 * PARAM0. */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u64 cf[16]; // @0
u64 vtx[3 * 2]; // @16
} gs_copy_program=
{
/* control flow */
{
VTX(16, 3),
EXP_DONE(POS0, _R1,_x,_y,_z,_w),
EXP_DONE(PARAM0, _R2,_x,_y,_z,_w) BURSTCNT(1)
END_OF_PROGRAM
},
/* vertex-fetch clause */
{
VTX_FETCH(_R1,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(16), OFFSET(32)),
VTX_FETCH(_R2,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(32), OFFSET(0)),
VTX_FETCH(_R3,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MINI(16), OFFSET(16)),
}
};
/* Vertex attributes of the sprite shader, installed as
 * sprite_shader.vs.attribVars. Three float4 attributes; the last field of
 * each entry runs 0, 1, 2 (presumably the attribute location - confirm
 * against the GX2AttribVar definition). */
static GX2AttribVar attributes[] =
{
{"position", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 0},
{"coords", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 1},
{"color", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 2},
};
/* Describes where each attribute lives inside a sprite_vertex_t, one entry
 * per member: pos and coord are read as four 32-bit floats, color as four
 * 8-bit unorm channels. Installed as sprite_shader.attribute_stream, which
 * GX2InitShader() feeds to GX2InitFetchShaderEx(). */
static GX2AttribStream attribute_stream[] =
{
{0, 0, offsetof(sprite_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _w), GX2_ENDIAN_SWAP_DEFAULT},
{1, 0, offsetof(sprite_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _w), GX2_ENDIAN_SWAP_DEFAULT},
{2, 0, offsetof(sprite_vertex_t, color), GX2_ATTRIB_FORMAT_UNORM_8_8_8_8,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _w), GX2_ENDIAN_SWAP_DEFAULT},
};
/* Sampler variables of the pixel shader (installed as
 * sprite_shader.ps.samplerVars): a single 2D sampler named "s". */
static GX2SamplerVar samplers[] =
{
{ "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
};
/* Uniform blocks of the vertex shader (installed as
 * sprite_shader.vs.uniformBlocks), each the size of one GX2_vec2. The middle
 * field is 1 and 2 respectively, which lines up with the CB1 / CB2 constant
 * caches named in vs_program (presumably the block's buffer slot - confirm
 * against the GX2UniformBlock definition).
 * File-local like the other tables here: it is only reached through
 * sprite_shader and is not declared in sprite_shader.h. */
static GX2UniformBlock uniform_blocks[] =
{
   {"UBO_vp", 1, sizeof(GX2_vec2)},
   {"UBO_tex", 2, sizeof(GX2_vec2)},
};
/* Uniform variables of the vertex shader (installed as
 * sprite_shader.vs.uniformVars): two float2 values, "vp_size" and
 * "tex_size". The last field is 0 and 1 respectively (presumably the index
 * of the owning entry in the uniform block table - confirm against the
 * GX2UniformVar definition).
 * File-local like the other tables here: it is only reached through
 * sprite_shader and is not declared in sprite_shader.h. */
static GX2UniformVar uniform_vars[] =
{
   {"vp_size", GX2_SHADER_VAR_TYPE_FLOAT2, 1, 0, 0},
   {"tex_size", GX2_SHADER_VAR_TYPE_FLOAT2, 1, 0, 1},
};
/* The sprite shader: ties the four programs defined above to their register
 * settings and variable tables. All three stages run in
 * GX2_SHADER_MODE_GEOMETRY_SHADER. The fetch shader member is left
 * zero-initialised here; GX2InitShader() builds it from attribute_stream. */
GX2Shader sprite_shader =
{
/* vertex shader */
{
{
.sq_pgm_resources_vs.num_gprs = 4,
.sq_pgm_resources_vs.stack_size = 1,
.vgt_primitiveid_en.enable = TRUE,
.spi_vs_out_config.vs_export_count = 0,
.num_spi_vs_out_id = 0,
/* all ten semantic slots are set to 0xFF */
{
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
},
/* three vertex semantics in use: mask clears all but the low three bits */
.sq_vtx_semantic_clear = ~0x7,
.num_sq_vtx_semantic = 3,
{
2, 1, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
},
.vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0x0,
.vgt_hos_reuse_depth.reuse_depth = 0x0,
}, /* regs */
.size = sizeof(vs_program),
.program = (uint8_t*)&vs_program,
.mode = GX2_SHADER_MODE_GEOMETRY_SHADER,
/* each count is followed positionally by the matching table pointer */
.uniformBlockCount = countof(uniform_blocks), uniform_blocks,
.uniformVarCount = countof(uniform_vars), uniform_vars,
.attribVarCount = countof(attributes), attributes,
.ringItemSize = 12,
},
/* pixel shader */
{
{
.sq_pgm_resources_ps.num_gprs = 2,
.sq_pgm_exports_ps.export_mode = 0x2,
.spi_ps_in_control_0.num_interp = 2,
.spi_ps_in_control_0.persp_gradient_ena = 1,
.spi_ps_in_control_0.baryc_sample_cntl = spi_baryc_cntl_centers_only,
.num_spi_ps_input_cntl = 2, {{.semantic = 0, .default_val = 1},{.semantic = 1, .default_val = 1}},
.cb_shader_mask.output0_enable = 0xF,
.cb_shader_control.rt0_enable = TRUE,
.db_shader_control.z_order = db_z_order_early_z_then_late_z,
}, /* regs */
.size = sizeof(ps_program),
.program = (uint8_t*)&ps_program,
.mode = GX2_SHADER_MODE_GEOMETRY_SHADER,
/* count followed positionally by the table pointer */
.samplerVarCount = countof(samplers), samplers,
},
/* geometry shader (main program plus copy program) */
{
{
.sq_pgm_resources_gs.num_gprs = 10,
.vgt_gs_out_prim_type = VGT_GS_OUT_PRIMITIVE_TYPE_TRISTRIP,
.vgt_gs_mode.mode = VGT_GS_ENABLE_MODE_SCENARIO_G,
.vgt_gs_mode.cut_mode = VGT_GS_CUT_MODE_128,
.num_spi_vs_out_id = 1,
/* first slot carries semantics 0 and 1; everything else is 0xFF */
{
{.semantic_0 = 0, .semantic_1 = 1, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
},
.sq_pgm_resources_vs.num_gprs = 4,
.sq_gs_vert_itemsize = 12,
.spi_vs_out_config.vs_export_count = 1,
}, /* regs */
.size = sizeof(gs_program),
.program = (uint8_t*)&gs_program,
.copyProgramSize = sizeof(gs_copy_program),
.copyProgram = (uint8_t*)&gs_copy_program,
.mode = GX2_SHADER_MODE_GEOMETRY_SHADER,
.ringItemSize = 48,
},
.attribute_stream = attribute_stream,
};

52
wiiu/sprite_shader.h Normal file
View File

@ -0,0 +1,52 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2014-2016 - Ali Bouhlel
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef SPRITE_SHADER_H
#define SPRITE_SHADER_H
#include <wiiu/shader_utils.h>
#ifdef __cplusplus
extern "C" {
#endif
/* One vertex as consumed by the sprite shader: a position rectangle, a
 * texture-coordinate rectangle and a packed 32-bit color. */
typedef struct
{
/* position and size of the sprite */
struct
{
float x;
float y;
float width;
float height;
}pos;
/* origin and size of the texture region */
struct
{
float u;
float v;
float width;
float height;
}coord;
/* packed color, one 32-bit word */
u32 color;
}sprite_vertex_t;
extern GX2Shader sprite_shader;
#ifdef __cplusplus
}
#endif
#endif // SPRITE_SHADER_H

View File

@ -170,6 +170,15 @@ IMPORT(GX2InitFetchShaderEx);
IMPORT(GX2SetFetchShader); IMPORT(GX2SetFetchShader);
IMPORT(GX2SetVertexShader); IMPORT(GX2SetVertexShader);
IMPORT(GX2SetPixelShader); IMPORT(GX2SetPixelShader);
IMPORT(GX2SetGeometryShader);
IMPORT(GX2SetGeometryUniformBlock);
IMPORT(GX2SetVertexUniformBlock);
IMPORT(GX2SetPixelUniformBlock);
IMPORT(GX2CalcGeometryShaderInputRingBufferSize);
IMPORT(GX2CalcGeometryShaderOutputRingBufferSize);
IMPORT(GX2SetGeometryShaderInputRingBuffer);
IMPORT(GX2SetGeometryShaderOutputRingBuffer);
IMPORT(GX2SetShaderModeEx);
IMPORT(GX2SetAttribBuffer); IMPORT(GX2SetAttribBuffer);
IMPORT(GX2InitTextureRegs); IMPORT(GX2InitTextureRegs);
IMPORT(GX2InitSampler); IMPORT(GX2InitSampler);

View File

@ -16,125 +16,67 @@
#include <stddef.h> #include <stddef.h>
#include <malloc.h> #include <malloc.h>
#include <string.h> #include <string.h>
#include <wiiu/gx2/common.h>
#include "tex_shader.h" #include "tex_shader.h"
#include "gx2_shader_inl.h" #include "gx2_shader_inl.h"
/*******************************************************
* Vertex Shader GLSL source:
*******************************************************
attribute vec2 position;
attribute vec2 tex_coord_in;
attribute vec4 color_in;
varying vec2 tex_coord;
varying vec4 color;
void main()
{
gl_Position = vec4(position, 0.0, 1.0);
tex_coord = tex_coord_in;
color = color_in;
}
******************************************************
* assembly:
******************************************************
00 CALL_FS NO_BARRIER
01 ALU: ADDR(32) CNT(5)
0 x: MOV R3.x, R3.x
y: MOV R3.y, R3.y
z: MOV R2.z, 0.0f
w: MOV R2.w, (0x3F800000, 1.0f).x
02 EXP_DONE: POS0, R2
03 EXP: PARAM0, R1 NO_BARRIER
04 EXP_DONE: PARAM1, R3.xyzz NO_BARRIER
END_OF_PROGRAM
******************************************************
*/
__attribute__((aligned(GX2_SHADER_ALIGNMENT))) __attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct static struct
{ {
u32 cf[32 * 2]; /* first ADDR() * 2 */ u64 cf[16];
u32 alu[5 * 2]; /* alu CNT() * 2 */
} vs_program = } vs_program =
{ {
{ {
CALL_FS NO_BARRIER, CALL_FS NO_BARRIER,
ALU(32, 5), EXP_DONE(POS0, _R1, _x, _y, _0, _1),
EXP_DONE(POS0, _R2, _X, _Y, _Z, _W), EXP_DONE(PARAM0, _R2, _x, _y, _0, _0) NO_BARRIER
EXP(PARAM0, _R1, _X, _Y, _Z, _W) NO_BARRIER,
EXP_DONE(PARAM1, _R3, _X, _Y, _Z, _Z) NO_BARRIER
END_OF_PROGRAM END_OF_PROGRAM
},
{
ALU_MOV(_R3,_X, _R3,_X),
ALU_MOV(_R3,_Y, _R3,_Y),
ALU_MOV(_R2,_Z, ALU_SRC_0,_X),
ALU_LAST
ALU_MOV(_R2,_W, ALU_SRC_LITERAL,_X), ALU_LITERAL(0x3F800000)
} }
}; };
/*******************************************************
* Pixel Shader GLSL source:
*******************************************************
varying vec2 tex_coord;
varying vec4 color;
uniform sampler2D s;
void main()
{
gl_FragColor = texture2D(s, tex_coord) * color;
}
******************************************************
* assembly:
******************************************************
00 TEX: ADDR(48) CNT(1) VALID_PIX
0 SAMPLE R1, R1.xy0x, t0, s0
01 ALU: ADDR(32) CNT(4)
1 x: MUL R0.x, R0.x, R1.x
y: MUL R0.y, R0.y, R1.y
z: MUL R0.z, R0.z, R1.z
w: MUL R0.w, R0.w, R1.w
02 EXP_DONE: PIX0, R0
END_OF_PROGRAM
*******************************************************
*/
__attribute__((aligned(GX2_SHADER_ALIGNMENT))) __attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct static struct
{ {
u32 cf[32 * 2]; /* first ADDR() * 2 */ u64 cf[16];
u32 alu[(48-32) * 2]; /* (tex ADDR() - alu ADDR()) * 2 */ u64 tex[1 * 2];
u32 tex[1 * 3]; /* tex CNT() * 3 */ }
} ps_program = ps_program =
{ {
{ {
TEX(48, 1) VALID_PIX, TEX(16, 1) VALID_PIX,
ALU(32, 4), EXP_DONE(PIX0, _R0, _x, _y, _z, _w)
EXP_DONE(PIX0, _R0, _X, _Y, _Z, _W)
END_OF_PROGRAM END_OF_PROGRAM
}, },
{ {
ALU_MUL(_R0,_X, _R0,_X, _R1,_X), TEX_SAMPLE(_R0,_x,_y,_z,_w, _R0,_x,_y,_0,_0, _t0, _s0)
ALU_MUL(_R0,_Y, _R0,_Y, _R1,_Y),
ALU_MUL(_R0,_Z, _R0,_Z, _R1,_Z),
ALU_LAST
ALU_MUL(_R0,_W, _R0,_W, _R1,_W),
},
{
TEX_SAMPLE(_R1,_X,_Y,_Z,_W, _R1,_X,_Y,_0,_X, _t0, _s0)
} }
}; };
tex_shader_t tex_shader = static GX2AttribVar attributes[] =
{
{ "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0},
{ "tex_coord", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1},
};
static GX2AttribStream attribute_stream[] =
{
{0, 0, offsetof(tex_shader_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT},
{1, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT},
};
static GX2SamplerVar samplers[] =
{
{ "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
};
GX2Shader tex_shader =
{ {
{ {
{ {
.sq_pgm_resources_vs.num_gprs = 4, .sq_pgm_resources_vs.num_gprs = 3,
.sq_pgm_resources_vs.stack_size = 1, .sq_pgm_resources_vs.stack_size = 1,
.spi_vs_out_config.vs_export_count = 1, .spi_vs_out_config.vs_export_count = 1,
.num_spi_vs_out_id = 1, .num_spi_vs_out_id = 1,
@ -150,10 +92,10 @@ tex_shader_t tex_shader =
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
}, },
.sq_vtx_semantic_clear = ~0x7, .sq_vtx_semantic_clear = ~0x3,
.num_sq_vtx_semantic = 3, .num_sq_vtx_semantic = 2,
{ {
0, 1, 2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
}, },
.vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE, .vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE,
@ -162,11 +104,11 @@ tex_shader_t tex_shader =
.size = sizeof(vs_program), .size = sizeof(vs_program),
.program = (uint8_t*)&vs_program, .program = (uint8_t*)&vs_program,
.mode = GX2_SHADER_MODE_UNIFORM_REGISTER, .mode = GX2_SHADER_MODE_UNIFORM_REGISTER,
.attribVarCount = sizeof(tex_shader.attributes) / sizeof(GX2AttribVar), (GX2AttribVar*) &tex_shader.attributes, .attribVarCount = countof(attributes), attributes,
}, },
{ {
{ {
.sq_pgm_resources_ps.num_gprs = 2, .sq_pgm_resources_ps.num_gprs = 1,
.sq_pgm_exports_ps.export_mode = 0x2, .sq_pgm_exports_ps.export_mode = 0x2,
.spi_ps_in_control_0.num_interp = 2, .spi_ps_in_control_0.num_interp = 2,
.spi_ps_in_control_0.persp_gradient_ena = 1, .spi_ps_in_control_0.persp_gradient_ena = 1,
@ -179,28 +121,7 @@ tex_shader_t tex_shader =
.size = sizeof(ps_program), .size = sizeof(ps_program),
.program = (uint8_t*)&ps_program, .program = (uint8_t*)&ps_program,
.mode = GX2_SHADER_MODE_UNIFORM_REGISTER, .mode = GX2_SHADER_MODE_UNIFORM_REGISTER,
.samplerVarCount = 1, .samplerVarCount = countof(samplers), samplers,
.samplerVars = (GX2SamplerVar*) &tex_shader.sampler, },
}, .attribute_stream = attribute_stream,
.sampler = { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
.attributes = {
.color = { "color_in", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 0},
.position = { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1},
.tex_coord = { "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 2},
},
.attribute_stream = {
.color = {
0, 0, offsetof(tex_shader_vertex_t, color), GX2_ATTRIB_FORMAT_UNORM_8_8_8_8,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _Z, _W), GX2_ENDIAN_SWAP_DEFAULT
},
.position = {
1, 0, offsetof(tex_shader_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT
},
.tex_coord = {
2, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT
}
},
{},
}; };

View File

@ -15,32 +15,13 @@
#ifndef TEX_SHADER_H #ifndef TEX_SHADER_H
#define TEX_SHADER_H #define TEX_SHADER_H
#include <wiiu/gx2.h>
#include <wiiu/shader_utils.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
typedef struct __attribute__((aligned(GX2_VERTEX_BUFFER_ALIGNMENT)))
{
GX2VertexShader vs;
GX2PixelShader ps;
GX2SamplerVar sampler;
struct
{
GX2AttribVar color;
GX2AttribVar position;
GX2AttribVar tex_coord;
} attributes;
struct
{
GX2AttribStream color;
GX2AttribStream position;
GX2AttribStream tex_coord;
} attribute_stream;
GX2FetchShader fs;
}tex_shader_t;
typedef struct typedef struct
{ {
struct struct
@ -54,11 +35,9 @@ typedef struct
float u; float u;
float v; float v;
}coord; }coord;
u32 color;
}tex_shader_vertex_t; }tex_shader_vertex_t;
extern tex_shader_t tex_shader; extern GX2Shader tex_shader;
#ifdef __cplusplus #ifdef __cplusplus
} }