Merge pull request #6042 from aliaspider/master

(WIIU) add a sprite shader.
This commit is contained in:
Twinaphex 2018-01-04 18:00:18 +01:00 committed by GitHub
commit c04b555f04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 1369 additions and 632 deletions

View File

@ -66,7 +66,9 @@ else
DEFINES += -DHAVE_FILTERS_BUILTIN
OBJ += wiiu/system/missing_libc_functions.o
OBJ += wiiu/shader_utils.o
OBJ += wiiu/tex_shader.o
OBJ += wiiu/sprite_shader.o
ifeq ($(GRIFFIN_BUILD), 1)
OBJ += griffin/griffin.o

View File

@ -124,14 +124,14 @@ struct command
#endif
};
#ifdef HAVE_CHEEVOS
#if defined(HAVE_COMMAND) && defined(HAVE_CHEEVOS)
static bool command_read_ram(const char *arg);
static bool command_write_ram(const char *arg);
#endif
static const struct cmd_action_map action_map[] = {
{ "SET_SHADER", command_set_shader, "<shader path>" },
#ifdef HAVE_CHEEVOS
#if defined(HAVE_COMMAND) && defined(HAVE_CHEEVOS)
{ "READ_CORE_RAM", command_read_ram, "<address> <number of bytes>" },
{ "WRITE_CORE_RAM", command_write_ram, "<address> <byte1> <byte2> ..." },
#endif

View File

@ -351,16 +351,17 @@ static ssize_t wiiu_log_write(struct _reent *r, void *fd, const char *ptr, size_
wiiu_log_lock = 1;
int ret;
int remaining = len;
while (len > 0)
while (remaining > 0)
{
int block = len < 1400 ? len : 1400; // take max 1400 bytes per UDP packet
int block = remaining < 1400 ? remaining : 1400; // take max 1400 bytes per UDP packet
ret = send(wiiu_log_socket, ptr, block, 0);
if (ret < 0)
break;
len -= ret;
remaining -= ret;
ptr += ret;
}

View File

@ -1,6 +1,7 @@
#include <wiiu/gx2.h>
#include "wiiu/tex_shader.h"
#include "wiiu/sprite_shader.h"
#undef _X
#undef _B
@ -21,8 +22,6 @@
#define COLOR_ARGB(r, g, b, a) (((u32)(a) << 24) | ((u32)(r) << 16) | ((u32)(g) << 8) | ((u32)(b) << 0))
#define COLOR_RGBA(r, g, b, a) (((u32)(r) << 24) | ((u32)(g) << 16) | ((u32)(b) << 8) | ((u32)(a) << 0))
//#define GX2_CAN_ACCESS_DATA_SECTION
typedef struct
{
int width;
@ -33,20 +32,19 @@ typedef struct
struct gx2_overlay_data
{
GX2Texture tex;
tex_shader_vertex_t v[4];
sprite_vertex_t v;
float alpha_mod;
};
typedef struct
{
tex_shader_t* shader;
struct
{
GX2Texture texture;
int width;
int height;
bool enable;
tex_shader_vertex_t* v;
sprite_vertex_t* v;
} menu;
#ifdef HAVE_OVERLAY
@ -60,12 +58,19 @@ typedef struct
GX2Sampler sampler_linear;
GX2Texture texture;
tex_shader_vertex_t* v;
GX2_vec2* ubo_vp;
GX2_vec2* ubo_tex;
void* input_ring_buffer;
u32 input_ring_buffer_size;
void* output_ring_buffer;
u32 output_ring_buffer_size;
int width;
int height;
struct
{
tex_shader_vertex_t* v;
sprite_vertex_t* v;
int size;
int current;
} vertex_cache;

View File

@ -145,7 +145,6 @@ static void wiiu_gfx_update_viewport(wiiu_video_t* wiiu)
wiiu->vp.height = height;
}
float scale_w = wiiu->color_buffer.surface.width / wiiu->render_mode.width;
float scale_h = wiiu->color_buffer.surface.height / wiiu->render_mode.height;
wiiu_set_position(wiiu->v, &wiiu->color_buffer,
@ -154,6 +153,8 @@ static void wiiu_gfx_update_viewport(wiiu_video_t* wiiu)
(wiiu->vp.x + wiiu->vp.width) * scale_w,
(wiiu->vp.y + wiiu->vp.height) * scale_h);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
wiiu->should_resize = false;
}
@ -275,70 +276,25 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD,
GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD);
GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
#ifdef GX2_CAN_ACCESS_DATA_SECTION
wiiu->shader = &tex_shader;
#else
/* Initialize shader */
wiiu->shader = MEM2_alloc(sizeof(tex_shader), 0x1000);
memcpy(wiiu->shader, &tex_shader, sizeof(tex_shader));
GX2Invalidate(GX2_INVALIDATE_MODE_CPU, wiiu->shader, sizeof(tex_shader));
GX2InitShader(&tex_shader);
GX2InitShader(&sprite_shader);
GX2SetShader(&tex_shader);
wiiu->shader->vs.program = MEM2_alloc(wiiu->shader->vs.size, GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->vs.program, tex_shader.vs.program, wiiu->shader->vs.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->vs.program, wiiu->shader->vs.size);
wiiu->shader->vs.attribVars = MEM2_alloc(wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar),
GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->vs.attribVars, tex_shader.vs.attribVars ,
wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar));
wiiu->ubo_vp = MEM1_alloc(sizeof(*wiiu->ubo_vp), GX2_UNIFORM_BLOCK_ALIGNMENT);
wiiu->ubo_vp->width = wiiu->color_buffer.surface.width;
wiiu->ubo_vp->height = wiiu->color_buffer.surface.height;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ubo_vp, sizeof(*wiiu->ubo_vp));
wiiu->shader->ps.program = MEM2_alloc(wiiu->shader->ps.size, GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->ps.program, tex_shader.ps.program, wiiu->shader->ps.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->ps.program, wiiu->shader->ps.size);
wiiu->shader->ps.samplerVars = MEM2_alloc(wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar),
GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->ps.samplerVars, tex_shader.ps.samplerVars,
wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar));
wiiu->ubo_tex = MEM1_alloc(sizeof(*wiiu->ubo_tex), GX2_UNIFORM_BLOCK_ALIGNMENT);
wiiu->ubo_tex->width = 1.0;
wiiu->ubo_tex->height = 1.0;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ubo_tex, sizeof(*wiiu->ubo_tex));
#endif
wiiu->shader->fs.size = GX2CalcFetchShaderSizeEx(sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream),
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
wiiu->shader->fs.program = MEM2_alloc(wiiu->shader->fs.size, GX2_SHADER_ALIGNMENT);
GX2InitFetchShaderEx(&wiiu->shader->fs, (uint8_t*)wiiu->shader->fs.program,
sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream),
(GX2AttribStream*)&wiiu->shader->attribute_stream,
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->fs.program, wiiu->shader->fs.size);
GX2SetVertexShader(&wiiu->shader->vs);
GX2SetPixelShader(&wiiu->shader->ps);
GX2SetFetchShader(&wiiu->shader->fs);
wiiu->v = MEM2_alloc(4 * sizeof(*wiiu->v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0,
wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation);
wiiu->v[0].color = 0xFFFFFFFF;
wiiu->v[1].color = 0xFFFFFFFF;
wiiu->v[2].color = 0xFFFFFFFF;
wiiu->v[3].color = 0xFFFFFFFF;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
wiiu->menu.v = MEM2_alloc(4 * sizeof(*wiiu->menu.v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->menu.v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->menu.v, &wiiu->menu.texture, 0, 0,
wiiu->menu.texture.surface.width, wiiu->menu.texture.surface.height, 0);
wiiu->menu.v[0].color = 0xFFFFFF80;
wiiu->menu.v[1].color = 0xFFFFFF80;
wiiu->menu.v[2].color = 0xFFFFFF80;
wiiu->menu.v[3].color = 0xFFFFFF80;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
wiiu->input_ring_buffer_size = GX2CalcGeometryShaderInputRingBufferSize(sprite_shader.vs.ringItemSize);
wiiu->output_ring_buffer_size = GX2CalcGeometryShaderOutputRingBufferSize(sprite_shader.gs.ringItemSize);
wiiu->input_ring_buffer = MEM1_alloc(wiiu->input_ring_buffer_size, 0x1000);
wiiu->output_ring_buffer = MEM1_alloc(wiiu->output_ring_buffer_size, 0x1000);
/* Initialize frame texture */
memset(&wiiu->texture, 0, sizeof(GX2Texture));
@ -389,6 +345,28 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image,
wiiu->menu.texture.surface.imageSize);
wiiu->v = MEM2_alloc(4 * sizeof(*wiiu->v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->v, &wiiu->color_buffer, 0, 0,
wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0,
wiiu->texture.surface.width, wiiu->texture.surface.height, wiiu->rotation);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
wiiu->menu.v = MEM2_alloc(4 * sizeof(*wiiu->menu.v), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu->menu.v->pos.x = 0.0f;
wiiu->menu.v->pos.y = 0.0f;
wiiu->menu.v->pos.width = wiiu->color_buffer.surface.width;
wiiu->menu.v->pos.height = wiiu->color_buffer.surface.height;
wiiu->menu.v->coord.u = 0.0f;
wiiu->menu.v->coord.v = 0.0f;
wiiu->menu.v->coord.width = 1.0f;
wiiu->menu.v->coord.height = 1.0f;
wiiu->menu.v->color = 0xFFFFFF80;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
wiiu->vertex_cache.size = 0x1000;
wiiu->vertex_cache.current = 0;
wiiu->vertex_cache.v = MEM2_alloc(wiiu->vertex_cache.size
@ -399,8 +377,8 @@ static void* wiiu_gfx_init(const video_info_t* video,
GX2InitSampler(&wiiu->sampler_linear, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR);
/* set Texture and Sampler */
GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
GX2SetPixelTexture(&wiiu->texture, tex_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&wiiu->sampler_linear, tex_shader.ps.samplerVars[0].location);
/* clear leftover image */
GX2ClearColor(&wiiu->color_buffer, 0.0f, 0.0f, 0.0f, 1.0f);
@ -450,15 +428,11 @@ static void gx2_overlay_tex_geom(void *data, unsigned image,
if (!o)
return;
o->v[0].coord.u = x;
o->v[0].coord.v = y;
o->v[1].coord.u = x + w;
o->v[1].coord.v = y;
o->v[2].coord.u = x + w;
o->v[2].coord.v = y + h;
o->v[3].coord.u = x ;
o->v[3].coord.v = y + h;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v, sizeof(o->v));
o->v.coord.u = x;
o->v.coord.v = y;
o->v.coord.width = w;
o->v.coord.height = h;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v, sizeof(o->v));
}
static void gx2_overlay_vertex_geom(void *data, unsigned image,
@ -467,15 +441,6 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image,
wiiu_video_t *gx2 = (wiiu_video_t*)data;
struct gx2_overlay_data *o = NULL;
/* Flipped, so we preserve top-down semantics. */
y = 1.0f - y;
h = -h;
/* expand from 0 - 1 to -1 - 1 */
x = (x * 2.0f) - 1.0f;
y = (y * 2.0f) - 1.0f;
w = (w * 2.0f);
h = (h * 2.0f);
if (gx2)
o = (struct gx2_overlay_data*)&gx2->overlay[image];
@ -483,19 +448,12 @@ static void gx2_overlay_vertex_geom(void *data, unsigned image,
if (!o)
return;
o->v[0].pos.x = x;
o->v[0].pos.y = y;
o->v.pos.x = x * gx2->color_buffer.surface.width;
o->v.pos.y = y * gx2->color_buffer.surface.height;
o->v.pos.width = w * gx2->color_buffer.surface.width;
o->v.pos.height = h * gx2->color_buffer.surface.height;
o->v[1].pos.x = x + w;
o->v[1].pos.y = y;
o->v[2].pos.x = x + w;
o->v[2].pos.y = y + h;
o->v[3].pos.x = x ;
o->v[3].pos.y = y + h;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v,sizeof(o->v));
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v,sizeof(o->v));
}
static void gx2_free_overlay(wiiu_video_t *gx2)
@ -555,13 +513,9 @@ static bool gx2_overlay_load(void *data,
gx2_overlay_tex_geom(gx2, i, 0, 0, 1, 1);
gx2_overlay_vertex_geom(gx2, i, 0, 0, 1, 1);
gx2->overlay[i].alpha_mod = 1.0f;
gx2->overlay[i].v[0].color = 0xFFFFFFFF;
gx2->overlay[i].v[1].color = 0xFFFFFFFF;
gx2->overlay[i].v[2].color = 0xFFFFFFFF;
gx2->overlay[i].v[3].color = 0xFFFFFFFF;
gx2->overlay[i].v.color = 0xFFFFFFFF;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, o->v,sizeof(o->v));
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &o->v,sizeof(o->v));
}
@ -588,11 +542,8 @@ static void gx2_overlay_set_alpha(void *data, unsigned image, float mod)
if (gx2)
{
gx2->overlay[image].alpha_mod = mod;
gx2->overlay[image].v[0].color = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod);
gx2->overlay[image].v[1].color = gx2->overlay[image].v[0].color;
gx2->overlay[image].v[2].color = gx2->overlay[image].v[0].color;
gx2->overlay[image].v[3].color = gx2->overlay[image].v[0].color;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, gx2->overlay[image].v, sizeof(gx2->overlay[image].v));
gx2->overlay[image].v.color = COLOR_RGBA(0xFF, 0xFF, 0xFF, 0xFF * gx2->overlay[image].alpha_mod);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, &gx2->overlay[image].v, sizeof(gx2->overlay[image].v));
}
}
@ -604,12 +555,12 @@ static void gx2_render_overlay(void *data)
for (i = 0; i < gx2->overlays; i++){
GX2SetAttribBuffer(0, sizeof(gx2->overlay[i].v), sizeof(*gx2->overlay[i].v), gx2->overlay[i].v);
GX2SetAttribBuffer(0, sizeof(gx2->overlay[i].v), sizeof(gx2->overlay[i].v), &gx2->overlay[i].v);
GX2SetPixelTexture(&gx2->overlay[i].tex, gx2->shader->sampler.location);
GX2SetPixelSampler(&gx2->sampler_linear, gx2->shader->sampler.location);
GX2SetPixelTexture(&gx2->overlay[i].tex, sprite_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&gx2->sampler_linear, sprite_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, 0, 1);
}
@ -657,31 +608,26 @@ static void wiiu_gfx_free(void* data)
GX2SetTVEnable(GX2_DISABLE);
GX2SetDRCEnable(GX2_DISABLE);
GX2DestroyShader(&tex_shader);
GX2DestroyShader(&sprite_shader);
MEM2_free(wiiu->ctx_state);
MEM2_free(wiiu->cmd_buffer);
MEM2_free(wiiu->texture.surface.image);
MEM2_free(wiiu->menu.texture.surface.image);
MEM2_free(wiiu->v);
MEM2_free(wiiu->menu.v);
MEM2_free(wiiu->vertex_cache.v);
MEM1_free(wiiu->color_buffer.surface.image);
MEM1_free(wiiu->ubo_vp);
MEM1_free(wiiu->ubo_tex);
MEM1_free(wiiu->input_ring_buffer);
MEM1_free(wiiu->output_ring_buffer);
MEMBucket_free(wiiu->tv_scan_buffer);
MEMBucket_free(wiiu->drc_scan_buffer);
MEM2_free(wiiu->shader->fs.program);
#ifndef GX2_CAN_ACCESS_DATA_SECTION
MEM2_free(wiiu->shader->vs.program);
MEM2_free(wiiu->shader->vs.attribVars);
MEM2_free(wiiu->shader->ps.program);
MEM2_free(wiiu->shader->ps.samplerVars);
MEM2_free(wiiu->shader);
#endif
MEM2_free(wiiu->v);
MEM2_free(wiiu->menu.v);
free(wiiu);
}
@ -794,17 +740,27 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->texture.surface.image,
wiiu->texture.surface.imageSize);
wiiu_set_tex_coords(wiiu->v, &wiiu->texture, 0, 0, width, height, wiiu->rotation);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->v, 4 * sizeof(*wiiu->v));
}
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_REGISTER);
GX2SetShader(&tex_shader);
GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest,
wiiu->shader->sampler.location);
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->v), sizeof(*wiiu->v), wiiu->v);
GX2SetPixelTexture(&wiiu->texture, tex_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(wiiu->smooth? &wiiu->sampler_linear : &wiiu->sampler_nearest, tex_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
GX2SetShaderMode(GX2_SHADER_MODE_GEOMETRY_SHADER);
GX2SetShader(&sprite_shader);
GX2SetGeometryShaderInputRingBuffer(wiiu->input_ring_buffer, wiiu->input_ring_buffer_size);
GX2SetGeometryShaderOutputRingBuffer(wiiu->output_ring_buffer, wiiu->output_ring_buffer_size);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[0].offset, sprite_shader.vs.uniformBlocks[0].size, wiiu->ubo_vp);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, wiiu->ubo_tex);
#ifdef HAVE_OVERLAY
if (wiiu->overlay_enable)
gx2_render_overlay(wiiu);
@ -814,16 +770,16 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
{
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->menu.v), sizeof(*wiiu->menu.v), wiiu->menu.v);
GX2SetPixelTexture(&wiiu->menu.texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
GX2SetPixelTexture(&wiiu->menu.texture, sprite_shader.ps.samplerVars[0].location);
GX2SetPixelSampler(&wiiu->sampler_linear, sprite_shader.ps.samplerVars[0].location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, 0, 1);
}
wiiu->vertex_cache.current = 0;
GX2SetAttribBuffer(0, wiiu->vertex_cache.size * sizeof(*wiiu->vertex_cache.v),
sizeof(*wiiu->vertex_cache.v), wiiu->vertex_cache.v);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, sprite_shader.ps.samplerVars[0].location);
wiiu->render_msg_enabled = true;
@ -837,7 +793,6 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER,
wiiu->vertex_cache.v, wiiu->vertex_cache.current * sizeof(*wiiu->vertex_cache.v));
if (wiiu->menu.enable)
GX2DrawDone();
@ -895,7 +850,10 @@ static void wiiu_gfx_set_rotation(void* data,
{
wiiu_video_t* wiiu = (wiiu_video_t*) data;
if(wiiu)
{
wiiu->rotation = rotation;
wiiu->should_resize = true;
}
}
static void wiiu_gfx_viewport_info(void* data,
@ -1010,7 +968,16 @@ static void wiiu_gfx_set_texture_frame(void* data, const void* frame, bool rgb32
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image,
wiiu->menu.texture.surface.imageSize);
wiiu_set_tex_coords(wiiu->menu.v, &wiiu->menu.texture, 0, 0, width, height, 0);
wiiu->menu.v->pos.x = 0.0f;
wiiu->menu.v->pos.y = 0.0f;
wiiu->menu.v->pos.width = width;
wiiu->menu.v->pos.height = height;
wiiu->menu.v->coord.u = 0.0f;
wiiu->menu.v->coord.v = 0.0f;
wiiu->menu.v->coord.width = (float)width / wiiu->texture.surface.width;
wiiu->menu.v->coord.height = (float)height / wiiu->texture.surface.height;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu.v, 4 * sizeof(*wiiu->menu.v));
}
static void wiiu_gfx_set_texture_enable(void* data, bool state, bool full_screen)

View File

@ -32,6 +32,7 @@
typedef struct
{
GX2Texture texture;
GX2_vec2* ubo_tex;
const font_renderer_driver_t* font_driver;
void* font_data;
struct font_atlas* atlas;
@ -79,6 +80,13 @@ static void* wiiu_font_init_font(void* data, const char* font_path,
font->atlas->dirty = false;
font->ubo_tex = MEM1_alloc(sizeof(*font->ubo_tex), GX2_UNIFORM_BLOCK_ALIGNMENT);
font->ubo_tex->width = font->texture.surface.width;
font->ubo_tex->height = font->texture.surface.height;
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, font->ubo_tex,
sizeof(*font->ubo_tex));
return font;
}
@ -93,6 +101,7 @@ static void wiiu_font_free_font(void* data, bool is_threaded)
font->font_driver->free(font->font_data);
MEM1_free(font->texture.surface.image);
MEM1_free(font->ubo_tex);
free(font);
}
@ -142,9 +151,7 @@ static void wiiu_font_render_line(
unsigned width = video_info->width;
unsigned height = video_info->height;
int x = roundf(pos_x * width);
int y = roundf((1.0f - pos_y) * height);
int delta_x = 0;
int delta_y = 0;
int y = roundf((1.0 - pos_y) * height);
if(wiiu->vertex_cache.current + (msg_len * 4) > wiiu->vertex_cache.size)
return;
@ -160,11 +167,10 @@ static void wiiu_font_render_line(
break;
}
tex_shader_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
for (i = 0; i < msg_len; i++)
{
int off_x, off_y, tex_x, tex_y, width, height;
const char* msg_tmp = &msg[i];
unsigned code = utf8_walk(&msg_tmp);
unsigned skip = msg_tmp - &msg[i];
@ -181,50 +187,22 @@ static void wiiu_font_render_line(
if (!glyph)
continue;
off_x = glyph->draw_offset_x;
off_y = glyph->draw_offset_y;
tex_x = glyph->atlas_offset_x;
tex_y = glyph->atlas_offset_y;
width = glyph->width;
height = glyph->height;
v->pos.x = x + glyph->draw_offset_x * scale;
v->pos.y = y + glyph->draw_offset_y * scale;
v->pos.width = glyph->width * scale;
v->pos.height = glyph->height * scale;
v->coord.u = glyph->atlas_offset_x;
v->coord.v = glyph->atlas_offset_y;
v->coord.width = glyph->width;
v->coord.height = glyph->height;
float x0 = x + off_x + delta_x * scale;
float y0 = y + off_y + delta_y * scale + height * scale;
float u0 = tex_x;
float v0 = tex_y;
float x1 = x0 + width * scale;
float y1 = y0 - height * scale;
float u1 = u0 + width;
float v1 = v0 + height;
v->color = color;
v[0].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;
v[0].pos.y = (-2.0f * y0 / wiiu->color_buffer.surface.height) + 1.0f;
v[1].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[1].pos.y = (-2.0f * y0 / wiiu->color_buffer.surface.height) + 1.0f;
v[2].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[2].pos.y = (-2.0f * y1 / wiiu->color_buffer.surface.height) + 1.0f;
v[3].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;;
v[3].pos.y = (-2.0f * y1 / wiiu->color_buffer.surface.height) + 1.0f;
v++;
v[0].coord.u = u0 / font->texture.surface.width;
v[0].coord.v = v1 / font->texture.surface.height;
v[1].coord.u = u1 / font->texture.surface.width;
v[1].coord.v = v1 / font->texture.surface.height;
v[2].coord.u = u1 / font->texture.surface.width;
v[2].coord.v = v0 / font->texture.surface.height;
v[3].coord.u = u0 / font->texture.surface.width;
v[3].coord.v = v0 / font->texture.surface.height;
v[0].color = color;
v[1].color = color;
v[2].color = color;
v[3].color = color;
v += 4;
delta_x += glyph->advance_x;
delta_y += glyph->advance_y;
x += glyph->advance_x * scale;
y += glyph->advance_y * scale;
}
int count = v - wiiu->vertex_cache.v - wiiu->vertex_cache.current;
@ -247,14 +225,12 @@ static void wiiu_font_render_line(
}
#if 0
printf("%s\n", msg);
DEBUG_VAR(color);
#endif
GX2SetPixelTexture(&font->texture, sprite_shader.ps.samplerVars[0].location);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, font->ubo_tex);
GX2SetPixelTexture(&font->texture, wiiu->shader->sampler.location);
GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, count, wiiu->vertex_cache.current, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, count, wiiu->vertex_cache.current, 1);
GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, sprite_shader.vs.uniformBlocks[1].size, wiiu->ubo_tex);
wiiu->vertex_cache.current = v - wiiu->vertex_cache.v;
}

View File

@ -77,68 +77,48 @@ static void menu_display_wiiu_draw(void *data)
if (wiiu->vertex_cache.current + 4 > wiiu->vertex_cache.size)
return;
tex_shader_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
float x0 = draw->x;
float y0 = draw->y;
float x1 = x0 + draw->width;
float y1 = y0 + draw->height;
sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current;
if(draw->coords->vertex && draw->coords->vertices == 4)
{
for(int i = 0; i < 4; i++)
{
v[i].pos.x = draw->coords->vertex[i << 1] * 2.0f - 1.0f;
v[i].pos.y = draw->coords->vertex[(i << 1) + 1] * 2.0f - 1.0f;
}
v->pos.x = MIN(MIN(MIN(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]);
v->pos.y = 1.0 - MAX(MAX(MAX(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), draw->coords->vertex[7]);
v->pos.width = MAX(MAX(MAX(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]) - v->pos.x;
v->pos.height = 1.0 - MIN(MIN(MIN(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), draw->coords->vertex[7]) - v->pos.y;
v->pos.x *= wiiu->color_buffer.surface.width;
v->pos.y *= wiiu->color_buffer.surface.height;
v->pos.width *= wiiu->color_buffer.surface.width;
v->pos.height *= wiiu->color_buffer.surface.height;
}
else
{
v[0].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;
v[0].pos.y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f;
v[1].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[1].pos.y = (2.0f * y0 / wiiu->color_buffer.surface.height) - 1.0f;
v[2].pos.x = (2.0f * x1 / wiiu->color_buffer.surface.width) - 1.0f;;
v[2].pos.y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f;
v[3].pos.x = (2.0f * x0 / wiiu->color_buffer.surface.width) - 1.0f;;
v[3].pos.y = (2.0f * y1 / wiiu->color_buffer.surface.height) - 1.0f;
v->pos.x = draw->x;
v->pos.y = wiiu->color_buffer.surface.height - draw->y - draw->height;
v->pos.width = draw->width;
v->pos.height = draw->height;
}
if(draw->coords->tex_coord && draw->coords->vertices == 4)
{
for(int i = 0; i < 4; i++)
{
v[i].coord.u = draw->coords->tex_coord[i << 1];
v[i].coord.v = draw->coords->tex_coord[(i << 1) + 1];
}
v->coord.u = MIN(MIN(MIN(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]);
v->coord.v = MIN(MIN(MIN(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]);
v->coord.width = MAX(MAX(MAX(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]) - v->coord.u;
v->coord.height = MAX(MAX(MAX(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]) - v->coord.v;
}
else
{
v[0].coord.u = 0.0f;
v[0].coord.v = 1.0f;
v[1].coord.u = 1.0f;
v[1].coord.v = 1.0f;
v[2].coord.u = 1.0f;
v[2].coord.v = 0.0f;
v[3].coord.u = 0.0f;
v[3].coord.v = 0.0f;
v->coord.u = 0.0f;
v->coord.v = 0.0f;
v->coord.width = 1.0f;
v->coord.height = 1.0f;
}
v[0].color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1],
v->color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1],
0xFF * draw->coords->color[2], 0xFF * draw->coords->color[3]);
v[1].color = v[0].color;
v[2].color = v[0].color;
v[3].color = v[0].color;
// printf("color : %f, %f, %f, %f --> 0x%08X\n", draw->coords->color[0], draw->coords->color[1], draw->coords->color[2], draw->coords->color[3], col[0]);
GX2SetPixelTexture(texture, tex_shader.ps.samplerVars[0].location);
GX2SetPixelTexture(texture, wiiu->shader->sampler.location);
if(draw->coords->vertex && draw->coords->vertices == 4)
GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, wiiu->vertex_cache.current, 1);
else
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, wiiu->vertex_cache.current, 1);
GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, wiiu->vertex_cache.current, 1);
#if 0
printf("(%i,%i,%i,%i) , (%i,%i)\n", (int)draw->x,
@ -146,7 +126,7 @@ static void menu_display_wiiu_draw(void *data)
texture->surface.width, texture->surface.height);
#endif
wiiu->vertex_cache.current += 4;
wiiu->vertex_cache.current ++;
}

View File

@ -17,19 +17,21 @@
#define GX2_SHADER_INL_H
/*
 * Byte-order helpers used by every instruction-word macro below.
 *
 * to_LE(x)         - with MSB_FIRST defined, byte-swaps x through
 *                    __builtin_bswap32(); otherwise expands to x unchanged.
 * to_QWORD(w0, w1) - packs two words into one u64. With MSB_FIRST defined
 *                    w0 lands in the upper 32 bits and w1 in the lower;
 *                    without it the two are exchanged.
 *
 * NOTE(review): each branch lists to_LE twice, once with a bare expansion and
 * once with the expansion wrapped in parentheses. This looks like the
 * pre-change and post-change line of a diff shown together - confirm which
 * form the real header keeps (the parenthesized one is the safer expansion).
 */
#ifdef MSB_FIRST
#define to_LE(x) __builtin_bswap32(x)
#define to_QWORD(w0, w1) (((u64)(w0) << 32ull) | (w1))
#define to_LE(x) (__builtin_bswap32(x))
#else
#define to_LE(x) x
#define to_QWORD(w0, w1) (((u64)(w1) << 32ull) | (w0))
#define to_LE(x) (x)
#endif
/* CF */
/*
 * Control-flow word builders. Each macro ORs its arguments together at fixed
 * bit offsets and passes the result through to_LE().
 *
 *   CF_DWORD0 / CF_WORD0 : the address argument, unchanged apart from to_LE().
 *   CF_DWORD1 / CF_WORD1 : popCount at bit 0, cfConst << 3, cond << 8,
 *                          count << 10, callCount << 13, inst << 23, and
 *                          bit 31 always set.
 *   CF_ALU_WORD0         : addr at bit 0 plus the three kcache fields.
 *   CF_ALU_WORD1         : kcacheMode1 at bit 0, kcacheAddr0 << 2,
 *                          kcacheAddr1 << 10, count << 18, altConst << 25,
 *                          inst << 26, and bit 31 always set.
 *
 * Arguments are not parenthesized inside the expansions, so callers should
 * pass simple values rather than compound expressions.
 *
 * NOTE(review): both the CF_WORD* and CF_DWORD* spellings are listed, and
 * CF_ALU_WORD0 is followed by two bodies whose kcache shifts differ
 * (16/20/22 versus 22/26/30). This looks like removed and added diff lines
 * shown together - confirm against the actual header before relying on either.
 */
#define CF_WORD0(addr) to_LE(addr)
#define CF_DWORD0(addr) to_LE(addr)
#define CF_WORD1(popCount, cfConst, cond, count, callCount, inst) \
#define CF_DWORD1(popCount, cfConst, cond, count, callCount, inst) \
to_LE(popCount | (cfConst << 3) | (cond << 8) | (count << 10) | (callCount << 13) | (inst << 23) | (1 << 31))
#define CF_ALU_WORD0(addr, kcacheBank0, kcacheBank1, kcacheMode0) \
to_LE(addr | (kcacheBank0 << 16) | (kcacheBank1 << 20) | (kcacheMode0 << 22))
to_LE(addr | (kcacheBank0 << 22) | (kcacheBank1 << 26) | (kcacheMode0 << 30))
#define CF_ALU_WORD1(kcacheMode1, kcacheAddr0, kcacheAddr1, count, altConst, inst) \
to_LE(kcacheMode1 | (kcacheAddr0 << 2) | (kcacheAddr1 << 10) | (count << 18) | (altConst << 25) | (inst << 26) | (1 << 31))
@ -39,26 +41,49 @@
/*
 * Second word of an export instruction: the four source selects occupy three
 * bits each starting at bit 0, validPixelMode goes to bit 22, inst to bit 23,
 * and bit 31 is always set.
 */
#define CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, validPixelMode, inst) \
to_LE(srcSelX | (srcSelY << 3) | (srcSelZ << 6) | (srcSelW << 9) | (validPixelMode << 22) | (inst << 23) | (1 << 31))
/*
 * Postfix modifiers operating on a single 32-bit word: each expands to a
 * leading `&` or `|`, so it is meant to be written directly after an
 * instruction expression. NO_BARRIER clears bit 31; the others set bit 21,
 * 22 and 30 respectively.
 * NOTE(review): the same four names are defined again further down in a
 * to_QWORD() form - presumably these are the superseded variants; confirm.
 */
#define NO_BARRIER & to_LE(~(1 << 31))
#define END_OF_PROGRAM | to_LE(1 << 21)
#define VALID_PIX | to_LE(1 << 22)
#define WHOLE_QUAD_MODE | to_LE(1 << 30)
/*
 * First word of an alloc/export instruction: arrayBase at bit 0, type << 13,
 * dstReg << 15, dstRel << 22, indexGpr << 23, elemSize << 30.
 */
#define CF_ALLOC_EXPORT_WORD0(arrayBase, type, dstReg, dstRel, indexGpr, elemSize) \
to_LE(arrayBase | (type << 13) | (dstReg << 15) | (dstRel << 22) | (indexGpr << 23) | (elemSize << 30))
/* Prefix form: sets bit 31 and leaves a trailing `|` for the expression that follows. */
#define ALU_LAST to_LE(1 << 31) |
/*
 * Second word of an alloc/export instruction in its buffer form:
 * arraySize at bit 0, writeMask << 12, inst << 23, bit 31 always set.
 */
#define CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, inst) \
to_LE(arraySize | (writeMask << 12) | (inst << 23) | (1 << 31))
/* Base values added by KC0()/KC1() below. */
#define ALU_SRC_KCACHE0_BASE 0x80
#define ALU_SRC_KCACHE1_BASE 0xA0
#define CF_KCACHE_BANK_LOCK_1 0x1
#define CB1 0x1
#define CB2 0x2
/* Shorthand for CF_KCACHE_BANK_LOCK_1, used as the `mode` argument of KCACHE0/KCACHE1 (presumably; verify at call sites). */
#define _0_15 CF_KCACHE_BANK_LOCK_1
/* KC0(x) / KC1(x): x offset by the matching kcache base. x is not parenthesized. */
#define KC0(x) (x + ALU_SRC_KCACHE0_BASE)
#define KC1(x) (x + ALU_SRC_KCACHE1_BASE)
/*
 * Postfix modifiers for 64-bit instruction values. Each expands to a leading
 * `&` or `|` and therefore has to follow an instruction expression. They
 * pass 0 as the first to_QWORD() argument and place their bit in the second:
 * NO_BARRIER clears bit 31, END_OF_PROGRAM sets bit 21, VALID_PIX bit 22,
 * WHOLE_QUAD_MODE bit 30, and BURSTCNT(x) ORs in x << 17.
 */
#define NO_BARRIER & ~to_QWORD(0,to_LE(1 << 31))
#define END_OF_PROGRAM | to_QWORD(0,to_LE(1 << 21))
#define VALID_PIX | to_QWORD(0,to_LE(1 << 22))
#define WHOLE_QUAD_MODE | to_QWORD(0,to_LE(1 << 30))
#define BURSTCNT(x) | to_QWORD(0,to_LE(x << 17))
/* WRITE(x) divides x by four via a shift; ARRAY_SIZE and ELEM_SIZE are pass-through labels. */
#define WRITE(x) (x >> 2)
/* NOTE: unlike the common idiom of the same name, this ARRAY_SIZE does not compute an element count. */
#define ARRAY_SIZE(x) x
#define ELEM_SIZE(x) x
/*
 * KCACHE0 ORs CF_ALU_WORD0(0, bank, 0, mode) into the first to_QWORD() word.
 * KCACHE1 ORs CF_ALU_WORD0(0, 0, bank, 0) into the first word and
 * CF_ALU_WORD1(mode, 0, 0, 0, 0, 0) into the second; because CF_ALU_WORD1
 * always sets bit 31, KCACHE1 sets that bit in the second word as well.
 */
#define KCACHE0(bank, mode) | to_QWORD(CF_ALU_WORD0(0, bank, 0, mode), 0)
#define KCACHE1(bank, mode) | to_QWORD(CF_ALU_WORD0(0, 0, bank, 0), CF_ALU_WORD1(mode,0, 0, 0, 0, 0))
/* Postfix form: sets bit 31 in the first to_QWORD() word. */
#define ALU_LAST | to_QWORD(to_LE(1ull << 31), 0)
/* ALU */
/*
 * ALU_WORD0: first word of an ALU instruction. Field offsets: src0Sel at
 * bit 0, src0Rel << 9, src0Chan << 10, src0Neg << 12, src1Sel << 13,
 * src1Rel << 22, src1Chan << 23, src1Neg << 25, indexMode << 26,
 * predSel << 29.
 *
 * NOTE(review): two bodies follow the macro header. The second wraps every
 * shifted argument in parentheses, which lets callers pass compound
 * expressions. It looks like the replacement for the first - confirm.
 */
#define ALU_WORD0(src0Sel, src0Rel, src0Chan, src0Neg, src1Sel, src1Rel, src1Chan, src1Neg, indexMode, predSel) \
to_LE(src0Sel | (src0Rel << 9) | (src0Chan << 10) | (src0Neg << 12) | (src1Sel << 13) | (src1Rel << 22) \
| (src1Chan << 23) | (src1Neg << 25) | (indexMode << 26) | (predSel << 29))
to_LE(src0Sel | ((src0Rel) << 9) | ((src0Chan) << 10) | ((src0Neg) << 12) | ((src1Sel) << 13) | ((src1Rel) << 22) \
| ((src1Chan) << 23) | ((src1Neg) << 25) | ((indexMode) << 26) | ((predSel) << 29))
/*
 * ALU_WORD1_OP2: second word of an ALU instruction in its OP2 layout.
 * Field offsets: src0Abs at bit 0, src1Abs << 1, updateExecuteMask << 2,
 * updatePred << 3, writeMask << 4, omod << 5, inst << 7, encoding << 15,
 * bankSwizzle << 18, dstGpr << 21, dstRel << 28, dstChan << 29, clamp << 31.
 *
 * NOTE(review): the final body line is listed twice; the second masks dstGpr
 * to 7 bits and dstChan to 2 bits before shifting so oversized values cannot
 * spill into neighbouring fields. Presumably that is the current line - confirm.
 */
#define ALU_WORD1_OP2(src0Abs, src1Abs, updateExecuteMask, updatePred, writeMask, omod, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
to_LE(src0Abs | (src1Abs << 1) | (updateExecuteMask << 2) | (updatePred << 3) | (writeMask << 4) | (omod << 5) | (inst << 7) | \
(encoding << 15) | (bankSwizzle << 18) | (dstGpr << 21) | (dstRel << 28) | (dstChan << 29) | (clamp << 31))
(encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31))
/*
 * ALU_WORD1_OP3: second word of an ALU instruction in its OP3 layout (the
 * form that carries a third source, src2).
 *
 * Field offsets: src2Sel at bit 0, src2Rel << 9, src2Chan << 10,
 * src2Neg << 12, inst << 13, encoding << 15, bankSwizzle << 18,
 * dstGpr << 21, dstRel << 28, dstChan << 29, clamp << 31.
 *
 * dstGpr is masked to 7 bits and dstChan to 2 bits before shifting, the same
 * way ALU_WORD1_OP2 does, so oversized values cannot spill into the
 * neighbouring fields.
 *
 * The expansion previously left the to_LE( call unclosed, which made every
 * use of this macro a syntax error; the closing parenthesis is now present.
 */
#define ALU_WORD1_OP3(src2Sel, src2Rel, src2Chan, src2Neg, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
to_LE(src2Sel | (src2Rel << 9) | (src2Chan << 10) | (src2Neg << 12) | (inst << 13) | \
(encoding << 15) | (bankSwizzle << 18) | ((dstGpr&0x7F) << 21) | (dstRel << 28) | ((dstChan&0x3) << 29) | (clamp << 31))
/* TEX */
#define TEX_WORD0(inst, bcFracMode, fetchWholeQuad, resourceID, srcReg, srcRel, altConst) \
@ -71,17 +96,28 @@
/*
 * TEX_WORD2: offsetX at bit 0, offsetY << 5, offsetZ << 10, samplerID << 15,
 * then the four source selects at bits 20, 23, 26 and 29.
 */
#define TEX_WORD2(offsetX, offsetY, offsetZ, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW) \
to_LE(offsetX | (offsetY << 5) | (offsetZ << 10) | (samplerID << 15) | (srcSelX << 20) | (srcSelY << 23) | (srcSelZ << 26) | (srcSelW << 29))
/*
 * Vertex-fetch word builders.
 *   VTX_WORD0: inst at bit 0, type << 5, buffer_id << 8, srcReg << 16,
 *              srcSelX << 24, mega << 26.
 *   VTX_WORD1: dstReg at bit 0, destination selects at bits 9, 12, 15 and 18,
 *              and bit 21 always set.
 *   VTX_WORD2: offset at bit 0, ismega << 19.
 */
#define VTX_WORD0(inst, type, buffer_id, srcReg, srcSelX, mega) \
to_LE(inst | (type << 5) | (buffer_id << 8) | (srcReg << 16) | (srcSelX << 24) | (mega << 26))
/* Upper-case channel indices. NOTE(review): the lower-case set below carries the same values and looks like its replacement - confirm. */
#define _X 0
#define _Y 1
#define _Z 2
#define _W 3
#define VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW) \
to_LE(dstReg | (dstSelX << 9) | (dstSelY << 12) | (dstSelZ << 15) | (dstSelW << 18) | (1 << 21))
#define VTX_WORD2(offset, ismega) \
to_LE(offset| (ismega << 19))
/* Channel indices 0-3, followed by two extra selector values (4 and 5) named _0 and _1. */
#define _x 0
#define _y 1
#define _z 2
#define _w 3
#define _0 4
#define _1 5
/* Four-bit masks written as binary literals (a compiler extension before C23). */
#define _xyzw 0b1111
#define _xy__ 0b0011
/* Packs four component selectors into one word, one byte each, c0 in the top byte. */
#define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3))
/*
 * ALU_LITERAL is listed in two forms: a plain 32-bit to_LE(v), and a 64-bit
 * value with to_LE(v) as the first to_QWORD() word and 0 as the second.
 * NOTE(review): presumably the QWORD form is the current one - confirm.
 */
#define ALU_LITERAL(v) to_LE(v)
#define ALU_LITERAL(v) to_QWORD(to_LE(v), 0)
/* SRCx_SEL special constants */
#define ALU_SRC_1_DBL_L 0xF4
@ -97,6 +133,49 @@
#define ALU_SRC_PV 0xFE
#define ALU_SRC_PS 0xFF
#define _NEG | (1 << 12)
#define ALU_OMOD_OFF 0x0
#define ALU_OMOD_M2 0x1
#define ALU_OMOD_M4 0x2
#define ALU_OMOD_D2 0x3
#define ALU_VEC_012 0x0
#define ALU_VEC_021 0x1
#define ALU_VEC_120 0x2
#define ALU_VEC_102 0x3
#define ALU_VEC_201 0x4
#define ALU_VEC_210 0x5
#define VEC_012 | to_QWORD(0, to_LE(ALU_VEC_012 << 18))
#define VEC_021 | to_QWORD(0, to_LE(ALU_VEC_021 << 18))
#define VEC_120 | to_QWORD(0, to_LE(ALU_VEC_120 << 18))
#define VEC_102 | to_QWORD(0, to_LE(ALU_VEC_102 << 18))
#define VEC_201 | to_QWORD(0, to_LE(ALU_VEC_201 << 18))
#define VEC_210 | to_QWORD(0, to_LE(ALU_VEC_210 << 18))
#define VALID_PIX | to_QWORD(0,to_LE(1 << 22))
#define ALU_SCL_210 0x0
#define ALU_SCL_122 0x1
#define ALU_SCL_212 0x2
#define ALU_SCL_221 0x3
#define SCL_210 | to_QWORD(0, to_LE(ALU_SCL_210 << 18))
#define SCL_122 | to_QWORD(0, to_LE(ALU_SCL_122 << 18))
#define SCL_212 | to_QWORD(0, to_LE(ALU_SCL_212 << 18))
#define SCL_221 | to_QWORD(0, to_LE(ALU_SCL_221 << 18))
#define FETCH_TYPE(x) x
#define MINI(x) ((x) - 1)
#define MEGA(x) (MINI(x) | 0x80000000)
#define OFFSET(x) x
#define VERTEX_DATA 0
#define INSTANCE_DATA 1
#define NO_INDEX_OFFSET 2
/* CF defines */
#define CF_COND_ACTIVE 0x0
#define CF_COND_FALSE 0x1
@ -109,13 +188,18 @@
/* instructions */
/* CF */
#define CF_INST_TEX 0x01
#define CF_INST_CALL_FS 0x13
#define CF_INST_TEX 0x01
#define CF_INST_VTX 0x02
#define CF_INST_ALU 0x08
#define CF_INST_CALL_FS 0x13
#define CF_INST_EMIT_VERTEX 0x15
#define CF_INST_MEM_RING 0x26
/* ALU */
#define ALU_INST_ALU 0x8
#define OP2_INST_MUL 0x1
#define OP2_INST_MOV 0x19
#define OP2_INST_ADD 0x0
#define OP2_INST_MUL 0x1
#define OP2_INST_MUL_IEEE 0x2
#define OP2_INST_MOV 0x19
#define OP2_INST_RECIP_IEEE 0x66
/* EXP */
#define CF_INST_EXP 0x27
#define CF_INST_EXP_DONE 0x28
@ -123,6 +207,9 @@
/* TEX */
#define TEX_INST_SAMPLE 0x10
/* VTX */
#define VTX_INST_FETCH 0x0
/* EXPORT_TYPE */
#define EXPORT_TYPE_PIXEL 0x0
#define EXPORT_TYPE_POS 0x1
@ -142,11 +229,34 @@
#define PIX0 PIX(0)
/* registers */
#define __ (0x80) /* invalid register (write mask off) */
#define _R(x) x
#define _R0 _R(0x0)
#define _R1 _R(0x1)
#define _R2 _R(0x2)
#define _R3 _R(0x3)
#define _R4 _R(0x4)
#define _R5 _R(0x5)
#define _R6 _R(0x6)
#define _R7 _R(0x7)
#define _R8 _R(0x8)
#define _R9 _R(0x9)
#define _R10 _R(0xA)
#define _R11 _R(0xB)
#define _R12 _R(0xC)
#define _R13 _R(0xD)
#define _R14 _R(0xE)
#define _R15 _R(0xF)
#define _R120 _R(0x78)
#define _R121 _R(0x79)
#define _R122 _R(0x7A)
#define _R123 _R(0x7B)
#define _R124 _R(0x7C)
#define _R125 _R(0x7D)
#define _R126 _R(0x7E)
#define _R127 _R(0x7F)
/* texture */
#define _t(x) x
@ -156,28 +266,61 @@
#define _s(x) x
#define _s0 _s(0x0)
#define CALL_FS CF_WORD0(0), CF_WORD1(0,0,0,0,0,CF_INST_CALL_FS)
#define _b(x) x
#define TEX(addr, cnt) CF_WORD0(addr), CF_WORD1(0x0, 0x0, CF_COND_ACTIVE, 0x0, (cnt - 1), CF_INST_TEX)
#define ALU(addr, cnt) CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, (cnt - 1), 0x0, ALU_INST_ALU)
#define CALL_FS to_QWORD(CF_DWORD0(0), CF_DWORD1(0,0,0,0,0,CF_INST_CALL_FS))
#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE)
#define TEX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, (cnt - 1), 0x0, CF_INST_TEX))
#define VTX(addr, cnt) to_QWORD(CF_DWORD0(addr), CF_DWORD1(0x0, 0x0, CF_COND_ACTIVE, (cnt - 1), 0x0, CF_INST_VTX))
#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP)
#define ALU(addr, cnt) to_QWORD(CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, (cnt - 1), 0x0, CF_INST_ALU))
#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, ALU_SRC_0, 0x0, 0x0, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MOV, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0)
#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE))
#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MUL, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0)
#define EXP(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) to_QWORD(CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP))
#define MEM_RING(arrayBase, dstReg, writeMask, arraySize, elemSize) \
to_QWORD(CF_ALLOC_EXPORT_WORD0(arrayBase, 0x00, dstReg, 0x00, 0x00, elemSize), \
CF_ALLOC_EXPORT_WORD1_BUF(arraySize, writeMask, CF_INST_MEM_RING))
#define EMIT_VERTEX to_QWORD(0, CF_DWORD1(0, 0, 0, 0, 0, CF_INST_EMIT_VERTEX))
#define ALU_OP2(inst, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, omod) \
to_QWORD(ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \
ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, (((dstGpr&__) >> 7) ^ 0x1), omod, inst, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0))
#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)
#define ALU_MOV_x2(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M2)
#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
#define ALU_MUL_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
#define ALU_ADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF)
#define ALU_ADD_x2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \
ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M2)
#define ALU_RECIP_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \
ALU_OP2(OP2_INST_RECIP_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF)
#define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\
TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED), \
TEX_WORD2(0x0, 0x0, 0x0, samplerID, _X, _Y, _0, _X)
to_QWORD(TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \
to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _0, _x), 0x00000000)
#define VTX_FETCH(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, buffer_id, type, mega, offset) \
to_QWORD(VTX_WORD0(VTX_INST_FETCH, type, buffer_id, srcReg, srcSelX, mega), VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW)) , \
to_QWORD(VTX_WORD2(offset, (mega >> 31)), 0x00000000)
#define _x2(v) v, v
#define _x4(v) _x2(v), _x2(v)

View File

@ -3,6 +3,7 @@
#define GX2_SCAN_BUFFER_ALIGNMENT 0x1000
#define GX2_SHADER_ALIGNMENT 0x100
#define GX2_CONTEXT_STATE_ALIGNMENT 0x100
#define GX2_UNIFORM_BLOCK_ALIGNMENT 0x100
#define GX2_DISPLAY_LIST_ALIGNMENT 0x20
#define GX2_VERTEX_BUFFER_ALIGNMENT 0x40
#define GX2_INDEX_BUFFER_ALIGNMENT 0x20

View File

@ -175,18 +175,19 @@ typedef enum GX2IndexType
typedef enum GX2InvalidateMode
{
GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER = 1 << 0,
GX2_INVALIDATE_MODE_TEXTURE = 1 << 1,
GX2_INVALIDATE_MODE_UNIFORM_BLOCK = 1 << 2,
GX2_INVALIDATE_MODE_SHADER = 1 << 3,
GX2_INVALIDATE_MODE_COLOR_BUFFER = 1 << 4,
GX2_INVALIDATE_MODE_DEPTH_BUFFER = 1 << 5,
GX2_INVALIDATE_MODE_CPU = 1 << 6,
GX2_INVALIDATE_MODE_STREAM_OUT_BUFFER = 1 << 7,
GX2_INVALIDATE_MODE_EXPORT_BUFFER = 1 << 8,
GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER= GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER,
GX2_INVALIDATE_MODE_CPU_TEXTURE = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_TEXTURE,
GX2_INVALIDATE_MODE_CPU_SHADER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_SHADER,
GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER = 1 << 0,
GX2_INVALIDATE_MODE_TEXTURE = 1 << 1,
GX2_INVALIDATE_MODE_UNIFORM_BLOCK = 1 << 2,
GX2_INVALIDATE_MODE_SHADER = 1 << 3,
GX2_INVALIDATE_MODE_COLOR_BUFFER = 1 << 4,
GX2_INVALIDATE_MODE_DEPTH_BUFFER = 1 << 5,
GX2_INVALIDATE_MODE_CPU = 1 << 6,
GX2_INVALIDATE_MODE_STREAM_OUT_BUFFER = 1 << 7,
GX2_INVALIDATE_MODE_EXPORT_BUFFER = 1 << 8,
GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_ATTRIBUTE_BUFFER,
GX2_INVALIDATE_MODE_CPU_TEXTURE = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_TEXTURE,
GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_UNIFORM_BLOCK,
GX2_INVALIDATE_MODE_CPU_SHADER = GX2_INVALIDATE_MODE_CPU | GX2_INVALIDATE_MODE_SHADER,
} GX2InvalidateMode;
typedef enum GX2InitAttributes
@ -220,6 +221,7 @@ typedef enum GX2LogicOp
typedef enum GX2PrimitiveMode
{
GX2_PRIMITIVE_MODE_POINTS = 1,
GX2_PRIMITIVE_MODE_LINES = 2,
GX2_PRIMITIVE_MODE_LINE_STRIP = 3,
GX2_PRIMITIVE_MODE_TRIANGLES = 4,

View File

@ -69,93 +69,102 @@ typedef struct GX2AttribVar
typedef struct GX2VertexShader
{
struct
union
{
struct
{
unsigned :2;
bool prime_cache_on_const :1;
bool prime_cache_enable :1;
bool uncached_first_inst :1;
unsigned fetch_cache_lines :3;
bool prime_cache_on_draw :1;
bool prime_cache_pgm_en :1;
bool dx10_clamp :1;
unsigned :5;
unsigned stack_size :8;
unsigned num_gprs :8;
}sq_pgm_resources_vs;
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_vs;
bool vgt_primitiveid_en;
struct
{
unsigned : 31;
unsigned enable: 1;
} vgt_primitiveid_en;
struct
{
unsigned :18;
unsigned vs_out_fog_vec_addr : 5;
bool vs_exports_fog : 1;
unsigned :2;
unsigned vs_export_count :5;
bool vs_per_component : 1;
}spi_vs_out_config;
uint32_t num_spi_vs_out_id;
struct
{
uint8_t semantic_3;
uint8_t semantic_2;
uint8_t semantic_1;
uint8_t semantic_0;
}spi_vs_out_id[10];
struct
{
bool clip_dist_ena_7 :1;
bool clip_dist_ena_6 :1;
bool clip_dist_ena_5 :1;
bool clip_dist_ena_4 :1;
bool clip_dist_ena_3 :1;
bool clip_dist_ena_2 :1;
bool clip_dist_ena_1 :1;
bool clip_dist_ena_0 :1;
bool cull_dist_ena_7 :1;
bool cull_dist_ena_6 :1;
bool cull_dist_ena_5 :1;
bool cull_dist_ena_0 :1;
bool cull_dist_ena_4 :1;
bool cull_dist_ena_3 :1;
bool cull_dist_ena_2 :1;
bool cull_dist_ena_1 :1;
bool vs_out_misc_side_bus_ena :1;
bool vs_out_ccdist1_vec_ena :1;
bool vs_out_ccdist0_vec_ena :1;
bool vs_out_misc_vec_ena :1;
bool use_vtx_kill_flag :1;
bool use_vtx_viewport_indx :1;
bool use_vtx_render_target_indx :1;
bool use_vtx_edge_flag :1;
unsigned :6;
bool use_vtx_point_size :1;
bool use_vtx_gs_cut_flag :1;
}pa_cl_vs_out_cntl;
uint32_t sq_vtx_semantic_clear;
uint32_t num_sq_vtx_semantic;
uint32_t sq_vtx_semantic[32]; /* 8 bit */
struct
{
bool buffer_3_en :1;
bool buffer_2_en :1;
bool buffer_1_en :1;
bool buffer_0_en :1;
}vgt_strmout_buffer_en;
struct
{
unsigned :24;
unsigned vtx_reuse_depth :8;
}vgt_vertex_reuse_block_cntl;
struct
{
unsigned :24;
unsigned reuse_depth :8;
}vgt_hos_reuse_depth;
struct
{
unsigned : 18;
unsigned vs_out_fog_vec_addr : 5;
bool vs_exports_fog : 1;
unsigned : 2;
unsigned vs_export_count : 5;
bool vs_per_component : 1;
} spi_vs_out_config;
uint32_t num_spi_vs_out_id;
struct
{
uint8_t semantic_3;
uint8_t semantic_2;
uint8_t semantic_1;
uint8_t semantic_0;
} spi_vs_out_id[10];
struct
{
bool clip_dist_ena_7 : 1;
bool clip_dist_ena_6 : 1;
bool clip_dist_ena_5 : 1;
bool clip_dist_ena_4 : 1;
bool clip_dist_ena_3 : 1;
bool clip_dist_ena_2 : 1;
bool clip_dist_ena_1 : 1;
bool clip_dist_ena_0 : 1;
bool cull_dist_ena_7 : 1;
bool cull_dist_ena_6 : 1;
bool cull_dist_ena_5 : 1;
bool cull_dist_ena_0 : 1;
bool cull_dist_ena_4 : 1;
bool cull_dist_ena_3 : 1;
bool cull_dist_ena_2 : 1;
bool cull_dist_ena_1 : 1;
bool vs_out_misc_side_bus_ena : 1;
bool vs_out_ccdist1_vec_ena : 1;
bool vs_out_ccdist0_vec_ena : 1;
bool vs_out_misc_vec_ena : 1;
bool use_vtx_kill_flag : 1;
bool use_vtx_viewport_indx : 1;
bool use_vtx_render_target_indx : 1;
bool use_vtx_edge_flag : 1;
unsigned : 6;
bool use_vtx_point_size : 1;
bool use_vtx_gs_cut_flag : 1;
} pa_cl_vs_out_cntl;
uint32_t sq_vtx_semantic_clear;
uint32_t num_sq_vtx_semantic;
uint32_t sq_vtx_semantic[32]; /* 8 bit */
struct
{
bool buffer_3_en : 1;
bool buffer_2_en : 1;
bool buffer_1_en : 1;
bool buffer_0_en : 1;
} vgt_strmout_buffer_en;
struct
{
unsigned : 24;
unsigned vtx_reuse_depth : 8;
} vgt_vertex_reuse_block_cntl;
struct
{
unsigned : 24;
unsigned reuse_depth : 8;
} vgt_hos_reuse_depth;
};
u32 vals[52];
} regs;
uint32_t size;
@ -180,7 +189,7 @@ typedef struct GX2VertexShader
uint32_t attribVarCount;
GX2AttribVar *attribVars;
uint32_t ringItemsize;
uint32_t ringItemSize;
BOOL hasStreamOut;
uint32_t streamOutStride[4];
@ -188,129 +197,136 @@ typedef struct GX2VertexShader
GX2RBuffer gx2rBuffer;
} GX2VertexShader;
typedef enum {
typedef enum
{
spi_baryc_cntl_centroids_only = 0,
spi_baryc_cntl_centers_only = 1,
spi_baryc_cntl_centroids_and_centers = 2,
}spi_baryc_cntl;
} spi_baryc_cntl;
typedef enum {
typedef enum
{
db_z_order_late_z = 0,
db_z_order_early_z_then_late_z = 1,
db_z_order_re_z = 2,
db_z_order_early_z_then_re_z = 3,
}db_z_order;
} db_z_order;
typedef struct GX2PixelShader
{
struct
union
{
struct
{
unsigned :2;
bool prime_cache_on_const :1;
bool prime_cache_enable :1;
bool uncached_first_inst :1;
unsigned fetch_cache_lines :3;
bool prime_cache_on_draw :1;
bool prime_cache_pgm_en :1;
bool dx10_clamp :1;
unsigned :5;
unsigned stack_size :8;
unsigned num_gprs :8;
}sq_pgm_resources_ps;
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_ps;
struct
{
unsigned :24;
unsigned export_mode :5;
}sq_pgm_exports_ps;
struct
{
unsigned : 27;
unsigned export_mode : 5;
} sq_pgm_exports_ps;
struct
{
bool baryc_at_sample_ena :1;
bool position_sample :1;
bool linear_gradient_ena :1;
bool persp_gradient_ena :1;
spi_baryc_cntl baryc_sample_cntl :2;
unsigned param_gen_addr : 7;
unsigned param_gen :4;
unsigned position_addr :5;
bool position_centroid :1;
bool position_ena :1;
unsigned :2;
unsigned num_interp :6;
}spi_ps_in_control_0;
struct
{
bool baryc_at_sample_ena : 1;
bool position_sample : 1;
bool linear_gradient_ena : 1;
bool persp_gradient_ena : 1;
spi_baryc_cntl baryc_sample_cntl : 2;
unsigned param_gen_addr : 7;
unsigned param_gen : 4;
unsigned position_addr : 5;
bool position_centroid : 1;
bool position_ena : 1;
unsigned : 2;
unsigned num_interp : 6;
} spi_ps_in_control_0;
struct
{
unsigned :1;
bool position_ulc :1;
unsigned fixed_pt_position_addr :5;
bool fixed_pt_position_ena :1;
unsigned fog_addr :7;
unsigned front_face_addr :5;
bool front_face_all_bits :1;
unsigned front_face_chan :2;
bool front_face_ena :1;
unsigned gen_index_pix_addr :7;
bool gen_index_pix :1;
}spi_ps_in_control_1;
struct
{
unsigned : 1;
bool position_ulc : 1;
unsigned fixed_pt_position_addr : 5;
bool fixed_pt_position_ena : 1;
unsigned fog_addr : 7;
unsigned front_face_addr : 5;
bool front_face_all_bits : 1;
unsigned front_face_chan : 2;
bool front_face_ena : 1;
unsigned gen_index_pix_addr : 7;
bool gen_index_pix : 1;
} spi_ps_in_control_1;
uint32_t num_spi_ps_input_cntl;
uint32_t num_spi_ps_input_cntl;
struct
{
unsigned :13;
bool sel_sample :1;
bool pt_sprite_tex :1;
unsigned cyl_wrap :4;
bool sel_linear :1;
bool sel_centroid :1;
bool flat_shade :1;
unsigned default_val :2;
unsigned semantic :8;
}spi_ps_input_cntls[32];
struct
{
unsigned : 13;
bool sel_sample : 1;
bool pt_sprite_tex : 1;
unsigned cyl_wrap : 4;
bool sel_linear : 1;
bool sel_centroid : 1;
bool flat_shade : 1;
unsigned default_val : 2;
unsigned semantic : 8;
} spi_ps_input_cntls[32];
struct
{
unsigned output7_enable :4;
unsigned output6_enable :4;
unsigned output5_enable :4;
unsigned output4_enable :4;
unsigned output3_enable :4;
unsigned output2_enable :4;
unsigned output1_enable :4;
unsigned output0_enable :4;
}cb_shader_mask;
struct {
unsigned :24;
bool rt7_enable :1;
bool rt6_enable :1;
bool rt5_enable :1;
bool rt4_enable :1;
bool rt3_enable :1;
bool rt2_enable :1;
bool rt1_enable :1;
bool rt0_enable :1;
}cb_shader_control;
struct
{
unsigned :19;
bool alpha_to_mask_disable :1;
bool exec_on_noop :1;
bool exec_on_hier_fail :1;
bool dual_export_enable :1;
bool mask_export_enable :1;
bool coverage_to_mask_enable :1;
bool kill_enable :1;
db_z_order z_order :2;
unsigned :2;
bool z_export_enable :1;
bool stencil_ref_export_enable :1;
} db_shader_control;
struct
{
unsigned output7_enable : 4;
unsigned output6_enable : 4;
unsigned output5_enable : 4;
unsigned output4_enable : 4;
unsigned output3_enable : 4;
unsigned output2_enable : 4;
unsigned output1_enable : 4;
unsigned output0_enable : 4;
} cb_shader_mask;
struct
{
unsigned : 24;
bool rt7_enable : 1;
bool rt6_enable : 1;
bool rt5_enable : 1;
bool rt4_enable : 1;
bool rt3_enable : 1;
bool rt2_enable : 1;
bool rt1_enable : 1;
bool rt0_enable : 1;
} cb_shader_control;
struct
{
unsigned : 19;
bool alpha_to_mask_disable : 1;
bool exec_on_noop : 1;
bool exec_on_hier_fail : 1;
bool dual_export_enable : 1;
bool mask_export_enable : 1;
bool coverage_to_mask_enable : 1;
bool kill_enable : 1;
db_z_order z_order : 2;
unsigned : 2;
bool z_export_enable : 1;
bool stencil_ref_export_enable : 1;
} db_shader_control;
bool spi_input_z;
bool spi_input_z;
};
u32 vals[41];
} regs;
uint32_t size;
@ -335,26 +351,148 @@ typedef struct GX2PixelShader
GX2RBuffer gx2rBuffer;
} GX2PixelShader;
/* Values for the vgt_gs_out_prim_type register of GX2GeometryShader:
 * the kind of primitive the geometry shader emits. */
typedef enum
{
VGT_GS_OUT_PRIMITIVE_TYPE_POINTLIST = 0,
VGT_GS_OUT_PRIMITIVE_TYPE_LINESTRIP = 1,
VGT_GS_OUT_PRIMITIVE_TYPE_TRISTRIP = 2,
/* NOTE(review): presumably present only to force a 32-bit enum so the field
 * fills a whole register word - confirm. */
VGT_GS_OUT_PRIMITIVE_TYPE_MAX_ENUM = 0xFFFFFFFF
} vgt_gs_out_primitive_type;
/* Values for the 2-bit `mode` field of the vgt_gs_mode register in
 * GX2GeometryShader. */
typedef enum
{
VGT_GS_ENABLE_MODE_OFF = 0,
VGT_GS_ENABLE_MODE_SCENARIO_A = 1,
VGT_GS_ENABLE_MODE_SCENARIO_B = 2,
VGT_GS_ENABLE_MODE_SCENARIO_G = 3,
} vgt_gs_enable_mode;
/* Values for the 2-bit `cut_mode` field of the vgt_gs_mode register in
 * GX2GeometryShader. */
typedef enum
{
VGT_GS_CUT_MODE_1024 = 0,
VGT_GS_CUT_MODE_512 = 1,
VGT_GS_CUT_MODE_256 = 2,
VGT_GS_CUT_MODE_128 = 3,
} vgt_gs_cut_mode;
typedef struct GX2GeometryShader
{
struct
union
{
uint32_t sq_pgm_resources_gs;
uint32_t vgt_gs_out_prim_type;
uint32_t vgt_gs_mode;
uint32_t pa_cl_vs_out_cntl;
uint32_t sq_pgm_resources_vs;
uint32_t sq_gs_vert_itemsize;
uint32_t spi_vs_out_config;
uint32_t num_spi_vs_out_id;
uint32_t spi_vs_out_id[10];
uint32_t vgt_strmout_buffer_en;
} regs;
struct
{
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_gs;
vgt_gs_out_primitive_type vgt_gs_out_prim_type;
struct
{
unsigned : 14;
bool partial_thd_at_eoi : 1;
bool element_info_en : 1;
bool fast_compute_mode : 1;
bool compute_mode : 1;
unsigned : 2;
bool gs_c_pack_en : 1;
unsigned : 2;
bool mode_hi : 1;
unsigned : 3;
vgt_gs_cut_mode cut_mode : 2;
bool es_passthru : 1;
vgt_gs_enable_mode mode : 2;
} vgt_gs_mode;
struct
{
bool clip_dist_ena_7 : 1;
bool clip_dist_ena_6 : 1;
bool clip_dist_ena_5 : 1;
bool clip_dist_ena_4 : 1;
bool clip_dist_ena_3 : 1;
bool clip_dist_ena_2 : 1;
bool clip_dist_ena_1 : 1;
bool clip_dist_ena_0 : 1;
bool cull_dist_ena_7 : 1;
bool cull_dist_ena_6 : 1;
bool cull_dist_ena_5 : 1;
bool cull_dist_ena_0 : 1;
bool cull_dist_ena_4 : 1;
bool cull_dist_ena_3 : 1;
bool cull_dist_ena_2 : 1;
bool cull_dist_ena_1 : 1;
bool vs_out_misc_side_bus_ena : 1;
bool vs_out_ccdist1_vec_ena : 1;
bool vs_out_ccdist0_vec_ena : 1;
bool vs_out_misc_vec_ena : 1;
bool use_vtx_kill_flag : 1;
bool use_vtx_viewport_indx : 1;
bool use_vtx_render_target_indx : 1;
bool use_vtx_edge_flag : 1;
unsigned : 6;
bool use_vtx_point_size : 1;
bool use_vtx_gs_cut_flag : 1;
} pa_cl_vs_out_cntl;
struct
{
unsigned : 2;
bool prime_cache_on_const : 1;
bool prime_cache_enable : 1;
bool uncached_first_inst : 1;
unsigned fetch_cache_lines : 3;
bool prime_cache_on_draw : 1;
bool prime_cache_pgm_en : 1;
bool dx10_clamp : 1;
unsigned : 5;
unsigned stack_size : 8;
unsigned num_gprs : 8;
} sq_pgm_resources_vs;
uint32_t sq_gs_vert_itemsize; /* 15-bit */
struct
{
unsigned : 18;
unsigned vs_out_fog_vec_addr : 5;
bool vs_exports_fog : 1;
unsigned : 2;
unsigned vs_export_count : 5;
bool vs_per_component : 1;
} spi_vs_out_config;
uint32_t num_spi_vs_out_id;
struct
{
uint8_t semantic_3;
uint8_t semantic_2;
uint8_t semantic_1;
uint8_t semantic_0;
} spi_vs_out_id[10];
struct
{
bool buffer_3_en : 1;
bool buffer_2_en : 1;
bool buffer_1_en : 1;
bool buffer_0_en : 1;
} vgt_strmout_buffer_en;
};
u32 vals[19];
} regs;
uint32_t size;
uint8_t *program;
uint32_t vertexProgramSize;
uint8_t *vertexProgram;
uint32_t copyProgramSize;
uint8_t *copyProgram;
GX2ShaderMode mode;
uint32_t uniformBlockCount;
@ -419,6 +557,14 @@ void GX2SetShaderModeEx(GX2ShaderMode mode,
uint32_t numGsGpr, uint32_t numGsStackEntries,
uint32_t numPsGpr, uint32_t numPsStackEntries);
/* Convenience wrapper around GX2SetShaderModeEx() that supplies fixed
 * resource counts for the requested mode.
 *
 * The last four arguments are numGsGpr, numGsStackEntries, numPsGpr and
 * numPsStackEntries; the two before them are presumably the vertex-shader
 * counterparts (confirm against the GX2SetShaderModeEx prototype).
 * Only geometry-shader mode reserves anything for the geometry stage. */
static inline void GX2SetShaderMode(GX2ShaderMode mode)
{
   if (mode != GX2_SHADER_MODE_GEOMETRY_SHADER)
   {
      /* no geometry stage: its GPR / stack budget is zero */
      GX2SetShaderModeEx(mode, 48, 64, 0, 0, 200, 192);
      return;
   }

   GX2SetShaderModeEx(mode, 44, 32, 64, 48, 76, 176);
}
void GX2SetStreamOutEnable(BOOL enable);
void GX2SetGeometryShaderInputRingBuffer(void *buffer, uint32_t size);
void GX2SetGeometryShaderOutputRingBuffer(void *buffer, uint32_t size);

View File

@ -37,3 +37,5 @@ typedef double f64;
typedef volatile float vf32;
typedef volatile double vf64;
/* Number of elements in `array`. Only meaningful for true arrays: a pointer
 * (including an array function parameter) gives sizeof(pointer)/sizeof(element). */
#define countof(array) (sizeof(array) / sizeof((array)[0]))

14
wiiu/run.sh Normal file
View File

@ -0,0 +1,14 @@
#!/bin/sh
# Send an .rpx to the console with wiiload, then listen on port 4405 with
# netcat and print whatever it receives, bracketed by START/END timestamps.
#
# usage: run.sh <rpx>

# "$1" is quoted so that a missing argument and a path containing spaces are
# both tested as exactly one word.
if [ -z "$1" ] ; then
   echo
   echo "usage: $0 <rpx>"
   echo
   exit 0
fi

# quoted: a path with spaces must reach wiiload as a single argument
wiiload "$1"

echo ===== START: `date` =====
netcat -p 4405 -l
echo ===== END: `date` =====

151
wiiu/shader_utils.c Normal file
View File

@ -0,0 +1,151 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <wiiu/gx2.h>
#include <wiiu/system/memory.h>
#include <wiiu/shader_utils.h>
#include <wiiu/wiiu_dbg.h>
/* This is a hack for ELF builds: their data section is below 0x10000000
 * and thus can't be accessed by the GX2 hardware.
 *
 * When GX2_CAN_ACCESS_DATA_SECTION is not defined, GX2InitShader() copies
 * each shader program into MEM2 and stores the original pointers in this
 * record so GX2DestroyShader() can restore them. The record is stored in the
 * fetch-shader allocation, directly after the fetch shader itself. */
#ifndef GX2_CAN_ACCESS_DATA_SECTION
typedef struct
{
/* original (pre-copy) program pointers of the vertex, pixel and geometry
 * shaders, plus the geometry shader's copy program */
void* vs_program;
void* ps_program;
void* gs_program;
void* gs_copy_program;
}org_programs_t;
#endif
/* Prepares a GX2Shader for use: builds its fetch shader in MEM2 and, when
 * GX2_CAN_ACCESS_DATA_SECTION is not defined, replaces the vertex / pixel /
 * geometry program pointers with MEM2 copies.
 *
 * A non-NULL shader->fs.program marks the shader as already initialised, so
 * repeated calls are no-ops until GX2DestroyShader() resets it.
 *
 * NOTE(review): none of the MEM2_alloc() results are checked before being
 * written to - confirm whether MEM2_alloc can return NULL.
 * NOTE(review): the backup record is placed at fs.program + fs.size, which
 * is only suitably aligned for pointers if fs.size is a multiple of the
 * pointer size - presumably guaranteed by GX2CalcFetchShaderSizeEx; verify. */
void GX2InitShader(GX2Shader* shader)
{
/* already initialised */
if (shader->fs.program)
return;
shader->fs.size = GX2CalcFetchShaderSizeEx(shader->vs.attribVarCount,
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
#ifdef GX2_CAN_ACCESS_DATA_SECTION
shader->fs.program = MEM2_alloc(shader->fs.size, GX2_SHADER_ALIGNMENT);
#else
/* extra room after the fetch shader holds the org_programs_t backup record */
shader->fs.program = MEM2_alloc(shader->fs.size + sizeof(org_programs_t), GX2_SHADER_ALIGNMENT);
#endif
GX2InitFetchShaderEx(&shader->fs, (uint8_t*)shader->fs.program,
shader->vs.attribVarCount,
shader->attribute_stream,
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->fs.program, shader->fs.size);
#ifndef GX2_CAN_ACCESS_DATA_SECTION
/* remember the original program pointers before overwriting them */
org_programs_t* org = (org_programs_t*)(shader->fs.program + shader->fs.size);
org->vs_program = shader->vs.program;
org->ps_program = shader->ps.program;
org->gs_program = shader->gs.program;
org->gs_copy_program = shader->gs.copyProgram;
/* each program: allocate in MEM2, copy from the original, invalidate */
shader->vs.program = MEM2_alloc(shader->vs.size, GX2_SHADER_ALIGNMENT);
memcpy(shader->vs.program, org->vs_program, shader->vs.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->vs.program, shader->vs.size);
shader->ps.program = MEM2_alloc(shader->ps.size, GX2_SHADER_ALIGNMENT);
memcpy(shader->ps.program, org->ps_program, shader->ps.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->ps.program, shader->ps.size);
/* the geometry shader is optional; its copy program is only relocated
 * together with the main geometry program */
if(org->gs_program)
{
shader->gs.program = MEM2_alloc(shader->gs.size, GX2_SHADER_ALIGNMENT);
memcpy(shader->gs.program, org->gs_program, shader->gs.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->gs.program, shader->gs.size);
shader->gs.copyProgram = MEM2_alloc(shader->gs.copyProgramSize, GX2_SHADER_ALIGNMENT);
memcpy(shader->gs.copyProgram, org->gs_copy_program, shader->gs.copyProgramSize);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->gs.copyProgram, shader->gs.copyProgramSize);
}
#endif
}
/* Releases everything GX2InitShader() allocated for `shader` and, when
 * GX2_CAN_ACCESS_DATA_SECTION is not defined, restores the original program
 * pointers that were saved behind the fetch shader.
 *
 * Safe to call on a shader that was never initialised or was already
 * destroyed: fs.program is the "initialised" marker (GX2InitShader() tests
 * it, and it is reset to NULL here), so a NULL marker means there is nothing
 * to free. Without this check the function would free the original program
 * pointers and read the backup record from an invalid address. */
void GX2DestroyShader(GX2Shader* shader)
{
   if (!shader->fs.program)
      return;

#ifndef GX2_CAN_ACCESS_DATA_SECTION
   /* these currently point at the MEM2 copies made by GX2InitShader() */
   MEM2_free(shader->vs.program);
   MEM2_free(shader->ps.program);
   MEM2_free(shader->gs.program);
   MEM2_free(shader->gs.copyProgram);

   /* the backup record lives directly after the fetch shader; read it
    * before the fetch-shader allocation is released below */
   org_programs_t* org = (org_programs_t*)(shader->fs.program + shader->fs.size);
   shader->vs.program     = org->vs_program;
   shader->ps.program     = org->ps_program;
   shader->gs.program     = org->gs_program;
   shader->gs.copyProgram = org->gs_copy_program;
#endif

   MEM2_free(shader->fs.program);
   shader->fs.program = NULL;
}
/* Binds every stage of `shader`: vertex, pixel and fetch shaders always,
 * the geometry shader only when the shader actually has one. */
void GX2SetShader(GX2Shader* shader)
{
   GX2SetVertexShader(&shader->vs);
   GX2SetPixelShader(&shader->ps);
   GX2SetFetchShader(&shader->fs);

   /* no geometry program attached: nothing more to bind */
   if (!shader->gs.program)
      return;

   GX2SetGeometryShader(&shader->gs);
}
/* Debug helper: dumps `shader` next to the reference `org` (four 32-bit
 * words of each per row) and then lists every word that differs.
 *
 * shader / shader_size: buffer under test and its size in bytes.
 * org / org_size:       reference buffer and its size in bytes.
 * name:                 label printed as the heading.
 *
 * Only the words present in BOTH buffers are read. A size mismatch is
 * reported on its own line, and a final row with fewer than four words is
 * printed short, so neither buffer is ever read past its stated size. */
void check_shader_verbose(u32* shader, u32 shader_size, u32* org, u32 org_size, const char* name)
{
   u32 common_size = shader_size < org_size ? shader_size : org_size;
   int words       = (int)(common_size / 4);

   printf("%s :\n", name);
   DEBUG_VAR(shader_size);
   DEBUG_VAR(org_size);

   if (shader_size != org_size)
      printf("size mismatch : 0x%08X should be 0x%08X\n", shader_size, org_size);

   /* side-by-side dump: up to four words of `shader`, then the same words of `org` */
   for (int row = 0; row < words; row += 4)
   {
      int cols = (words - row < 4) ? (words - row) : 4;

      for (int c = 0; c < cols; c++)
         printf("0x%08X ", shader[row + c]);

      for (int c = 0; c < cols; c++)
         printf("0x%08X%c", org[row + c], (c + 1 < cols) ? ' ' : '\n');
   }

   /* per-word differences; each value is also shown byte-swapped */
   for (int i = 0; i < words; i++)
   {
      if (shader[i] != org[i])
      {
         printf("%i(%X): 0x%08X(0x%08X) should be 0x%08X(0x%08X) \n", i, i, shader[i], __builtin_bswap32(shader[i]) , org[i], __builtin_bswap32(org[i]));
      }
   }
}
/* Debug helper: compares `shader_` against the reference `org_` word by word
 * and prints a one-line verdict ("no errors") or one line per difference.
 *
 * shader_ / shader_size: buffer under test and its size in bytes.
 * org_ / org_size:       reference buffer and its size in bytes.
 * name:                  label printed in front of the result.
 *
 * A size mismatch is reported as a difference. Words are only compared over
 * the range both buffers cover, so the shorter buffer is never read past its
 * stated size. */
void check_shader(const void* shader_, u32 shader_size, const void* org_, u32 org_size, const char* name)
{
   const u32* shader = (const u32*)shader_;
   const u32* org    = (const u32*)org_;
   u32 common_size   = shader_size < org_size ? shader_size : org_size;
   int words         = (int)(common_size / 4);
   bool different    = false;

   printf("%-20s : ", name);

   if (shader_size != org_size)
   {
      different = true;
      printf("\nsize mismatch : 0x%08X should be 0x%08X", shader_size, org_size);
   }

   for (int i = 0; i < words; i++)
   {
      if (shader[i] != org[i])
      {
         different = true;
         /* each value is also shown byte-swapped */
         printf("\n%i(%X): 0x%08X(0x%08X) should be 0x%08X(0x%08X)", i, i, shader[i], __builtin_bswap32(shader[i]) , org[i], __builtin_bswap32(org[i]));
      }
   }

   if (!different)
      printf("no errors");

   printf("\n");
}

67
wiiu/shader_utils.h Normal file
View File

@ -0,0 +1,67 @@
#pragma once
#include <wiiu/gx2/shaders.h>
/* incompatible with elf builds */
//#define GX2_CAN_ACCESS_DATA_SECTION
#ifdef __cplusplus
extern "C" {
#endif
/* Two-component float vector, 16-byte aligned, with its members stored in
 * little-endian byte order (scalar_storage_order attribute).
 * The union gives the same storage two sets of names: x/y and width/height.
 * NOTE(review): presumably little-endian because that is what the GPU expects
 * for uniform data - confirm. */
typedef union
__attribute__((aligned (16)))
{
struct __attribute__((scalar_storage_order ("little-endian")))
{
float x;
float y;
};
struct __attribute__((scalar_storage_order ("little-endian")))
{
float width;
float height;
};
}GX2_vec2;
/* Four-component float vector, 16-byte aligned, with its members stored in
 * little-endian byte order (scalar_storage_order attribute).
 * The last two components can be addressed either as z/w or as
 * width/height (same storage), e.g. for an x, y, width, height rectangle. */
typedef struct
__attribute__((aligned (16)))
__attribute__((scalar_storage_order ("little-endian")))
{
float x;
float y;
union
{
struct __attribute__((scalar_storage_order ("little-endian")))
{
float z;
float w;
};
struct __attribute__((scalar_storage_order ("little-endian")))
{
float width;
float height;
};
};
}GX2_vec4;
/* A complete shader set handled as one unit by GX2InitShader(),
 * GX2SetShader() and GX2DestroyShader(). */
typedef struct
{
GX2VertexShader vs;
GX2PixelShader ps;
/* optional: gs.program == NULL means "no geometry shader" */
GX2GeometryShader gs;
/* built by GX2InitShader(); fs.program != NULL marks the set as initialised */
GX2FetchShader fs;
/* attribute layout passed to GX2InitFetchShaderEx(); vs.attribVarCount entries are used */
GX2AttribStream* attribute_stream;
}GX2Shader;
/* Builds the fetch shader (and, unless GX2_CAN_ACCESS_DATA_SECTION is defined,
 * MEM2 copies of the programs). Does nothing if the shader is already initialised. */
void GX2InitShader(GX2Shader* shader);
/* Frees what GX2InitShader() allocated and resets fs.program to NULL. */
void GX2DestroyShader(GX2Shader* shader);
/* Binds the vertex, pixel and fetch shaders, plus the geometry shader if present. */
void GX2SetShader(GX2Shader* shader);
/* Debug: prints the 32-bit words of `shader_` that differ from `org_`, or "no errors".
 * Sizes are in bytes. */
void check_shader(const void* shader_, u32 shader_size, const void* org_, u32 org_size, const char* name);
/* Debug: like check_shader() but also dumps both buffers side by side. */
void check_shader_verbose(u32* shader, u32 shader_size, u32* org, u32 org_size, const char* name);
#ifdef __cplusplus
}
#endif

319
wiiu/sprite_shader.c Normal file
View File

@ -0,0 +1,319 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2014-2016 - Ali Bouhlel
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include <stddef.h>
#include <malloc.h>
#include <string.h>
#include <wiiu/gx2/common.h>
#include "sprite_shader.h"
#include "gx2_shader_inl.h"
/* Hand-assembled vertex shader microcode for the sprite shader.
 *
 * Layout (in 64-bit words): 32 control-flow slots, then 26 ALU instructions.
 * The ALU clause address in the control flow (32) is therefore the index of
 * alu[0]. The "//@N" markers in the ALU list are offsets in 32-bit words
 * from the start of the program (alu[0] is @64).
 *
 * Control flow: call the fetch shader, run the 26-instruction ALU clause,
 * then write R1 and R0 out with two MEM_RING writes.
 * NOTE(review): KCACHE0(CB1, ...) / KCACHE1(CB2, ...) presumably make uniform
 * blocks 1 and 2 readable as KC0(n) / KC1(n) inside the clause - confirm
 * against gx2_shader_inl.h. */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u64 cf[32];
u64 alu[26];
} vs_program =
{
{
CALL_FS NO_BARRIER,
ALU(32, 26) KCACHE0(CB1, _0_15) KCACHE1(CB2, _0_15),
MEM_RING(WRITE( 0), _R1, _xyzw, ARRAY_SIZE(1), ELEM_SIZE(3)) BURSTCNT(1),
MEM_RING(WRITE(32), _R0, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER
END_OF_PROGRAM
},
{
ALU_MOV_x2(_R127,_x, _R3,_y), //@64
ALU_MOV_x2(__,_y, _R3,_x),
ALU_MOV_x2(_R127,_z, _R3,_w),
ALU_MOV_x2(__,_w, _R3,_z), //@70
ALU_RECIP_IEEE(__,__, KC0(0), _x) SCL_210
ALU_LAST,
ALU_MUL_IEEE(_R0,_z, ALU_SRC_PV, _w, ALU_SRC_PS, _x),
ALU_MUL_IEEE(__,_w, ALU_SRC_PV,_y, ALU_SRC_PS,_x),
ALU_RECIP_IEEE(__,_z, KC0(0),_y) SCL_210
ALU_LAST,
ALU_ADD(_R0,_x, ALU_SRC_PV,_w, ALU_SRC_1 _NEG,_x), //@80
ALU_MUL_IEEE(__,_z, _R127,_x, ALU_SRC_PS,_x),
ALU_MUL_IEEE(_R0,_w, _R127,_z, ALU_SRC_PS,_x),
ALU_RECIP_IEEE(__,__, KC1(0),_x) SCL_210
ALU_LAST,
ALU_MUL_IEEE(_R3,_x, _R2,_x, ALU_SRC_PS,_x),
ALU_ADD(_R0,_y, ALU_SRC_PV _NEG,_z, ALU_SRC_1,_x), //@90
ALU_MUL_IEEE(_R3,_z, _R2,_z, ALU_SRC_PS,_x),
ALU_RECIP_IEEE(__,__, KC1(0),_y) SCL_210
ALU_LAST,
ALU_MUL_IEEE(_R3,_y, _R2,_y, ALU_SRC_PS,_x),
ALU_MUL_IEEE(_R3,_w, _R2,_w, ALU_SRC_PS,_x)
ALU_LAST,
ALU_MOV(_R1,_x, _R1,_x), //@100
ALU_MOV(_R1,_y, _R1,_y),
ALU_MOV(_R1,_z, _R1,_z),
ALU_MOV(_R1,_w, _R1,_w)
ALU_LAST,
ALU_MOV(_R2,_x, _R3,_x),
ALU_MOV(_R2,_y, _R3,_y),
ALU_MOV(_R2,_z, _R3,_z),
ALU_MOV(_R2,_w, _R3,_w)
ALU_LAST,
}
};
/* Hand-assembled pixel shader microcode for the sprite shader.
 *
 * Layout (in 64-bit words, matching the @N markers): control flow at 0,
 * ALU clause at 32, texture clause at 48. One TEX_SAMPLE occupies two slots.
 *
 * Control flow: sample texture _t0 with sampler _s0 using R1.xy as the
 * coordinate and store the texel in R1; multiply R0 by R1 component-wise;
 * export R0 as pixel output 0.
 * NOTE(review): R0 presumably holds the interpolated vertex colour on entry -
 * confirm against the vertex / geometry shader outputs. */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u64 cf[32]; // @0
u64 alu[16]; // @32
u64 tex[1 * 2]; // @48
} ps_program =
{
{
TEX(48, 1) VALID_PIX,
ALU(32, 4),
EXP_DONE(PIX0, _R0, _x, _y, _z, _w)
END_OF_PROGRAM
},
{
ALU_MUL(_R0,_x, _R0,_x, _R1,_x),
ALU_MUL(_R0,_y, _R0,_y, _R1,_y),
ALU_MUL(_R0,_z, _R0,_z, _R1,_z),
ALU_MUL(_R0,_w, _R0,_w, _R1,_w)
ALU_LAST,
},
{
TEX_SAMPLE(_R1,_x,_y,_z,_w, _R1,_x,_y,_0,_x, _t0, _s0)
}
};
/* Geometry-stage microcode for sprite_shader (referenced below through
 * .program = &gs_program / .size = sizeof(gs_program)).
 *
 * The @N comments are u64 indices from the start of the struct: the ALU
 * clause is addressed at 32 (33 slots, counting the literal) and the fetch
 * clause at 80 (3 fetches, two u64 each). The "@160" marker on the first
 * fetch equals 2 * 80, i.e. it looks like a 32-bit word offset.
 *
 * Flow, reading the macros as R600/R700-style mnemonics
 * (NOTE(review): confirm against gx2_shader_inl.h):
 *   - three vec4 values are fetched at byte offsets 0, 32 and 16 into
 *     R7, R1 and R0;
 *   - the ALU clause derives four (R2/R4/R5/R8).xy pairs from R0 and four
 *     vec4 values (R3/R0/R6/R9) from R1, with z = 0 and w = 1.0:
 *        vertex 0: R2 = (R0.x + R0.z, R0.y)         R3 = (R1.x + R1.z, R1.y)
 *        vertex 1: R4 = (R0.x,        R0.y)         R0 = (R1.x,        R1.y)
 *        vertex 2: R5 = (R0.x + R0.z, R0.y + R0.w)  R6 = (R1.x + R1.z, R1.y - R1.w)
 *        vertex 3: R8 = (R0.x,        R0.y + R0.w)  R9 = (R1.x,        R1.y - R1.w)
 *   - each vertex is written as three 16-byte ring entries (R7, the .xy
 *     pair, the vec4) followed by EMIT_VERTEX.
 * Presumably this expands one (x, y, width, height) sprite into a four-vertex
 * strip; the register-to-attribute mapping is not visible in this struct.
 */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u64 cf[32]; // @0
u64 alu[80-32]; // @32
u64 tex[3 * 2]; // @80
} gs_program =
{
{
TEX(80, 3),
MEM_RING(WRITE( 0), _R7, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)),
ALU(32, 33),
MEM_RING(WRITE( 16), _R2, _xy__, ARRAY_SIZE(0), ELEM_SIZE(3)),
MEM_RING(WRITE( 32), _R3, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
EMIT_VERTEX,
MEM_RING(WRITE( 48), _R7, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE( 64), _R4, _xy__, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE( 80), _R0, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
EMIT_VERTEX,
MEM_RING(WRITE( 96), _R7, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE(112), _R5, _xy__, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE(128), _R6, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
EMIT_VERTEX,
MEM_RING(WRITE(144), _R7, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE(160), _R8, _xy__, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
MEM_RING(WRITE(176), _R9, _xyzw, ARRAY_SIZE(0), ELEM_SIZE(3)) NO_BARRIER,
EMIT_VERTEX
END_OF_PROGRAM
},
{
/* group 0: stash R1.z, zero temporaries, R3.zw = (0, 1.0) */
ALU_MOV(_R127,_x, _R1,_z),
ALU_MOV(__,_y, ALU_SRC_0,_x),
ALU_MOV(_R3,_z, ALU_SRC_0,_x),
ALU_MOV(_R127,_w, ALU_SRC_0,_x),
ALU_MOV(_R3,_w, ALU_SRC_LITERAL,_x)
ALU_LAST,
ALU_LITERAL(0x3F800000), /* bit pattern of 1.0f; occupies one ALU slot */
/* group 1: R3.xy = (R1.x + R1.z, R1.y); R4.x = R0.x */
ALU_ADD(_R3,_x, _R1,_x, ALU_SRC_PV,_x),
ALU_ADD(_R3,_y, _R1,_y, ALU_SRC_PV,_y),
ALU_MOV(__,_z, _R0,_z),
ALU_MOV(__,_w, ALU_SRC_0,_x),
ALU_ADD(_R4,_x, _R0,_x, ALU_SRC_0,_x)
ALU_LAST,
/* group 2: R2.xy = (R0.x + R0.z, R0.y); stash -R1.w; R4.y = R0.y */
ALU_ADD(_R2,_x, _R0,_x, ALU_SRC_PV,_z),
ALU_ADD(_R2,_y, _R0,_y, ALU_SRC_PV,_w),
ALU_MOV(_R127,_z, _R1 _NEG,_w),
ALU_MOV(_R126,_w, _R1 _NEG,_w),
ALU_ADD(_R4,_y, _R0,_y, ALU_SRC_0,_x)
ALU_LAST,
/* group 3: R5.xy = (R0.x + R0.z, R0.y + R0.w); R8.x = R0.x */
ALU_ADD(_R5,_x, _R0,_x, _R0,_z),
ALU_ADD(_R5,_y, _R0,_y, _R0,_w),
ALU_MOV(__,_z, _R0,_w),
ALU_ADD(_R8,_x, _R127,_w, _R0,_x)
ALU_LAST,
/* group 4: R8.y = R0.y + R0.w; R0 is then reused as (R1.x, R1.y, 0, 1.0) */
ALU_ADD(_R0,_x, _R1,_x, ALU_SRC_0,_x),
ALU_ADD(_R8,_y, ALU_SRC_PV,_z, _R0,_y),
ALU_MOV(_R0,_z, _R3,_z),
ALU_MOV(_R0,_w, _R3,_w),
ALU_ADD(_R0,_y, _R1,_y, ALU_SRC_0,_x)
ALU_LAST,
/* group 5: R6 = (R1.x + R1.z, R1.y - R1.w, 0, 1.0); R9.x = R1.x */
ALU_ADD(_R6,_x, _R1,_x, _R127,_x) VEC_021,
ALU_ADD(_R6,_y, _R1,_y, _R127,_z),
ALU_MOV(_R6,_z, _R3,_z),
ALU_MOV(_R6,_w, _R3,_w),
ALU_ADD(_R9,_x, _R127,_w, _R1,_x)
ALU_LAST,
/* group 6: R9.yzw = (R1.y - R1.w, 0, 1.0) */
ALU_ADD(_R9,_y, _R126,_w, _R1,_y),
ALU_MOV(_R9,_z, _R3,_z),
ALU_MOV(_R9,_w, _R3,_w) VEC_120
ALU_LAST,
},
{
VTX_FETCH(_R7,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(16), OFFSET(0)), // @160
VTX_FETCH(_R1,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(16), OFFSET(32)),
VTX_FETCH(_R0,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(16), OFFSET(16)),
}
};
/* Copy-shader microcode attached to the geometry stage of sprite_shader
 * (referenced below through .copyProgram / .copyProgramSize).
 *
 * The VTX(16, 3) clause points at vtx[] (u64 index 16, right after cf[16])
 * and performs three fetches at byte offsets 32, 0 and 16 into R1, R2 and
 * R3. R1 is exported as POS0 and R2 as PARAM0 with BURSTCNT(1).
 * NOTE(review): BURSTCNT(1) presumably also exports R3 as the following
 * parameter -- confirm, since R3 is otherwise fetched but never named in an
 * export.
 */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u64 cf[16]; // @0
u64 vtx[3 * 2]; // @16
} gs_copy_program=
{
{
VTX(16, 3),
EXP_DONE(POS0, _R1,_x,_y,_z,_w),
EXP_DONE(PARAM0, _R2,_x,_y,_z,_w) BURSTCNT(1)
END_OF_PROGRAM
},
{
VTX_FETCH(_R1,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(16), OFFSET(32)),
VTX_FETCH(_R2,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MEGA(32), OFFSET(0)),
VTX_FETCH(_R3,_x,_y,_z,_w, _R0,_x, _b(159), FETCH_TYPE(NO_INDEX_OFFSET), MINI(16), OFFSET(16)),
}
};
/* Named vertex attributes of the sprite shader, handed to the vertex stage
 * of sprite_shader through .attribVarCount / the pointer that follows it.
 * The last field of each entry (0, 1, 2) matches the first field of the
 * corresponding attribute_stream entry.
 * NOTE(review): fields are positional -- presumably {name, type, count,
 * location}; confirm against the GX2AttribVar declaration.
 */
static GX2AttribVar attributes[] =
{
{"position", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 0},
{"coords", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 1},
{"color", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 2},
};
/* Describes where each attribute lives inside a sprite_vertex_t: pos and
 * coord are read as four 32-bit floats, color as four normalized 8-bit
 * channels. All three use per-vertex indexing and the second field is 0 for
 * every entry. Exposed through sprite_shader.attribute_stream.
 * NOTE(review): fields are positional -- presumably {location, buffer,
 * offset, format, index type, divisor, component mask, endian swap};
 * confirm against the GX2AttribStream declaration.
 */
static GX2AttribStream attribute_stream[] =
{
{0, 0, offsetof(sprite_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _w), GX2_ENDIAN_SWAP_DEFAULT},
{1, 0, offsetof(sprite_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _w), GX2_ENDIAN_SWAP_DEFAULT},
{2, 0, offsetof(sprite_vertex_t, color), GX2_ATTRIB_FORMAT_UNORM_8_8_8_8,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _w), GX2_ENDIAN_SWAP_DEFAULT},
};
/* The single 2D sampler ("s") used by the pixel stage; attached to
 * sprite_shader through .samplerVarCount / the pointer that follows it.
 * NOTE(review): the trailing 0 is presumably the sampler location, which
 * would correspond to _s0 in ps_program -- confirm against GX2SamplerVar.
 */
static GX2SamplerVar samplers[] =
{
{ "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
};
/* Uniform blocks consumed by the vertex stage of sprite_shader (attached
 * through .uniformBlockCount / the pointer that follows it). Each block is
 * sized for one GX2_vec2. The middle field (1, 2) uses the same indices as
 * the CB1 / CB2 constant buffers bound by KCACHE0 / KCACHE1 in vs_program.
 * NOTE(review): fields are positional -- presumably {name, offset/location,
 * size}; confirm against the GX2UniformBlock declaration.
 *
 * Kept file-local (static) like the other descriptor tables in this file:
 * the table is only reached through sprite_shader, and a generic name such
 * as "uniform_blocks" with external linkage would clash at link time with
 * any other shader file that defines the same symbol.
 */
static GX2UniformBlock uniform_blocks[] =
{
   {"UBO_vp", 1, sizeof(GX2_vec2)},
   {"UBO_tex", 2, sizeof(GX2_vec2)},
};
/* Uniform variables of the sprite shader (attached to the vertex stage of
 * sprite_shader through .uniformVarCount / the pointer that follows it):
 * two float2 values, "vp_size" and "tex_size". The last field (0, 1) differs
 * per entry.
 * NOTE(review): fields are positional -- presumably {name, type, count,
 * offset, block index}, which would place vp_size in the first uniform
 * block and tex_size in the second; confirm against GX2UniformVar.
 *
 * Kept file-local (static) like the other descriptor tables in this file:
 * the table is only reached through sprite_shader, and a generic name such
 * as "uniform_vars" with external linkage would clash at link time with any
 * other shader file that defines the same symbol.
 */
static GX2UniformVar uniform_vars[] =
{
   {"vp_size", GX2_SHADER_VAR_TYPE_FLOAT2, 1, 0, 0},
   {"tex_size", GX2_SHADER_VAR_TYPE_FLOAT2, 1, 0, 1},
};
/* Complete description of the sprite shader, declared extern in
 * sprite_shader.h. The initializer is positional at the top level:
 *   1st sub-object: register state + microcode for vs_program, plus the
 *                   uniform block / uniform variable / attribute tables;
 *   2nd sub-object: register state + microcode for ps_program, plus the
 *                   sampler table;
 *   3rd sub-object: register state + microcode for gs_program and its copy
 *                   program (gs_copy_program);
 *   .attribute_stream: the vertex layout table defined above.
 * All three stages are flagged GX2_SHADER_MODE_GEOMETRY_SHADER.
 * Within each "regs" group, designated and positional initializers are
 * mixed, so the order of the entries must not be changed.
 */
GX2Shader sprite_shader =
{
{
{
.sq_pgm_resources_vs.num_gprs = 4,
.sq_pgm_resources_vs.stack_size = 1,
.vgt_primitiveid_en.enable = TRUE,
/* no exports declared for this stage; vs_program writes its results with
 * MEM_RING instead of EXP */
.spi_vs_out_config.vs_export_count = 0,
.num_spi_vs_out_id = 0,
{
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
},
/* three input semantics in use: the low three bits are left clear in the
 * mask and the table below has three non-0xff entries */
.sq_vtx_semantic_clear = ~0x7,
.num_sq_vtx_semantic = 3,
{
/* NOTE(review): the 2, 1, 0 order presumably assigns attribute locations to
 * input registers in reverse (location 2 first) -- confirm */
2, 1, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
},
.vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0x0,
.vgt_hos_reuse_depth.reuse_depth = 0x0,
}, /* regs */
.size = sizeof(vs_program),
.program = (uint8_t*)&vs_program,
.mode = GX2_SHADER_MODE_GEOMETRY_SHADER,
/* each count is followed by a positional pointer to its table */
.uniformBlockCount = countof(uniform_blocks), uniform_blocks,
.uniformVarCount = countof(uniform_vars), uniform_vars,
.attribVarCount = countof(attributes), attributes,
/* same value as .sq_gs_vert_itemsize in the geometry-stage regs below */
.ringItemSize = 12,
},
{
{
.sq_pgm_resources_ps.num_gprs = 2,
.sq_pgm_exports_ps.export_mode = 0x2,
.spi_ps_in_control_0.num_interp = 2,
.spi_ps_in_control_0.persp_gradient_ena = 1,
.spi_ps_in_control_0.baryc_sample_cntl = spi_baryc_cntl_centers_only,
/* two interpolated inputs, semantics 0 and 1 (the same two semantics listed
 * in the geometry-stage output table below) */
.num_spi_ps_input_cntl = 2, {{.semantic = 0, .default_val = 1},{.semantic = 1, .default_val = 1}},
.cb_shader_mask.output0_enable = 0xF,
.cb_shader_control.rt0_enable = TRUE,
.db_shader_control.z_order = db_z_order_early_z_then_late_z,
}, /* regs */
.size = sizeof(ps_program),
.program = (uint8_t*)&ps_program,
.mode = GX2_SHADER_MODE_GEOMETRY_SHADER,
.samplerVarCount = countof(samplers), samplers,
},
{
{
.sq_pgm_resources_gs.num_gprs = 10,
/* gs_program issues four EMIT_VERTEX per input; output is a triangle strip */
.vgt_gs_out_prim_type = VGT_GS_OUT_PRIMITIVE_TYPE_TRISTRIP,
.vgt_gs_mode.mode = VGT_GS_ENABLE_MODE_SCENARIO_G,
.vgt_gs_mode.cut_mode = VGT_GS_CUT_MODE_128,
.num_spi_vs_out_id = 1,
{
{.semantic_0 = 0, .semantic_1 = 1, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
},
/* NOTE(review): these *_vs fields sit inside the geometry-stage regs and
 * presumably describe gs_copy_program, which uses R0..R3 -- confirm */
.sq_pgm_resources_vs.num_gprs = 4,
.sq_gs_vert_itemsize = 12,
.spi_vs_out_config.vs_export_count = 1,
}, /* regs */
.size = sizeof(gs_program),
.program = (uint8_t*)&gs_program,
.copyProgramSize = sizeof(gs_copy_program),
.copyProgram = (uint8_t*)&gs_copy_program,
.mode = GX2_SHADER_MODE_GEOMETRY_SHADER,
/* 4 x 12: gs_program emits four vertices per input item */
.ringItemSize = 48,
},
.attribute_stream = attribute_stream,
};

52
wiiu/sprite_shader.h Normal file
View File

@ -0,0 +1,52 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2014-2016 - Ali Bouhlel
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef SPRITE_SHADER_H
#define SPRITE_SHADER_H
#include <wiiu/shader_utils.h>
#ifdef __cplusplus
extern "C" {
#endif
/* One input vertex of the sprite shader: a destination rectangle, a source
 * rectangle in the texture and a packed color. The member offsets are taken
 * with offsetof() by the attribute stream table in sprite_shader.c, which
 * reads pos and coord as four 32-bit floats each and color as four
 * normalized 8-bit channels, so members must not be reordered or resized.
 */
typedef struct
{
/* destination rectangle.
 * NOTE(review): presumably in viewport pixels, since the vertex program
 * divides by the "vp_size" uniform -- confirm against the caller */
struct
{
float x;
float y;
float width;
float height;
}pos;
/* source rectangle in the texture.
 * NOTE(review): presumably in texels, since the vertex program divides by
 * the "tex_size" uniform -- confirm against the caller */
struct
{
float u;
float v;
float width;
float height;
}coord;
/* packed 4 x 8-bit color; channel order is not visible from this header */
u32 color;
}sprite_vertex_t;
extern GX2Shader sprite_shader;
#ifdef __cplusplus
}
#endif
#endif // SPRITE_SHADER_H

View File

@ -170,6 +170,15 @@ IMPORT(GX2InitFetchShaderEx);
IMPORT(GX2SetFetchShader);
IMPORT(GX2SetVertexShader);
IMPORT(GX2SetPixelShader);
IMPORT(GX2SetGeometryShader);
IMPORT(GX2SetGeometryUniformBlock);
IMPORT(GX2SetVertexUniformBlock);
IMPORT(GX2SetPixelUniformBlock);
IMPORT(GX2CalcGeometryShaderInputRingBufferSize);
IMPORT(GX2CalcGeometryShaderOutputRingBufferSize);
IMPORT(GX2SetGeometryShaderInputRingBuffer);
IMPORT(GX2SetGeometryShaderOutputRingBuffer);
IMPORT(GX2SetShaderModeEx);
IMPORT(GX2SetAttribBuffer);
IMPORT(GX2InitTextureRegs);
IMPORT(GX2InitSampler);

View File

@ -16,125 +16,67 @@
#include <stddef.h>
#include <malloc.h>
#include <string.h>
#include <wiiu/gx2/common.h>
#include "tex_shader.h"
#include "gx2_shader_inl.h"
/*******************************************************
* Vertex Shader GLSL source:
*******************************************************
attribute vec2 position;
attribute vec2 tex_coord_in;
attribute vec4 color_in;
varying vec2 tex_coord;
varying vec4 color;
void main()
{
gl_Position = vec4(position, 0.0, 1.0);
tex_coord = tex_coord_in;
color = color_in;
}
******************************************************
* assembly:
******************************************************
00 CALL_FS NO_BARRIER
01 ALU: ADDR(32) CNT(5)
0 x: MOV R3.x, R3.x
y: MOV R3.y, R3.y
z: MOV R2.z, 0.0f
w: MOV R2.w, (0x3F800000, 1.0f).x
02 EXP_DONE: POS0, R2
03 EXP: PARAM0, R1 NO_BARRIER
04 EXP_DONE: PARAM1, R3.xyzz NO_BARRIER
END_OF_PROGRAM
******************************************************
*/
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u32 cf[32 * 2]; /* first ADDR() * 2 */
u32 alu[5 * 2]; /* alu CNT() * 2 */
u64 cf[16];
} vs_program =
{
{
CALL_FS NO_BARRIER,
ALU(32, 5),
EXP_DONE(POS0, _R2, _X, _Y, _Z, _W),
EXP(PARAM0, _R1, _X, _Y, _Z, _W) NO_BARRIER,
EXP_DONE(PARAM1, _R3, _X, _Y, _Z, _Z) NO_BARRIER
EXP_DONE(POS0, _R1, _x, _y, _0, _1),
EXP_DONE(PARAM0, _R2, _x, _y, _0, _0) NO_BARRIER
END_OF_PROGRAM
},
{
ALU_MOV(_R3,_X, _R3,_X),
ALU_MOV(_R3,_Y, _R3,_Y),
ALU_MOV(_R2,_Z, ALU_SRC_0,_X),
ALU_LAST
ALU_MOV(_R2,_W, ALU_SRC_LITERAL,_X), ALU_LITERAL(0x3F800000)
}
};
/*******************************************************
* Pixel Shader GLSL source:
*******************************************************
varying vec2 tex_coord;
varying vec4 color;
uniform sampler2D s;
void main()
{
gl_FragColor = texture2D(s, tex_coord) * color;
}
******************************************************
* assembly:
******************************************************
00 TEX: ADDR(48) CNT(1) VALID_PIX
0 SAMPLE R1, R1.xy0x, t0, s0
01 ALU: ADDR(32) CNT(4)
1 x: MUL R0.x, R0.x, R1.x
y: MUL R0.y, R0.y, R1.y
z: MUL R0.z, R0.z, R1.z
w: MUL R0.w, R0.w, R1.w
02 EXP_DONE: PIX0, R0
END_OF_PROGRAM
*******************************************************
*/
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u32 cf[32 * 2]; /* first ADDR() * 2 */
u32 alu[(48-32) * 2]; /* (tex ADDR() - alu ADDR()) * 2 */
u32 tex[1 * 3]; /* tex CNT() * 3 */
} ps_program =
u64 cf[16];
u64 tex[1 * 2];
}
ps_program =
{
{
TEX(48, 1) VALID_PIX,
ALU(32, 4),
EXP_DONE(PIX0, _R0, _X, _Y, _Z, _W)
TEX(16, 1) VALID_PIX,
EXP_DONE(PIX0, _R0, _x, _y, _z, _w)
END_OF_PROGRAM
},
{
ALU_MUL(_R0,_X, _R0,_X, _R1,_X),
ALU_MUL(_R0,_Y, _R0,_Y, _R1,_Y),
ALU_MUL(_R0,_Z, _R0,_Z, _R1,_Z),
ALU_LAST
ALU_MUL(_R0,_W, _R0,_W, _R1,_W),
},
{
TEX_SAMPLE(_R1,_X,_Y,_Z,_W, _R1,_X,_Y,_0,_X, _t0, _s0)
TEX_SAMPLE(_R0,_x,_y,_z,_w, _R0,_x,_y,_0,_0, _t0, _s0)
}
};
tex_shader_t tex_shader =
static GX2AttribVar attributes[] =
{
{ "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0},
{ "tex_coord", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1},
};
static GX2AttribStream attribute_stream[] =
{
{0, 0, offsetof(tex_shader_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT},
{1, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT},
};
static GX2SamplerVar samplers[] =
{
{ "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
};
GX2Shader tex_shader =
{
{
{
.sq_pgm_resources_vs.num_gprs = 4,
.sq_pgm_resources_vs.num_gprs = 3,
.sq_pgm_resources_vs.stack_size = 1,
.spi_vs_out_config.vs_export_count = 1,
.num_spi_vs_out_id = 1,
@ -150,10 +92,10 @@ tex_shader_t tex_shader =
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
{.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF},
},
.sq_vtx_semantic_clear = ~0x7,
.num_sq_vtx_semantic = 3,
.sq_vtx_semantic_clear = ~0x3,
.num_sq_vtx_semantic = 2,
{
0, 1, 2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0, 1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
},
.vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE,
@ -162,11 +104,11 @@ tex_shader_t tex_shader =
.size = sizeof(vs_program),
.program = (uint8_t*)&vs_program,
.mode = GX2_SHADER_MODE_UNIFORM_REGISTER,
.attribVarCount = sizeof(tex_shader.attributes) / sizeof(GX2AttribVar), (GX2AttribVar*) &tex_shader.attributes,
.attribVarCount = countof(attributes), attributes,
},
{
{
.sq_pgm_resources_ps.num_gprs = 2,
.sq_pgm_resources_ps.num_gprs = 1,
.sq_pgm_exports_ps.export_mode = 0x2,
.spi_ps_in_control_0.num_interp = 2,
.spi_ps_in_control_0.persp_gradient_ena = 1,
@ -179,28 +121,7 @@ tex_shader_t tex_shader =
.size = sizeof(ps_program),
.program = (uint8_t*)&ps_program,
.mode = GX2_SHADER_MODE_UNIFORM_REGISTER,
.samplerVarCount = 1,
.samplerVars = (GX2SamplerVar*) &tex_shader.sampler,
},
.sampler = { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
.attributes = {
.color = { "color_in", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 0},
.position = { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1},
.tex_coord = { "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 2},
},
.attribute_stream = {
.color = {
0, 0, offsetof(tex_shader_vertex_t, color), GX2_ATTRIB_FORMAT_UNORM_8_8_8_8,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _Z, _W), GX2_ENDIAN_SWAP_DEFAULT
},
.position = {
1, 0, offsetof(tex_shader_vertex_t, pos), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT
},
.tex_coord = {
2, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT
}
},
{},
.samplerVarCount = countof(samplers), samplers,
},
.attribute_stream = attribute_stream,
};

View File

@ -15,32 +15,13 @@
#ifndef TEX_SHADER_H
#define TEX_SHADER_H
#include <wiiu/gx2.h>
#include <wiiu/shader_utils.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct __attribute__((aligned(GX2_VERTEX_BUFFER_ALIGNMENT)))
{
GX2VertexShader vs;
GX2PixelShader ps;
GX2SamplerVar sampler;
struct
{
GX2AttribVar color;
GX2AttribVar position;
GX2AttribVar tex_coord;
} attributes;
struct
{
GX2AttribStream color;
GX2AttribStream position;
GX2AttribStream tex_coord;
} attribute_stream;
GX2FetchShader fs;
}tex_shader_t;
typedef struct
{
struct
@ -54,11 +35,9 @@ typedef struct
float u;
float v;
}coord;
u32 color;
}tex_shader_vertex_t;
extern tex_shader_t tex_shader;
extern GX2Shader tex_shader;
#ifdef __cplusplus
}