(WIIU) GX2 rendering.

This commit is contained in:
aliaspider 2016-11-05 15:05:46 +01:00
parent 0433cae3c9
commit d3b3c18a6f
7 changed files with 802 additions and 103 deletions

View File

@ -10,6 +10,7 @@ OBJ += wiiu/fs/sd_fat_devoptab.o
OBJ += wiiu/fs/fs_utils.o
OBJ += wiiu/system/dynamic.o
OBJ += wiiu/system/dyn_stubs.o
OBJ += wiiu/tex_shader.o
DEFINES :=

View File

@ -255,7 +255,6 @@ int __entry_menu(int argc, char **argv)
memoryInitialize();
mount_sd_fat("sd");
VPADInit();
OSScreenInit();
verbosity_enable();
DEBUG_VAR(argc);
@ -263,7 +262,9 @@ int __entry_menu(int argc, char **argv)
DEBUG_STR(argv[1]);
#if 0
int argc_ = 2;
char* argv_[] = {"sd:/retroarch/retroarch.elf", "sd:/smb3.nes", NULL};
// char* argv_[] = {"sd:/retroarch/retroarch.elf", "sd:/rom.nes", NULL};
char* argv_[] = {"sd:/retroarch/retroarch.elf", "sd:/content/rom.sfc", NULL};
rarch_main(argc_, argv_, NULL);
#else
rarch_main(argc, argv, NULL);
@ -284,7 +285,6 @@ int __entry_menu(int argc, char **argv)
// }while(frames++ < 300);
main_exit(NULL);
unmount_sd_fat("sd");
memoryRelease();
fflush(stdout);

View File

@ -17,40 +17,132 @@
#include "../../configuration.h"
#include "../../verbosity.h"
#include <string.h>
#include <coreinit/screen.h>
#include <coreinit/cache.h>
#include "gx2.h"
#include "system/memory.h"
#include "string.h"
#include "tex_shader.h"
#include "wiiu_dbg.h"
/* Component selectors, passed to GX2_COMP_SEL() below.
 * The positional (_X.._W) and colour (_R.._A) spellings share values 0..3;
 * _0 and _1 are the two extra selectors 4 and 5 (presumably "constant 0" and
 * "constant 1" -- TODO confirm against the GX2 documentation). */
#define _X 0
#define _Y 1
#define _Z 2
#define _W 3

#define _R 0
#define _G 1
#define _B 2
#define _A 3

#define _0 4
#define _1 5

/* Packs four selectors into one 32-bit value, c0 in the top byte and c3 in
 * the bottom byte. */
#define GX2_COMP_SEL(c0, c1, c2, c3) \
   ((c3) | ((c2) << 8) | ((c1) << 16) | ((c0) << 24))
/* Describes one TV output mode: render width/height plus the GX2 render mode.
 * wiiu_render_mode_map holds one of these per TV scan mode.
 *
 * NOTE(review): this listing is a diff shown without +/- markers. The
 * screen_buffer* members below look like lines the commit removes from the
 * old driver state (the initializers in wiiu_render_mode_map supply only
 * width, height and mode, in that order) -- confirm against the real file. */
typedef struct
{
void* screen_buffer0;
int screen_buffer0_size;
void* screen_buffer1;
int screen_buffer1_size;
int screen_buffer0_id;
int screen_buffer1_id;
int width;   /* render target width in pixels */
int height;  /* render target height in pixels */
GX2TVRenderMode mode; /* passed to GX2CalcTVSize / GX2SetTVBuffer */
} wiiu_render_mode_t;
/* One 2D vertex position; the driver keeps four of these per quad. */
typedef struct
{
   float x, y;
} position_t;
/* One 2D texture coordinate; the driver keeps four of these per quad. */
typedef struct
{
   float u, v;
} tex_coord_t;
/* Per-instance state of the Wii U video driver: shader, frame and menu
 * textures with their quad geometry, samplers, scan buffers, the colour
 * buffer that is rendered into, the GX2 context state and command buffer.
 *
 * NOTE(review): this listing is a diff shown without +/- markers, so old and
 * new members appear side by side (e.g. both "void* texture" and
 * "GX2Texture texture"). The void*/tex_width/tex_height members look like the
 * removed ones -- confirm against the real file before relying on this. */
typedef struct
{
tex_shader_t* shader; /* MEM2 copy of the global tex_shader, made in wiiu_gfx_init */
struct
{
void* texture;
int tex_width;
int tex_height;
GX2Texture texture;     /* menu texture */
int width;              /* size of the last menu frame uploaded */
int height;
bool enable;            /* when set, the menu quad is drawn in wiiu_gfx_frame */
position_t* position;   /* 4 vertices */
tex_coord_t* tex_coord; /* 4 texture coordinates */
} menu;
void* texture;
int tex_width;
int tex_height;
GX2Sampler sampler_nearest; /* initialised with point filtering */
GX2Sampler sampler_linear;  /* initialised with linear filtering */
GX2Texture texture;         /* core frame texture */
position_t* position;       /* 4 vertices */
tex_coord_t* tex_coord;     /* 4 texture coordinates */
int width;                  /* size of the last core frame uploaded */
int height;
void* drc_scan_buffer;      /* gamepad scan buffer */
void* tv_scan_buffer;       /* TV scan buffer */
GX2ColorBuffer color_buffer; /* render target copied to both scan buffers */
GX2ContextState* ctx_state;
void* cmd_buffer;           /* handed to GX2Init as the command buffer pool */
wiiu_render_mode_t render_mode;
int frames;                 /* frame counter printed by wiiu_gfx_frame */
} wiiu_video_t;
static const wiiu_render_mode_t wiiu_render_mode_map[] =
{
{0}, /* GX2_TV_SCAN_MODE_NONE */
{854, 480, GX2_TV_RENDER_MODE_WIDE_480P}, /* GX2_TV_SCAN_MODE_576I */
{854, 480, GX2_TV_RENDER_MODE_WIDE_480P}, /* GX2_TV_SCAN_MODE_480I */
{854, 480, GX2_TV_RENDER_MODE_WIDE_480P}, /* GX2_TV_SCAN_MODE_480P */
{1280, 720, GX2_TV_RENDER_MODE_WIDE_720P}, /* GX2_TV_SCAN_MODE_720P */
{0}, /* GX2_TV_SCAN_MODE_unk */
{1920, 1080, GX2_TV_RENDER_MODE_WIDE_1080P}, /* GX2_TV_SCAN_MODE_1080I */
{1920, 1080, GX2_TV_RENDER_MODE_WIDE_1080P} /* GX2_TV_SCAN_MODE_1080P */
};
/*
 * Writes the four corners of a quad into position[0..3] and then calls
 * GX2Invalidate (CPU attribute-buffer mode) over those four entries.
 *
 * The corners are always (-1,-1), (1,-1), (1,1), (-1,1). draw_buffer and
 * x0/y0/x1/y1 are accepted for the callers' benefit but are not used yet.
 *
 * position must point to at least four elements.
 *
 * The function returns nothing; it previously had no declared return type
 * (implicit int) and no return statement.
 */
static void wiiu_set_position(position_t* position, GX2ColorBuffer* draw_buffer, float x0, float y0, float x1, float y1)
{
   /* not used yet: the quad always spans the whole -1..1 range */
   (void)draw_buffer;
   (void)x0;
   (void)y0;
   (void)x1;
   (void)y1;

   position[0].x = -1.0f;
   position[0].y = -1.0f;
   position[1].x = 1.0f;
   position[1].y = -1.0f;
   position[2].x = 1.0f;
   position[2].y = 1.0f;
   position[3].x = -1.0f;
   position[3].y = 1.0f;

   DEBUG_FLOAT(position[0].x);
   DEBUG_FLOAT(position[0].y);
   DEBUG_FLOAT(position[1].x);
   DEBUG_FLOAT(position[1].y);
   DEBUG_FLOAT(position[2].x);
   DEBUG_FLOAT(position[2].y);
   DEBUG_FLOAT(position[3].x);
   DEBUG_FLOAT(position[3].y);

   GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, position, 4 * sizeof(*position));
}
/*
 * Converts the texel rectangle (u0, v0)-(u1, v1) into coordinates relative to
 * the texture's surface width/height and stores them in tex_coord[0..3],
 * then calls GX2Invalidate (CPU attribute-buffer mode) over the four entries.
 *
 * Entries 0 and 1 receive v1 while entries 2 and 3 receive v0.
 * tex_coord must point to at least four elements.
 */
static void wiiu_set_tex_coords(tex_coord_t* tex_coord, GX2Texture* texture, float u0, float v0, float u1, float v1)
{
   const float u_start = u0 / texture->surface.width;
   const float u_end   = u1 / texture->surface.width;
   const float v_start = v0 / texture->surface.height;
   const float v_end   = v1 / texture->surface.height;

   tex_coord[0].u = u_start;
   tex_coord[0].v = v_end;

   tex_coord[1].u = u_end;
   tex_coord[1].v = v_end;

   tex_coord[2].u = u_end;
   tex_coord[2].v = v_start;

   tex_coord[3].u = u_start;
   tex_coord[3].v = v_start;

   GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, tex_coord, 4 * sizeof(*tex_coord));
}
static void* wiiu_gfx_init(const video_info_t* video,
const input_driver_t** input, void** input_data)
{
int i;
*input = NULL;
*input_data = NULL;
@ -59,37 +151,179 @@ static void* wiiu_gfx_init(const video_info_t* video,
if (!wiiu)
return NULL;
wiiu->screen_buffer0_size = OSScreenGetBufferSizeEx(0);
wiiu->screen_buffer0 = MEM1_alloc(wiiu->screen_buffer0_size, 0x40);
wiiu->screen_buffer1_size = OSScreenGetBufferSizeEx(1);
wiiu->screen_buffer1 = MEM1_alloc(wiiu->screen_buffer1_size, 0x40);
/* video init */
wiiu->cmd_buffer = MEM2_alloc(0x400000, 0x40);
u32 init_attributes[] =
{
GX2_INIT_CMD_BUF_BASE, (u32)wiiu->cmd_buffer,
GX2_INIT_CMD_BUF_POOL_SIZE, 0x400000,
GX2_INIT_ARGC, 0,
GX2_INIT_ARGV, 0,
GX2_INIT_END
};
GX2Init(init_attributes);
DEBUG_INT(wiiu->screen_buffer0_size);
DEBUG_INT(wiiu->screen_buffer1_size);
/* setup scanbuffers */
u32 size = 0;
u32 tmp = 0;
wiiu->render_mode = wiiu_render_mode_map[GX2GetSystemTVScanMode()];
GX2CalcTVSize(wiiu->render_mode.mode, GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8, GX2_BUFFERING_MODE_DOUBLE, &size, &tmp);
wiiu->tv_scan_buffer = MEMBucket_alloc(size, GX2_SCAN_BUFFER_ALIGNMENT);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU, wiiu->tv_scan_buffer, size);
GX2SetTVBuffer(wiiu->tv_scan_buffer, size, wiiu->render_mode.mode, GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8,
GX2_BUFFERING_MODE_DOUBLE);
OSScreenSetBufferEx(0, wiiu->screen_buffer0);
OSScreenSetBufferEx(1, wiiu->screen_buffer1);
OSScreenEnableEx(0, 1);
OSScreenEnableEx(1, 1);
OSScreenClearBufferEx(0, 0);
OSScreenClearBufferEx(1, 0);
GX2CalcDRCSize(GX2_DRC_RENDER_MODE_SINGLE, GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8, GX2_BUFFERING_MODE_DOUBLE, &size,
&tmp);
wiiu->drc_scan_buffer = MEMBucket_alloc(size, GX2_SCAN_BUFFER_ALIGNMENT);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU, wiiu->drc_scan_buffer, size);
GX2SetDRCBuffer(wiiu->drc_scan_buffer, size, GX2_DRC_RENDER_MODE_SINGLE, GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8,
GX2_BUFFERING_MODE_DOUBLE);
DCFlushRange(wiiu->screen_buffer0, wiiu->screen_buffer0_size);
DCFlushRange(wiiu->screen_buffer1, wiiu->screen_buffer1_size);
memset(&wiiu->color_buffer, 0, sizeof(GX2ColorBuffer));
wiiu->color_buffer.surface.dim = GX2_SURFACE_DIM_TEXTURE_2D;
wiiu->color_buffer.surface.width = wiiu->render_mode.width;
wiiu->color_buffer.surface.height = wiiu->render_mode.height;
wiiu->color_buffer.surface.depth = 1;
wiiu->color_buffer.surface.mipLevels = 1;
wiiu->color_buffer.surface.format = GX2_SURFACE_FORMAT_UNORM_R8_G8_B8_A8;
wiiu->color_buffer.surface.use = GX2_SURFACE_USE_TEXTURE_COLOR_BUFFER_TV;
wiiu->color_buffer.viewNumSlices = 1;
GX2CalcSurfaceSizeAndAlignment(&wiiu->color_buffer.surface);
GX2InitColorBufferRegs(&wiiu->color_buffer);
OSScreenFlipBuffersEx(0);
wiiu->screen_buffer0_id = 0;
OSScreenFlipBuffersEx(1);
wiiu->screen_buffer1_id = 0;
wiiu->color_buffer.surface.image = MEM1_alloc(wiiu->color_buffer.surface.imageSize,
wiiu->color_buffer.surface.alignment);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU, wiiu->color_buffer.surface.image, wiiu->color_buffer.surface.imageSize);
wiiu->menu.tex_width = 512;
wiiu->menu.tex_height = 512;
wiiu->menu.texture = malloc(wiiu->menu.tex_width * wiiu->menu.tex_height * sizeof(uint16_t));
wiiu->ctx_state = (GX2ContextState*)MEM2_alloc(sizeof(GX2ContextState), GX2_CONTEXT_STATE_ALIGNMENT);
GX2SetupContextStateEx(wiiu->ctx_state, GX2_TRUE);
wiiu->tex_width = video->input_scale * RARCH_SCALE_BASE;;
wiiu->tex_height = video->input_scale * RARCH_SCALE_BASE;;
wiiu->texture = malloc(wiiu->tex_width * wiiu->tex_height * sizeof(uint16_t));
GX2SetContextState(wiiu->ctx_state);
GX2SetColorBuffer(&wiiu->color_buffer, GX2_RENDER_TARGET_0);
GX2SetViewport(0.0f, 0.0f, wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height, 0.0f, 1.0f);
GX2SetScissor(0, 0, wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
GX2SetDepthOnlyControl(GX2_DISABLE, GX2_DISABLE, GX2_COMPARE_FUNC_ALWAYS);
GX2SetColorControl(GX2_LOGIC_OP_COPY, 1, GX2_DISABLE, GX2_ENABLE);
#if 0
GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA,
GX2_BLEND_COMBINE_MODE_ADD,
GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, GX2_BLEND_COMBINE_MODE_ADD);
#else
GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_ONE, GX2_BLEND_MODE_ZERO, GX2_BLEND_COMBINE_MODE_ADD,
GX2_DISABLE, GX2_BLEND_MODE_ONE, GX2_BLEND_MODE_ZERO, GX2_BLEND_COMBINE_MODE_ADD);
#endif
GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE);
GX2SetSwapInterval(1);
/* init shader */
// wiiu->shader = MEM2_alloc(sizeof(*wiiu->shader), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu->shader = MEM2_alloc(sizeof(tex_shader), 0x1000);
memcpy(wiiu->shader, &tex_shader, sizeof(tex_shader));
GX2Invalidate(GX2_INVALIDATE_MODE_CPU, wiiu->shader, sizeof(tex_shader));
wiiu->shader->vs.program = MEM2_alloc(wiiu->shader->vs.size, GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->vs.program, tex_shader.vs.program, wiiu->shader->vs.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->vs.program, wiiu->shader->vs.size);
wiiu->shader->vs.attribVars = MEM2_alloc(wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar),
GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->vs.attribVars, tex_shader.vs.attribVars ,
wiiu->shader->vs.attribVarCount * sizeof(GX2AttribVar));
wiiu->shader->ps.program = MEM2_alloc(wiiu->shader->ps.size, GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->ps.program, tex_shader.ps.program, wiiu->shader->ps.size);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->ps.program, wiiu->shader->ps.size);
wiiu->shader->ps.samplerVars = MEM2_alloc(wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar),
GX2_SHADER_ALIGNMENT);
memcpy(wiiu->shader->ps.samplerVars, tex_shader.ps.samplerVars,
wiiu->shader->ps.samplerVarCount * sizeof(GX2SamplerVar));
wiiu->shader->fs.size = GX2CalcFetchShaderSizeEx(2, GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
wiiu->shader->fs.program = MEM2_alloc(wiiu->shader->fs.size, GX2_SHADER_ALIGNMENT);
GX2InitFetchShaderEx(&wiiu->shader->fs, (uint8_t*)wiiu->shader->fs.program,
sizeof(wiiu->shader->attribute_stream) / sizeof(GX2AttribStream),
(GX2AttribStream*)&wiiu->shader->attribute_stream,
GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, wiiu->shader->fs.program, wiiu->shader->fs.size);
GX2SetVertexShader(&wiiu->shader->vs);
GX2SetPixelShader(&wiiu->shader->ps);
GX2SetFetchShader(&wiiu->shader->fs);
wiiu->position = MEM2_alloc(4 * sizeof(*wiiu->position), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->position, &wiiu->color_buffer, 0, 0, wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu->tex_coord = MEM2_alloc(4 * sizeof(*wiiu->tex_coord), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_tex_coords(wiiu->tex_coord, &wiiu->texture, 0, 0, wiiu->texture.surface.width, wiiu->texture.surface.height);
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->position), sizeof(*wiiu->position), wiiu->position);
GX2SetAttribBuffer(1, 4 * sizeof(*wiiu->tex_coord), sizeof(*wiiu->tex_coord), wiiu->tex_coord);
wiiu->menu.position = MEM2_alloc(4 * sizeof(*wiiu->menu.position), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_position(wiiu->menu.position, &wiiu->color_buffer, 0, 0, wiiu->color_buffer.surface.width, wiiu->color_buffer.surface.height);
wiiu->menu.tex_coord = MEM2_alloc(4 * sizeof(*wiiu->menu.tex_coord), GX2_VERTEX_BUFFER_ALIGNMENT);
wiiu_set_tex_coords(wiiu->menu.tex_coord, &wiiu->menu.texture, 0, 0, wiiu->menu.texture.surface.width, wiiu->menu.texture.surface.height);
/* init frame texture */
memset(&wiiu->texture, 0, sizeof(GX2Texture));
wiiu->texture.surface.width = video->input_scale * RARCH_SCALE_BASE;
wiiu->texture.surface.height = video->input_scale * RARCH_SCALE_BASE;
wiiu->texture.surface.depth = 1;
wiiu->texture.surface.dim = GX2_SURFACE_DIM_TEXTURE_2D;
wiiu->texture.surface.format = GX2_SURFACE_FORMAT_UNORM_R5_G6_B5;
wiiu->texture.surface.tileMode = GX2_TILE_MODE_LINEAR_ALIGNED;
wiiu->texture.viewNumSlices = 1;
wiiu->texture.compMap = GX2_COMP_SEL(_B, _G, _R, _1);
GX2CalcSurfaceSizeAndAlignment(&wiiu->texture.surface);
GX2InitTextureRegs(&wiiu->texture);
wiiu->texture.surface.image = MEM2_alloc(wiiu->texture.surface.imageSize,
wiiu->texture.surface.alignment);
// memset(wiiu->texture.surface.image, 0x88, wiiu->texture.surface.imageSize);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->texture.surface.image,
wiiu->texture.surface.imageSize);
/* init menu texture */
memset(&wiiu->menu.texture, 0, sizeof(GX2Texture));
wiiu->menu.texture.surface.width = 512;
wiiu->menu.texture.surface.height = 512;
wiiu->menu.texture.surface.depth = 1;
wiiu->menu.texture.surface.dim = GX2_SURFACE_DIM_TEXTURE_2D;
wiiu->menu.texture.surface.format = GX2_SURFACE_FORMAT_UNORM_R4_G4_B4_A4;
wiiu->menu.texture.surface.tileMode = GX2_TILE_MODE_LINEAR_ALIGNED;
wiiu->menu.texture.viewNumSlices = 1;
wiiu->menu.texture.compMap = GX2_COMP_SEL(_A, _R, _G, _B);
GX2CalcSurfaceSizeAndAlignment(&wiiu->menu.texture.surface);
GX2InitTextureRegs(&wiiu->menu.texture);
wiiu->menu.texture.surface.image = MEM2_alloc(wiiu->menu.texture.surface.imageSize,
wiiu->menu.texture.surface.alignment);
for (i = 0; i < wiiu->menu.texture.surface.imageSize / 4; i++)
((uint32_t*)wiiu->menu.texture.surface.image)[i] = 0xFFFFFFFF;
((uint32_t*)wiiu->menu.texture.surface.image)[0] = 0xFF0000FF;
DEBUG_VAR(wiiu->menu.texture.surface.width);
DEBUG_VAR(wiiu->menu.texture.surface.height);
DEBUG_VAR(wiiu->menu.texture.surface.alignment);
DEBUG_VAR(wiiu->menu.texture.surface.imageSize);
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image,
wiiu->menu.texture.surface.imageSize);
/* init samplers */
GX2InitSampler(&wiiu->sampler_nearest, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_POINT);
GX2InitSampler(&wiiu->sampler_linear, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR);
/* set Texture and Sampler */
GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
GX2SetTVEnable(GX2_ENABLE);
GX2SetDRCEnable(GX2_ENABLE);
if (input && input_data)
{
@ -110,13 +344,41 @@ static void wiiu_gfx_free(void* data)
if (!wiiu)
return;
MEM1_free(wiiu->screen_buffer0);
MEM1_free(wiiu->screen_buffer1);
free(wiiu->menu.texture);
free(wiiu->texture);
GX2Flush();
GX2DrawDone();
GX2Shutdown();
GX2SetTVEnable(GX2_DISABLE);
GX2SetDRCEnable(GX2_DISABLE);
MEM2_free(wiiu->ctx_state);
MEM2_free(wiiu->cmd_buffer);
MEM2_free(wiiu->texture.surface.image);
MEM2_free(wiiu->menu.texture.surface.image);
MEM1_free(wiiu->color_buffer.surface.image);
MEMBucket_free(wiiu->tv_scan_buffer);
MEMBucket_free(wiiu->drc_scan_buffer);
MEM2_free(wiiu->shader->vs.program);
MEM2_free(wiiu->shader->vs.attribVars);
MEM2_free(wiiu->shader->ps.program);
MEM2_free(wiiu->shader->ps.samplerVars);
MEM2_free(wiiu->shader->fs.program);
MEM2_free(wiiu->shader);
MEM2_free(wiiu->position);
MEM2_free(wiiu->tex_coord);
MEM2_free(wiiu->menu.position);
MEM2_free(wiiu->menu.tex_coord);
free(wiiu);
DEBUG_LINE();
}
static bool wiiu_gfx_frame(void* data, const void* frame,
@ -131,68 +393,73 @@ static bool wiiu_gfx_frame(void* data, const void* frame,
int i;
wiiu_video_t* wiiu = (wiiu_video_t*) data;
GX2ClearColor(&wiiu->color_buffer, 0.0f, 0.0f, 0.0f, 1.0f);
// GX2ClearColor(&wiiu->color_buffer, 0.0f, 0.3f, 0.8f, 1.0f);
/* can't call GX2ClearColor after GX2SetContextState for whatever reason */
GX2SetContextState(wiiu->ctx_state);
static int frames = 0;
char frames_str [512];
snprintf(frames_str, sizeof(frames_str), "frames : %i", frames++);
if (!width || !height)
{
GX2WaitForVsync();
return;
}
OSScreenClearBufferEx(1, 0);
if(frame)
{
if (width > wiiu->texture.surface.width)
width = wiiu->texture.surface.width;
if (height > wiiu->texture.surface.height)
height = wiiu->texture.surface.height;
wiiu->width = width;
wiiu->height = height;
const uint16_t* src = frame;
uint16_t* dst = (uint16_t*)wiiu->texture.surface.image;
for (i = 0; i < height; i++)
{
// memcpy(dst, src, width * sizeof(uint16_t));
int j;
for(j = 0; j < width; j++)
dst[j] = __builtin_bswap16(src[j]);
dst += wiiu->texture.surface.pitch;
src += pitch / 2;
}
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->texture.surface.image,
wiiu->texture.surface.imageSize);
wiiu_set_tex_coords(wiiu->tex_coord, &wiiu->texture, 0, 0, width, height);
}
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->position), sizeof(*wiiu->position), wiiu->position);
GX2SetAttribBuffer(1, 4 * sizeof(*wiiu->tex_coord), sizeof(*wiiu->tex_coord), wiiu->tex_coord);
GX2SetPixelTexture(&wiiu->texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
if (wiiu->menu.enable)
{
const uint16_t* src = (uint16_t*)wiiu->menu.texture;
uint32_t* dst = (uint32_t*)((uint8_t*)wiiu->screen_buffer1 + wiiu->screen_buffer1_id * wiiu->screen_buffer1_size / 2);
GX2SetAttribBuffer(0, 4 * sizeof(*wiiu->menu.position), sizeof(*wiiu->menu.position), wiiu->menu.position);
GX2SetAttribBuffer(1, 4 * sizeof(*wiiu->menu.tex_coord), sizeof(*wiiu->menu.tex_coord), wiiu->menu.tex_coord);
dst += 896 * (480 - wiiu->menu.height) / 2 + (896 - wiiu->menu.width) / 2;
int x, y;
GX2SetPixelTexture(&wiiu->menu.texture, wiiu->shader->sampler.location);
GX2SetPixelSampler(&wiiu->sampler_linear, wiiu->shader->sampler.location);
for (y = 0; y < wiiu->menu.height; y++)
{
for (x = 0; x < wiiu->menu.width; x++)
{
int r = ((src[x] >> 12) & 0xF) << 4;
int g = ((src[x] >> 8) & 0xF) << 4;
int b = ((src[x] >> 4) & 0xF) << 4;
dst[x] = (r << 0) | (b << 8) | (g << 16);
}
src += wiiu->menu.tex_width;
dst += 896;
}
}
else
{
const uint16_t* src = (uint16_t*)frame;
uint32_t* dst = (uint32_t*)((uint8_t*)wiiu->screen_buffer1 + wiiu->screen_buffer1_id * wiiu->screen_buffer1_size / 2);
dst += (896 * (480 - height) + width) / 2;
int x, y;
for (y = 0; y < height; y++)
{
for (x = 0; x < width; x++)
{
int r = ((src[x] >> 11) & 0x1F) << 3;
int g = ((src[x] >> 5) & 0x3F) << 2;
int b = ((src[x] >> 0) & 0x1F) << 3;
dst[x] = (r << 0) | (b << 8) | (g << 16);
}
src += pitch/2;
dst += 896;
}
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
}
GX2CopyColorBufferToScanBuffer(&wiiu->color_buffer, GX2_SCAN_TARGET_DRC);
GX2CopyColorBufferToScanBuffer(&wiiu->color_buffer, GX2_SCAN_TARGET_TV);
OSScreenPutFontEx(1, 0, 16, frames_str);
DCFlushRange(((uint8_t*)wiiu->screen_buffer1 + wiiu->screen_buffer1_id * wiiu->screen_buffer1_size / 2)
, wiiu->screen_buffer1_size / 2);
OSScreenFlipBuffersEx(1);
wiiu->screen_buffer1_id ^= 1;
GX2SwapScanBuffers();
GX2Flush();
GX2WaitForVsync();
printf("\rframe : %5i", wiiu->frames++);
fflush(stdout);
return true;
}
@ -302,24 +569,33 @@ static void wiiu_set_texture_frame(void* data, const void* frame, bool rgb32,
if (!frame || !width || !height)
return;
if (width > wiiu->menu.tex_width)
width = wiiu->menu.tex_width;
if (width > wiiu->menu.texture.surface.width)
width = wiiu->menu.texture.surface.width;
if (height > wiiu->menu.tex_height)
height = wiiu->menu.tex_height;
if (height > wiiu->menu.texture.surface.height)
height = wiiu->menu.texture.surface.height;
wiiu->menu.width = width;
wiiu->menu.height = height;
const uint16_t* src = frame;
uint16_t* dst = (uint16_t*)wiiu->menu.texture;
uint16_t* dst = (uint16_t*)wiiu->menu.texture.surface.image;
DEBUG_VAR(width);
DEBUG_VAR(height);
for (i = 0; i < height; i++)
{
memcpy(dst, src, width * sizeof(uint16_t));
dst += wiiu->menu.tex_width;
dst += wiiu->menu.texture.surface.pitch;
src += width;
}
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_TEXTURE, wiiu->menu.texture.surface.image,
wiiu->menu.texture.surface.imageSize);
wiiu_set_tex_coords(wiiu->menu.tex_coord, &wiiu->menu.texture, 0, 0, width, height);
// wiiu_set_tex_coords(wiiu->menu.tex_coord, &wiiu->menu.texture, 0, 0, wiiu->menu.texture.surface.width, wiiu->menu.texture.surface.height);
}
static void wiiu_set_texture_enable(void* data, bool state, bool full_screen)

33
wiiu/gx2.h Normal file
View File

@ -0,0 +1,33 @@
#ifndef GX2_H
#define GX2_H
#include <gx2/clear.h>
#include <gx2/context.h>
#include <gx2/display.h>
#include <gx2/displaylist.h>
#include <gx2/draw.h>
#include <gx2/enum.h>
#include <gx2/event.h>
#include <gx2/mem.h>
#include <gx2/registers.h>
#include <gx2/sampler.h>
#include <gx2/shaders.h>
#include <gx2/state.h>
#include <gx2/surface.h>
#include <gx2/swap.h>
#include <gx2/tessellation.h>
#include <gx2/texture.h>
/* Byte alignments handed to the allocators for the various GX2 buffers. */
#define GX2_SCAN_BUFFER_ALIGNMENT      0x1000
#define GX2_SHADER_ALIGNMENT           0x100
#define GX2_CONTEXT_STATE_ALIGNMENT    0x100
#define GX2_DISPLAY_LIST_ALIGNMENT     0x20
#define GX2_VERTEX_BUFFER_ALIGNMENT    0x40
#define GX2_INDEX_BUFFER_ALIGNMENT     0x20

/* Readable spellings of TRUE/FALSE for GX2 call sites. */
#define GX2_ENABLE                     TRUE
#define GX2_DISABLE                    FALSE

#define GX2_TRUE                       TRUE
#define GX2_FALSE                      FALSE
#endif // GX2_H

167
wiiu/gx2_shader_inl.h Normal file
View File

@ -0,0 +1,167 @@
#ifndef GX2_SHADER_INL_H
#define GX2_SHADER_INL_H
/* to_LE() converts a host 32-bit word to little-endian: a byte swap when
 * MSB_FIRST is defined, the value itself otherwise. The identity form is
 * parenthesised so that modifiers appended after an encoded word (NO_BARRIER,
 * END_OF_PROGRAM, ...) apply to the whole word on every host. */
#ifdef MSB_FIRST
#define to_LE(x) __builtin_bswap32(x)
#else
#define to_LE(x) (x)
#endif

/* Places value v at bit position pos. The cast keeps the shift unsigned, so
 * fields that reach bit 31 do not overflow a signed int. */
#define SH_FIELD(v, pos) ((unsigned)(v) << (pos))

/* CF */
#define CF_WORD0(addr) to_LE(SH_FIELD(addr, 0))

/* bit 31 is always set here; NO_BARRIER clears it again */
#define CF_WORD1(popCount, cfConst, cond, count, callCount, inst) \
   to_LE(SH_FIELD(popCount, 0) | SH_FIELD(cfConst, 3) | SH_FIELD(cond, 8) | SH_FIELD(count, 10) | \
         SH_FIELD(callCount, 13) | SH_FIELD(inst, 23) | SH_FIELD(1, 31))

#define CF_ALU_WORD0(addr, kcacheBank0, kcacheBank1, kcacheMode0) \
   to_LE(SH_FIELD(addr, 0) | SH_FIELD(kcacheBank0, 16) | SH_FIELD(kcacheBank1, 20) | SH_FIELD(kcacheMode0, 22))

#define CF_ALU_WORD1(kcacheMode1, kcacheAddr0, kcacheAddr1, count, altConst, inst) \
   to_LE(SH_FIELD(kcacheMode1, 0) | SH_FIELD(kcacheAddr0, 2) | SH_FIELD(kcacheAddr1, 10) | SH_FIELD(count, 18) | \
         SH_FIELD(altConst, 25) | SH_FIELD(inst, 26) | SH_FIELD(1, 31))

#define CF_EXP_WORD0(dstReg_and_type, srcReg, srcRel, indexGpr, elemSize) \
   to_LE(SH_FIELD(dstReg_and_type, 0) | SH_FIELD(srcReg, 15) | SH_FIELD(srcRel, 22) | SH_FIELD(indexGpr, 23) | \
         SH_FIELD(elemSize, 30))

#define CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, validPixelMode, inst) \
   to_LE(SH_FIELD(srcSelX, 0) | SH_FIELD(srcSelY, 3) | SH_FIELD(srcSelZ, 6) | SH_FIELD(srcSelW, 9) | \
         SH_FIELD(validPixelMode, 22) | SH_FIELD(inst, 23) | SH_FIELD(1, 31))

/* Modifiers: written directly after (or, for ALU_LAST, before) an encoded
 * word to clear or set a single bit of it. */
#define NO_BARRIER & to_LE(~SH_FIELD(1, 31))
#define END_OF_PROGRAM | to_LE(SH_FIELD(1, 21))
#define VALID_PIX | to_LE(SH_FIELD(1, 22))
#define WHOLE_QUAD_MODE | to_LE(SH_FIELD(1, 30))
#define ALU_LAST to_LE(SH_FIELD(1, 31)) |

/* ALU */
#define ALU_WORD0(src0Sel, src0Rel, src0Chan, src0Neg, src1Sel, src1Rel, src1Chan, src1Neg, indexMode, predSel) \
   to_LE(SH_FIELD(src0Sel, 0) | SH_FIELD(src0Rel, 9) | SH_FIELD(src0Chan, 10) | SH_FIELD(src0Neg, 12) | \
         SH_FIELD(src1Sel, 13) | SH_FIELD(src1Rel, 22) | SH_FIELD(src1Chan, 23) | SH_FIELD(src1Neg, 25) | \
         SH_FIELD(indexMode, 26) | SH_FIELD(predSel, 29))

#define ALU_WORD1_OP2(src0Abs, src1Abs, updateExecuteMask, updatePred, writeMask, omod, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
   to_LE(SH_FIELD(src0Abs, 0) | SH_FIELD(src1Abs, 1) | SH_FIELD(updateExecuteMask, 2) | SH_FIELD(updatePred, 3) | \
         SH_FIELD(writeMask, 4) | SH_FIELD(omod, 5) | SH_FIELD(inst, 7) | \
         SH_FIELD(encoding, 15) | SH_FIELD(bankSwizzle, 18) | SH_FIELD(dstGpr, 21) | SH_FIELD(dstRel, 28) | \
         SH_FIELD(dstChan, 29) | SH_FIELD(clamp, 31))

/* (the closing parenthesis of to_LE() was missing here, so any use of this
 * macro failed to compile) */
#define ALU_WORD1_OP3(src2Sel, src2Rel, src2Chan, src2Neg, inst, encoding, bankSwizzle, dstGpr, dstRel, dstChan, clamp) \
   to_LE(SH_FIELD(src2Sel, 0) | SH_FIELD(src2Rel, 9) | SH_FIELD(src2Chan, 10) | SH_FIELD(src2Neg, 12) | \
         SH_FIELD(inst, 13) | \
         SH_FIELD(encoding, 15) | SH_FIELD(bankSwizzle, 18) | SH_FIELD(dstGpr, 21) | SH_FIELD(dstRel, 28) | \
         SH_FIELD(dstChan, 29) | SH_FIELD(clamp, 31))

/* TEX */
#define TEX_WORD0(inst, bcFracMode, fetchWholeQuad, resourceID, srcReg, srcRel, altConst) \
   to_LE(SH_FIELD(inst, 0) | SH_FIELD(bcFracMode, 5) | SH_FIELD(fetchWholeQuad, 7) | SH_FIELD(resourceID, 8) | \
         SH_FIELD(srcReg, 16) | SH_FIELD(srcRel, 23) | SH_FIELD(altConst, 24))

#define TEX_WORD1(dstReg, dstRel, dstSelX, dstSelY, dstSelZ, dstSelW, lodBias, coordTypeX, coordTypeY, coordTypeZ, coordTypeW) \
   to_LE(SH_FIELD(dstReg, 0) | SH_FIELD(dstRel, 7) | SH_FIELD(dstSelX, 9) | SH_FIELD(dstSelY, 12) | \
         SH_FIELD(dstSelZ, 15) | SH_FIELD(dstSelW, 18) | \
         SH_FIELD(lodBias, 21) | SH_FIELD(coordTypeX, 28) | SH_FIELD(coordTypeY, 29) | SH_FIELD(coordTypeZ, 30) | \
         SH_FIELD(coordTypeW, 31))

#define TEX_WORD2(offsetX, offsetY, offsetZ, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW) \
   to_LE(SH_FIELD(offsetX, 0) | SH_FIELD(offsetY, 5) | SH_FIELD(offsetZ, 10) | SH_FIELD(samplerID, 15) | \
         SH_FIELD(srcSelX, 20) | SH_FIELD(srcSelY, 23) | SH_FIELD(srcSelZ, 26) | SH_FIELD(srcSelW, 29))
/* component selectors (see GX2_COMP_SEL and the src/dst select arguments) */
#define _X 0
#define _Y 1
#define _Z 2
#define _W 3
#define _0 4
#define _1 5

/* packs four selectors into one word, c0 in the top byte */
#define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3))

/* a 32-bit literal stored in the ALU clause after the instruction using it */
#define ALU_LITERAL(v) to_LE(v)

/* SRCx_SEL special constants */
#define ALU_SRC_1_DBL_L     0xF4
#define ALU_SRC_1_DBL_M     0xF5
#define ALU_SRC_0_5_DBL_L   0xF6
#define ALU_SRC_0_5_DBL_M   0xF7
#define ALU_SRC_0           0xF8
#define ALU_SRC_1           0xF9
#define ALU_SRC_1_INT       0xFA
#define ALU_SRC_M_1_INT     0xFB
#define ALU_SRC_0_5         0xFC
#define ALU_SRC_LITERAL     0xFD
#define ALU_SRC_PV          0xFE
#define ALU_SRC_PS          0xFF

/* CF defines */
#define CF_COND_ACTIVE      0x0
#define CF_COND_FALSE       0x1
#define CF_COND_BOOL        0x2
#define CF_COND_NOT_BOOL    0x3

/* TEX defines */
#define TEX_UNNORMALIZED    0x0
#define TEX_NORMALIZED      0x1

/* instructions */
/* CF */
#define CF_INST_TEX         0x01
#define CF_INST_CALL_FS     0x13
/* ALU */
#define ALU_INST_ALU        0x8
#define OP2_INST_MOV        0x19
/* EXP */
#define CF_INST_EXP_DONE    0x28
/* TEX */
#define TEX_INST_SAMPLE     0x10

/* EXPORT_TYPE */
#define EXPORT_TYPE_PIXEL   0x0
#define EXPORT_TYPE_POS     0x1
#define EXPORT_TYPE_PARAM   0x2

/* Expansions are fully parenthesised so the macros stay correct inside
 * larger expressions and with compound arguments. */
#define EXPORT_ARRAY_BASE_POS(id)   (0x3C + (id)) // [0, 3]
#define EXPORT_ARRAY_BASE_PARAM(id) (id) // [0, 31]
#define EXPORT_ARRAY_BASE_PIX(id)   (id)

/* exports: array base in the low bits, export type at bit 13 */
#define POS(id)   (EXPORT_ARRAY_BASE_POS(id)   | (EXPORT_TYPE_POS   << 13))
#define PARAM(id) (EXPORT_ARRAY_BASE_PARAM(id) | (EXPORT_TYPE_PARAM << 13))
#define PIX(id)   (EXPORT_ARRAY_BASE_PIX(id)   | (EXPORT_TYPE_PIXEL << 13))
#define POS0   POS(0)
#define PARAM0 PARAM(0)
#define PIX0   PIX(0)

/* registers */
#define _R(x) (x)
#define _R0 _R(0x0)
#define _R1 _R(0x1)
#define _R2 _R(0x2)

/* texture */
#define _t(x) (x)
#define _t0 _t(0x0)

/* sampler */
#define _s(x) (x)
#define _s0 _s(0x0)
/* Convenience forms of the word encoders. Each one expands to the
 * comma-separated words of a single instruction, ready to be placed in an
 * array initializer. */

/* control flow: call the fetch shader */
#define CALL_FS CF_WORD0(0), CF_WORD1(0,0,0,0,0,CF_INST_CALL_FS)

/* control flow: run cnt texture instructions located at addr.
 * NOTE(review): (cnt - 1) is passed in CF_WORD1's callCount slot, not its
 * count slot; with the only current use (cnt == 1) both encode as 0.
 * Confirm the intended field before using cnt > 1. */
#define TEX(addr, cnt) CF_WORD0(addr), CF_WORD1(0x0, 0x0, CF_COND_ACTIVE, 0x0, ((cnt) - 1), CF_INST_TEX)

/* control flow: run cnt ALU slots located at addr */
#define ALU(addr, cnt) CF_ALU_WORD0(addr, 0x0, 0x0, 0x0), CF_ALU_WORD1(0x0, 0x0, 0x0, ((cnt) - 1), 0x0, ALU_INST_ALU)

/* control flow: export srcReg (with the given swizzle) and finish exporting */
#define EXP_DONE(dstReg_and_type, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW) CF_EXP_WORD0(dstReg_and_type, srcReg, 0x0, 0x0, 0x0), \
   CF_EXP_WORD1(srcSelX, srcSelY, srcSelZ, srcSelW, 0x0, CF_INST_EXP_DONE)

/* ALU: dstGpr.dstChan = src0Sel.src0Chan */
#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, ALU_SRC_0, 0x0, 0x0, 0x0, 0x0, 0x0), \
   ALU_WORD1_OP2(0x0, 0x0, 0x0, 0x0, 0x1, 0x0, OP2_INST_MOV, 0x0, 0x0, dstGpr, 0x0, dstChan, 0x0)

/* TEX: sample resourceID/samplerID at srcReg (swizzled by srcSel*) into
 * dstReg (swizzled by dstSel*), with normalized coordinates.
 * The source swizzle arguments are now forwarded to TEX_WORD2; they used to
 * be ignored in favour of a hard-coded _X, _Y, _0, _X. */
#define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\
   TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \
   TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED), \
   TEX_WORD2(0x0, 0x0, 0x0, samplerID, srcSelX, srcSelY, srcSelZ, srcSelW)
/* Initializer repeaters: _xN(v) expands to v written N times, separated by
 * commas, for filling register tables. */
#define _x2(v)  v, v
#define _x4(v)  _x2(v), _x2(v)
#define _x8(v)  _x4(v), _x4(v)
#define _x9(v)  _x4(v), _x4(v), v
#define _x16(v) _x8(v), _x8(v)
#define _x30(v) _x16(v), _x8(v), _x4(v), _x2(v)
#define _x31(v) _x16(v), _x8(v), _x4(v), _x2(v), v
#endif // GX2_SHADER_INL_H

188
wiiu/tex_shader.c Normal file
View File

@ -0,0 +1,188 @@
#include <stddef.h>
#include <malloc.h>
#include <string.h>
#include "tex_shader.h"
#include "gx2_shader_inl.h"
/*******************************************************
*******************************************************
*
* Vertex Shader GLSL source:
*
*******************************************************
*******************************************************
*
* attribute vec2 position;
* attribute vec2 tex_coord_in;
* varying vec2 tex_coord;
* void main()
* {
* gl_Position = vec4(position, 0.0, 1.0);
* tex_coord = tex_coord_in;
* }
*
******************************************************
******************************************************
*
* assembly output from AMD's GPU ShaderAnalyzer :
*
******************************************************
******************************************************
*
* 00 CALL_FS NO_BARRIER
* 01 ALU: ADDR(32) CNT(5)
* 0 x: MOV R2.x, R2.x
* y: MOV R2.y, R2.y
* z: MOV R1.z, 0.0f
* w: MOV R1.w, (0x3F800000, 1.0f).x
* 02 EXP_DONE: POS0, R1
* 03 EXP_DONE: PARAM0, R2.xyzz NO_BARRIER
* END_OF_PROGRAM
*
******************************************************
******************************************************
*/
/* Encoded vertex shader program matching the assembly listing above.
 * cf holds the control-flow instructions (two u32 words each); the ALU clause
 * follows directly after it, which is why cf is sized from the ADDR(32) of
 * the ALU instruction. Entries not listed in the initializer are zero. */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u32 cf[32 * 2]; /* first ADDR() * 2 */
u32 alu[5 * 2]; /* CNT() sum * 2 */
} vs_program =
{
{
/* 00-03 of the listing */
CALL_FS NO_BARRIER,
ALU(32, 5),
EXP_DONE(POS0, _R1, _X, _Y, _Z, _W),
EXP_DONE(PARAM0, _R2, _X, _Y, _Z, _Z) NO_BARRIER
END_OF_PROGRAM
},
{
/* the ALU clause: four MOVs, the last one reading the literal that follows it */
ALU_MOV(_R2, _X, _R2, _X),
ALU_MOV(_R2, _Y, _R2, _Y),
ALU_MOV(_R1, _Z, ALU_SRC_0, _X),
ALU_LAST ALU_MOV(_R1, _W, ALU_SRC_LITERAL, _X), ALU_LITERAL(0x3F800000) /* 1.0f */
}
};
/*******************************************************
*******************************************************
*
* Pixel Shader GLSL source:
*
*******************************************************
*******************************************************
*
* varying vec2 tex_coord;
* uniform sampler2D s;
* void main()
* {
* gl_FragColor = texture2D(s, tex_coord);
* }
*
******************************************************
******************************************************
*
* assembly output from AMD's GPU ShaderAnalyzer :
*
******************************************************
******************************************************
*
* 00 TEX: ADDR(16) CNT(1) VALID_PIX
* 0 SAMPLE R0, R0.xy0x, t0, s0
* 01 EXP_DONE: PIX0, R0
* END_OF_PROGRAM
*
*******************************************************
*******************************************************
*/
/* Encoded pixel shader program matching the assembly listing above.
 * cf holds the control-flow instructions (two u32 words each); the TEX clause
 * follows directly after it, which is why cf is sized from the ADDR(16) of
 * the TEX instruction. Entries not listed in the initializer are zero. */
__attribute__((aligned(GX2_SHADER_ALIGNMENT)))
static struct
{
u32 cf[16 * 2]; /* first ADDR() * 2 */
u32 tex[1 * 3]; /* CNT() sum * 3 */
} ps_program =
{
{
/* 00-01 of the listing */
TEX(16, 1) VALID_PIX,
EXP_DONE(PIX0, _R0, _X, _Y, _Z, _W)
END_OF_PROGRAM
},
{
/* the TEX clause: a single SAMPLE of t0/s0 at R0.xy0x into R0 */
TEX_SAMPLE(_R0, _X, _Y, _Z, _W, _R0, _X, _Y, _0, _X, _t0, _s0)
}
};
/* The texture shader description used by the video driver, in tex_shader_t
 * member order: vertex shader, pixel shader, sampler variable, attribute
 * variables, attribute streams, fetch shader.
 *
 * The initializer is positional, so every entry must stay in the member order
 * of the corresponding GX2 struct. The vertex/pixel shader entries point at
 * vs_program / ps_program and at the sampler/attributes members of this same
 * object. The fetch shader is left empty here. */
tex_shader_t tex_shader =
{
/* vs */
{
{
0x00000103, 0x00000000, 0x00000000, 0x00000001, /* sq_pgm_resources_vs, vgt_primitiveid_en, spi_vs_out_config, num_spi_vs_out_id */
{ 0xffffff00, _x9(0xffffffff) }, /* spi_vs_out_id @10 */
0x00000000, 0xfffffffc, 0x00000002, /* pa_cl_vs_out_cntl, sq_vtx_semantic_clear, num_sq_vtx_semantic */
{
0x00000000, 0x00000001, _x30(0x000000ff) /* sq_vtx_semantic @32 */
},
0x00000000, 0x0000000e, 0x00000010 /* vgt_strmout_buffer_en, vgt_vertex_reuse_block_cntl, vgt_hos_reuse_depth */
}, /* regs */
sizeof(vs_program), /* size */
(uint8_t*)&vs_program, /* program */
GX2_SHADER_MODE_UNIFORM_REGISTER, /* mode */
0, /* uniformBlockCount */
NULL, /* uniformBlocks */
0, /* uniformVarCount */
NULL, /* uniformVars */
0, /* initialValueCount */
NULL, /* initialValues */
0, /* loopVarCount */
NULL, /* loopVars */
0, /* samplerVarCount */
NULL, /* samplerVars */
sizeof(tex_shader.attributes) / sizeof(GX2AttribVar), /* attribVarCount */
(GX2AttribVar*) &tex_shader.attributes, /* attribVars */
0, /* ringItemsize */
FALSE, /* hasStreamOut */
{0}, /* streamOutStride @4 */
{} /* gx2rBuffer */
},
/* ps */
{
{
0x00000001, 0x00000002, 0x14000001, 0x00000000, /* sq_pgm_resources_ps, sq_pgm_exports_ps, spi_ps_in_control_0, spi_ps_in_control_1 */
0x00000001, /* num_spi_ps_input_cntl */
{ 0x00000100, _x30(0x00000000)}, /* spi_ps_input_cntls @ 32*/
0x0000000f, 0x00000001, 0x00000010, 0x00000000 /* cb_shader_mask, cb_shader_control, db_shader_control, spi_input_z */
}, /* regs */
sizeof(ps_program), /* size */
(uint8_t*)&ps_program, /* program */
GX2_SHADER_MODE_UNIFORM_REGISTER, /* mode */
0, /* uniformBlockCount */
NULL, /* uniformBlocks */
0, /* uniformVarCount */
NULL, /* uniformVars */
0, /* initialValueCount */
NULL, /* initialValues */
0, /* loopVarCount */
NULL, /* loopVars */
1, /* samplerVarCount */
(GX2SamplerVar*) &tex_shader.sampler, /* samplerVars */
{} /* gx2rBuffer */
},
/* sampler */
{ "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 },
/* attributes */
{
{ "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0},
{ "tex_coord_in", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1}
},
/* attribute_stream: one two-float stream per attribute */
{
{
0, 0, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT
},
{
1, 1, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32,
GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_X, _Y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT
}
},
/* fs: left empty here */
{},
};

34
wiiu/tex_shader.h Normal file
View File

@ -0,0 +1,34 @@
#ifndef TEX_SHADER_H
#define TEX_SHADER_H
#include <gx2.h>
#include "system/memory.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Everything that makes up the texture shader, kept in one aligned object:
 * the two programmable stages, the variables they expose, the vertex
 * attribute stream layout and the fetch shader. */
typedef struct __attribute__((aligned(GX2_VERTEX_BUFFER_ALIGNMENT)))
{
GX2VertexShader vs;
GX2PixelShader ps;
GX2SamplerVar sampler; /* the pixel shader's single sampler variable */
struct
{
GX2AttribVar position;
GX2AttribVar tex_coord;
} attributes; /* vertex shader inputs */
struct
{
GX2AttribStream position;
GX2AttribStream tex_coord;
} attribute_stream; /* one stream description per vertex shader input */
GX2FetchShader fs;
}tex_shader_t;
/* the single shader instance, defined in tex_shader.c */
extern tex_shader_t tex_shader;
#ifdef __cplusplus
}
#endif
#endif // TEX_SHADER_H