Merge pull request #1402 from RPCS3/rsx

RSX emulation improvements
This commit is contained in:
DHrpcs3 2016-01-06 15:25:28 +02:00
commit ecda02c099
21 changed files with 1494 additions and 1072 deletions

View File

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "BufferUtils.h"
#include "../rsx_methods.h"
#define MIN2(x, y) ((x) < (y)) ? (x) : (y)
#define MAX2(x, y) ((x) > (y)) ? (x) : (y)

View File

@ -2,6 +2,7 @@
#include "Emu/Memory/vm.h"
#include "TextureUtils.h"
#include "../RSXThread.h"
#include "../rsx_utils.h"
#define MAX2(a, b) ((a) > (b)) ? (a) : (b)

View File

@ -6,6 +6,7 @@
#include "d3dx12.h"
#include "../Common/BufferUtils.h"
#include "D3D12Formats.h"
#include "../rsx_methods.h"
namespace
{

View File

@ -10,6 +10,7 @@
#include <d3d11on12.h>
#include "Emu/state.h"
#include "D3D12Formats.h"
#include "../rsx_methods.h"
PFN_D3D12_CREATE_DEVICE wrapD3D12CreateDevice;
PFN_D3D12_GET_DEBUG_INTERFACE wrapD3D12GetDebugInterface;

View File

@ -5,6 +5,7 @@
#include "D3D12GSRender.h"
#include "Emu/state.h"
#include "D3D12Formats.h"
#include "../rsx_methods.h"
#define TO_STRING(x) #x

View File

@ -7,6 +7,7 @@
#include "Emu/System.h"
#include "Emu/state.h"
#include "Emu/RSX/GSRender.h"
#include "../rsx_methods.h"
#include "D3D12.h"
#include "D3D12GSRender.h"

View File

@ -451,7 +451,17 @@ enum
CELL_GCM_POLYGON_MODE_FILL = 0x1B02,
CELL_GCM_TRUE = 1,
CELL_GCM_FALSE = 0
CELL_GCM_FALSE = 0,
CELL_GCM_WINDOW_ORIGIN_TOP = 0,
CELL_GCM_WINDOW_ORIGIN_BOTTOM = 1,
CELL_GCM_WINDOW_PIXEL_CENTER_HALF = 0,
CELL_GCM_WINDOW_PIXEL_CENTER_INTEGER = 1,
CELL_GCM_USER_CLIP_PLANE_DISABLE = 0,
CELL_GCM_USER_CLIP_PLANE_ENABLE_LT = 1,
CELL_GCM_USER_CLIP_PLANE_ENABLE_GE = 2,
};
enum

View File

@ -4,6 +4,7 @@
#include "Emu/System.h"
#include "Emu/state.h"
#include "GLGSRender.h"
#include "../rsx_methods.h"
#define DUMP_VERTEX_DATA 0
@ -64,24 +65,6 @@ void GLGSRender::begin()
__glcheck glDepthMask(rsx::method_registers[NV4097_SET_DEPTH_MASK]);
__glcheck glStencilMask(rsx::method_registers[NV4097_SET_STENCIL_MASK]);
int viewport_x = int(rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL] & 0xffff);
int viewport_y = int(rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL] & 0xffff);
int viewport_w = int(rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL] >> 16);
int viewport_h = int(rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL] >> 16);
glViewport(viewport_x, viewport_y, viewport_w, viewport_h);
//scissor test is always enabled
glEnable(GL_SCISSOR_TEST);
u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL];
u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL];
u16 scissor_x = scissor_horizontal;
u16 scissor_w = scissor_horizontal >> 16;
u16 scissor_y = scissor_vertical;
u16 scissor_h = scissor_vertical >> 16;
__glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
if (__glcheck enable(rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE], GL_DEPTH_TEST))
{
__glcheck glDepthFunc(rsx::method_registers[NV4097_SET_DEPTH_FUNC]);
@ -235,9 +218,10 @@ void GLGSRender::begin()
if (__glcheck enable(rsx::method_registers[NV4097_SET_CULL_FACE_ENABLE], GL_CULL_FACE))
{
__glcheck glCullFace(rsx::method_registers[NV4097_SET_CULL_FACE]);
__glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE]);
}
__glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE] ^ 1);
__glcheck enable(rsx::method_registers[NV4097_SET_POLY_SMOOTH_ENABLE], GL_POLYGON_SMOOTH);
//NV4097_SET_COLOR_KEY_COLOR
@ -325,13 +309,15 @@ void GLGSRender::end()
for (int i = 0; i < rsx::limits::textures_count; ++i)
{
if (!textures[i].enabled())
{
continue;
}
int location;
if (m_program->uniforms.has_location("tex" + std::to_string(i), &location))
{
__glcheck m_gl_textures[i].init(textures[i]);
__glcheck m_program->uniforms.texture(location, i, gl::texture_view(gl::texture::target::texture2D, m_gl_textures[i].id()));
__glcheck m_gl_textures[i].init(i, textures[i]);
glProgramUniform1i(m_program->id(), location, i);
}
}
@ -477,6 +463,44 @@ void GLGSRender::end()
rsx::thread::end();
}
void GLGSRender::set_viewport()
{
u32 viewport_horizontal = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL];
u32 viewport_vertical = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL];
u16 viewport_x = viewport_horizontal & 0xffff;
u16 viewport_y = viewport_vertical & 0xffff;
u16 viewport_w = viewport_horizontal >> 16;
u16 viewport_h = viewport_vertical >> 16;
u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL];
u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL];
u16 scissor_x = scissor_horizontal;
u16 scissor_w = scissor_horizontal >> 16;
u16 scissor_y = scissor_vertical;
u16 scissor_h = scissor_vertical >> 16;
u32 shader_window = rsx::method_registers[NV4097_SET_SHADER_WINDOW];
u8 shader_window_origin = (shader_window >> 12) & 0xf;
//TODO
if (true || shader_window_origin == CELL_GCM_WINDOW_ORIGIN_BOTTOM)
{
__glcheck glViewport(viewport_x, viewport_y, viewport_w, viewport_h);
__glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
}
else
{
u16 shader_window_height = shader_window & 0xfff;
__glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h - 1, viewport_w, viewport_h);
__glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h - 1, scissor_w, scissor_h);
}
glEnable(GL_SCISSOR_TEST);
}
void GLGSRender::on_init_thread()
{
GSRender::on_init_thread();
@ -555,9 +579,6 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
return;
}
renderer->draw_fbo.bind();
glEnable(GL_SCISSOR_TEST);
/*
u16 clear_x = rsx::method_registers[NV4097_SET_CLEAR_RECT_HORIZONTAL];
u16 clear_y = rsx::method_registers[NV4097_SET_CLEAR_RECT_VERTICAL];
@ -566,14 +587,8 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
glScissor(clear_x, clear_y, clear_w, clear_h);
*/
u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL];
u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL];
u16 scissor_x = scissor_horizontal;
u16 scissor_w = scissor_horizontal >> 16;
u16 scissor_y = scissor_vertical;
u16 scissor_h = scissor_vertical >> 16;
glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
renderer->init_buffers(true);
renderer->draw_fbo.bind();
GLbitfield mask = 0;
@ -613,8 +628,8 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
mask |= GLenum(gl::buffers::color);
}
renderer->clear_surface_buffers = (gl::buffers)mask;
renderer->draw_fbo.clear((gl::buffers)mask);
glClear(mask);
renderer->write_buffers();
}
using rsx_method_impl_t = void(*)(u32, GLGSRender*);
@ -808,7 +823,7 @@ std::pair<gl::texture::type, gl::texture::format> surface_depth_format_to_gl(int
}
}
void GLGSRender::init_buffers()
void GLGSRender::init_buffers(bool skip_reading)
{
u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT];
@ -890,11 +905,13 @@ void GLGSRender::init_buffers()
__glcheck m_draw_tex_depth_stencil.pixel_unpack_settings().aligment(1);
}
if (clear_surface_buffers == gl::buffers::none)
if (!skip_reading)
{
read_buffers();
}
set_viewport();
switch (rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])
{
case CELL_GCM_SURFACE_TARGET_NONE: break;
@ -923,13 +940,6 @@ void GLGSRender::init_buffers()
LOG_ERROR(RSX, "Bad surface color target: %d", rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]);
break;
}
if (clear_surface_buffers != gl::buffers::none)
{
//draw_fbo.clear(clear_surface_buffers);
clear_surface_buffers = gl::buffers::none;
}
}
static const u32 mr_color_offset[rsx::limits::color_buffers_count] =
@ -948,6 +958,14 @@ static const u32 mr_color_dma[rsx::limits::color_buffers_count] =
NV4097_SET_CONTEXT_DMA_COLOR_D
};
static const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
{
NV4097_SET_SURFACE_PITCH_A,
NV4097_SET_SURFACE_PITCH_B,
NV4097_SET_SURFACE_PITCH_C,
NV4097_SET_SURFACE_PITCH_D
};
void GLGSRender::read_buffers()
{
if (!draw_fbo)
@ -961,10 +979,34 @@ void GLGSRender::read_buffers()
auto read_color_buffers = [&](int index, int count)
{
u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
for (int i = index; i < index + count; ++i)
{
u32 color_address = rsx::get_address(rsx::method_registers[mr_color_offset[i]], rsx::method_registers[mr_color_dma[i]]);
__glcheck m_draw_tex_color[i].copy_from(vm::base(color_address), color_format.format, color_format.type);
u32 offset = rsx::method_registers[mr_color_offset[i]];
u32 location = rsx::method_registers[mr_color_dma[i]];
u32 pitch = rsx::method_registers[mr_color_pitch[i]];
if (pitch <= 64)
continue;
m_draw_tex_color[i].pixel_unpack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count));
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
if (!color_buffer.tile)
{
__glcheck m_draw_tex_color[i].copy_from(color_buffer.ptr, color_format.format, color_format.type);
}
else
{
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
color_buffer.read(buffer.get(), width, height, pitch);
__glcheck m_draw_tex_color[i].copy_from(buffer.get(), color_format.format, color_format.type);
}
}
};
@ -997,6 +1039,12 @@ void GLGSRender::read_buffers()
if (rpcs3::state.config.rsx.opengl.read_depth_buffer)
{
//TODO: use pitch
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z];
if (pitch <= 64)
return;
auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
int pixel_size = m_surface.depth_format == CELL_GCM_SURFACE_Z16 ? 2 : 4;
@ -1046,6 +1094,9 @@ void GLGSRender::write_buffers()
auto write_color_buffers = [&](int index, int count)
{
u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
for (int i = index; i < index + count; ++i)
{
//TODO: swizzle
@ -1065,8 +1116,29 @@ void GLGSRender::write_buffers()
//}, gl::buffer::access::read);
u32 color_address = rsx::get_address(rsx::method_registers[mr_color_offset[i]], rsx::method_registers[mr_color_dma[i]]);
__glcheck m_draw_tex_color[i].copy_to(vm::base(color_address), color_format.format, color_format.type);
u32 offset = rsx::method_registers[mr_color_offset[i]];
u32 location = rsx::method_registers[mr_color_dma[i]];
u32 pitch = rsx::method_registers[mr_color_pitch[i]];
if (pitch <= 64)
continue;
m_draw_tex_color[i].pixel_pack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count));
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
if (!color_buffer.tile)
{
__glcheck m_draw_tex_color[i].copy_to(color_buffer.ptr, color_format.format, color_format.type);
}
else
{
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
__glcheck m_draw_tex_color[i].copy_to(buffer.get(), color_format.format, color_format.type);
color_buffer.write(buffer.get(), width, height, pitch);
}
}
};
@ -1099,6 +1171,12 @@ void GLGSRender::write_buffers()
if (rpcs3::state.config.rsx.opengl.write_depth_buffer)
{
//TODO: use pitch
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z];
if (pitch <= 64)
return;
auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
gl::buffer pbo_depth;
@ -1141,7 +1219,9 @@ void GLGSRender::flip(int buffer)
u32 buffer_width = gcm_buffers[buffer].width;
u32 buffer_height = gcm_buffers[buffer].height;
u32 buffer_pitch = gcm_buffers[buffer].pitch;
u32 buffer_address = rsx::get_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
bool skip_read = false;
if (draw_fbo && !rpcs3::state.config.rsx.opengl.write_color_buffers)
@ -1173,8 +1253,7 @@ void GLGSRender::flip(int buffer)
.type(gl::texture::type::uint_8_8_8_8)
.format(gl::texture::format::bgra);
m_flip_tex_color.pixel_unpack_settings().aligment(1);
m_flip_tex_color.pixel_pack_settings().aligment(1);
m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4);
__glcheck m_flip_fbo.recreate();
__glcheck m_flip_fbo.color = m_flip_tex_color;
@ -1191,7 +1270,16 @@ void GLGSRender::flip(int buffer)
glDisable(GL_LOGIC_OP);
glDisable(GL_CULL_FACE);
__glcheck m_flip_tex_color.copy_from(vm::base(buffer_address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
if (buffer_region.tile)
{
std::unique_ptr<u8> temp(new u8[buffer_height * buffer_pitch]);
buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch);
__glcheck m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
}
else
{
__glcheck m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
}
}
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });

View File

@ -24,7 +24,6 @@ private:
public:
gl::fbo draw_fbo;
gl::buffers clear_surface_buffers = gl::buffers::none;
private:
GLProgramBuffer m_prog_buffer;
@ -53,9 +52,10 @@ private:
public:
bool load_program();
void init_buffers();
void init_buffers(bool skip_reading = false);
void read_buffers();
void write_buffers();
void set_viewport();
protected:
void begin() override;

View File

@ -4,11 +4,45 @@
#include "../GCM.h"
#include "../RSXThread.h"
#include "../RSXTexture.h"
#include "../rsx_utils.h"
namespace rsx
{
namespace gl
{
static const int gl_tex_min_filter[] =
{
GL_NEAREST, // unused
GL_NEAREST,
GL_LINEAR,
GL_NEAREST_MIPMAP_NEAREST,
GL_LINEAR_MIPMAP_NEAREST,
GL_NEAREST_MIPMAP_LINEAR,
GL_LINEAR_MIPMAP_LINEAR,
GL_NEAREST, // CELL_GCM_TEXTURE_CONVOLUTION_MIN
};
static const int gl_tex_mag_filter[] =
{
GL_NEAREST, // unused
GL_NEAREST,
GL_LINEAR,
GL_NEAREST, // unused
GL_LINEAR // CELL_GCM_TEXTURE_CONVOLUTION_MAG
};
static const int gl_tex_zfunc[] =
{
GL_NEVER,
GL_LESS,
GL_EQUAL,
GL_LEQUAL,
GL_GREATER,
GL_NOTEQUAL,
GL_GEQUAL,
GL_ALWAYS,
};
void texture::create()
{
if (m_id)
@ -17,7 +51,6 @@ namespace rsx
}
glGenTextures(1, &m_id);
bind();
}
int texture::gl_wrap(int wrap)
@ -56,11 +89,14 @@ namespace rsx
return 1.0f;
}
void texture::init(rsx::texture& tex)
void texture::init(int index, rsx::texture& tex)
{
if (!m_id)
{
create();
}
glActiveTexture(GL_TEXTURE0 + index);
bind();
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
@ -69,8 +105,10 @@ namespace rsx
//TODO: safe init
int format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN);
u32 full_format = tex.format();
u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
bool is_swizzled = ~full_format & CELL_GCM_TEXTURE_LN;
const u8* pixels = vm::ps3::_ptr<u8>(texaddr);
u8 *unswizzledPixels;
@ -78,10 +116,16 @@ namespace rsx
// NOTE: This must be in ARGB order in all forms below.
const GLint *glRemap = glRemapStandard;
::gl::pixel_pack_settings().apply();
::gl::pixel_unpack_settings().apply();
u32 pitch = tex.pitch();
switch (format)
{
case CELL_GCM_TEXTURE_B8: // One 8-bit fixed-point number
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BLUE, GL_UNSIGNED_BYTE, pixels);
static const GLint swizzleMaskB8[] = { GL_BLUE, GL_BLUE, GL_BLUE, GL_BLUE };
@ -92,6 +136,7 @@ namespace rsx
case CELL_GCM_TEXTURE_A1R5G5B5:
{
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
// TODO: texture swizzling
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, pixels);
@ -101,6 +146,7 @@ namespace rsx
case CELL_GCM_TEXTURE_A4R4G4B4:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4, pixels);
// We read it in as R4G4B4A4, so we need to remap each component.
@ -111,6 +157,7 @@ namespace rsx
case CELL_GCM_TEXTURE_R5G6B5:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, tex.width(), tex.height(), 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, pixels);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@ -119,6 +166,8 @@ namespace rsx
case CELL_GCM_TEXTURE_A8R8G8B8:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
if (is_swizzled)
{
u32 *src, *dst;
@ -167,6 +216,7 @@ namespace rsx
case CELL_GCM_TEXTURE_G8B8:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_UNSIGNED_BYTE, pixels);
static const GLint swizzleMaskG8B8[] = { GL_RED, GL_GREEN, GL_RED, GL_GREEN };
@ -176,6 +226,8 @@ namespace rsx
case CELL_GCM_TEXTURE_R6G5B5:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
// TODO: Probably need to actually unswizzle if is_swizzled.
const u32 numPixels = tex.width() * tex.height();
unswizzledPixels = (u8 *)malloc(numPixels * 4);
@ -196,30 +248,36 @@ namespace rsx
case CELL_GCM_TEXTURE_DEPTH24_D8: // 24-bit unsigned fixed-point number and 8 bits of garbage
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, pixels);
break;
}
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // 24-bit unsigned float and 8 bits of garbage
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_FLOAT, pixels);
break;
}
case CELL_GCM_TEXTURE_DEPTH16: // 16-bit unsigned fixed-point number
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_SHORT, pixels);
break;
}
case CELL_GCM_TEXTURE_DEPTH16_FLOAT: // 16-bit unsigned float
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_FLOAT, pixels);
break;
}
case CELL_GCM_TEXTURE_X16: // A 16-bit fixed-point number
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RED, GL_UNSIGNED_SHORT, pixels);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@ -231,6 +289,7 @@ namespace rsx
case CELL_GCM_TEXTURE_Y16_X16: // Two 16-bit fixed-point numbers
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_UNSIGNED_SHORT, pixels);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@ -241,6 +300,7 @@ namespace rsx
case CELL_GCM_TEXTURE_R5G5B5A1:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1, pixels);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@ -249,6 +309,7 @@ namespace rsx
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: // Four fp16 values
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 8);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_HALF_FLOAT, pixels);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@ -257,12 +318,16 @@ namespace rsx
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: // Four fp32 values
{
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_FLOAT, pixels);
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 16);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_FLOAT, pixels);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
break;
}
case CELL_GCM_TEXTURE_X32_FLOAT: // One 32-bit floating-point number
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RED, GL_FLOAT, pixels);
static const GLint swizzleMaskX32_FLOAT[] = { GL_RED, GL_ONE, GL_ONE, GL_ONE };
@ -272,9 +337,9 @@ namespace rsx
case CELL_GCM_TEXTURE_D1R5G5B5:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
// TODO: Texture swizzling
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, pixels);
@ -287,6 +352,7 @@ namespace rsx
case CELL_GCM_TEXTURE_D8R8G8B8: // 8 bits of garbage and three unsigned 8-bit fixed-point numbers
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, pixels);
static const GLint swizzleMaskX32_D8R8G8B8[] = { GL_ONE, GL_RED, GL_GREEN, GL_BLUE };
@ -297,6 +363,7 @@ namespace rsx
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: // Two fp16 values
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_HALF_FLOAT, pixels);
glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
@ -308,6 +375,8 @@ namespace rsx
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
const u32 numPixels = tex.width() * tex.height();
unswizzledPixels = (u8 *)malloc(numPixels * 4);
// TODO: Speed.
@ -332,6 +401,8 @@ namespace rsx
case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4);
const u32 numPixels = tex.width() * tex.height();
unswizzledPixels = (u8 *)malloc(numPixels * 4);
// TODO: Speed.
@ -385,18 +456,6 @@ namespace rsx
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, glRemap[3]);
}
static const int gl_tex_zfunc[] =
{
GL_NEVER,
GL_LESS,
GL_EQUAL,
GL_LEQUAL,
GL_GREATER,
GL_NOTEQUAL,
GL_GEQUAL,
GL_ALWAYS,
};
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, gl_wrap(tex.wrap_s()));
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, gl_wrap(tex.wrap_t()));
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_R, gl_wrap(tex.wrap_r()));
@ -407,34 +466,10 @@ namespace rsx
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, (tex.min_lod() >> 8));
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (tex.max_lod() >> 8));
static const int gl_tex_min_filter[] =
{
GL_NEAREST, // unused
GL_NEAREST,
GL_LINEAR,
GL_NEAREST_MIPMAP_NEAREST,
GL_LINEAR_MIPMAP_NEAREST,
GL_NEAREST_MIPMAP_LINEAR,
GL_LINEAR_MIPMAP_LINEAR,
GL_NEAREST, // CELL_GCM_TEXTURE_CONVOLUTION_MIN
};
static const int gl_tex_mag_filter[] = {
GL_NEAREST, // unused
GL_NEAREST,
GL_LINEAR,
GL_NEAREST, // unused
GL_LINEAR // CELL_GCM_TEXTURE_CONVOLUTION_MAG
};
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, gl_tex_min_filter[tex.min_filter()]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, gl_tex_mag_filter[tex.mag_filter()]);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_aniso(tex.max_aniso()));
//Unbind();
if (is_swizzled && format == CELL_GCM_TEXTURE_A8R8G8B8)
{
free(unswizzledPixels);

View File

@ -34,7 +34,7 @@ namespace rsx
return (v << 2) | (v >> 4);
}
void init(rsx::texture& tex);
void init(int index, rsx::texture& tex);
void bind();
void unbind();
void remove();

View File

@ -2,6 +2,7 @@
#include "Emu/Memory/Memory.h"
#include "RSXThread.h"
#include "RSXTexture.h"
#include "rsx_methods.h"
namespace rsx
{
@ -278,24 +279,6 @@ namespace rsx
return ((method_registers[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 8)] >> 16) & 0xffff);
}
u8 vertex_texture::wrap_s() const
{
return 1;
//return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)]) & 0xf);
}
u8 vertex_texture::wrap_t() const
{
return 1;
//return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 8) & 0xf);
}
u8 vertex_texture::wrap_r() const
{
return 1;
//return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 16) & 0xf);
}
u8 vertex_texture::unsigned_remap() const
{
return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 12) & 0xf);
@ -346,11 +329,6 @@ namespace rsx
return ((method_registers[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 8)] >> 2) & 0x1);
}
u32 vertex_texture::remap() const
{
return 0 | (1 << 2) | (2 << 4) | (3 << 6);//(method_registers[NV4097_SET_VERTEX_TEXTURE_CONTROL1 + (m_index * 8)]);
}
u16 vertex_texture::bias() const
{
return ((method_registers[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 8)]) & 0x1fff);

View File

@ -86,9 +86,6 @@ namespace rsx
u16 mipmap() const;
// Address
u8 wrap_s() const;
u8 wrap_t() const;
u8 wrap_r() const;
u8 unsigned_remap() const;
u8 zfunc() const;
u8 gamma() const;
@ -102,9 +99,6 @@ namespace rsx
u8 max_aniso() const;
bool alpha_kill_enabled() const;
// Control1
u32 remap() const;
// Filter
u16 bias() const;
u8 min_filter() const;

View File

@ -10,11 +10,7 @@
#include "Emu/SysCalls/lv2/sys_time.h"
#include "Common/BufferUtils.h"
extern "C"
{
#include "libswscale/swscale.h"
}
#include "rsx_methods.h"
#define CMD_DEBUG 0
@ -23,798 +19,6 @@ frame_capture_data frame_debug;
namespace rsx
{
using rsx_method_t = void(*)(thread*, u32);
u32 method_registers[0x10000 >> 2];
rsx_method_t methods[0x10000 >> 2]{};
template<typename Type> struct vertex_data_type_from_element_type;
template<> struct vertex_data_type_from_element_type<float> { enum { type = CELL_GCM_VERTEX_F }; };
template<> struct vertex_data_type_from_element_type<f16> { enum { type = CELL_GCM_VERTEX_SF }; };
template<> struct vertex_data_type_from_element_type<u8> { enum { type = CELL_GCM_VERTEX_UB }; };
template<> struct vertex_data_type_from_element_type<u16> { enum { type = CELL_GCM_VERTEX_S1 }; };
namespace nv406e
{
force_inline void set_reference(thread* rsx, u32 arg)
{
rsx->ctrl->ref.exchange(arg);
}
force_inline void semaphore_acquire(thread* rsx, u32 arg)
{
//TODO: dma
while (vm::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg)
{
if (Emu.IsStopped())
break;
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
}
force_inline void semaphore_release(thread* rsx, u32 arg)
{
//TODO: dma
vm::write32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET], arg);
}
}
namespace nv4097
{
force_inline void texture_read_semaphore_release(thread* rsx, u32 arg)
{
//TODO: dma
vm::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], arg);
}
force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg)
{
//TODO: dma
vm::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET],
(arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff));
}
//fire only when all data passed to rsx cmd buffer
template<u32 id, u32 index, int count, typename type>
force_inline void set_vertex_data_impl(thread* rsx, u32 arg)
{
static const size_t element_size = (count * sizeof(type));
static const size_t element_size_in_words = element_size / sizeof(u32);
auto& info = rsx->register_vertex_info[index];
info.type = vertex_data_type_from_element_type<type>::type;
info.size = count;
info.frequency = 0;
info.stride = 0;
auto& entry = rsx->register_vertex_data[index];
//find begin of data
size_t begin = id + index * element_size_in_words;
size_t position = 0;//entry.size();
entry.resize(position + element_size);
memcpy(entry.data() + position, method_registers + begin, element_size);
}
template<u32 index>
struct set_vertex_data4ub_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4UB_M, index, 4, u8>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data1f_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA1F_M, index, 1, f32>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data2f_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA2F_M, index, 2, f32>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data3f_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA3F_M, index, 3, f32>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data4f_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4F_M, index, 4, f32>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data2s_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA2S_M, index, 2, u16>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data4s_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4S_M, index, 4, u16>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data_array_format
{
force_inline static void impl(thread* rsx, u32 arg)
{
auto& info = rsx->vertex_arrays_info[index];
info.unpack_array(arg);
}
};
force_inline void draw_arrays(thread* rsx, u32 arg)
{
rsx->draw_command = thread::Draw_command::draw_command_array;
u32 first = arg & 0xffffff;
u32 count = (arg >> 24) + 1;
rsx->load_vertex_data(first, count);
}
force_inline void draw_index_array(thread* rsx, u32 arg)
{
rsx->draw_command = thread::Draw_command::draw_command_indexed;
u32 first = arg & 0xffffff;
u32 count = (arg >> 24) + 1;
rsx->load_vertex_data(first, count);
rsx->load_vertex_index_data(first, count);
}
force_inline void draw_inline_array(thread* rsx, u32 arg)
{
rsx->draw_command = thread::Draw_command::draw_command_inlined_array;
rsx->draw_inline_vertex_array = true;
rsx->inline_vertex_array.push_back(arg);
}
template<u32 index>
struct set_transform_constant
{
force_inline static void impl(thread* rsxthr, u32 arg)
{
u32 load = method_registers[NV4097_SET_TRANSFORM_CONSTANT_LOAD];
static const size_t count = 4;
static const size_t size = count * sizeof(f32);
size_t reg = index / 4;
size_t subreg = index % 4;
memcpy(rsxthr->transform_constants[load + reg].rgba + subreg, method_registers + NV4097_SET_TRANSFORM_CONSTANT + reg * count + subreg, sizeof(f32));
}
};
template<u32 index>
struct set_transform_program
{
force_inline static void impl(thread* rsx, u32 arg)
{
u32& load = method_registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD];
static const size_t count = 4;
static const size_t size = count * sizeof(u32);
memcpy(rsx->transform_program + load++ * count, method_registers + NV4097_SET_TRANSFORM_PROGRAM + index * count, size);
}
};
force_inline void set_begin_end(thread* rsx, u32 arg)
{
if (arg)
{
rsx->draw_inline_vertex_array = false;
rsx->inline_vertex_array.clear();
rsx->begin();
return;
}
if (!rsx->vertex_draw_count)
{
bool has_array = false;
for (int i = 0; i < rsx::limits::vertex_count; ++i)
{
if (rsx->vertex_arrays_info[i].size > 0)
{
has_array = true;
break;
}
}
if (!has_array)
{
u32 min_count = ~0;
for (int i = 0; i < rsx::limits::vertex_count; ++i)
{
if (!rsx->register_vertex_info[i].size)
continue;
u32 count = u32(rsx->register_vertex_data[i].size()) /
rsx::get_vertex_type_size(rsx->register_vertex_info[i].type) * rsx->register_vertex_info[i].size;
if (count < min_count)
min_count = count;
}
if (min_count && min_count < ~0)
{
rsx->vertex_draw_count = min_count;
}
}
}
rsx->end();
rsx->vertex_draw_count = 0;
}
force_inline void get_report(thread* rsx, u32 arg)
{
u8 type = arg >> 24;
u32 offset = arg & 0xffffff;
//TODO: use DMA
vm::ptr<CellGcmReportData> result = { rsx->local_mem_addr + offset, vm::addr };
result->timer = rsx->timestamp();
switch (type)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
case CELL_GCM_ZCULL_STATS:
case CELL_GCM_ZCULL_STATS1:
case CELL_GCM_ZCULL_STATS2:
case CELL_GCM_ZCULL_STATS3:
result->value = 0;
LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type);
break;
default:
result->value = 0;
LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type);
break;
}
//result->padding = 0;
}
force_inline void clear_report_value(thread* rsx, u32 arg)
{
switch (arg)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT");
break;
case CELL_GCM_ZCULL_STATS:
LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS");
break;
default:
LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg);
break;
}
}
}
namespace nv308a
{
template<u32 index>
struct color
{
force_inline static void impl(u32 arg)
{
u32 point = method_registers[NV308A_POINT];
u16 x = point;
u16 y = point >> 16;
if (y)
{
LOG_ERROR(RSX, "%s: y is not null (0x%x)", __FUNCTION__, y);
}
u32 address = get_address(method_registers[NV3062_SET_OFFSET_DESTIN] + (x << 2) + index * 4, method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]);
vm::write32(address, arg);
}
};
}
namespace nv3089
{
never_inline void image_in(u32 arg)
{
const u16 src_height = method_registers[NV3089_IMAGE_IN_SIZE] >> 16;
const u16 src_pitch = method_registers[NV3089_IMAGE_IN_FORMAT];
const u8 src_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16;
const u8 src_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24;
const u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT];
const u32 operation = method_registers[NV3089_SET_OPERATION];
const u16 out_w = method_registers[NV3089_IMAGE_OUT_SIZE];
const u16 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16;
// handle weird RSX quirk, doesn't report less than 16 pixels width in some cases
u16 src_width = method_registers[NV3089_IMAGE_IN_SIZE];
if (src_width == 16 && out_w < 16 && method_registers[NV3089_DS_DX] == (1 << 20))
{
src_width = out_w;
}
const u16 u = method_registers[NV3089_IMAGE_IN]; // inX (currently ignored)
const u16 v = method_registers[NV3089_IMAGE_IN] >> 16; // inY (currently ignored)
if (src_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", src_origin);
}
if (src_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && src_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", src_inter);
}
if (operation != CELL_GCM_TRANSFER_OPERATION_SRCCOPY)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown operation (%d)", operation);
}
const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET];
const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE];
u32 dst_offset;
u32 dst_dma = 0;
u16 dst_color_format;
switch (method_registers[NV3089_SET_CONTEXT_SURFACE])
{
case CELL_GCM_CONTEXT_SURFACE2D:
dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN];
dst_offset = method_registers[NV3062_SET_OFFSET_DESTIN];
dst_color_format = method_registers[NV3062_SET_COLOR_FORMAT];
break;
case CELL_GCM_CONTEXT_SWIZZLE2D:
dst_dma = method_registers[NV309E_SET_CONTEXT_DMA_IMAGE];
dst_offset = method_registers[NV309E_SET_OFFSET];
dst_color_format = method_registers[NV309E_SET_FORMAT];
break;
default:
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", method_registers[NV3089_SET_CONTEXT_SURFACE]);
break;
}
if (!dst_dma)
{
LOG_ERROR(RSX, "dst_dma not set");
return;
}
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_offset, dst_offset);
u8* pixels_src = vm::_ptr<u8>(get_address(src_offset, src_dma));
u8* pixels_dst = vm::_ptr<u8>(get_address(dst_offset, dst_dma));
if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 &&
dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", dst_color_format);
}
if (src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 &&
src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_A8R8G8B8)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", src_color_format);
}
LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x",
method_registers[NV3089_IMAGE_IN_SIZE], src_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]),
method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]);
const u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
const u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
std::unique_ptr<u8[]> temp1, temp2;
// resize/convert if necessary
if (in_bpp != out_bpp && src_width != out_w && src_height != out_h)
{
temp1.reset(new u8[out_bpp * out_w * out_h]);
AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
std::unique_ptr<SwsContext, void(*)(SwsContext*)> sws(sws_getContext(src_width, src_height, in_format, out_w, out_h, out_format,
src_inter ? SWS_FAST_BILINEAR : SWS_POINT, NULL, NULL, NULL), sws_freeContext);
int in_line = in_bpp * src_width;
u8* out_ptr = temp1.get();
int out_line = out_bpp * out_w;
sws_scale(sws.get(), &pixels_src, &in_line, 0, src_height, &out_ptr, &out_line);
pixels_src = out_ptr; // use resized image as a source
}
// Not sure if swizzle should be after clipping or not
if (method_registers[NV3089_SET_CONTEXT_SURFACE] == CELL_GCM_CONTEXT_SWIZZLE2D)
{
u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16;
u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24;
// 0 indicates height of 1 pixel
sw_height_log2 = sw_height_log2 == 0 ? 1 : sw_height_log2;
// swizzle based on destination size
u16 sw_width = 1 << sw_width_log2;
u16 sw_height = 1 << sw_height_log2;
std::unique_ptr<u8[]> sw_temp;
temp2.reset(new u8[out_bpp * sw_width * sw_height]);
u8* linear_pixels = pixels_src;
u8* swizzled_pixels = temp2.get();
// Check and pad texture out if we are given non square texture for swizzle to be correct
if (sw_width != out_w || sw_height != out_h)
{
sw_temp.reset(new u8[out_bpp * sw_width * sw_height]);
switch (out_bpp)
{
case 1:
pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
case 2:
pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
case 4:
pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
}
linear_pixels = sw_temp.get();
}
switch (out_bpp)
{
case 1:
convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
case 2:
convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
case 4:
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
}
//pixels_src = swizzled_pixels;
// TODO: Handle Clipping/Image out when swizzled
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
return;
}
// clip if necessary
if (method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_OUT_SIZE] ||
method_registers[NV3089_CLIP_POINT] || method_registers[NV3089_IMAGE_OUT_POINT])
{
// Note: There are cases currently where the if statement above is true, but this for loop doesn't hit, leading to nothing getting copied to pixels_dst
// Currently it seems needed to avoid some errors/crashes
for (s32 y = (method_registers[NV3089_CLIP_POINT] >> 16), dst_y = (method_registers[NV3089_IMAGE_OUT_POINT] >> 16); y < out_h; y++, dst_y++)
{
if (dst_y >= 0 && dst_y < method_registers[NV3089_IMAGE_OUT_SIZE] >> 16)
{
// destination line
u8* dst_line = pixels_dst + dst_y * out_bpp * (method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff)
+ std::min<s32>(std::max<s32>(method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff, 0), method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff);
size_t dst_max = std::min<s32>(
std::max<s32>((s32)(method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff) - (method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff), 0),
method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff) * out_bpp;
if (y >= 0 && y < std::min<s32>(method_registers[NV3089_CLIP_SIZE] >> 16, out_h))
{
// source line
u8* src_line = pixels_src + y * out_bpp * out_w +
std::min<s32>(std::max<s32>(method_registers[NV3089_CLIP_POINT] & 0xffff, 0), method_registers[NV3089_CLIP_SIZE] & 0xffff);
size_t src_max = std::min<s32>(
std::max<s32>((s32)(method_registers[NV3089_CLIP_SIZE] & 0xffff) - (method_registers[NV3089_CLIP_POINT] & 0xffff), 0),
method_registers[NV3089_CLIP_SIZE] & 0xffff) * out_bpp;
std::pair<u8*, size_t>
z0 = { src_line + 0, std::min<size_t>(dst_max, std::max<s64>(0, method_registers[NV3089_CLIP_POINT] & 0xffff)) },
d0 = { src_line + z0.second, std::min<size_t>(dst_max - z0.second, src_max) },
z1 = { src_line + d0.second, dst_max - z0.second - d0.second };
std::memset(z0.first, 0, z0.second);
std::memcpy(d0.first, src_line, d0.second);
std::memset(z1.first, 0, z1.second);
}
else
{
std::memset(dst_line, 0, dst_max);
}
}
}
}
else
{
std::memcpy(pixels_dst, pixels_src, out_w * out_h * out_bpp);
}
}
}
namespace nv0039
{
force_inline void buffer_notify(u32 arg)
{
const u32 inPitch = method_registers[NV0039_PITCH_IN];
const u32 outPitch = method_registers[NV0039_PITCH_OUT];
const u32 lineLength = method_registers[NV0039_LINE_LENGTH_IN];
const u32 lineCount = method_registers[NV0039_LINE_COUNT];
const u8 outFormat = method_registers[NV0039_FORMAT] >> 8;
const u8 inFormat = method_registers[NV0039_FORMAT];
const u32 notify = arg;
// The existing GCM commands use only the value 0x1 for inFormat and outFormat
if (inFormat != 0x01 || outFormat != 0x01)
{
LOG_ERROR(RSX, "NV0039_OFFSET_IN: Unsupported format: inFormat=%d, outFormat=%d", inFormat, outFormat);
}
if (lineCount == 1 && !inPitch && !outPitch && !notify)
{
std::memcpy(
vm::base(get_address(method_registers[NV0039_OFFSET_OUT], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT])),
vm::base(get_address(method_registers[NV0039_OFFSET_IN], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN])),
lineLength);
}
else
{
LOG_ERROR(RSX, "NV0039_OFFSET_IN: bad offset(in=0x%x, out=0x%x), pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x",
method_registers[NV0039_OFFSET_IN], method_registers[NV0039_OFFSET_OUT], inPitch, outPitch, lineLength, lineCount, inFormat, outFormat, notify);
}
}
}
void flip_command(thread* rsx, u32 arg)
{
if (user_asked_for_frame_capture)
{
rsx->capture_current_frame = true;
user_asked_for_frame_capture = false;
frame_debug.reset();
}
else if (rsx->capture_current_frame)
{
rsx->capture_current_frame = false;
Emu.Pause();
}
rsx->gcm_current_buffer = arg;
rsx->flip(arg);
// After each flip PS3 system is executing a routine that changes registers value to some default.
// Some game use this default state (SH3).
rsx->reset();
rsx->last_flip_time = get_system_time() - 1000000;
rsx->gcm_current_buffer = arg;
rsx->flip_status = 0;
if (rsx->flip_handler)
{
Emu.GetCallbackManager().Async([func = rsx->flip_handler](PPUThread& ppu)
{
func(ppu, 1);
});
}
rsx->sem_flip.post_and_wait();
//sync
double limit;
switch (rpcs3::state.config.rsx.frame_limit.value())
{
case rsx_frame_limit::_50: limit = 50.; break;
case rsx_frame_limit::_59_94: limit = 59.94; break;
case rsx_frame_limit::_30: limit = 30.; break;
case rsx_frame_limit::_60: limit = 60.; break;
case rsx_frame_limit::Auto: limit = rsx->fps_limit; break; //TODO
case rsx_frame_limit::Off:
default:
return;
}
std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec())));
rsx->timer_sync.Start();
rsx->local_transform_constants.clear();
}
void user_command(thread* rsx, u32 arg)
{
if (rsx->user_handler)
{
Emu.GetCallbackManager().Async([func = rsx->user_handler, arg](PPUThread& ppu)
{
func(ppu, arg);
});
}
else
{
throw EXCEPTION("User handler not set");
}
}
struct __rsx_methods_t
{
using rsx_impl_method_t = void(*)(u32);
template<rsx_method_t impl_func>
force_inline static void call_impl_func(thread *rsx, u32 arg)
{
impl_func(rsx, arg);
}
template<rsx_impl_method_t impl_func>
force_inline static void call_impl_func(thread *rsx, u32 arg)
{
impl_func(arg);
}
template<int id, typename T, T impl_func>
static void wrapper(thread *rsx, u32 arg)
{
// try process using gpu
if (rsx->do_method(id, arg))
{
if (rsx->capture_current_frame && id == NV4097_CLEAR_SURFACE)
rsx->capture_frame("clear");
return;
}
// not handled by renderer
// try process using cpu
if (impl_func != nullptr)
call_impl_func<impl_func>(rsx, arg);
}
template<int id, int step, int count, template<u32> class T, int index = 0>
struct bind_range_impl_t
{
force_inline static void impl()
{
bind_range_impl_t<id + step, step, count, T, index + 1>::impl();
bind<id, T<index>::impl>();
}
};
template<int id, int step, int count, template<u32> class T>
struct bind_range_impl_t<id, step, count, T, count>
{
force_inline static void impl()
{
}
};
template<int id, int step, int count, template<u32> class T, int index = 0>
force_inline static void bind_range()
{
bind_range_impl_t<id, step, count, T, index>::impl();
}
[[noreturn]] never_inline static void bind_redefinition_error(int id)
{
throw EXCEPTION("RSX method implementation redefinition (0x%04x)", id);
}
template<int id, typename T, T impl_func>
static void bind_impl()
{
if (methods[id])
{
bind_redefinition_error(id);
}
methods[id] = wrapper<id, T, impl_func>;
}
template<int id, typename T, T impl_func>
static void bind_cpu_only_impl()
{
if (methods[id])
{
bind_redefinition_error(id);
}
methods[id] = call_impl_func<impl_func>;
}
template<int id, rsx_impl_method_t impl_func> static void bind() { bind_impl<id, rsx_impl_method_t, impl_func>(); }
template<int id, rsx_method_t impl_func = nullptr> static void bind() { bind_impl<id, rsx_method_t, impl_func>(); }
//do not try process on gpu
template<int id, rsx_impl_method_t impl_func> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_impl_method_t, impl_func>(); }
//do not try process on gpu
template<int id, rsx_method_t impl_func = nullptr> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_method_t, impl_func>(); }
__rsx_methods_t()
{
// NV406E
bind_cpu_only<NV406E_SET_REFERENCE, nv406e::set_reference>();
bind<NV406E_SEMAPHORE_ACQUIRE, nv406e::semaphore_acquire>();
bind<NV406E_SEMAPHORE_RELEASE, nv406e::semaphore_release>();
// NV4097
bind<NV4097_TEXTURE_READ_SEMAPHORE_RELEASE, nv4097::texture_read_semaphore_release>();
bind<NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, nv4097::back_end_write_semaphore_release>();
bind<NV4097_SET_BEGIN_END, nv4097::set_begin_end>();
bind<NV4097_CLEAR_SURFACE>();
bind<NV4097_DRAW_ARRAYS, nv4097::draw_arrays>();
bind<NV4097_DRAW_INDEX_ARRAY, nv4097::draw_index_array>();
bind<NV4097_INLINE_ARRAY, nv4097::draw_inline_array>();
bind_range<NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 1, 16, nv4097::set_vertex_data_array_format>();
bind_range<NV4097_SET_VERTEX_DATA4UB_M, 1, 16, nv4097::set_vertex_data4ub_m>();
bind_range<NV4097_SET_VERTEX_DATA1F_M, 1, 16, nv4097::set_vertex_data1f_m>();
bind_range<NV4097_SET_VERTEX_DATA2F_M + 1, 2, 16, nv4097::set_vertex_data2f_m>();
bind_range<NV4097_SET_VERTEX_DATA3F_M + 2, 3, 16, nv4097::set_vertex_data3f_m>();
bind_range<NV4097_SET_VERTEX_DATA4F_M + 3, 4, 16, nv4097::set_vertex_data4f_m>();
bind_range<NV4097_SET_VERTEX_DATA2S_M, 1, 16, nv4097::set_vertex_data2s_m>();
bind_range<NV4097_SET_VERTEX_DATA4S_M + 1, 2, 16, nv4097::set_vertex_data4s_m>();
bind_range<NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant>();
bind_range<NV4097_SET_TRANSFORM_PROGRAM + 3, 4, 128, nv4097::set_transform_program>();
bind_cpu_only<NV4097_GET_REPORT, nv4097::get_report>();
bind_cpu_only<NV4097_CLEAR_REPORT_VALUE, nv4097::clear_report_value>();
//NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
bind_range<NV308A_COLOR + 256, 1, 512, nv308a::color, 256>();
//NV3089
bind<NV3089_IMAGE_IN, nv3089::image_in>();
//NV0039
bind<NV0039_BUFFER_NOTIFY, nv0039::buffer_notify>();
// custom methods
bind_cpu_only<GCM_FLIP_COMMAND, flip_command>();
bind_cpu_only<GCM_SET_USER_COMMAND, user_command>();
}
} __rsx_methods;
std::string shaders_cache::path_to_root()
{
return fs::get_executable_dir() + "data/";
@ -935,6 +139,108 @@ namespace rsx
}
}
void tiled_region::write(const void *src, u32 width, u32 height, u32 pitch)
{
if (!tile)
{
memcpy(ptr, src, height * pitch);
return;
}
u32 offset_x = base % tile->pitch;
u32 offset_y = base / tile->pitch;
switch (tile->comp)
{
case CELL_GCM_COMPMODE_C32_2X1:
case CELL_GCM_COMPMODE_DISABLED:
for (int y = 0; y < height; ++y)
{
memcpy(ptr + (offset_y + y) * tile->pitch + offset_x, (u8*)src + pitch * y, pitch);
}
break;
/*
case CELL_GCM_COMPMODE_C32_2X1:
for (u32 y = 0; y < height; ++y)
{
for (u32 x = 0; x < width; ++x)
{
u32 value = *(u32*)((u8*)src + pitch * y + x * sizeof(u32));
*(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
*(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
}
}
break;
*/
case CELL_GCM_COMPMODE_C32_2X2:
for (u32 y = 0; y < height; ++y)
{
for (u32 x = 0; x < width; ++x)
{
u32 value = *(u32*)((u8*)src + pitch * y + x * sizeof(u32));
*(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
*(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
*(u32*)(ptr + (offset_y + y * 2 + 1) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
*(u32*)(ptr + (offset_y + y * 2 + 1) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
}
}
break;
default:
throw;
}
}
void tiled_region::read(void *dst, u32 width, u32 height, u32 pitch)
{
if (!tile)
{
memcpy(dst, ptr, height * pitch);
return;
}
u32 offset_x = base % tile->pitch;
u32 offset_y = base / tile->pitch;
switch (tile->comp)
{
case CELL_GCM_COMPMODE_C32_2X1:
case CELL_GCM_COMPMODE_DISABLED:
for (int y = 0; y < height; ++y)
{
memcpy((u8*)dst + pitch * y, ptr + (offset_y + y) * tile->pitch + offset_x, pitch);
}
break;
/*
case CELL_GCM_COMPMODE_C32_2X1:
for (u32 y = 0; y < height; ++y)
{
for (u32 x = 0; x < width; ++x)
{
u32 value = *(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32));
*(u32*)((u8*)dst + pitch * y + x * sizeof(u32)) = value;
}
}
break;
*/
case CELL_GCM_COMPMODE_C32_2X2:
for (u32 y = 0; y < height; ++y)
{
for (u32 x = 0; x < width; ++x)
{
u32 value = *(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32));
*(u32*)((u8*)dst + pitch * y + x * sizeof(u32)) = value;
}
}
break;
default:
throw;
}
}
void thread::load_vertex_data(u32 first, u32 count)
{
vertex_draw_count += count;
@ -1044,14 +350,19 @@ namespace rsx
void thread::end()
{
vertex_index_array.clear();
for (auto &vertex_array : vertex_arrays)
{
vertex_array.clear();
}
transform_constants.clear();
if (capture_current_frame)
{
capture_frame("Draw " + std::to_string(vertex_draw_count));
}
}
void thread::on_task()
{
@ -1318,13 +629,20 @@ namespace rsx
// Reset vertex attrib array
for (int i = 0; i < limits::vertex_count; i++)
{
vertex_arrays_info[i].size = 0;
}
// Construct Textures
for (int i = 0; i < limits::textures_count; i++)
{
textures[i].init(i);
}
for (int i = 0; i < limits::vertex_textures_count; i++)
{
vertex_textures[i].init(i);
}
}
void thread::init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress)
@ -1341,6 +659,40 @@ namespace rsx
start();
}
GcmTileInfo *thread::find_tile(u32 offset, u32 location)
{
for (GcmTileInfo &tile : tiles)
{
if (!tile.binded || tile.location != location)
{
continue;
}
if (offset >= tile.offset && offset < tile.offset + tile.size)
{
return &tile;
}
}
return nullptr;
}
tiled_region thread::get_tiled_address(u32 offset, u32 location)
{
u32 address = get_address(offset, location);
GcmTileInfo *tile = find_tile(offset, location);
u32 base = 0;
if (tile)
{
base = offset - tile->offset;
address = get_address(tile->offset, location);
}
return{ address, base, tile, (u8*)vm::base(address) };
}
u32 thread::ReadIO32(u32 addr)
{
u32 value;

View File

@ -148,158 +148,20 @@ namespace rsx
static std::string path_to_root();
};
//TODO
union alignas(4) method_registers_t
{
u8 _u8[0x10000];
u32 _u32[0x10000 >> 2];
/*
struct alignas(4)
{
u8 pad[NV4097_SET_TEXTURE_OFFSET - 4];
struct alignas(4) texture_t
{
u32 offset;
union format_t
{
u32 _u32;
struct
{
u32: 1;
u32 location : 1;
u32 cubemap : 1;
u32 border_type : 1;
u32 dimension : 4;
u32 format : 8;
u32 mipmap : 16;
};
} format;
union address_t
{
u32 _u32;
struct
{
u32 wrap_s : 4;
u32 aniso_bias : 4;
u32 wrap_t : 4;
u32 unsigned_remap : 4;
u32 wrap_r : 4;
u32 gamma : 4;
u32 signed_remap : 4;
u32 zfunc : 4;
};
} address;
u32 control0;
u32 control1;
u32 filter;
u32 image_rect;
u32 border_color;
} textures[limits::textures_count];
};
*/
u32& operator[](int index)
{
return _u32[index >> 2];
}
};
extern u32 method_registers[0x10000 >> 2];
u32 get_vertex_type_size(u32 type);
u32 get_address(u32 offset, u32 location);
template<typename T>
void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight)
struct tiled_region
{
T *src, *dst;
src = static_cast<T*>(inputPixels);
dst = static_cast<T*>(outputPixels);
u32 address;
u32 base;
GcmTileInfo *tile;
u8 *ptr;
for (u16 h = 0; h < inputHeight; ++h)
{
const u32 padded_pos = h * outputWidth;
const u32 pos = h * inputWidth;
for (u16 w = 0; w < inputWidth; ++w)
{
dst[padded_pos + w] = src[pos + w];
}
}
}
/* Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels
* - Input can be swizzled or linear, bool flag handles conversion to and from
* - It will handle any width and height that are a power of 2, square or non square
* Restriction: It has mixed results if the height or width is not a power of 2
*/
template<typename T>
void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled)
{
u32 log2width, log2height;
log2width = log2(width);
log2height = log2(height);
// Max mask possible for square texture
u32 x_mask = 0x55555555;
u32 y_mask = 0xAAAAAAAA;
// We have to limit the masks to the lower of the two dimensions to allow for non-square textures
u32 limit_mask = (log2width < log2height) ? log2width : log2height;
// double the limit mask to account for bits in both x and y
limit_mask = 1 << (limit_mask << 1);
//x_mask, bits above limit are 1's for x-carry
x_mask = (x_mask | ~(limit_mask - 1));
//y_mask. bits above limit are 0'd, as we use a different method for y-carry over
y_mask = (y_mask & (limit_mask - 1));
u32 offs_y = 0;
u32 offs_x = 0;
u32 offs_x0 = 0; //total y-carry offset for x
u32 y_incr = limit_mask;
T *src, *dst;
if (!inputIsSwizzled)
{
for (int y = 0; y < height; ++y)
{
src = static_cast<T*>(inputPixels) + y*width;
dst = static_cast<T*>(outputPixels) + offs_y;
offs_x = offs_x0;
for (int x = 0; x < width; ++x)
{
dst[offs_x] = src[x];
offs_x = (offs_x - x_mask) & x_mask;
}
offs_y = (offs_y - y_mask) & y_mask;
if (offs_y == 0) offs_x0 += y_incr;
}
}
else
{
for (int y = 0; y < height; ++y)
{
src = static_cast<T*>(inputPixels) + offs_y;
dst = static_cast<T*>(outputPixels) + y*width;
offs_x = offs_x0;
for (int x = 0; x < width; ++x)
{
dst[x] = src[offs_x];
offs_x = (offs_x - x_mask) & x_mask;
}
offs_y = (offs_y - y_mask) & y_mask;
if (offs_y == 0) offs_x0 += y_incr;
}
}
}
void write(const void *src, u32 width, u32 height, u32 pitch);
void read(void *dst, u32 width, u32 height, u32 pitch);
};
struct surface_info
{
@ -504,6 +366,9 @@ namespace rsx
void reset();
void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);
tiled_region get_tiled_address(u32 offset, u32 location);
GcmTileInfo *find_tile(u32 offset, u32 location);
u32 ReadIO32(u32 addr);
void WriteIO32(u32 addr, u32 value);
};

View File

@ -0,0 +1,853 @@
#include "stdafx.h"
#include "rsx_methods.h"
#include "RSXThread.h"
#include "Emu/Memory/Memory.h"
#include "Emu/System.h"
#include "Emu/state.h"
#include "rsx_utils.h"
#include "Emu/SysCalls/Callback.h"
#include "Emu/SysCalls/CB_FUNC.h"
namespace rsx
{
u32 method_registers[0x10000 >> 2];
rsx_method_t methods[0x10000 >> 2]{};
template<typename Type> struct vertex_data_type_from_element_type;
template<> struct vertex_data_type_from_element_type<float> { enum { type = CELL_GCM_VERTEX_F }; };
template<> struct vertex_data_type_from_element_type<f16> { enum { type = CELL_GCM_VERTEX_SF }; };
template<> struct vertex_data_type_from_element_type<u8> { enum { type = CELL_GCM_VERTEX_UB }; };
template<> struct vertex_data_type_from_element_type<u16> { enum { type = CELL_GCM_VERTEX_S1 }; };
namespace nv406e
{
force_inline void set_reference(thread* rsx, u32 arg)
{
rsx->ctrl->ref.exchange(arg);
}
force_inline void semaphore_acquire(thread* rsx, u32 arg)
{
//TODO: dma
while (vm::ps3::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg)
{
if (Emu.IsStopped())
break;
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
}
force_inline void semaphore_release(thread* rsx, u32 arg)
{
//TODO: dma
vm::ps3::write32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET], arg);
}
}
namespace nv4097
{
force_inline void texture_read_semaphore_release(thread* rsx, u32 arg)
{
//TODO: dma
vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], arg);
}
force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg)
{
//TODO: dma
vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET],
(arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff));
}
//fire only when all data passed to rsx cmd buffer
template<u32 id, u32 index, int count, typename type>
force_inline void set_vertex_data_impl(thread* rsx, u32 arg)
{
static const size_t element_size = (count * sizeof(type));
static const size_t element_size_in_words = element_size / sizeof(u32);
auto& info = rsx->register_vertex_info[index];
info.type = vertex_data_type_from_element_type<type>::type;
info.size = count;
info.frequency = 0;
info.stride = 0;
auto& entry = rsx->register_vertex_data[index];
//find begin of data
size_t begin = id + index * element_size_in_words;
size_t position = 0;//entry.size();
entry.resize(position + element_size);
memcpy(entry.data() + position, method_registers + begin, element_size);
}
template<u32 index>
struct set_vertex_data4ub_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4UB_M, index, 4, u8>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data1f_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA1F_M, index, 1, f32>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data2f_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA2F_M, index, 2, f32>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data3f_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA3F_M, index, 3, f32>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data4f_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4F_M, index, 4, f32>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data2s_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA2S_M, index, 2, u16>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data4s_m
{
force_inline static void impl(thread* rsx, u32 arg)
{
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4S_M, index, 4, u16>(rsx, arg);
}
};
template<u32 index>
struct set_vertex_data_array_format
{
force_inline static void impl(thread* rsx, u32 arg)
{
auto& info = rsx->vertex_arrays_info[index];
info.unpack_array(arg);
}
};
force_inline void draw_arrays(thread* rsx, u32 arg)
{
rsx->draw_command = thread::Draw_command::draw_command_array;
u32 first = arg & 0xffffff;
u32 count = (arg >> 24) + 1;
rsx->load_vertex_data(first, count);
}
force_inline void draw_index_array(thread* rsx, u32 arg)
{
rsx->draw_command = thread::Draw_command::draw_command_indexed;
u32 first = arg & 0xffffff;
u32 count = (arg >> 24) + 1;
rsx->load_vertex_data(first, count);
rsx->load_vertex_index_data(first, count);
}
force_inline void draw_inline_array(thread* rsx, u32 arg)
{
rsx->draw_command = thread::Draw_command::draw_command_inlined_array;
rsx->draw_inline_vertex_array = true;
rsx->inline_vertex_array.push_back(arg);
}
template<u32 index>
struct set_transform_constant
{
force_inline static void impl(thread* rsxthr, u32 arg)
{
u32 load = method_registers[NV4097_SET_TRANSFORM_CONSTANT_LOAD];
static const size_t count = 4;
static const size_t size = count * sizeof(f32);
size_t reg = index / 4;
size_t subreg = index % 4;
memcpy(rsxthr->transform_constants[load + reg].rgba + subreg, method_registers + NV4097_SET_TRANSFORM_CONSTANT + reg * count + subreg, sizeof(f32));
}
};
template<u32 index>
struct set_transform_program
{
force_inline static void impl(thread* rsx, u32 arg)
{
u32& load = method_registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD];
static const size_t count = 4;
static const size_t size = count * sizeof(u32);
memcpy(rsx->transform_program + load++ * count, method_registers + NV4097_SET_TRANSFORM_PROGRAM + index * count, size);
}
};
force_inline void set_begin_end(thread* rsx, u32 arg)
{
if (arg)
{
rsx->draw_inline_vertex_array = false;
rsx->inline_vertex_array.clear();
rsx->begin();
return;
}
if (!rsx->vertex_draw_count)
{
bool has_array = false;
for (int i = 0; i < rsx::limits::vertex_count; ++i)
{
if (rsx->vertex_arrays_info[i].size > 0)
{
has_array = true;
break;
}
}
if (!has_array)
{
u32 min_count = ~0;
for (int i = 0; i < rsx::limits::vertex_count; ++i)
{
if (!rsx->register_vertex_info[i].size)
continue;
u32 count = u32(rsx->register_vertex_data[i].size()) /
rsx::get_vertex_type_size(rsx->register_vertex_info[i].type) * rsx->register_vertex_info[i].size;
if (count < min_count)
min_count = count;
}
if (min_count && min_count < ~0)
{
rsx->vertex_draw_count = min_count;
}
}
}
rsx->end();
rsx->vertex_draw_count = 0;
}
force_inline void get_report(thread* rsx, u32 arg)
{
u8 type = arg >> 24;
u32 offset = arg & 0xffffff;
//TODO: use DMA
vm::ps3::ptr<CellGcmReportData> result = { rsx->local_mem_addr + offset, vm::addr };
result->timer = rsx->timestamp();
switch (type)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
case CELL_GCM_ZCULL_STATS:
case CELL_GCM_ZCULL_STATS1:
case CELL_GCM_ZCULL_STATS2:
case CELL_GCM_ZCULL_STATS3:
result->value = 0;
LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type);
break;
default:
result->value = 0;
LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type);
break;
}
//result->padding = 0;
}
force_inline void clear_report_value(thread* rsx, u32 arg)
{
switch (arg)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT");
break;
case CELL_GCM_ZCULL_STATS:
LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS");
break;
default:
LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg);
break;
}
}
}
namespace nv308a
{
template<u32 index>
struct color
{
force_inline static void impl(u32 arg)
{
u32 point = method_registers[NV308A_POINT];
u16 x = point;
u16 y = point >> 16;
if (y)
{
LOG_ERROR(RSX, "%s: y is not null (0x%x)", __FUNCTION__, y);
}
u32 address = get_address(method_registers[NV3062_SET_OFFSET_DESTIN] + (x << 2) + index * 4, method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]);
vm::ps3::write32(address, arg);
}
};
}
namespace nv3089
{
never_inline void image_in(thread *rsx, u32 arg)
{
u32 operation = method_registers[NV3089_SET_OPERATION];
u32 clip_x = method_registers[NV3089_CLIP_POINT] & 0xffff;
u32 clip_y = method_registers[NV3089_CLIP_POINT] >> 16;
u32 clip_w = method_registers[NV3089_CLIP_SIZE] & 0xffff;
u32 clip_h = method_registers[NV3089_CLIP_SIZE] >> 16;
u32 out_x = method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff;
u32 out_y = method_registers[NV3089_IMAGE_OUT_POINT] >> 16;
u32 out_w = method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff;
u32 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16;
u16 in_w = method_registers[NV3089_IMAGE_IN_SIZE];
u16 in_h = method_registers[NV3089_IMAGE_IN_SIZE] >> 16;
u16 in_pitch = method_registers[NV3089_IMAGE_IN_FORMAT];
u8 in_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16;
u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24;
u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT];
f32 in_x = (method_registers[NV3089_IMAGE_IN] & 0xffff) / 16.f;
f32 in_y = (method_registers[NV3089_IMAGE_IN] >> 16) / 16.f;
if (in_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", in_origin);
}
if (in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", in_inter);
}
if (operation != CELL_GCM_TRANSFER_OPERATION_SRCCOPY)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown operation (%d)", operation);
}
const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET];
const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE];
u32 dst_offset;
u32 dst_dma = 0;
u16 dst_color_format;
u32 out_pitch = 0;
u32 out_aligment = 64;
switch (method_registers[NV3089_SET_CONTEXT_SURFACE])
{
case CELL_GCM_CONTEXT_SURFACE2D:
dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN];
dst_offset = method_registers[NV3062_SET_OFFSET_DESTIN];
dst_color_format = method_registers[NV3062_SET_COLOR_FORMAT];
out_pitch = method_registers[NV3062_SET_PITCH] >> 16;
out_aligment = method_registers[NV3062_SET_PITCH] & 0xffff;
break;
case CELL_GCM_CONTEXT_SWIZZLE2D:
dst_dma = method_registers[NV309E_SET_CONTEXT_DMA_IMAGE];
dst_offset = method_registers[NV309E_SET_OFFSET];
dst_color_format = method_registers[NV309E_SET_FORMAT];
break;
default:
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", method_registers[NV3089_SET_CONTEXT_SURFACE]);
return;
}
u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
u32 in_offset = u32(in_x * in_bpp + in_pitch * in_y);
u32 out_offset = out_x * out_bpp + out_pitch * out_y;
tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);//get_address(src_offset, src_dma);
u32 dst_address = get_address(dst_offset + out_offset, dst_dma);
if (out_pitch == 0)
{
out_pitch = out_bpp * out_w;
}
if (in_pitch == 0)
{
in_pitch = in_bpp * in_w;
}
if (clip_w > out_w)
{
clip_w = out_w;
}
if (clip_h > out_h)
{
clip_h = out_h;
}
//LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address);
u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset);
if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 &&
dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", dst_color_format);
}
if (src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 &&
src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_A8R8G8B8)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", src_color_format);
}
//LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x",
// method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]),
// method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]);
std::unique_ptr<u8[]> temp1, temp2;
AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX];
f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY];
u32 convert_w = (u32)(scale_x * in_w);
u32 convert_h = (u32)(scale_y * in_h);
bool need_clip =
method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] ||
method_registers[NV3089_CLIP_POINT] || convert_w != out_w || convert_h != out_h;
bool need_convert = out_format != in_format || scale_x != 1.0 || scale_y != 1.0;
u32 slice_h = clip_h;
if (src_region.tile)
{
if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2)
{
slice_h *= 2;
}
u32 size = slice_h * in_pitch;
if (size > src_region.tile->size - src_region.base)
{
u32 diff = size - (src_region.tile->size - src_region.base);
slice_h -= diff / in_pitch + (diff % in_pitch ? 1 : 0);
}
}
if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D)
{
if (need_convert || need_clip)
{
if (need_clip)
{
if (need_convert)
{
convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
}
else
{
clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
}
}
else
{
convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
}
}
else
{
if (out_pitch != in_pitch || out_pitch != out_bpp * out_w)
{
for (u32 y = 0; y < out_h; ++y)
{
u8 *dst = pixels_dst + out_pitch * y;
u8 *src = pixels_src + in_pitch * y;
std::memmove(dst, src, out_w * out_bpp);
}
}
else
{
std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h);
}
}
}
else
{
if (need_convert || need_clip)
{
if (need_clip)
{
if (need_convert)
{
convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
}
else
{
clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
}
}
else
{
convert_scale_image(temp2, out_format, out_w, out_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false);
}
pixels_src = temp2.get();
}
u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16;
u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24;
// 0 indicates height of 1 pixel
sw_height_log2 = sw_height_log2 == 0 ? 1 : sw_height_log2;
// swizzle based on destination size
u16 sw_width = 1 << sw_width_log2;
u16 sw_height = 1 << sw_height_log2;
temp2.reset(new u8[out_bpp * sw_width * sw_height]);
u8* linear_pixels = pixels_src;
u8* swizzled_pixels = temp2.get();
// Check and pad texture out if we are given non square texture for swizzle to be correct
if (sw_width != out_w || sw_height != out_h)
{
std::unique_ptr<u8[]> sw_temp(new u8[out_bpp * sw_width * sw_height]);
switch (out_bpp)
{
case 1:
pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
case 2:
pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
case 4:
pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
}
linear_pixels = sw_temp.get();
}
switch (out_bpp)
{
case 1:
convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
case 2:
convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
case 4:
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
}
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
}
}
}
namespace nv0039
{
force_inline void buffer_notify(u32 arg)
{
const u32 inPitch = method_registers[NV0039_PITCH_IN];
const u32 outPitch = method_registers[NV0039_PITCH_OUT];
const u32 lineLength = method_registers[NV0039_LINE_LENGTH_IN];
const u32 lineCount = method_registers[NV0039_LINE_COUNT];
const u8 outFormat = method_registers[NV0039_FORMAT] >> 8;
const u8 inFormat = method_registers[NV0039_FORMAT];
const u32 notify = arg;
// The existing GCM commands use only the value 0x1 for inFormat and outFormat
if (inFormat != 0x01 || outFormat != 0x01)
{
LOG_ERROR(RSX, "NV0039_OFFSET_IN: Unsupported format: inFormat=%d, outFormat=%d", inFormat, outFormat);
}
if (lineCount == 1 && !inPitch && !outPitch && !notify)
{
std::memcpy(
vm::base(get_address(method_registers[NV0039_OFFSET_OUT], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT])),
vm::base(get_address(method_registers[NV0039_OFFSET_IN], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN])),
lineLength);
}
else
{
LOG_ERROR(RSX, "NV0039_OFFSET_IN: bad offset(in=0x%x, out=0x%x), pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x",
method_registers[NV0039_OFFSET_IN], method_registers[NV0039_OFFSET_OUT], inPitch, outPitch, lineLength, lineCount, inFormat, outFormat, notify);
}
}
}
void flip_command(thread* rsx, u32 arg)
{
if (user_asked_for_frame_capture)
{
rsx->capture_current_frame = true;
user_asked_for_frame_capture = false;
frame_debug.reset();
}
else if (rsx->capture_current_frame)
{
rsx->capture_current_frame = false;
Emu.Pause();
}
rsx->gcm_current_buffer = arg;
rsx->flip(arg);
// After each flip PS3 system is executing a routine that changes registers value to some default.
// Some game use this default state (SH3).
rsx->reset();
rsx->last_flip_time = get_system_time() - 1000000;
rsx->gcm_current_buffer = arg;
rsx->flip_status = 0;
if (rsx->flip_handler)
{
Emu.GetCallbackManager().Async([func = rsx->flip_handler](PPUThread& ppu)
{
func(ppu, 1);
});
}
rsx->sem_flip.post_and_wait();
//sync
double limit;
switch (rpcs3::state.config.rsx.frame_limit.value())
{
case rsx_frame_limit::_50: limit = 50.; break;
case rsx_frame_limit::_59_94: limit = 59.94; break;
case rsx_frame_limit::_30: limit = 30.; break;
case rsx_frame_limit::_60: limit = 60.; break;
case rsx_frame_limit::Auto: limit = rsx->fps_limit; break; //TODO
case rsx_frame_limit::Off:
default:
return;
}
std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec())));
rsx->timer_sync.Start();
rsx->local_transform_constants.clear();
}
void user_command(thread* rsx, u32 arg)
{
if (rsx->user_handler)
{
Emu.GetCallbackManager().Async([func = rsx->user_handler, arg](PPUThread& ppu)
{
func(ppu, arg);
});
}
else
{
throw EXCEPTION("User handler not set");
}
}
struct __rsx_methods_t
{
using rsx_impl_method_t = void(*)(u32);
template<rsx_method_t impl_func>
force_inline static void call_impl_func(thread *rsx, u32 arg)
{
impl_func(rsx, arg);
}
template<rsx_impl_method_t impl_func>
force_inline static void call_impl_func(thread *rsx, u32 arg)
{
impl_func(arg);
}
template<int id, typename T, T impl_func>
static void wrapper(thread *rsx, u32 arg)
{
// try process using gpu
if (rsx->do_method(id, arg))
{
if (rsx->capture_current_frame && id == NV4097_CLEAR_SURFACE)
rsx->capture_frame("clear");
return;
}
// not handled by renderer
// try process using cpu
if (impl_func != nullptr)
call_impl_func<impl_func>(rsx, arg);
}
template<int id, int step, int count, template<u32> class T, int index = 0>
struct bind_range_impl_t
{
force_inline static void impl()
{
bind_range_impl_t<id + step, step, count, T, index + 1>::impl();
bind<id, T<index>::impl>();
}
};
template<int id, int step, int count, template<u32> class T>
struct bind_range_impl_t<id, step, count, T, count>
{
force_inline static void impl()
{
}
};
template<int id, int step, int count, template<u32> class T, int index = 0>
force_inline static void bind_range()
{
bind_range_impl_t<id, step, count, T, index>::impl();
}
[[noreturn]] never_inline static void bind_redefinition_error(int id)
{
throw EXCEPTION("RSX method implementation redefinition (0x%04x)", id);
}
template<int id, typename T, T impl_func>
static void bind_impl()
{
if (methods[id])
{
bind_redefinition_error(id);
}
methods[id] = wrapper<id, T, impl_func>;
}
template<int id, typename T, T impl_func>
static void bind_cpu_only_impl()
{
if (methods[id])
{
bind_redefinition_error(id);
}
methods[id] = call_impl_func<impl_func>;
}
template<int id, rsx_impl_method_t impl_func> static void bind() { bind_impl<id, rsx_impl_method_t, impl_func>(); }
template<int id, rsx_method_t impl_func = nullptr> static void bind() { bind_impl<id, rsx_method_t, impl_func>(); }
//do not try process on gpu
template<int id, rsx_impl_method_t impl_func> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_impl_method_t, impl_func>(); }
//do not try process on gpu
template<int id, rsx_method_t impl_func = nullptr> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_method_t, impl_func>(); }
__rsx_methods_t()
{
// NV406E
bind_cpu_only<NV406E_SET_REFERENCE, nv406e::set_reference>();
bind<NV406E_SEMAPHORE_ACQUIRE, nv406e::semaphore_acquire>();
bind<NV406E_SEMAPHORE_RELEASE, nv406e::semaphore_release>();
// NV4097
bind<NV4097_TEXTURE_READ_SEMAPHORE_RELEASE, nv4097::texture_read_semaphore_release>();
bind<NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, nv4097::back_end_write_semaphore_release>();
bind<NV4097_SET_BEGIN_END, nv4097::set_begin_end>();
bind<NV4097_CLEAR_SURFACE>();
bind<NV4097_DRAW_ARRAYS, nv4097::draw_arrays>();
bind<NV4097_DRAW_INDEX_ARRAY, nv4097::draw_index_array>();
bind<NV4097_INLINE_ARRAY, nv4097::draw_inline_array>();
bind_range<NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 1, 16, nv4097::set_vertex_data_array_format>();
bind_range<NV4097_SET_VERTEX_DATA4UB_M, 1, 16, nv4097::set_vertex_data4ub_m>();
bind_range<NV4097_SET_VERTEX_DATA1F_M, 1, 16, nv4097::set_vertex_data1f_m>();
bind_range<NV4097_SET_VERTEX_DATA2F_M + 1, 2, 16, nv4097::set_vertex_data2f_m>();
bind_range<NV4097_SET_VERTEX_DATA3F_M + 2, 3, 16, nv4097::set_vertex_data3f_m>();
bind_range<NV4097_SET_VERTEX_DATA4F_M + 3, 4, 16, nv4097::set_vertex_data4f_m>();
bind_range<NV4097_SET_VERTEX_DATA2S_M, 1, 16, nv4097::set_vertex_data2s_m>();
bind_range<NV4097_SET_VERTEX_DATA4S_M + 1, 2, 16, nv4097::set_vertex_data4s_m>();
bind_range<NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant>();
bind_range<NV4097_SET_TRANSFORM_PROGRAM + 3, 4, 128, nv4097::set_transform_program>();
bind_cpu_only<NV4097_GET_REPORT, nv4097::get_report>();
bind_cpu_only<NV4097_CLEAR_REPORT_VALUE, nv4097::clear_report_value>();
//NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
bind_range<NV308A_COLOR + 256, 1, 512, nv308a::color, 256>();
//NV3089
bind<NV3089_IMAGE_IN, nv3089::image_in>();
//NV0039
bind<NV0039_BUFFER_NOTIFY, nv0039::buffer_notify>();
// custom methods
bind_cpu_only<GCM_FLIP_COMMAND, flip_command>();
bind_cpu_only<GCM_SET_USER_COMMAND, user_command>();
}
} __rsx_methods;
}

View File

@ -0,0 +1,69 @@
#pragma once
namespace rsx
{
//TODO
union alignas(4) method_registers_t
{
u8 _u8[0x10000];
u32 _u32[0x10000 >> 2];
/*
struct alignas(4)
{
u8 pad[NV4097_SET_TEXTURE_OFFSET - 4];
struct alignas(4) texture_t
{
u32 offset;
union format_t
{
u32 _u32;
struct
{
u32: 1;
u32 location : 1;
u32 cubemap : 1;
u32 border_type : 1;
u32 dimension : 4;
u32 format : 8;
u32 mipmap : 16;
};
} format;
union address_t
{
u32 _u32;
struct
{
u32 wrap_s : 4;
u32 aniso_bias : 4;
u32 wrap_t : 4;
u32 unsigned_remap : 4;
u32 wrap_r : 4;
u32 gamma : 4;
u32 signed_remap : 4;
u32 zfunc : 4;
};
} address;
u32 control0;
u32 control1;
u32 filter;
u32 image_rect;
u32 border_color;
} textures[limits::textures_count];
};
*/
u32& operator[](int index)
{
return _u32[index >> 2];
}
};
using rsx_method_t = void(*)(class thread*, u32);
extern u32 method_registers[0x10000 >> 2];
extern rsx_method_t methods[0x10000 >> 2];
}

View File

@ -0,0 +1,45 @@
#include "stdafx.h"
#include "rsx_utils.h"
extern "C"
{
#include "libswscale/swscale.h"
}
namespace rsx
{
void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear)
{
std::unique_ptr<SwsContext, void(*)(SwsContext*)> sws(sws_getContext(src_width, src_height, src_format,
dst_width, dst_height, dst_format, bilinear ? SWS_FAST_BILINEAR : SWS_POINT, NULL, NULL, NULL), sws_freeContext);
sws_scale(sws.get(), &src, &src_pitch, 0, src_slice_h, &dst, &dst_pitch);
}
void convert_scale_image(std::unique_ptr<u8[]>& dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear)
{
dst.reset(new u8[dst_pitch * dst_height]);
convert_scale_image(dst.get(), dst_format, dst_width, dst_height, dst_pitch,
src, src_format, src_width, src_height, src_pitch, src_slice_h, bilinear);
}
void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch)
{
for (int y = 0; y < clip_h; ++y)
{
u8 *dst_row = dst + y * dst_pitch;
const u8 *src_row = src + (y + clip_y) * src_pitch + clip_x * bpp;
std::memmove(dst_row, src_row, clip_w * bpp);
}
}
void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src,
int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch)
{
dst.reset(new u8[clip_h * dst_pitch]);
clip_image(dst.get(), src, clip_x, clip_y, clip_w, clip_h, bpp, src_pitch, dst_pitch);
}
}

111
rpcs3/Emu/RSX/rsx_utils.h Normal file
View File

@ -0,0 +1,111 @@
#pragma once
extern "C"
{
#include <libavutil/pixfmt.h>
}
namespace rsx
{
template<typename T>
void pad_texture(void* input_pixels, void* output_pixels, u16 input_width, u16 input_height, u16 output_width, u16 output_height)
{
T *src = static_cast<T*>(input_pixels);
T *dst = static_cast<T*>(output_pixels);
for (u16 h = 0; h < input_height; ++h)
{
const u32 padded_pos = h * output_width;
const u32 pos = h * input_width;
for (u16 w = 0; w < input_width; ++w)
{
dst[padded_pos + w] = src[pos + w];
}
}
}
/* Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels
* - Input can be swizzled or linear, bool flag handles conversion to and from
* - It will handle any width and height that are a power of 2, square or non square
* Restriction: It has mixed results if the height or width is not a power of 2
*/
template<typename T>
void convert_linear_swizzle(void* input_pixels, void* output_pixels, u16 width, u16 height, bool input_is_swizzled)
{
u32 log2width = log2(width);
u32 log2height = log2(height);
// Max mask possible for square texture
u32 x_mask = 0x55555555;
u32 y_mask = 0xAAAAAAAA;
// We have to limit the masks to the lower of the two dimensions to allow for non-square textures
u32 limit_mask = (log2width < log2height) ? log2width : log2height;
// double the limit mask to account for bits in both x and y
limit_mask = 1 << (limit_mask << 1);
//x_mask, bits above limit are 1's for x-carry
x_mask = (x_mask | ~(limit_mask - 1));
//y_mask. bits above limit are 0'd, as we use a different method for y-carry over
y_mask = (y_mask & (limit_mask - 1));
u32 offs_y = 0;
u32 offs_x = 0;
u32 offs_x0 = 0; //total y-carry offset for x
u32 y_incr = limit_mask;
if (!input_is_swizzled)
{
for (int y = 0; y < height; ++y)
{
T *src = static_cast<T*>(input_pixels) + y * width;
T *dst = static_cast<T*>(output_pixels) + offs_y;
offs_x = offs_x0;
for (int x = 0; x < width; ++x)
{
dst[offs_x] = src[x];
offs_x = (offs_x - x_mask) & x_mask;
}
offs_y = (offs_y - y_mask) & y_mask;
if (offs_y == 0)
{
offs_x0 += y_incr;
}
}
}
else
{
for (int y = 0; y < height; ++y)
{
T *src = static_cast<T*>(input_pixels) + offs_y;
T *dst = static_cast<T*>(output_pixels) + y * width;
offs_x = offs_x0;
for (int x = 0; x < width; ++x)
{
dst[x] = src[offs_x];
offs_x = (offs_x - x_mask) & x_mask;
}
offs_y = (offs_y - y_mask) & y_mask;
if (offs_y == 0)
{
offs_x0 += y_incr;
}
}
}
}
void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
void convert_scale_image(std::unique_ptr<u8[]>& dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
}

View File

@ -129,6 +129,8 @@
<ClCompile Include="Emu\RSX\Common\VertexProgramDecompiler.cpp" />
<ClCompile Include="Emu\RSX\GCM.cpp" />
<ClCompile Include="Emu\RSX\Null\NullGSRender.cpp" />
<ClCompile Include="Emu\RSX\rsx_methods.cpp" />
<ClCompile Include="Emu\RSX\rsx_utils.cpp" />
<ClCompile Include="Emu\state.cpp" />
<ClCompile Include="Emu\SysCalls\lv2\sys_dbg.cpp" />
<ClCompile Include="Emu\SysCalls\lv2\sys_fs.cpp" />
@ -574,6 +576,8 @@
<ClInclude Include="Emu\Memory\vm_ptr.h" />
<ClInclude Include="Emu\Memory\vm_ref.h" />
<ClInclude Include="Emu\Memory\vm_var.h" />
<ClInclude Include="Emu\RSX\rsx_methods.h" />
<ClInclude Include="Emu\RSX\rsx_utils.h" />
<ClInclude Include="Emu\state.h" />
<ClInclude Include="Emu\SysCalls\Callback.h" />
<ClInclude Include="Emu\SysCalls\CB_FUNC.h" />

View File

@ -377,9 +377,6 @@
<ClCompile Include="Loader\TRP.cpp">
<Filter>Loader</Filter>
</ClCompile>
<ClCompile Include="stdafx.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Utilities\StrFmt.cpp">
<Filter>Utilities</Filter>
</ClCompile>
@ -927,6 +924,15 @@
<ClCompile Include="Emu\RSX\GCM.cpp">
<Filter>Emu\GPU\RSX</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\rsx_utils.cpp">
<Filter>Emu\GPU\RSX</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\rsx_methods.cpp">
<Filter>Emu\GPU\RSX</Filter>
</ClCompile>
<ClCompile Include="stdafx.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -1773,5 +1779,11 @@
<ClInclude Include="..\Utilities\BitField.h">
<Filter>Utilities</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\rsx_utils.h">
<Filter>Emu\GPU\RSX</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\rsx_methods.h">
<Filter>Emu\GPU\RSX</Filter>
</ClInclude>
</ItemGroup>
</Project>