Merge pull request #1530 from kd-11/gl_surface_cache_traits

gl: Use shared surface store (updated)
This commit is contained in:
B1ackDaemon 2016-03-05 12:51:25 +02:00
commit a196ee1957
9 changed files with 594 additions and 481 deletions

View File

@ -21,16 +21,6 @@ namespace
throw EXCEPTION("Unknow depth format");
}
// Returns the size in bytes of a single pixel for the given depth surface format.
// Throws when the format is neither z16 nor z24s8.
u8 get_pixel_size(rsx::surface_depth_format format)
{
	if (format == rsx::surface_depth_format::z16)
		return 2;

	if (format == rsx::surface_depth_format::z24s8)
		return 4;

	throw EXCEPTION("Unknow depth format");
}
u32 to_gl_internal_type(rsx::vertex_base_type type, u8 size)
{
/**
@ -417,14 +407,14 @@ void GLGSRender::end()
if (!textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(target, NULL);
glBindTexture(target, 0);
glProgramUniform1i(m_program->id(), location, i);
continue;
}
m_gl_textures[i].set_target(target);
__glcheck m_gl_texture_cache.upload_texture(i, textures[i], m_gl_textures[i]);
__glcheck m_gl_texture_cache.upload_texture(i, textures[i], m_gl_textures[i], m_rtts);
glProgramUniform1i(m_program->id(), location, i);
}
}
@ -500,7 +490,7 @@ void GLGSRender::end()
if (!vertex_info.size) // disabled, bind a null sampler
{
glActiveTexture(GL_TEXTURE0 + index + rsx::limits::textures_count);
glBindTexture(GL_TEXTURE_BUFFER, NULL);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + rsx::limits::textures_count);
continue;
}
@ -567,7 +557,7 @@ void GLGSRender::end()
if (!enabled)
{
glActiveTexture(GL_TEXTURE0 + index + rsx::limits::textures_count);
glBindTexture(GL_TEXTURE_BUFFER, NULL);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + rsx::limits::textures_count);
continue;
}
@ -664,7 +654,7 @@ void GLGSRender::end()
else
{
glActiveTexture(GL_TEXTURE0 + index + rsx::limits::textures_count);
glBindTexture(GL_TEXTURE_BUFFER, NULL);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + rsx::limits::textures_count);
continue;
}
@ -790,12 +780,6 @@ void GLGSRender::on_exit()
if (draw_fbo)
draw_fbo.remove();
for (auto &tex : m_draw_tex_color)
if (tex) tex.remove();
if (m_draw_tex_depth_stencil)
m_draw_tex_depth_stencil.remove();
if (m_flip_fbo)
m_flip_fbo.remove();
@ -987,433 +971,6 @@ bool GLGSRender::load_program()
return true;
}
struct color_swizzle
{
gl::texture::channel a = gl::texture::channel::a;
gl::texture::channel r = gl::texture::channel::r;
gl::texture::channel g = gl::texture::channel::g;
gl::texture::channel b = gl::texture::channel::b;
color_swizzle() = default;
color_swizzle(gl::texture::channel a, gl::texture::channel r, gl::texture::channel g, gl::texture::channel b)
: a(a), r(r), g(g), b(b)
{
}
};
// Description of how an RSX colour surface format is represented on the GL side.
// Instances are built by brace-initialisation in surface_color_format_to_gl, so the
// member order below is part of the contract.
struct color_format
{
// GL pixel data type handed to the texture configuration
gl::texture::type type;
// GL pixel format (channel layout) handed to the texture configuration
gl::texture::format format;
// Forwarded to pixel_pack_settings()/pixel_unpack_settings().swap_bytes()
bool swap_bytes;
// Number of channels per pixel
int channel_count;
// Size of one channel; presumably in bytes -- TODO confirm
int channel_size;
// Channel remapping; identity unless the format table overrides it
color_swizzle swizzle;
};
// Translates an RSX colour surface format into its GL description.
// Formats without a dedicated mapping are reported through LOG_ERROR and
// described as a8r8g8b8.
color_format surface_color_format_to_gl(rsx::surface_color_format color_format)
{
	using tex = gl::texture;

	switch (color_format)
	{
	case rsx::surface_color_format::r5g6b5:
		return{ tex::type::ushort_5_6_5, tex::format::bgr, false, 3, 2 };

	case rsx::surface_color_format::a8r8g8b8:
		return{ tex::type::uint_8_8_8_8, tex::format::bgra, false, 4, 1 };

	case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
		// Alpha is forced to one for this format
		return{ tex::type::uint_8_8_8_8, tex::format::bgra, false, 4, 1,
			{ tex::channel::one, tex::channel::r, tex::channel::g, tex::channel::b } };

	case rsx::surface_color_format::w16z16y16x16:
		return{ tex::type::f16, tex::format::rgba, true, 4, 2 };

	case rsx::surface_color_format::w32z32y32x32:
		return{ tex::type::f32, tex::format::rgba, true, 4, 4 };

	// Known formats that have no mapping yet
	case rsx::surface_color_format::b8:
	case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
	case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
	case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
	case rsx::surface_color_format::g8b8:
	case rsx::surface_color_format::x32:
	case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
	case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
	case rsx::surface_color_format::a8b8g8r8:
	default:
		break;
	}

	LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", color_format);
	return{ tex::type::uint_8_8_8_8, tex::format::bgra, false, 4, 1 };
}
// Translates an RSX depth surface format into a (GL type, GL format) pair.
// Anything other than z16 is described as z24s8; formats that are neither
// are additionally reported through LOG_ERROR.
std::pair<gl::texture::type, gl::texture::format> surface_depth_format_to_gl(rsx::surface_depth_format depth_format)
{
	if (depth_format == rsx::surface_depth_format::z16)
	{
		return std::make_pair(gl::texture::type::ushort, gl::texture::format::depth);
	}

	if (depth_format != rsx::surface_depth_format::z24s8)
	{
		LOG_ERROR(RSX, "Surface depth buffer: Unsupported surface depth format (0x%x)", depth_format);
	}

	return std::make_pair(gl::texture::type::uint_24_8, gl::texture::format::depth_stencil);
	//return std::make_pair(gl::texture::type::f32, gl::texture::format::depth);
}
// Prepares the draw framebuffer for the surface currently described by the RSX registers.
// When no draw FBO exists yet, or the packed surface format register changed, the FBO and all
// of its colour / depth textures are recreated with the clip dimensions. Afterwards the
// buffers are optionally refilled via read_buffers(), the viewport is set and the draw
// buffers are selected according to NV4097_SET_SURFACE_COLOR_TARGET.
//
// skip_reading: when true, read_buffers() is not called.
void GLGSRender::init_buffers(bool skip_reading)
{
u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT];
u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL];
u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL];
// The clip size lives in the upper 16 bits of each clip register
u32 clip_width = clip_horizontal >> 16;
u32 clip_height = clip_vertical >> 16;
// NOTE(review): clip_x / clip_y are not used anywhere in this function
u32 clip_x = clip_horizontal;
u32 clip_y = clip_vertical;
// Rebuild everything only when there is no FBO yet or the surface format changed
if (!draw_fbo || m_surface.format != surface_format)
{
m_surface.unpack(surface_format);
m_surface.width = clip_width;
m_surface.height = clip_height;
LOG_WARNING(RSX, "surface: %dx%d", clip_width, clip_height);
draw_fbo.recreate();
m_draw_tex_depth_stencil.recreate(gl::texture::target::texture2D);
auto format = surface_color_format_to_gl(m_surface.color_format);
// Every colour attachment gets the same format and size
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
m_draw_tex_color[i].recreate(gl::texture::target::texture2D);
__glcheck m_draw_tex_color[i].config()
.size({ (int)m_surface.width, (int)m_surface.height })
.type(format.type)
.format(format.format)
.swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a);
__glcheck m_draw_tex_color[i].pixel_pack_settings().swap_bytes(format.swap_bytes).aligment(1);
__glcheck m_draw_tex_color[i].pixel_unpack_settings().swap_bytes(format.swap_bytes).aligment(1);
__glcheck draw_fbo.color[i] = m_draw_tex_color[i];
__glcheck draw_fbo.check();
}
// z16 is attached as a depth-only buffer, z24s8 as a combined depth/stencil buffer
switch (m_surface.depth_format)
{
case rsx::surface_depth_format::z16:
{
__glcheck m_draw_tex_depth_stencil.config()
.size({ (int)m_surface.width, (int)m_surface.height })
.type(gl::texture::type::ushort)
.format(gl::texture::format::depth)
.internal_format(gl::texture::internal_format::depth16);
__glcheck draw_fbo.depth = m_draw_tex_depth_stencil;
break;
}
case rsx::surface_depth_format::z24s8:
{
__glcheck m_draw_tex_depth_stencil.config()
.size({ (int)m_surface.width, (int)m_surface.height })
.type(gl::texture::type::uint_24_8)
.format(gl::texture::format::depth_stencil)
.internal_format(gl::texture::internal_format::depth24_stencil8);
__glcheck draw_fbo.depth_stencil = m_draw_tex_depth_stencil;
break;
}
default:
{
LOG_ERROR(RSX, "Bad depth format! (%d)", m_surface.depth_format);
assert(0);
break;
}
}
__glcheck m_draw_tex_depth_stencil.pixel_pack_settings().aligment(1);
__glcheck m_draw_tex_depth_stencil.pixel_unpack_settings().aligment(1);
}
if (!skip_reading)
{
read_buffers();
}
set_viewport();
// Select which colour attachments receive fragment output
switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{
case rsx::surface_target::none: break;
case rsx::surface_target::surface_a:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surface_b:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[1] );
break;
case rsx::surface_target::surfaces_a_b:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
break;
case rsx::surface_target::surfaces_a_b_c:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
break;
case rsx::surface_target::surfaces_a_b_c_d:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
break;
default:
LOG_ERROR(RSX, "Bad surface color target: %d", rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]);
break;
}
}
// Per-colour-surface register lookup tables, indexed by colour buffer slot (A, B, C, D).

// Register holding the memory offset of each colour surface
static const u32 mr_color_offset[rsx::limits::color_buffers_count] =
{
NV4097_SET_SURFACE_COLOR_AOFFSET,
NV4097_SET_SURFACE_COLOR_BOFFSET,
NV4097_SET_SURFACE_COLOR_COFFSET,
NV4097_SET_SURFACE_COLOR_DOFFSET
};
// Register holding the DMA context (memory location) of each colour surface
static const u32 mr_color_dma[rsx::limits::color_buffers_count] =
{
NV4097_SET_CONTEXT_DMA_COLOR_A,
NV4097_SET_CONTEXT_DMA_COLOR_B,
NV4097_SET_CONTEXT_DMA_COLOR_C,
NV4097_SET_CONTEXT_DMA_COLOR_D
};
// Register holding the pitch of each colour surface
static const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
{
NV4097_SET_SURFACE_PITCH_A,
NV4097_SET_SURFACE_PITCH_B,
NV4097_SET_SURFACE_PITCH_C,
NV4097_SET_SURFACE_PITCH_D
};
// Fills the draw textures from guest memory (or from the texture cache when possible).
// Colour and depth are each gated by their own configuration switch. Does nothing when
// no draw FBO exists. Disables the stencil test as a side effect.
void GLGSRender::read_buffers()
{
if (!draw_fbo)
return;
glDisable(GL_STENCIL_TEST);
if (rpcs3::state.config.rsx.opengl.read_color_buffers)
{
auto color_format = surface_color_format_to_gl(m_surface.color_format);
// Loads `count` consecutive colour buffers starting at slot `index`
auto read_color_buffers = [&](int index, int count)
{
u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
/**
* Reading color buffers is useless if write buffers is enabled. I haven't encountered a case where it is necessary
* since the output is usually fed back into the pipeline as a fragment shader input texture
* It is included here for completeness
*/
for (int i = index; i < index + count; ++i)
{
u32 offset = rsx::method_registers[mr_color_offset[i]];
u32 location = rsx::method_registers[mr_color_dma[i]];
u32 pitch = rsx::method_registers[mr_color_pitch[i]];
// Surfaces with a pitch of 64 or less are skipped
if (pitch <= 64)
continue;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
// Convert the host pointer back into a guest address
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
bool success = m_gl_texture_cache.explicit_writeback(m_draw_tex_color[i], texaddr, pitch);
//Fall back to slower methods if the image could not be fetched.
if (!success)
{
if (!color_buffer.tile)
{
// Linear surface: upload straight from guest memory
m_draw_tex_color[i].copy_from(color_buffer.ptr, color_format.format, color_format.type);
}
else
{
// Tiled surface: read it into a temporary buffer first, then upload
u32 range = pitch * height;
m_gl_texture_cache.remove_in_range(texaddr, range);
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
color_buffer.read(buffer.get(), width, height, pitch);
__glcheck m_draw_tex_color[i].copy_from(buffer.get(), color_format.format, color_format.type);
}
}
}
};
switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{
case rsx::surface_target::none:
break;
case rsx::surface_target::surface_a:
read_color_buffers(0, 1);
break;
case rsx::surface_target::surface_b:
read_color_buffers(1, 1);
break;
case rsx::surface_target::surfaces_a_b:
read_color_buffers(0, 2);
break;
case rsx::surface_target::surfaces_a_b_c:
read_color_buffers(0, 3);
break;
case rsx::surface_target::surfaces_a_b_c_d:
read_color_buffers(0, 4);
break;
}
}
if (rpcs3::state.config.rsx.opengl.read_depth_buffer)
{
//TODO: use pitch
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z];
if (pitch <= 64)
return;
u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
bool in_cache = m_gl_texture_cache.explicit_writeback(m_draw_tex_depth_stencil, depth_address, pitch);
if (in_cache)
return;
//Read failed. Fall back to slow s/w path...
auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
int pixel_size = get_pixel_size(m_surface.depth_format);
// NOTE(review): the buffer is sized from m_surface but filled using the texture's own
// width()/height() below -- these are assumed to match
gl::buffer pbo_depth;
__glcheck pbo_depth.create(m_surface.width * m_surface.height * pixel_size);
__glcheck pbo_depth.map([&](GLubyte* pixels)
{
// Shadows the outer depth_address; computed from the same registers
u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
if (m_surface.depth_format == rsx::surface_depth_format::z16)
{
// Element-wise copy; the be_t<> source converts on read
u16 *dst = (u16*)pixels;
const be_t<u16>* src = vm::ps3::_ptr<u16>(depth_address);
for (int i = 0, end = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height(); i < end; ++i)
{
dst[i] = src[i];
}
}
else
{
u32 *dst = (u32*)pixels;
const be_t<u32>* src = vm::ps3::_ptr<u32>(depth_address);
for (int i = 0, end = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height(); i < end; ++i)
{
dst[i] = src[i];
}
}
}, gl::buffer::access::write);
__glcheck m_draw_tex_depth_stencil.copy_from(pbo_depth, depth_format.second, depth_format.first);
}
}
// Hands the current draw textures to the texture cache as render targets so their
// contents are associated with the guest addresses of the surfaces.
// Colour and depth are each gated by their own configuration switch. Does nothing when
// no draw FBO exists.
void GLGSRender::write_buffers()
{
if (!draw_fbo)
return;
if (rpcs3::state.config.rsx.opengl.write_color_buffers)
{
// NOTE(review): color_format is not used in this branch
auto color_format = surface_color_format_to_gl(m_surface.color_format);
// Saves `count` consecutive colour buffers starting at slot `index`
auto write_color_buffers = [&](int index, int count)
{
// NOTE(review): width is not used in this lambda
u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
for (int i = index; i < index + count; ++i)
{
u32 offset = rsx::method_registers[mr_color_offset[i]];
u32 location = rsx::method_registers[mr_color_dma[i]];
u32 pitch = rsx::method_registers[mr_color_pitch[i]];
// Surfaces with a pitch of 64 or less are skipped
if (pitch <= 64)
continue;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
// Convert the host pointer back into a guest address
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
u32 range = pitch * height;
/**Even tiles are loaded as whole textures during read_buffers from testing.
* Need further evaluation to determine correct behavior. Separate paths for both show no difference,
* but using the GPU to perform the caching is many times faster.
*/
__glcheck m_gl_texture_cache.save_render_target(texaddr, range, m_draw_tex_color[i]);
}
};
switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{
case rsx::surface_target::none:
break;
case rsx::surface_target::surface_a:
write_color_buffers(0, 1);
break;
case rsx::surface_target::surface_b:
write_color_buffers(1, 1);
break;
case rsx::surface_target::surfaces_a_b:
write_color_buffers(0, 2);
break;
case rsx::surface_target::surfaces_a_b_c:
write_color_buffers(0, 3);
break;
case rsx::surface_target::surfaces_a_b_c_d:
write_color_buffers(0, 4);
break;
}
}
if (rpcs3::state.config.rsx.opengl.write_depth_buffer)
{
//TODO: use pitch
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z];
if (pitch <= 64)
return;
// NOTE(review): depth_format is not used in this branch
auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
// 2 bytes per pixel for z16, 4 bytes per pixel for everything else
u32 range = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height() * 2;
if (m_surface.depth_format != rsx::surface_depth_format::z16) range *= 2;
m_gl_texture_cache.save_render_target(depth_address, range, m_draw_tex_depth_stencil);
}
}
void GLGSRender::flip(int buffer)
{
//LOG_NOTICE(Log::RSX, "flip(%d)", buffer);

View File

@ -3,6 +3,7 @@
#include "gl_helpers.h"
#include "rsx_gl_texture.h"
#include "gl_texture_cache.h"
#include "gl_render_targets.h"
#define RSX_DEBUG 1
@ -22,6 +23,7 @@ private:
gl::glsl::program *m_program;
rsx::surface_info m_surface;
gl_render_targets m_rtts;
struct texture_buffer_pair
{
@ -38,9 +40,6 @@ public:
private:
GLProgramBuffer m_prog_buffer;
gl::texture m_draw_tex_color[rsx::limits::color_buffers_count];
gl::texture m_draw_tex_depth_stencil;
//buffer
gl::fbo m_flip_fbo;
gl::texture m_flip_tex_color;
@ -78,4 +77,7 @@ protected:
u64 timestamp() const override;
bool on_access_violation(u32 address, bool is_writing) override;
virtual std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
virtual std::array<std::vector<gsl::byte>, 2> copy_depth_stencil_buffer_to_memory() override;
};

View File

@ -387,6 +387,7 @@ namespace gl
public:
buffer() = default;
buffer(const buffer&) = delete;
buffer(GLuint id)
{

View File

@ -0,0 +1,349 @@
#include "stdafx.h"
#include "../rsx_methods.h"
#include "GLGSRender.h"
#include "Emu/state.h"
// Translates an RSX colour surface format into its GL description.
// Formats without a dedicated mapping are reported through LOG_ERROR and
// described as a8r8g8b8.
color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_format color_format)
{
	using tex = ::gl::texture;

	switch (color_format)
	{
	case rsx::surface_color_format::r5g6b5:
		return{ tex::type::ushort_5_6_5, tex::format::bgr, false, 3, 2 };

	case rsx::surface_color_format::a8r8g8b8:
		return{ tex::type::uint_8_8_8_8, tex::format::bgra, false, 4, 1 };

	case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
		// Alpha is forced to one for this format
		return{ tex::type::uint_8_8_8_8, tex::format::bgra, false, 4, 1,
			{ tex::channel::one, tex::channel::r, tex::channel::g, tex::channel::b } };

	case rsx::surface_color_format::w16z16y16x16:
		return{ tex::type::f16, tex::format::rgba, true, 4, 2 };

	case rsx::surface_color_format::w32z32y32x32:
		return{ tex::type::f32, tex::format::rgba, true, 4, 4 };

	// Known formats that have no mapping yet
	case rsx::surface_color_format::b8:
	case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
	case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
	case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
	case rsx::surface_color_format::g8b8:
	case rsx::surface_color_format::x32:
	case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
	case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
	case rsx::surface_color_format::a8b8g8r8:
	default:
		break;
	}

	LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", color_format);
	return{ tex::type::uint_8_8_8_8, tex::format::bgra, false, 4, 1 };
}
// Translates an RSX depth surface format into its GL type / format / internal format.
// Anything other than z16 is described as z24s8; formats that are neither
// are additionally reported through LOG_ERROR.
depth_format rsx::internals::surface_depth_format_to_gl(rsx::surface_depth_format depth_format)
{
	if (depth_format == rsx::surface_depth_format::z16)
	{
		return{ ::gl::texture::type::ushort, ::gl::texture::format::depth, ::gl::texture::internal_format::depth16 };
	}

	if (depth_format != rsx::surface_depth_format::z24s8)
	{
		LOG_ERROR(RSX, "Surface depth buffer: Unsupported surface depth format (0x%x)", depth_format);
	}

	return{ ::gl::texture::type::uint_24_8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth24_stencil8 };
}
// Returns the size in bytes of a single pixel for the given depth surface format.
// Throws when the format is neither z16 nor z24s8.
u8 rsx::internals::get_pixel_size(rsx::surface_depth_format format)
{
	if (format == rsx::surface_depth_format::z16)
		return 2;

	if (format == rsx::surface_depth_format::z24s8)
		return 4;

	throw EXCEPTION("Unknow depth format");
}
void GLGSRender::init_buffers(bool skip_reading)
{
u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT];
u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL];
u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL];
set_viewport();
if (draw_fbo && !m_rtts_dirty)
return;
m_rtts_dirty = false;
m_rtts.prepare_render_target(nullptr, surface_format, clip_horizontal, clip_vertical, rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]),
get_color_surface_addresses(), get_zeta_surface_address());
draw_fbo.recreate();
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (std::get<0>(m_rtts.m_bound_render_targets[i]) != 0)
__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);
}
if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0)
__glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil);
__glcheck draw_fbo.check();
switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{
case rsx::surface_target::none: break;
case rsx::surface_target::surface_a:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surface_b:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
break;
case rsx::surface_target::surfaces_a_b:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
break;
case rsx::surface_target::surfaces_a_b_c:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
break;
case rsx::surface_target::surfaces_a_b_c_d:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
break;
}
}
std::array<std::vector<gsl::byte>, 4> GLGSRender::copy_render_targets_to_memory()
{
int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
rsx::surface_info surface = {};
surface.unpack(rsx::method_registers[NV4097_SET_SURFACE_FORMAT]);
return m_rtts.get_render_targets_data(surface.color_format, clip_w, clip_h);
}
std::array<std::vector<gsl::byte>, 2> GLGSRender::copy_depth_stencil_buffer_to_memory()
{
int clip_w = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
int clip_h = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
rsx::surface_info surface = {};
surface.unpack(rsx::method_registers[NV4097_SET_SURFACE_FORMAT]);
return m_rtts.get_depth_stencil_data(surface.depth_format, clip_w, clip_h);
}
void GLGSRender::read_buffers()
{
if (!draw_fbo)
return;
glDisable(GL_STENCIL_TEST);
if (rpcs3::state.config.rsx.opengl.read_color_buffers)
{
auto color_format = rsx::internals::surface_color_format_to_gl(m_surface.color_format);
auto read_color_buffers = [&](int index, int count)
{
u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
for (int i = index; i < index + count; ++i)
{
u32 offset = rsx::method_registers[rsx::internals::mr_color_offset[i]];
u32 location = rsx::method_registers[rsx::internals::mr_color_dma[i]];
u32 pitch = rsx::method_registers[rsx::internals::mr_color_pitch[i]];
if (pitch <= 64)
continue;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
bool success = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch);
//Fall back to slower methods if the image could not be fetched from cache.
if (!success)
{
if (!color_buffer.tile)
{
__glcheck std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(color_buffer.ptr, color_format.format, color_format.type);
}
else
{
u32 range = pitch * height;
m_gl_texture_cache.remove_in_range(texaddr, range);
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
color_buffer.read(buffer.get(), width, height, pitch);
__glcheck std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(buffer.get(), color_format.format, color_format.type);
}
}
}
};
switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{
case rsx::surface_target::none:
break;
case rsx::surface_target::surface_a:
read_color_buffers(0, 1);
break;
case rsx::surface_target::surface_b:
read_color_buffers(1, 1);
break;
case rsx::surface_target::surfaces_a_b:
read_color_buffers(0, 2);
break;
case rsx::surface_target::surfaces_a_b_c:
read_color_buffers(0, 3);
break;
case rsx::surface_target::surfaces_a_b_c_d:
read_color_buffers(0, 4);
break;
}
}
if (rpcs3::state.config.rsx.opengl.read_depth_buffer)
{
//TODO: use pitch
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z];
if (pitch <= 64)
return;
u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
bool in_cache = m_gl_texture_cache.explicit_writeback((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch);
if (in_cache)
return;
//Read failed. Fall back to slow s/w path...
auto depth_format = rsx::internals::surface_depth_format_to_gl(m_surface.depth_format);
int pixel_size = rsx::internals::get_pixel_size(m_surface.depth_format);
gl::buffer pbo_depth;
__glcheck pbo_depth.create(m_surface.width * m_surface.height * pixel_size);
__glcheck pbo_depth.map([&](GLubyte* pixels)
{
u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
if (m_surface.depth_format == rsx::surface_depth_format::z16)
{
u16 *dst = (u16*)pixels;
const be_t<u16>* src = vm::ps3::_ptr<u16>(depth_address);
for (int i = 0, end = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height(); i < end; ++i)
{
dst[i] = src[i];
}
}
else
{
u32 *dst = (u32*)pixels;
const be_t<u32>* src = vm::ps3::_ptr<u32>(depth_address);
for (int i = 0, end = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height(); i < end; ++i)
{
dst[i] = src[i];
}
}
}, gl::buffer::access::write);
__glcheck std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type);
}
}
void GLGSRender::write_buffers()
{
if (!draw_fbo)
return;
//TODO: Detect when the data is actually being used by cell and issue download command on-demand (mark as not present?)
//Should also mark cached resources as dirty so that read buffers works out-of-the-box without modification
if (rpcs3::state.config.rsx.opengl.write_color_buffers)
{
auto color_format = rsx::internals::surface_color_format_to_gl(m_surface.color_format);
auto write_color_buffers = [&](int index, int count)
{
u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;
for (int i = index; i < index + count; ++i)
{
u32 offset = rsx::method_registers[rsx::internals::mr_color_offset[i]];
u32 location = rsx::method_registers[rsx::internals::mr_color_dma[i]];
u32 pitch = rsx::method_registers[rsx::internals::mr_color_pitch[i]];
if (pitch <= 64)
continue;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
u32 range = pitch * height;
/**Even tiles are loaded as whole textures during read_buffers from testing.
* Need further evaluation to determine correct behavior. Separate paths for both show no difference,
* but using the GPU to perform the caching is many times faster.
*/
__glcheck m_gl_texture_cache.save_render_target(texaddr, range, (*std::get<1>(m_rtts.m_bound_render_targets[i])));
}
};
switch (rsx::to_surface_target(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{
case rsx::surface_target::none:
break;
case rsx::surface_target::surface_a:
write_color_buffers(0, 1);
break;
case rsx::surface_target::surface_b:
write_color_buffers(1, 1);
break;
case rsx::surface_target::surfaces_a_b:
write_color_buffers(0, 2);
break;
case rsx::surface_target::surfaces_a_b_c:
write_color_buffers(0, 3);
break;
case rsx::surface_target::surfaces_a_b_c_d:
write_color_buffers(0, 4);
break;
}
}
if (rpcs3::state.config.rsx.opengl.write_depth_buffer)
{
//TODO: use pitch
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z];
if (pitch <= 64)
return;
auto depth_format = rsx::internals::surface_depth_format_to_gl(m_surface.depth_format);
u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
u32 range = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height() * 2;
if (m_surface.depth_format != rsx::surface_depth_format::z16) range *= 2;
m_gl_texture_cache.save_render_target(depth_address, range, (*std::get<1>(m_rtts.m_bound_depth_stencil)));
}
}

View File

@ -0,0 +1,196 @@
#pragma once
#include "../Common/surface_store.h"
#include "gl_helpers.h"
#include "stdafx.h"
#include "../RSXThread.h"
struct color_swizzle
{
gl::texture::channel a = gl::texture::channel::a;
gl::texture::channel r = gl::texture::channel::r;
gl::texture::channel g = gl::texture::channel::g;
gl::texture::channel b = gl::texture::channel::b;
color_swizzle() = default;
color_swizzle(gl::texture::channel a, gl::texture::channel r, gl::texture::channel g, gl::texture::channel b)
: a(a), r(r), g(g), b(b)
{
}
};
// Description of how an RSX colour surface format is represented on the GL side.
// Instances are built by brace-initialisation in surface_color_format_to_gl, so the
// member order below is part of the contract.
struct color_format
{
// GL pixel data type handed to the texture configuration
gl::texture::type type;
// GL pixel format (channel layout) handed to the texture configuration
gl::texture::format format;
// Forwarded to pixel_pack_settings()/pixel_unpack_settings().swap_bytes()
bool swap_bytes;
// Number of channels per pixel
int channel_count;
// Size of one channel; multiplied with channel_count to size download buffers
int channel_size;
// Channel remapping; identity unless the format table overrides it
color_swizzle swizzle;
};
// Description of how an RSX depth surface format is represented on the GL side.
// Instances are built by brace-initialisation in surface_depth_format_to_gl, so the
// member order below is part of the contract.
struct depth_format
{
// GL pixel data type used for uploads/downloads
gl::texture::type type;
// GL pixel format (depth or depth_stencil)
gl::texture::format format;
// GL internal (storage) format the texture is configured with
gl::texture::internal_format internal_format;
};
namespace rsx
{
namespace internals
{
// Translates an RSX colour surface format into its GL description
color_format surface_color_format_to_gl(rsx::surface_color_format color_format);
// Translates an RSX depth surface format into its GL description
depth_format surface_depth_format_to_gl(rsx::surface_depth_format depth_format);
// Size in bytes of one pixel of the given depth surface format
u8 get_pixel_size(rsx::surface_depth_format format);
// Per-colour-surface register lookup tables, indexed by colour buffer slot (A, B, C, D).
// Register holding the memory offset of each colour surface
const u32 mr_color_offset[rsx::limits::color_buffers_count] =
{
NV4097_SET_SURFACE_COLOR_AOFFSET,
NV4097_SET_SURFACE_COLOR_BOFFSET,
NV4097_SET_SURFACE_COLOR_COFFSET,
NV4097_SET_SURFACE_COLOR_DOFFSET
};
// Register holding the DMA context (memory location) of each colour surface
const u32 mr_color_dma[rsx::limits::color_buffers_count] =
{
NV4097_SET_CONTEXT_DMA_COLOR_A,
NV4097_SET_CONTEXT_DMA_COLOR_B,
NV4097_SET_CONTEXT_DMA_COLOR_C,
NV4097_SET_CONTEXT_DMA_COLOR_D
};
// Register holding the pitch of each colour surface
const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
{
NV4097_SET_SURFACE_PITCH_A,
NV4097_SET_SURFACE_PITCH_B,
NV4097_SET_SURFACE_PITCH_C,
NV4097_SET_SURFACE_PITCH_D
};
}
}
struct gl_render_target_traits
{
using surface_storage_type = std::unique_ptr<gl::texture>;
using surface_type = gl::texture*;
using command_list_type = void*;
using download_buffer_object = std::vector<u8>;
static
std::unique_ptr<gl::texture> create_new_surface(
u32 address,
rsx::surface_color_format surface_color_format,
size_t width,
size_t height
)
{
std::unique_ptr<gl::texture> result(new gl::texture());
auto format = rsx::internals::surface_color_format_to_gl(surface_color_format);
result->recreate(gl::texture::target::texture2D);
__glcheck result->config()
.size({ (int)width, (int)height })
.type(format.type)
.format(format.format)
.swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a)
.apply();
__glcheck result->pixel_pack_settings().swap_bytes(format.swap_bytes).aligment(1);
__glcheck result->pixel_unpack_settings().swap_bytes(format.swap_bytes).aligment(1);
return result;
}
static
std::unique_ptr<gl::texture> create_new_surface(
u32 address,
rsx::surface_depth_format surface_depth_format,
size_t width,
size_t height
)
{
std::unique_ptr<gl::texture> result(new gl::texture());
auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format);
result->recreate(gl::texture::target::texture2D);
__glcheck result->config()
.size({ (int)width, (int)height })
.type(format.type)
.format(format.format)
.internal_format(format.internal_format)
.apply();
__glcheck result->pixel_pack_settings().aligment(1);
__glcheck result->pixel_unpack_settings().aligment(1);
return result;
}
static void prepare_rtt_for_drawing(void *, gl::texture*) {}
static void prepare_rtt_for_sampling(void *, gl::texture*) {}
static void prepare_ds_for_drawing(void *, gl::texture*) {}
static void prepare_ds_for_sampling(void *, gl::texture*) {}
static
bool rtt_has_format_width_height(const std::unique_ptr<gl::texture> &rtt, rsx::surface_color_format surface_color_format, size_t width, size_t height)
{
// TODO: check format
return rtt->width() == width && rtt->height() == height;
}
static
bool ds_has_format_width_height(const std::unique_ptr<gl::texture> &rtt, rsx::surface_depth_format surface_depth_stencil_format, size_t width, size_t height)
{
// TODO: check format
return rtt->width() == width && rtt->height() == height;
}
// Note : pbo breaks fbo here so use classic texture copy
static std::vector<u8> issue_download_command(gl::texture* color_buffer, rsx::surface_color_format color_format, size_t width, size_t height)
{
auto pixel_format = rsx::internals::surface_color_format_to_gl(color_format);
std::vector<u8> result(width * height * pixel_format.channel_count * pixel_format.channel_size);
color_buffer->bind();
glGetTexImage(GL_TEXTURE_2D, 0, (GLenum)pixel_format.format, (GLenum)pixel_format.type, result.data());
return result;
}
static std::vector<u8> issue_depth_download_command(gl::texture* depth_stencil_buffer, rsx::surface_depth_format depth_format, size_t width, size_t height)
{
std::vector<u8> result(width * height * 4);
auto pixel_format = rsx::internals::surface_depth_format_to_gl(depth_format);
depth_stencil_buffer->bind();
glGetTexImage(GL_TEXTURE_2D, 0, (GLenum)pixel_format.format, (GLenum)pixel_format.type, result.data());
return result;
}
/**
 * Returns a value-initialised buffer of width * height * 4 bytes.
 * No GL call is made and depth_stencil_buffer is never touched.
 */
static std::vector<u8> issue_stencil_download_command(gl::texture* depth_stencil_buffer, size_t width, size_t height)
{
	return std::vector<u8>(width * height * 4);
}
/**
 * Wraps an already downloaded buffer in a read-only byte span.
 * The span refers to the vector's storage; nothing is copied.
 * The element count goes through gsl::narrow<int>.
 */
static
gsl::span<const gsl::byte> map_downloaded_buffer(const std::vector<u8> &buffer)
{
	const gsl::byte *first = reinterpret_cast<const gsl::byte*>(buffer.data());
	const int count = gsl::narrow<int>(buffer.size());

	return{ first, count };
}
/**
 * Counterpart of map_downloaded_buffer; the body is empty, so nothing is released here.
 */
static
void unmap_downloaded_buffer(const std::vector<u8> &) {}
/**
 * Returns the raw pointer held by the given unique_ptr; ownership stays with in.
 */
static gl::texture* get(const std::unique_ptr<gl::texture> &in)
{
	gl::texture *raw = in.get();
	return raw;
}
};
/**
 * rsx::surface_store instantiated with gl_render_target_traits.
 * Adds no members of its own.
 */
struct gl_render_targets : rsx::surface_store<gl_render_target_traits>
{
};

View File

@ -10,6 +10,7 @@
#include <unordered_map>
#include "GLGSRender.h"
#include "gl_render_targets.h"
#include "../Common/TextureUtils.h"
#include <chrono>
@ -423,42 +424,48 @@ namespace gl
}
}
void upload_texture(int index, rsx::texture &tex, rsx::gl::texture &gl_texture)
void upload_texture(int index, rsx::texture &tex, rsx::gl::texture &gl_texture, gl_render_targets &m_rtts)
{
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
const u32 range = (u32)get_texture_size(tex);
glActiveTexture(GL_TEXTURE0 + index);
/**
* Give precedence to rtt data obtained through read/write buffers
*/
cached_rtt *rtt = find_cached_rtt(texaddr, range);
if (rtt && !rtt->is_dirty)
{
if (!rtt->is_depth)
{
u32 real_id = gl_texture.id();
u32 real_id = gl_texture.id();
glActiveTexture(GL_TEXTURE0 + index);
gl_texture.set_id(rtt->copy_glid);
gl_texture.bind();
gl_texture.set_id(rtt->copy_glid);
gl_texture.bind();
gl_texture.set_id(real_id);
}
else
{
LOG_NOTICE(RSX, "Depth RTT found from 0x%X, Trying to upload width dims: %d x %d, Saved as %d x %d", rtt->data_addr, tex.width(), tex.height(), rtt->current_width, rtt->current_height);
//The texture should have already been loaded through the writeback interface call
//Bind it directly
u32 real_id = gl_texture.id();
gl_texture.set_id(real_id);
}
glActiveTexture(GL_TEXTURE0 + index);
gl_texture.set_id(rtt->copy_glid);
gl_texture.bind();
gl_texture.set_id(real_id);
}
/**
* Check for sampleable rtts from previous render passes
*/
gl::texture *texptr = nullptr;
if (texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
{
texptr->bind();
return;
}
else if (rtt)
LOG_NOTICE(RSX, "RTT texture for address 0x%X is dirty!", texaddr);
if (texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
{
texptr->bind();
return;
}
/**
* If all the above failed, then it's probably a generic texture.
* Search in cache and upload/bind
*/
gl_cached_texture *obj = nullptr;
@ -469,7 +476,6 @@ namespace gl
{
u32 real_id = gl_texture.id();
glActiveTexture(GL_TEXTURE0 + index);
gl_texture.set_id(obj->gl_id);
gl_texture.bind();

View File

@ -149,6 +149,9 @@ namespace rsx
void texture::init(int index, rsx::texture& tex)
{
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
//TODO: safe init
if (!m_id)
{
create();
@ -157,12 +160,6 @@ namespace rsx
glActiveTexture(GL_TEXTURE0 + index);
bind();
const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
//LOG_WARNING(RSX, "texture addr = 0x%x, width = %d, height = %d, max_aniso=%d, mipmap=%d, remap=0x%x, zfunc=0x%x, wraps=0x%x, wrapt=0x%x, wrapr=0x%x, minlod=0x%x, maxlod=0x%x",
// m_offset, m_width, m_height, m_maxaniso, m_mipmap, m_remap, m_zfunc, m_wraps, m_wrapt, m_wrapr, m_minlod, m_maxlod);
//TODO: safe init
u32 full_format = tex.format();
u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);

View File

@ -79,6 +79,7 @@
<ClInclude Include="Emu\RSX\GL\GLProgramBuffer.h" />
<ClInclude Include="Emu\RSX\GL\GLVertexProgram.h" />
<ClInclude Include="Emu\RSX\GL\gl_helpers.h" />
<ClInclude Include="Emu\RSX\GL\gl_render_targets.h" />
<ClInclude Include="Emu\RSX\GL\gl_texture_cache.h" />
<ClInclude Include="Emu\RSX\GL\OpenGL.h" />
<ClInclude Include="Emu\RSX\GL\rsx_gl_texture.h" />
@ -89,6 +90,7 @@
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\gl_helpers.cpp" />
<ClCompile Include="Emu\RSX\GL\gl_render_targets.cpp" />
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\rsx_gl_texture.cpp" />
</ItemGroup>

View File

@ -8,6 +8,7 @@
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\gl_render_targets.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\rsx_gl_texture.h" />
@ -19,5 +20,7 @@
<ClInclude Include="Emu\RSX\GL\GLProgramBuffer.h" />
<ClInclude Include="Emu\RSX\GL\GLVertexProgram.h" />
<ClInclude Include="Emu\RSX\GL\OpenGL.h" />
<ClInclude Include="Emu\RSX\GL\gl_texture_cache.h" />
<ClInclude Include="Emu\RSX\GL\gl_render_targets.h" />
</ItemGroup>
</Project>