rsx: Major fixes

- Handle aliased depth + color target by disabling depth writes. This looks to be the correct way
- Add support for generic passes that cannot be done using general imaging operations. Lays the framework for tons of features and effects
- Implement RGBA->D24D8 casting. Sometimes games will split depth texture into RGBA8 then use the new RGBA8 as a depth texture directly
-- This happens a lot in PS3 games and I'm not sure why. It's likely the PS3 did not sample fp values with linear filtering, so this is a workaround
-- Only implemented for OpenGL at the moment
-- Requires a workaround for an AMD driver bug
This commit is contained in:
kd-11 2017-11-15 15:02:59 +03:00
parent 8646f51fa3
commit 33f3a3e014
13 changed files with 272 additions and 31 deletions

View File

@ -153,12 +153,14 @@ namespace rsx
surface_storage_type new_surface_storage;
surface_type old_surface = nullptr;
surface_type new_surface = nullptr;
surface_type convert_surface = nullptr;
// Remove any depth surfaces occupying this memory address (TODO: Discard all overlapping range)
auto aliased_depth_surface = m_depth_stencil_storage.find(address);
if (aliased_depth_surface != m_depth_stencil_storage.end())
{
Traits::notify_surface_invalidated(aliased_depth_surface->second);
convert_surface = Traits::get(aliased_depth_surface->second);
invalidated_resources.push_back(std::move(aliased_depth_surface->second));
m_depth_stencil_storage.erase(aliased_depth_surface);
}
@ -178,7 +180,10 @@ namespace rsx
m_render_targets_storage.erase(address);
}
//Search invalidated resources for a suitable surface
// Select source of original data if any
auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface;
// Search invalidated resources for a suitable surface
for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++)
{
auto &rtt = *It;
@ -197,7 +202,7 @@ namespace rsx
invalidated_resources.erase(It);
new_surface = Traits::get(new_surface_storage);
Traits::invalidate_rtt_surface_contents(command_list, new_surface, old_surface, true);
Traits::invalidate_rtt_surface_contents(command_list, new_surface, contents_to_copy, true);
Traits::prepare_rtt_for_drawing(command_list, new_surface);
break;
}
@ -217,7 +222,7 @@ namespace rsx
return new_surface;
}
m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, old_surface, std::forward<Args>(extra_params)...);
m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, contents_to_copy, std::forward<Args>(extra_params)...);
return Traits::get(m_render_targets_storage[address]);
}
@ -232,12 +237,14 @@ namespace rsx
surface_storage_type new_surface_storage;
surface_type old_surface = nullptr;
surface_type new_surface = nullptr;
surface_type convert_surface = nullptr;
// Remove any color surfaces occupying this memory range (TODO: Discard all overlapping surfaces)
auto aliased_rtt_surface = m_render_targets_storage.find(address);
if (aliased_rtt_surface != m_render_targets_storage.end())
{
Traits::notify_surface_invalidated(aliased_rtt_surface->second);
convert_surface = Traits::get(aliased_rtt_surface->second);
invalidated_resources.push_back(std::move(aliased_rtt_surface->second));
m_render_targets_storage.erase(aliased_rtt_surface);
}
@ -257,6 +264,9 @@ namespace rsx
m_depth_stencil_storage.erase(address);
}
// Select source of original data if any
auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface;
//Search invalidated resources for a suitable surface
for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++)
{
@ -276,7 +286,7 @@ namespace rsx
new_surface = Traits::get(new_surface_storage);
Traits::prepare_ds_for_drawing(command_list, new_surface);
Traits::invalidate_depth_surface_contents(command_list, new_surface, old_surface, true);
Traits::invalidate_depth_surface_contents(command_list, new_surface, contents_to_copy, true);
break;
}
}
@ -295,7 +305,7 @@ namespace rsx
return new_surface;
}
m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, old_surface, std::forward<Args>(extra_params)...);
m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, contents_to_copy, std::forward<Args>(extra_params)...);
return Traits::get(m_depth_stencil_storage[address]);
}
public:

View File

@ -464,6 +464,13 @@ void GLGSRender::end()
ds->set_cleared();
}
if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
{
m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id());
ds->old_contents = nullptr;
}
if (g_cfg.video.strict_rendering_mode)
{
if (ds->old_contents != nullptr)
@ -479,6 +486,11 @@ void GLGSRender::end()
}
}
}
else
{
// Old contents are one use only. Keep the depth conversion check from firing over and over
if (ds) ds->old_contents = nullptr;
}
glEnable(GL_SCISSOR_TEST);
@ -758,6 +770,8 @@ void GLGSRender::on_init_thread()
glEnable(GL_CLIP_DISTANCE0 + 4);
glEnable(GL_CLIP_DISTANCE0 + 5);
m_depth_converter.create();
m_gl_texture_cache.initialize();
m_thread_id = std::this_thread::get_id();
@ -826,6 +840,7 @@ void GLGSRender::on_exit()
m_text_printer.close();
m_gl_texture_cache.destroy();
m_depth_converter.destroy();
for (u32 i = 0; i < occlusion_query_count; ++i)
{

View File

@ -8,6 +8,7 @@
#include "define_new_memleakdetect.h"
#include "GLProgramBuffer.h"
#include "GLTextOut.h"
#include "GLOverlays.h"
#include "../rsx_utils.h"
#include "../rsx_cache.h"
@ -348,6 +349,7 @@ private:
bool manually_flush_ring_buffers = false;
gl::text_writer m_text_printer;
gl::depth_convert_pass m_depth_converter;
std::mutex queue_guard;
std::list<work_item> work_queue;

View File

@ -2260,22 +2260,16 @@ namespace gl
return m_location;
}
void operator = (int rhs) const { m_program.use(); glUniform1i(location(), rhs); }
void operator = (float rhs) const { m_program.use(); glUniform1f(location(), rhs); }
//void operator = (double rhs) const { m_program.use(); glUniform1d(location(), rhs); }
void operator = (const color1i& rhs) const { m_program.use(); glUniform1i(location(), rhs.r); }
void operator = (const color1f& rhs) const { m_program.use(); glUniform1f(location(), rhs.r); }
//void operator = (const color1d& rhs) const { m_program.use(); glUniform1d(location(), rhs.r); }
void operator = (const color2i& rhs) const { m_program.use(); glUniform2i(location(), rhs.r, rhs.g); }
void operator = (const color2f& rhs) const { m_program.use(); glUniform2f(location(), rhs.r, rhs.g); }
//void operator = (const color2d& rhs) const { m_program.use(); glUniform2d(location(), rhs.r, rhs.g); }
void operator = (const color3i& rhs) const { m_program.use(); glUniform3i(location(), rhs.r, rhs.g, rhs.b); }
void operator = (const color3f& rhs) const { m_program.use(); glUniform3f(location(), rhs.r, rhs.g, rhs.b); }
//void operator = (const color3d& rhs) const { m_program.use(); glUniform3d(location(), rhs.r, rhs.g, rhs.b); }
void operator = (const color4i& rhs) const { m_program.use(); glUniform4i(location(), rhs.r, rhs.g, rhs.b, rhs.a); }
void operator = (const color4f& rhs) const { m_program.use(); glUniform4f(location(), rhs.r, rhs.g, rhs.b, rhs.a); }
//void operator = (const color4d& rhs) const { m_program.use(); glUniform4d(location(), rhs.r, rhs.g, rhs.b, rhs.a); }
void operator = (int rhs) const { glProgramUniform1i(m_program.id(), location(), rhs); }
void operator = (float rhs) const { glProgramUniform1f(m_program.id(), location(), rhs); }
void operator = (const color1i& rhs) const { glProgramUniform1i(m_program.id(), location(), rhs.r); }
void operator = (const color1f& rhs) const { glProgramUniform1f(m_program.id(), location(), rhs.r); }
void operator = (const color2i& rhs) const { glProgramUniform2i(m_program.id(), location(), rhs.r, rhs.g); }
void operator = (const color2f& rhs) const { glProgramUniform2f(m_program.id(), location(), rhs.r, rhs.g); }
void operator = (const color3i& rhs) const { glProgramUniform3i(m_program.id(), location(), rhs.r, rhs.g, rhs.b); }
void operator = (const color3f& rhs) const { glProgramUniform3f(m_program.id(), location(), rhs.r, rhs.g, rhs.b); }
void operator = (const color4i& rhs) const { glProgramUniform4i(m_program.id(), location(), rhs.r, rhs.g, rhs.b, rhs.a); }
void operator = (const color4f& rhs) const { glProgramUniform4f(m_program.id(), location(), rhs.r, rhs.g, rhs.b, rhs.a); }
};
class attrib_t

View File

@ -0,0 +1,201 @@
#pragma once
#include "stdafx.h"
#include "GLHelpers.h"
namespace gl
{
struct overlay_pass
{
std::string fs_src;
std::string vs_src;
gl::glsl::program program_handle;
gl::glsl::shader vs;
gl::glsl::shader fs;
gl::fbo fbo;
bool compiled = false;
void create()
{
if (!compiled)
{
fs.create(gl::glsl::shader::type::fragment);
fs.source(fs_src);
fs.compile();
vs.create(gl::glsl::shader::type::vertex);
vs.source(vs_src);
vs.compile();
program_handle.create();
program_handle.attach(vs);
program_handle.attach(fs);
program_handle.make();
fbo.create();
compiled = true;
}
}
void destroy()
{
if (compiled)
{
program_handle.remove();
vs.remove();
fs.remove();
fbo.remove();
compiled = false;
}
}
virtual void on_load() {}
virtual void on_unload() {}
virtual void bind_resources() {}
virtual void cleanup_resources() {}
virtual void emit_geometry()
{
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
virtual void run(u16 w, u16 h, GLuint target_texture, bool depth_target)
{
if (!compiled)
{
LOG_ERROR(RSX, "You must initialize overlay passes with create() before calling run()");
return;
}
GLint program;
GLint old_fbo;
GLint depth_func;
GLint viewport[4];
GLboolean color_writes[4];
GLboolean depth_write;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
glBindFramebuffer(GL_FRAMEBUFFER, fbo.id());
if (depth_target)
{
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, target_texture, 0);
glDrawBuffer(GL_NONE);
}
else
{
GLenum buffer = GL_COLOR_ATTACHMENT0;
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, target_texture, 0);
glDrawBuffers(1, &buffer);
}
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE)
{
// Push rasterizer state
glGetIntegerv(GL_VIEWPORT, viewport);
glGetBooleanv(GL_COLOR_WRITEMASK, color_writes);
glGetBooleanv(GL_DEPTH_WRITEMASK, &depth_write);
glGetIntegerv(GL_CURRENT_PROGRAM, &program);
glGetIntegerv(GL_DEPTH_FUNC, &depth_func);
GLboolean scissor_enabled = glIsEnabled(GL_SCISSOR_TEST);
GLboolean depth_test_enabled = glIsEnabled(GL_DEPTH_TEST);
GLboolean cull_face_enabled = glIsEnabled(GL_CULL_FACE);
GLboolean blend_enabled = glIsEnabled(GL_BLEND);
GLboolean stencil_test_enabled = glIsEnabled(GL_STENCIL_TEST);
// Set initial state
glViewport(0, 0, w, h);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(depth_target ? GL_TRUE : GL_FALSE);
// AMD driver bug, disabling depth test doesnt work when doing depth replace (gl_FragDepth writes still go through the depth test)
glDepthFunc(GL_ALWAYS);
glEnable(GL_DEPTH_TEST);
if (scissor_enabled) glDisable(GL_SCISSOR_TEST);
if (cull_face_enabled) glDisable(GL_CULL_FACE);
if (blend_enabled) glDisable(GL_BLEND);
if (stencil_test_enabled) glDisable(GL_STENCIL_TEST);
// Render
program_handle.use();
on_load();
bind_resources();
emit_geometry();
// Clean up
if (depth_target)
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
else
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glBindFramebuffer(GL_FRAMEBUFFER, old_fbo);
glUseProgram((GLuint)program);
glViewport(viewport[0], viewport[1], viewport[2], viewport[3]);
glColorMask(color_writes[0], color_writes[1], color_writes[2], color_writes[3]);
glDepthMask(depth_write);
glDepthFunc(depth_func);
if (!depth_test_enabled) glDisable(GL_DEPTH_TEST);
if (scissor_enabled) glEnable(GL_SCISSOR_TEST);
if (cull_face_enabled) glEnable(GL_CULL_FACE);
if (blend_enabled) glEnable(GL_BLEND);
if (stencil_test_enabled) glEnable(GL_STENCIL_TEST);
}
else
{
LOG_ERROR(RSX, "Overlay pass failed because framebuffer was not complete. Run with debug output enabled to diagnose the problem");
}
}
};
// Overlay pass that writes gl_FragDepth from the r, g and b channels of a source texture.
struct depth_convert_pass : overlay_pass
{
	// Only installs the shader sources; compilation happens later in overlay_pass::create().
	depth_convert_pass()
	{
		// Vertex stage: positions and texture coordinates are generated from gl_VertexID,
		// so no vertex buffers are required.
		vs_src =
			"#version 420\n\n"
			"out vec2 tc0;\n"
			"\n"
			"void main()\n"
			"{\n"
			" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
			" vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n"
			" gl_Position = vec4(positions[gl_VertexID % 4], 0., 1.);\n"
			" tc0 = coords[gl_VertexID % 4];\n"
			"}\n";

		// Fragment stage: samples the source at binding 31 and recombines r/g/b into a depth value.
		fs_src =
			"#version 420\n\n"
			"in vec2 tc0;\n"
			"layout(binding=31) uniform sampler2D fs0;\n"
			"\n"
			"void main()\n"
			"{\n"
			" vec4 rgba_in = texture(fs0, tc0);\n"
			" gl_FragDepth = rgba_in.r * 0.99609 + rgba_in.g * 0.00389 + rgba_in.b * 0.00002;\n"
			"}\n";
	}

	// Converts `source` into the depth texture `target` using a w x h viewport.
	void run(u16 w, u16 h, GLuint target, GLuint source)
	{
		// The sampler in the fragment shader is declared with binding=31, so the source goes on unit 31
		glActiveTexture(GL_TEXTURE31);
		glBindTexture(GL_TEXTURE_2D, source);

		// The target is always attached as a depth attachment for this pass
		const bool render_to_depth = true;
		overlay_pass::run(w, h, target, render_to_depth);
	}
};
}

View File

@ -185,7 +185,7 @@ void GLGSRender::init_buffers(bool skip_reading)
const auto depth_format = rsx::method_registers.surface_depth_fmt();
const auto required_color_pitch = rsx::utility::get_packed_pitch(surface_format, clip_horizontal);
const auto required_z_pitch = depth_format == rsx::surface_depth_format::z16 ? clip_horizontal * 2 : clip_horizontal * 4;
const u32 required_z_pitch = depth_format == rsx::surface_depth_format::z16 ? clip_horizontal * 2 : clip_horizontal * 4;
if (depth_address && zeta_pitch < required_z_pitch)
depth_address = 0;
@ -207,8 +207,9 @@ void GLGSRender::init_buffers(bool skip_reading)
if (surface_addresses[index] == depth_address &&
zeta_pitch >= required_z_pitch)
{
LOG_ERROR(RSX, "Some game dev set up the MRT to write to the same address as depth and color attachment. Not sure how to deal with that so the draw is discarded.");
framebuffer_status_valid = false;
//LOG_ERROR(RSX, "Some game dev set up the MRT to write to the same address as depth and color attachment. Not sure how to deal with that so the draw is discarded.");
//framebuffer_status_valid = false;
depth_address = 0;
break;
}
}

View File

@ -184,8 +184,7 @@ struct gl_render_target_traits
__glcheck result->pixel_pack_settings().swap_bytes(format.swap_bytes).aligment(1);
__glcheck result->pixel_unpack_settings().swap_bytes(format.swap_bytes).aligment(1);
if (old_surface != nullptr && old_surface->get_compatible_internal_format() == internal_fmt)
result->old_contents = old_surface;
result->old_contents = old_surface;
result->set_cleared();
result->update_surface();
@ -227,8 +226,7 @@ struct gl_render_target_traits
result->set_native_pitch(native_pitch);
result->set_compatible_format(format.internal_format);
if (old_surface != nullptr && old_surface->get_compatible_internal_format() == format.internal_format)
result->old_contents = old_surface;
result->old_contents = old_surface;
result->update_surface();
return result;

View File

@ -2354,8 +2354,9 @@ void VKGSRender::prepare_rtts()
if (surface_addresses[index] == zeta_address &&
zeta_pitch >= required_z_pitch)
{
LOG_ERROR(RSX, "Some game dev set up the MRT to write to the same address as depth and color attachment. Not sure how to deal with that so the draw is discarded.");
framebuffer_status_valid = false;
//LOG_ERROR(RSX, "Some game dev set up the MRT to write to the same address as depth and color attachment. Not sure how to deal with that so the draw is discarded.");
//framebuffer_status_valid = false;
zeta_address = 0;
break;
}
}

View File

@ -0,0 +1,12 @@
#pragma once
#include "VKHelpers.h"
#include "VKVertexProgram.h"
#include "VKFragmentProgram.h"
namespace vk
{
// Placeholder type: declared with no members or behaviour yet.
struct overlay_pass
{
//TODO: not implemented; this struct is intentionally empty for now
};
}

View File

@ -63,6 +63,7 @@
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">
<ClCompile />
<ClCompile />
</ItemDefinitionGroup>
<ItemGroup>
<ProjectReference Include="emucore.vcxproj">
@ -70,6 +71,7 @@
</ProjectReference>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLOverlays.h" />
<ClInclude Include="Emu\RSX\GL\GLTextOut.h" />
<ClInclude Include="Emu\RSX\GL\GLCommonDecompiler.h" />
<ClInclude Include="Emu\RSX\GL\GLFragmentProgram.h" />

View File

@ -24,5 +24,6 @@
<ClInclude Include="Emu\RSX\GL\GLTextureCache.h" />
<ClInclude Include="Emu\RSX\GL\GLRenderTargets.h" />
<ClInclude Include="Emu\RSX\GL\GLTextOut.h" />
<ClInclude Include="Emu\RSX\GL\GLOverlays.h" />
</ItemGroup>
</Project>

View File

@ -28,6 +28,7 @@
<ClInclude Include="Emu\RSX\VK\VKFragmentProgram.h" />
<ClInclude Include="Emu\RSX\VK\VKGSRender.h" />
<ClInclude Include="Emu\RSX\VK\VKHelpers.h" />
<ClInclude Include="Emu\RSX\VK\VKOverlays.h" />
<ClInclude Include="Emu\RSX\VK\VKProgramBuffer.h" />
<ClInclude Include="Emu\RSX\VK\VKRenderTargets.h" />
<ClInclude Include="Emu\RSX\VK\VKTextOut.h" />
@ -101,4 +102,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -40,6 +40,9 @@
<ClInclude Include="Emu\RSX\VK\VKTextOut.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\VKOverlays.h">
<Filter>Source Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Emu\RSX\VK\VKGSRender.cpp">