mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-11 06:40:39 +00:00
gl/vk: Refactor draw call handling and stub shader interpreter
- Refactors backend draw call management to make it easier to extend functionality. - Stubs shader interpreter functionality.
This commit is contained in:
parent
a0509328d4
commit
4965bf7d7a
@ -409,11 +409,13 @@ target_sources(rpcs3_emu PRIVATE
|
||||
RSX/Capture/rsx_capture.cpp
|
||||
RSX/Capture/rsx_replay.cpp
|
||||
RSX/GL/GLCommonDecompiler.cpp
|
||||
RSX/GL/GLDraw.cpp
|
||||
RSX/GL/GLFragmentProgram.cpp
|
||||
RSX/GL/GLGSRender.cpp
|
||||
RSX/GL/GLHelpers.cpp
|
||||
RSX/GL/GLPresent.cpp
|
||||
RSX/GL/GLRenderTargets.cpp
|
||||
RSX/GL/GLShaderInterpreter.cpp
|
||||
RSX/GL/GLTexture.cpp
|
||||
RSX/GL/GLVertexBuffers.cpp
|
||||
RSX/GL/GLVertexProgram.cpp
|
||||
@ -425,6 +427,7 @@ if(TARGET 3rdparty_vulkan)
|
||||
RSX/VK/VKCommandStream.cpp
|
||||
RSX/VK/VKCommonDecompiler.cpp
|
||||
RSX/VK/VKDMA.cpp
|
||||
RSX/VK/VKDraw.cpp
|
||||
RSX/VK/VKFormats.cpp
|
||||
RSX/VK/VKFragmentProgram.cpp
|
||||
RSX/VK/VKFramebuffer.cpp
|
||||
@ -436,6 +439,7 @@ if(TARGET 3rdparty_vulkan)
|
||||
RSX/VK/VKRenderPass.cpp
|
||||
RSX/VK/VKResolveHelper.cpp
|
||||
RSX/VK/VKResourceManager.cpp
|
||||
RSX/VK/VKShaderInterpreter.cpp
|
||||
RSX/VK/VKTexture.cpp
|
||||
RSX/VK/VKVertexBuffers.cpp
|
||||
RSX/VK/VKVertexProgram.cpp
|
||||
|
746
rpcs3/Emu/RSX/GL/GLDraw.cpp
Normal file
746
rpcs3/Emu/RSX/GL/GLDraw.cpp
Normal file
@ -0,0 +1,746 @@
|
||||
#include "stdafx.h"
|
||||
#include "GLGSRender.h"
|
||||
#include "../Common/BufferUtils.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
// Translates an RSX depth/alpha/stencil comparison function into the
// equivalent OpenGL comparison enum. Throws on values outside the enum.
GLenum comparison_op(rsx::comparison_function op)
{
	switch (op)
	{
	case rsx::comparison_function::never:            return GL_NEVER;
	case rsx::comparison_function::less:             return GL_LESS;
	case rsx::comparison_function::equal:            return GL_EQUAL;
	case rsx::comparison_function::less_or_equal:    return GL_LEQUAL;
	case rsx::comparison_function::greater:          return GL_GREATER;
	case rsx::comparison_function::not_equal:        return GL_NOTEQUAL;
	case rsx::comparison_function::greater_or_equal: return GL_GEQUAL;
	case rsx::comparison_function::always:           return GL_ALWAYS;
	default:
		fmt::throw_exception("Unsupported comparison op 0x%X" HERE, static_cast<u32>(op));
	}
}
|
||||
|
||||
// Translates an RSX stencil operation into the equivalent OpenGL stencil
// op enum. Throws on values outside the enum.
GLenum stencil_op(rsx::stencil_op op)
{
	switch (op)
	{
	case rsx::stencil_op::invert:    return GL_INVERT;
	case rsx::stencil_op::keep:      return GL_KEEP;
	case rsx::stencil_op::zero:      return GL_ZERO;
	case rsx::stencil_op::replace:   return GL_REPLACE;
	case rsx::stencil_op::incr:      return GL_INCR;
	case rsx::stencil_op::decr:      return GL_DECR;
	case rsx::stencil_op::incr_wrap: return GL_INCR_WRAP;
	case rsx::stencil_op::decr_wrap: return GL_DECR_WRAP;
	default:
		fmt::throw_exception("Unsupported stencil op 0x%X" HERE, static_cast<u32>(op));
	}
}
|
||||
|
||||
// Translates an RSX blend equation into the equivalent OpenGL blend equation.
// The *_signed variants have no direct GL counterpart and are approximated by
// their unsigned equivalents (the substitution is logged at trace level).
// Unknown/unimplemented equations degrade to GL_FUNC_ADD with an error log.
GLenum blend_equation(rsx::blend_equation op)
{
	switch (op)
	{
	// Note : maybe add is signed on gl
	case rsx::blend_equation::add_signed:
		rsx_log.trace("blend equation add_signed used. Emulating using FUNC_ADD");
		[[fallthrough]]; // deliberately falls into the unsigned 'add' case
	case rsx::blend_equation::add: return GL_FUNC_ADD;
	case rsx::blend_equation::min: return GL_MIN;
	case rsx::blend_equation::max: return GL_MAX;
	case rsx::blend_equation::substract: return GL_FUNC_SUBTRACT;
	case rsx::blend_equation::reverse_substract_signed:
		rsx_log.trace("blend equation reverse_subtract_signed used. Emulating using FUNC_REVERSE_SUBTRACT");
		[[fallthrough]]; // deliberately falls into the unsigned 'reverse_substract' case
	case rsx::blend_equation::reverse_substract: return GL_FUNC_REVERSE_SUBTRACT;
	case rsx::blend_equation::reverse_add_signed:
	default:
		rsx_log.error("Blend equation 0x%X is unimplemented!", static_cast<u32>(op));
		return GL_FUNC_ADD;
	}
}
|
||||
|
||||
// Translates an RSX blend factor into the equivalent OpenGL blend factor
// enum. Throws on values outside the enum.
GLenum blend_factor(rsx::blend_factor op)
{
	switch (op)
	{
	case rsx::blend_factor::zero:                     return GL_ZERO;
	case rsx::blend_factor::one:                      return GL_ONE;
	case rsx::blend_factor::src_color:                return GL_SRC_COLOR;
	case rsx::blend_factor::one_minus_src_color:      return GL_ONE_MINUS_SRC_COLOR;
	case rsx::blend_factor::dst_color:                return GL_DST_COLOR;
	case rsx::blend_factor::one_minus_dst_color:      return GL_ONE_MINUS_DST_COLOR;
	case rsx::blend_factor::src_alpha:                return GL_SRC_ALPHA;
	case rsx::blend_factor::one_minus_src_alpha:      return GL_ONE_MINUS_SRC_ALPHA;
	case rsx::blend_factor::dst_alpha:                return GL_DST_ALPHA;
	case rsx::blend_factor::one_minus_dst_alpha:      return GL_ONE_MINUS_DST_ALPHA;
	case rsx::blend_factor::src_alpha_saturate:       return GL_SRC_ALPHA_SATURATE;
	case rsx::blend_factor::constant_color:           return GL_CONSTANT_COLOR;
	case rsx::blend_factor::one_minus_constant_color: return GL_ONE_MINUS_CONSTANT_COLOR;
	case rsx::blend_factor::constant_alpha:           return GL_CONSTANT_ALPHA;
	case rsx::blend_factor::one_minus_constant_alpha: return GL_ONE_MINUS_CONSTANT_ALPHA;
	default:
		fmt::throw_exception("Unsupported blend factor 0x%X" HERE, static_cast<u32>(op));
	}
}
|
||||
|
||||
// Translates an RSX logic operation into the equivalent OpenGL logic op
// enum. Throws on values outside the enum.
GLenum logic_op(rsx::logic_op op)
{
	switch (op)
	{
	case rsx::logic_op::logic_clear:         return GL_CLEAR;
	case rsx::logic_op::logic_and:           return GL_AND;
	case rsx::logic_op::logic_and_reverse:   return GL_AND_REVERSE;
	case rsx::logic_op::logic_copy:          return GL_COPY;
	case rsx::logic_op::logic_and_inverted:  return GL_AND_INVERTED;
	case rsx::logic_op::logic_noop:          return GL_NOOP;
	case rsx::logic_op::logic_xor:           return GL_XOR;
	case rsx::logic_op::logic_or:            return GL_OR;
	case rsx::logic_op::logic_nor:           return GL_NOR;
	case rsx::logic_op::logic_equiv:         return GL_EQUIV;
	case rsx::logic_op::logic_invert:        return GL_INVERT;
	case rsx::logic_op::logic_or_reverse:    return GL_OR_REVERSE;
	case rsx::logic_op::logic_copy_inverted: return GL_COPY_INVERTED;
	case rsx::logic_op::logic_or_inverted:   return GL_OR_INVERTED;
	case rsx::logic_op::logic_nand:          return GL_NAND;
	case rsx::logic_op::logic_set:           return GL_SET;
	default:
		fmt::throw_exception("Unsupported logic op 0x%X" HERE, static_cast<u32>(op));
	}
}
|
||||
|
||||
// Translates an RSX front-face winding mode into the equivalent (flipped)
// OpenGL winding enum. Throws on values outside the enum.
GLenum front_face(rsx::front_face op)
{
	// NOTE: RSX face winding is always based off of the upper-left corner like
	// vulkan, but GL is bottom-left, hence the deliberate cw<->ccw flip below.
	// shader_window_origin register does not affect this.
	// Verified with Outrun Online Arcade (window_origin::top) and DS2 (window_origin::bottom);
	// correctness of face winding checked using stencil test (GOW collection shadows).
	switch (op)
	{
	case rsx::front_face::cw:  return GL_CCW;
	case rsx::front_face::ccw: return GL_CW;
	default:
		fmt::throw_exception("Unsupported front face 0x%X" HERE, static_cast<u32>(op));
	}
}
|
||||
|
||||
// Translates an RSX cull-face mode into the equivalent OpenGL cull-face
// enum. Throws on values outside the enum.
GLenum cull_face(rsx::cull_face op)
{
	switch (op)
	{
	case rsx::cull_face::front:          return GL_FRONT;
	case rsx::cull_face::back:           return GL_BACK;
	case rsx::cull_face::front_and_back: return GL_FRONT_AND_BACK;
	default:
		fmt::throw_exception("Unsupported cull face 0x%X" HERE, static_cast<u32>(op));
	}
}
|
||||
}
|
||||
|
||||
// Synchronizes the GL pipeline state (color/depth/stencil masks, depth and
// stencil tests, blending, logic ops, line/polygon state, culling and
// winding) with the current RSX method register values. Called once before
// geometry is emitted for a draw; elapsed time is charged to
// m_frame_stats.setup_time.
void GLGSRender::update_draw_state()
{
	m_profiler.start();

	// Per-render-target color write masks.
	for (int index = 0; index < m_rtts.get_color_surface_count(); ++index)
	{
		bool color_mask_b = rsx::method_registers.color_mask_b(index);
		bool color_mask_g = rsx::method_registers.color_mask_g(index);
		bool color_mask_r = rsx::method_registers.color_mask_r(index);
		bool color_mask_a = rsx::method_registers.color_mask_a(index);

		if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8)
		{
			// Map GB components onto RG (g8b8 is emulated with an RG surface)
			rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
		}

		gl_state.color_maski(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a);
	}

	gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
	gl_state.stencil_mask(rsx::method_registers.stencil_mask());

	// Depth clamping is also used to emulate a disabled depth clip
	gl_state.enable(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled(), GL_DEPTH_CLAMP);

	if (gl_state.enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST))
	{
		gl_state.depth_func(gl::comparison_op(rsx::method_registers.depth_func()));
	}

	// Depth bounds test is an extension; only touch it if the entry point exists
	if (glDepthBoundsEXT && (gl_state.enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT)))
	{
		gl_state.depth_bounds(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
	}

	gl_state.enable(rsx::method_registers.dither_enabled(), GL_DITHER);

	// Front-face stencil state; back-face state is set separately below only
	// when two-sided stencil is enabled.
	if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST))
	{
		glStencilFunc(gl::comparison_op(rsx::method_registers.stencil_func()),
			rsx::method_registers.stencil_func_ref(),
			rsx::method_registers.stencil_func_mask());

		glStencilOp(gl::stencil_op(rsx::method_registers.stencil_op_fail()), gl::stencil_op(rsx::method_registers.stencil_op_zfail()),
			gl::stencil_op(rsx::method_registers.stencil_op_zpass()));

		if (rsx::method_registers.two_sided_stencil_test_enabled())
		{
			glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask());

			glStencilFuncSeparate(GL_BACK, gl::comparison_op(rsx::method_registers.back_stencil_func()),
				rsx::method_registers.back_stencil_func_ref(), rsx::method_registers.back_stencil_func_mask());

			glStencilOpSeparate(GL_BACK, gl::stencil_op(rsx::method_registers.back_stencil_op_fail()),
				gl::stencil_op(rsx::method_registers.back_stencil_op_zfail()), gl::stencil_op(rsx::method_registers.back_stencil_op_zpass()));
		}
	}

	// Blend enable flags per MRT attachment (0-3)
	bool mrt_blend_enabled[] =
	{
		rsx::method_registers.blend_enabled(),
		rsx::method_registers.blend_enabled_surface_1(),
		rsx::method_registers.blend_enabled_surface_2(),
		rsx::method_registers.blend_enabled_surface_3()
	};

	// Blend func/equation/color are global state shared by all attachments;
	// only upload them if at least one attachment blends.
	if (mrt_blend_enabled[0] || mrt_blend_enabled[1] || mrt_blend_enabled[2] || mrt_blend_enabled[3])
	{
		glBlendFuncSeparate(gl::blend_factor(rsx::method_registers.blend_func_sfactor_rgb()),
			gl::blend_factor(rsx::method_registers.blend_func_dfactor_rgb()),
			gl::blend_factor(rsx::method_registers.blend_func_sfactor_a()),
			gl::blend_factor(rsx::method_registers.blend_func_dfactor_a()));

		auto blend_colors = rsx::get_constant_blend_colors();
		glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]);

		glBlendEquationSeparate(gl::blend_equation(rsx::method_registers.blend_equation_rgb()),
			gl::blend_equation(rsx::method_registers.blend_equation_a()));
	}

	gl_state.enablei(mrt_blend_enabled[0], GL_BLEND, 0);
	gl_state.enablei(mrt_blend_enabled[1], GL_BLEND, 1);
	gl_state.enablei(mrt_blend_enabled[2], GL_BLEND, 2);
	gl_state.enablei(mrt_blend_enabled[3], GL_BLEND, 3);

	if (gl_state.enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP))
	{
		gl_state.logic_op(gl::logic_op(rsx::method_registers.logic_operation()));
	}

	gl_state.line_width(rsx::method_registers.line_width());
	gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH);

	gl_state.enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT);
	gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
	gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);

	// offset_bias is the constant factor, multiplied by the implementation factor R
	// offset_scale is the slope factor, multiplied by the triangle slope factor M
	gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());

	if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
	{
		gl_state.cull_face(gl::cull_face(rsx::method_registers.cull_face_mode()));
	}

	gl_state.front_face(gl::front_face(rsx::method_registers.front_face_mode()));

	// Sample control
	// TODO: MinSampleShading
	//gl_state.enable(rsx::method_registers.msaa_enabled(), GL_MULTISAMPLE);
	//gl_state.enable(rsx::method_registers.msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE);
	//gl_state.enable(rsx::method_registers.msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE);

	// TODO: RSX registers not yet mirrored into GL state:
	//NV4097_SET_ANISO_SPREAD
	//NV4097_SET_SPECULAR_ENABLE
	//NV4097_SET_TWO_SIDE_LIGHT_EN
	//NV4097_SET_FLAT_SHADE_OP
	//NV4097_SET_EDGE_FLAG


	//NV4097_SET_COLOR_KEY_COLOR
	//NV4097_SET_SHADER_CONTROL
	//NV4097_SET_ZMIN_MAX_CONTROL
	//NV4097_SET_ANTI_ALIASING_CONTROL
	//NV4097_SET_CLIP_ID_TEST_ENABLE

	m_frame_stats.setup_time += m_profiler.duration();
}
|
||||
|
||||
// Refreshes the cached sampler descriptors for all fragment and vertex
// texture slots from the RSX method registers. Only slots flagged dirty (or
// sourced from a framebuffer whose surface store changed since the last call)
// are re-uploaded through the texture cache. Holds m_sampler_mutex for the
// whole update; clears m_samplers_dirty on completion.
void GLGSRender::load_texture_env()
{
	// Load textures
	gl::command_context cmd{ gl_state };
	bool update_framebuffer_sourced = false;

	std::lock_guard lock(m_sampler_mutex);

	// If the surface store changed, any descriptor backed by framebuffer
	// storage may be stale and must be revalidated below.
	if (surface_store_tag != m_rtts.cache_tag)
	{
		update_framebuffer_sourced = true;
		surface_store_tag = m_rtts.cache_tag;
	}

	// Fragment-shader texture slots
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		// Lazily allocate the descriptor on first use
		if (!fs_sampler_state[i])
			fs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();

		if (m_samplers_dirty || m_textures_dirty[i] ||
			(update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
		{
			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());

			if (rsx::method_registers.fragment_textures[i].enabled())
			{
				*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.fragment_textures[i], m_rtts);

				// Sampler object parameters only need refreshing when the
				// register state itself changed, not on surface-store bumps
				if (m_textures_dirty[i])
					m_fs_sampler_states[i].apply(rsx::method_registers.fragment_textures[i], fs_sampler_state[i].get());
			}
			else
			{
				// Slot disabled; reset descriptor to the empty state
				*sampler_state = {};
			}

			m_textures_dirty[i] = false;
		}
	}

	// Vertex-shader texture slots (same protocol as above)
	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		if (!vs_sampler_state[i])
			vs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();

		if (m_samplers_dirty || m_vertex_textures_dirty[i] ||
			(update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
		{
			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());

			if (rsx::method_registers.vertex_textures[i].enabled())
			{
				*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts);

				if (m_vertex_textures_dirty[i])
					m_vs_sampler_states[i].apply(rsx::method_registers.vertex_textures[i], vs_sampler_state[i].get());
			}
			else
				*sampler_state = {};

			m_vertex_textures_dirty[i] = false;
		}
	}

	m_samplers_dirty.store(false);
}
|
||||
|
||||
// Binds the texture views prepared by load_texture_env() to their GL texture
// units, resolving any deferred copy operations (temporary subresources) in
// the process. Only slots actually referenced by the current shader programs
// are touched; unreferenced or invalid fragment slots fall back to a null
// texture of the expected dimensionality.
void GLGSRender::bind_texture_env()
{
	// Bind textures and resolve external copy operations
	gl::command_context cmd{ gl_state };

	// Fragment-shader texture units
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		if (current_fp_metadata.referenced_textures_mask & (1 << i))
		{
			_SelectTexture(GL_FRAGMENT_TEXTURES_START + i);

			gl::texture_view* view = nullptr;
			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());

			if (rsx::method_registers.fragment_textures[i].enabled() &&
				sampler_state->validate())
			{
				// No direct image handle means the descriptor holds a deferred
				// copy request that must be materialized now
				if (view = sampler_state->image_handle; !view) [[unlikely]]
				{
					view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
				}
			}

			if (view) [[likely]]
			{
				view->bind();

				// Depth-stencil textures read as color need a second binding
				// exposing the stencil aspect on the mirror unit
				if (current_fragment_program.redirected_textures & (1 << i))
				{
					_SelectTexture(GL_STENCIL_MIRRORS_START + i);

					auto root_texture = static_cast<gl::viewable_image*>(view->image());
					// 0xAAE4 is a remap control word — presumably the identity
					// mapping; TODO(review) confirm against get_view() encoding
					auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);
					stencil_view->bind();
				}
			}
			else
			{
				// Invalid/disabled slot: bind a null texture matching the
				// dimension the shader expects so sampling stays well-defined
				auto target = gl::get_target(current_fragment_program.get_texture_dimension(i));
				glBindTexture(target, m_null_textures[target]->id());

				if (current_fragment_program.redirected_textures & (1 << i))
				{
					_SelectTexture(GL_STENCIL_MIRRORS_START + i);
					glBindTexture(target, m_null_textures[target]->id());
				}
			}
		}
	}

	// Vertex-shader texture units
	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		if (current_vp_metadata.referenced_textures_mask & (1 << i))
		{
			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
			_SelectTexture(GL_VERTEX_TEXTURES_START + i);

			if (rsx::method_registers.vertex_textures[i].enabled() &&
				sampler_state->validate())
			{
				if (sampler_state->image_handle) [[likely]]
				{
					sampler_state->image_handle->bind();
				}
				else
				{
					// Deferred copy: materialize and bind in one step
					m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
				}
			}
			else
			{
				glBindTexture(GL_TEXTURE_2D, GL_NONE);
			}
		}
	}
}
|
||||
|
||||
// Emits one sub-draw of the current RSX draw clause: uploads vertex (and
// index) data into the ring buffers and issues the corresponding GL draw
// call(s). sub_index 0 performs the full vertex-layout analysis; subsequent
// sub-draws only re-run pipeline dependencies and rebase vertex offsets when
// the vertex base register changed mid-clause.
void GLGSRender::emit_geometry(u32 sub_index)
{
	// Releases the ring-buffer mappings after the upload phase. In the
	// MAP_COHERENT (default) path no explicit action is needed.
	const auto do_heap_cleanup = [this]()
	{
		if (manually_flush_ring_buffers)
		{
			m_attrib_ring_buffer->unmap();
			m_index_ring_buffer->unmap();
		}
		else
		{
			//DMA push; not needed with MAP_COHERENT
			//glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
		}
	};

	if (!sub_index)
	{
		// First sub-draw: compute the interleaved vertex layout once for the
		// whole clause
		analyse_inputs_interleaved(m_vertex_layout);
		if (!m_vertex_layout.validate())
		{
			// Execute remaining pipeline barriers with NOP draw
			do
			{
				rsx::method_registers.current_draw_clause.execute_pipeline_dependencies();
			}
			while (rsx::method_registers.current_draw_clause.next());

			rsx::method_registers.current_draw_clause.end();
			return;
		}
	}
	else
	{
		if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed)
		{
			// Rebase vertex bases instead of
			for (auto &info : m_vertex_layout.interleaved_blocks)
			{
				const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
				info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location, HERE);
			}
		}
	}

	if (manually_flush_ring_buffers)
	{
		//Use approximations to reserve space. This path is mostly for debug purposes anyway
		u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
		u32 approx_working_buffer_size = approx_vertex_count * 256;

		//Allocate 256K heap if we have no approximation at this time (inlined array)
		m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
		m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
	}

	// Do vertex upload before RTT prep / texture lookups to give the driver time to push data
	auto upload_info = set_vertex_buffer();
	do_heap_cleanup();

	if (upload_info.vertex_draw_count == 0)
	{
		// Malformed vertex setup; abort
		return;
	}

	const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
	update_vertex_env(upload_info);

	if (!upload_info.index_info)
	{
		// Non-indexed path
		if (rsx::method_registers.current_draw_clause.is_single_draw())
		{
			glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
		}
		else
		{
			const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
			const auto draw_count = subranges.size();
			const auto driver_caps = gl::get_driver_caps();
			bool use_draw_arrays_fallback = false;

			// Scratch layout: [firsts (4B each)][counts (4B each)][offsets (8B each)]
			// = 24 bytes per sub-draw
			m_scratch_buffer.resize(draw_count * 24);
			GLint* firsts = reinterpret_cast<GLint*>(m_scratch_buffer.data());
			GLsizei* counts = (firsts + draw_count);
			const GLvoid** offsets = reinterpret_cast<const GLvoid**>(counts + draw_count);

			u32 first = 0;
			u32 dst_index = 0;
			for (const auto &range : subranges)
			{
				firsts[dst_index] = first;
				counts[dst_index] = range.count;
				// Byte offset into the identity index buffer (u32 indices)
				offsets[dst_index++] = reinterpret_cast<const GLvoid*>(u64{first << 2});

				if (driver_caps.vendor_AMD && (first + range.count) > (0x100000 >> 2))
				{
					//Unlikely, but added here in case the identity buffer is not large enough somehow
					use_draw_arrays_fallback = true;
					break;
				}

				first += range.count;
			}

			if (use_draw_arrays_fallback)
			{
				//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
				for (u32 n = 0; n < draw_count; ++n)
				{
					glDrawArrays(draw_mode, firsts[n], counts[n]);
				}
			}
			else if (driver_caps.vendor_AMD)
			{
				//Use identity index buffer to fix broken vertexID on AMD
				m_identity_index_buffer->bind();
				glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, static_cast<GLsizei>(draw_count));
			}
			else
			{
				//Normal render
				glMultiDrawArrays(draw_mode, firsts, counts, static_cast<GLsizei>(draw_count));
			}
		}
	}
	else
	{
		// Indexed path
		const GLenum index_type = std::get<0>(*upload_info.index_info);
		const u32 index_offset = std::get<1>(*upload_info.index_info);
		// Primitive restart is only usable when the primitive maps natively to
		// GL and the clause is not split into disjoint primitives
		const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;

		if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
		{
			glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT) ? 0xffff : 0xffffffff);
		}

		m_index_ring_buffer->bind();

		if (rsx::method_registers.current_draw_clause.is_single_draw())
		{
			glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast<GLvoid*>(u64{index_offset}));
		}
		else
		{
			const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
			const auto draw_count = subranges.size();
			// Shift amount converting index count to bytes: <<1 for u16, <<2 for u32
			const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
			uintptr_t index_ptr = index_offset;
			// Scratch layout: [counts (4B each)][offsets (8B each)] with padding
			// = 16 bytes per sub-draw
			m_scratch_buffer.resize(draw_count * 16);

			GLsizei *counts = reinterpret_cast<GLsizei*>(m_scratch_buffer.data());
			const GLvoid** offsets = reinterpret_cast<const GLvoid**>(counts + draw_count);
			int dst_index = 0;

			for (const auto &range : subranges)
			{
				const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
				counts[dst_index] = index_size;
				offsets[dst_index++] = reinterpret_cast<const GLvoid*>(index_ptr);

				index_ptr += (index_size << type_scale);
			}

			glMultiDrawElements(draw_mode, counts, index_type, offsets, static_cast<GLsizei>(draw_count));
		}
	}
}
|
||||
|
||||
void GLGSRender::begin()
|
||||
{
|
||||
rsx::thread::begin();
|
||||
|
||||
if (skip_current_frame || cond_render_ctrl.disable_rendering())
|
||||
return;
|
||||
|
||||
init_buffers(rsx::framebuffer_creation_context::context_draw);
|
||||
}
|
||||
|
||||
// Completes the current draw clause: resolves pending depth-surface
// transfers, loads the texture environment and shader programs, synchronizes
// render-target memory, applies draw state and finally emits every sub-draw
// in the clause. Skipped frames and culled conditional draws degrade to a
// NOP draw so pipeline bookkeeping still advances.
void GLGSRender::end()
{
	m_profiler.start();

	if (skip_current_frame || !framebuffer_status_valid || cond_render_ctrl.disable_rendering())
	{
		execute_nop_draw();
		rsx::thread::end();
		return;
	}

	m_frame_stats.setup_time += m_profiler.duration();

	gl::command_context cmd{ gl_state };
	gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);

	// Handle special memory barrier for ARGB8->D24S8 in an active DSV
	if (ds && ds->old_contents.size() == 1 &&
		ds->old_contents[0].source->get_internal_format() == gl::texture::internal_format::rgba8)
	{
		// Disable scissoring so the conversion covers the whole surface
		gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);

		// TODO: Stencil transfer
		gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF);
		ds->old_contents[0].init_transfer(ds);

		m_depth_converter.run(ds->old_contents[0].src_rect(),
			ds->old_contents[0].dst_rect(),
			ds->old_contents[0].source, ds);

		ds->on_write();
	}

	// Active texture environment is used to decode shaders
	m_profiler.start();
	load_texture_env();
	m_frame_stats.textures_upload_time += m_profiler.duration();

	// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
	// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
	if (!load_program())
	{
		// Program is not ready, skip drawing this
		std::this_thread::yield();
		execute_nop_draw();
		// m_rtts.on_write(); - breaks games for obvious reasons
		rsx::thread::end();
		return;
	}

	// Load program execution environment
	load_program_env();
	m_frame_stats.setup_time += m_profiler.duration();

	bind_texture_env();
	m_gl_texture_cache.release_uncached_temporary_subresources();
	m_frame_stats.textures_upload_time += m_profiler.duration();

	// Optionally do memory synchronization if the texture stage has not yet triggered this
	// NOTE: the else branch below is currently dead code (condition hardcoded
	// to true); it sketches a cheaper clear-based path for non-strict mode.
	if (true)//g_cfg.video.strict_rendering_mode)
	{
		gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);

		if (ds) ds->write_barrier(cmd);

		for (auto &rtt : m_rtts.m_bound_render_targets)
		{
			if (auto surface = std::get<1>(rtt))
			{
				surface->write_barrier(cmd);
			}
		}
	}
	else
	{
		rsx::simple_array<int> buffers_to_clear;
		bool clear_all_color = true;
		bool clear_depth = false;

		for (int index = 0; index < 4; index++)
		{
			if (m_rtts.m_bound_render_targets[index].first)
			{
				if (!m_rtts.m_bound_render_targets[index].second->dirty())
					clear_all_color = false;
				else
					buffers_to_clear.push_back(index);
			}
		}

		if (ds && ds->dirty())
		{
			clear_depth = true;
		}

		if (clear_depth || !buffers_to_clear.empty())
		{
			gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
			GLenum mask = 0;

			if (clear_depth)
			{
				gl_state.depth_mask(GL_TRUE);
				gl_state.clear_depth(1.f);
				gl_state.clear_stencil(255);
				mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
			}

			if (clear_all_color)
				mask |= GL_COLOR_BUFFER_BIT;

			glClear(mask);

			if (!buffers_to_clear.empty() && !clear_all_color)
			{
				GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
				//It is impossible for the render target to be type A or B here (clear all would have been flagged)
				for (auto &i : buffers_to_clear)
					glClearBufferfv(GL_COLOR, i, colors);
			}

			if (clear_depth)
				gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
		}
	}

	// Unconditionally re-enable the scissor test; it may have been disabled
	// above for the full-surface barrier/clear operations
	gl_state.enable(GL_TRUE, GL_SCISSOR_TEST);

	update_draw_state();

	if (g_cfg.video.debug_output)
	{
		m_program->validate();
	}

	// Walk the draw clause, emitting one sub-draw per iteration
	rsx::method_registers.current_draw_clause.begin();
	u32 subdraw = 0u;
	do
	{
		emit_geometry(subdraw++);
	}
	while (rsx::method_registers.current_draw_clause.next());

	m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled);

	// Notify all ring buffers that this draw's allocations are in flight
	m_attrib_ring_buffer->notify();
	m_index_ring_buffer->notify();
	m_fragment_env_buffer->notify();
	m_vertex_env_buffer->notify();
	m_texture_parameters_buffer->notify();
	m_vertex_layout_buffer->notify();
	m_fragment_constants_buffer->notify();
	m_transform_constants_buffer->notify();

	m_frame_stats.textures_upload_time += m_profiler.duration();

	rsx::thread::end();
}
|
@ -1,12 +1,9 @@
|
||||
#include "stdafx.h"
|
||||
#include "../Overlays/overlay_shader_compile_notification.h"
|
||||
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
|
||||
#include "GLGSRender.h"
|
||||
#include "GLCompute.h"
|
||||
#include "GLVertexProgram.h"
|
||||
#include "../Overlays/overlay_shader_compile_notification.h"
|
||||
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
|
||||
#include "../rsx_methods.h"
|
||||
#include "../Common/BufferUtils.h"
|
||||
#include "../rsx_utils.h"
|
||||
|
||||
#define DUMP_VERTEX_DATA 0
|
||||
|
||||
@ -44,605 +41,6 @@ GLGSRender::GLGSRender() : GSRender()
|
||||
|
||||
extern CellGcmContextData current_context;
|
||||
|
||||
namespace
|
||||
{
|
||||
GLenum comparison_op(rsx::comparison_function op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case rsx::comparison_function::never: return GL_NEVER;
|
||||
case rsx::comparison_function::less: return GL_LESS;
|
||||
case rsx::comparison_function::equal: return GL_EQUAL;
|
||||
case rsx::comparison_function::less_or_equal: return GL_LEQUAL;
|
||||
case rsx::comparison_function::greater: return GL_GREATER;
|
||||
case rsx::comparison_function::not_equal: return GL_NOTEQUAL;
|
||||
case rsx::comparison_function::greater_or_equal: return GL_GEQUAL;
|
||||
case rsx::comparison_function::always: return GL_ALWAYS;
|
||||
}
|
||||
fmt::throw_exception("Unsupported comparison op 0x%X" HERE, static_cast<u32>(op));
|
||||
}
|
||||
|
||||
GLenum stencil_op(rsx::stencil_op op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case rsx::stencil_op::invert: return GL_INVERT;
|
||||
case rsx::stencil_op::keep: return GL_KEEP;
|
||||
case rsx::stencil_op::zero: return GL_ZERO;
|
||||
case rsx::stencil_op::replace: return GL_REPLACE;
|
||||
case rsx::stencil_op::incr: return GL_INCR;
|
||||
case rsx::stencil_op::decr: return GL_DECR;
|
||||
case rsx::stencil_op::incr_wrap: return GL_INCR_WRAP;
|
||||
case rsx::stencil_op::decr_wrap: return GL_DECR_WRAP;
|
||||
}
|
||||
fmt::throw_exception("Unsupported stencil op 0x%X" HERE, static_cast<u32>(op));
|
||||
}
|
||||
|
||||
GLenum blend_equation(rsx::blend_equation op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
// Note : maybe add is signed on gl
|
||||
case rsx::blend_equation::add_signed:
|
||||
rsx_log.trace("blend equation add_signed used. Emulating using FUNC_ADD");
|
||||
case rsx::blend_equation::add: return GL_FUNC_ADD;
|
||||
case rsx::blend_equation::min: return GL_MIN;
|
||||
case rsx::blend_equation::max: return GL_MAX;
|
||||
case rsx::blend_equation::substract: return GL_FUNC_SUBTRACT;
|
||||
case rsx::blend_equation::reverse_substract_signed:
|
||||
rsx_log.trace("blend equation reverse_subtract_signed used. Emulating using FUNC_REVERSE_SUBTRACT");
|
||||
case rsx::blend_equation::reverse_substract: return GL_FUNC_REVERSE_SUBTRACT;
|
||||
case rsx::blend_equation::reverse_add_signed:
|
||||
default:
|
||||
rsx_log.error("Blend equation 0x%X is unimplemented!", static_cast<u32>(op));
|
||||
return GL_FUNC_ADD;
|
||||
}
|
||||
}
|
||||
|
||||
GLenum blend_factor(rsx::blend_factor op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case rsx::blend_factor::zero: return GL_ZERO;
|
||||
case rsx::blend_factor::one: return GL_ONE;
|
||||
case rsx::blend_factor::src_color: return GL_SRC_COLOR;
|
||||
case rsx::blend_factor::one_minus_src_color: return GL_ONE_MINUS_SRC_COLOR;
|
||||
case rsx::blend_factor::dst_color: return GL_DST_COLOR;
|
||||
case rsx::blend_factor::one_minus_dst_color: return GL_ONE_MINUS_DST_COLOR;
|
||||
case rsx::blend_factor::src_alpha: return GL_SRC_ALPHA;
|
||||
case rsx::blend_factor::one_minus_src_alpha: return GL_ONE_MINUS_SRC_ALPHA;
|
||||
case rsx::blend_factor::dst_alpha: return GL_DST_ALPHA;
|
||||
case rsx::blend_factor::one_minus_dst_alpha: return GL_ONE_MINUS_DST_ALPHA;
|
||||
case rsx::blend_factor::src_alpha_saturate: return GL_SRC_ALPHA_SATURATE;
|
||||
case rsx::blend_factor::constant_color: return GL_CONSTANT_COLOR;
|
||||
case rsx::blend_factor::one_minus_constant_color: return GL_ONE_MINUS_CONSTANT_COLOR;
|
||||
case rsx::blend_factor::constant_alpha: return GL_CONSTANT_ALPHA;
|
||||
case rsx::blend_factor::one_minus_constant_alpha: return GL_ONE_MINUS_CONSTANT_ALPHA;
|
||||
}
|
||||
fmt::throw_exception("Unsupported blend factor 0x%X" HERE, static_cast<u32>(op));
|
||||
}
|
||||
|
||||
GLenum logic_op(rsx::logic_op op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case rsx::logic_op::logic_clear: return GL_CLEAR;
|
||||
case rsx::logic_op::logic_and: return GL_AND;
|
||||
case rsx::logic_op::logic_and_reverse: return GL_AND_REVERSE;
|
||||
case rsx::logic_op::logic_copy: return GL_COPY;
|
||||
case rsx::logic_op::logic_and_inverted: return GL_AND_INVERTED;
|
||||
case rsx::logic_op::logic_noop: return GL_NOOP;
|
||||
case rsx::logic_op::logic_xor: return GL_XOR;
|
||||
case rsx::logic_op::logic_or: return GL_OR;
|
||||
case rsx::logic_op::logic_nor: return GL_NOR;
|
||||
case rsx::logic_op::logic_equiv: return GL_EQUIV;
|
||||
case rsx::logic_op::logic_invert: return GL_INVERT;
|
||||
case rsx::logic_op::logic_or_reverse: return GL_OR_REVERSE;
|
||||
case rsx::logic_op::logic_copy_inverted: return GL_COPY_INVERTED;
|
||||
case rsx::logic_op::logic_or_inverted: return GL_OR_INVERTED;
|
||||
case rsx::logic_op::logic_nand: return GL_NAND;
|
||||
case rsx::logic_op::logic_set: return GL_SET;
|
||||
}
|
||||
fmt::throw_exception("Unsupported logic op 0x%X" HERE, static_cast<u32>(op));
|
||||
}
|
||||
|
||||
GLenum front_face(rsx::front_face op)
|
||||
{
|
||||
//NOTE: RSX face winding is always based off of upper-left corner like vulkan, but GL is bottom left
|
||||
//shader_window_origin register does not affect this
|
||||
//verified with Outrun Online Arcade (window_origin::top) and DS2 (window_origin::bottom)
|
||||
//correctness of face winding checked using stencil test (GOW collection shadows)
|
||||
switch (op)
|
||||
{
|
||||
case rsx::front_face::cw: return GL_CCW;
|
||||
case rsx::front_face::ccw: return GL_CW;
|
||||
}
|
||||
fmt::throw_exception("Unsupported front face 0x%X" HERE, static_cast<u32>(op));
|
||||
}
|
||||
|
||||
GLenum cull_face(rsx::cull_face op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case rsx::cull_face::front: return GL_FRONT;
|
||||
case rsx::cull_face::back: return GL_BACK;
|
||||
case rsx::cull_face::front_and_back: return GL_FRONT_AND_BACK;
|
||||
}
|
||||
fmt::throw_exception("Unsupported cull face 0x%X" HERE, static_cast<u32>(op));
|
||||
}
|
||||
}
|
||||
|
||||
void GLGSRender::begin()
|
||||
{
|
||||
rsx::thread::begin();
|
||||
|
||||
if (skip_current_frame || cond_render_ctrl.disable_rendering())
|
||||
return;
|
||||
|
||||
init_buffers(rsx::framebuffer_creation_context::context_draw);
|
||||
}
|
||||
|
||||
void GLGSRender::end()
|
||||
{
|
||||
m_profiler.start();
|
||||
|
||||
if (skip_current_frame || !framebuffer_status_valid || cond_render_ctrl.disable_rendering())
|
||||
{
|
||||
execute_nop_draw();
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
|
||||
const auto do_heap_cleanup = [this]()
|
||||
{
|
||||
if (manually_flush_ring_buffers)
|
||||
{
|
||||
m_attrib_ring_buffer->unmap();
|
||||
m_index_ring_buffer->unmap();
|
||||
}
|
||||
else
|
||||
{
|
||||
//DMA push; not needed with MAP_COHERENT
|
||||
//glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
|
||||
}
|
||||
};
|
||||
|
||||
gl::command_context cmd{ gl_state };
|
||||
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
||||
|
||||
// Handle special memory barrier for ARGB8->D24S8 in an active DSV
|
||||
if (ds && ds->old_contents.size() == 1 &&
|
||||
ds->old_contents[0].source->get_internal_format() == gl::texture::internal_format::rgba8)
|
||||
{
|
||||
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
|
||||
|
||||
// TODO: Stencil transfer
|
||||
gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF);
|
||||
ds->old_contents[0].init_transfer(ds);
|
||||
|
||||
m_depth_converter.run(ds->old_contents[0].src_rect(),
|
||||
ds->old_contents[0].dst_rect(),
|
||||
ds->old_contents[0].source, ds);
|
||||
|
||||
ds->on_write();
|
||||
}
|
||||
|
||||
// Load textures
|
||||
{
|
||||
m_profiler.start();
|
||||
|
||||
std::lock_guard lock(m_sampler_mutex);
|
||||
bool update_framebuffer_sourced = false;
|
||||
|
||||
if (surface_store_tag != m_rtts.cache_tag)
|
||||
{
|
||||
update_framebuffer_sourced = true;
|
||||
surface_store_tag = m_rtts.cache_tag;
|
||||
}
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
if (!fs_sampler_state[i])
|
||||
fs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
|
||||
|
||||
if (m_samplers_dirty || m_textures_dirty[i] ||
|
||||
(update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
|
||||
{
|
||||
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.fragment_textures[i].enabled())
|
||||
{
|
||||
*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.fragment_textures[i], m_rtts);
|
||||
|
||||
if (m_textures_dirty[i])
|
||||
m_fs_sampler_states[i].apply(rsx::method_registers.fragment_textures[i], fs_sampler_state[i].get());
|
||||
}
|
||||
else
|
||||
{
|
||||
*sampler_state = {};
|
||||
}
|
||||
|
||||
m_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
|
||||
{
|
||||
if (!vs_sampler_state[i])
|
||||
vs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
|
||||
|
||||
if (m_samplers_dirty || m_vertex_textures_dirty[i] ||
|
||||
(update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
|
||||
{
|
||||
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.vertex_textures[i].enabled())
|
||||
{
|
||||
*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts);
|
||||
|
||||
if (m_vertex_textures_dirty[i])
|
||||
m_vs_sampler_states[i].apply(rsx::method_registers.vertex_textures[i], vs_sampler_state[i].get());
|
||||
}
|
||||
else
|
||||
*sampler_state = {};
|
||||
|
||||
m_vertex_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
m_samplers_dirty.store(false);
|
||||
|
||||
m_frame_stats.textures_upload_time += m_profiler.duration();
|
||||
}
|
||||
|
||||
// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
|
||||
// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
|
||||
if (!load_program())
|
||||
{
|
||||
// Program is not ready, skip drawing this
|
||||
std::this_thread::yield();
|
||||
execute_nop_draw();
|
||||
// m_rtts.on_write(); - breaks games for obvious reasons
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
||||
// Load program execution environment
|
||||
load_program_env();
|
||||
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
|
||||
//Bind textures and resolve external copy operations
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
if (current_fp_metadata.referenced_textures_mask & (1 << i))
|
||||
{
|
||||
_SelectTexture(GL_FRAGMENT_TEXTURES_START + i);
|
||||
|
||||
gl::texture_view* view = nullptr;
|
||||
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.fragment_textures[i].enabled() &&
|
||||
sampler_state->validate())
|
||||
{
|
||||
if (view = sampler_state->image_handle; !view) [[unlikely]]
|
||||
{
|
||||
view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
|
||||
}
|
||||
}
|
||||
|
||||
if (view) [[likely]]
|
||||
{
|
||||
view->bind();
|
||||
|
||||
if (current_fragment_program.redirected_textures & (1 << i))
|
||||
{
|
||||
_SelectTexture(GL_STENCIL_MIRRORS_START + i);
|
||||
|
||||
auto root_texture = static_cast<gl::viewable_image*>(view->image());
|
||||
auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);
|
||||
stencil_view->bind();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto target = gl::get_target(current_fragment_program.get_texture_dimension(i));
|
||||
glBindTexture(target, m_null_textures[target]->id());
|
||||
|
||||
if (current_fragment_program.redirected_textures & (1 << i))
|
||||
{
|
||||
_SelectTexture(GL_STENCIL_MIRRORS_START + i);
|
||||
glBindTexture(target, m_null_textures[target]->id());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
|
||||
{
|
||||
if (current_vp_metadata.referenced_textures_mask & (1 << i))
|
||||
{
|
||||
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
|
||||
_SelectTexture(GL_VERTEX_TEXTURES_START + i);
|
||||
|
||||
if (rsx::method_registers.vertex_textures[i].enabled() &&
|
||||
sampler_state->validate())
|
||||
{
|
||||
if (sampler_state->image_handle) [[likely]]
|
||||
{
|
||||
sampler_state->image_handle->bind();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
glBindTexture(GL_TEXTURE_2D, GL_NONE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_gl_texture_cache.release_uncached_temporary_subresources();
|
||||
|
||||
m_frame_stats.textures_upload_time += m_profiler.duration();
|
||||
|
||||
// Optionally do memory synchronization if the texture stage has not yet triggered this
|
||||
if (true)//g_cfg.video.strict_rendering_mode)
|
||||
{
|
||||
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
|
||||
|
||||
if (ds) ds->write_barrier(cmd);
|
||||
|
||||
for (auto &rtt : m_rtts.m_bound_render_targets)
|
||||
{
|
||||
if (auto surface = std::get<1>(rtt))
|
||||
{
|
||||
surface->write_barrier(cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
rsx::simple_array<int> buffers_to_clear;
|
||||
bool clear_all_color = true;
|
||||
bool clear_depth = false;
|
||||
|
||||
for (int index = 0; index < 4; index++)
|
||||
{
|
||||
if (m_rtts.m_bound_render_targets[index].first)
|
||||
{
|
||||
if (!m_rtts.m_bound_render_targets[index].second->dirty())
|
||||
clear_all_color = false;
|
||||
else
|
||||
buffers_to_clear.push_back(index);
|
||||
}
|
||||
}
|
||||
|
||||
if (ds && ds->dirty())
|
||||
{
|
||||
clear_depth = true;
|
||||
}
|
||||
|
||||
if (clear_depth || !buffers_to_clear.empty())
|
||||
{
|
||||
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
|
||||
GLenum mask = 0;
|
||||
|
||||
if (clear_depth)
|
||||
{
|
||||
gl_state.depth_mask(GL_TRUE);
|
||||
gl_state.clear_depth(1.f);
|
||||
gl_state.clear_stencil(255);
|
||||
mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
||||
}
|
||||
|
||||
if (clear_all_color)
|
||||
mask |= GL_COLOR_BUFFER_BIT;
|
||||
|
||||
glClear(mask);
|
||||
|
||||
if (!buffers_to_clear.empty() && !clear_all_color)
|
||||
{
|
||||
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
|
||||
//It is impossible for the render target to be type A or B here (clear all would have been flagged)
|
||||
for (auto &i : buffers_to_clear)
|
||||
glClearBufferfv(GL_COLOR, i, colors);
|
||||
}
|
||||
|
||||
if (clear_depth)
|
||||
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
|
||||
}
|
||||
}
|
||||
|
||||
// Unconditionally enable stencil test if it was disabled before
|
||||
gl_state.enable(GL_TRUE, GL_SCISSOR_TEST);
|
||||
|
||||
update_draw_state();
|
||||
|
||||
if (g_cfg.video.debug_output)
|
||||
{
|
||||
m_program->validate();
|
||||
}
|
||||
|
||||
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
|
||||
rsx::method_registers.current_draw_clause.begin();
|
||||
int subdraw = 0;
|
||||
do
|
||||
{
|
||||
if (!subdraw)
|
||||
{
|
||||
analyse_inputs_interleaved(m_vertex_layout);
|
||||
if (!m_vertex_layout.validate())
|
||||
{
|
||||
// Execute remainining pipeline barriers with NOP draw
|
||||
do
|
||||
{
|
||||
rsx::method_registers.current_draw_clause.execute_pipeline_dependencies();
|
||||
}
|
||||
while (rsx::method_registers.current_draw_clause.next());
|
||||
|
||||
rsx::method_registers.current_draw_clause.end();
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed)
|
||||
{
|
||||
// Rebase vertex bases instead of
|
||||
for (auto &info : m_vertex_layout.interleaved_blocks)
|
||||
{
|
||||
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
|
||||
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location, HERE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
++subdraw;
|
||||
|
||||
if (manually_flush_ring_buffers)
|
||||
{
|
||||
//Use approximations to reserve space. This path is mostly for debug purposes anyway
|
||||
u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||
u32 approx_working_buffer_size = approx_vertex_count * 256;
|
||||
|
||||
//Allocate 256K heap if we have no approximation at this time (inlined array)
|
||||
m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
|
||||
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
|
||||
}
|
||||
|
||||
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
|
||||
auto upload_info = set_vertex_buffer();
|
||||
do_heap_cleanup();
|
||||
|
||||
if (upload_info.vertex_draw_count == 0)
|
||||
{
|
||||
// Malformed vertex setup; abort
|
||||
continue;
|
||||
}
|
||||
|
||||
update_vertex_env(upload_info);
|
||||
|
||||
if (!upload_info.index_info)
|
||||
{
|
||||
if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||
{
|
||||
glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
|
||||
const auto draw_count = subranges.size();
|
||||
const auto driver_caps = gl::get_driver_caps();
|
||||
bool use_draw_arrays_fallback = false;
|
||||
|
||||
m_scratch_buffer.resize(draw_count * 24);
|
||||
GLint* firsts = reinterpret_cast<GLint*>(m_scratch_buffer.data());
|
||||
GLsizei* counts = (firsts + draw_count);
|
||||
const GLvoid** offsets = reinterpret_cast<const GLvoid**>(counts + draw_count);
|
||||
|
||||
u32 first = 0;
|
||||
u32 dst_index = 0;
|
||||
for (const auto &range : subranges)
|
||||
{
|
||||
firsts[dst_index] = first;
|
||||
counts[dst_index] = range.count;
|
||||
offsets[dst_index++] = reinterpret_cast<const GLvoid*>(u64{first << 2});
|
||||
|
||||
if (driver_caps.vendor_AMD && (first + range.count) > (0x100000 >> 2))
|
||||
{
|
||||
//Unlikely, but added here in case the identity buffer is not large enough somehow
|
||||
use_draw_arrays_fallback = true;
|
||||
break;
|
||||
}
|
||||
|
||||
first += range.count;
|
||||
}
|
||||
|
||||
if (use_draw_arrays_fallback)
|
||||
{
|
||||
//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
|
||||
for (u32 n = 0; n < draw_count; ++n)
|
||||
{
|
||||
glDrawArrays(draw_mode, firsts[n], counts[n]);
|
||||
}
|
||||
}
|
||||
else if (driver_caps.vendor_AMD)
|
||||
{
|
||||
//Use identity index buffer to fix broken vertexID on AMD
|
||||
m_identity_index_buffer->bind();
|
||||
glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, static_cast<GLsizei>(draw_count));
|
||||
}
|
||||
else
|
||||
{
|
||||
//Normal render
|
||||
glMultiDrawArrays(draw_mode, firsts, counts, static_cast<GLsizei>(draw_count));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const GLenum index_type = std::get<0>(*upload_info.index_info);
|
||||
const u32 index_offset = std::get<1>(*upload_info.index_info);
|
||||
const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
|
||||
|
||||
if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
|
||||
{
|
||||
glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT) ? 0xffff : 0xffffffff);
|
||||
}
|
||||
|
||||
m_index_ring_buffer->bind();
|
||||
|
||||
if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||
{
|
||||
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, reinterpret_cast<GLvoid*>(u64{index_offset}));
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
|
||||
const auto draw_count = subranges.size();
|
||||
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
|
||||
uintptr_t index_ptr = index_offset;
|
||||
m_scratch_buffer.resize(draw_count * 16);
|
||||
|
||||
GLsizei *counts = reinterpret_cast<GLsizei*>(m_scratch_buffer.data());
|
||||
const GLvoid** offsets = reinterpret_cast<const GLvoid**>(counts + draw_count);
|
||||
int dst_index = 0;
|
||||
|
||||
for (const auto &range : subranges)
|
||||
{
|
||||
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
|
||||
counts[dst_index] = index_size;
|
||||
offsets[dst_index++] = reinterpret_cast<const GLvoid*>(index_ptr);
|
||||
|
||||
index_ptr += (index_size << type_scale);
|
||||
}
|
||||
|
||||
glMultiDrawElements(draw_mode, counts, index_type, offsets, static_cast<GLsizei>(draw_count));
|
||||
}
|
||||
}
|
||||
} while (rsx::method_registers.current_draw_clause.next());
|
||||
|
||||
m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled);
|
||||
|
||||
m_attrib_ring_buffer->notify();
|
||||
m_index_ring_buffer->notify();
|
||||
m_fragment_env_buffer->notify();
|
||||
m_vertex_env_buffer->notify();
|
||||
m_texture_parameters_buffer->notify();
|
||||
m_vertex_layout_buffer->notify();
|
||||
m_fragment_constants_buffer->notify();
|
||||
m_transform_constants_buffer->notify();
|
||||
|
||||
m_frame_stats.textures_upload_time += m_profiler.duration();
|
||||
|
||||
rsx::thread::end();
|
||||
}
|
||||
|
||||
void GLGSRender::set_viewport()
|
||||
{
|
||||
// NOTE: scale offset matrix already contains the viewport transformation
|
||||
@ -1352,138 +750,6 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
|
||||
}
|
||||
}
|
||||
|
||||
void GLGSRender::update_draw_state()
|
||||
{
|
||||
m_profiler.start();
|
||||
|
||||
for (int index = 0; index < m_rtts.get_color_surface_count(); ++index)
|
||||
{
|
||||
bool color_mask_b = rsx::method_registers.color_mask_b(index);
|
||||
bool color_mask_g = rsx::method_registers.color_mask_g(index);
|
||||
bool color_mask_r = rsx::method_registers.color_mask_r(index);
|
||||
bool color_mask_a = rsx::method_registers.color_mask_a(index);
|
||||
|
||||
if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8)
|
||||
{
|
||||
//Map GB components onto RG
|
||||
rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
|
||||
}
|
||||
|
||||
gl_state.color_maski(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a);
|
||||
}
|
||||
|
||||
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
|
||||
gl_state.stencil_mask(rsx::method_registers.stencil_mask());
|
||||
|
||||
gl_state.enable(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled(), GL_DEPTH_CLAMP);
|
||||
|
||||
if (gl_state.enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST))
|
||||
{
|
||||
gl_state.depth_func(comparison_op(rsx::method_registers.depth_func()));
|
||||
}
|
||||
|
||||
if (glDepthBoundsEXT && (gl_state.enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT)))
|
||||
{
|
||||
gl_state.depth_bounds(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
|
||||
}
|
||||
|
||||
gl_state.enable(rsx::method_registers.dither_enabled(), GL_DITHER);
|
||||
|
||||
if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST))
|
||||
{
|
||||
glStencilFunc(comparison_op(rsx::method_registers.stencil_func()),
|
||||
rsx::method_registers.stencil_func_ref(),
|
||||
rsx::method_registers.stencil_func_mask());
|
||||
|
||||
glStencilOp(stencil_op(rsx::method_registers.stencil_op_fail()), stencil_op(rsx::method_registers.stencil_op_zfail()),
|
||||
stencil_op(rsx::method_registers.stencil_op_zpass()));
|
||||
|
||||
if (rsx::method_registers.two_sided_stencil_test_enabled())
|
||||
{
|
||||
glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask());
|
||||
|
||||
glStencilFuncSeparate(GL_BACK, comparison_op(rsx::method_registers.back_stencil_func()),
|
||||
rsx::method_registers.back_stencil_func_ref(), rsx::method_registers.back_stencil_func_mask());
|
||||
|
||||
glStencilOpSeparate(GL_BACK, stencil_op(rsx::method_registers.back_stencil_op_fail()),
|
||||
stencil_op(rsx::method_registers.back_stencil_op_zfail()), stencil_op(rsx::method_registers.back_stencil_op_zpass()));
|
||||
}
|
||||
}
|
||||
|
||||
bool mrt_blend_enabled[] =
|
||||
{
|
||||
rsx::method_registers.blend_enabled(),
|
||||
rsx::method_registers.blend_enabled_surface_1(),
|
||||
rsx::method_registers.blend_enabled_surface_2(),
|
||||
rsx::method_registers.blend_enabled_surface_3()
|
||||
};
|
||||
|
||||
if (mrt_blend_enabled[0] || mrt_blend_enabled[1] || mrt_blend_enabled[2] || mrt_blend_enabled[3])
|
||||
{
|
||||
glBlendFuncSeparate(blend_factor(rsx::method_registers.blend_func_sfactor_rgb()),
|
||||
blend_factor(rsx::method_registers.blend_func_dfactor_rgb()),
|
||||
blend_factor(rsx::method_registers.blend_func_sfactor_a()),
|
||||
blend_factor(rsx::method_registers.blend_func_dfactor_a()));
|
||||
|
||||
auto blend_colors = rsx::get_constant_blend_colors();
|
||||
glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]);
|
||||
|
||||
glBlendEquationSeparate(blend_equation(rsx::method_registers.blend_equation_rgb()),
|
||||
blend_equation(rsx::method_registers.blend_equation_a()));
|
||||
}
|
||||
|
||||
gl_state.enablei(mrt_blend_enabled[0], GL_BLEND, 0);
|
||||
gl_state.enablei(mrt_blend_enabled[1], GL_BLEND, 1);
|
||||
gl_state.enablei(mrt_blend_enabled[2], GL_BLEND, 2);
|
||||
gl_state.enablei(mrt_blend_enabled[3], GL_BLEND, 3);
|
||||
|
||||
if (gl_state.enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP))
|
||||
{
|
||||
gl_state.logic_op(logic_op(rsx::method_registers.logic_operation()));
|
||||
}
|
||||
|
||||
gl_state.line_width(rsx::method_registers.line_width());
|
||||
gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH);
|
||||
|
||||
gl_state.enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT);
|
||||
gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
|
||||
gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);
|
||||
|
||||
//offset_bias is the constant factor, multiplied by the implementation factor R
|
||||
//offset_scale is the slope factor, multiplied by the triangle slope factor M
|
||||
gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());
|
||||
|
||||
if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
|
||||
{
|
||||
gl_state.cull_face(cull_face(rsx::method_registers.cull_face_mode()));
|
||||
}
|
||||
|
||||
gl_state.front_face(front_face(rsx::method_registers.front_face_mode()));
|
||||
|
||||
// Sample control
|
||||
// TODO: MinSampleShading
|
||||
//gl_state.enable(rsx::method_registers.msaa_enabled(), GL_MULTISAMPLE);
|
||||
//gl_state.enable(rsx::method_registers.msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE);
|
||||
//gl_state.enable(rsx::method_registers.msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE);
|
||||
|
||||
//TODO
|
||||
//NV4097_SET_ANISO_SPREAD
|
||||
//NV4097_SET_SPECULAR_ENABLE
|
||||
//NV4097_SET_TWO_SIDE_LIGHT_EN
|
||||
//NV4097_SET_FLAT_SHADE_OP
|
||||
//NV4097_SET_EDGE_FLAG
|
||||
|
||||
|
||||
|
||||
//NV4097_SET_COLOR_KEY_COLOR
|
||||
//NV4097_SET_SHADER_CONTROL
|
||||
//NV4097_SET_ZMIN_MAX_CONTROL
|
||||
//NV4097_SET_ANTI_ALIASING_CONTROL
|
||||
//NV4097_SET_CLIP_ID_TEST_ENABLE
|
||||
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
}
|
||||
|
||||
bool GLGSRender::on_access_violation(u32 address, bool is_writing)
|
||||
{
|
||||
const bool can_flush = (std::this_thread::get_id() == m_rsx_thread);
|
||||
|
@ -75,6 +75,7 @@ private:
|
||||
gl::sampler_state m_vs_sampler_states[rsx::limits::vertex_textures_count]; // Vertex textures
|
||||
|
||||
gl::glsl::program *m_program;
|
||||
gl::glsl::program m_shader_interpreter;
|
||||
|
||||
gl_render_targets m_rtts;
|
||||
|
||||
@ -154,6 +155,9 @@ private:
|
||||
|
||||
void update_draw_state();
|
||||
|
||||
void load_texture_env();
|
||||
void bind_texture_env();
|
||||
|
||||
gl::texture* get_present_source(gl::present_surface_info* info, const rsx::avconf* avconfig);
|
||||
|
||||
public:
|
||||
@ -174,6 +178,7 @@ protected:
|
||||
void clear_surface(u32 arg) override;
|
||||
void begin() override;
|
||||
void end() override;
|
||||
void emit_geometry(u32 sub_index) override;
|
||||
|
||||
void on_init_thread() override;
|
||||
void on_exit() override;
|
||||
|
7
rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp
Normal file
7
rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp
Normal file
@ -0,0 +1,7 @@
|
||||
#include "stdafx.h"
|
||||
#include "GLShaderInterpreter.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
|
||||
}
|
15
rpcs3/Emu/RSX/GL/GLShaderInterpreter.h
Normal file
15
rpcs3/Emu/RSX/GL/GLShaderInterpreter.h
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
#include "GLGSRender.h"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
class shader_interpreter : glsl::program
|
||||
{
|
||||
glsl::shader vs;
|
||||
glsl::shader fs;
|
||||
|
||||
public:
|
||||
void create();
|
||||
void destroy();
|
||||
};
|
||||
}
|
930
rpcs3/Emu/RSX/VK/VKDraw.cpp
Normal file
930
rpcs3/Emu/RSX/VK/VKDraw.cpp
Normal file
@ -0,0 +1,930 @@
|
||||
#include "stdafx.h"
|
||||
#include "VKGSRender.h"
|
||||
#include "../Common/BufferUtils.h"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
VkImageViewType get_view_type(rsx::texture_dimension_extended type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case rsx::texture_dimension_extended::texture_dimension_1d:
|
||||
return VK_IMAGE_VIEW_TYPE_1D;
|
||||
case rsx::texture_dimension_extended::texture_dimension_2d:
|
||||
return VK_IMAGE_VIEW_TYPE_2D;
|
||||
case rsx::texture_dimension_extended::texture_dimension_cubemap:
|
||||
return VK_IMAGE_VIEW_TYPE_CUBE;
|
||||
case rsx::texture_dimension_extended::texture_dimension_3d:
|
||||
return VK_IMAGE_VIEW_TYPE_3D;
|
||||
default: ASSUME(0);
|
||||
};
|
||||
}
|
||||
|
||||
VkCompareOp get_compare_func(rsx::comparison_function op, bool reverse_direction = false)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case rsx::comparison_function::never: return VK_COMPARE_OP_NEVER;
|
||||
case rsx::comparison_function::greater: return reverse_direction ? VK_COMPARE_OP_LESS: VK_COMPARE_OP_GREATER;
|
||||
case rsx::comparison_function::less: return reverse_direction ? VK_COMPARE_OP_GREATER: VK_COMPARE_OP_LESS;
|
||||
case rsx::comparison_function::less_or_equal: return reverse_direction ? VK_COMPARE_OP_GREATER_OR_EQUAL: VK_COMPARE_OP_LESS_OR_EQUAL;
|
||||
case rsx::comparison_function::greater_or_equal: return reverse_direction ? VK_COMPARE_OP_LESS_OR_EQUAL: VK_COMPARE_OP_GREATER_OR_EQUAL;
|
||||
case rsx::comparison_function::equal: return VK_COMPARE_OP_EQUAL;
|
||||
case rsx::comparison_function::not_equal: return VK_COMPARE_OP_NOT_EQUAL;
|
||||
case rsx::comparison_function::always: return VK_COMPARE_OP_ALWAYS;
|
||||
default:
|
||||
fmt::throw_exception("Unknown compare op: 0x%x" HERE, static_cast<u32>(op));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VKGSRender::begin_render_pass()
|
||||
{
|
||||
vk::begin_renderpass(
|
||||
*m_current_command_buffer,
|
||||
get_render_pass(),
|
||||
m_draw_fbo->value,
|
||||
{ positionu{0u, 0u}, sizeu{m_draw_fbo->width(), m_draw_fbo->height()} });
|
||||
}
|
||||
|
||||
void VKGSRender::close_render_pass()
|
||||
{
|
||||
vk::end_renderpass(*m_current_command_buffer);
|
||||
}
|
||||
|
||||
VkRenderPass VKGSRender::get_render_pass()
|
||||
{
|
||||
if (!m_cached_renderpass)
|
||||
{
|
||||
m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key);
|
||||
}
|
||||
|
||||
return m_cached_renderpass;
|
||||
}
|
||||
|
||||
// Uploads all dynamic pipeline state (line width, depth bias, blend constants,
// stencil masks/refs, depth bounds, viewport) to the current command buffer.
// These are the states declared dynamic in the pipeline, so they must be set
// every time a new pipeline is bound. Time spent here is accounted to setup_time.
void VKGSRender::update_draw_state()
{
	m_profiler.start();

	float actual_line_width = rsx::method_registers.line_width();
	vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);

	if (rsx::method_registers.poly_offset_fill_enabled())
	{
		// offset_bias is the constant factor, multiplied by the implementation factor R
		// offset_scale is the slope factor, multiplied by the triangle slope factor M
		// (the middle argument, depthBiasClamp, is left at 0 = unclamped)
		vkCmdSetDepthBias(*m_current_command_buffer, rsx::method_registers.poly_offset_bias(), 0.f, rsx::method_registers.poly_offset_scale());
	}
	else
	{
		// Zero bias value - disables depth bias
		vkCmdSetDepthBias(*m_current_command_buffer, 0.f, 0.f, 0.f);
	}

	// Update dynamic state
	if (rsx::method_registers.blend_enabled())
	{
		// Update blend constants
		auto blend_colors = rsx::get_constant_blend_colors();
		vkCmdSetBlendConstants(*m_current_command_buffer, blend_colors.data());
	}

	if (rsx::method_registers.stencil_test_enabled())
	{
		// With one-sided stencil, the front-face values are applied to both faces
		// (VK_STENCIL_FRONT_AND_BACK); otherwise front and back are set separately.
		const bool two_sided_stencil = rsx::method_registers.two_sided_stencil_test_enabled();
		VkStencilFaceFlags face_flag = (two_sided_stencil) ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FRONT_AND_BACK;

		vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask());
		vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask());
		vkCmdSetStencilReference(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_ref());

		if (two_sided_stencil)
		{
			vkCmdSetStencilWriteMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_mask());
			vkCmdSetStencilCompareMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_mask());
			vkCmdSetStencilReference(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_ref());
		}
	}

	// Depth bounds test is an optional device feature; skip entirely when unsupported
	if (m_device->get_depth_bounds_support())
	{
		if (rsx::method_registers.depth_bounds_test_enabled())
		{
			// Update depth bounds min/max
			vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
		}
		else
		{
			// Full range effectively disables the test result
			vkCmdSetDepthBounds(*m_current_command_buffer, 0.f, 1.f);
		}
	}

	bind_viewport();

	// TODO: Set up other render-state parameters into the program pipeline

	m_frame_stats.setup_time += m_profiler.duration();
}
|
||||
|
||||
// Resolves the texture state for all referenced fragment and vertex texture slots:
// uploads/locates each texture through the texture cache and (re)creates the
// matching VkSampler objects when the descriptor parameters changed.
// Runs under m_sampler_mutex. If any bound texture aliases a render target
// (cyclic reference), the renderpass key is regenerated at the end so that
// attachments sampled in feedback loops get a compatible renderpass.
void VKGSRender::load_texture_env()
{
	//Load textures
	bool update_framebuffer_sourced = false;
	bool check_for_cyclic_refs = false;

	std::lock_guard lock(m_sampler_mutex);

	// If the surface store changed since last draw, any descriptor sourced from
	// framebuffer storage may be stale and must be refreshed below.
	if (surface_store_tag != m_rtts.cache_tag) [[unlikely]]
	{
		update_framebuffer_sourced = true;
		surface_store_tag = m_rtts.cache_tag;
	}

	// --- Fragment (pixel shader) textures ---
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		if (!fs_sampler_state[i])
			fs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();

		if (m_samplers_dirty || m_textures_dirty[i] ||
			(update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
		{
			auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());

			if (rsx::method_registers.fragment_textures[i].enabled())
			{
				check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
				*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);

				if (sampler_state->is_cyclic_reference)
				{
					check_for_cyclic_refs |= true;
				}

				// A sampler is (re)created if none exists yet or its parameters no longer match
				bool replace = !fs_sampler_handles[i];
				VkFilter mag_filter;
				vk::minification_filter min_filter;
				f32 min_lod = 0.f, max_lod = 0.f;
				f32 lod_bias = 0.f;

				const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
				VkBool32 compare_enabled = VK_FALSE;
				VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER;

				if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT)
				{
					if (m_device->get_formats_support().d24_unorm_s8)
					{
						// NOTE:
						// The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3
						// In case of projected shadow lookup the result of the divide operation has its Z clamped to [0-1] before comparison
						// Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage
						// Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results

						// NOTE2:
						// To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available

						compare_enabled = VK_TRUE;
						depth_compare_mode = vk::get_compare_func(rsx::method_registers.fragment_textures[i].zfunc(), true);
					}
				}

				const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0;
				const f32 af_level = aniso_override ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
				const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s());
				const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t());
				const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r());
				const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color());

				// Check if non-point filtering can even be used on this format
				bool can_sample_linear;
				if (sampler_state->format_class == rsx::format_type::color) [[likely]]
				{
					// Most PS3-like formats can be linearly filtered without problem
					can_sample_linear = true;
				}
				else
				{
					// Not all GPUs support linear filtering of depth formats
					const auto vk_format = sampler_state->image_handle ? sampler_state->image_handle->image()->format() :
						vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format);

					can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
				}

				const auto mipmap_count = rsx::method_registers.fragment_textures[i].get_exact_mipmap_count();
				min_filter = vk::get_min_filter(rsx::method_registers.fragment_textures[i].min_filter());

				if (can_sample_linear)
				{
					mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
				}
				else
				{
					// Fall back to point sampling when the format cannot filter linearly
					mag_filter = VK_FILTER_NEAREST;
					min_filter.filter = VK_FILTER_NEAREST;
				}

				if (min_filter.sample_mipmaps && mipmap_count > 1)
				{
					// Determine how many mip levels are actually backed by data;
					// deferred/atlas sources may provide fewer than the register claims
					f32 actual_mipmaps;
					if (sampler_state->upload_context == rsx::texture_upload_context::shader_read)
					{
						actual_mipmaps = static_cast<f32>(mipmap_count);
					}
					else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather)
					{
						// Clamp min and max lod
						actual_mipmaps = static_cast<f32>(sampler_state->external_subresource_desc.sections_to_copy.size());
					}
					else
					{
						actual_mipmaps = 1.f;
					}

					if (actual_mipmaps > 1.f)
					{
						min_lod = rsx::method_registers.fragment_textures[i].min_lod();
						max_lod = rsx::method_registers.fragment_textures[i].max_lod();
						lod_bias = rsx::method_registers.fragment_textures[i].bias();

						min_lod = std::min(min_lod, actual_mipmaps - 1.f);
						max_lod = std::min(max_lod, actual_mipmaps - 1.f);

						if (min_filter.mipmap_mode == VK_SAMPLER_MIPMAP_MODE_NEAREST)
						{
							// Round to nearest 0.5 to work around some broken games
							// Unlike openGL, sampler parameters cannot be dynamically changed on vulkan, leading to many permutations
							lod_bias = std::floor(lod_bias * 2.f + 0.5f) * 0.5f;
						}
					}
					else
					{
						// Single usable level: disable mipmapping altogether
						min_lod = max_lod = lod_bias = 0.f;
						min_filter.mipmap_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
					}
				}

				if (fs_sampler_handles[i] && m_textures_dirty[i])
				{
					if (!fs_sampler_handles[i]->matches(wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod,
						min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode))
					{
						replace = true;
					}
				}

				if (replace)
				{
					fs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(*m_device, wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod,
						min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode);
				}
			}
			else
			{
				// Slot disabled: reset the cached descriptor
				*sampler_state = {};
			}

			m_textures_dirty[i] = false;
		}
	}

	// --- Vertex textures --- (always point-sampled with REPEAT addressing)
	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		if (!vs_sampler_state[i])
			vs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();

		if (m_samplers_dirty || m_vertex_textures_dirty[i] ||
			(update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
		{
			auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());

			if (rsx::method_registers.vertex_textures[i].enabled())
			{
				check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
				*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);

				if (sampler_state->is_cyclic_reference || sampler_state->external_subresource_desc.do_not_cache)
				{
					check_for_cyclic_refs |= true;
				}

				bool replace = !vs_sampler_handles[i];
				const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
				const auto min_lod = rsx::method_registers.vertex_textures[i].min_lod();
				const auto max_lod = rsx::method_registers.vertex_textures[i].max_lod();
				const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color());

				if (vs_sampler_handles[i])
				{
					if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
						unnormalized_coords, 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color))
					{
						replace = true;
					}
				}

				if (replace)
				{
					vs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(
						*m_device,
						VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
						unnormalized_coords,
						0.f, 1.f, min_lod, max_lod,
						VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color);
				}
			}
			else
				*sampler_state = {};

			m_vertex_textures_dirty[i] = false;
		}
	}

	m_samplers_dirty.store(false);

	if (check_for_cyclic_refs)
	{
		// Regenerate renderpass key
		if (const auto key = vk::get_renderpass_key(m_fbo_images, m_current_renderpass_key);
			key != m_current_renderpass_key)
		{
			m_current_renderpass_key = key;
			m_cached_renderpass = VK_NULL_HANDLE;
		}
	}
}
|
||||
|
||||
// Writes the texture/sampler descriptors resolved by load_texture_env() into the
// current frame's descriptor set, transitioning each image to
// SHADER_READ_ONLY_OPTIMAL (or leaving it in GENERAL for cyclic references) first.
// Slots the shader does not reference are skipped; unusable slots are bound to
// null samplers/views so the descriptor set stays fully populated.
void VKGSRender::bind_texture_env()
{
	// --- Fragment shader textures ---
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		if (current_fp_metadata.referenced_textures_mask & (1 << i))
		{
			vk::image_view* view = nullptr;
			auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());

			if (rsx::method_registers.fragment_textures[i].enabled() &&
				sampler_state->validate())
			{
				if (view = sampler_state->image_handle; !view)
				{
					// Requires update, copy subresource
					view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
				}
				else
				{
					// Transition the image into a shader-readable layout.
					// The expected source layout is cross-checked against the upload context.
					switch (auto raw = view->image(); raw->current_layout)
					{
					default:
					//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
						break;
					case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
						verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
						raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
						break;
					case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
						verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
						raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
						break;
					case VK_IMAGE_LAYOUT_GENERAL:
						verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
						if (!sampler_state->is_cyclic_reference)
						{
							// This was used in a cyclic ref before, but is missing a barrier
							// No need for a full stall, use a custom barrier instead
							VkPipelineStageFlags src_stage;
							VkAccessFlags src_access;
							if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
							{
								src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
								src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
							}
							else
							{
								src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
								src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
							}

							vk::insert_image_memory_barrier(
								*m_current_command_buffer,
								raw->value,
								VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
								src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
								src_access, VK_ACCESS_SHADER_READ_BIT,
								{ raw->aspect(), 0, 1, 0, 1 });

							raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
						}
						break;
					case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
					case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
						verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference;
						raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
						break;
					}
				}
			}

			if (view) [[likely]]
			{
				m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout },
					i,
					::glsl::program_domain::glsl_fragment_program,
					m_current_frame->descriptor_set);

				if (current_fragment_program.redirected_textures & (1 << i))
				{
					// Stencil mirror required: bind a second, stencil-aspect view of the
					// same image so the shader can read stencil data separately
					auto root_image = static_cast<vk::viewable_image*>(view->image());
					auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);

					if (!m_stencil_mirror_sampler)
					{
						// Lazily created, shared by all stencil-mirror bindings
						m_stencil_mirror_sampler = std::make_unique<vk::sampler>(*m_device,
							VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
							VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
							VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
							VK_FALSE, 0.f, 1.f, 0.f, 0.f,
							VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST,
							VK_BORDER_COLOR_INT_OPAQUE_BLACK);
					}

					m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout },
						i,
						::glsl::program_domain::glsl_fragment_program,
						m_current_frame->descriptor_set,
						true);
				}
			}
			else
			{
				// No usable image; bind null resources matching the expected view dimension
				const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i));
				m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
					i,
					::glsl::program_domain::glsl_fragment_program,
					m_current_frame->descriptor_set);

				if (current_fragment_program.redirected_textures & (1 << i))
				{
					m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
						i,
						::glsl::program_domain::glsl_fragment_program,
						m_current_frame->descriptor_set,
						true);
				}
			}
		}
	}

	// --- Vertex shader textures ---
	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		if (current_vp_metadata.referenced_textures_mask & (1 << i))
		{
			if (!rsx::method_registers.vertex_textures[i].enabled())
			{
				const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
				m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
					i,
					::glsl::program_domain::glsl_vertex_program,
					m_current_frame->descriptor_set);

				continue;
			}

			auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
			auto image_ptr = sampler_state->image_handle;

			if (!image_ptr && sampler_state->validate())
			{
				image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
				// Temporary subresources are not cached; force re-resolve next draw
				m_vertex_textures_dirty[i] = true;
			}

			if (!image_ptr)
			{
				rsx_log.error("Texture upload failed to vtexture index %d. Binding null sampler.", i);
				const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));

				m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
					i,
					::glsl::program_domain::glsl_vertex_program,
					m_current_frame->descriptor_set);

				continue;
			}

			// Layout transition, mirroring the fragment-texture handling above
			// but synchronizing against the vertex shader stage
			switch (auto raw = image_ptr->image(); raw->current_layout)
			{
			default:
			//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
				break;
			case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
				verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
				raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
				break;
			case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
				verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
				raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
				break;
			case VK_IMAGE_LAYOUT_GENERAL:
				verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
				if (!sampler_state->is_cyclic_reference)
				{
					// Custom barrier, see similar block in FS stage
					VkPipelineStageFlags src_stage;
					VkAccessFlags src_access;
					if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
					{
						src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
						src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
					}
					else
					{
						src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
						src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
					}

					vk::insert_image_memory_barrier(
						*m_current_command_buffer,
						raw->value,
						VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
						src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
						src_access, VK_ACCESS_SHADER_READ_BIT,
						{ raw->aspect(), 0, 1, 0, 1 });

					raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				}
				break;
			case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
			case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
				verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
				raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
				break;
			}

			m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout },
				i,
				::glsl::program_domain::glsl_vertex_program,
				m_current_frame->descriptor_set);
		}
	}
}
|
||||
|
||||
// Emits one sub-draw of the current draw clause: uploads vertex data, refreshes
// the vertex-related descriptors, binds pipeline/renderpass state on the first
// sub-draw, then records the vkCmdDraw*/vkCmdDrawIndexed* commands.
// @param sub_index  zero-based index of the sub-draw within the current clause;
//                   sub_index == 0 performs the per-clause setup.
void VKGSRender::emit_geometry(u32 sub_index)
{
	auto &draw_call = rsx::method_registers.current_draw_clause;
	m_profiler.start();

	if (sub_index == 0)
	{
		analyse_inputs_interleaved(m_vertex_layout);

		if (!m_vertex_layout.validate())
		{
			// No vertex inputs enabled
			// Execute remaining pipeline barriers with NOP draw
			do
			{
				draw_call.execute_pipeline_dependencies();
			}
			while (draw_call.next());

			draw_call.end();
			return;
		}
	}
	else if (draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed)
	{
		// Rebase vertex bases instead of reuploading the layout;
		// the base offset is invariant across the loop, so fetch it once
		const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
		for (auto &info : m_vertex_layout.interleaved_blocks)
		{
			info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location, HERE);
		}
	}

	// Snapshot attribute buffer handles; upload_vertex_data() may reallocate them
	const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
	const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;

	// Programs data is dependent on vertex state
	auto upload_info = upload_vertex_data();
	if (!upload_info.vertex_draw_count)
	{
		// Malformed vertex setup; abort
		return;
	}

	m_frame_stats.vertex_upload_time += m_profiler.duration();

	auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
	auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
	bool update_descriptors = false;

	const auto& binding_table = m_device->get_pipeline_binding_table();

	if (sub_index == 0)
	{
		update_descriptors = true;

		// Allocate stream layout memory for this batch
		m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128;
		m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range);

		if (vk::test_status_interrupt(vk::heap_changed))
		{
			// The heap was reallocated; retire the old layout view with this frame
			if (m_vertex_layout_storage &&
				m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value)
			{
				m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage));
			}

			vk::clear_status_interrupt(vk::heap_changed);
		}
	}
	else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
	{
		// Need to update descriptors; make a copy for the next draw
		VkDescriptorSet new_descriptor_set = allocate_descriptor_set();
		std::vector<VkCopyDescriptorSet> copy_set(binding_table.total_descriptor_bindings);

		for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n)
		{
			copy_set[n] =
			{
				VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET,   // sType
				nullptr,                                 // pNext
				m_current_frame->descriptor_set,         // srcSet
				n,                                       // srcBinding
				0u,                                      // srcArrayElement
				new_descriptor_set,                      // dstSet
				n,                                       // dstBinding
				0u,                                      // dstArrayElement
				1u                                       // descriptorCount
			};
		}

		// Copy-only update: no writes, only the descriptor copies built above
		vkUpdateDescriptorSets(*m_device, 0, nullptr, binding_table.total_descriptor_bindings, copy_set.data());
		m_current_frame->descriptor_set = new_descriptor_set;

		update_descriptors = true;
	}

	// Update vertex fetch parameters
	update_vertex_env(sub_index, upload_info);

	verify(HERE), m_vertex_layout_storage;
	if (update_descriptors)
	{
		m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set);
		m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set);
		m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set);
	}

	if (!m_current_subdraw_id++)
	{
		// First sub-draw of the clause: bind pipeline, set dynamic state, open the pass
		vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
		update_draw_state();
		begin_render_pass();

		if (cond_render_ctrl.hw_cond_active && m_device->get_conditional_render_support())
		{
			// It is inconvenient that conditional rendering breaks other things like compute dispatch
			// TODO: If this is heavy, refactor the resources into globals and add checks around compute dispatch
			VkConditionalRenderingBeginInfoEXT info{};
			info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
			info.buffer = m_cond_render_buffer->value;

			m_device->cmdBeginConditionalRenderingEXT(*m_current_command_buffer, &info);
			m_current_command_buffer->flags |= vk::command_buffer::cb_has_conditional_render;
		}
	}

	// Bind the new set of descriptors for use with this draw call
	vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);

	m_frame_stats.setup_time += m_profiler.duration();

	if (!upload_info.index_info)
	{
		// Non-indexed path
		if (draw_call.is_single_draw())
		{
			vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
		}
		else
		{
			u32 vertex_offset = 0;
			const auto subranges = draw_call.get_subranges();
			for (const auto &range : subranges)
			{
				vkCmdDraw(*m_current_command_buffer, range.count, 1, vertex_offset, 0);
				vertex_offset += range.count;
			}
		}
	}
	else
	{
		// Indexed path: index_info packs (offset, VkIndexType) into the upload heap
		const VkIndexType index_type = std::get<1>(*upload_info.index_info);
		const VkDeviceSize offset = std::get<0>(*upload_info.index_info);

		vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);

		if (rsx::method_registers.current_draw_clause.is_single_draw())
		{
			const u32 index_count = upload_info.vertex_draw_count;
			vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
		}
		else
		{
			u32 vertex_offset = 0;
			const auto subranges = draw_call.get_subranges();
			for (const auto &range : subranges)
			{
				// Primitive emulation may expand the index count per subrange
				const auto count = get_index_count(draw_call.primitive, range.count);
				vkCmdDrawIndexed(*m_current_command_buffer, count, 1, vertex_offset, 0, 0);
				vertex_offset += count;
			}
		}
	}

	m_frame_stats.draw_exec_time += m_profiler.duration();
}
|
||||
|
||||
void VKGSRender::begin()
|
||||
{
|
||||
rsx::thread::begin();
|
||||
|
||||
if (skip_current_frame || swapchain_unavailable || cond_render_ctrl.disable_rendering())
|
||||
return;
|
||||
|
||||
init_buffers(rsx::framebuffer_creation_context::context_draw);
|
||||
}
|
||||
|
||||
void VKGSRender::end()
|
||||
{
|
||||
if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || cond_render_ctrl.disable_rendering())
|
||||
{
|
||||
execute_nop_draw();
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for frame resource status here because it is possible for an async flip to happen between begin/end
|
||||
if (m_current_frame->flags & frame_context_state::dirty) [[unlikely]]
|
||||
{
|
||||
check_present_status();
|
||||
|
||||
if (m_current_frame->swap_command_buffer) [[unlikely]]
|
||||
{
|
||||
// Borrow time by using the auxilliary context
|
||||
m_aux_frame_context.grab_resources(*m_current_frame);
|
||||
m_current_frame = &m_aux_frame_context;
|
||||
}
|
||||
else if (m_current_frame->used_descriptors)
|
||||
{
|
||||
m_current_frame->descriptor_pool.reset(0);
|
||||
m_current_frame->used_descriptors = 0;
|
||||
}
|
||||
|
||||
verify(HERE), !m_current_frame->swap_command_buffer;
|
||||
|
||||
m_current_frame->flags &= ~frame_context_state::dirty;
|
||||
}
|
||||
|
||||
m_profiler.start();
|
||||
|
||||
// Check for data casts
|
||||
// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
|
||||
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
||||
if (ds && ds->old_contents.size() == 1 &&
|
||||
ds->old_contents[0].source->info.format == VK_FORMAT_B8G8R8A8_UNORM)
|
||||
{
|
||||
auto key = vk::get_renderpass_key(ds->info.format);
|
||||
auto render_pass = vk::get_renderpass(*m_device, key);
|
||||
verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE;
|
||||
|
||||
VkClearDepthStencilValue clear = { 1.f, 0xFF };
|
||||
VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
|
||||
|
||||
// Initialize source
|
||||
auto src = vk::as_rtt(ds->old_contents[0].source);
|
||||
src->read_barrier(*m_current_command_buffer);
|
||||
|
||||
switch (src->current_layout)
|
||||
{
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
break;
|
||||
//case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
default:
|
||||
src->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
break;
|
||||
}
|
||||
|
||||
// Clear explicitly before starting the inheritance transfer
|
||||
const bool preinitialized = (ds->current_layout == VK_IMAGE_LAYOUT_GENERAL);
|
||||
if (!preinitialized) ds->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range);
|
||||
if (!preinitialized) ds->pop_layout(*m_current_command_buffer);
|
||||
|
||||
// TODO: Stencil transfer
|
||||
ds->old_contents[0].init_transfer(ds);
|
||||
m_depth_converter->run(*m_current_command_buffer,
|
||||
ds->old_contents[0].src_rect(),
|
||||
ds->old_contents[0].dst_rect(),
|
||||
src->get_view(0xAAE4, rsx::default_remap_vector),
|
||||
ds, render_pass);
|
||||
|
||||
// TODO: Flush management to avoid pass running out of ubo space (very unlikely)
|
||||
ds->on_write();
|
||||
}
|
||||
|
||||
load_texture_env();
|
||||
m_frame_stats.textures_upload_time += m_profiler.duration();
|
||||
|
||||
if (!load_program())
|
||||
{
|
||||
// Program is not ready, skip drawing this
|
||||
std::this_thread::yield();
|
||||
execute_nop_draw();
|
||||
// m_rtts.on_write(); - breaks games for obvious reasons
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate descriptor set
|
||||
check_descriptors();
|
||||
m_current_frame->descriptor_set = allocate_descriptor_set();
|
||||
|
||||
// Load program execution environment
|
||||
load_program_env();
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
|
||||
bind_texture_env();
|
||||
m_texture_cache.release_uncached_temporary_subresources();
|
||||
m_frame_stats.textures_upload_time += m_profiler.duration();
|
||||
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task)
|
||||
{
|
||||
u32 occlusion_id = m_occlusion_query_pool.find_free_slot();
|
||||
if (occlusion_id == UINT32_MAX)
|
||||
{
|
||||
// Force flush
|
||||
rsx_log.error("[Performance Warning] Out of free occlusion slots. Forcing hard sync.");
|
||||
ZCULL_control::sync(this);
|
||||
|
||||
occlusion_id = m_occlusion_query_pool.find_free_slot();
|
||||
if (occlusion_id == UINT32_MAX)
|
||||
{
|
||||
//rsx_log.error("Occlusion pool overflow");
|
||||
if (m_current_task) m_current_task->result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Begin query
|
||||
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
|
||||
|
||||
auto &data = m_occlusion_map[m_active_query_info->driver_handle];
|
||||
data.indices.push_back(occlusion_id);
|
||||
data.set_sync_command_buffer(m_current_command_buffer);
|
||||
|
||||
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
|
||||
m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query);
|
||||
}
|
||||
|
||||
bool primitive_emulated = false;
|
||||
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
|
||||
|
||||
// Apply write memory barriers
|
||||
if (ds) ds->write_barrier(*m_current_command_buffer);
|
||||
|
||||
for (auto &rtt : m_rtts.m_bound_render_targets)
|
||||
{
|
||||
if (auto surface = std::get<1>(rtt))
|
||||
{
|
||||
surface->write_barrier(*m_current_command_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Final heap check...
|
||||
check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE);
|
||||
|
||||
u32 sub_index = 0;
|
||||
m_current_subdraw_id = 0;
|
||||
|
||||
rsx::method_registers.current_draw_clause.begin();
|
||||
do
|
||||
{
|
||||
emit_geometry(sub_index++);
|
||||
}
|
||||
while (rsx::method_registers.current_draw_clause.next());
|
||||
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render)
|
||||
{
|
||||
m_device->cmdEndConditionalRenderingEXT(*m_current_command_buffer);
|
||||
m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render);
|
||||
}
|
||||
|
||||
m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled);
|
||||
|
||||
rsx::thread::end();
|
||||
}
|
@ -1,11 +1,7 @@
|
||||
#include "stdafx.h"
|
||||
#include "VKGSRender.h"
|
||||
#include "../Overlays/overlay_shader_compile_notification.h"
|
||||
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
|
||||
#include "../rsx_methods.h"
|
||||
#include "../rsx_utils.h"
|
||||
#include "../Common/BufferUtils.h"
|
||||
#include "VKFormats.h"
|
||||
#include "VKGSRender.h"
|
||||
#include "VKCommonDecompiler.h"
|
||||
#include "VKRenderPass.h"
|
||||
#include "VKResourceManager.h"
|
||||
@ -42,22 +38,7 @@ namespace
|
||||
|
||||
namespace vk
|
||||
{
|
||||
VkCompareOp get_compare_func(rsx::comparison_function op, bool reverse_direction = false)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case rsx::comparison_function::never: return VK_COMPARE_OP_NEVER;
|
||||
case rsx::comparison_function::greater: return reverse_direction ? VK_COMPARE_OP_LESS: VK_COMPARE_OP_GREATER;
|
||||
case rsx::comparison_function::less: return reverse_direction ? VK_COMPARE_OP_GREATER: VK_COMPARE_OP_LESS;
|
||||
case rsx::comparison_function::less_or_equal: return reverse_direction ? VK_COMPARE_OP_GREATER_OR_EQUAL: VK_COMPARE_OP_LESS_OR_EQUAL;
|
||||
case rsx::comparison_function::greater_or_equal: return reverse_direction ? VK_COMPARE_OP_LESS_OR_EQUAL: VK_COMPARE_OP_GREATER_OR_EQUAL;
|
||||
case rsx::comparison_function::equal: return VK_COMPARE_OP_EQUAL;
|
||||
case rsx::comparison_function::not_equal: return VK_COMPARE_OP_NOT_EQUAL;
|
||||
case rsx::comparison_function::always: return VK_COMPARE_OP_ALWAYS;
|
||||
default:
|
||||
fmt::throw_exception("Unknown compare op: 0x%x" HERE, static_cast<u32>(op));
|
||||
}
|
||||
}
|
||||
VkCompareOp get_compare_func(rsx::comparison_function op, bool reverse_direction = false);
|
||||
|
||||
std::pair<VkFormat, VkComponentMapping> get_compatible_surface_format(rsx::surface_color_format color_format)
|
||||
{
|
||||
@ -194,7 +175,6 @@ namespace vk
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
VkStencilOp get_stencil_op(rsx::stencil_op op)
|
||||
{
|
||||
switch (op)
|
||||
@ -234,22 +214,6 @@ namespace vk
|
||||
fmt::throw_exception("Unknown cull face value: 0x%x" HERE, static_cast<u32>(cfv));
|
||||
}
|
||||
}
|
||||
|
||||
VkImageViewType get_view_type(rsx::texture_dimension_extended type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case rsx::texture_dimension_extended::texture_dimension_1d:
|
||||
return VK_IMAGE_VIEW_TYPE_1D;
|
||||
case rsx::texture_dimension_extended::texture_dimension_2d:
|
||||
return VK_IMAGE_VIEW_TYPE_2D;
|
||||
case rsx::texture_dimension_extended::texture_dimension_cubemap:
|
||||
return VK_IMAGE_VIEW_TYPE_CUBE;
|
||||
case rsx::texture_dimension_extended::texture_dimension_3d:
|
||||
return VK_IMAGE_VIEW_TYPE_3D;
|
||||
default: ASSUME(0);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -986,892 +950,6 @@ VkDescriptorSet VKGSRender::allocate_descriptor_set()
|
||||
return new_descriptor_set;
|
||||
}
|
||||
|
||||
void VKGSRender::begin()
|
||||
{
|
||||
rsx::thread::begin();
|
||||
|
||||
if (skip_current_frame || swapchain_unavailable || cond_render_ctrl.disable_rendering())
|
||||
return;
|
||||
|
||||
init_buffers(rsx::framebuffer_creation_context::context_draw);
|
||||
}
|
||||
|
||||
void VKGSRender::update_draw_state()
|
||||
{
|
||||
m_profiler.start();
|
||||
|
||||
float actual_line_width = rsx::method_registers.line_width();
|
||||
vkCmdSetLineWidth(*m_current_command_buffer, actual_line_width);
|
||||
|
||||
if (rsx::method_registers.poly_offset_fill_enabled())
|
||||
{
|
||||
//offset_bias is the constant factor, multiplied by the implementation factor R
|
||||
//offst_scale is the slope factor, multiplied by the triangle slope factor M
|
||||
vkCmdSetDepthBias(*m_current_command_buffer, rsx::method_registers.poly_offset_bias(), 0.f, rsx::method_registers.poly_offset_scale());
|
||||
}
|
||||
else
|
||||
{
|
||||
//Zero bias value - disables depth bias
|
||||
vkCmdSetDepthBias(*m_current_command_buffer, 0.f, 0.f, 0.f);
|
||||
}
|
||||
|
||||
//Update dynamic state
|
||||
if (rsx::method_registers.blend_enabled())
|
||||
{
|
||||
//Update blend constants
|
||||
auto blend_colors = rsx::get_constant_blend_colors();
|
||||
vkCmdSetBlendConstants(*m_current_command_buffer, blend_colors.data());
|
||||
}
|
||||
|
||||
if (rsx::method_registers.stencil_test_enabled())
|
||||
{
|
||||
const bool two_sided_stencil = rsx::method_registers.two_sided_stencil_test_enabled();
|
||||
VkStencilFaceFlags face_flag = (two_sided_stencil) ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FRONT_AND_BACK;
|
||||
|
||||
vkCmdSetStencilWriteMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_mask());
|
||||
vkCmdSetStencilCompareMask(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_mask());
|
||||
vkCmdSetStencilReference(*m_current_command_buffer, face_flag, rsx::method_registers.stencil_func_ref());
|
||||
|
||||
if (two_sided_stencil)
|
||||
{
|
||||
vkCmdSetStencilWriteMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_mask());
|
||||
vkCmdSetStencilCompareMask(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_mask());
|
||||
vkCmdSetStencilReference(*m_current_command_buffer, VK_STENCIL_FACE_BACK_BIT, rsx::method_registers.back_stencil_func_ref());
|
||||
}
|
||||
}
|
||||
|
||||
if (m_device->get_depth_bounds_support())
|
||||
{
|
||||
if (rsx::method_registers.depth_bounds_test_enabled())
|
||||
{
|
||||
//Update depth bounds min/max
|
||||
vkCmdSetDepthBounds(*m_current_command_buffer, rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
|
||||
}
|
||||
else
|
||||
{
|
||||
vkCmdSetDepthBounds(*m_current_command_buffer, 0.f, 1.f);
|
||||
}
|
||||
}
|
||||
|
||||
bind_viewport();
|
||||
|
||||
//TODO: Set up other render-state parameters into the program pipeline
|
||||
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
}
|
||||
|
||||
void VKGSRender::begin_render_pass()
|
||||
{
|
||||
vk::begin_renderpass(
|
||||
*m_current_command_buffer,
|
||||
get_render_pass(),
|
||||
m_draw_fbo->value,
|
||||
{ positionu{0u, 0u}, sizeu{m_draw_fbo->width(), m_draw_fbo->height()} });
|
||||
}
|
||||
|
||||
// Terminates the renderpass currently open on the active command buffer.
void VKGSRender::close_render_pass()
{
	vk::end_renderpass(*m_current_command_buffer);
}
|
||||
|
||||
// Returns the renderpass matching the current renderpass key, resolving and
// memoizing it on first use. The cache is invalidated elsewhere by clearing
// m_cached_renderpass whenever m_current_renderpass_key changes.
VkRenderPass VKGSRender::get_render_pass()
{
	if (m_cached_renderpass)
	{
		return m_cached_renderpass;
	}

	m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key);
	return m_cached_renderpass;
}
|
||||
|
||||
// Records one sub-draw of the current draw clause into the active command buffer.
// sub_index == 0 performs the per-batch setup (vertex layout analysis, stream
// layout allocation, pipeline/renderpass binding, optional conditional-render
// begin); subsequent sub-draws reuse that state and only refresh what changed.
void VKGSRender::emit_geometry(u32 sub_index)
{
	auto &draw_call = rsx::method_registers.current_draw_clause;
	m_profiler.start();

	if (sub_index == 0)
	{
		analyse_inputs_interleaved(m_vertex_layout);

		if (!m_vertex_layout.validate())
		{
			// No vertex inputs enabled.
			// Execute remaining pipeline barriers with a NOP draw, then bail out.
			do
			{
				draw_call.execute_pipeline_dependencies();
			}
			while (draw_call.next());

			draw_call.end();
			return;
		}
	}
	else if (draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed)
	{
		// Vertex base register changed mid-clause; rebase the resolved offsets
		// of the interleaved blocks against the new vertex base.
		for (auto &info : m_vertex_layout.interleaved_blocks)
		{
			const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
			info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location, HERE);
		}
	}

	// Snapshot the attribute buffer handles before upload so we can detect relocation below
	const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
	const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;

	// Programs data is dependent on vertex state
	auto upload_info = upload_vertex_data();
	if (!upload_info.vertex_draw_count)
	{
		// Malformed vertex setup; abort
		return;
	}

	m_frame_stats.vertex_upload_time += m_profiler.duration();

	auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
	auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
	bool update_descriptors = false;

	const auto& binding_table = m_device->get_pipeline_binding_table();

	if (sub_index == 0)
	{
		// First sub-draw always (re)writes the vertex buffer descriptors
		update_descriptors = true;

		// Allocate stream layout memory for this batch
		m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128;
		m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range);

		if (vk::test_status_interrupt(vk::heap_changed))
		{
			// Heap was reallocated; retire the stale layout view at end of frame
			if (m_vertex_layout_storage &&
				m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value)
			{
				m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage));
			}

			vk::clear_status_interrupt(vk::heap_changed);
		}
	}
	else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
	{
		// Attribute storage moved between sub-draws. The previous descriptor set may
		// still be in flight, so clone it into a fresh set before patching it.
		VkDescriptorSet new_descriptor_set = allocate_descriptor_set();
		std::vector<VkCopyDescriptorSet> copy_set(binding_table.total_descriptor_bindings);

		for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n)
		{
			copy_set[n] =
			{
				VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET,   // sType
				nullptr,                                 // pNext
				m_current_frame->descriptor_set,         // srcSet
				n,                                       // srcBinding
				0u,                                      // srcArrayElement
				new_descriptor_set,                      // dstSet
				n,                                       // dstBinding
				0u,                                      // dstArrayElement
				1u                                       // descriptorCount
			};
		}

		vkUpdateDescriptorSets(*m_device, 0, 0, binding_table.total_descriptor_bindings, copy_set.data());
		m_current_frame->descriptor_set = new_descriptor_set;

		update_descriptors = true;
	}

	// Update vertex fetch parameters
	update_vertex_env(sub_index, upload_info);

	verify(HERE), m_vertex_layout_storage;
	if (update_descriptors)
	{
		// Rebind persistent/volatile attribute buffers plus the layout stream view
		m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set);
		m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set);
		m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set);
	}

	if (!m_current_subdraw_id++)
	{
		// First sub-draw of the batch: bind the pipeline, flush dynamic state and open the renderpass
		vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
		update_draw_state();
		begin_render_pass();

		if (cond_render_ctrl.hw_cond_active && m_device->get_conditional_render_support())
		{
			// It is inconvenient that conditional rendering breaks other things like compute dispatch.
			// TODO: If this is heavy, refactor the resources into globals and add checks around compute dispatch.
			VkConditionalRenderingBeginInfoEXT info{};
			info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
			info.buffer = m_cond_render_buffer->value;

			m_device->cmdBeginConditionalRenderingEXT(*m_current_command_buffer, &info);
			m_current_command_buffer->flags |= vk::command_buffer::cb_has_conditional_render;
		}
	}

	// Bind the new set of descriptors for use with this draw call
	vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);

	m_frame_stats.setup_time += m_profiler.duration();

	if (!upload_info.index_info)
	{
		// Non-indexed path
		if (draw_call.is_single_draw())
		{
			vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
		}
		else
		{
			// One vkCmdDraw per subrange, advancing the vertex offset through the batch
			u32 vertex_offset = 0;
			const auto subranges = draw_call.get_subranges();
			for (const auto &range : subranges)
			{
				vkCmdDraw(*m_current_command_buffer, range.count, 1, vertex_offset, 0);
				vertex_offset += range.count;
			}
		}
	}
	else
	{
		// Indexed path; index data was written into the index ring buffer by upload_vertex_data()
		const VkIndexType index_type = std::get<1>(*upload_info.index_info);
		const VkDeviceSize offset = std::get<0>(*upload_info.index_info);

		vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);

		if (rsx::method_registers.current_draw_clause.is_single_draw())
		{
			const u32 index_count = upload_info.vertex_draw_count;
			vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
		}
		else
		{
			// One vkCmdDrawIndexed per subrange; count is expanded per primitive type
			u32 vertex_offset = 0;
			const auto subranges = draw_call.get_subranges();
			for (const auto &range : subranges)
			{
				const auto count = get_index_count(draw_call.primitive, range.count);
				vkCmdDrawIndexed(*m_current_command_buffer, count, 1, vertex_offset, 0, 0);
				vertex_offset += count;
			}
		}
	}

	m_frame_stats.draw_exec_time += m_profiler.duration();
}
|
||||
|
||||
void VKGSRender::end()
|
||||
{
|
||||
if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || cond_render_ctrl.disable_rendering())
|
||||
{
|
||||
execute_nop_draw();
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for frame resource status here because it is possible for an async flip to happen between begin/end
|
||||
if (m_current_frame->flags & frame_context_state::dirty) [[unlikely]]
|
||||
{
|
||||
check_present_status();
|
||||
|
||||
if (m_current_frame->swap_command_buffer) [[unlikely]]
|
||||
{
|
||||
// Borrow time by using the auxilliary context
|
||||
m_aux_frame_context.grab_resources(*m_current_frame);
|
||||
m_current_frame = &m_aux_frame_context;
|
||||
}
|
||||
else if (m_current_frame->used_descriptors)
|
||||
{
|
||||
m_current_frame->descriptor_pool.reset(0);
|
||||
m_current_frame->used_descriptors = 0;
|
||||
}
|
||||
|
||||
verify(HERE), !m_current_frame->swap_command_buffer;
|
||||
|
||||
m_current_frame->flags &= ~frame_context_state::dirty;
|
||||
}
|
||||
|
||||
m_profiler.start();
|
||||
|
||||
// Check for data casts
|
||||
// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
|
||||
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
||||
if (ds && ds->old_contents.size() == 1 &&
|
||||
ds->old_contents[0].source->info.format == VK_FORMAT_B8G8R8A8_UNORM)
|
||||
{
|
||||
auto key = vk::get_renderpass_key(ds->info.format);
|
||||
auto render_pass = vk::get_renderpass(*m_device, key);
|
||||
verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE;
|
||||
|
||||
VkClearDepthStencilValue clear = { 1.f, 0xFF };
|
||||
VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
|
||||
|
||||
// Initialize source
|
||||
auto src = vk::as_rtt(ds->old_contents[0].source);
|
||||
src->read_barrier(*m_current_command_buffer);
|
||||
|
||||
switch (src->current_layout)
|
||||
{
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
break;
|
||||
//case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
default:
|
||||
src->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
break;
|
||||
}
|
||||
|
||||
// Clear explicitly before starting the inheritance transfer
|
||||
const bool preinitialized = (ds->current_layout == VK_IMAGE_LAYOUT_GENERAL);
|
||||
if (!preinitialized) ds->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range);
|
||||
if (!preinitialized) ds->pop_layout(*m_current_command_buffer);
|
||||
|
||||
// TODO: Stencil transfer
|
||||
ds->old_contents[0].init_transfer(ds);
|
||||
m_depth_converter->run(*m_current_command_buffer,
|
||||
ds->old_contents[0].src_rect(),
|
||||
ds->old_contents[0].dst_rect(),
|
||||
src->get_view(0xAAE4, rsx::default_remap_vector),
|
||||
ds, render_pass);
|
||||
|
||||
// TODO: Flush management to avoid pass running out of ubo space (very unlikely)
|
||||
ds->on_write();
|
||||
}
|
||||
|
||||
//Load textures
|
||||
{
|
||||
std::lock_guard lock(m_sampler_mutex);
|
||||
bool update_framebuffer_sourced = false;
|
||||
bool check_for_cyclic_refs = false;
|
||||
|
||||
if (surface_store_tag != m_rtts.cache_tag) [[unlikely]]
|
||||
{
|
||||
update_framebuffer_sourced = true;
|
||||
surface_store_tag = m_rtts.cache_tag;
|
||||
}
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
if (!fs_sampler_state[i])
|
||||
fs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
|
||||
|
||||
if (m_samplers_dirty || m_textures_dirty[i] ||
|
||||
(update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
|
||||
{
|
||||
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.fragment_textures[i].enabled())
|
||||
{
|
||||
check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
|
||||
*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
|
||||
|
||||
if (sampler_state->is_cyclic_reference)
|
||||
{
|
||||
check_for_cyclic_refs |= true;
|
||||
}
|
||||
|
||||
bool replace = !fs_sampler_handles[i];
|
||||
VkFilter mag_filter;
|
||||
vk::minification_filter min_filter;
|
||||
f32 min_lod = 0.f, max_lod = 0.f;
|
||||
f32 lod_bias = 0.f;
|
||||
|
||||
const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
|
||||
VkBool32 compare_enabled = VK_FALSE;
|
||||
VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER;
|
||||
|
||||
if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT)
|
||||
{
|
||||
if (m_device->get_formats_support().d24_unorm_s8)
|
||||
{
|
||||
// NOTE:
|
||||
// The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3
|
||||
// In case of projected shadow lookup the result of the divide operation has its Z clamped to [0-1] before comparison
|
||||
// Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage
|
||||
// Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results
|
||||
|
||||
// NOTE2:
|
||||
// To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available
|
||||
|
||||
compare_enabled = VK_TRUE;
|
||||
depth_compare_mode = vk::get_compare_func(rsx::method_registers.fragment_textures[i].zfunc(), true);
|
||||
}
|
||||
}
|
||||
|
||||
const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0;
|
||||
const f32 af_level = aniso_override ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso());
|
||||
const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s());
|
||||
const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t());
|
||||
const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r());
|
||||
const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color());
|
||||
|
||||
// Check if non-point filtering can even be used on this format
|
||||
bool can_sample_linear;
|
||||
if (sampler_state->format_class == rsx::format_type::color) [[likely]]
|
||||
{
|
||||
// Most PS3-like formats can be linearly filtered without problem
|
||||
can_sample_linear = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not all GPUs support linear filtering of depth formats
|
||||
const auto vk_format = sampler_state->image_handle ? sampler_state->image_handle->image()->format() :
|
||||
vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format);
|
||||
|
||||
can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
|
||||
}
|
||||
|
||||
const auto mipmap_count = rsx::method_registers.fragment_textures[i].get_exact_mipmap_count();
|
||||
min_filter = vk::get_min_filter(rsx::method_registers.fragment_textures[i].min_filter());
|
||||
|
||||
if (can_sample_linear)
|
||||
{
|
||||
mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter());
|
||||
}
|
||||
else
|
||||
{
|
||||
mag_filter = VK_FILTER_NEAREST;
|
||||
min_filter.filter = VK_FILTER_NEAREST;
|
||||
}
|
||||
|
||||
if (min_filter.sample_mipmaps && mipmap_count > 1)
|
||||
{
|
||||
f32 actual_mipmaps;
|
||||
if (sampler_state->upload_context == rsx::texture_upload_context::shader_read)
|
||||
{
|
||||
actual_mipmaps = static_cast<f32>(mipmap_count);
|
||||
}
|
||||
else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather)
|
||||
{
|
||||
// Clamp min and max lod
|
||||
actual_mipmaps = static_cast<f32>(sampler_state->external_subresource_desc.sections_to_copy.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
actual_mipmaps = 1.f;
|
||||
}
|
||||
|
||||
if (actual_mipmaps > 1.f)
|
||||
{
|
||||
min_lod = rsx::method_registers.fragment_textures[i].min_lod();
|
||||
max_lod = rsx::method_registers.fragment_textures[i].max_lod();
|
||||
lod_bias = rsx::method_registers.fragment_textures[i].bias();
|
||||
|
||||
min_lod = std::min(min_lod, actual_mipmaps - 1.f);
|
||||
max_lod = std::min(max_lod, actual_mipmaps - 1.f);
|
||||
|
||||
if (min_filter.mipmap_mode == VK_SAMPLER_MIPMAP_MODE_NEAREST)
|
||||
{
|
||||
// Round to nearest 0.5 to work around some broken games
|
||||
// Unlike openGL, sampler parameters cannot be dynamically changed on vulkan, leading to many permutations
|
||||
lod_bias = std::floor(lod_bias * 2.f + 0.5f) * 0.5f;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
min_lod = max_lod = lod_bias = 0.f;
|
||||
min_filter.mipmap_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
}
|
||||
}
|
||||
|
||||
if (fs_sampler_handles[i] && m_textures_dirty[i])
|
||||
{
|
||||
if (!fs_sampler_handles[i]->matches(wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod,
|
||||
min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode))
|
||||
{
|
||||
replace = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (replace)
|
||||
{
|
||||
fs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(*m_device, wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod,
|
||||
min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*sampler_state = {};
|
||||
}
|
||||
|
||||
m_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
|
||||
{
|
||||
if (!vs_sampler_state[i])
|
||||
vs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
|
||||
|
||||
if (m_samplers_dirty || m_vertex_textures_dirty[i] ||
|
||||
(update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
|
||||
{
|
||||
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.vertex_textures[i].enabled())
|
||||
{
|
||||
check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE);
|
||||
*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
|
||||
|
||||
if (sampler_state->is_cyclic_reference || sampler_state->external_subresource_desc.do_not_cache)
|
||||
{
|
||||
check_for_cyclic_refs |= true;
|
||||
}
|
||||
|
||||
bool replace = !vs_sampler_handles[i];
|
||||
const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
|
||||
const auto min_lod = rsx::method_registers.vertex_textures[i].min_lod();
|
||||
const auto max_lod = rsx::method_registers.vertex_textures[i].max_lod();
|
||||
const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color());
|
||||
|
||||
if (vs_sampler_handles[i])
|
||||
{
|
||||
if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
|
||||
unnormalized_coords, 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color))
|
||||
{
|
||||
replace = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (replace)
|
||||
{
|
||||
vs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(
|
||||
*m_device,
|
||||
VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT,
|
||||
unnormalized_coords,
|
||||
0.f, 1.f, min_lod, max_lod,
|
||||
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color);
|
||||
}
|
||||
}
|
||||
else
|
||||
*sampler_state = {};
|
||||
|
||||
m_vertex_textures_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
m_samplers_dirty.store(false);
|
||||
|
||||
if (check_for_cyclic_refs)
|
||||
{
|
||||
// Regenerate renderpass key
|
||||
if (const auto key = vk::get_renderpass_key(m_fbo_images, m_current_renderpass_key);
|
||||
key != m_current_renderpass_key)
|
||||
{
|
||||
m_current_renderpass_key = key;
|
||||
m_cached_renderpass = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_frame_stats.textures_upload_time += m_profiler.duration();
|
||||
|
||||
if (!load_program())
|
||||
{
|
||||
// Program is not ready, skip drawing this
|
||||
std::this_thread::yield();
|
||||
execute_nop_draw();
|
||||
// m_rtts.on_write(); - breaks games for obvious reasons
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate descriptor set
|
||||
check_descriptors();
|
||||
m_current_frame->descriptor_set = allocate_descriptor_set();
|
||||
|
||||
// Load program execution environment
|
||||
load_program_env();
|
||||
|
||||
m_frame_stats.setup_time += m_profiler.duration();
|
||||
|
||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||
{
|
||||
if (current_fp_metadata.referenced_textures_mask & (1 << i))
|
||||
{
|
||||
vk::image_view* view = nullptr;
|
||||
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
|
||||
|
||||
if (rsx::method_registers.fragment_textures[i].enabled() &&
|
||||
sampler_state->validate())
|
||||
{
|
||||
if (view = sampler_state->image_handle; !view)
|
||||
{
|
||||
//Requires update, copy subresource
|
||||
view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (auto raw = view->image(); raw->current_layout)
|
||||
{
|
||||
default:
|
||||
//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
break;
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
|
||||
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
|
||||
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
break;
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
|
||||
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
|
||||
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
break;
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
|
||||
if (!sampler_state->is_cyclic_reference)
|
||||
{
|
||||
// This was used in a cyclic ref before, but is missing a barrier
|
||||
// No need for a full stall, use a custom barrier instead
|
||||
VkPipelineStageFlags src_stage;
|
||||
VkAccessFlags src_access;
|
||||
if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
|
||||
{
|
||||
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
}
|
||||
else
|
||||
{
|
||||
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
|
||||
src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
}
|
||||
|
||||
vk::insert_image_memory_barrier(
|
||||
*m_current_command_buffer,
|
||||
raw->value,
|
||||
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
||||
src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
|
||||
src_access, VK_ACCESS_SHADER_READ_BIT,
|
||||
{ raw->aspect(), 0, 1, 0, 1 });
|
||||
|
||||
raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
}
|
||||
break;
|
||||
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
|
||||
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference;
|
||||
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (view) [[likely]]
|
||||
{
|
||||
m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout },
|
||||
i,
|
||||
::glsl::program_domain::glsl_fragment_program,
|
||||
m_current_frame->descriptor_set);
|
||||
|
||||
if (current_fragment_program.redirected_textures & (1 << i))
|
||||
{
|
||||
// Stencil mirror required
|
||||
auto root_image = static_cast<vk::viewable_image*>(view->image());
|
||||
auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
|
||||
if (!m_stencil_mirror_sampler)
|
||||
{
|
||||
m_stencil_mirror_sampler = std::make_unique<vk::sampler>(*m_device,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
VK_FALSE, 0.f, 1.f, 0.f, 0.f,
|
||||
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST,
|
||||
VK_BORDER_COLOR_INT_OPAQUE_BLACK);
|
||||
}
|
||||
|
||||
m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout },
|
||||
i,
|
||||
::glsl::program_domain::glsl_fragment_program,
|
||||
m_current_frame->descriptor_set,
|
||||
true);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i));
|
||||
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
|
||||
i,
|
||||
::glsl::program_domain::glsl_fragment_program,
|
||||
m_current_frame->descriptor_set);
|
||||
|
||||
if (current_fragment_program.redirected_textures & (1 << i))
|
||||
{
|
||||
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
|
||||
i,
|
||||
::glsl::program_domain::glsl_fragment_program,
|
||||
m_current_frame->descriptor_set,
|
||||
true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
|
||||
{
|
||||
if (current_vp_metadata.referenced_textures_mask & (1 << i))
|
||||
{
|
||||
if (!rsx::method_registers.vertex_textures[i].enabled())
|
||||
{
|
||||
const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
|
||||
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
|
||||
i,
|
||||
::glsl::program_domain::glsl_vertex_program,
|
||||
m_current_frame->descriptor_set);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
|
||||
auto image_ptr = sampler_state->image_handle;
|
||||
|
||||
if (!image_ptr && sampler_state->validate())
|
||||
{
|
||||
image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
|
||||
m_vertex_textures_dirty[i] = true;
|
||||
}
|
||||
|
||||
if (!image_ptr)
|
||||
{
|
||||
rsx_log.error("Texture upload failed to vtexture index %d. Binding null sampler.", i);
|
||||
const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
|
||||
|
||||
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
|
||||
i,
|
||||
::glsl::program_domain::glsl_vertex_program,
|
||||
m_current_frame->descriptor_set);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (auto raw = image_ptr->image(); raw->current_layout)
|
||||
{
|
||||
default:
|
||||
//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
break;
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
|
||||
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
|
||||
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
break;
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
|
||||
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
|
||||
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
break;
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
|
||||
if (!sampler_state->is_cyclic_reference)
|
||||
{
|
||||
// Custom barrier, see similar block in FS stage
|
||||
VkPipelineStageFlags src_stage;
|
||||
VkAccessFlags src_access;
|
||||
if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
|
||||
{
|
||||
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
}
|
||||
else
|
||||
{
|
||||
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
|
||||
src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
}
|
||||
|
||||
vk::insert_image_memory_barrier(
|
||||
*m_current_command_buffer,
|
||||
raw->value,
|
||||
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
||||
src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
|
||||
src_access, VK_ACCESS_SHADER_READ_BIT,
|
||||
{ raw->aspect(), 0, 1, 0, 1 });
|
||||
|
||||
raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
}
|
||||
break;
|
||||
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
|
||||
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
|
||||
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
break;
|
||||
}
|
||||
|
||||
m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout },
|
||||
i,
|
||||
::glsl::program_domain::glsl_vertex_program,
|
||||
m_current_frame->descriptor_set);
|
||||
}
|
||||
}
|
||||
|
||||
m_texture_cache.release_uncached_temporary_subresources();
|
||||
|
||||
m_frame_stats.textures_upload_time += m_profiler.duration();
|
||||
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task)
|
||||
{
|
||||
u32 occlusion_id = m_occlusion_query_pool.find_free_slot();
|
||||
if (occlusion_id == UINT32_MAX)
|
||||
{
|
||||
// Force flush
|
||||
rsx_log.error("[Performance Warning] Out of free occlusion slots. Forcing hard sync.");
|
||||
ZCULL_control::sync(this);
|
||||
|
||||
occlusion_id = m_occlusion_query_pool.find_free_slot();
|
||||
if (occlusion_id == UINT32_MAX)
|
||||
{
|
||||
//rsx_log.error("Occlusion pool overflow");
|
||||
if (m_current_task) m_current_task->result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Begin query
|
||||
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
|
||||
|
||||
auto &data = m_occlusion_map[m_active_query_info->driver_handle];
|
||||
data.indices.push_back(occlusion_id);
|
||||
data.set_sync_command_buffer(m_current_command_buffer);
|
||||
|
||||
m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
|
||||
m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query);
|
||||
}
|
||||
|
||||
bool primitive_emulated = false;
|
||||
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
|
||||
|
||||
// Apply write memory barriers
|
||||
if (ds) ds->write_barrier(*m_current_command_buffer);
|
||||
|
||||
for (auto &rtt : m_rtts.m_bound_render_targets)
|
||||
{
|
||||
if (auto surface = std::get<1>(rtt))
|
||||
{
|
||||
surface->write_barrier(*m_current_command_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Final heap check...
|
||||
check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE);
|
||||
|
||||
u32 sub_index = 0;
|
||||
m_current_subdraw_id = 0;
|
||||
|
||||
rsx::method_registers.current_draw_clause.begin();
|
||||
do
|
||||
{
|
||||
emit_geometry(sub_index++);
|
||||
}
|
||||
while (rsx::method_registers.current_draw_clause.next());
|
||||
|
||||
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render)
|
||||
{
|
||||
m_device->cmdEndConditionalRenderingEXT(*m_current_command_buffer);
|
||||
m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render);
|
||||
}
|
||||
|
||||
m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled);
|
||||
|
||||
rsx::thread::end();
|
||||
}
|
||||
|
||||
void VKGSRender::set_viewport()
|
||||
{
|
||||
const auto clip_width = rsx::apply_resolution_scale(rsx::method_registers.surface_clip_width(), true);
|
||||
|
@ -510,6 +510,9 @@ private:
|
||||
void load_program_env();
|
||||
void update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info);
|
||||
|
||||
void load_texture_env();
|
||||
void bind_texture_env();
|
||||
|
||||
public:
|
||||
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
|
||||
void set_viewport();
|
||||
|
7
rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp
Normal file
7
rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp
Normal file
@ -0,0 +1,7 @@
|
||||
#include "stdafx.h"
|
||||
#include "VKShaderInterpreter.h"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
|
||||
};
|
9
rpcs3/Emu/RSX/VK/VKShaderInterpreter.h
Normal file
9
rpcs3/Emu/RSX/VK/VKShaderInterpreter.h
Normal file
@ -0,0 +1,9 @@
|
||||
#pragma once
|
||||
#include "VKGSRender.h"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
class shader_interpreter : glsl::program
|
||||
{
|
||||
};
|
||||
}
|
@ -83,18 +83,21 @@
|
||||
<ClInclude Include="Emu\RSX\GL\GLVertexProgram.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLHelpers.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLRenderTargets.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLShaderInterpreter.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLTextureCache.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\OpenGL.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Emu\RSX\GL\GLCommonDecompiler.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLDraw.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLFragmentProgram.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLShaderInterpreter.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLTexture.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
|
||||
|
@ -1,6 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Emu\RSX\GL\GLDraw.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLTexture.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLCommonDecompiler.cpp" />
|
||||
@ -10,6 +11,7 @@
|
||||
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLShaderInterpreter.cpp" />
|
||||
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@ -24,6 +26,7 @@
|
||||
<ClInclude Include="Emu\RSX\GL\OpenGL.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLTextureCache.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLRenderTargets.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLShaderInterpreter.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLTextOut.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLOverlays.h" />
|
||||
<ClInclude Include="Emu\RSX\GL\GLExecutionState.h" />
|
||||
|
@ -38,6 +38,7 @@
|
||||
<ClInclude Include="Emu\RSX\VK\VKRenderTargets.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKResolveHelper.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKResourceManager.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKShaderInterpreter.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKTextOut.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKTextureCache.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKVertexProgram.h" />
|
||||
@ -47,6 +48,7 @@
|
||||
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKCommonDecompiler.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKDMA.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKDraw.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKFormats.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKFragmentProgram.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
|
||||
@ -57,6 +59,7 @@
|
||||
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKResourceManager.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKShaderInterpreter.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKTexture.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKVertexBuffers.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKVertexProgram.cpp" />
|
||||
|
@ -3,6 +3,7 @@
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Emu\RSX\VK\VKCommonDecompiler.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKDMA.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKDraw.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKFormats.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKFragmentProgram.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
|
||||
@ -13,6 +14,7 @@
|
||||
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKResourceManager.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKShaderInterpreter.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKTexture.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKVertexBuffers.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKVertexProgram.cpp" />
|
||||
@ -34,6 +36,7 @@
|
||||
<ClInclude Include="Emu\RSX\VK\VKRenderTargets.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKResolveHelper.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKResourceManager.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKShaderInterpreter.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKTextOut.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKTextureCache.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKVertexProgram.h" />
|
||||
|
Loading…
x
Reference in New Issue
Block a user