rsx: Zcull refactoring and vulkan implementation

This commit is contained in:
kd-11 2017-11-17 00:52:21 +03:00
parent c926868758
commit 680ca1d12a
8 changed files with 575 additions and 286 deletions

View File

@ -590,10 +590,6 @@ void GLGSRender::end()
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
m_draw_calls++;
if (zcull_task_queue.active_query &&
zcull_task_queue.active_query->active)
zcull_task_queue.active_query->num_draws++;
synchronize_buffers();
rsx::thread::end();
}
@ -754,9 +750,11 @@ void GLGSRender::on_init_thread()
//Occlusion query
for (u32 i = 0; i < occlusion_query_count; ++i)
{
GLuint handle = 0;
auto &query = occlusion_query_data[i];
glGenQueries(1, &query.handle);
glGenQueries(1, &handle);
query.driver_handle = (u64)handle;
query.pending = false;
query.active = false;
query.result = 0;
@ -848,7 +846,9 @@ void GLGSRender::on_exit()
query.active = false;
query.pending = false;
glDeleteQueries(1, &query.handle);
GLuint handle = (GLuint)query.driver_handle;
glDeleteQueries(1, &handle);
query.driver_handle = 0;
}
glFlush();
@ -1410,179 +1410,29 @@ void GLGSRender::notify_tile_unbound(u32 tile)
//m_rtts.invalidate_surface_address(addr, false);
}
void GLGSRender::check_zcull_status(bool framebuffer_swap, bool force_read)
void GLGSRender::begin_occlusion_query(rsx::occlusion_query_info* query)
{
if (g_cfg.video.disable_zcull_queries)
return;
bool testing_enabled = zcull_pixel_cnt_enabled || zcull_stats_enabled;
if (framebuffer_swap)
{
zcull_surface_active = false;
const u32 zeta_address = depth_surface_info.address;
if (zeta_address)
{
//Find zeta address in bound zculls
for (int i = 0; i < rsx::limits::zculls_count; i++)
{
if (zculls[i].binded)
{
const u32 rsx_address = rsx::get_address(zculls[i].offset, CELL_GCM_LOCATION_LOCAL);
if (rsx_address == zeta_address)
{
zcull_surface_active = true;
break;
}
}
}
}
}
occlusion_query_info* query = nullptr;
if (zcull_task_queue.task_stack.size() > 0)
query = zcull_task_queue.active_query;
if (query && query->active)
{
if (force_read || (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active))
{
glEndQuery(GL_ANY_SAMPLES_PASSED);
query->active = false;
query->pending = true;
}
}
else
{
if (zcull_rendering_enabled && testing_enabled && zcull_surface_active)
{
//Find query
u32 free_index = synchronize_zcull_stats();
query = &occlusion_query_data[free_index];
zcull_task_queue.add(query);
glBeginQuery(GL_ANY_SAMPLES_PASSED, query->handle);
query->active = true;
query->result = 0;
query->num_draws = 0;
}
}
query->result = 0;
glBeginQuery(GL_ANY_SAMPLES_PASSED, (GLuint)query->driver_handle);
}
void GLGSRender::clear_zcull_stats(u32 type)
void GLGSRender::end_occlusion_query(rsx::occlusion_query_info* query)
{
if (g_cfg.video.disable_zcull_queries)
return;
if (type == CELL_GCM_ZPASS_PIXEL_CNT)
{
if (zcull_task_queue.active_query &&
zcull_task_queue.active_query->active &&
zcull_task_queue.active_query->num_draws > 0)
{
//discard active query results
check_zcull_status(false, true);
zcull_task_queue.active_query->pending = false;
//re-enable cull stats if stats are enabled
check_zcull_status(false, false);
zcull_task_queue.active_query->num_draws = 0;
}
current_zcull_stats.clear();
}
glEndQuery(GL_ANY_SAMPLES_PASSED);
}
u32 GLGSRender::get_zcull_stats(u32 type)
bool GLGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query)
{
if (g_cfg.video.disable_zcull_queries)
return 0u;
GLint status = GL_TRUE;
glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT_AVAILABLE, &status);
if (zcull_task_queue.active_query &&
zcull_task_queue.active_query->active &&
current_zcull_stats.zpass_pixel_cnt == 0 &&
type == CELL_GCM_ZPASS_PIXEL_CNT)
{
//The zcull unit is still bound as the read is happening and there are no results ready
check_zcull_status(false, true); //close current query
check_zcull_status(false, false); //start new query since stat counting is still active
}
switch (type)
{
case CELL_GCM_ZPASS_PIXEL_CNT:
{
if (current_zcull_stats.zpass_pixel_cnt > 0)
return UINT16_MAX;
synchronize_zcull_stats(true);
return (current_zcull_stats.zpass_pixel_cnt > 0)? UINT16_MAX : 0;
}
case CELL_GCM_ZCULL_STATS:
case CELL_GCM_ZCULL_STATS1:
case CELL_GCM_ZCULL_STATS2:
//TODO
return UINT16_MAX;
case CELL_GCM_ZCULL_STATS3:
{
//Some kind of inverse value
if (current_zcull_stats.zpass_pixel_cnt > 0)
return 0;
synchronize_zcull_stats(true);
return (current_zcull_stats.zpass_pixel_cnt > 0) ? 0 : UINT16_MAX;
}
default:
LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
return 0;
}
return status != GL_FALSE;
}
u32 GLGSRender::synchronize_zcull_stats(bool hard_sync)
void GLGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
{
if (!zcull_rendering_enabled || zcull_task_queue.pending == 0)
return 0;
GLint result;
glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result);
u32 result = UINT16_MAX;
GLint count, status;
for (auto &query : zcull_task_queue.task_stack)
{
if (query == nullptr || query->active)
continue;
glGetQueryObjectiv(query->handle, GL_QUERY_RESULT_AVAILABLE, &status);
if (status == GL_FALSE && !hard_sync)
continue;
glGetQueryObjectiv(query->handle, GL_QUERY_RESULT, &count);
query->pending = false;
query = nullptr;
current_zcull_stats.zpass_pixel_cnt += count;
zcull_task_queue.pending--;
}
for (u32 i = 0; i < occlusion_query_count; ++i)
{
auto &query = occlusion_query_data[i];
if (!query.pending && !query.active)
{
result = i;
break;
}
}
if (result == UINT16_MAX && !hard_sync)
return synchronize_zcull_stats(true);
return result;
}
void GLGSRender::notify_zcull_info_changed()
{
check_zcull_status(false, false);
}
query->result += result;
}

View File

@ -36,62 +36,6 @@ struct work_item
volatile bool received = false;
};
// Bookkeeping record for a single OpenGL occlusion query object.
struct occlusion_query_info
{
// GL query object name, as generated by glGenQueries
GLuint handle;
// Result slot; zeroed whenever the query is (re)started
GLint result;
// Number of draw calls issued while this query was the active one
GLint num_draws;
// True once the query has been ended and its result has not been read back yet
bool pending;
// True between glBeginQuery and glEndQuery for this query
bool active;
};
// Running totals for the zcull report counters.
struct zcull_statistics
{
	u32 zpass_pixel_cnt;
	u32 zcull_stats;
	u32 zcull_stats1;
	u32 zcull_stats2;
	u32 zcull_stats3;

	// Zero out every counter
	void clear()
	{
		zcull_stats3 = 0;
		zcull_stats2 = 0;
		zcull_stats1 = 0;
		zcull_stats = 0;
		zpass_pixel_cnt = 0;
	}
};
struct occlusion_task
{
std::vector<occlusion_query_info*> task_stack;
occlusion_query_info* active_query = nullptr;
u32 pending = 0;
//Add one query to the task
void add(occlusion_query_info* query)
{
active_query = query;
if (task_stack.size() > 0 && pending == 0)
task_stack.resize(0);
const auto empty_slots = task_stack.size() - pending;
if (empty_slots >= 4)
{
for (auto &_query : task_stack)
{
if (_query == nullptr)
{
_query = query;
pending++;
return;
}
}
}
task_stack.push_back(query);
pending++;
}
};
struct driver_state
{
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;
@ -354,11 +298,6 @@ private:
std::mutex queue_guard;
std::list<work_item> work_queue;
bool framebuffer_status_valid = false;
rsx::gcm_framebuffer_info surface_info[rsx::limits::color_buffers_count];
rsx::gcm_framebuffer_info depth_surface_info;
bool flush_draw_buffers = false;
std::thread::id m_thread_id;
@ -372,14 +311,6 @@ private:
//vaos are mandatory for core profile
gl::vao m_vao;
//occlusion query
bool zcull_surface_active = false;
zcull_statistics current_zcull_stats;
occlusion_task zcull_task_queue = {};
const u32 occlusion_query_count = 128;
std::array<occlusion_query_info, 128> occlusion_query_data = {};
std::mutex m_sampler_mutex;
u64 surface_store_tag = 0;
std::atomic_bool m_samplers_dirty = {true};
@ -414,9 +345,11 @@ public:
work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data);
bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;
void check_zcull_status(bool framebuffer_swap, bool force_read);
u32 synchronize_zcull_stats(bool hard_sync = false);
void begin_occlusion_query(rsx::occlusion_query_info* query) override;
void end_occlusion_query(rsx::occlusion_query_info* query) override;
bool check_occlusion_query_status(rsx::occlusion_query_info* query) override;
void get_occlusion_query_result(rsx::occlusion_query_info* query) override;
protected:
void begin() override;
@ -430,10 +363,6 @@ protected:
void do_local_task() override;
void notify_zcull_info_changed() override;
void clear_zcull_stats(u32 type) override;
u32 get_zcull_stats(u32 type) override;
bool on_access_violation(u32 address, bool is_writing) override;
void on_notify_memory_unmapped(u32 address_base, u32 size) override;
void notify_tile_unbound(u32 tile) override;

View File

@ -248,15 +248,15 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (surface_info[i].pitch && g_cfg.video.write_color_buffers)
if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers)
{
if (!old_format_found)
{
old_format = rsx::internals::surface_color_format_to_gl(surface_info[i].color_format).format;
old_format = rsx::internals::surface_color_format_to_gl(m_surface_info[i].color_format).format;
old_format_found = true;
}
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_info[i].address, surface_info[i].pitch * surface_info[i].height);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height);
}
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
@ -265,7 +265,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
draw_fbo.color[i] = *rtt;
rtt->set_rsx_pitch(pitchs[i]);
surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical };
m_surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical };
rtt->tile = find_tile(color_offsets[i], color_locations[i]);
rtt->aa_mode = aa_mode;
@ -275,7 +275,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
m_gl_texture_cache.tag_framebuffer(surface_addresses[i] + rtt->raster_address_offset);
}
else
surface_info[i] = {};
m_surface_info[i] = {};
}
if (std::get<0>(m_rtts.m_bound_depth_stencil))
@ -293,7 +293,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const u32 depth_surface_pitch = rsx::method_registers.surface_z_pitch();
std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch());
depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical };
m_depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical };
ds->aa_mode = aa_mode;
ds->set_raster_offset(clip_x, clip_y, texel_size);
@ -302,7 +302,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
m_gl_texture_cache.tag_framebuffer(depth_address + ds->raster_address_offset);
}
else
depth_surface_info = {};
m_depth_surface_info = {};
framebuffer_status_valid = draw_fbo.check();
if (!framebuffer_status_valid) return;
@ -349,30 +349,30 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (!surface_info[i].address || !surface_info[i].pitch) continue;
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const u32 range = surface_info[i].pitch * surface_info[i].height;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch,
const u32 range = m_surface_info[i].pitch * m_surface_info[i].height;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
color_format.format, color_format.type, color_format.swap_bytes);
}
}
if (g_cfg.video.write_depth_buffer)
{
if (depth_surface_info.address && depth_surface_info.pitch)
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format);
u32 pitch = depth_surface_info.width * 2;
if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
u32 pitch = m_depth_surface_info.width * 2;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
const u32 range = pitch * depth_surface_info.height;
const u32 range = pitch * m_depth_surface_info.height;
//TODO: Verify that depth surface pitch variance affects results
if (pitch != depth_surface_info.pitch)
LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch);
if (pitch != m_depth_surface_info.pitch)
LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", m_depth_surface_info.pitch, pitch);
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch,
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, pitch,
depth_format_gl.format, depth_format_gl.type, true);
}
}
@ -418,7 +418,7 @@ void GLGSRender::read_buffers()
const u32 location = locations[i];
const u32 pitch = pitchs[i];
if (!surface_info[i].pitch)
if (!m_surface_info[i].pitch)
continue;
const u32 range = pitch * height;
@ -478,7 +478,7 @@ void GLGSRender::read_buffers()
if (g_cfg.video.read_depth_buffer)
{
//TODO: use pitch
const u32 pitch = depth_surface_info.pitch;
const u32 pitch = m_depth_surface_info.pitch;
const u32 width = rsx::method_registers.surface_clip_width();
const u32 height = rsx::method_registers.surface_clip_height();
@ -537,7 +537,7 @@ void GLGSRender::write_buffers()
{
for (int i = index; i < index + count; ++i)
{
if (surface_info[i].pitch == 0)
if (m_surface_info[i].pitch == 0)
continue;
/**Even tiles are loaded as whole textures during read_buffers from testing.
@ -545,8 +545,8 @@ void GLGSRender::write_buffers()
* but using the GPU to perform the caching is many times faster.
*/
const u32 range = surface_info[i].pitch * surface_info[i].height;
__glcheck m_gl_texture_cache.flush_memory_to_cache(surface_info[i].address, range, true);
const u32 range = m_surface_info[i].pitch * m_surface_info[i].height;
__glcheck m_gl_texture_cache.flush_memory_to_cache(m_surface_info[i].address, range, true);
}
};
@ -556,11 +556,11 @@ void GLGSRender::write_buffers()
if (g_cfg.video.write_depth_buffer)
{
//TODO: use pitch
if (depth_surface_info.pitch == 0) return;
if (m_depth_surface_info.pitch == 0) return;
u32 range = depth_surface_info.width * depth_surface_info.height * 2;
if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2;
u32 range = m_depth_surface_info.width * m_depth_surface_info.height * 2;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2;
m_gl_texture_cache.flush_memory_to_cache(depth_surface_info.address, range, true);
m_gl_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, range, true);
}
}

View File

@ -343,6 +343,9 @@ namespace rsx
element_push_buffer.resize(0);
if (zcull_task_queue.active_query && zcull_task_queue.active_query->active)
zcull_task_queue.active_query->num_draws++;
if (capture_current_frame)
{
u32 element_count = rsx::method_registers.current_draw_clause.get_elements_count();
@ -1925,4 +1928,179 @@ namespace rsx
skip_frame = (m_skip_frame_ctr < 0);
}
}
void thread::check_zcull_status(bool framebuffer_swap, bool force_read)
{
if (g_cfg.video.disable_zcull_queries)
return;
bool testing_enabled = zcull_pixel_cnt_enabled || zcull_stats_enabled;
if (framebuffer_swap)
{
zcull_surface_active = false;
const u32 zeta_address = m_depth_surface_info.address;
if (zeta_address)
{
//Find zeta address in bound zculls
for (int i = 0; i < rsx::limits::zculls_count; i++)
{
if (zculls[i].binded)
{
const u32 rsx_address = rsx::get_address(zculls[i].offset, CELL_GCM_LOCATION_LOCAL);
if (rsx_address == zeta_address)
{
zcull_surface_active = true;
break;
}
}
}
}
}
occlusion_query_info* query = nullptr;
if (zcull_task_queue.task_stack.size() > 0)
query = zcull_task_queue.active_query;
if (query && query->active)
{
if (force_read || (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active))
{
end_occlusion_query(query);
query->active = false;
query->pending = true;
}
}
else
{
if (zcull_rendering_enabled && testing_enabled && zcull_surface_active)
{
//Find query
u32 free_index = synchronize_zcull_stats();
query = &occlusion_query_data[free_index];
zcull_task_queue.add(query);
begin_occlusion_query(query);
query->active = true;
query->result = 0;
query->num_draws = 0;
}
}
}
// Resets the accumulated zcull statistics for the given report type.
// Only CELL_GCM_ZPASS_PIXEL_CNT is handled; any other type is ignored.
void thread::clear_zcull_stats(u32 type)
{
	if (g_cfg.video.disable_zcull_queries)
		return;

	if (type != CELL_GCM_ZPASS_PIXEL_CNT)
		return;

	const bool has_live_draws =
		zcull_task_queue.active_query &&
		zcull_task_queue.active_query->active &&
		zcull_task_queue.active_query->num_draws > 0;

	if (has_live_draws)
	{
		//Close the running query and throw its result away
		check_zcull_status(false, true);
		zcull_task_queue.active_query->pending = false;

		//Restart counting if stats are still enabled
		//(active_query is re-read on purpose: the call may have installed a new one)
		check_zcull_status(false, false);
		zcull_task_queue.active_query->num_draws = 0;
	}

	current_zcull_stats.clear();
}
// Returns the value to report for the requested zcull statistic.
// Reports are binary: UINT16_MAX or 0, depending on whether any pixel passed.
// Returns 0 when zcull queries are disabled or the type is unknown.
u32 thread::get_zcull_stats(u32 type)
{
	if (g_cfg.video.disable_zcull_queries)
		return 0u;

	const auto running = zcull_task_queue.active_query;
	if (type == CELL_GCM_ZPASS_PIXEL_CNT &&
		running && running->active &&
		current_zcull_stats.zpass_pixel_cnt == 0)
	{
		//The zcull unit is still bound while the read happens and no result is available yet:
		//close the current query, then open a fresh one since counting is still active
		check_zcull_status(false, true);
		check_zcull_status(false, false);
	}

	//True when at least one pixel has passed; hard-syncs outstanding queries only if needed
	const auto any_pixel_passed = [this]() -> bool
	{
		if (current_zcull_stats.zpass_pixel_cnt > 0)
			return true;

		synchronize_zcull_stats(true);
		return current_zcull_stats.zpass_pixel_cnt > 0;
	};

	if (type == CELL_GCM_ZPASS_PIXEL_CNT)
		return any_pixel_passed() ? UINT16_MAX : 0;

	if (type == CELL_GCM_ZCULL_STATS || type == CELL_GCM_ZCULL_STATS1 || type == CELL_GCM_ZCULL_STATS2)
	{
		//TODO
		return UINT16_MAX;
	}

	if (type == CELL_GCM_ZCULL_STATS3)
	{
		//Some kind of inverse value
		return any_pixel_passed() ? 0 : UINT16_MAX;
	}

	LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
	return 0;
}
// Collects results of finished occlusion queries into current_zcull_stats and
// returns the index of a query slot that is neither pending nor active.
//
// hard_sync: when false, queries whose status check reports "not ready" are skipped;
//            when true, their results are fetched regardless.
//
// Returns 0 when zcull rendering is off or nothing is pending. Returns UINT16_MAX
// if no free slot exists even after a hard sync.
u32 thread::synchronize_zcull_stats(bool hard_sync)
{
	if (!zcull_rendering_enabled || zcull_task_queue.pending == 0)
		return 0;

	for (auto &slot : zcull_task_queue.task_stack)
	{
		if (slot == nullptr || slot->active)
			continue;

		const bool ready = check_occlusion_query_status(slot);
		if (!ready && !hard_sync)
			continue;

		get_occlusion_query_result(slot);
		current_zcull_stats.zpass_pixel_cnt += slot->result;

		//Retire the entry; nulling the slot lets the task queue reuse it
		slot->pending = false;
		slot = nullptr;
		zcull_task_queue.pending--;
	}

	u32 free_index = UINT16_MAX;
	for (u32 i = 0; i < occlusion_query_count; ++i)
	{
		const auto &candidate = occlusion_query_data[i];
		if (candidate.pending || candidate.active)
			continue;

		free_index = i;
		break;
	}

	if (free_index != UINT16_MAX || hard_sync)
		return free_index;

	//Nothing free after a soft pass: retry, this time fetching every result
	return synchronize_zcull_stats(true);
}
// Re-evaluates the occlusion query state, without a framebuffer swap and
// without forcing the active query to be ended.
void thread::notify_zcull_info_changed()
{
check_zcull_status(false, false);
}
}

View File

@ -11,7 +11,7 @@
#include "RSXVertexProgram.h"
#include "RSXFragmentProgram.h"
#include "rsx_methods.h"
#include "rsx_trace.h"
#include "rsx_utils.h"
#include <Utilities/GSL.h>
#include "Utilities/Thread.h"
@ -142,6 +142,65 @@ namespace rsx
std::array<attribute_buffer_placement, 16> attribute_placement;
};
// Running totals for the zcull report counters.
struct zcull_statistics
{
	u32 zpass_pixel_cnt;
	u32 zcull_stats;
	u32 zcull_stats1;
	u32 zcull_stats2;
	u32 zcull_stats3;

	// Zero out every counter
	void clear()
	{
		zcull_stats3 = 0;
		zcull_stats2 = 0;
		zcull_stats1 = 0;
		zcull_stats = 0;
		zpass_pixel_cnt = 0;
	}
};
// Backend-independent bookkeeping record for one occlusion query slot.
struct occlusion_query_info
{
// Backend-specific identifier: the GL renderer stores its query object name here,
// the Vulkan renderer stores the slot index it uses as a key into its occlusion map
u32 driver_handle;
// Query result; zeroed when the query begins and filled in by get_occlusion_query_result
u32 result;
// Number of draw calls issued while this query was the active one
u32 num_draws;
// True once the query has been ended and its result has not been collected yet
bool pending;
// True while the query is the one currently running
bool active;
// Time the query was started; the Vulkan renderer sets it from get_system_time()
// and uses it to pick the oldest query when it has to free a pool slot
u64 sync_timestamp;
// NOTE(review): not referenced anywhere in the visible code - purpose to be confirmed
u64 external_flags;
};
struct occlusion_task
{
std::vector<occlusion_query_info*> task_stack;
occlusion_query_info* active_query = nullptr;
u32 pending = 0;
//Add one query to the task
void add(occlusion_query_info* query)
{
active_query = query;
if (task_stack.size() > 0 && pending == 0)
task_stack.resize(0);
const auto empty_slots = task_stack.size() - pending;
if (empty_slots >= 4)
{
for (auto &_query : task_stack)
{
if (_query == nullptr)
{
_query = query;
pending++;
return;
}
}
}
task_stack.push_back(query);
pending++;
}
};
struct sampled_image_descriptor_base;
class thread : public named_thread
@ -158,6 +217,19 @@ namespace rsx
bool supports_multidraw = false;
//occlusion query
bool zcull_surface_active = false;
zcull_statistics current_zcull_stats;
const u32 occlusion_query_count = 128;
std::array<occlusion_query_info, 128> occlusion_query_data = {};
occlusion_task zcull_task_queue = {};
//framebuffer setup
rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
rsx::gcm_framebuffer_info m_depth_surface_info;
bool framebuffer_status_valid = false;
public:
RsxDmaControl* ctrl = nullptr;
atomic_t<u32> internal_get{ 0 };
@ -274,9 +346,16 @@ namespace rsx
virtual void notify_tile_unbound(u32 /*tile*/) {}
//zcull
virtual void notify_zcull_info_changed() {}
virtual void clear_zcull_stats(u32 /*type*/) {}
virtual u32 get_zcull_stats(u32 /*type*/) { return UINT16_MAX; }
virtual void notify_zcull_info_changed();
virtual void clear_zcull_stats(u32 type);
virtual u32 get_zcull_stats(u32 type);
virtual void check_zcull_status(bool framebuffer_swap, bool force_read);
virtual u32 synchronize_zcull_stats(bool hard_sync = false);
virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; }
virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; }
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;

View File

@ -596,11 +596,16 @@ VKGSRender::VKGSRender() : GSRender()
m_secondary_command_buffer_pool.create((*m_device));
m_secondary_command_buffer.create(m_secondary_command_buffer_pool);
m_secondary_command_buffer.access_hint = vk::command_buffer::access_type_hint::all;
//Precalculated stuff
m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats);
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
//Occlusion
m_occlusion_query_pool.create((*m_device), 1024); //Enough for 1k draw calls per pass
for (int n = 0; n < 128; ++n)
occlusion_query_data[n].driver_handle = n;
//Generate frame contexts
VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS };
VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 16 * DESCRIPTOR_MAX_DRAW_CALLS };
@ -753,6 +758,9 @@ VKGSRender::~VKGSRender()
vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
//Queries
m_occlusion_query_pool.destroy();
//Command buffer
for (auto &cb : m_primary_cb_list)
cb.destroy();
@ -1416,6 +1424,54 @@ void VKGSRender::end()
const bool is_emulated_restart = (!primitive_emulated && rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart() && rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed);
const bool single_draw = !supports_multidraw || (!is_emulated_restart && (rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive));
u32 occlusion_id = 0;
if (m_occlusion_query_active)
{
//Begin query
occlusion_id = m_occlusion_query_pool.find_free_slot();
if (occlusion_id == UINT32_MAX)
{
bool free_slot_found = false;
u32 index_to_free = UINT32_MAX;
u64 earliest_timestamp = UINT64_MAX;
//flush occlusion queries
for (auto It : m_occlusion_map)
{
u32 index = It.first;
auto query = &occlusion_query_data[index];
if (check_occlusion_query_status(query))
{
free_slot_found = true;
get_occlusion_query_result(query);
break;
}
if (query->sync_timestamp < earliest_timestamp)
{
index_to_free = index;
earliest_timestamp = query->sync_timestamp;
}
}
if (free_slot_found)
{
occlusion_id = m_occlusion_query_pool.find_free_slot();
}
else
{
get_occlusion_query_result(&occlusion_query_data[index_to_free]);
occlusion_id = m_occlusion_query_pool.find_free_slot();
}
verify(HERE), occlusion_id != UINT32_MAX;
}
m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
}
if (!index_info)
{
if (single_draw)
@ -1468,6 +1524,12 @@ void VKGSRender::end()
}
}
if (m_occlusion_query_active)
{
//End query
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
}
close_render_pass();
vk::leave_uninterruptible();
@ -2309,7 +2371,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
return;
copy_render_targets_to_dma_location();
m_rtts_dirty = false;
u32 clip_width = rsx::method_registers.surface_clip_width();
@ -2580,6 +2641,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, fbo_width, fbo_height, std::move(fbo_images)));
}
check_zcull_status(true, false);
}
void VKGSRender::reinitialize_swapchain()
@ -2913,6 +2976,79 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
m_current_command_buffer->begin();
m_samplers_dirty.store(true);
return result;
}
// Clears zcull statistics through the base implementation, then forgets every
// tracked query and resets all in-use pool slots on the current command buffer.
// Note that the map clear and pool reset run for every `type`, not only for the
// types the base implementation acts on.
void VKGSRender::clear_zcull_stats(u32 type)
{
rsx::thread::clear_zcull_stats(type);
m_occlusion_map.clear();
m_occlusion_query_pool.reset_all(*m_current_command_buffer);
}
// Marks `query` as the one subsequent draws report to. No Vulkan command is
// recorded here; this only arms the state and stamps the query's start time.
void VKGSRender::begin_occlusion_query(rsx::occlusion_query_info* query)
{
	//Arm per-draw query recording
	m_active_query_info = query;
	m_occlusion_query_active = true;

	//Fresh result, stamped with the start time
	query->sync_timestamp = get_system_time();
	query->result = 0;
}
// Stops associating draws with an occlusion query and flushes the command queue.
// The `query` argument is not used: the renderer just drops its active-query pointer.
void VKGSRender::end_occlusion_query(rsx::occlusion_query_info* query)
{
m_occlusion_query_active = false;
m_active_query_info = nullptr;
flush_command_queue();
}
// Reports whether the result for `query` can be read without waiting.
// A query with no tracked pool slots counts as ready. A query whose command
// buffer is still being recorded, or is still pending, is not ready.
// Otherwise the answer comes from the oldest pool slot recorded for it.
bool VKGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query)
{
	const auto entry = m_occlusion_map.find(query->driver_handle);
	if (entry == m_occlusion_map.end() || entry->second.indices.empty())
		return true;

	const auto &tracked = entry->second;
	const auto waiting_on = tracked.command_buffer_to_wait;

	if (waiting_on == m_current_command_buffer || waiting_on->pending)
		return false;

	return m_occlusion_query_pool.check_query_status(tracked.indices.front());
}
// Reads back the result for `query`, waiting on its command buffer if needed.
// query->result is set to 1 as soon as any of its pool slots reports a hit and
// is left untouched otherwise. The pool slots are then reset on the current
// command buffer and the query is dropped from the occlusion map.
void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
{
	const auto entry = m_occlusion_map.find(query->driver_handle);
	if (entry == m_occlusion_map.end())
		return;

	auto &tracked = entry->second;
	if (tracked.indices.empty())
		return;

	//Should hard sync, but this should almost never ever happen
	if (tracked.command_buffer_to_wait == m_current_command_buffer)
		flush_command_queue();

	if (tracked.command_buffer_to_wait->pending)
		tracked.command_buffer_to_wait->wait();

	//A single hit is enough, stop at the first one
	for (const u32 slot : tracked.indices)
	{
		if (m_occlusion_query_pool.get_query_result(slot) != 0)
		{
			query->result = 1;
			break;
		}
	}

	m_occlusion_query_pool.reset_queries(*m_current_command_buffer, tracked.indices);
	m_occlusion_map.erase(query->driver_handle);
}

View File

@ -109,6 +109,12 @@ struct command_buffer_chunk: public vk::command_buffer
}
};
// Per-query tracking data kept by the Vulkan renderer.
struct occlusion_data
{
// Query pool slot indices recorded for this query, one appended per draw
std::vector<u32> indices;
// Command buffer the most recent slot was recorded into; results are read only once it is no longer pending
command_buffer_chunk* command_buffer_to_wait = nullptr;
};
class VKGSRender : public GSRender
{
private:
@ -153,10 +159,14 @@ private:
//Vulkan internals
vk::command_pool m_command_buffer_pool;
vk::occlusion_query_pool m_occlusion_query_pool;
bool m_occlusion_query_active = false;
rsx::occlusion_query_info *m_active_query_info = nullptr;
std::unordered_map<u32, occlusion_data> m_occlusion_map;
std::mutex m_secondary_cb_guard;
vk::command_pool m_secondary_command_buffer_pool;
vk::command_buffer m_secondary_command_buffer;
vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations
u32 m_current_cb_index = 0;
std::array<command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
@ -260,12 +270,6 @@ private:
s64 m_flip_time = 0;
u8 m_draw_buffers_count = 0;
bool framebuffer_status_valid = false;
rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
rsx::gcm_framebuffer_info m_depth_surface_info;
bool m_flush_draw_buffers = false;
std::atomic<int> m_last_flushable_cb = {-1 };
@ -320,6 +324,12 @@ public:
void write_buffers();
void set_viewport();
void clear_zcull_stats(u32 type) override;
void begin_occlusion_query(rsx::occlusion_query_info* query) override;
void end_occlusion_query(rsx::occlusion_query_info* query) override;
bool check_occlusion_query_status(rsx::occlusion_query_info* query) override;
void get_occlusion_query_result(rsx::occlusion_query_info* query) override;
protected:
void begin() override;
void end() override;

View File

@ -1450,6 +1450,113 @@ namespace vk
}
};
class occlusion_query_pool
{
VkQueryPool query_pool = VK_NULL_HANDLE;
vk::render_device* owner = nullptr;
std::vector<bool> query_active_status;
public:
void create(vk::render_device &dev, u32 num_entries)
{
VkQueryPoolCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
info.queryType = VK_QUERY_TYPE_OCCLUSION;
info.queryCount = num_entries;
CHECK_RESULT(vkCreateQueryPool(dev, &info, nullptr, &query_pool));
owner = &dev;
query_active_status.resize(num_entries, false);
}
void destroy()
{
if (query_pool)
{
vkDestroyQueryPool(*owner, query_pool, nullptr);
owner = nullptr;
query_pool = VK_NULL_HANDLE;
}
}
void begin_query(vk::command_buffer &cmd, u32 index)
{
if (query_active_status[index])
{
//Synchronization must be done externally
vkCmdResetQueryPool(cmd, query_pool, index, 1);
}
vkCmdBeginQuery(cmd, query_pool, index, 0);//VK_QUERY_CONTROL_PRECISE_BIT);
query_active_status[index] = true;
}
void end_query(vk::command_buffer &cmd, u32 index)
{
vkCmdEndQuery(cmd, query_pool, index);
}
bool check_query_status(u32 index)
{
u32 result[2] = {0, 0};
switch (VkResult status = vkGetQueryPoolResults(*owner, query_pool, index, 1, 8, result, 8, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))
{
case VK_SUCCESS:
break;
case VK_NOT_READY:
return false;
default:
vk::die_with_error(HERE, status);
}
return result[1] != 0;
}
u32 get_query_result(u32 index)
{
u32 result = 0;
CHECK_RESULT(vkGetQueryPoolResults(*owner, query_pool, index, 1, 4, &result, 4, VK_QUERY_RESULT_WAIT_BIT));
return result == 0u? 0u: 1u;
}
void reset_query(vk::command_buffer &cmd, u32 index)
{
vkCmdResetQueryPool(cmd, query_pool, index, 1);
query_active_status[index] = false;
}
void reset_queries(vk::command_buffer &cmd, std::vector<u32> &list)
{
for (const auto index : list)
reset_query(cmd, index);
}
void reset_all(vk::command_buffer &cmd)
{
for (u32 n = 0; n < query_active_status.size(); n++)
{
if (query_active_status[n])
reset_query(cmd, n);
}
}
u32 find_free_slot()
{
for (u32 n = 0; n < query_active_status.size(); n++)
{
if (query_active_status[n] == false)
return n;
}
return UINT32_MAX;
}
};
namespace glsl
{
enum program_input_type