rsx: Async shader compilation

- Defer compilation process to worker threads
- vulkan: Fixup for graphics_pipeline_state.
  Never use struct assignment operator on vk** structs due to padding after sType member (4 bytes)
This commit is contained in:
kd-11 2018-07-11 23:51:29 +03:00 committed by kd-11
parent ac99fd764d
commit e7f30640ef
21 changed files with 585 additions and 128 deletions

View File

@ -335,7 +335,7 @@ public:
u32 ctrl = (vmfprog.outputFromH0 ? 0 : 0x40) | (vmfprog.depthReplace ? 0xe : 0);
std::vector<rsx::texture_dimension_extended> td;
RSXFragmentProgram prog;
prog.size = 0, prog.addr = vm::base(ptr + vmprog.ucode), prog.offset = 0, prog.ctrl = ctrl;
prog.ucode_length = 0, prog.addr = vm::base(ptr + vmprog.ucode), prog.offset = 0, prog.ctrl = ctrl;
GLFragmentDecompilerThread(m_glsl_shader, param_array, prog, size).Task();
vm::close();
}

View File

@ -326,20 +326,20 @@ size_t fragment_program_utils::get_fragment_program_ucode_size(void *ptr)
fragment_program_utils::fragment_program_metadata fragment_program_utils::analyse_fragment_program(void *ptr)
{
const qword *instBuffer = (const qword*)ptr;
size_t instIndex = 0;
s32 index = 0;
s32 program_offset = -1;
u32 ucode_size = 0;
u16 textures_mask = 0;
while (true)
{
const qword& inst = instBuffer[instIndex];
const qword& inst = instBuffer[index];
const u32 opcode = (inst.word[0] >> 16) & 0x3F;
if (opcode)
{
if (program_offset < 0)
program_offset = instIndex * 16;
program_offset = index * 16;
switch(opcode)
{
@ -362,7 +362,7 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
if (is_constant(inst.word[1]) || is_constant(inst.word[2]) || is_constant(inst.word[3]))
{
//Instruction references constant, skip one slot occupied by data
instIndex++;
index++;
ucode_size += 16;
}
}
@ -376,14 +376,14 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
{
if (program_offset < 0)
{
program_offset = instIndex * 16;
program_offset = index * 16;
ucode_size = 16;
}
break;
}
instIndex++;
index++;
}
return{ (u32)program_offset, ucode_size, textures_mask };

View File

@ -6,7 +6,9 @@
#include "Utilities/GSL.h"
#include "Utilities/hash.h"
#include <mutex>
#include "Utilities/mutex.h"
#include <deque>
enum class SHADER_TYPE
{
@ -136,22 +138,73 @@ class program_state_cache
}
};
public:
// Work item describing one pipeline object that still has to be linked.
// NOTE: vp and fp are held by reference, not by value; the referenced program
// objects must stay alive for as long as this entry exists.
struct async_link_task_entry
{
	const vertex_program_type& vp;
	const fragment_program_type& fp;
	pipeline_properties props; // Stored by value, copied from the constructor argument

	// Parameter names deliberately avoid the "_Uppercase" pattern, which is reserved
	// for the implementation (and may collide with ctype macros on some C libraries)
	async_link_task_entry(const vertex_program_type& vertex_program, const fragment_program_type& fragment_program, const pipeline_properties& properties)
		: vp(vertex_program), fp(fragment_program), props(properties)
	{}
};
struct async_decompile_task_entry
{
RSXVertexProgram vp;
RSXFragmentProgram fp;
bool is_fp;
std::vector<u8> tmp_cache;
async_decompile_task_entry(const RSXVertexProgram& _V)
: vp(_V), is_fp(false)
{
}
async_decompile_task_entry(const RSXFragmentProgram& _F)
: fp(_F), is_fp(true)
{
tmp_cache.resize(fp.ucode_length);
std::memcpy(tmp_cache.data(), fp.addr, fp.ucode_length);
fp.addr = tmp_cache.data();
}
};
protected:
std::mutex s_mtx; // TODO: Only need to synchronize when loading cache
shared_mutex m_pipeline_mutex;
shared_mutex m_decompiler_mutex;
size_t m_next_id = 0;
bool m_cache_miss_flag;
bool m_cache_miss_flag; // Set if last lookup did not find any usable cached programs
bool m_program_compiled_flag; // Set if last lookup caused program to be linked
binary_to_vertex_program m_vertex_shader_cache;
binary_to_fragment_program m_fragment_shader_cache;
std::unordered_map <pipeline_key, pipeline_storage_type, pipeline_key_hash, pipeline_key_compare> m_storage;
std::unordered_map <pipeline_key, std::unique_ptr<async_link_task_entry>, pipeline_key_hash, pipeline_key_compare> m_link_queue;
std::deque<async_decompile_task_entry> m_decompile_queue;
vertex_program_type __null_vertex_program;
fragment_program_type __null_fragment_program;
pipeline_storage_type __null_pipeline_handle;
/// bool here to inform that the program was preexisting.
std::tuple<const vertex_program_type&, bool> search_vertex_program(const RSXVertexProgram& rsx_vp)
std::tuple<const vertex_program_type&, bool> search_vertex_program(const RSXVertexProgram& rsx_vp, bool force_load = true)
{
const auto& I = m_vertex_shader_cache.find(rsx_vp);
if (I != m_vertex_shader_cache.end())
{
return std::forward_as_tuple(I->second, true);
}
if (!force_load)
{
return std::forward_as_tuple(__null_vertex_program, false);
}
LOG_NOTICE(RSX, "VP not found in buffer!");
vertex_program_type& new_shader = m_vertex_shader_cache[rsx_vp];
backend_traits::recompile_vertex_program(rsx_vp, new_shader, m_next_id++);
@ -160,17 +213,22 @@ protected:
}
/// bool here to inform that the program was preexisting.
std::tuple<const fragment_program_type&, bool> search_fragment_program(const RSXFragmentProgram& rsx_fp)
std::tuple<const fragment_program_type&, bool> search_fragment_program(const RSXFragmentProgram& rsx_fp, bool force_load = true)
{
const auto& I = m_fragment_shader_cache.find(rsx_fp);
if (I != m_fragment_shader_cache.end())
{
return std::forward_as_tuple(I->second, true);
}
if (!force_load)
{
return std::forward_as_tuple(__null_fragment_program, false);
}
LOG_NOTICE(RSX, "FP not found in buffer!");
size_t fragment_program_size = program_hash_util::fragment_program_utils::get_fragment_program_ucode_size(rsx_fp.addr);
gsl::not_null<void*> fragment_program_ucode_copy = malloc(fragment_program_size);
std::memcpy(fragment_program_ucode_copy, rsx_fp.addr, fragment_program_size);
gsl::not_null<void*> fragment_program_ucode_copy = malloc(rsx_fp.ucode_length);
std::memcpy(fragment_program_ucode_copy, rsx_fp.addr, rsx_fp.ucode_length);
RSXFragmentProgram new_fp_key = rsx_fp;
new_fp_key.addr = fragment_program_ucode_copy;
fragment_program_type &new_shader = m_fragment_shader_cache[new_fp_key];
@ -277,45 +335,173 @@ public:
}
template<typename... Args>
pipeline_storage_type& getGraphicPipelineState(
// Runs one unit of background work: drains up to max_decompile_count entries from
// the decompile queue, then links at most one queued pipeline.
// args are forwarded unchanged to backend_traits::build_pipeline.
// Returns true if the decompile batch limit was reached or link tasks remain queued,
// false otherwise (callers can use this to decide whether to idle).
bool async_update(u32 max_decompile_count, Args&& ...args)
{
// Decompile shaders and link one pipeline object per 'run'
// NOTE: Linking is much slower than decompilation step, so always decompile at least 1 unit
// TODO: Use try_lock instead
bool busy = false;
{
// Scope limits how long the decompiler queue lock is held
u32 count = 0;
writer_lock lock(m_decompiler_mutex);
while (!m_decompile_queue.empty())
{
const auto& decompile_task = m_decompile_queue.front();
// search_* is called with its default force_load = true, so a program missing
// from the shader cache is compiled and inserted by the lookup itself
if (decompile_task.is_fp)
{
search_fragment_program(decompile_task.fp);
}
else
{
search_vertex_program(decompile_task.vp);
}
// decompile_task refers to the front element; it must not be used past this point
m_decompile_queue.pop_front();
if (++count >= max_decompile_count)
{
// Allows configurable decompiler 'load'
// Smaller unit count will release locks faster
busy = true;
break;
}
}
}
// Both are assigned below before use; the empty-queue branch returns early
async_link_task_entry* link_entry;
pipeline_key key;
{
// Pick the first pending link task under a shared lock only; the entry stays in
// the queue while it is being built and is erased after the result is stored
reader_lock lock(m_pipeline_mutex);
if (!m_link_queue.empty())
{
auto It = m_link_queue.begin();
link_entry = It->second.get();
key = It->first;
}
else
{
return busy;
}
}
// Build the pipeline with no pipeline lock held
pipeline_storage_type pipeline = backend_traits::build_pipeline(link_entry->vp, link_entry->fp, link_entry->props, std::forward<Args>(args)...);
LOG_SUCCESS(RSX, "New program compiled successfully");
// Publish the result and retire the task under the exclusive lock
writer_lock lock(m_pipeline_mutex);
m_storage[key] = std::move(pipeline);
m_link_queue.erase(key);
return (busy || !m_link_queue.empty());
}
template<typename... Args>
pipeline_storage_type& get_graphics_pipeline(
const RSXVertexProgram& vertexShader,
const RSXFragmentProgram& fragmentShader,
pipeline_properties& pipelineProperties,
bool allow_async,
Args&& ...args
)
{
// TODO : use tie and implicit variable declaration syntax with c++17
const auto &vp_search = search_vertex_program(vertexShader);
const auto &fp_search = search_fragment_program(fragmentShader);
const vertex_program_type &vertex_program = std::get<0>(vp_search);
const fragment_program_type &fragment_program = std::get<0>(fp_search);
bool already_existing_fragment_program = std::get<1>(fp_search);
bool already_existing_vertex_program = std::get<1>(vp_search);
const auto &vp_search = search_vertex_program(vertexShader, !allow_async);
const auto &fp_search = search_fragment_program(fragmentShader, !allow_async);
backend_traits::validate_pipeline_properties(vertex_program, fragment_program, pipelineProperties);
pipeline_key key = { vertex_program.id, fragment_program.id, pipelineProperties };
const bool already_existing_fragment_program = std::get<1>(fp_search);
const bool already_existing_vertex_program = std::get<1>(vp_search);
if (already_existing_fragment_program && already_existing_vertex_program)
bool link_only = false;
m_cache_miss_flag = true;
m_program_compiled_flag = false;
if (!allow_async || (already_existing_vertex_program && already_existing_fragment_program))
{
const vertex_program_type &vertex_program = std::get<0>(vp_search);
const fragment_program_type &fragment_program = std::get<0>(fp_search);
backend_traits::validate_pipeline_properties(vertex_program, fragment_program, pipelineProperties);
pipeline_key key = { vertex_program.id, fragment_program.id, pipelineProperties };
const auto I = m_storage.find(key);
if (I != m_storage.end())
{
m_cache_miss_flag = false;
return I->second;
}
if (allow_async)
{
// Programs already exist, only linking required
link_only = true;
}
else
{
LOG_NOTICE(RSX, "Add program (vp id = %d, fp id = %d)", vertex_program.id, fragment_program.id);
m_program_compiled_flag = true;
pipeline_storage_type pipeline = backend_traits::build_pipeline(vertex_program, fragment_program, pipelineProperties, std::forward<Args>(args)...);
writer_lock lock(m_pipeline_mutex);
auto &rtn = m_storage[key] = std::move(pipeline);
LOG_SUCCESS(RSX, "New program compiled successfully");
return rtn;
}
}
LOG_NOTICE(RSX, "Add program :");
LOG_NOTICE(RSX, "*** vp id = %d", vertex_program.id);
LOG_NOTICE(RSX, "*** fp id = %d", fragment_program.id);
verify(HERE), allow_async;
pipeline_storage_type pipeline = backend_traits::build_pipeline(vertex_program, fragment_program, pipelineProperties, std::forward<Args>(args)...);
std::lock_guard<std::mutex> lock(s_mtx);
auto &rtn = m_storage[key] = std::move(pipeline);
m_cache_miss_flag = true;
if (link_only)
{
const vertex_program_type &vertex_program = std::get<0>(vp_search);
const fragment_program_type &fragment_program = std::get<0>(fp_search);
pipeline_key key = { vertex_program.id, fragment_program.id, pipelineProperties };
LOG_SUCCESS(RSX, "New program compiled successfully");
return rtn;
reader_lock lock(m_pipeline_mutex);
if (m_link_queue.find(key) != m_link_queue.end())
{
// Already in queue
return __null_pipeline_handle;
}
LOG_NOTICE(RSX, "Add program (vp id = %d, fp id = %d)", vertex_program.id, fragment_program.id);
m_program_compiled_flag = true;
lock.upgrade();
m_link_queue[key] = std::make_unique<async_link_task_entry>(vertex_program, fragment_program, pipelineProperties);
}
else
{
reader_lock lock(m_decompiler_mutex);
auto vertex_program_found = std::find_if(m_decompile_queue.begin(), m_decompile_queue.end(), [&](const auto& V)
{
if (V.is_fp) return false;
return program_hash_util::vertex_program_compare()(V.vp, vertexShader);
});
auto fragment_program_found = std::find_if(m_decompile_queue.begin(), m_decompile_queue.end(), [&](const auto& F)
{
if (!F.is_fp) return false;
return program_hash_util::fragment_program_compare()(F.fp, fragmentShader);
});
const bool add_vertex_program = (vertex_program_found == m_decompile_queue.end());
const bool add_fragment_program = (fragment_program_found == m_decompile_queue.end());
if (add_vertex_program)
{
lock.upgrade();
m_decompile_queue.emplace_back(vertexShader);
}
if (add_fragment_program)
{
lock.upgrade();
m_decompile_queue.emplace_back(fragmentShader);
}
}
return __null_pipeline_handle;
}
size_t get_fragment_constants_buffer_size(const RSXFragmentProgram &fragmentShader) const

View File

@ -310,7 +310,7 @@ void D3D12GSRender::load_program()
}
}
m_current_pso = m_pso_cache.getGraphicPipelineState(current_vertex_program, current_fragment_program, prop, m_device.Get(), m_shared_root_signature.Get());
m_current_pso = m_pso_cache.get_graphics_pipeline(current_vertex_program, current_fragment_program, prop, false, m_device.Get(), m_shared_root_signature.Get());
return;
}

View File

@ -372,9 +372,19 @@ void GLGSRender::end()
}
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
load_program(upload_info);
// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
if (!load_program())
{
// Program is not ready, skip drawing this
std::this_thread::yield();
rsx::thread::end();
return;
}
// Load program here since it is dependent on vertex state
load_program_env(upload_info);
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
@ -619,7 +629,16 @@ void GLGSRender::set_viewport()
void GLGSRender::on_init_thread()
{
GSRender::on_init_thread();
verify(HERE), m_frame;
// NOTES: All contexts have to be created before any is bound to a thread
// This allows context sharing to work (both GLRCs passed to wglShareLists have to be idle or you get ERROR_BUSY)
m_context = m_frame->make_context();
m_decompiler_context = m_frame->make_context();
// Bind primary context to main RSX thread
m_frame->set_current(m_context);
zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this));
gl::init();
@ -1108,7 +1127,7 @@ bool GLGSRender::check_program_state()
return (rsx::method_registers.shader_program_address() != 0);
}
void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
bool GLGSRender::load_program()
{
if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
{
@ -1119,35 +1138,49 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
current_fragment_program.unnormalized_coords = 0; //unused
void* pipeline_properties = nullptr;
}
else if (m_program)
{
// Program already loaded
return true;
}
m_program = &m_prog_buffer.getGraphicPipelineState(current_vertex_program, current_fragment_program, pipeline_properties);
m_program->use();
void* pipeline_properties = nullptr;
m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties,
!g_cfg.video.disable_asynchronous_shader_compiler).get();
if (m_prog_buffer.check_cache_missed())
if (m_prog_buffer.check_cache_missed())
{
if (m_prog_buffer.check_program_linked_flag())
{
// Program was linked or queued for linking
m_shaders_cache->store(pipeline_properties, current_vertex_program, current_fragment_program);
}
//Notify the user with HUD notification
if (g_cfg.misc.show_shader_compilation_hint)
// Notify the user with HUD notification
if (g_cfg.misc.show_shader_compilation_hint)
{
if (m_overlay_manager)
{
if (m_overlay_manager)
if (auto dlg = m_overlay_manager->get<rsx::overlays::shader_compile_notification>())
{
if (auto dlg = m_overlay_manager->get<rsx::overlays::shader_compile_notification>())
{
//Extend duration
dlg->touch();
}
else
{
//Create dialog but do not show immediately
m_overlay_manager->create<rsx::overlays::shader_compile_notification>();
}
// Extend duration
dlg->touch();
}
else
{
// Create dialog but do not show immediately
m_overlay_manager->create<rsx::overlays::shader_compile_notification>();
}
}
}
}
return m_program != nullptr;
}
void GLGSRender::load_program_env(const gl::vertex_upload_info& upload_info)
{
u8 *buf;
u32 vertex_state_offset;
u32 vertex_constants_offset;
@ -1157,6 +1190,13 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float));
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
if (!m_program)
{
fmt::throw_exception("Unreachable right now" HERE);
}
m_program->use();
if (manually_flush_ring_buffers)
{
m_vertex_state_buffer->reserve_storage_on_heap(512);
@ -1212,7 +1252,8 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
if (update_transform_constants) m_transform_constants_buffer->unmap();
}
m_graphics_state &= ~rsx::pipeline_state::memory_barrier_bits;
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty);
m_graphics_state &= ~handled_flags;
}
void GLGSRender::update_draw_state()
@ -1730,3 +1771,26 @@ void GLGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* que
glEndQuery(GL_ANY_SAMPLES_PASSED);
}
}
// Decompiler thread start hook: makes the dedicated decompiler GL context
// current on the calling thread.
void GLGSRender::on_decompiler_init()
{
// Bind decompiler context to this thread
m_frame->set_current(m_decompiler_context);
}
// Decompiler thread shutdown hook: destroys the decompiler GL context.
void GLGSRender::on_decompiler_exit()
{
// Cleanup
m_frame->delete_context(m_decompiler_context);
}
// One iteration of decompiler thread work for the OpenGL backend.
// Returns whatever the program cache reports from async_update.
bool GLGSRender::on_decompiler_task()
{
	// Allow at most 8 decompile jobs per iteration
	const bool has_pending_work = m_prog_buffer.async_update(8);

	// TODO: Proper synchronization with renderer
	// Finish works well enough for now but it is not a proper solution
	glFinish();

	return has_pending_work;
}

View File

@ -329,6 +329,7 @@ private:
std::thread::id m_thread_id;
GLProgramBuffer m_prog_buffer;
draw_context_t m_decompiler_context;
//buffer
gl::fbo draw_fbo;
@ -361,7 +362,8 @@ private:
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
bool check_program_state();
void load_program(const gl::vertex_upload_info& upload_info);
bool load_program();
void load_program_env(const gl::vertex_upload_info& upload_info);
void update_draw_state();
@ -398,4 +400,8 @@ protected:
std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
std::array<std::vector<gsl::byte>, 2> copy_depth_stencil_buffer_to_memory() override;
void on_decompiler_init() override;
void on_decompiler_exit() override;
bool on_decompiler_task() override;
};

View File

@ -7,7 +7,7 @@ struct GLTraits
{
using vertex_program_type = GLVertexProgram;
using fragment_program_type = GLFragmentProgram;
using pipeline_storage_type = gl::glsl::program;
using pipeline_storage_type = std::unique_ptr<gl::glsl::program>;
using pipeline_properties = void*;
static
@ -32,8 +32,8 @@ struct GLTraits
static
pipeline_storage_type build_pipeline(const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties&)
{
pipeline_storage_type result;
__glcheck result.create()
pipeline_storage_type result = std::make_unique<gl::glsl::program>();
result->create()
.attach(gl::glsl::shader_view(vertexProgramData.id))
.attach(gl::glsl::shader_view(fragmentProgramData.id))
.bind_fragment_data_location("ocol0", 0)
@ -41,31 +41,30 @@ struct GLTraits
.bind_fragment_data_location("ocol2", 2)
.bind_fragment_data_location("ocol3", 3)
.make();
__glcheck result.use();
//Program locations are guaranteed to not change after linking
//Texture locations are simply bound to the TIUs so this can be done once
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
int location;
if (result.uniforms.has_location(rsx::constants::fragment_texture_names[i], &location))
result.uniforms[location] = i;
if (result->uniforms.has_location(rsx::constants::fragment_texture_names[i], &location))
result->uniforms[location] = i;
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
{
int location;
if (result.uniforms.has_location(rsx::constants::vertex_texture_names[i], &location))
result.uniforms[location] = (i + rsx::limits::fragment_textures_count);
if (result->uniforms.has_location(rsx::constants::vertex_texture_names[i], &location))
result->uniforms[location] = (i + rsx::limits::fragment_textures_count);
}
const int stream_buffer_start = rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count;
//Bind locations 0 and 1 to the stream buffers
result.uniforms[0] = stream_buffer_start;
result.uniforms[1] = stream_buffer_start + 1;
result->uniforms[0] = stream_buffer_start;
result->uniforms[1] = stream_buffer_start + 1;
LOG_NOTICE(RSX, "*** prog id = %d", result.id());
LOG_NOTICE(RSX, "*** prog id = %d", result->id());
LOG_NOTICE(RSX, "*** vp id = %d", vertexProgramData.id);
LOG_NOTICE(RSX, "*** fp id = %d", fragmentProgramData.id);
@ -99,7 +98,7 @@ public:
void add_pipeline_entry(RSXVertexProgram &vp, RSXFragmentProgram &fp, void* &props, Args&& ...args)
{
vp.skip_vertex_input_check = true;
getGraphicPipelineState(vp, fp, props, std::forward<Args>(args)...);
get_graphics_pipeline(vp, fp, props, false, std::forward<Args>(args)...);
}
void preload_programs(RSXVertexProgram &vp, RSXFragmentProgram &fp)
@ -112,4 +111,9 @@ public:
{
return m_cache_miss_flag;
}
// Returns the stored m_program_compiled_flag value.
bool check_program_linked_flag() const
{
return m_program_compiled_flag;
}
};

View File

@ -32,7 +32,11 @@ namespace rsx
{
if (auto rsxthr = rsx::get_current_renderer())
{
rsxthr->native_ui_flip_request.store(true);
const auto now = get_system_time() - 1000000;
if ((now - rsxthr->last_flip_time) > min_refresh_duration_us)
{
rsxthr->native_ui_flip_request.store(true);
}
}
}
} // namespace overlays

View File

@ -32,6 +32,8 @@ namespace rsx
u16 virtual_width = 1280;
u16 virtual_height = 720;
u32 min_refresh_duration_us = 16600;
virtual ~overlay() = default;
virtual void update() {}
@ -1103,6 +1105,9 @@ namespace rsx
creation_time = get_system_time();
expire_time = creation_time + 1000000;
// Disable forced refresh unless fps dips below 4
min_refresh_duration_us = 250000;
}
void update_animation(u64 t)

View File

@ -216,9 +216,9 @@ static const std::string rsx_fp_op_names[] =
struct RSXFragmentProgram
{
u32 size;
void *addr;
u32 offset;
u32 ucode_length;
u32 ctrl;
u16 unnormalized_coords;
u16 redirected_textures;

View File

@ -422,6 +422,47 @@ namespace rsx
}
});
// Spawn the background shader decompiler thread. It repeatedly calls
// on_decompiler_task() until emulation stops or the RSX thread is exiting.
thread_ctrl::spawn(m_decompiler_thread, "RSX Decompiler Thread", [this]
{
if (g_cfg.video.disable_asynchronous_shader_compiler)
{
// Die
// NOTE: returns before on_decompiler_init(), so on_decompiler_exit() is not called either
return;
}
on_decompiler_init();
if (g_cfg.core.thread_scheduler_enabled)
{
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx));
}
// Weak cpus need all the help they can get, sleep instead of yield loop
// Lowers decompiler responsiveness but improves emulator performance
const bool prefer_sleep = (std::thread::hardware_concurrency() < 6);
while (!Emu.IsStopped() && !m_rsx_thread_exiting)
{
// A false return means the task reported no remaining work: back off before
// polling again. A true return loops again immediately.
if (!on_decompiler_task())
{
if (Emu.IsPaused())
{
std::this_thread::sleep_for(1ms);
}
else if (prefer_sleep)
{
std::this_thread::sleep_for(500us);
}
else
{
std::this_thread::yield();
}
}
}
on_decompiler_exit();
});
// Raise priority above other threads
thread_ctrl::set_native_priority(1);
@ -969,6 +1010,12 @@ namespace rsx
m_vblank_thread->join();
m_vblank_thread.reset();
}
if (m_decompiler_thread)
{
m_decompiler_thread->join();
m_decompiler_thread.reset();
}
}
std::string thread::get_name() const
@ -1651,6 +1698,7 @@ namespace rsx
result.addr = ((u8*)result.addr + current_fp_metadata.program_start_offset);
result.offset = program_offset + current_fp_metadata.program_start_offset;
result.ucode_length = current_fp_metadata.program_ucode_length;
result.valid = true;
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.unnormalized_coords = 0;
@ -1781,6 +1829,7 @@ namespace rsx
result.addr = ((u8*)result.addr + program_info.program_start_offset);
result.offset = program_offset + program_info.program_start_offset;
result.ucode_length = program_info.program_ucode_length;
result.valid = true;
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.unnormalized_coords = 0;

View File

@ -275,6 +275,7 @@ namespace rsx
class thread : public named_thread
{
std::shared_ptr<thread_ctrl> m_vblank_thread;
std::shared_ptr<thread_ctrl> m_decompiler_thread;
protected:
atomic_t<bool> m_rsx_thread_exiting{false};
@ -424,6 +425,10 @@ namespace rsx
*/
virtual void do_local_task(FIFO_state state);
// Backend hooks for the decompiler thread; the defaults do nothing.
// Called once on the decompiler thread before its work loop starts
virtual void on_decompiler_init() {}
// Called once on the decompiler thread after its work loop ends
virtual void on_decompiler_exit() {}
// Called repeatedly from the work loop; returning false makes the thread sleep or yield before the next call
virtual bool on_decompiler_task() { return false; }
public:
virtual std::string get_name() const override;

View File

@ -1066,13 +1066,7 @@ void VKGSRender::end()
return;
}
//Programs data is dependent on vertex state
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
std::chrono::time_point<steady_clock> textures_start = vertex_end;
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
//Clear any 'dirty' surfaces - possible is a recycled cache surface is used
@ -1303,19 +1297,37 @@ void VKGSRender::end()
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
//Load program
std::chrono::time_point<steady_clock> program_start = textures_end;
load_program(upload_info);
if (!load_program())
{
// Program is not ready, skip drawing this
std::this_thread::yield();
rsx::thread::end();
return;
}
std::chrono::time_point<steady_clock> program_end = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
// Programs data is dependent on vertex state
std::chrono::time_point<steady_clock> vertex_start = program_end;
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
// Load program execution environment
program_start = textures_end;
load_program_env(upload_info);
VkBufferView persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
VkBufferView volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set);
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
program_end = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
textures_start = program_stop;
textures_start = program_end;
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
@ -2193,7 +2205,7 @@ bool VKGSRender::check_program_status()
return (rsx::method_registers.shader_program_address() != 0);
}
void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
bool VKGSRender::load_program()
{
if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
{
@ -2201,13 +2213,15 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
verify(HERE), current_fragment_program.valid;
get_current_vertex_program(vs_sampler_state);
m_graphics_state &= ~rsx::pipeline_state::invalidate_pipeline_bits;
}
auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program;
auto old_program = m_program;
vk::pipeline_props properties = {};
vk::pipeline_props properties{};
// Input assembly
bool emulated_primitive_type;
@ -2335,36 +2349,51 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
//Load current program from buffer
vertex_program.skip_vertex_input_check = true;
fragment_program.unnormalized_coords = 0;
m_program = m_prog_buffer->getGraphicPipelineState(vertex_program, fragment_program, properties, *m_device, pipeline_layout).get();
m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties,
!g_cfg.video.disable_asynchronous_shader_compiler, *m_device, pipeline_layout).get();
vk::leave_uninterruptible();
if (m_prog_buffer->check_cache_missed())
{
m_shaders_cache->store(properties, vertex_program, fragment_program);
if (m_prog_buffer->check_program_linked_flag())
{
// Program was linked or queued for linking
m_shaders_cache->store(properties, vertex_program, fragment_program);
}
//Notify the user with HUD notification
// Notify the user with HUD notification
if (g_cfg.misc.show_shader_compilation_hint)
{
if (m_overlay_manager)
{
if (auto dlg = m_overlay_manager->get<rsx::overlays::shader_compile_notification>())
{
//Extend duration
// Extend duration
dlg->touch();
}
else
{
//Create dialog but do not show immediately
// Create dialog but do not show immediately
m_overlay_manager->create<rsx::overlays::shader_compile_notification>();
}
}
}
}
vk::leave_uninterruptible();
return m_program != nullptr;
}
void VKGSRender::load_program_env(const vk::vertex_upload_info& vertex_info)
{
if (!m_program)
{
fmt::throw_exception("Unreachable right now" HERE);
}
if (1)//m_graphics_state & (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty))
{
const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program);
const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(current_fragment_program);
const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float));
const size_t required_mem = 512 + fragment_buffer_sz;
@ -2384,17 +2413,17 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160),
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
//Fragment constants
buf = buf + 512;
if (fragment_constants_sz)
{
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) },
fragment_program, vk::sanitize_fp_values());
current_fragment_program, vk::sanitize_fp_values());
}
fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program);
fill_fragment_state_buffer(buf + fragment_constants_sz, current_fragment_program);
m_uniform_buffer_ring_info.unmap();
@ -2421,7 +2450,8 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
}
//Clear flags
m_graphics_state &= ~rsx::pipeline_state::memory_barrier_bits;
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty);
m_graphics_state &= ~handled_flags;
}
static const u32 mr_color_offset[rsx::limits::color_buffers_count] =
@ -3409,3 +3439,8 @@ void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* que
m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
m_occlusion_map.erase(query->driver_handle);
}
// One iteration of decompiler thread work for the Vulkan backend: lets the program
// cache process up to 8 decompile jobs per call, forwarding the device and
// pipeline layout to async_update. Returns async_update's result unchanged.
bool VKGSRender::on_decompiler_task()
{
return m_prog_buffer->async_update(8, *m_device, pipeline_layout);
}

View File

@ -402,7 +402,8 @@ private:
public:
bool check_program_status();
void load_program(const vk::vertex_upload_info& vertex_info);
bool load_program();
void load_program_env(const vk::vertex_upload_info& vertex_info);
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
void read_buffers();
void write_buffers();
@ -429,4 +430,6 @@ protected:
bool on_access_violation(u32 address, bool is_writing) override;
void on_invalidate_memory_range(u32 address_base, u32 size) override;
bool on_decompiler_task() override;
};

View File

@ -2282,10 +2282,13 @@ public:
graphics_pipeline_state()
{
ia = { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO };
cs = { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO };
ds = { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO };
rs = { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO };
// NOTE: Vk** structs have padding bytes
memset(this, 0, sizeof(graphics_pipeline_state));
ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
cs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
for (int i = 0; i < 4; ++i)
{
@ -2298,9 +2301,38 @@ public:
rs.lineWidth = 1.f;
}
/**
 * Copy constructor: clones `other` byte-for-byte, then repairs the one pointer
 * that may refer back into the source object.
 */
graphics_pipeline_state(const graphics_pipeline_state& other)
{
	// NOTE: Vk** structs have padding bytes; a raw copy carries those over as well
	memcpy(this, &other, sizeof(*this));

	// If the source pointed at its own attachment array, the copied pointer still
	// targets `other`; it has to be redirected to this object's array instead.
	const bool source_used_own_array = (other.cs.pAttachments == other.att_state);
	if (source_used_own_array)
	{
		// Rebase pointer
		cs.pAttachments = att_state;
	}
}
// Destructor: the body is empty, so no cleanup is performed here.
// NOTE(review): kept user-provided rather than `= default`; switching would change the
// type's triviality - confirm nothing depends on that before altering it.
~graphics_pipeline_state()
{}
/**
 * Copy assignment: byte-for-byte clone of `other`, followed by the same pointer
 * fix-up the copy constructor performs. Self-assignment is a no-op.
 *
 * @return reference to this object
 */
graphics_pipeline_state& operator = (const graphics_pipeline_state& other)
{
	// Self-assignment: nothing to copy (and memcpy must not be given overlapping ranges)
	if (this == &other)
	{
		return *this;
	}

	// NOTE: Vk** structs have padding bytes
	memcpy(this, &other, sizeof(*this));

	const bool source_used_own_array = (other.cs.pAttachments == other.att_state);
	if (source_used_own_array)
	{
		// Rebase pointer
		cs.pAttachments = att_state;
	}

	return *this;
}
void set_primitive_type(VkPrimitiveTopology type)
{
ia.topology = type;

View File

@ -53,18 +53,18 @@ namespace rpcs3
/**
 * Hash specialization for vk::pipeline_props.
 *
 * Combines (via XOR) the hash of the render target count with the hashes of the
 * input-assembly, depth-stencil, rasterization and colour-blend state blocks, plus
 * the first colour attachment state, and passes the result through hash_base once more.
 *
 * @param pipelineProperties pipeline description to hash
 * @return hash value for the given properties
 */
template <>
size_t hash_struct<vk::pipeline_props>(const vk::pipeline_props &pipelineProperties)
{
	size_t seed = hash_base(pipelineProperties.num_targets);
	seed ^= hash_struct(pipelineProperties.state.ia);
	seed ^= hash_struct(pipelineProperties.state.ds);
	seed ^= hash_struct(pipelineProperties.state.rs);

	// Do not compare pointers to memory!
	// The colour-blend block embeds a pointer, so hash a private copy with that pointer
	// cleared. The copy is made with memcpy rather than struct assignment so that the
	// padding bytes inside the Vk struct are carried over as well.
	VkPipelineColorBlendStateCreateInfo tmp;
	memcpy(&tmp, &pipelineProperties.state.cs, sizeof(VkPipelineColorBlendStateCreateInfo));
	tmp.pAttachments = nullptr;
	seed ^= hash_struct(tmp);

	// Only the first attachment state contributes to the hash
	seed ^= hash_struct(pipelineProperties.state.att_state[0]);
	return hash_base(seed);
}
}
@ -142,13 +142,17 @@ struct VKTraits
ms.pSampleMask = NULL;
ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
// Rebase pointers from pipeline structure in case it is moved/copied
VkPipelineColorBlendStateCreateInfo cs = pipelineProperties.state.cs;
cs.pAttachments = pipelineProperties.state.att_state;
VkPipeline pipeline;
VkGraphicsPipelineCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
info.pVertexInputState = &vi;
info.pInputAssemblyState = &pipelineProperties.state.ia;
info.pRasterizationState = &pipelineProperties.state.rs;
info.pColorBlendState = &pipelineProperties.state.cs;
info.pColorBlendState = &cs;
info.pMultisampleState = &ms;
info.pViewportState = &vp;
info.pDepthStencilState = &pipelineProperties.state.ds;
@ -201,11 +205,9 @@ public:
/**
 * Registers a pipeline for the given vertex/fragment program pair.
 *
 * Resolves the render pass from props.render_pass_location, marks the vertex
 * program so its vertex input check is skipped, then requests the pipeline through
 * get_graphics_pipeline exactly once.
 *
 * @param vp    vertex program; skip_vertex_input_check is set to true
 * @param fp    fragment program
 * @param props pipeline properties; render_pass is filled in here
 * @param args  extra arguments forwarded unchanged to get_graphics_pipeline
 */
template <typename... Args>
void add_pipeline_entry(RSXVertexProgram &vp, RSXFragmentProgram &fp, vk::pipeline_props &props, Args&& ...args)
{
	props.render_pass = m_render_pass_data[props.render_pass_location];
	vp.skip_vertex_input_check = true;

	// NOTE(review): the literal `false` is passed positionally; its meaning is defined by
	// get_graphics_pipeline's declaration, which is not visible here - confirm.
	get_graphics_pipeline(vp, fp, props, false, std::forward<Args>(args)...);
}
void preload_programs(RSXVertexProgram &vp, RSXFragmentProgram &fp)
@ -219,4 +221,9 @@ public:
{
return m_cache_miss_flag;
}
// Reports the current value of m_program_compiled_flag without modifying it.
// NOTE(review): the accessor is named "linked" while the member is named "compiled" -
// confirm both refer to the same event.
bool check_program_linked_flag() const { return m_program_compiled_flag; }
};

View File

@ -687,8 +687,7 @@ namespace rsx
if (!fs::is_file(fp_name))
{
const auto size = program_hash_util::fragment_program_utils::get_fragment_program_ucode_size(fp.addr);
fs::file(fp_name, fs::rewrite).write(fp.addr, size);
fs::file(fp_name, fs::rewrite).write(fp.addr, fp.ucode_length);
}
if (!fs::is_file(vp_name))
@ -741,6 +740,7 @@ namespace rsx
RSXFragmentProgram fp = {};
fragment_program_data[program_hash] = data;
fp.addr = fragment_program_data[program_hash].data();
fp.ucode_length = (u32)data.size();
return fp;
}

View File

@ -402,8 +402,9 @@ struct cfg_root : cfg::node
cfg::_bool frame_skip_enabled{this, "Enable Frame Skip", false};
cfg::_bool force_cpu_blit_processing{this, "Force CPU Blit", false}; // Debugging option
cfg::_bool disable_on_disk_shader_cache{this, "Disable On-Disk Shader Cache", false};
cfg::_bool disable_vulkan_mem_allocator{ this, "Disable Vulkan Memory Allocator", false };
cfg::_bool disable_vulkan_mem_allocator{this, "Disable Vulkan Memory Allocator", false};
cfg::_bool full_rgb_range_output{this, "Use full RGB output range", true}; // Video out dynamic range
cfg::_bool disable_asynchronous_shader_compiler{this, "Disable Asynchronous Shader Compiler", false};
cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames To Draw", 1};
cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames To Skip", 1};
cfg::_int<50, 800> resolution_scale_percent{this, "Resolution Scale", 100};

View File

@ -93,6 +93,7 @@ int main(int argc, char** argv)
QCoreApplication::setAttribute(Qt::AA_UseHighDpiPixmaps);
QCoreApplication::setAttribute(Qt::AA_DisableWindowContextHelpButton);
QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity);
s_init.post();
s_qt_mutex.wait();

View File

@ -3,6 +3,7 @@
#include "Emu/System.h"
#include <QOpenGLContext>
#include <qoffscreensurface.h>
#include <QWindow>
gl_gs_frame::gl_gs_frame(const QRect& geometry, QIcon appIcon, bool disableMouse)
@ -24,42 +25,87 @@ gl_gs_frame::gl_gs_frame(const QRect& geometry, QIcon appIcon, bool disableMouse
/**
 * Creates a new OpenGL context wrapped in a GLContext record.
 *
 * The first context created becomes the primary one and renders to this frame.
 * Every later context gets its own offscreen surface and is set up to share
 * resources with the primary context.
 *
 * @return the newly allocated GLContext, to be released with delete_context()
 */
draw_context_t gl_gs_frame::make_context()
{
	auto context = new GLContext();
	context->handle = new QOpenGLContext();

	if (m_primary_context)
	{
		auto surface = new QOffscreenSurface();
		surface->setFormat(m_format);
		surface->create();

		// Share resources with the first created context
		context->handle->setShareContext(m_primary_context->handle);
		context->surface = surface;
		// The offscreen surface was allocated here, so delete_context() must free it
		context->owner = true;
	}
	else
	{
		// This is the first created context, all others will share resources with this one
		m_primary_context = context;
		context->surface = this;
		// The surface is the frame itself and must not be deleted with the context
		context->owner = false;
	}

	// Sharing has to be configured before the underlying context is created
	context->handle->setFormat(m_format);
	context->handle->create();

	return context;
}
/**
 * Makes the given context current on its associated surface.
 *
 * If the first attempt fails, one recovery step is tried (re-creating the frame for
 * a context that renders to it, or re-creating an invalid context handle for a
 * context with its own surface) before binding is attempted again.
 *
 * @param ctx GLContext returned by make_context()
 * @throws via fmt::throw_exception when ctx is null or the context cannot be bound
 */
void gl_gs_frame::set_current(draw_context_t ctx)
{
	if (!ctx)
	{
		fmt::throw_exception("Null context handle passed to set_current" HERE);
	}

	auto context = (GLContext*)(ctx);

	if (!context->handle->makeCurrent(context->surface))
	{
		if (!context->owner)
		{
			// The surface is this frame; (re)create it before retrying
			create();
		}
		else if (!context->handle->isValid())
		{
			// Offscreen context whose handle is not valid; create it again
			context->handle->create();
		}

		if (!context->handle->makeCurrent(context->surface))
		{
			fmt::throw_exception("Could not bind OpenGL context" HERE);
		}
	}
}
/**
 * Destroys a context previously returned by make_context().
 *
 * Releases the Qt context handle first, then the offscreen surface if this
 * context owns one, and finally the GLContext record itself. If the context being
 * destroyed is the primary one, the cached primary pointer is cleared so that a
 * later make_context() does not dereference freed memory.
 *
 * @param ctx GLContext to destroy; a null pointer is ignored
 */
void gl_gs_frame::delete_context(draw_context_t ctx)
{
	if (!ctx)
	{
		return;
	}

	auto gl_ctx = (GLContext*)ctx;
	gl_ctx->handle->doneCurrent();

#ifndef _WIN32
	delete gl_ctx->handle;
#else
	//AMD driver crashes when executing wglDeleteContext
	//Catch with SEH
	__try
	{
		delete gl_ctx->handle;
	}
	__except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH)
	{
		LOG_FATAL(RSX, "Your graphics driver just crashed whilst cleaning up. All consumed VRAM should have been released, but you may want to restart the emulator just in case");
	}
#endif

	if (gl_ctx->owner)
	{
		// Only surfaces allocated by make_context() are freed here; the frame itself is not
		delete gl_ctx->surface;
	}

	if (gl_ctx == m_primary_context)
	{
		// Do not leave a dangling pointer to the record that is about to be freed
		m_primary_context = nullptr;
	}

	// The wrapper record must outlive every access above, so it goes last
	delete gl_ctx;
}
void gl_gs_frame::flip(draw_context_t context, bool skip_frame)
@ -69,5 +115,6 @@ void gl_gs_frame::flip(draw_context_t context, bool skip_frame)
//Do not swap buffers if frame skip is active
if (skip_frame) return;
((QOpenGLContext*)context)->swapBuffers(this);
auto gl_ctx = (GLContext*)context;
gl_ctx->handle->swapBuffers(gl_ctx->surface);
}

View File

@ -3,10 +3,18 @@
#include "stdafx.h"
#include "gs_frame.h"
// Pairs a Qt OpenGL context with the surface it is bound to.
struct GLContext
{
	// Surface passed to makeCurrent()/swapBuffers() for this context
	QSurface* surface{ nullptr };

	// The underlying Qt context object
	QOpenGLContext* handle{ nullptr };

	// True when `surface` was allocated for this context and must be deleted along with it
	bool owner{ false };
};
class gl_gs_frame : public gs_frame
{
private:
QSurfaceFormat m_format;
GLContext *m_primary_context = nullptr;
public:
gl_gs_frame(const QRect& geometry, QIcon appIcon, bool disableMouse);