Parallel shader cache loading (#4677)

* Parallel shader cache loading
This commit is contained in:
Dravonic 2018-06-01 13:49:29 -03:00 committed by kd-11
parent 87b510d5bf
commit 400079a006
10 changed files with 184 additions and 40 deletions

View File

@ -88,4 +88,5 @@ public:
virtual void ProgressBarSetMsg(u32 progressBarIndex, const std::string& msg) = 0;
virtual void ProgressBarReset(u32 progressBarIndex) = 0;
virtual void ProgressBarInc(u32 progressBarIndex, u32 delta) = 0;
virtual void ProgressBarSetLimit(u32 index, u32 limit) = 0;
};

View File

@ -6,6 +6,7 @@
#include "Utilities/GSL.h"
#include "Utilities/hash.h"
#include <mutex>
enum class SHADER_TYPE
{
@ -134,6 +135,7 @@ class program_state_cache
};
protected:
std::mutex s_mtx; // TODO: Only need to synchronize when loading cache
size_t m_next_id = 0;
bool m_cache_miss_flag;
binary_to_vertex_program m_vertex_shader_cache;
@ -305,11 +307,13 @@ public:
LOG_NOTICE(RSX, "*** vp id = %d", vertex_program.id);
LOG_NOTICE(RSX, "*** fp id = %d", fragment_program.id);
m_storage[key] = backend_traits::build_pipeline(vertex_program, fragment_program, pipelineProperties, std::forward<Args>(args)...);
pipeline_storage_type pipeline = backend_traits::build_pipeline(vertex_program, fragment_program, pipelineProperties, std::forward<Args>(args)...);
std::lock_guard<std::mutex> lock(s_mtx);
auto &rtn = m_storage[key] = std::move(pipeline);
m_cache_miss_flag = true;
LOG_SUCCESS(RSX, "New program compiled successfully");
return m_storage[key];
return rtn;
}
size_t get_fragment_constants_buffer_size(const RSXFragmentProgram &fragmentShader) const

View File

@ -845,7 +845,7 @@ void GLGSRender::on_init_thread()
{
MsgDialogType type = {};
type.disable_cancel = true;
type.progress_bar_count = 1;
type.progress_bar_count = 2;
dlg = fxm::get<rsx::overlays::display_manager>()->create<rsx::overlays::message_dialog>();
dlg->show("Loading precompiled shaders from disk...", type, [](s32 status)
@ -855,15 +855,22 @@ void GLGSRender::on_init_thread()
});
}
void update_msg(u32 processed, u32 entry_count) override
void update_msg(u32 index, u32 processed, u32 entry_count) override
{
dlg->progress_bar_set_message(0, fmt::format("Loading pipeline object %u of %u", processed, entry_count));
const char *text = index == 0 ? "Loading pipeline object %u of %u" : "Compiling pipeline object %u of %u";
dlg->progress_bar_set_message(index, fmt::format(text, processed, entry_count));
owner->flip(0);
}
void inc_value(u32 value) override
void inc_value(u32 index, u32 value) override
{
dlg->progress_bar_increment(0, (f32)value);
dlg->progress_bar_increment(index, (f32)value);
owner->flip(0);
}
// Sets the upper limit of progress bar `index` on the overlay dialog.
//   index - which progress bar of the dialog to update
//   limit - new limit value, forwarded unchanged to the dialog
void set_limit(u32 index, u32 limit) override
{
dlg->progress_bar_set_limit(index, limit);
// NOTE(review): presumably flips a frame so the updated dialog becomes visible - confirm
owner->flip(0);
}

View File

@ -102,6 +102,12 @@ public:
getGraphicPipelineState(vp, fp, props, std::forward<Args>(args)...);
}
// Runs the vertex and fragment program lookups for the given program pair
// without building a pipeline object.
// NOTE(review): presumably this populates the per-stage shader caches so a later
// pipeline build finds both programs already present - confirm against
// search_vertex_program / search_fragment_program.
//   vp - vertex program to look up (passed by non-const reference)
//   fp - fragment program to look up (passed by non-const reference)
void preload_programs(RSXVertexProgram &vp, RSXFragmentProgram &fp)
{
search_vertex_program(vp);
search_fragment_program(fp);
}
bool check_cache_missed() const
{
return m_cache_miss_flag;

View File

@ -938,6 +938,19 @@ namespace rsx
return CELL_OK;
}
// Sets the limit of one of the dialog's progress bars.
//   index - progress bar selector; 0 addresses the first bar, any other valid
//           value addresses the second bar
//   limit - new limit, converted to float before being handed to the bar
// Returns CELL_MSGDIALOG_ERROR_PARAM when index is not below num_progress_bars,
// CELL_OK otherwise.
s32 progress_bar_set_limit(u32 index, u32 limit)
{
	const bool index_in_range = index < num_progress_bars;
	if (!index_in_range)
	{
		return CELL_MSGDIALOG_ERROR_PARAM;
	}

	const float new_limit = static_cast<float>(limit);

	switch (index)
	{
	case 0:
		progress_1.set_limit(new_limit);
		break;
	default:
		progress_2.set_limit(new_limit);
		break;
	}

	return CELL_OK;
}
};
struct trophy_notification : public user_interface

View File

@ -1598,7 +1598,7 @@ void VKGSRender::on_init_thread()
{
MsgDialogType type = {};
type.disable_cancel = true;
type.progress_bar_count = 1;
type.progress_bar_count = 2;
dlg = fxm::get<rsx::overlays::display_manager>()->create<rsx::overlays::message_dialog>();
dlg->show("Loading precompiled shaders from disk...", type, [](s32 status)
@ -1608,15 +1608,22 @@ void VKGSRender::on_init_thread()
});
}
void update_msg(u32 processed, u32 entry_count) override
void update_msg(u32 index, u32 processed, u32 entry_count) override
{
dlg->progress_bar_set_message(0, fmt::format("Loading pipeline object %u of %u", processed, entry_count));
const char *text = index == 0 ? "Loading pipeline object %u of %u" : "Compiling pipeline object %u of %u";
dlg->progress_bar_set_message(index, fmt::format(text, processed, entry_count));
owner->flip(0);
}
void inc_value(u32 value) override
void inc_value(u32 index, u32 value) override
{
dlg->progress_bar_increment(0, (f32)value);
dlg->progress_bar_increment(index, (f32)value);
owner->flip(0);
}
// Sets the upper limit of progress bar `index` on the overlay dialog.
//   index - which progress bar of the dialog to update
//   limit - new limit value, forwarded unchanged to the dialog
void set_limit(u32 index, u32 limit) override
{
dlg->progress_bar_set_limit(index, limit);
// NOTE(review): presumably flips a frame so the updated dialog becomes visible - confirm
owner->flip(0);
}

View File

@ -208,6 +208,13 @@ public:
getGraphicPipelineState(vp, fp, props, std::forward<Args>(args)...);
}
// Runs the vertex and fragment program lookups for the given program pair
// without building a pipeline object.
// Side effect: sets vp.skip_vertex_input_check to true on the caller's object
// before the vertex program lookup.
// NOTE(review): presumably this populates the per-stage shader caches so a later
// pipeline build finds both programs already present - confirm against
// search_vertex_program / search_fragment_program.
//   vp - vertex program to look up; modified as described above
//   fp - fragment program to look up (passed by non-const reference)
void preload_programs(RSXVertexProgram &vp, RSXFragmentProgram &fp)
{
vp.skip_vertex_input_check = true;
search_vertex_program(vp);
search_fragment_program(fp);
}
bool check_cache_missed() const
{
return m_cache_miss_flag;

View File

@ -8,6 +8,7 @@
#include "Emu/System.h"
#include "rsx_utils.h"
#include <thread>
namespace rsx
{
@ -411,7 +412,7 @@ namespace rsx
dlg = Emu.GetCallbacks().get_msg_dialog();
dlg->type.se_normal = true;
dlg->type.bg_invisible = true;
dlg->type.progress_bar_count = 1;
dlg->type.progress_bar_count = 2;
dlg->on_close = [](s32 status)
{
Emu.CallAfter([]()
@ -432,19 +433,28 @@ namespace rsx
}
}
virtual void update_msg(u32 processed, u32 entry_count)
virtual void update_msg(u32 index, u32 processed, u32 entry_count)
{
Emu.CallAfter([=]()
{
dlg->ProgressBarSetMsg(0, fmt::format("Loading pipeline object %u of %u", processed, entry_count));
const char *text = index == 0 ? "Loading pipeline object %u of %u" : "Compiling pipeline object %u of %u";
dlg->ProgressBarSetMsg(index, fmt::format(text, processed, entry_count));
});
}
virtual void inc_value(u32 value)
virtual void inc_value(u32 index, u32 value)
{
Emu.CallAfter([=]()
{
dlg->ProgressBarInc(0, value);
dlg->ProgressBarInc(index, value);
});
}
// Requests that progress bar `index` of the native message dialog use `limit`
// as its limit. The call to the dialog is not made directly: it is wrapped in a
// lambda (index and limit captured by value) and handed to Emu.CallAfter.
// NOTE(review): Emu.CallAfter presumably runs the lambda later on the main/UI
// thread, so the limit may not be applied yet when this returns - confirm.
virtual void set_limit(u32 index, u32 limit)
{
Emu.CallAfter([=]()
{
dlg->ProgressBarSetLimit(index, limit);
});
}
@ -479,18 +489,22 @@ namespace rsx
}
fs::dir root = fs::dir(directory_path);
fs::dir_entry tmp;
u32 entry_count = 0;
for (auto It = root.begin(); It != root.end(); ++It, entry_count++);
std::vector<fs::dir_entry> entries;
for (auto It = root.begin(); It != root.end(); ++It, entry_count++)
{
fs::dir_entry tmp = *It;
if (entry_count <= 2)
if (tmp.name == "." || tmp.name == "..")
continue;
entries.push_back(tmp);
}
if ((entry_count = entries.size()) <= 2)
return;
entry_count -= 2;
f32 delta = 100.f / entry_count;
f32 tally = 0.f;
root.rewind();
// Invalid pipeline entries to be removed
@ -505,39 +519,107 @@ namespace rsx
}
dlg->create();
dlg->set_limit(0, entry_count);
dlg->set_limit(1, entry_count);
dlg->update_msg(0, 0, entry_count);
dlg->update_msg(1, 0, entry_count);
const auto prefix_length = version_prefix.length();
u32 processed = 0;
while (root.read(tmp) && !Emu.IsStopped())
// Setup worker threads
unsigned nb_threads = std::thread::hardware_concurrency();
std::vector<std::thread> worker_threads(nb_threads);
// Preload everything needed to compile the shaders
// Can probably be parallelized too, but since it's mostly reading files it's probably not worth it
std::vector<std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram>> unpackeds;
std::chrono::time_point<steady_clock> last_update;
u32 processed_since_last_update = 0;
for (u32 i = 0; (i < entry_count) && !Emu.IsStopped(); i++)
{
if (tmp.name == "." || tmp.name == "..")
continue;
fs::dir_entry tmp = entries[i];
const auto filename = directory_path + "/" + tmp.name;
std::vector<u8> bytes;
fs::file f(filename);
processed++;
dlg->update_msg(processed, entry_count);
if (f.size() != sizeof(pipeline_data))
{
LOG_ERROR(RSX, "Cached pipeline object %s is not binary compatible with the current shader cache", tmp.name.c_str());
invalid_entries.push_back(filename);
continue;
}
f.read<u8>(bytes, f.size());
auto unpacked = unpack(*(pipeline_data*)bytes.data());
m_storage.add_pipeline_entry(std::get<1>(unpacked), std::get<2>(unpacked), std::get<0>(unpacked), std::forward<Args>(args)...);
m_storage.preload_programs(std::get<1>(unpacked), std::get<2>(unpacked));
unpackeds.push_back(unpacked);
tally += delta;
if (tally > 1.f)
// Only update the screen at about 10fps since updating it every time slows down the process
std::chrono::time_point<steady_clock> now = std::chrono::steady_clock::now();
processed_since_last_update++;
if ((std::chrono::duration_cast<std::chrono::milliseconds>(now - last_update) > 100ms) || (i == entry_count - 1))
{
u32 value = (u32)tally;
dlg->inc_value(value);
dlg->update_msg(0, i + 1, entry_count);
dlg->inc_value(0, processed_since_last_update);
last_update = now;
processed_since_last_update = 0;
}
}
tally -= (f32)value;
atomic_t<u32> processed(0);
std::function<void(u32)> shader_comp_worker = [&](u32 index)
{
u32 pos;
while (((pos = processed++) < entry_count) && !Emu.IsStopped())
{
auto unpacked = unpackeds[pos];
m_storage.add_pipeline_entry(std::get<1>(unpacked), std::get<2>(unpacked), std::get<0>(unpacked), std::forward<Args>(args)...);
}
};
if (g_cfg.video.renderer == video_renderer::vulkan)
{
// Start workers
for (u32 i = 0; i < nb_threads; i++)
{
worker_threads[i] = std::thread(shader_comp_worker, i);
}
// Wait for the workers to finish their task while updating UI
u32 current_progress = 0;
u32 last_update_progress = 0;
do
{
std::this_thread::sleep_for(100ms); // Around 10fps should be good enough
current_progress = processed.load();
dlg->update_msg(1, current_progress, entry_count);
dlg->inc_value(1, current_progress - last_update_progress);
last_update_progress = current_progress;
} while ((current_progress < entry_count) && !Emu.IsStopped());
// Need to join the threads to be absolutely sure shader compilation is done.
for (std::thread& worker_thread : worker_threads)
worker_thread.join();
}
else
{
u32 pos;
while (((pos = processed++) < entry_count) && !Emu.IsStopped())
{
auto unpacked = unpackeds[pos];
m_storage.add_pipeline_entry(std::get<1>(unpacked), std::get<2>(unpacked), std::get<0>(unpacked), std::forward<Args>(args)...);
// Update screen at about 10fps
std::chrono::time_point<steady_clock> now = std::chrono::steady_clock::now();
processed_since_last_update++;
if ((std::chrono::duration_cast<std::chrono::milliseconds>(now - last_update) > 100ms) || (pos == entry_count - 1))
{
dlg->update_msg(1, pos, entry_count);
dlg->inc_value(1, processed_since_last_update);
last_update = now;
processed_since_last_update = 0;
}
}
}

View File

@ -394,6 +394,22 @@ void msg_dialog_frame::ProgressBarInc(u32 index, u32 delta)
}
}
// Applies `limit` as the maximum of the gauge selected by `index`
// (0 -> first gauge, 1 -> second gauge). Does nothing when the dialog or the
// selected gauge does not exist, or when `index` is neither 0 nor 1.
void msg_dialog_frame::ProgressBarSetLimit(u32 index, u32 limit)
{
	// No dialog, nothing to update
	if (!m_dialog)
	{
		return;
	}

	switch (index)
	{
	case 0:
		if (m_gauge1)
		{
			m_gauge1->setMaximum(limit);
		}
		break;
	case 1:
		if (m_gauge2)
		{
			m_gauge2->setMaximum(limit);
		}
		break;
	default:
		break;
	}
}
#ifdef HAVE_QTDBUS
void msg_dialog_frame::UpdateProgress(int progress, bool disable)
{

View File

@ -67,6 +67,7 @@ public:
virtual void ProgressBarSetMsg(u32 progressBarIndex, const std::string& msg) override;
virtual void ProgressBarReset(u32 progressBarIndex) override;
virtual void ProgressBarInc(u32 progressBarIndex, u32 delta) override;
virtual void ProgressBarSetLimit(u32 index, u32 limit) override;
#ifdef HAVE_QTDBUS
private:
void UpdateProgress(int progress, bool disable = false);