LV2: Implement set-priority thread requeue

Eladash 2023-04-28 20:10:21 +03:00 committed by Ivan
parent 9828e6cafc
commit b861a9c5d0
15 changed files with 143 additions and 39 deletions
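The gist of the change, before the per-file hunks: the scalar atomic_t<s32> priority on PPU threads and SPU thread groups is widened into a 64-bit union (ppu_prio_t / spu_prio_t) that packs the priority value together with an enqueue-order tag drawn from a new global counter, lv2_obj::g_priority_order_tag. Sleep queues can then be popped by priority first and enqueue order second, which preserves FIFO behaviour within a priority level and lets a priority change requeue a thread deliberately. Below is a minimal self-contained sketch of that packing and comparison; it uses plain shifts and std::atomic instead of the emulator's bf_t/atomic_t types, and every name in it is illustrative:

```cpp
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative stand-in for ppu_prio_t: priority value plus an enqueue-order
// tag packed into one 64-bit word so both can be updated by a single store.
struct packed_prio
{
    std::uint64_t all = 0;

    static constexpr int prio_bits = 13; // low bits hold the priority value
    static constexpr std::uint64_t prio_mask = (1ull << prio_bits) - 1;

    std::int64_t prio() const { return static_cast<std::int64_t>(all & prio_mask); }
    std::int64_t order() const { return static_cast<std::int64_t>(all >> prio_bits); }

    void set(std::int64_t p, std::int64_t o)
    {
        all = (static_cast<std::uint64_t>(o) << prio_bits) | (static_cast<std::uint64_t>(p) & prio_mask);
    }
};

// Global tag, analogous to lv2_obj::g_priority_order_tag: each enqueue takes
// the next value so equal-priority threads keep FIFO order.
std::atomic<std::uint64_t> g_order_tag{0};

// Pop order used in this sketch: lower priority value first,
// then the earliest enqueued (smallest order tag).
bool runs_before(const packed_prio& a, const packed_prio& b)
{
    return a.prio() < b.prio() || (a.prio() == b.prio() && a.order() < b.order());
}

int main()
{
    std::vector<packed_prio> queue(3);
    queue[0].set(1000, static_cast<std::int64_t>(++g_order_tag)); // enqueued first
    queue[1].set(500,  static_cast<std::int64_t>(++g_order_tag)); // higher priority (lower value)
    queue[2].set(1000, static_cast<std::int64_t>(++g_order_tag)); // same prio as [0], enqueued later

    std::sort(queue.begin(), queue.end(), runs_before);

    for (const auto& p : queue)
        std::printf("prio=%lld order=%lld\n", static_cast<long long>(p.prio()), static_cast<long long>(p.order()));
}
```

Keeping both fields in one word means a priority change and the accompanying reordering are a single atomic update, which appears to be why the diff replaces the old prio.exchange() with atomic_op() on the union.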

View File

@ -1357,7 +1357,7 @@ std::string ppu_thread::dump_misc() const
fmt::append(ret, " (LV2 suspended)\n");
}
fmt::append(ret, "Priority: %d\n", +prio);
fmt::append(ret, "Priority: %d\n", prio.load().prio);
fmt::append(ret, "Stack: 0x%x..0x%x\n", stack_addr, stack_addr + stack_size - 1);
fmt::append(ret, "Joiner: %s\n", joiner.load());
@ -1660,9 +1660,8 @@ ppu_thread::~ppu_thread()
perf_log.notice("Perf stats for instructions: total %u", exec_bytes / 4);
}
ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 prio, int detached)
ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 _prio, int detached)
: cpu_thread(idm::last_id())
, prio(prio)
, stack_size(param.stack_size)
, stack_addr(param.stack_addr)
, joiner(detached != 0 ? ppu_join_status::detached : ppu_join_status::joinable)
@ -1671,6 +1670,8 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u3
, is_interrupt_thread(detached < 0)
, ppu_tname(make_single<std::string>(name))
{
prio.raw().prio = _prio;
gpr[1] = stack_addr + stack_size - ppu_stack_start_offset;
gpr[13] = param.tls_addr;
@ -1732,7 +1733,25 @@ bool ppu_thread::savable() const
void ppu_thread::serialize_common(utils::serial& ar)
{
ar(gpr, fpr, cr, fpscr.bits, lr, ctr, vrsave, cia, xer, sat, nj, prio, optional_savestate_state, vr);
[[maybe_unused]] const s32 version = GET_OR_USE_SERIALIZATION_VERSION(ar.is_writing(), ppu);
ar(gpr, fpr, cr, fpscr.bits, lr, ctr, vrsave, cia, xer, sat, nj);
if (ar.is_writing())
{
ar(prio.load().all);
}
else if (version < 2)
{
prio.raw().all = 0;
prio.raw().prio = ar.operator s32();
}
else
{
ar(prio.raw().all);
}
ar(optional_savestate_state, vr);
if (optional_savestate_state->data.empty())
{

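The serialize_common() hunk above is version-gated: when writing, the full 64-bit packed value is stored, while states produced before the bumped SERIALIZATION_VER(ppu, ...) (see the savestate_utils hunk later in this commit) only contain the old s32 priority, so the union is zeroed and just its prio field is filled on load. A rough standalone sketch of that load pattern, using a hypothetical byte Reader rather than RPCS3's utils::serial:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Hypothetical little-endian byte reader standing in for utils::serial.
struct Reader
{
    const std::vector<std::uint8_t>& buf;
    std::size_t pos = 0;

    template <typename T>
    T get()
    {
        T value{};
        std::memcpy(&value, buf.data() + pos, sizeof(T));
        pos += sizeof(T);
        return value;
    }
};

struct prio_state
{
    std::uint64_t all = 0; // packed priority + enqueue order
};

// Version-gated load: old states stored only a 32-bit priority,
// new states store the whole packed 64-bit value.
prio_state load_prio(Reader& ar, int ppu_version)
{
    prio_state out{};

    if (ppu_version < 2)
    {
        // Old format: widen the lone s32 priority; the order tag starts at zero.
        out.all = static_cast<std::uint32_t>(ar.get<std::int32_t>());
    }
    else
    {
        out.all = ar.get<std::uint64_t>();
    }

    return out;
}

int main()
{
    // Old-format state: just a 32-bit priority value (0x3E9 = 1001).
    std::vector<std::uint8_t> old_state = {0xE9, 0x03, 0x00, 0x00};
    Reader r_old{old_state};
    std::printf("v1: all=0x%llx\n", static_cast<unsigned long long>(load_prio(r_old, 1).all));

    // New-format state: the full packed 64-bit value.
    std::vector<std::uint8_t> new_state = {0xE9, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00};
    Reader r_new{new_state};
    std::printf("v2: all=0x%llx\n", static_cast<unsigned long long>(load_prio(r_new, 2).all));
}
```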
View File

@ -3,6 +3,7 @@
#include "../CPU/CPUThread.h"
#include "../Memory/vm_ptr.h"
#include "Utilities/lockless.h"
#include "Utilities/BitField.h"
#include "util/logs.hpp"
#include "util/v128.hpp"
@ -253,7 +254,14 @@ public:
alignas(64) std::byte rdata[128]{}; // Reservation data
bool use_full_rdata{};
atomic_t<s32> prio{0}; // Thread priority (0..3071)
union ppu_prio_t
{
u64 all;
bf_t<s64, 0, 13> prio; // Thread priority (0..3071) (first 13 bits)
bf_t<s64, 13, 51> order; // Thread enqueue order (last 51 bits)
};
atomic_t<ppu_prio_t> prio{};
const u32 stack_size; // Stack size
const u32 stack_addr; // Stack address

View File

@ -6157,11 +6157,13 @@ spu_thread::thread_name_t::operator std::string() const
return full_name;
}
spu_thread::priority_t::operator s32() const
spu_thread::spu_prio_t spu_thread::priority_t::load() const
{
if (_this->get_type() != spu_type::threaded || !_this->group->has_scheduler_context)
{
return s32{smax};
spu_thread::spu_prio_t prio{};
prio.prio = smax;
return prio;
}
return _this->group->prio;

View File

@ -887,12 +887,25 @@ public:
operator std::string() const;
} thread_name{ this };
union spu_prio_t
{
u64 all;
bf_t<s64, 0, 9> prio; // Thread group priority (first 9 bits)
bf_t<s64, 9, 55> order; // Thread enqueue order (TODO, last 55 bits)
};
// For lv2_obj::schedule<spu_thread>
const struct priority_t
struct priority_t
{
const spu_thread* _this;
operator s32() const;
spu_prio_t load() const;
template <typename Func>
auto atomic_op(Func&& func)
{
return static_cast<std::conditional_t<std::is_void_v<Func>, Func, decltype(_this->group)>>(_this->group)->prio.atomic_op(std::move(func));
}
} prio{ this };
};

View File

@ -1209,6 +1209,7 @@ std::string ppu_get_syscall_name(u64 code)
DECLARE(lv2_obj::g_mutex);
DECLARE(lv2_obj::g_ppu){};
DECLARE(lv2_obj::g_pending){};
DECLARE(lv2_obj::g_priority_order_tag){};
thread_local DECLARE(lv2_obj::g_to_notify){};
thread_local DECLARE(lv2_obj::g_postpone_notify_barrier){};
@ -1424,24 +1425,53 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
// Check thread type
AUDIT(!cpu || cpu->id_type() == 1);
bool push_first = false;
switch (prio)
{
default:
{
// Priority set
const s32 old_prio = static_cast<ppu_thread*>(cpu)->prio.exchange(prio);
auto set_prio = [](atomic_t<ppu_thread::ppu_prio_t>& prio, s32 value, bool increment_order_last, bool increment_order_first)
{
s64 tag = 0;
if (increment_order_first || increment_order_last)
{
tag = ++g_priority_order_tag;
}
prio.atomic_op([&](ppu_thread::ppu_prio_t& prio)
{
prio.prio = value;
if (increment_order_first)
{
prio.order = ~tag;
}
else if (increment_order_last)
{
prio.order = tag;
}
});
};
const s32 old_prio = static_cast<ppu_thread*>(cpu)->prio.load().prio;
// If priority is the same, push ONPROC/RUNNABLE thread to the back of the priority list if it is not the current thread
if (old_prio == prio && cpu == cpu_thread::get_current())
{
set_prio(static_cast<ppu_thread*>(cpu)->prio, prio, false, false);
return true;
}
if (!unqueue(g_ppu, static_cast<ppu_thread*>(cpu), &ppu_thread::next_ppu))
{
set_prio(static_cast<ppu_thread*>(cpu)->prio, prio, old_prio > prio, old_prio < prio);
return true;
}
set_prio(static_cast<ppu_thread*>(cpu)->prio, prio, false, false);
break;
}
case yield_cmd:
@ -1462,7 +1492,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
{
auto ppu2 = ppu->next_ppu;
if (!ppu2 || ppu2->prio != ppu->prio)
if (!ppu2 || ppu2->prio.load().prio != ppu->prio.load().prio)
{
// Empty 'same prio' threads list
return false;
@ -1472,7 +1502,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
{
const auto next = ppu2->next_ppu;
if (!next || next->prio != ppu->prio)
if (!next || next->prio.load().prio != ppu->prio.load().prio)
{
break;
}
@ -1505,7 +1535,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
}
}
const auto emplace_thread = [](cpu_thread* const cpu)
const auto emplace_thread = [push_first](cpu_thread* const cpu)
{
for (auto it = &g_ppu;;)
{
@ -1525,7 +1555,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
}
// Use priority, also preserve FIFO order
if (!next || next->prio > static_cast<ppu_thread*>(cpu)->prio)
if (!next || (push_first ? next->prio.load().prio >= static_cast<ppu_thread*>(cpu)->prio.load().prio : next->prio.load().prio > static_cast<ppu_thread*>(cpu)->prio.load().prio))
{
atomic_storage<ppu_thread*>::release(static_cast<ppu_thread*>(cpu)->next_ppu, next);
atomic_storage<ppu_thread*>::release(*it, static_cast<ppu_thread*>(cpu));

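In the hunk above, when the target thread is not found on the runnable list (it is waiting on some lv2 queue), set_prio rewrites the packed field in one atomic_op and the direction of the change decides where the thread will sit at its new level: a raised priority (numerically smaller value) takes a fresh order tag, so it is treated as the newest arrival, while a lowered priority stores ~tag, an order value smaller than any handed out so far, so it pops ahead of the level's existing waiters. A tiny standalone sketch of that rule, with hypothetical names and no emulator types:

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for lv2_obj::g_priority_order_tag.
std::atomic<std::int64_t> g_order_tag{0};

// Sketch of the order tag chosen when a waiting thread's priority changes
// (lower numeric value = higher scheduling priority).
std::int64_t requeue_order(std::int64_t old_prio, std::int64_t new_prio)
{
    const std::int64_t tag = ++g_order_tag;

    if (new_prio < old_prio)
    {
        // Priority raised: treat it like a fresh enqueue, so the thread is
        // ordered after the existing waiters at its new level.
        return tag;
    }

    // Priority lowered: take an order value smaller than any tag handed out
    // so far (~tag is negative, mirroring the `prio.order = ~tag` path in the
    // diff), so the thread is ordered before the existing waiters.
    return ~tag;
}

int main()
{
    std::printf("raised:  order=%lld\n", static_cast<long long>(requeue_order(1000, 500)));
    std::printf("lowered: order=%lld\n", static_cast<long long>(requeue_order(500, 1000)));
}
```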
View File

@ -373,10 +373,10 @@ error_code sys_event_flag_set(cpu_thread& cpu, u32 id, u64 bitptn)
for (auto ppu = first; ppu; ppu = ppu->next_cpu)
{
if (!ppu->gpr[7] && (flag->protocol != SYS_SYNC_PRIORITY || ppu->prio <= prio))
if (!ppu->gpr[7] && (flag->protocol != SYS_SYNC_PRIORITY || ppu->prio.load().prio <= prio))
{
it = ppu;
prio = ppu->prio;
prio = ppu->prio.load().prio;
}
}

View File

@ -93,6 +93,11 @@ struct lv2_lwmutex final : lv2_obj
{
if (!data.signaled)
{
cpu->prio.atomic_op([tag = ++g_priority_order_tag](std::common_type_t<decltype(T::prio)>& prio)
{
prio.order = tag;
});
cpu->next_cpu = data.sq;
data.sq = cpu;
}

View File

@ -105,6 +105,11 @@ struct lv2_mutex final : lv2_obj
{
if (data.owner)
{
cpu.prio.atomic_op([tag = ++g_priority_order_tag](std::common_type_t<decltype(T::prio)>& prio)
{
prio.order = tag;
});
cpu.next_cpu = data.sq;
data.sq = &cpu;
return false;

View File

@ -312,7 +312,7 @@ error_code sys_ppu_thread_set_priority(ppu_thread& ppu, u32 thread_id, s32 prio)
if (thread_id == ppu.id)
{
// Fast path for self
if (ppu.prio != prio)
if (ppu.prio.load().prio != prio)
{
lv2_obj::set_priority(ppu, prio);
}
@ -322,10 +322,7 @@ error_code sys_ppu_thread_set_priority(ppu_thread& ppu, u32 thread_id, s32 prio)
const auto thread = idm::check<named_thread<ppu_thread>>(thread_id, [&, notify = lv2_obj::notify_all_t()](ppu_thread& thread)
{
if (thread.prio != prio)
{
lv2_obj::set_priority(thread, prio);
}
lv2_obj::set_priority(thread, prio);
});
if (!thread)
@ -346,7 +343,7 @@ error_code sys_ppu_thread_get_priority(ppu_thread& ppu, u32 thread_id, vm::ptr<s
{
// Fast path for self
ppu.check_state();
*priop = ppu.prio;
*priop = ppu.prio.load().prio;
return CELL_OK;
}
@ -354,7 +351,7 @@ error_code sys_ppu_thread_get_priority(ppu_thread& ppu, u32 thread_id, vm::ptr<s
const auto thread = idm::check<named_thread<ppu_thread>>(thread_id, [&](ppu_thread& thread)
{
prio = thread.prio;
prio = thread.prio.load().prio;
});
if (!thread)

View File

@ -206,7 +206,21 @@ lv2_spu_group::lv2_spu_group(utils::serial& ar) noexcept
, has_scheduler_context(ar.operator u8())
, max_run(ar)
, init(ar)
, prio(ar)
, prio([&ar]()
{
std::common_type_t<decltype(lv2_spu_group::prio)> prio{};
if (GET_SERIALIZATION_VERSION(spu) < 3)
{
prio.prio = ar.operator s32();
}
else
{
ar(prio.all);
}
return prio;
}())
, run_state(ar.operator spu_group_status())
, exit_status(ar)
{
@ -286,7 +300,7 @@ void lv2_spu_group::save(utils::serial& ar)
{
USING_SERIALIZATION_VERSION(spu);
ar(name, max_num, mem_size, type, ct->id, has_scheduler_context, max_run, init, prio, run_state, exit_status);
ar(name, max_num, mem_size, type, ct->id, has_scheduler_context, max_run, init, prio.load().all, run_state, exit_status);
for (const auto& thread : threads)
{
@ -1541,7 +1555,10 @@ error_code sys_spu_thread_group_set_priority(ppu_thread& ppu, u32 id, s32 priori
return CELL_EINVAL;
}
group->prio = priority;
group->prio.atomic_op([&](std::common_type_t<decltype(lv2_spu_group::prio)>& prio)
{
prio.prio = priority;
});
return CELL_OK;
}
@ -1567,7 +1584,7 @@ error_code sys_spu_thread_group_get_priority(ppu_thread& ppu, u32 id, vm::ptr<s3
}
else
{
*priority = group->prio;
*priority = group->prio.load().prio;
}
return CELL_OK;

View File

@ -284,7 +284,7 @@ struct lv2_spu_group
shared_mutex mutex;
atomic_t<u32> init; // Initialization Counter
atomic_t<s32> prio; // SPU Thread Group Priority
atomic_t<typename spu_thread::spu_prio_t> prio{}; // SPU Thread Group Priority
atomic_t<spu_group_status> run_state; // SPU Thread Group State
atomic_t<s32> exit_status; // SPU Thread Group Exit Status
atomic_t<u32> join_state; // flags used to detect exit cause and signal
@ -303,7 +303,7 @@ struct lv2_spu_group
std::shared_ptr<lv2_event_queue> ep_exception; // TODO: SYS_SPU_THREAD_GROUP_EVENT_EXCEPTION
std::shared_ptr<lv2_event_queue> ep_sysmodule; // TODO: SYS_SPU_THREAD_GROUP_EVENT_SYSTEM_MODULE
lv2_spu_group(std::string name, u32 num, s32 prio, s32 type, lv2_memory_container* ct, bool uses_scheduler, u32 mem_size) noexcept
lv2_spu_group(std::string name, u32 num, s32 _prio, s32 type, lv2_memory_container* ct, bool uses_scheduler, u32 mem_size) noexcept
: name(std::move(name))
, id(idm::last_id())
, max_num(num)
@ -313,13 +313,13 @@ struct lv2_spu_group
, has_scheduler_context(uses_scheduler)
, max_run(num)
, init(0)
, prio(prio)
, run_state(SPU_THREAD_GROUP_STATUS_NOT_INITIALIZED)
, exit_status(0)
, join_state(0)
// TODO: args()
{
threads_map.fill(-1);
prio.raw().prio = _prio;
}
SAVESTATE_INIT_POS(8); // Dependency on SPUs

View File

@ -180,7 +180,7 @@ public:
}
}
s32 prio = it->prio;
auto prio = it->prio.load();
auto found = it;
while (true)
@ -193,10 +193,10 @@ public:
break;
}
const s32 _prio = static_cast<E*>(next)->prio;
const auto _prio = static_cast<E*>(next)->prio.load();
// This condition tests for equality as well so the eraliest element to be pushed is popped
if (_prio <= prio)
// This condition tests for equality as well so the earliest element to be pushed is popped
if (_prio.prio < prio.prio || (_prio.prio == prio.prio && _prio.order < prio.order))
{
found = next;
parent_found = &node;
@ -224,6 +224,11 @@ public:
{
atomic_storage<T>::release(object->next_cpu, first);
atomic_storage<T>::release(first, object);
object->prio.atomic_op([order = ++g_priority_order_tag](std::common_type_t<decltype(std::declval<T>()->prio.load())>& prio)
{
prio.order = order;
});
}
private:
@ -476,6 +481,9 @@ public:
// Scheduler mutex
static shared_mutex g_mutex;
// Priority tags
static atomic_t<u64> g_priority_order_tag;
private:
// Pending list of threads to run
static thread_local std::vector<class cpu_thread*> g_to_awake;

View File

@ -36,8 +36,8 @@ static std::array<serial_ver_t, 23> s_serial_versions;
}
SERIALIZATION_VER(global_version, 0, 12) // For stuff not listed here
SERIALIZATION_VER(ppu, 1, 1)
SERIALIZATION_VER(spu, 2, 1, 2 /*spu_limits_t ctor*/)
SERIALIZATION_VER(ppu, 1, 1, 2 /*thread sleep queue order*/)
SERIALIZATION_VER(spu, 2, 1, 2 /*spu_limits_t ctor*/, 3 /*thread sleep queue order*/)
SERIALIZATION_VER(lv2_sync, 3, 1)
SERIALIZATION_VER(lv2_vm, 4, 1)
SERIALIZATION_VER(lv2_net, 5, 1, 2/*RECV/SEND timeout*/)

View File

@ -656,7 +656,7 @@ void kernel_explorer::update()
const auto func = ppu.last_function;
const ppu_thread_status status = lv2_obj::ppu_state(&ppu, false, false);
add_leaf(find_node(root, additional_nodes::ppu_threads), qstr(fmt::format(u8"PPU 0x%07x: “%s”, PRIO: %d, Joiner: %s, Status: %s, State: %s, %s func: “%s”%s", id, *ppu.ppu_tname.load(), +ppu.prio, ppu.joiner.load(), status, ppu.state.load()
add_leaf(find_node(root, additional_nodes::ppu_threads), qstr(fmt::format(u8"PPU 0x%07x: “%s”, PRIO: %d, Joiner: %s, Status: %s, State: %s, %s func: “%s”%s", id, *ppu.ppu_tname.load(), ppu.prio.load().prio, ppu.joiner.load(), status, ppu.state.load()
, ppu.ack_suspend ? "After" : (ppu.current_function ? "In" : "Last"), func ? func : "", get_wait_time_str(ppu.start_time))));
}, idm::unlocked);
@ -723,7 +723,7 @@ void kernel_explorer::update()
idm::select<lv2_spu_group>([&](u32 id, lv2_spu_group& tg)
{
QTreeWidgetItem* spu_tree = add_solid_node(find_node(root, additional_nodes::spu_thread_groups), qstr(fmt::format(u8"SPU Group 0x%07x: “%s”, Type = 0x%x", id, tg.name, tg.type)), qstr(fmt::format(u8"SPU Group 0x%07x: “%s”, Status = %s, Priority = %d, Type = 0x%x", id, tg.name, tg.run_state.load(), +tg.prio, tg.type)));
QTreeWidgetItem* spu_tree = add_solid_node(find_node(root, additional_nodes::spu_thread_groups), qstr(fmt::format(u8"SPU Group 0x%07x: “%s”, Type = 0x%x", id, tg.name, tg.type)), qstr(fmt::format(u8"SPU Group 0x%07x: “%s”, Status = %s, Priority = %d, Type = 0x%x", id, tg.name, tg.run_state.load(), tg.prio.load().prio, tg.type)));
if (tg.name.ends_with("CellSpursKernelGroup"sv))
{

View File

@ -186,7 +186,7 @@ void register_editor_dialog::updateRegister(int reg) const
else if (reg == PPU_LR) str = fmt::format("%016llx", ppu.lr);
else if (reg == PPU_CTR) str = fmt::format("%016llx", ppu.ctr);
else if (reg == PPU_VRSAVE) str = fmt::format("%08x", ppu.vrsave);
else if (reg == PPU_PRIO) str = fmt::format("%08x", +ppu.prio);
else if (reg == PPU_PRIO) str = fmt::format("%08x", ppu.prio.load().prio);
else if (reg == RESERVATION_LOST) str = sstr(ppu.raddr ? tr("Lose reservation on OK") : tr("Reservation is inactive"));
else if (reg == PC) str = fmt::format("%08x", ppu.cia);
}