From 1b37e775befd98af5b9e14ea73f14b8000c0ce4d Mon Sep 17 00:00:00 2001
From: Nekotekina <nekotekina@gmail.com>
Date: Thu, 11 Oct 2018 01:17:19 +0300
Subject: [PATCH] Migration to named_thread<>

Add atomic_t<>::try_dec instead of fetch_dec_sat
Add atomic_t<>::try_inc
GDBDebugServer is broken (needs rewrite)
Removed old_thread class (former named_thread)
Removed storing/rethrowing exceptions from thread
Emu.Stop doesn't inject an exception anymore
task_stack helper class removed
thread_base simplified (no shared_from_this)
thread_ctrl::spawn simplified (creates detached thread)
Implemented overridable thread detaching logic
Disabled cellAdec, cellDmux, cellFsAio
SPUThread renamed to spu_thread
RawSPUThread removed, spu_thread used instead
Disabled deriving from ppu_thread
Partial support for thread renaming
lv2_timer... simplified, screw it
idm/fxm: butchered support for on_stop/on_init
vm: improved allocation structure (added size)
---
 Utilities/Atomic.h                            |  34 +-
 Utilities/GDBDebugServer.h                    |  17 +-
 Utilities/Thread.cpp                          | 224 ++-------
 Utilities/Thread.h                            | 294 +++--------
 Utilities/cond.cpp                            |   2 +-
 Utilities/sema.h                              |   2 +-
 Utilities/typemap.h                           |   8 +-
 rpcs3/Emu/CPU/CPUThread.cpp                   |  53 +-
 rpcs3/Emu/CPU/CPUThread.h                     |  58 ++-
 rpcs3/Emu/Cell/Modules/cellAdec.cpp           |  20 +-
 rpcs3/Emu/Cell/Modules/cellAudio.cpp          |   7 +-
 rpcs3/Emu/Cell/Modules/cellDmux.cpp           |  28 +-
 rpcs3/Emu/Cell/Modules/cellFs.cpp             |  12 +-
 rpcs3/Emu/Cell/Modules/cellGcmSys.cpp         |  15 +-
 rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp      |   8 +-
 rpcs3/Emu/Cell/Modules/cellSpurs.cpp          |  26 +-
 rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp       | 146 +++---
 rpcs3/Emu/Cell/Modules/cellSync.cpp           |  65 ++-
 rpcs3/Emu/Cell/Modules/cellSysutil.cpp        |  11 +-
 rpcs3/Emu/Cell/Modules/cellVdec.cpp           | 268 +++++-----
 rpcs3/Emu/Cell/Modules/libmixer.cpp           |   6 +-
 rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp        |  36 +-
 rpcs3/Emu/Cell/Modules/sys_spinlock.cpp       |   9 +-
 rpcs3/Emu/Cell/PPUModule.cpp                  |  19 +-
 rpcs3/Emu/Cell/PPUModule.h                    |   3 -
 rpcs3/Emu/Cell/PPUThread.cpp                  |  90 ++--
 rpcs3/Emu/Cell/PPUThread.h                    |  22 +-
 rpcs3/Emu/Cell/RawSPUThread.cpp               |  46 +-
 rpcs3/Emu/Cell/RawSPUThread.h                 |  17 -
 rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp        | 102 ++--
 rpcs3/Emu/Cell/SPUInterpreter.cpp             | 468 +++++++++---------
 rpcs3/Emu/Cell/SPUInterpreter.h               | 460 ++++++++---------
 rpcs3/Emu/Cell/SPURecompiler.cpp              | 156 +++---
 rpcs3/Emu/Cell/SPURecompiler.h                |   4 +-
 rpcs3/Emu/Cell/SPUThread.cpp                  | 147 +++---
 rpcs3/Emu/Cell/SPUThread.h                    |  32 +-
 rpcs3/Emu/Cell/lv2/lv2.cpp                    |   8 +-
 rpcs3/Emu/Cell/lv2/sys_cond.cpp               |   5 +
 rpcs3/Emu/Cell/lv2/sys_event.cpp              |   9 +-
 rpcs3/Emu/Cell/lv2/sys_event_flag.cpp         |  17 +-
 rpcs3/Emu/Cell/lv2/sys_interrupt.cpp          |  15 +-
 rpcs3/Emu/Cell/lv2/sys_interrupt.h            |   4 +-
 rpcs3/Emu/Cell/lv2/sys_lwcond.cpp             |   7 +-
 rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp            |   5 +
 rpcs3/Emu/Cell/lv2/sys_mutex.cpp              |   5 +
 rpcs3/Emu/Cell/lv2/sys_net.cpp                |  34 +-
 rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp         | 112 +++--
 rpcs3/Emu/Cell/lv2/sys_process.cpp            |   4 +-
 rpcs3/Emu/Cell/lv2/sys_rwlock.cpp             |  10 +
 rpcs3/Emu/Cell/lv2/sys_semaphore.cpp          |   5 +
 rpcs3/Emu/Cell/lv2/sys_spu.cpp                | 156 +++---
 rpcs3/Emu/Cell/lv2/sys_spu.h                  |   5 +-
 rpcs3/Emu/Cell/lv2/sys_sync.h                 |   4 +-
 rpcs3/Emu/Cell/lv2/sys_timer.cpp              |  22 +-
 rpcs3/Emu/Cell/lv2/sys_timer.h                |   8 +-
 rpcs3/Emu/IdManager.cpp                       |  22 +-
 rpcs3/Emu/IdManager.h                         | 145 +-----
 rpcs3/Emu/Memory/vm.cpp                       |  68 +--
 rpcs3/Emu/Memory/vm.h                         |   4 +-
 rpcs3/Emu/RSX/Capture/rsx_replay.cpp          |  18 +-
 rpcs3/Emu/RSX/Capture/rsx_replay.h            |   9 +-
 rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp         |   5 +
 rpcs3/Emu/RSX/D3D12/D3D12GSRender.h           |   1 +
 rpcs3/Emu/RSX/GL/GLGSRender.cpp               |   5 +
 rpcs3/Emu/RSX/GL/GLGSRender.h                 |   1 +
 rpcs3/Emu/RSX/Null/NullGSRender.cpp           |   5 +
 rpcs3/Emu/RSX/Null/NullGSRender.h             |   5 +-
 .../Emu/RSX/Overlays/overlay_perf_metrics.cpp |  17 +-
 rpcs3/Emu/RSX/Overlays/overlays.h             |   4 +-
 rpcs3/Emu/RSX/RSXThread.cpp                   |  56 ++-
 rpcs3/Emu/RSX/RSXThread.h                     |  21 +-
 rpcs3/Emu/RSX/VK/VKGSRender.cpp               |   5 +
 rpcs3/Emu/RSX/VK/VKGSRender.h                 |   3 +-
 rpcs3/Emu/RSX/rsx_methods.cpp                 |   2 +-
 rpcs3/Emu/System.cpp                          |  47 +-
 rpcs3/rpcs3_app.cpp                           |   8 +-
 rpcs3/rpcs3qt/breakpoint_list.cpp             |   4 +-
 rpcs3/rpcs3qt/debugger_frame.cpp              |  18 +-
 rpcs3/rpcs3qt/debugger_list.cpp               |   6 +-
 rpcs3/rpcs3qt/instruction_editor_dialog.cpp   |   2 +-
 rpcs3/rpcs3qt/kernel_explorer.cpp             |   8 +-
 rpcs3/rpcs3qt/register_editor_dialog.cpp      |   4 +-
 82 files changed, 1820 insertions(+), 2023 deletions(-)

diff --git a/Utilities/Atomic.h b/Utilities/Atomic.h
index 5edb45f2e0..1ea3e4f597 100644
--- a/Utilities/Atomic.h
+++ b/Utilities/Atomic.h
@@ -1023,7 +1023,7 @@ public:
 	}
 
 	// Conditionally decrement
-	simple_type fetch_dec_sat(simple_type greater_than = std::numeric_limits<simple_type>::min(), simple_type amount = 1)
+	bool try_dec(simple_type greater_than = std::numeric_limits<simple_type>::min())
 	{
 		type _new, old = atomic_storage<type>::load(m_data);
 
@@ -1031,17 +1031,39 @@ public:
 		{
 			_new = old;
 
-			if (_new <= greater_than)
+			if (!(_new > greater_than))
 			{
-				// Early exit
-				return old;
+				return false;
 			}
 
-			_new -= amount;
+			_new -= 1;
 
 			if (LIKELY(atomic_storage<type>::compare_exchange(m_data, old, _new)))
 			{
-				return old;
+				return true;
+			}
+		}
+	}
+
+	// Conditionally increment
+	bool try_inc(simple_type less_than = std::numeric_limits<simple_type>::max())
+	{
+		type _new, old = atomic_storage<type>::load(m_data);
+
+		while (true)
+		{
+			_new = old;
+
+			if (!(_new < less_than))
+			{
+				return false;
+			}
+
+			_new += 1;
+
+			if (LIKELY(atomic_storage<type>::compare_exchange(m_data, old, _new)))
+			{
+				return true;
 			}
 		}
 	}
diff --git a/Utilities/GDBDebugServer.h b/Utilities/GDBDebugServer.h
index d18861610b..75d0bd684a 100644
--- a/Utilities/GDBDebugServer.h
+++ b/Utilities/GDBDebugServer.h
@@ -40,7 +40,7 @@ public:
 const u64 ALL_THREADS = 0xffffffffffffffff;
 const u64 ANY_THREAD = 0;
 
-class GDBDebugServer : public old_thread
+class GDBDebugServer
 {
 	socket_t server_socket;
 	socket_t client_socket;
@@ -112,29 +112,16 @@ class GDBDebugServer : public old_thread
 	bool cmd_set_breakpoint(gdb_cmd& cmd);
 	bool cmd_remove_breakpoint(gdb_cmd& cmd);
 
-protected:
-	void on_task() override final;
-	void on_exit() override final;
-
 public:
 	bool from_breakpoint = true;
 	bool stop = false;
 	bool paused = false;
 	u64 pausedBy;
 
-	virtual std::string get_name() const;
-	virtual void on_stop() override final;
+	void operator()();
 	void pause_from(cpu_thread* t);
 };
 
 extern u32 g_gdb_debugger_id;
 
-template <>
-struct id_manager::on_stop<GDBDebugServer> {
-	static inline void func(GDBDebugServer* ptr)
-	{
-		if (ptr) ptr->on_stop();
-	}
-};
-
 #endif
diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp
index 1b8be3b960..f70ce02915 100644
--- a/Utilities/Thread.cpp
+++ b/Utilities/Thread.cpp
@@ -1091,33 +1091,40 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
 
 	const auto cpu = get_current_cpu_thread();
 
-
 	if (rsx::g_access_violation_handler)
 	{
 		bool handled = false;
+
 		try
 		{
 			handled = rsx::g_access_violation_handler(addr, is_writing);
 		}
-		catch (std::runtime_error &e)
+		catch (const std::exception& e)
 		{
 			LOG_FATAL(RSX, "g_access_violation_handler(0x%x, %d): %s", addr, is_writing, e.what());
+
 			if (cpu)
 			{
 				vm::temporary_unlock(*cpu);
 				cpu->state += cpu_flag::dbg_pause;
-				cpu->test_state();
-				return false;
+
+				if (cpu->test_stopped())
+				{
+					std::terminate();
+				}
 			}
+
+			return false;
 		}
 
 		if (handled)
 		{
 			g_tls_fault_rsx++;
-			if (cpu)
+			if (cpu && cpu->test_stopped())
 			{
-				cpu->test_state();
+				std::terminate();
 			}
+
 			return true;
 		}
 	}
@@ -1160,7 +1167,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
 	// check if address is RawSPU MMIO register
 	if (addr - RAW_SPU_BASE_ADDR < (6 * RAW_SPU_OFFSET) && (addr % RAW_SPU_OFFSET) >= RAW_SPU_PROB_OFFSET)
 	{
-		auto thread = idm::get<RawSPUThread>((addr - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET);
+		auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu((addr - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET));
 
 		if (!thread)
 		{
@@ -1255,9 +1262,9 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
 
 	if (vm::check_addr(addr, std::max<std::size_t>(1, d_size), vm::page_allocated | (is_writing ? vm::page_writable : vm::page_readable)))
 	{
-		if (cpu)
+		if (cpu && cpu->test_stopped())
 		{
-			cpu->test_state();
+			std::terminate();
 		}
 
 		return true;
@@ -1321,6 +1328,11 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
 		LOG_FATAL(MEMORY, "Access violation %s location 0x%x", is_writing ? "writing" : "reading", addr);
 		cpu->state += cpu_flag::dbg_pause;
 		cpu->check_state();
+
+		if (cpu->test_stopped())
+		{
+			std::terminate();
+		}
 	}
 
 	return true;
@@ -1571,53 +1583,6 @@ thread_local DECLARE(thread_ctrl::g_tls_this_thread) = nullptr;
 
 DECLARE(thread_ctrl::g_native_core_layout) { native_core_arrangement::undefined };
 
-void thread_base::start(const std::shared_ptr<thread_base>& ctrl, task_stack task)
-{
-#ifdef _WIN32
-	using thread_result = uint;
-#else
-	using thread_result = void*;
-#endif
-
-	// Thread entry point
-	const native_entry entry = [](void* arg) -> thread_result
-	{
-		// Recover shared_ptr from short-circuited thread_base object pointer
-		std::shared_ptr<thread_base> ctrl = static_cast<thread_base*>(arg)->m_self;
-
-		try
-		{
-			ctrl->initialize();
-			task_stack{std::move(ctrl->m_task)}.invoke();
-		}
-		catch (...)
-		{
-			// Capture exception
-			ctrl->finalize(std::current_exception());
-			finalize();
-			return 0;
-		}
-
-		ctrl->finalize(nullptr);
-		finalize();
-		return 0;
-	};
-
-	ctrl->m_self = ctrl;
-	ctrl->m_task = std::move(task);
-
-#ifdef _WIN32
-	std::uintptr_t thread = _beginthreadex(nullptr, 0, entry, ctrl.get(), 0, nullptr);
-	verify("thread_ctrl::start" HERE), thread != 0;
-#else
-	pthread_t thread;
-	verify("thread_ctrl::start" HERE), pthread_create(&thread, nullptr, entry, ctrl.get()) == 0;
-#endif
-
-	// TODO: this is unsafe and must be duplicated in thread_ctrl::initialize
-	ctrl->m_thread = (uintptr_t)thread;
-}
-
 void thread_base::start(native_entry entry)
 {
 #ifdef _WIN32
@@ -1679,7 +1644,7 @@ void thread_base::initialize()
 #endif
 }
 
-std::shared_ptr<thread_base> thread_base::finalize(std::exception_ptr eptr) noexcept
+bool thread_base::finalize(int) noexcept
 {
 	// Report pending errors
 	error_code::error_report(0, 0, 0, 0);
@@ -1712,17 +1677,13 @@ std::shared_ptr<thread_base> thread_base::finalize(std::exception_ptr eptr) noex
 		g_tls_fault_rsx,
 		g_tls_fault_spu);
 
-	// Untangle circular reference, set exception
-	std::unique_lock lock(m_mutex);
-
-	// Possibly last reference to the thread object
-	std::shared_ptr<thread_base> self = std::move(m_self);
-	m_state = thread_state::finished;
-	m_exception = eptr;
+	// Return true if the thread was detached, i.e. the caller must delete the thread object
+	const bool result = m_state.exchange(thread_state::finished) == thread_state::detached;
 
 	// Signal waiting threads
-	lock.unlock(), m_jcv.notify_all();
-	return self;
+	m_mutex.lock_unlock();
+	m_jcv.notify_all();
+	return result;
 }
 
 void thread_base::finalize() noexcept
@@ -1741,8 +1702,6 @@ bool thread_ctrl::_wait_for(u64 usec)
 		// Mutex is unlocked at the start and after the waiting
 		if (u32 sig = _this->m_signal.load())
 		{
-			thread_ctrl::test();
-
 			if (sig & 1)
 			{
 				_this->m_signal &= ~1;
@@ -1761,11 +1720,6 @@ bool thread_ctrl::_wait_for(u64 usec)
 		// Double-check the value
 		if (u32 sig = _this->m_signal.load())
 		{
-			if (sig & 2 && _this->m_exception)
-			{
-				_this->_throw();
-			}
-
 			if (sig & 1)
 			{
 				_this->m_signal &= ~1;
@@ -1780,20 +1734,6 @@ bool thread_ctrl::_wait_for(u64 usec)
 	return false;
 }
 
-[[noreturn]] void thread_base::_throw()
-{
-	std::exception_ptr ex = std::exchange(m_exception, std::exception_ptr{});
-	m_signal &= ~3;
-	m_mutex.unlock();
-	std::rethrow_exception(std::move(ex));
-}
-
-void thread_base::_notify(cond_variable thread_base::* ptr)
-{
-	m_mutex.lock_unlock();
-	(this->*ptr).notify_one();
-}
-
 thread_base::thread_base(std::string_view name)
 	: m_name(name)
 {
@@ -1811,22 +1751,6 @@ thread_base::~thread_base()
 	}
 }
 
-void thread_base::set_exception(std::exception_ptr ptr)
-{
-	std::lock_guard lock(m_mutex);
-	m_exception = ptr;
-
-	if (m_exception)
-	{
-		m_signal |= 2;
-		m_cond.notify_one();
-	}
-	else
-	{
-		m_signal &= ~2;
-	}
-}
-
 void thread_base::join() const
 {
 	if (m_state == thread_state::finished)
@@ -1842,33 +1766,13 @@ void thread_base::join() const
 	}
 }
 
-void thread_base::detach()
-{
-	auto self = weak_from_this().lock();
-
-	if (!self)
-	{
-		LOG_FATAL(GENERAL, "Cannot detach thread '%s'", get_name());
-		return;
-	}
-
-	if (self->m_state.compare_and_swap_test(thread_state::created, thread_state::detached))
-	{
-		std::lock_guard lock(m_mutex);
-
-		if (m_state == thread_state::detached)
-		{
-			m_self = std::move(self);
-		}
-	}
-}
-
 void thread_base::notify()
 {
 	if (!(m_signal & 1))
 	{
 		m_signal |= 1;
-		_notify(&thread_base::m_cond);
+		m_mutex.lock_unlock();
+		m_cond.notify_one();
 	}
 }
 
@@ -1886,16 +1790,13 @@ u64 thread_base::get_cycles()
 	{
 		cycles = static_cast<u64>(thread_time.tv_sec) * 1'000'000'000 + thread_time.tv_nsec;
 #endif
-		// Report 0 the first time this function is called
-		if (m_cycles == 0)
+		if (const u64 old_cycles = m_cycles.exchange(cycles))
 		{
-			m_cycles = cycles;
-			return 0;
+			return cycles - old_cycles;
 		}
 
-		const auto diff_cycles = cycles - m_cycles;
-		m_cycles = cycles;
-		return diff_cycles;
+		// Report 0 the first time this function is called
+		return 0;
 	}
 	else
 	{
@@ -1903,23 +1804,6 @@ u64 thread_base::get_cycles()
 	}
 }
 
-void thread_ctrl::test()
-{
-	const auto _this = g_tls_this_thread;
-
-	if (_this->m_signal & 2)
-	{
-		_this->m_mutex.lock();
-
-		if (_this->m_exception)
-		{
-			_this->_throw();
-		}
-
-		_this->m_mutex.unlock();
-	}
-}
-
 void thread_ctrl::detect_cpu_layout()
 {
 	if (!g_native_core_layout.compare_and_swap_test(native_core_arrangement::undefined, native_core_arrangement::generic))
@@ -2067,45 +1951,3 @@ void thread_ctrl::set_thread_affinity_mask(u16 mask)
 	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cs);
 #endif
 }
-
-old_thread::old_thread()
-{
-}
-
-old_thread::~old_thread()
-{
-}
-
-std::string old_thread::get_name() const
-{
-	return fmt::format("('%s') Unnamed Thread", typeid(*this).name());
-}
-
-void old_thread::start_thread(const std::shared_ptr<void>& _this)
-{
-	// Ensure it's not called from the constructor and the correct object is passed
-	verify("old_thread::start_thread" HERE), _this.get() == this;
-
-	// Run thread
-	thread_ctrl::spawn(m_thread, get_name(), [this, _this]()
-	{
-		try
-		{
-			LOG_TRACE(GENERAL, "Thread started");
-			on_spawn();
-			on_task();
-			LOG_TRACE(GENERAL, "Thread ended");
-		}
-		catch (const std::exception& e)
-		{
-			LOG_FATAL(GENERAL, "%s thrown: %s", typeid(e).name(), e.what());
-			Emu.Pause();
-		}
-
-		on_exit();
-	});
-}
-
-task_stack::task_base::~task_base()
-{
-}
diff --git a/Utilities/Thread.h b/Utilities/Thread.h
index dca858baed..a7c400e8ef 100644
--- a/Utilities/Thread.h
+++ b/Utilities/Thread.h
@@ -3,7 +3,6 @@
 #include "types.h"
 #include "Atomic.h"
 
-#include <exception>
 #include <string>
 #include <memory>
 #include <string_view>
@@ -38,8 +37,8 @@ enum class thread_class : u32
 enum class thread_state
 {
 	created,  // Initial state
-	detached, // Set if the thread has been detached successfully (only possible via shared_ptr)
-	aborting, // Set if the thread has been joined in destructor (mutually exclusive with detached)
+	detached, // The thread has been detached to destroy its own named_thread object (can be dangerously misused)
+	aborting, // The thread has been joined in the destructor or explicitly aborted (mutually exclusive with detached)
 	finished  // Final state, always set at the end of thread execution
 };
 
@@ -89,84 +88,15 @@ struct thread_on_abort : std::bool_constant<false> {};
 template <typename T>
 struct thread_on_abort<T, decltype(std::declval<named_thread<T>&>().on_abort())> : std::bool_constant<true> {};
 
-// Detect on_cleanup() static function (should return void)
+// Detect on_cleanup() static member function (should return void) (in C++20 can use destroying delete instead)
 template <typename T, typename = void>
 struct thread_on_cleanup : std::bool_constant<false> {};
 
 template <typename T>
 struct thread_on_cleanup<T, decltype(named_thread<T>::on_cleanup(std::declval<named_thread<T>*>()))> : std::bool_constant<true> {};
 
-// Simple list of void() functors
-class task_stack
-{
-	struct task_base
-	{
-		std::unique_ptr<task_base> next;
-
-		virtual ~task_base();
-
-		virtual void invoke()
-		{
-			if (next)
-			{
-				next->invoke();
-			}
-		}
-	};
-
-	template <typename F>
-	struct task_type final : task_base
-	{
-		std::remove_reference_t<F> func;
-
-		task_type(F&& func)
-			: func(std::forward<F>(func))
-		{
-		}
-
-		void invoke() final override
-		{
-			func();
-			task_base::invoke();
-		}
-	};
-
-	std::unique_ptr<task_base> m_stack;
-
-public:
-	task_stack() = default;
-
-	template <typename F>
-	task_stack(F&& func)
-		: m_stack(new task_type<F>(std::forward<F>(func)))
-	{
-	}
-
-	void push(task_stack stack)
-	{
-		auto _top = stack.m_stack.release();
-		auto _next = m_stack.release();
-		m_stack.reset(_top);
-		while (UNLIKELY(_top->next)) _top = _top->next.get();
-		_top->next.reset(_next);
-	}
-
-	void reset()
-	{
-		m_stack.reset();
-	}
-
-	void invoke() const
-	{
-		if (m_stack)
-		{
-			m_stack->invoke();
-		}
-	}
-};
-
-// Thread base class (TODO: remove shared_ptr, make private base)
-class thread_base : public std::enable_shared_from_this<thread_base>
+// Thread base class
+class thread_base
 {
 	// Native thread entry point function type
 #ifdef _WIN32
@@ -175,9 +105,6 @@ class thread_base : public std::enable_shared_from_this<thread_base>
 	using native_entry = void*(*)(void* arg);
 #endif
 
-	// Self pointer for detached thread
-	std::shared_ptr<thread_base> m_self;
-
 	// Thread handle (platform-specific)
 	atomic_t<std::uintptr_t> m_thread{0};
 
@@ -196,71 +123,41 @@ class thread_base : public std::enable_shared_from_this<thread_base>
 	// Thread state
 	atomic_t<thread_state> m_state = thread_state::created;
 
-	// Remotely set or caught exception
-	std::exception_ptr m_exception;
-
-	// Thread initial task
-	task_stack m_task;
-
 	// Thread name
 	lf_value<std::string> m_name;
 
-	// CPU cycles thread has run for
-	u64 m_cycles{0};
+	// Counter value sampled by the previous get_cycles() call (0 before the first call)
+	atomic_t<u64> m_cycles = 0;
 
 	// Start thread
-	static void start(const std::shared_ptr<thread_base>&, task_stack);
-
 	void start(native_entry);
 
 	// Called at the thread start
 	void initialize();
 
-	// Called at the thread end, returns moved m_self (may be null)
-	std::shared_ptr<thread_base> finalize(std::exception_ptr) noexcept;
+	// Called at the thread end, returns true if needs destruction
+	bool finalize(int) noexcept;
 
+	// Cleanup after possibly deleting the thread instance
 	static void finalize() noexcept;
 
-	// Internal throwing function. Mutex must be locked and will be unlocked.
-	[[noreturn]] void _throw();
-
-	// Internal notification function
-	void _notify(cond_variable thread_base::*);
-
 	friend class thread_ctrl;
 
 	template <class Context>
 	friend class named_thread;
 
-public:
+protected:
 	thread_base(std::string_view name);
 
 	~thread_base();
 
-	// Get thread name
-	const std::string& get_name() const
-	{
-		return m_name;
-	}
-
-	// Set thread name (not recommended)
-	void set_name(std::string_view name)
-	{
-		m_name.assign(name);
-	}
-
+public:
 	// Get CPU cycles since last time this function was called. First call returns 0.
 	u64 get_cycles();
 
-	// Set exception
-	void set_exception(std::exception_ptr ptr);
-
 	// Wait for the thread (it does NOT change thread state, and can be called from multiple threads)
 	void join() const;
 
-	// Make thread to manage a shared_ptr of itself
-	void detach();
-
 	// Notify the thread
 	void notify();
 };
@@ -306,25 +203,37 @@ public:
 		static_cast<thread_base&>(thread).m_name.assign(name);
 	}
 
+	template <typename T>
+	static u64 get_cycles(named_thread<T>& thread)
+	{
+		return static_cast<thread_base&>(thread).get_cycles();
+	}
+
+	template <typename T>
+	static void notify(named_thread<T>& thread)
+	{
+		static_cast<thread_base&>(thread).notify();
+	}
+
 	// Read current state
 	static inline thread_state state()
 	{
 		return g_tls_this_thread->m_state;
 	}
 
-	// Wait once with timeout. Abortable, may throw. May spuriously return false.
+	// Wait once with timeout. May spuriously return false.
 	static inline bool wait_for(u64 usec)
 	{
 		return _wait_for(usec);
 	}
 
-	// Wait. Abortable, may throw.
+	// Wait.
 	static inline void wait()
 	{
 		_wait_for(-1);
 	}
 
-	// Wait until pred(). Abortable, may throw.
+	// Wait until pred().
 	template <typename F, typename RT = std::invoke_result_t<F>>
 	static inline RT wait(F&& pred)
 	{
@@ -339,42 +248,12 @@ public:
 		}
 	}
 
-	// Wait eternally until aborted.
-	[[noreturn]] static inline void eternalize()
-	{
-		while (true)
-		{
-			_wait_for(-1);
-		}
-	}
-
-	// Test exception (may throw).
-	static void test();
-
 	// Get current thread (may be nullptr)
 	static thread_base* get_current()
 	{
 		return g_tls_this_thread;
 	}
 
-	// Create detached named thread
-	template <typename N, typename F>
-	static inline void spawn(N&& name, F&& func)
-	{
-		auto out = std::make_shared<thread_base>(std::forward<N>(name));
-
-		thread_base::start(out, std::forward<F>(func));
-	}
-
-	// Named thread factory
-	template <typename N, typename F>
-	static inline void spawn(std::shared_ptr<thread_base>& out, N&& name, F&& func)
-	{
-		out = std::make_shared<thread_base>(std::forward<N>(name));
-
-		thread_base::start(out, std::forward<F>(func));
-	}
-
 	// Detect layout
 	static void detect_cpu_layout();
 
@@ -387,22 +266,17 @@ public:
 	// Sets the preferred affinity mask for this thread
 	static void set_thread_affinity_mask(u16 mask);
 
+	// Spawn a detached named thread
 	template <typename F>
-	static inline std::shared_ptr<named_thread<F>> make_shared(std::string_view name, F&& lambda)
+	static void spawn(std::string_view name, F&& func)
 	{
-		return std::make_shared<named_thread<F>>(name, std::forward<F>(lambda));
-	}
-
-	template <typename T, typename... Args>
-	static inline std::shared_ptr<named_thread<T>> make_shared(std::string_view name, Args&&... args)
-	{
-		return std::make_shared<named_thread<T>>(name, std::forward<Args>(args)...);
+		new named_thread<F>(thread_state::detached, name, std::forward<F>(func));
 	}
 };
 
 // Derived from the callable object Context, possibly a lambda
 template <class Context>
-class named_thread final : public Context, result_storage_t<Context>, public thread_base
+class named_thread final : public Context, result_storage_t<Context>, thread_base
 {
 	using result = result_storage_t<Context>;
 	using thread = thread_base;
@@ -414,7 +288,22 @@ class named_thread final : public Context, result_storage_t<Context>, public thr
 	static inline void* entry_point(void* arg) try
 #endif
 	{
-		const auto maybe_last_ptr = static_cast<named_thread*>(static_cast<thread*>(arg))->entry_point();
+		const auto _this = static_cast<named_thread*>(static_cast<thread*>(arg));
+
+		// Perform self-cleanup if necessary
+		if (_this->entry_point())
+		{
+			// Call on_cleanup() static member function if it's available
+			if constexpr (thread_on_cleanup<Context>())
+			{
+				Context::on_cleanup(_this);
+			}
+			else
+			{
+				delete _this;
+			}
+		}
+
 		thread::finalize();
 		return 0;
 	}
@@ -423,7 +312,7 @@ class named_thread final : public Context, result_storage_t<Context>, public thr
 		catch_all_exceptions();
 	}
 
-	std::shared_ptr<thread> entry_point()
+	bool entry_point()
 	{
 		thread::initialize();
 
@@ -438,7 +327,16 @@ class named_thread final : public Context, result_storage_t<Context>, public thr
 			new (result::get()) typename result::type(Context::operator()());
 		}
 
-		return thread::finalize(nullptr);
+		return thread::finalize(0);
+	}
+
+	// Detached thread constructor
+	named_thread(thread_state s, std::string_view name, Context&& f)
+		: Context(std::forward<Context>(f))
+		, thread(name)
+	{
+		thread::m_state.raw() = s;
+		thread::start(&named_thread::entry_point);
 	}
 
 	friend class thread_ctrl;
@@ -493,21 +391,23 @@ public:
 		return thread::m_state.load();
 	}
 
-	// Try to set thread_state::aborting
+	// Try to abort/detach
 	named_thread& operator=(thread_state s)
 	{
-		if (s != thread_state::aborting)
+		if (s != thread_state::aborting && s != thread_state::detached)
 		{
 			ASSUME(0);
 		}
 
-		// Notify thread if not detached or terminated
-		if (thread::m_state.compare_and_swap_test(thread_state::created, thread_state::aborting))
+		if (thread::m_state.compare_and_swap_test(thread_state::created, s))
 		{
-			// Call on_abort() method if it's available
-			if constexpr (thread_on_abort<Context>())
+			if (s == thread_state::aborting)
 			{
-				Context::on_abort();
+				// Call on_abort() method if it's available
+				if constexpr (thread_on_abort<Context>())
+				{
+					Context::on_abort();
+				}
 			}
 
 			thread::notify();
@@ -528,63 +428,3 @@ public:
 		}
 	}
 };
-
-// Old named_thread
-class old_thread
-{
-	// Pointer to managed resource (shared with actual thread)
-	std::shared_ptr<thread_base> m_thread;
-
-public:
-	old_thread();
-
-	virtual ~old_thread();
-
-	old_thread(const old_thread&) = delete;
-
-	old_thread& operator=(const old_thread&) = delete;
-
-	// Get thread name
-	virtual std::string get_name() const;
-
-protected:
-	// Start thread (cannot be called from the constructor: should throw in such case)
-	void start_thread(const std::shared_ptr<void>& _this);
-
-	// Thread task (called in the thread)
-	virtual void on_task() = 0;
-
-	// Thread finalization (called after on_task)
-	virtual void on_exit() {}
-
-	// Called once upon thread spawn within the thread's own context
-	virtual void on_spawn() {}
-
-public:
-	// ID initialization
-	virtual void on_init(const std::shared_ptr<void>& _this)
-	{
-		return start_thread(_this);
-	}
-
-	// ID finalization
-	virtual void on_stop()
-	{
-		m_thread->join();
-	}
-
-	thread_base* get() const
-	{
-		return m_thread.get();
-	}
-
-	void join() const
-	{
-		return m_thread->join();
-	}
-
-	void notify() const
-	{
-		return m_thread->notify();
-	}
-};
diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp
index 0694c32dab..c68249aff0 100644
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@@ -21,7 +21,7 @@ bool cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
 		verify(HERE), rc == WAIT_TIMEOUT;
 
 		// Retire
-		while (!m_value.fetch_dec_sat())
+		while (!m_value.try_dec())
 		{
 			timeout.QuadPart = 0;
 
diff --git a/Utilities/sema.h b/Utilities/sema.h
index 7b7d97c990..755bccf000 100644
--- a/Utilities/sema.h
+++ b/Utilities/sema.h
@@ -34,7 +34,7 @@ protected:
 
 	bool try_wait()
 	{
-		return m_value.fetch_dec_sat(0) > 0;
+		return m_value.try_dec(0);
 	}
 
 	void post(s32 _max)
diff --git a/Utilities/typemap.h b/Utilities/typemap.h
index 4034b19e6e..d12a4074f7 100644
--- a/Utilities/typemap.h
+++ b/Utilities/typemap.h
@@ -774,10 +774,8 @@ namespace utils
 					// If max_count > 1 only id_new is supported
 					static_assert(std::is_same_v<id_tag, id_new_t> && !std::is_const_v<std::remove_reference_t<Type>>);
 
-					// Try to acquire the semaphore (conditional increment)
-					const uint old_sema = head->m_sema.load();
-
-					if (UNLIKELY(old_sema > last || !head->m_sema.compare_and_swap_test(old_sema, old_sema + 1)))
+					// Try to acquire the semaphore
+					if (UNLIKELY(!head->m_sema.try_inc(last + 1)))
 					{
 						block = nullptr;
 					}
@@ -1225,7 +1223,7 @@ namespace utils
 		template <typename Type>
 		std::shared_lock<::notifier> get_free_notifier() const
 		{
-			return std::shared_lock{get_head<Type>()->m_free_notifier};
+			return std::shared_lock(get_head<Type>()->m_free_notifier, std::try_to_lock);
 		}
 	};
 } // namespace utils
diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp
index 78fcac747d..c6ba7d3de7 100644
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@@ -4,11 +4,8 @@
 #include "CPUThread.h"
 #include "Emu/IdManager.h"
 #include "Utilities/GDBDebugServer.h"
-#include <typeinfo>
-
-#ifdef _WIN32
-#include <Windows.h>
-#endif
+#include "Emu/Cell/PPUThread.h"
+#include "Emu/Cell/SPUThread.h"
 
 DECLARE(cpu_thread::g_threads_created){0};
 DECLARE(cpu_thread::g_threads_deleted){0};
@@ -45,12 +42,22 @@ void fmt_class_string<bs_t<cpu_flag>>::format(std::string& out, u64 arg)
 
 thread_local cpu_thread* g_tls_current_cpu_thread = nullptr;
 
-void cpu_thread::on_task()
+void cpu_thread::operator()()
 {
 	state -= cpu_flag::exit;
 
 	g_tls_current_cpu_thread = this;
 
+	if (g_cfg.core.thread_scheduler_enabled)
+	{
+		thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(id_type() == 1 ? thread_class::ppu : thread_class::spu));
+	}
+
+	if (g_cfg.core.lower_spu_priority && id_type() == 2)
+	{
+		thread_ctrl::set_native_priority(-1);
+	}
+
 	// Check thread status
 	while (!(state & (cpu_flag::exit + cpu_flag::dbg_global_stop)))
 	{
@@ -65,10 +72,12 @@ void cpu_thread::on_task()
 			{
 				state += _s;
 			}
-			catch (const std::exception&)
+			catch (const std::exception& e)
 			{
+				LOG_FATAL(GENERAL, "%s thrown: %s", typeid(e).name(), e.what());
 				LOG_NOTICE(GENERAL, "\n%s", dump());
-				throw;
+				Emu.Pause();
+				break;
 			}
 
 			state -= cpu_flag::ret;
@@ -79,10 +88,9 @@ void cpu_thread::on_task()
 	}
 }
 
-void cpu_thread::on_stop()
+void cpu_thread::on_abort()
 {
 	state += cpu_flag::exit;
-	notify();
 }
 
 cpu_thread::~cpu_thread()
@@ -132,7 +140,7 @@ bool cpu_thread::check_state()
 			cpu_sleep_called = false;
 		}
 
-		if (!(state & cpu_state_pause))
+		if (!is_paused())
 		{
 			if (cpu_flag_memory)
 			{
@@ -167,21 +175,20 @@ bool cpu_thread::check_state()
 	return false;
 }
 
-void cpu_thread::test_state()
+void cpu_thread::notify()
 {
-	if (UNLIKELY(state))
+	if (id_type() == 1)
 	{
-		if (check_state())
-		{
-			throw cpu_flag::ret;
-		}
+		thread_ctrl::notify(*static_cast<named_thread<ppu_thread>*>(this));
+	}
+	else if (id_type() == 2)
+	{
+		thread_ctrl::notify(*static_cast<named_thread<spu_thread>*>(this));
+	}
+	else
+	{
+		fmt::throw_exception("Invalid cpu_thread type");
 	}
-}
-
-void cpu_thread::run()
-{
-	state -= cpu_flag::stop;
-	notify();
 }
 
 std::string cpu_thread::dump() const
diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h
index 8272be5d45..5d270eef70 100644
--- a/rpcs3/Emu/CPU/CPUThread.h
+++ b/rpcs3/Emu/CPU/CPUThread.h
@@ -21,32 +21,53 @@ enum class cpu_flag : u32
 	__bitset_enum_max
 };
 
-// Flag set for pause state
-constexpr bs_t<cpu_flag> cpu_state_pause = cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause;
-
-class cpu_thread : public old_thread
+class cpu_thread
 {
-	void on_task() override final;
+	// PPU cache backward compatibility hack
+	char dummy[sizeof(std::shared_ptr<void>)];
+
+protected:
+	cpu_thread(u32 id);
 
 public:
-	virtual void on_stop() override;
-	virtual ~cpu_thread() override;
+	virtual ~cpu_thread();
+	void operator()();
+	void on_abort();
 
+	// Thread ID; its upper bits (id >> 24) encode the thread type, see id_type()
 	const u32 id;
 
-	cpu_thread(u32 id);
-
 	// Public thread state
 	atomic_bs_t<cpu_flag> state{+cpu_flag::stop};
 
 	// Process thread state, return true if the checker must return
 	bool check_state();
 
-	// Process thread state
-	void test_state();
+	// Process thread state (pause) if any flag is set; returns true if the caller must return
+	[[nodiscard]] bool test_stopped()
+	{
+		if (UNLIKELY(state))
+		{
+			if (check_state())
+			{
+				return true;
+			}
+		}
 
-	// Run thread
-	void run();
+		return false;
+	}
+
+	// Test stopped state (any of the stop, exit or dbg_global_stop flags is set)
+	bool is_stopped()
+	{
+		return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::dbg_global_stop));
+	}
+
+	// Test paused state (any of the suspend, dbg_global_pause or dbg_pause flags is set)
+	bool is_paused()
+	{
+		return !!(state & (cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause));
+	}
 
 	// Check thread type
 	u32 id_type()
@@ -54,10 +75,16 @@ public:
 		return id >> 24;
 	}
 
+	// Downcast to the concrete named_thread<> type (selected by id_type()) and notify it
+	void notify();
+
 	// Thread stats for external observation
 	static atomic_t<u64> g_threads_created, g_threads_deleted;
 
-	// Print CPU state
+	// Get thread name
+	virtual std::string get_name() const = 0;
+
+	// Get CPU state dump
 	virtual std::string dump() const;
 
 	// Thread entry point function
@@ -79,3 +106,6 @@ inline cpu_thread* get_current_cpu_thread() noexcept
 
 	return g_tls_current_cpu_thread;
 }
+
+class ppu_thread;
+class spu_thread;
diff --git a/rpcs3/Emu/Cell/Modules/cellAdec.cpp b/rpcs3/Emu/Cell/Modules/cellAdec.cpp
index b74c31c4ec..8c6ea9054d 100644
--- a/rpcs3/Emu/Cell/Modules/cellAdec.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellAdec.cpp
@@ -71,7 +71,7 @@ public:
 	bool use_ats_headers;
 
 	AudioDecoder(s32 type, u32 addr, u32 size, vm::ptr<CellAdecCbMsg> func, u32 arg)
-		: ppu_thread("HLE Audio Decoder")
+		: ppu_thread({}, "", 0)
 		, type(type)
 		, memAddr(addr)
 		, memSize(size)
@@ -159,7 +159,7 @@ public:
 		}
 	}
 
-	virtual void cpu_task() override
+	void non_task()
 	{
 		while (true)
 		{
@@ -564,13 +564,7 @@ s32 cellAdecOpen(vm::ptr<CellAdecType> type, vm::ptr<CellAdecResource> res, vm::
 		return CELL_ADEC_ERROR_ARG;
 	}
 
-	auto&& adec = idm::make_ptr<ppu_thread, AudioDecoder>(type->audioCodecType, res->startAddr, res->totalMemSize, cb->cbFunc, cb->cbArg);
-
-	*handle = adec->id;
-
-	adec->run();
-
-	return CELL_OK;
+	fmt::throw_exception("cellAdec disabled, use LLE.");
 }
 
 s32 cellAdecOpenEx(vm::ptr<CellAdecType> type, vm::ptr<CellAdecResourceEx> res, vm::ptr<CellAdecCb> cb, vm::ptr<u32> handle)
@@ -582,13 +576,7 @@ s32 cellAdecOpenEx(vm::ptr<CellAdecType> type, vm::ptr<CellAdecResourceEx> res,
 		return CELL_ADEC_ERROR_ARG;
 	}
 
-	auto&& adec = idm::make_ptr<ppu_thread, AudioDecoder>(type->audioCodecType, res->startAddr, res->totalMemSize, cb->cbFunc, cb->cbArg);
-
-	*handle = adec->id;
-
-	adec->run();
-
-	return CELL_OK;
+	fmt::throw_exception("cellAdec disabled, use LLE.");
 }
 
 s32 cellAdecOpenExt(vm::ptr<CellAdecType> type, vm::ptr<CellAdecResourceEx> res, vm::ptr<CellAdecCb> cb, vm::ptr<u32> handle)
diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.cpp b/rpcs3/Emu/Cell/Modules/cellAudio.cpp
index c5da498547..e38362a565 100644
--- a/rpcs3/Emu/Cell/Modules/cellAudio.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellAudio.cpp
@@ -349,7 +349,7 @@ error_code cellAudioInit()
 	return CELL_OK;
 }
 
-error_code cellAudioQuit()
+error_code cellAudioQuit(ppu_thread& ppu)
 {
 	cellAudio.warning("cellAudioQuit()");
 
@@ -367,6 +367,11 @@ error_code cellAudioQuit()
 
 	while (true)
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		thread_ctrl::wait_for(1000);
 
 		auto g_audio = g_idm->lock<named_thread<audio_thread>>(0);
diff --git a/rpcs3/Emu/Cell/Modules/cellDmux.cpp b/rpcs3/Emu/Cell/Modules/cellDmux.cpp
index c0124bd90c..9aaa7cfd52 100644
--- a/rpcs3/Emu/Cell/Modules/cellDmux.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellDmux.cpp
@@ -194,7 +194,7 @@ public:
 	atomic_t<bool> is_working;
 
 	Demuxer(u32 addr, u32 size, vm::ptr<CellDmuxCbMsg> func, u32 arg)
-		: ppu_thread("HLE Demuxer")
+		: ppu_thread({}, "", 0)
 		, is_finished(false)
 		, is_closed(false)
 		, is_running(false)
@@ -206,7 +206,7 @@ public:
 	{
 	}
 
-	virtual void cpu_task() override
+	void non_task()
 	{
 		DemuxerTask task;
 		DemuxerStream stream = {};
@@ -987,13 +987,7 @@ s32 cellDmuxOpen(vm::cptr<CellDmuxType> type, vm::cptr<CellDmuxResource> res, vm
 	}
 
 	// TODO: check demuxerResource and demuxerCb arguments
-	auto&& dmux = idm::make_ptr<ppu_thread, Demuxer>(res->memAddr, res->memSize, cb->cbMsgFunc, cb->cbArg);
-
-	*handle = dmux->id;
-
-	dmux->run();
-
-	return CELL_OK;
+	fmt::throw_exception("cellDmux disabled, use LLE.");
 }
 
 s32 cellDmuxOpenEx(vm::cptr<CellDmuxType> type, vm::cptr<CellDmuxResourceEx> resEx, vm::cptr<CellDmuxCb> cb, vm::ptr<u32> handle)
@@ -1006,13 +1000,7 @@ s32 cellDmuxOpenEx(vm::cptr<CellDmuxType> type, vm::cptr<CellDmuxResourceEx> res
 	}
 
 	// TODO: check demuxerResourceEx and demuxerCb arguments
-	auto&& dmux = idm::make_ptr<ppu_thread, Demuxer>(resEx->memAddr, resEx->memSize, cb->cbMsgFunc, cb->cbArg);
-
-	*handle = dmux->id;
-
-	dmux->run();
-
-	return CELL_OK;
+	fmt::throw_exception("cellDmux disabled, use LLE.");
 }
 
 s32 cellDmuxOpenExt(vm::cptr<CellDmuxType> type, vm::cptr<CellDmuxResourceEx> resEx, vm::cptr<CellDmuxCb> cb, vm::ptr<u32> handle)
@@ -1032,13 +1020,7 @@ s32 cellDmuxOpen2(vm::cptr<CellDmuxType2> type2, vm::cptr<CellDmuxResource2> res
 	}
 
 	// TODO: check demuxerType2, demuxerResource2 and demuxerCb arguments
-	auto&& dmux = idm::make_ptr<ppu_thread, Demuxer>(res2->memAddr, res2->memSize, cb->cbMsgFunc, cb->cbArg);
-
-	*handle = dmux->id;
-
-	dmux->run();
-
-	return CELL_OK;
+	fmt::throw_exception("cellDmux disabled, use LLE.");
 }
 
 s32 cellDmuxClose(u32 handle)
diff --git a/rpcs3/Emu/Cell/Modules/cellFs.cpp b/rpcs3/Emu/Cell/Modules/cellFs.cpp
index efddcbd76f..1931b3940c 100644
--- a/rpcs3/Emu/Cell/Modules/cellFs.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellFs.cpp
@@ -869,7 +869,7 @@ struct fs_aio_thread : ppu_thread
 {
 	using ppu_thread::ppu_thread;
 
-	virtual void cpu_task() override
+	void non_task()
 	{
 		while (cmd64 cmd = cmd_wait())
 		{
@@ -920,11 +920,7 @@ s32 cellFsAioInit(vm::cptr<char> mount_point)
 	// TODO: create AIO thread (if not exists) for specified mount point
 	const auto m = fxm::make<fs_aio_manager>();
 
-	if (m)
-	{
-		m->thread = idm::make_ptr<ppu_thread, fs_aio_thread>("FS AIO Thread", 500);
-		m->thread->run();
-	}
+	fmt::throw_exception("cellFsAio disabled, use LLE.");
 
 	return CELL_OK;
 }
@@ -961,8 +957,6 @@ s32 cellFsAioRead(vm::ptr<CellFsAio> aio, vm::ptr<s32> id, fs_aio_cb_t func)
 		{ aio, func },
 	});
 
-	m->thread->notify();
-
 	return CELL_OK;
 }
 
@@ -987,8 +981,6 @@ s32 cellFsAioWrite(vm::ptr<CellFsAio> aio, vm::ptr<s32> id, fs_aio_cb_t func)
 		{ aio, func },
 	});
 
-	m->thread->notify();
-
 	return CELL_OK;
 }
 
diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp
index 0bf640c4d4..7d30c3a983 100644
--- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp
@@ -6,6 +6,7 @@
 #include "Emu/Memory/vm.h"
 #include "Emu/RSX/GSRender.h"
 #include "cellGcmSys.h"
+#include "sysPrxForUser.h"
 
 #include <thread>
 
@@ -346,7 +347,7 @@ void _cellGcmFunc15(vm::ptr<CellGcmContextData> context)
 u32 g_defaultCommandBufferBegin, g_defaultCommandBufferFragmentCount;
 
 // Called by cellGcmInit
-s32 _cellGcmInitBody(vm::pptr<CellGcmContextData> context, u32 cmdSize, u32 ioSize, u32 ioAddress)
+s32 _cellGcmInitBody(ppu_thread& ppu, vm::pptr<CellGcmContextData> context, u32 cmdSize, u32 ioSize, u32 ioAddress)
 {
 	cellGcmSys.warning("_cellGcmInitBody(context=**0x%x, cmdSize=0x%x, ioSize=0x%x, ioAddress=0x%x)", context, cmdSize, ioSize, ioAddress);
 
@@ -429,8 +430,10 @@ s32 _cellGcmInitBody(vm::pptr<CellGcmContextData> context, u32 cmdSize, u32 ioSi
 	ctrl.get = 0;
 	ctrl.ref = 0; // Set later to -1 at RSX initialization
 
-	render->intr_thread = idm::make_ptr<ppu_thread>("_gcm_intr_thread", 1, 0x4000);
-	render->intr_thread->run();
+	vm::var<u64> _tid;
+	vm::var<char[]> _name = vm::make_str("_gcm_intr_thread");
+	ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, 128, 0, 1, 0x4000, 0, +_name);
+	render->intr_thread = idm::get<named_thread<ppu_thread>>(*_tid);
 	render->main_mem_addr = 0;
 	render->isHLE = true;
 	render->label_addr = m_config->gcm_info.label_addr;
@@ -1380,7 +1383,11 @@ s32 cellGcmCallback(ppu_thread& ppu, vm::ptr<CellGcmContextData> context, u32 co
 		if (isInCommandBufferExcept(getPos, newCommandBuffer.first, newCommandBuffer.second))
 			break;
 
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
+
 		busy_wait();
 	}
 
diff --git a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp
index 302789c85d..183a508247 100644
--- a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp
@@ -242,7 +242,7 @@ s32 cellMsgDialogClose(f32 delay)
 	{
 		if (auto dlg = manager->get<rsx::overlays::message_dialog>())
 		{
-			thread_ctrl::make_shared("cellMsgDialogClose() Thread", [=]
+			thread_ctrl::spawn("cellMsgDialogClose() Thread", [=]
 			{
 				while (get_system_time() < wait_until)
 				{
@@ -256,7 +256,7 @@ s32 cellMsgDialogClose(f32 delay)
 				}
 
 				dlg->close();
-			})->detach();
+			});
 
 			return CELL_OK;
 		}
@@ -269,7 +269,7 @@ s32 cellMsgDialogClose(f32 delay)
 		return CELL_MSGDIALOG_ERROR_DIALOG_NOT_OPENED;
 	}
 
-	thread_ctrl::make_shared("cellMsgDialogClose() Thread", [=]()
+	thread_ctrl::spawn("cellMsgDialogClose() Thread", [=]()
 	{
 		while (dlg->state == MsgDialogState::Open && get_system_time() < wait_until)
 		{
@@ -279,7 +279,7 @@ s32 cellMsgDialogClose(f32 delay)
 		}
 
 		dlg->on_close(CELL_MSGDIALOG_BUTTON_NONE);
-	})->detach();
+	});
 
 	return CELL_OK;
 }
diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
index 98756e8f96..a7245ec909 100644
--- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
@@ -36,7 +36,7 @@ struct cell_error_t
 // Function prototypes
 //----------------------------------------------------------------------------
 
-bool spursKernelEntry(SPUThread& spu);
+bool spursKernelEntry(spu_thread& spu);
 
 // SPURS Internals
 namespace _spurs
@@ -599,18 +599,18 @@ s32 _spurs::create_handler(vm::ptr<CellSpurs> spurs, u32 ppuPriority)
 	{
 		using ppu_thread::ppu_thread;
 
-		virtual void cpu_task() override
+		void non_task()
 		{
 			BIND_FUNC(_spurs::handler_entry)(*this);
 		}
 	};
 
-	auto&& eht = idm::make_ptr<ppu_thread, handler_thread>(std::string(spurs->prefix, spurs->prefixSize) + "SpursHdlr0", ppuPriority, 0x4000);
+	// auto eht = idm::make_ptr<ppu_thread, handler_thread>(std::string(spurs->prefix, spurs->prefixSize) + "SpursHdlr0", ppuPriority, 0x4000);
 
-	spurs->ppu0 = eht->id;
+	// spurs->ppu0 = eht->id;
 
-	eht->gpr[3] = spurs.addr();
-	eht->run();
+	// eht->gpr[3] = spurs.addr();
+	// eht->run();
 
 	return CELL_OK;
 }
@@ -796,15 +796,15 @@ s32 _spurs::create_event_helper(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 p
 	{
 		using ppu_thread::ppu_thread;
 
-		virtual void cpu_task() override
+		void non_task()
 		{
 			BIND_FUNC(_spurs::event_helper_entry)(*this);
 		}
 	};
 
-	auto&& eht = idm::make_ptr<ppu_thread, event_helper_thread>(std::string(spurs->prefix, spurs->prefixSize) + "SpursHdlr1", ppuPriority, 0x8000);
+	//auto eht = idm::make_ptr<ppu_thread, event_helper_thread>(std::string(spurs->prefix, spurs->prefixSize) + "SpursHdlr1", ppuPriority, 0x8000);
 
-	if (!eht)
+	//if (!eht)
 	{
 		sys_event_port_disconnect(spurs->eventPort);
 		sys_event_port_destroy(spurs->eventPort);
@@ -818,10 +818,10 @@ s32 _spurs::create_event_helper(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 p
 		return CELL_SPURS_CORE_ERROR_STAT;
 	}
 
-	eht->gpr[3] = spurs.addr();
-	eht->run();
+	// eht->gpr[3] = spurs.addr();
+	// eht->run();
 
-	spurs->ppu1 = eht->id;
+	// spurs->ppu1 = eht->id;
 	return CELL_OK;
 }
 
@@ -1118,7 +1118,7 @@ s32 _spurs::initialize(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 revision,
 		}
 
 		// entry point cannot be initialized immediately because SPU LS will be rewritten by sys_spu_thread_group_start()
-		//idm::get<SPUThread>(spurs->spus[num])->custom_task = [entry = spurs->spuImg.entry_point](SPUThread& spu)
+		//idm::get<named_thread<spu_thread>>(spurs->spus[num])->custom_task = [entry = spurs->spuImg.entry_point](spu_thread& spu)
 		{
 			// Disabled
 			//spu.RegisterHleFunction(entry, spursKernelEntry);
diff --git a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp
index b4a1a6cf86..133de86a44 100644
--- a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp
@@ -26,57 +26,57 @@ extern logs::channel cellSpurs;
 // SPURS utility functions
 //
 static void cellSpursModulePutTrace(CellSpursTracePacket* packet, u32 dmaTagId);
-static u32 cellSpursModulePollStatus(SPUThread& spu, u32* status);
-static void cellSpursModuleExit(SPUThread& spu);
+static u32 cellSpursModulePollStatus(spu_thread& spu, u32* status);
+static void cellSpursModuleExit(spu_thread& spu);
 
-static bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag);
-static u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask);
-static u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll = true);
-static void spursHalt(SPUThread& spu);
+static bool spursDma(spu_thread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag);
+static u32 spursDmaGetCompletionStatus(spu_thread& spu, u32 tagMask);
+static u32 spursDmaWaitForCompletion(spu_thread& spu, u32 tagMask, bool waitForAll = true);
+static void spursHalt(spu_thread& spu);
 
 //
 // SPURS kernel functions
 //
-static bool spursKernel1SelectWorkload(SPUThread& spu);
-static bool spursKernel2SelectWorkload(SPUThread& spu);
-static void spursKernelDispatchWorkload(SPUThread& spu, u64 widAndPollStatus);
-static bool spursKernelWorkloadExit(SPUThread& spu);
-bool spursKernelEntry(SPUThread& spu);
+static bool spursKernel1SelectWorkload(spu_thread& spu);
+static bool spursKernel2SelectWorkload(spu_thread& spu);
+static void spursKernelDispatchWorkload(spu_thread& spu, u64 widAndPollStatus);
+static bool spursKernelWorkloadExit(spu_thread& spu);
+bool spursKernelEntry(spu_thread& spu);
 
 //
 // SPURS system workload functions
 //
-static bool spursSysServiceEntry(SPUThread& spu);
+static bool spursSysServiceEntry(spu_thread& spu);
 // TODO: Exit
-static void spursSysServiceIdleHandler(SPUThread& spu, SpursKernelContext* ctxt);
-static void spursSysServiceMain(SPUThread& spu, u32 pollStatus);
-static void spursSysServiceProcessRequests(SPUThread& spu, SpursKernelContext* ctxt);
-static void spursSysServiceActivateWorkload(SPUThread& spu, SpursKernelContext* ctxt);
+static void spursSysServiceIdleHandler(spu_thread& spu, SpursKernelContext* ctxt);
+static void spursSysServiceMain(spu_thread& spu, u32 pollStatus);
+static void spursSysServiceProcessRequests(spu_thread& spu, SpursKernelContext* ctxt);
+static void spursSysServiceActivateWorkload(spu_thread& spu, SpursKernelContext* ctxt);
 // TODO: Deactivate workload
-static void spursSysServiceUpdateShutdownCompletionEvents(SPUThread& spu, SpursKernelContext* ctxt, u32 wklShutdownBitSet);
-static void spursSysServiceTraceSaveCount(SPUThread& spu, SpursKernelContext* ctxt);
-static void spursSysServiceTraceUpdate(SPUThread& spu, SpursKernelContext* ctxt, u32 arg2, u32 arg3, u32 forceNotify);
+static void spursSysServiceUpdateShutdownCompletionEvents(spu_thread& spu, SpursKernelContext* ctxt, u32 wklShutdownBitSet);
+static void spursSysServiceTraceSaveCount(spu_thread& spu, SpursKernelContext* ctxt);
+static void spursSysServiceTraceUpdate(spu_thread& spu, SpursKernelContext* ctxt, u32 arg2, u32 arg3, u32 forceNotify);
 // TODO: Deactivate trace
 // TODO: System workload entry
-static void spursSysServiceCleanupAfterSystemWorkload(SPUThread& spu, SpursKernelContext* ctxt);
+static void spursSysServiceCleanupAfterSystemWorkload(spu_thread& spu, SpursKernelContext* ctxt);
 
 //
 // SPURS taskset policy module functions
 //
-static bool spursTasksetEntry(SPUThread& spu);
-static bool spursTasksetSyscallEntry(SPUThread& spu);
-static void spursTasksetResumeTask(SPUThread& spu);
-static void spursTasksetStartTask(SPUThread& spu, CellSpursTaskArgument& taskArgs);
-static s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* isWaiting);
-static void spursTasksetProcessPollStatus(SPUThread& spu, u32 pollStatus);
-static bool spursTasksetPollStatus(SPUThread& spu);
-static void spursTasksetExit(SPUThread& spu);
-static void spursTasksetOnTaskExit(SPUThread& spu, u64 addr, u32 taskId, s32 exitCode, u64 args);
-static s32 spursTasketSaveTaskContext(SPUThread& spu);
-static void spursTasksetDispatch(SPUThread& spu);
-static s32 spursTasksetProcessSyscall(SPUThread& spu, u32 syscallNum, u32 args);
-static void spursTasksetInit(SPUThread& spu, u32 pollStatus);
-static s32 spursTasksetLoadElf(SPUThread& spu, u32* entryPoint, u32* lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments);
+static bool spursTasksetEntry(spu_thread& spu);
+static bool spursTasksetSyscallEntry(spu_thread& spu);
+static void spursTasksetResumeTask(spu_thread& spu);
+static void spursTasksetStartTask(spu_thread& spu, CellSpursTaskArgument& taskArgs);
+static s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* isWaiting);
+static void spursTasksetProcessPollStatus(spu_thread& spu, u32 pollStatus);
+static bool spursTasksetPollStatus(spu_thread& spu);
+static void spursTasksetExit(spu_thread& spu);
+static void spursTasksetOnTaskExit(spu_thread& spu, u64 addr, u32 taskId, s32 exitCode, u64 args);
+static s32 spursTasketSaveTaskContext(spu_thread& spu);
+static void spursTasksetDispatch(spu_thread& spu);
+static s32 spursTasksetProcessSyscall(spu_thread& spu, u32 syscallNum, u32 args);
+static void spursTasksetInit(spu_thread& spu, u32 pollStatus);
+static s32 spursTasksetLoadElf(spu_thread& spu, u32* entryPoint, u32* lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments);
 
 //----------------------------------------------------------------------------
 // SPURS utility functions
@@ -89,7 +89,7 @@ void cellSpursModulePutTrace(CellSpursTracePacket* packet, u32 dmaTagId)
 }
 
 // Check for execution right requests
-u32 cellSpursModulePollStatus(SPUThread& spu, u32* status)
+u32 cellSpursModulePollStatus(spu_thread& spu, u32* status)
 {
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 
@@ -114,7 +114,7 @@ u32 cellSpursModulePollStatus(SPUThread& spu, u32* status)
 }
 
 // Exit current workload
-void cellSpursModuleExit(SPUThread& spu)
+void cellSpursModuleExit(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 	spu.pc = ctxt->exitToKernelAddr;
@@ -122,7 +122,7 @@ void cellSpursModuleExit(SPUThread& spu)
 }
 
 // Execute a DMA operation
-bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag)
+bool spursDma(spu_thread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag)
 {
 	spu.set_ch_value(MFC_LSA, lsa);
 	spu.set_ch_value(MFC_EAH, (u32)(ea >> 32));
@@ -141,7 +141,7 @@ bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag)
 }
 
 // Get the status of DMA operations
-u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask)
+u32 spursDmaGetCompletionStatus(spu_thread& spu, u32 tagMask)
 {
 	spu.set_ch_value(MFC_WrTagMask, tagMask);
 	spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_IMMEDIATE);
@@ -149,7 +149,7 @@ u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask)
 }
 
 // Wait for DMA operations to complete
-u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll)
+u32 spursDmaWaitForCompletion(spu_thread& spu, u32 tagMask, bool waitForAll)
 {
 	spu.set_ch_value(MFC_WrTagMask, tagMask);
 	spu.set_ch_value(MFC_WrTagUpdate, waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY);
@@ -157,12 +157,12 @@ u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll)
 }
 
 // Halt the SPU
-void spursHalt(SPUThread& spu)
+void spursHalt(spu_thread& spu)
 {
 	spu.halt();
 }
 
-void sys_spu_thread_exit(SPUThread& spu, s32 status)
+void sys_spu_thread_exit(spu_thread& spu, s32 status)
 {
 	// Cancel any pending status update requests
 	spu.set_ch_value(MFC_WrTagUpdate, 0);
@@ -178,7 +178,7 @@ void sys_spu_thread_exit(SPUThread& spu, s32 status)
 	spu.stop_and_signal(0x102);
 }
 
-void sys_spu_thread_group_exit(SPUThread& spu, s32 status)
+void sys_spu_thread_group_exit(spu_thread& spu, s32 status)
 {
 	// Cancel any pending status update requests
 	spu.set_ch_value(MFC_WrTagUpdate, 0);
@@ -194,7 +194,7 @@ void sys_spu_thread_group_exit(SPUThread& spu, s32 status)
 	spu.stop_and_signal(0x101);
 }
 
-s32 sys_spu_thread_send_event(SPUThread& spu, u8 spup, u32 data0, u32 data1)
+s32 sys_spu_thread_send_event(spu_thread& spu, u8 spup, u32 data0, u32 data1)
 {
 	if (spup > 0x3F)
 	{
@@ -211,7 +211,7 @@ s32 sys_spu_thread_send_event(SPUThread& spu, u8 spup, u32 data0, u32 data1)
 	return static_cast<u32>(spu.get_ch_value(SPU_RdInMbox));
 }
 
-s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status)
+s32 sys_spu_thread_switch_system_module(spu_thread& spu, u32 status)
 {
 	if (spu.get_ch_count(SPU_RdInMbox))
 	{
@@ -246,7 +246,7 @@ s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status)
 //----------------------------------------------------------------------------
 
 // Select a workload to run
-bool spursKernel1SelectWorkload(SPUThread& spu)
+bool spursKernel1SelectWorkload(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 
@@ -430,7 +430,7 @@ bool spursKernel1SelectWorkload(SPUThread& spu)
 }
 
 // Select a workload to run
-bool spursKernel2SelectWorkload(SPUThread& spu)
+bool spursKernel2SelectWorkload(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 
@@ -603,7 +603,7 @@ bool spursKernel2SelectWorkload(SPUThread& spu)
 }
 
 // SPURS kernel dispatch workload
-void spursKernelDispatchWorkload(SPUThread& spu, u64 widAndPollStatus)
+void spursKernelDispatchWorkload(spu_thread& spu, u64 widAndPollStatus)
 {
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 	auto isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false;
@@ -655,7 +655,7 @@ void spursKernelDispatchWorkload(SPUThread& spu, u64 widAndPollStatus)
 }
 
 // SPURS kernel workload exit
-bool spursKernelWorkloadExit(SPUThread& spu)
+bool spursKernelWorkloadExit(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 	auto isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false;
@@ -676,10 +676,8 @@ bool spursKernelWorkloadExit(SPUThread& spu)
 }
 
 // SPURS kernel entry point
-bool spursKernelEntry(SPUThread& spu)
+bool spursKernelEntry(spu_thread& spu)
 {
-	thread_ctrl::eternalize();
-
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 	memset(ctxt, 0, sizeof(SpursKernelContext));
 
@@ -728,7 +726,7 @@ bool spursKernelEntry(SPUThread& spu)
 //----------------------------------------------------------------------------
 
 // Entry point of the system service
-bool spursSysServiceEntry(SPUThread& spu)
+bool spursSysServiceEntry(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + spu.gpr[3]._u32[3]);
 	auto arg = spu.gpr[4]._u64[1];
@@ -757,7 +755,7 @@ bool spursSysServiceEntry(SPUThread& spu)
 }
 
 // Wait for an external event or exit the SPURS thread group if no workloads can be scheduled
-void spursSysServiceIdleHandler(SPUThread& spu, SpursKernelContext* ctxt)
+void spursSysServiceIdleHandler(spu_thread& spu, SpursKernelContext* ctxt)
 {
 	bool shouldExit;
 
@@ -865,7 +863,7 @@ void spursSysServiceIdleHandler(SPUThread& spu, SpursKernelContext* ctxt)
 }
 
 // Main function for the system service
-void spursSysServiceMain(SPUThread& spu, u32 pollStatus)
+void spursSysServiceMain(spu_thread& spu, u32 pollStatus)
 {
 	auto ctxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 
@@ -970,7 +968,7 @@ void spursSysServiceMain(SPUThread& spu, u32 pollStatus)
 }
 
 // Process any requests
-void spursSysServiceProcessRequests(SPUThread& spu, SpursKernelContext* ctxt)
+void spursSysServiceProcessRequests(spu_thread& spu, SpursKernelContext* ctxt)
 {
 	bool updateTrace = false;
 	bool updateWorkload = false;
@@ -1023,7 +1021,7 @@ void spursSysServiceProcessRequests(SPUThread& spu, SpursKernelContext* ctxt)
 }
 
 // Activate a workload
-void spursSysServiceActivateWorkload(SPUThread& spu, SpursKernelContext* ctxt)
+void spursSysServiceActivateWorkload(spu_thread& spu, SpursKernelContext* ctxt)
 {
 	auto spurs = vm::_ptr<CellSpurs>(spu.offset + 0x100);
 	std::memcpy(vm::base(spu.offset + 0x30000), ctxt->spurs->wklInfo1, 0x200);
@@ -1121,7 +1119,7 @@ void spursSysServiceActivateWorkload(SPUThread& spu, SpursKernelContext* ctxt)
 }
 
 // Update shutdown completion events
-void spursSysServiceUpdateShutdownCompletionEvents(SPUThread& spu, SpursKernelContext* ctxt, u32 wklShutdownBitSet)
+void spursSysServiceUpdateShutdownCompletionEvents(spu_thread& spu, SpursKernelContext* ctxt, u32 wklShutdownBitSet)
 {
 	// Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed
 	// workloads that have a shutdown completion hook registered
@@ -1164,7 +1162,7 @@ void spursSysServiceUpdateShutdownCompletionEvents(SPUThread& spu, SpursKernelCo
 }
 
 // Update the trace count for this SPU
-void spursSysServiceTraceSaveCount(SPUThread& spu, SpursKernelContext* ctxt)
+void spursSysServiceTraceSaveCount(spu_thread& spu, SpursKernelContext* ctxt)
 {
 	if (ctxt->traceBuffer)
 	{
@@ -1174,7 +1172,7 @@ void spursSysServiceTraceSaveCount(SPUThread& spu, SpursKernelContext* ctxt)
 }
 
 // Update trace control
-void spursSysServiceTraceUpdate(SPUThread& spu, SpursKernelContext* ctxt, u32 arg2, u32 arg3, u32 forceNotify)
+void spursSysServiceTraceUpdate(spu_thread& spu, SpursKernelContext* ctxt, u32 arg2, u32 arg3, u32 forceNotify)
 {
 	bool notify;
 
@@ -1238,7 +1236,7 @@ void spursSysServiceTraceUpdate(SPUThread& spu, SpursKernelContext* ctxt, u32 ar
 }
 
 // Restore state after executing the system workload
-void spursSysServiceCleanupAfterSystemWorkload(SPUThread& spu, SpursKernelContext* ctxt)
+void spursSysServiceCleanupAfterSystemWorkload(spu_thread& spu, SpursKernelContext* ctxt)
 {
 	u8 wklId;
 
@@ -1314,7 +1312,7 @@ enum SpursTasksetRequest
 };
 
 // Taskset PM entry point
-bool spursTasksetEntry(SPUThread& spu)
+bool spursTasksetEntry(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 	auto kernelCtxt = vm::_ptr<SpursKernelContext>(spu.offset + spu.gpr[3]._u32[3]);
@@ -1353,7 +1351,7 @@ bool spursTasksetEntry(SPUThread& spu)
 }
 
 // Entry point into the Taskset PM for task syscalls
-bool spursTasksetSyscallEntry(SPUThread& spu)
+bool spursTasksetSyscallEntry(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 
@@ -1384,7 +1382,7 @@ bool spursTasksetSyscallEntry(SPUThread& spu)
 }
 
 // Resume a task
-void spursTasksetResumeTask(SPUThread& spu)
+void spursTasksetResumeTask(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 
@@ -1400,7 +1398,7 @@ void spursTasksetResumeTask(SPUThread& spu)
 }
 
 // Start a task
-void spursTasksetStartTask(SPUThread& spu, CellSpursTaskArgument& taskArgs)
+void spursTasksetStartTask(spu_thread& spu, CellSpursTaskArgument& taskArgs)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 	auto taskset = vm::_ptr<CellSpursTaskset>(spu.offset + 0x2700);
@@ -1418,7 +1416,7 @@ void spursTasksetStartTask(SPUThread& spu, CellSpursTaskArgument& taskArgs)
 }
 
 // Process a request and update the state of the taskset
-s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* isWaiting)
+s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* isWaiting)
 {
 	auto kernelCtxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
@@ -1611,7 +1609,7 @@ s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* is
 }
 
 // Process pollStatus received from the SPURS kernel
-void spursTasksetProcessPollStatus(SPUThread& spu, u32 pollStatus)
+void spursTasksetProcessPollStatus(spu_thread& spu, u32 pollStatus)
 {
 	if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG)
 	{
@@ -1620,7 +1618,7 @@ void spursTasksetProcessPollStatus(SPUThread& spu, u32 pollStatus)
 }
 
 // Check execution rights
-bool spursTasksetPollStatus(SPUThread& spu)
+bool spursTasksetPollStatus(spu_thread& spu)
 {
 	u32 pollStatus;
 
@@ -1634,7 +1632,7 @@ bool spursTasksetPollStatus(SPUThread& spu)
 }
 
 // Exit the Taskset PM
-void spursTasksetExit(SPUThread& spu)
+void spursTasksetExit(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 
@@ -1656,7 +1654,7 @@ void spursTasksetExit(SPUThread& spu)
 }
 
 // Invoked when a task exits
-void spursTasksetOnTaskExit(SPUThread& spu, u64 addr, u32 taskId, s32 exitCode, u64 args)
+void spursTasksetOnTaskExit(spu_thread& spu, u64 addr, u32 taskId, s32 exitCode, u64 args)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 
@@ -1670,7 +1668,7 @@ void spursTasksetOnTaskExit(SPUThread& spu, u64 addr, u32 taskId, s32 exitCode,
 }
 
 // Save the context of a task
-s32 spursTasketSaveTaskContext(SPUThread& spu)
+s32 spursTasketSaveTaskContext(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 	auto taskInfo = vm::_ptr<CellSpursTaskset::TaskInfo>(spu.offset + 0x2780);
@@ -1733,7 +1731,7 @@ s32 spursTasketSaveTaskContext(SPUThread& spu)
 }
 
 // Taskset dispatcher
-void spursTasksetDispatch(SPUThread& spu)
+void spursTasksetDispatch(spu_thread& spu)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 	auto taskset = vm::_ptr<CellSpursTaskset>(spu.offset + 0x2700);
@@ -1864,7 +1862,7 @@ void spursTasksetDispatch(SPUThread& spu)
 }
 
 // Process a syscall request
-s32 spursTasksetProcessSyscall(SPUThread& spu, u32 syscallNum, u32 args)
+s32 spursTasksetProcessSyscall(spu_thread& spu, u32 syscallNum, u32 args)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 	auto taskset = vm::_ptr<CellSpursTaskset>(spu.offset + 0x2700);
@@ -1974,7 +1972,7 @@ s32 spursTasksetProcessSyscall(SPUThread& spu, u32 syscallNum, u32 args)
 }
 
 // Initialise the Taskset PM
-void spursTasksetInit(SPUThread& spu, u32 pollStatus)
+void spursTasksetInit(spu_thread& spu, u32 pollStatus)
 {
 	auto ctxt = vm::_ptr<SpursTasksetContext>(spu.offset + 0x2700);
 	auto kernelCtxt = vm::_ptr<SpursKernelContext>(spu.offset + 0x100);
@@ -1995,7 +1993,7 @@ void spursTasksetInit(SPUThread& spu, u32 pollStatus)
 }
 
 // Load an ELF
-s32 spursTasksetLoadElf(SPUThread& spu, u32* entryPoint, u32* lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments)
+s32 spursTasksetLoadElf(spu_thread& spu, u32* entryPoint, u32* lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments)
 {
 	if (elfAddr == 0 || (elfAddr & 0x0F) != 0)
 	{
diff --git a/rpcs3/Emu/Cell/Modules/cellSync.cpp b/rpcs3/Emu/Cell/Modules/cellSync.cpp
index 50d5d6aca9..82aa1eb4f3 100644
--- a/rpcs3/Emu/Cell/Modules/cellSync.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSync.cpp
@@ -79,7 +79,10 @@ error_code cellSyncMutexLock(ppu_thread& ppu, vm::ptr<CellSyncMutex> mutex)
 	// Wait until rel value is equal to old acq value
 	while (mutex->ctrl.load().rel != order)
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	_mm_mfence();
@@ -169,7 +172,10 @@ error_code cellSyncBarrierNotify(ppu_thread& ppu, vm::ptr<CellSyncBarrier> barri
 
 	while (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_notify>())
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	return CELL_OK;
@@ -217,7 +223,10 @@ error_code cellSyncBarrierWait(ppu_thread& ppu, vm::ptr<CellSyncBarrier> barrier
 
 	while (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_wait>())
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	return CELL_OK;
@@ -293,7 +302,10 @@ error_code cellSyncRwmRead(ppu_thread& ppu, vm::ptr<CellSyncRwm> rwm, vm::ptr<vo
 	// wait until `writers` is zero, increase `readers`
 	while (!rwm->ctrl.atomic_op<&CellSyncRwm::try_read_begin>())
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	// copy data to buffer
@@ -357,13 +369,19 @@ error_code cellSyncRwmWrite(ppu_thread& ppu, vm::ptr<CellSyncRwm> rwm, vm::cptr<
 	// wait until `writers` is zero, set to 1
 	while (!rwm->ctrl.atomic_op<&CellSyncRwm::try_write_begin>())
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	// wait until `readers` is zero
 	while (rwm->ctrl.load().readers != 0)
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	// copy data from buffer
@@ -462,7 +480,10 @@ error_code cellSyncQueuePush(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::
 		return CellSyncQueue::try_push_begin(ctrl, depth, &position);
 	}))
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	// copy data from the buffer at the position
@@ -530,7 +551,10 @@ error_code cellSyncQueuePop(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::p
 		return CellSyncQueue::try_pop_begin(ctrl, depth, &position);
 	}))
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	// copy data at the position to the buffer
@@ -598,7 +622,10 @@ error_code cellSyncQueuePeek(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue, vm::
 		return CellSyncQueue::try_peek_begin(ctrl, depth, &position);
 	}))
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	// copy data at the position to the buffer
@@ -680,12 +707,18 @@ error_code cellSyncQueueClear(ppu_thread& ppu, vm::ptr<CellSyncQueue> queue)
 
 	while (!queue->ctrl.atomic_op<&CellSyncQueue::try_clear_begin_1>())
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	while (!queue->ctrl.atomic_op<&CellSyncQueue::try_clear_begin_2>())
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	queue->ctrl.exchange({ 0, 0 });
@@ -1120,7 +1153,10 @@ error_code _cellSyncLFQueuePushBody(ppu_thread& ppu, vm::ptr<CellSyncLFQueue> qu
 			break;
 		}
 
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	const s32 depth = queue->m_depth;
@@ -1415,7 +1451,10 @@ error_code _cellSyncLFQueuePopBody(ppu_thread& ppu, vm::ptr<CellSyncLFQueue> que
 			break;
 		}
 
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 	}
 
 	const s32 depth = queue->m_depth;
diff --git a/rpcs3/Emu/Cell/Modules/cellSysutil.cpp b/rpcs3/Emu/Cell/Modules/cellSysutil.cpp
index accb2f131f..84da30ecc8 100644
--- a/rpcs3/Emu/Cell/Modules/cellSysutil.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSysutil.cpp
@@ -232,20 +232,23 @@ s32 cellSysutilGetSystemParamString(CellSysutilParamId id, vm::ptr<char> buf, u3
 	return CELL_OK;
 }
 
-s32 cellSysutilCheckCallback(ppu_thread& ppu)
+error_code cellSysutilCheckCallback(ppu_thread& ppu)
 {
 	cellSysutil.trace("cellSysutilCheckCallback()");
 
 	const auto cbm = fxm::get_always<sysutil_cb_manager>();
 
-	while (auto&& func = cbm->get_cb())
+	while (auto func = cbm->get_cb())
 	{
 		if (s32 res = func(ppu))
 		{
-			return res;
+			return not_an_error(res);
 		}
 
-		thread_ctrl::test();
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
 	}
 
 	return CELL_OK;
diff --git a/rpcs3/Emu/Cell/Modules/cellVdec.cpp b/rpcs3/Emu/Cell/Modules/cellVdec.cpp
index cb90cbed31..f56a55143a 100644
--- a/rpcs3/Emu/Cell/Modules/cellVdec.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellVdec.cpp
@@ -19,6 +19,8 @@ extern "C"
 #include <mutex>
 #include <queue>
 #include <cmath>
+#include "Utilities/lockless.h"
+#include <variant>
 
 std::mutex g_mutex_avcodec_open2;
 
@@ -26,15 +28,13 @@ LOG_CHANNEL(cellVdec);
 
 vm::gvar<s32> _cell_vdec_prx_ver; // ???
 
-enum class vdec_cmd : u32
-{
-	null,
+constexpr struct vdec_start_seq_t{} vdec_start_seq{}; // Tag command queued by cellVdecStartSeq: exec() flushes the codec buffers and resets frc_set/next_pts/next_dts
+constexpr struct vdec_close_t{} vdec_close{}; // Tag command queued by cellVdecClose: exec() leaves its command loop when it sees this alternative
 
-	start_seq,
-	end_seq,
-	decode,
-	set_frc,
-	close,
+struct vdec_cmd // Decode / end-of-sequence request stored in vdec_context::in_cmd
+{
+	s32 mode; // Decode mode given to cellVdecDecodeAu, or -1 for the end-of-sequence request pushed by cellVdecEndSeq
+	CellVdecAuInfo au; // Access unit info copied from the caller's auInfo; only read by exec() when mode != -1
 };
 
 struct vdec_frame
@@ -60,14 +60,19 @@ struct vdec_frame
 	}
 };
 
-struct vdec_thread : ppu_thread
+struct vdec_context final
 {
+	static constexpr u32 id_base = 0xf0000000;
+	static constexpr u32 id_step = 0x00000100;
+	static constexpr u32 id_count = 1024;
+
 	AVCodec* codec{};
 	AVCodecContext* ctx{};
 	SwsContext* sws{};
 
-	const s32 type;
-	const u32 profile;
+	shared_mutex mutex; // Used for 'out' queue (TODO)
+
+	const u32 type;
 	const u32 mem_addr;
 	const u32 mem_size;
 	const vm::ptr<CellVdecCbMsg> cb_func;
@@ -79,16 +84,16 @@ struct vdec_thread : ppu_thread
 	u64 next_dts{};
 	u64 ppu_tid{};
 
-	std::mutex mutex;
 	std::queue<vdec_frame> out;
-	u32 max_frames = 60;
+	atomic_t<u32> out_max = 60;
 
 	atomic_t<u32> au_count{0};
 
-	vdec_thread(s32 type, u32 profile, u32 addr, u32 size, vm::ptr<CellVdecCbMsg> func, u32 arg, u32 prio, u32 stack)
-		: ppu_thread("HLE Video Decoder", prio, stack)
-		, type(type)
-		, profile(profile)
+	notifier in_cv;
+	lf_queue<std::variant<vdec_start_seq_t, vdec_close_t, vdec_cmd, CellVdecFrameRate>> in_cmd;
+
+	vdec_context(s32 type, u32 profile, u32 addr, u32 size, vm::ptr<CellVdecCbMsg> func, u32 arg)
+		: type(type)
 		, mem_addr(addr)
 		, mem_size(size)
 		, cb_func(func)
@@ -144,55 +149,51 @@ struct vdec_thread : ppu_thread
 		}
 	}
 
-	virtual ~vdec_thread() override
+	~vdec_context()
 	{
 		avcodec_close(ctx);
 		avcodec_free_context(&ctx);
 		sws_freeContext(sws);
 	}
 
-	virtual std::string dump() const override
+	void exec(ppu_thread& ppu, u32 vid)
 	{
-		// TODO
-		return ppu_thread::dump();
-	}
+		ppu_tid = ppu.id;
 
-	virtual void cpu_task() override
-	{
-		while (cmd64 cmd = cmd_wait())
+		std::shared_lock no_lock(in_cv, std::try_to_lock);
+
+		for (auto cmds = in_cmd.pop_all(); !Emu.IsStopped(); cmds ? cmds = cmds->pop_all() : cmds = in_cmd.pop_all())
 		{
-			switch (vdec_cmd vcmd = cmd.arg1<vdec_cmd>())
+			if (!cmds)
 			{
-			case vdec_cmd::start_seq:
+				in_cv.wait(1000);
+				continue;
+			}
+
+			if (std::get_if<vdec_start_seq_t>(&cmds->get()))
 			{
-				cmd_pop();
 				avcodec_flush_buffers(ctx);
 
 				frc_set = 0; // TODO: ???
 				next_pts = 0;
 				next_dts = 0;
 				cellVdec.trace("Start sequence...");
-				break;
 			}
-
-			case vdec_cmd::decode:
-			case vdec_cmd::end_seq:
+			else if (auto* cmd = std::get_if<vdec_cmd>(&cmds->get()))
 			{
 				AVPacket packet{};
 				packet.pos = -1;
 
 				u64 au_usrd{};
 
-				if (vcmd == vdec_cmd::decode)
+				if (cmd->mode != -1)
 				{
-					const u32 au_mode = cmd.arg2<u32>();  // TODO
-					const u32 au_addr = cmd_get(1).arg1<u32>();
-					const u32 au_size = cmd_get(1).arg2<u32>();
-					const u64 au_pts = cmd_get(2).as<u64>();
-					const u64 au_dts = cmd_get(3).as<u64>();
-					au_usrd = cmd_get(4).as<u64>(); // TODO
-					const u64 au_spec = cmd_get(5).as<u64>(); // Unused
-					cmd_pop(5);
+					const u32 au_mode = cmd->mode;
+					const u32 au_addr = cmd->au.startAddr;
+					const u32 au_size = cmd->au.size;
+					const u64 au_pts = u64{cmd->au.pts.upper} << 32 | cmd->au.pts.lower;
+					const u64 au_dts = u64{cmd->au.dts.upper} << 32 | cmd->au.dts.lower;
+					au_usrd = cmd->au.userData;
 
 					packet.data = vm::_ptr<u8>(au_addr);
 					packet.size = au_size;
@@ -217,16 +218,14 @@ struct vdec_thread : ppu_thread
 				}
 				else
 				{
-					cmd_pop();
-
 					packet.pts = AV_NOPTS_VALUE;
 					packet.dts = AV_NOPTS_VALUE;
 					cellVdec.trace("End sequence...");
 				}
 
-				while (max_frames)
+				while (out_max)
 				{
-					if (vcmd == vdec_cmd::end_seq)
+					if (cmd->mode == -1)
 					{
 						break;
 					}
@@ -356,59 +355,52 @@ struct vdec_thread : ppu_thread
 
 						std::lock_guard{mutex}, out.push(std::move(frame));
 
-						cb_func(*this, id, CELL_VDEC_MSG_TYPE_PICOUT, CELL_OK, cb_arg);
-						lv2_obj::sleep(*this);
+						cb_func(ppu, vid, CELL_VDEC_MSG_TYPE_PICOUT, CELL_OK, cb_arg);
+						lv2_obj::sleep(ppu);
 					}
 
-					if (vcmd == vdec_cmd::decode)
+					if (cmd->mode != -1)
 					{
 						break;
 					}
 				}
 
-				if (max_frames)
+				if (out_max)
 				{
-					cb_func(*this, id, vcmd == vdec_cmd::decode ? CELL_VDEC_MSG_TYPE_AUDONE : CELL_VDEC_MSG_TYPE_SEQDONE, CELL_OK, cb_arg);
-					lv2_obj::sleep(*this);
+					cb_func(ppu, vid, cmd->mode != -1 ? CELL_VDEC_MSG_TYPE_AUDONE : CELL_VDEC_MSG_TYPE_SEQDONE, CELL_OK, cb_arg);
+					lv2_obj::sleep(ppu);
 				}
 
-				if (vcmd == vdec_cmd::decode)
+				if (cmd->mode != -1)
 				{
 					au_count--;
 				}
 
-				while (std::lock_guard{mutex}, max_frames && out.size() > max_frames)
+				while (!Emu.IsStopped() && out_max && (std::lock_guard{mutex}, out.size() > out_max))
 				{
-					thread_ctrl::wait();
+					in_cv.wait(1000);
 				}
-
+			}
+			else if (auto* frc = std::get_if<CellVdecFrameRate>(&cmds->get()))
+			{
+				frc_set = *frc;
+			}
+			else
+			{
 				break;
 			}
-
-			case vdec_cmd::set_frc:
-			{
-				cmd_pop();
-				frc_set = cmd.arg2<u32>();
-				break;
-			}
-
-			case vdec_cmd::close:
-			{
-				cmd_pop();
-				state += cpu_flag::exit;
-				return;
-			}
-
-			default:
-			{
-				fmt::throw_exception("Unknown command (0x%x)" HERE, (u32)vcmd);
-			}
-			}
 		}
 	}
 };
 
-u32 vdecQueryAttr(s32 type, u32 profile, u32 spec_addr /* may be 0 */, vm::ptr<CellVdecAttr> attr)
+static void vdecEntry(ppu_thread& ppu, u32 vid) // Body of the decoder PPU thread; registered as a hidden libvdec function and scheduled by vdecOpen
+{
+	idm::get<vdec_context>(vid)->exec(ppu, vid); // Run the command loop (NOTE(review): lookup result is dereferenced unchecked — confirm the context cannot be removed first)
+
+	_sys_ppu_thread_exit(ppu, 0); // exec() returned: terminate this PPU thread
+}
+
+static u32 vdecQueryAttr(s32 type, u32 profile, u32 spec_addr /* may be 0 */, vm::ptr<CellVdecAttr> attr)
 {
 	switch (type) // TODO: check profile levels
 	{
@@ -440,51 +432,51 @@ s32 cellVdecQueryAttrEx(vm::cptr<CellVdecTypeEx> type, vm::ptr<CellVdecAttr> att
 	return vdecQueryAttr(type->codecType, type->profileLevel, type->codecSpecificInfo_addr, attr);
 }
 
+template <typename T, typename U> // T, U: pointer-like descriptors; the callers pass vm::cptr to CellVdecType[Ex] and CellVdecResource[Ex]
+static s32 vdecOpen(ppu_thread& ppu, T type, U res, vm::cptr<CellVdecCb> cb, vm::ptr<u32> handle) // Shared implementation of cellVdecOpen and cellVdecOpenEx
+{
+	// Create decoder context; its idm id (vid) is also the handle written back to the caller
+	const u32 vid = idm::make<vdec_context>(type->codecType, type->profileLevel, res->memAddr, res->memSize, cb->cbFunc, cb->cbArg);
+
+	// Create the PPU thread that will run vdecEntry (NOTE(review): neither idm::make nor the syscall result is checked)
+	vm::var<u64> _tid;
+	vm::var<char[]> _name = vm::make_str("HLE Video Decoder");
+	ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, 0, vid, +res->ppuThreadPriority, +res->ppuThreadStackSize, SYS_PPU_THREAD_CREATE_INTERRUPT, +_name);
+	*handle = vid; // Later calls resolve this handle with idm::get<vdec_context>
+
+	const auto thrd = idm::get<named_thread<ppu_thread>>(*_tid); // Thread object created by the syscall above
+
+	thrd->cmd_list
+	({
+		{ ppu_cmd::set_args, 1 }, u64{vid}, // Single argument for vdecEntry: the context id
+		{ ppu_cmd::hle_call, FIND_FUNC(vdecEntry) }, // Execute vdecEntry by its HLE function index
+	});
+
+	thrd->state -= cpu_flag::stop; // Clear the stop flag...
+	thread_ctrl::notify(*thrd); // ...and notify the thread (same pair replaces the former run() elsewhere in this patch)
+
+	return CELL_OK;
+}
+
 s32 cellVdecOpen(ppu_thread& ppu, vm::cptr<CellVdecType> type, vm::cptr<CellVdecResource> res, vm::cptr<CellVdecCb> cb, vm::ptr<u32> handle)
 {
 	cellVdec.warning("cellVdecOpen(type=*0x%x, res=*0x%x, cb=*0x%x, handle=*0x%x)", type, res, cb, handle);
 
-	// Create decoder thread
-	auto&& vdec = idm::make_ptr<ppu_thread, vdec_thread>(type->codecType, type->profileLevel, res->memAddr, res->memSize, cb->cbFunc, cb->cbArg, res->ppuThreadPriority, res->ppuThreadStackSize);
-
-	// Hack: store thread id (normally it should be pointer)
-	*handle = vdec->id;
-
-	vm::var<u64> _tid;
-	ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, 1148, 0, 900, 0x4000, SYS_PPU_THREAD_CREATE_INTERRUPT, vm::null);
-	vdec->gpr[13] = idm::get<ppu_thread>(*_tid)->gpr[13];
-	vdec->ppu_tid = *_tid;
-
-	vdec->run();
-
-	return CELL_OK;
+	return vdecOpen(ppu, type, res, cb, handle); // Context and thread creation live in the vdecOpen template shared with cellVdecOpenEx
 }
 
 s32 cellVdecOpenEx(ppu_thread& ppu, vm::cptr<CellVdecTypeEx> type, vm::cptr<CellVdecResourceEx> res, vm::cptr<CellVdecCb> cb, vm::ptr<u32> handle)
 {
 	cellVdec.warning("cellVdecOpenEx(type=*0x%x, res=*0x%x, cb=*0x%x, handle=*0x%x)", type, res, cb, handle);
 
-	// Create decoder thread
-	auto&& vdec = idm::make_ptr<ppu_thread, vdec_thread>(type->codecType, type->profileLevel, res->memAddr, res->memSize, cb->cbFunc, cb->cbArg, res->ppuThreadPriority, res->ppuThreadStackSize);
-
-	// Hack: store thread id (normally it should be pointer)
-	*handle = vdec->id;
-
-	vm::var<u64> _tid;
-	ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, 1148, 0, 900, 0x4000, SYS_PPU_THREAD_CREATE_INTERRUPT, vm::null);
-	vdec->gpr[13] = idm::get<ppu_thread>(*_tid)->gpr[13];
-	vdec->ppu_tid = *_tid;
-
-	vdec->run();
-
-	return CELL_OK;
+	return vdecOpen(ppu, type, res, cb, handle); // Context and thread creation live in the vdecOpen template shared with cellVdecOpen
 }
 
 s32 cellVdecClose(ppu_thread& ppu, u32 handle)
 {
 	cellVdec.warning("cellVdecClose(handle=0x%x)", handle);
 
-	const auto vdec = idm::get<ppu_thread, vdec_thread>(handle);
+	const auto vdec = idm::get<vdec_context>(handle);
 
 	if (!vdec)
 	{
@@ -492,15 +484,9 @@ s32 cellVdecClose(ppu_thread& ppu, u32 handle)
 	}
 
 	lv2_obj::sleep(ppu);
-
-	{
-		std::lock_guard lock(vdec->mutex);
-		vdec->cmd_push({vdec_cmd::close, 0});
-		vdec->max_frames = 0;
-	}
-
-	vdec->notify();
-	vdec->join();
+	vdec->out_max = 0;
+	vdec->in_cmd.push(vdec_close);
+	vdec->in_cv.notify_all();
 	ppu_execute<&sys_interrupt_thread_disestablish>(ppu, vdec->ppu_tid);
 	return CELL_OK;
 }
@@ -509,15 +495,15 @@ s32 cellVdecStartSeq(u32 handle)
 {
 	cellVdec.trace("cellVdecStartSeq(handle=0x%x)", handle);
 
-	const auto vdec = idm::get<ppu_thread, vdec_thread>(handle);
+	const auto vdec = idm::get<vdec_context>(handle);
 
 	if (!vdec)
 	{
 		return CELL_VDEC_ERROR_ARG;
 	}
 
-	vdec->cmd_push({vdec_cmd::start_seq, 0});
-	vdec->notify();
+	vdec->in_cmd.push(vdec_start_seq);
+	vdec->in_cv.notify_all();
 	return CELL_OK;
 }
 
@@ -525,15 +511,15 @@ s32 cellVdecEndSeq(u32 handle)
 {
 	cellVdec.warning("cellVdecEndSeq(handle=0x%x)", handle);
 
-	const auto vdec = idm::get<ppu_thread, vdec_thread>(handle);
+	const auto vdec = idm::get<vdec_context>(handle);
 
 	if (!vdec)
 	{
 		return CELL_VDEC_ERROR_ARG;
 	}
 
-	vdec->cmd_push({vdec_cmd::end_seq, 0});
-	vdec->notify();
+	vdec->in_cmd.push(vdec_cmd{-1});
+	vdec->in_cv.notify_all();
 	return CELL_OK;
 }
 
@@ -541,30 +527,21 @@ s32 cellVdecDecodeAu(u32 handle, CellVdecDecodeMode mode, vm::cptr<CellVdecAuInf
 {
 	cellVdec.trace("cellVdecDecodeAu(handle=0x%x, mode=%d, auInfo=*0x%x)", handle, (s32)mode, auInfo);
 
-	const auto vdec = idm::get<ppu_thread, vdec_thread>(handle);
+	const auto vdec = idm::get<vdec_context>(handle);
 
-	if (mode > CELL_VDEC_DEC_MODE_PB_SKIP || !vdec)
+	if (mode < 0 || mode > CELL_VDEC_DEC_MODE_PB_SKIP || !vdec)
 	{
 		return CELL_VDEC_ERROR_ARG;
 	}
 
-	if (vdec->au_count.fetch_op([](u32& c) { if (c < 4) c++; }) >= 4)
+	if (!vdec->au_count.try_inc(4))
 	{
 		return CELL_VDEC_ERROR_BUSY;
 	}
 
 	// TODO: check info
-	vdec->cmd_list
-	({
-		{ vdec_cmd::decode, mode },
-		{ auInfo->startAddr, auInfo->size },
-		u64{auInfo->pts.upper} << 32 | auInfo->pts.lower,
-		u64{auInfo->dts.upper} << 32 | auInfo->dts.lower,
-		auInfo->userData,
-		auInfo->codecSpecificData,
-	});
-
-	vdec->notify();
+	vdec->in_cmd.push(vdec_cmd{mode, *auInfo});
+	vdec->in_cv.notify_all();
 	return CELL_OK;
 }
 
@@ -572,7 +549,7 @@ s32 cellVdecGetPicture(u32 handle, vm::cptr<CellVdecPicFormat> format, vm::ptr<u
 {
 	cellVdec.trace("cellVdecGetPicture(handle=0x%x, format=*0x%x, outBuff=*0x%x)", handle, format, outBuff);
 
-	const auto vdec = idm::get<ppu_thread, vdec_thread>(handle);
+	const auto vdec = idm::get<vdec_context>(handle);
 
 	if (!format || !vdec)
 	{
@@ -580,6 +557,7 @@ s32 cellVdecGetPicture(u32 handle, vm::cptr<CellVdecPicFormat> format, vm::ptr<u
 	}
 
 	vdec_frame frame;
+	bool notify = false;
 	{
 		std::lock_guard lock(vdec->mutex);
 
@@ -591,14 +569,12 @@ s32 cellVdecGetPicture(u32 handle, vm::cptr<CellVdecPicFormat> format, vm::ptr<u
 		frame = std::move(vdec->out.front());
 
 		vdec->out.pop();
-
-		if (vdec->out.size() <= vdec->max_frames)
-		{
-			vdec->notify();
-		}
+		if (vdec->out.size() + 1 == vdec->out_max)
+			notify = true;
 	}
 
-	vdec->notify();
+	if (notify)
+		vdec->in_cv.notify_all();
 
 	if (outBuff)
 	{
@@ -698,7 +674,7 @@ s32 cellVdecGetPicItem(u32 handle, vm::pptr<CellVdecPicItem> picItem)
 {
 	cellVdec.trace("cellVdecGetPicItem(handle=0x%x, picItem=**0x%x)", handle, picItem);
 
-	const auto vdec = idm::get<ppu_thread, vdec_thread>(handle);
+	const auto vdec = idm::get<vdec_context>(handle);
 
 	if (!vdec)
 	{
@@ -893,7 +869,7 @@ s32 cellVdecSetFrameRate(u32 handle, CellVdecFrameRate frc)
 {
 	cellVdec.trace("cellVdecSetFrameRate(handle=0x%x, frc=0x%x)", handle, (s32)frc);
 
-	const auto vdec = idm::get<ppu_thread, vdec_thread>(handle);
+	const auto vdec = idm::get<vdec_context>(handle);
 
 	if (!vdec)
 	{
@@ -901,8 +877,8 @@ s32 cellVdecSetFrameRate(u32 handle, CellVdecFrameRate frc)
 	}
 
 	// TODO: check frc value
-	vdec->cmd_push({vdec_cmd::set_frc, frc});
-	vdec->notify();
+	vdec->in_cmd.push(frc);
+	vdec->in_cv.notify_all();
 	return CELL_OK;
 }
 
@@ -966,4 +942,6 @@ DECLARE(ppu_module_manager::cellVdec)("libvdec", []()
 	REG_FUNC(libvdec, cellVdecSetFrameRate);
 	REG_FUNC(libvdec, cellVdecSetFrameRateExt); // 0xcffc42a5
 	REG_FUNC(libvdec, cellVdecSetPts); // 0x3ce2e4f8
+
+	REG_FUNC(libvdec, vdecEntry).flag(MFF_HIDDEN);
 });
diff --git a/rpcs3/Emu/Cell/Modules/libmixer.cpp b/rpcs3/Emu/Cell/Modules/libmixer.cpp
index 6352d220b5..f503c59a85 100644
--- a/rpcs3/Emu/Cell/Modules/libmixer.cpp
+++ b/rpcs3/Emu/Cell/Modules/libmixer.cpp
@@ -326,7 +326,7 @@ struct surmixer_thread : ppu_thread
 {
 	using ppu_thread::ppu_thread;
 
-	virtual void cpu_task() override
+	void non_task()
 	{
 		const auto g_audio = fxm::get<audio_config>();
 
@@ -489,9 +489,7 @@ s32 cellSurMixerCreate(vm::cptr<CellSurMixerConfig> config)
 
 	libmixer.warning("*** surMixer created (ch1=%d, ch2=%d, ch6=%d, ch8=%d)", config->chStrips1, config->chStrips2, config->chStrips6, config->chStrips8);
 
-	auto&& thread = idm::make_ptr<ppu_thread, surmixer_thread>("Surmixer Thread");
-
-	thread->run();
+	//auto thread = idm::make_ptr<ppu_thread>("Surmixer Thread");
 
 	return CELL_OK;
 }
diff --git a/rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp b/rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp
index a43f1891cb..c5a7ace924 100644
--- a/rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp
+++ b/rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp
@@ -72,7 +72,11 @@ error_code sys_lwcond_signal(ppu_thread& ppu, vm::ptr<sys_lwcond_t> lwcond)
 		// call the syscall
 		if (error_code res = _sys_lwcond_signal(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, -1, 1))
 		{
-			ppu.test_state();
+			if (ppu.test_stopped())
+			{
+				return 0;
+			}
+
 			lwmutex->all_info--;
 
 			if (res != CELL_EPERM)
@@ -103,7 +107,11 @@ error_code sys_lwcond_signal(ppu_thread& ppu, vm::ptr<sys_lwcond_t> lwcond)
 	// call the syscall
 	if (error_code res = _sys_lwcond_signal(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, -1, 3))
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
+
 		lwmutex->all_info--;
 
 		// unlock the lightweight mutex
@@ -145,9 +153,12 @@ error_code sys_lwcond_signal_all(ppu_thread& ppu, vm::ptr<sys_lwcond_t> lwcond)
 			return res;
 		}
 
-		ppu.test_state();
-		lwmutex->all_info += +res;
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
 
+		lwmutex->all_info += +res;
 		return CELL_OK;
 	}
 
@@ -167,7 +178,10 @@ error_code sys_lwcond_signal_all(ppu_thread& ppu, vm::ptr<sys_lwcond_t> lwcond)
 	// if locking succeeded, call the syscall
 	error_code res = _sys_lwcond_signal_all(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, 1);
 
-	ppu.test_state();
+	if (ppu.test_stopped())
+	{
+		return 0;
+	}
 
 	if (res > 0)
 	{
@@ -206,7 +220,11 @@ error_code sys_lwcond_signal_to(ppu_thread& ppu, vm::ptr<sys_lwcond_t> lwcond, u
 		// call the syscall
 		if (error_code res = _sys_lwcond_signal(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, ppu_thread_id, 1))
 		{
-			ppu.test_state();
+			if (ppu.test_stopped())
+			{
+				return 0;
+			}
+
 			lwmutex->all_info--;
 
 			return res;
@@ -234,7 +252,11 @@ error_code sys_lwcond_signal_to(ppu_thread& ppu, vm::ptr<sys_lwcond_t> lwcond, u
 	// call the syscall
 	if (error_code res = _sys_lwcond_signal(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, ppu_thread_id, 3))
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
+
 		lwmutex->all_info--;
 
 		// unlock the lightweight mutex
diff --git a/rpcs3/Emu/Cell/Modules/sys_spinlock.cpp b/rpcs3/Emu/Cell/Modules/sys_spinlock.cpp
index f206cf9754..2f7276645f 100644
--- a/rpcs3/Emu/Cell/Modules/sys_spinlock.cpp
+++ b/rpcs3/Emu/Cell/Modules/sys_spinlock.cpp
@@ -16,15 +16,20 @@ void sys_spinlock_initialize(vm::ptr<atomic_be_t<u32>> lock)
 	}
 }
 
-void sys_spinlock_lock(ppu_thread& ppu, vm::ptr<atomic_be_t<u32>> lock)
+error_code sys_spinlock_lock(ppu_thread& ppu, vm::ptr<atomic_be_t<u32>> lock) // Spins until the lock word is taken or test_stopped() asks to bail out
 {
 	sysPrxForUser.trace("sys_spinlock_lock(lock=*0x%x)", lock);
 
 	// Try to exchange with 0xabadcafe, repeat until exchanged with 0
 	while (*lock || lock->exchange(0xabadcafe))
 	{
-		ppu.test_state();
+		if (ppu.test_stopped()) // Replaces test_state(): leave the spin loop with an early return instead
+		{
+			return 0;
+		}
 	}
+
+	return not_an_error(ppu.gpr[3]); // NOTE(review): presumably hands r3 back unchanged to mimic the former void signature — confirm
 }
 
 s32 sys_spinlock_trylock(vm::ptr<atomic_be_t<u32>> lock)
diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp
index 00c42866f7..f26fadc49e 100644
--- a/rpcs3/Emu/Cell/PPUModule.cpp
+++ b/rpcs3/Emu/Cell/PPUModule.cpp
@@ -983,7 +983,11 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, const std::stri
 	if (Emu.IsReady() && fxm::import<ppu_module>([&] { return prx; }))
 	{
 		// Special loading mode
-		auto ppu = idm::make_ptr<ppu_thread>("test_thread", 0, 0x100000);
+		ppu_thread_params p{};
+		p.stack_addr = vm::cast(vm::alloc(0x100000, vm::stack, 4096));
+		p.stack_size = 0x100000;
+
+		auto ppu = idm::make_ptr<named_thread<ppu_thread>>("PPU[0x1000000] Thread (test_thread)", p, "test_thread", 0);
 
 		ppu->cmd_push({ppu_cmd::initialize, 0});
 	}
@@ -1463,7 +1467,7 @@ void ppu_load_exec(const ppu_exec_object& elf)
 	}
 
 	// Fix primary stack size
-	switch (primary_stacksize)
+	switch (u32 sz = primary_stacksize)
 	{
 	case 0x10: primary_stacksize = 32 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_32K
 	case 0x20: primary_stacksize = 64 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_64K
@@ -1472,10 +1476,19 @@ void ppu_load_exec(const ppu_exec_object& elf)
 	case 0x50: primary_stacksize = 256 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_256K
 	case 0x60: primary_stacksize = 512 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_512K
 	case 0x70: primary_stacksize = 1024 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_1M
+	default:
+	{
+		primary_stacksize = sz >= 4096 ? ::align(std::min<u32>(sz, 0x100000), 4096) : 0x4000;
+		break;
+	}
 	}
 
 	// Initialize main thread
-	auto ppu = idm::make_ptr<ppu_thread>("main_thread", primary_prio, primary_stacksize);
+	ppu_thread_params p{};
+	p.stack_addr = vm::cast(vm::alloc(primary_stacksize, vm::stack, 4096));
+	p.stack_size = primary_stacksize;
+
+	auto ppu = idm::make_ptr<named_thread<ppu_thread>>("PPU[0x1000000] Thread (main_thread)", p, "main_thread", primary_prio);
 
 	// Write initial data (exitspawn)
 	if (Emu.data.size())
diff --git a/rpcs3/Emu/Cell/PPUModule.h b/rpcs3/Emu/Cell/PPUModule.h
index d3c434513b..f4d7efb768 100644
--- a/rpcs3/Emu/Cell/PPUModule.h
+++ b/rpcs3/Emu/Cell/PPUModule.h
@@ -77,9 +77,6 @@ class ppu_static_module final
 public:
 	const std::string name;
 
-	task_stack on_load;
-	task_stack on_unload;
-
 	std::unordered_map<u32, ppu_static_function, value_hash<u32>> functions;
 	std::unordered_map<u32, ppu_static_variable, value_hash<u32>> variables;
 
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 277ca698d7..5c300d9ca6 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -334,37 +334,6 @@ extern void ppu_breakpoint(u32 addr, bool isAdding)
 	}
 }
 
-void ppu_thread::on_spawn()
-{
-	if (g_cfg.core.thread_scheduler_enabled)
-	{
-		// Bind to primary set
-		thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::ppu));
-	}
-}
-
-void ppu_thread::on_init(const std::shared_ptr<void>& _this)
-{
-	if (!stack_addr)
-	{
-		// Allocate stack + gap between stacks
-		auto new_stack_base = vm::alloc(stack_size + 4096, vm::stack, 4096);
-		if (!new_stack_base)
-		{
-			fmt::throw_exception("Out of stack memory (size=0x%x)" HERE, stack_size);
-		}
-
-		const_cast<u32&>(stack_addr) = new_stack_base + 4096;
-
-		// Make the gap inaccessible
-		vm::page_protect(new_stack_base, 4096, 0, 0, vm::page_readable + vm::page_writable);
-
-		gpr[1] = ::align(stack_addr + stack_size, 0x200) - 0x200;
-
-		cpu_thread::on_init(_this);
-	}
-}
-
 //sets breakpoint, does nothing if there is a breakpoint there already
 extern void ppu_set_breakpoint(u32 addr)
 {
@@ -427,9 +396,15 @@ extern bool ppu_patch(u32 addr, u32 value)
 	return true;
 }
 
+void ppu_thread::on_cleanup(named_thread<ppu_thread>* _this) // Static hook; presumably invoked by the thread framework when the thread is cleaned up — confirm in Utilities/Thread.h
+{
+	// Remove the thread's idm entry, looked up by its id
+	idm::remove<named_thread<ppu_thread>>(_this->id);
+}
+
 std::string ppu_thread::get_name() const
 {
-	return fmt::format("PPU[0x%x] Thread (%s)", id, m_name);
+	return fmt::format("PPU[0x%x] Thread (%s)", id, ppu_name.get());
 }
 
 std::string ppu_thread::dump() const
@@ -564,6 +539,12 @@ void ppu_thread::cpu_task()
 			cmd_pop(), ppu_function_manager::get().at(arg)(*this);
 			break;
 		}
+		case ppu_cmd::ptr_call:
+		{
+			const ppu_function_t func = cmd_get(1).as<ppu_function_t>();
+			cmd_pop(1), func(*this);
+			break;
+		}
 		case ppu_cmd::initialize:
 		{
 			cmd_pop(), ppu_initialize();
@@ -697,20 +678,38 @@ void ppu_thread::exec_task()
 
 ppu_thread::~ppu_thread()
 {
-	if (stack_addr)
-	{
-		vm::dealloc_verbose_nothrow(stack_addr - 4096, vm::stack);
-	}
+	// Deallocate the stack area passed in via ppu_thread_params (no 4096-byte gap offset or null check anymore)
+	vm::dealloc_verbose_nothrow(stack_addr, vm::stack);
 }
 
-ppu_thread::ppu_thread(const std::string& name, u32 prio, u32 stack)
+ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 prio, int detached)
 	: cpu_thread(idm::last_id())
 	, prio(prio)
-	, stack_size(stack >= 0x1000 ? ::align(std::min<u32>(stack, 0x100000), 0x1000) : 0x4000)
-	, stack_addr(0)
+	, stack_size(param.stack_size)
+	, stack_addr(param.stack_addr)
 	, start_time(get_system_time())
-	, m_name(name)
+	, joiner(-!!detached)
+	, ppu_name(name)
 {
+	gpr[1] = ::align(stack_addr + stack_size, 0x200) - 0x200;
+
+	gpr[13] = param.tls_addr;
+
+	if (detached >= 0 && id != id_base)
+	{
+		// Initialize thread entry point
+		cmd_list
+		({
+		    {ppu_cmd::set_args, 2}, param.arg0, param.arg1,
+		    {ppu_cmd::lle_call, param.entry},
+		});
+	}
+	else
+	{
+		// Save entry for further use (interrupt handler workaround)
+		gpr[2] = param.entry;
+	}
+
 	// Trigger the scheduler
 	state += cpu_flag::suspend;
 
@@ -765,7 +764,7 @@ cmd64 ppu_thread::cmd_wait()
 	{
 		if (UNLIKELY(state))
 		{
-			if (state & (cpu_flag::stop + cpu_flag::exit))
+			if (is_stopped())
 			{
 				return cmd64{};
 			}
@@ -802,8 +801,7 @@ void ppu_thread::fast_call(u32 addr, u32 rtoc)
 	g_tls_log_prefix = []
 	{
 		const auto _this = static_cast<ppu_thread*>(get_current_cpu_thread());
-
-		return fmt::format("%s [0x%08x]", _this->get_name(), _this->cia);
+		return fmt::format("%s [0x%08x]", thread_ctrl::get_name(), _this->cia);
 	};
 
 	auto at_ret = gsl::finally([&]()
@@ -930,7 +928,11 @@ extern void sse_cellbe_stvrx_v0(u64 addr, __m128i a);
 static void ppu_check(ppu_thread& ppu, u64 addr)
 {
 	ppu.cia = ::narrow<u32>(addr);
-	ppu.test_state();
+
+	if (ppu.test_stopped()) // NOTE(review): the branch only returns at the end of the function, so the call presumably matters for its side effects — confirm
+	{
+		return;
+	}
 }
 
 static void ppu_trace(u64 addr)
diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h
index c4394dbbc4..c00ae7f9b7 100644
--- a/rpcs3/Emu/Cell/PPUThread.h
+++ b/rpcs3/Emu/Cell/PPUThread.h
@@ -14,6 +14,7 @@ enum class ppu_cmd : u32
 	set_args, // Set general-purpose args (+arg cmd)
 	lle_call, // Load addr and rtoc at *arg or *gpr[arg] and execute
 	hle_call, // Execute function by index (arg)
+	ptr_call, // Execute function by pointer
 	initialize, // ppu_initialize()
 	sleep,
 	reset_stack, // resets stack address
@@ -24,6 +25,17 @@ enum class ppu_syscall_code : u64
 {
 };
 
+// ppu_thread constructor argument: stack placement, TLS pointer and entry point data consumed by ppu_thread::ppu_thread
+struct ppu_thread_params
+{
+	vm::addr_t stack_addr; // Base address of the stack area; stored in ppu_thread::stack_addr and released from vm::stack by the destructor
+	u32 stack_size; // Size of the stack area; with stack_addr it determines the initial gpr[1]
+	u32 tls_addr; // Copied to gpr[13]
+	u32 entry; // Passed to ppu_cmd::lle_call, or saved in gpr[2] for the interrupt handler workaround
+	u64 arg0; // First value queued after ppu_cmd::set_args
+	u64 arg1; // Second value queued after ppu_cmd::set_args
+};
+
 class ppu_thread : public cpu_thread
 {
 public:
@@ -31,17 +43,17 @@ public:
 	static const u32 id_step = 1;
 	static const u32 id_count = 2048;
 
-	virtual void on_spawn() override;
-	virtual void on_init(const std::shared_ptr<void>&) override;
+	static void on_cleanup(named_thread<ppu_thread>*);
+
 	virtual std::string get_name() const override;
 	virtual std::string dump() const override;
-	virtual void cpu_task() override;
+	virtual void cpu_task() override final;
 	virtual void cpu_sleep() override;
 	virtual void cpu_mem() override;
 	virtual void cpu_unmem() override;
 	virtual ~ppu_thread() override;
 
-	ppu_thread(const std::string& name, u32 prio = 0, u32 stack = 0x10000);
+	ppu_thread(const ppu_thread_params&, std::string_view name, u32 prio, int detached = 0);
 
 	u64 gpr[32] = {}; // General-Purpose Registers
 	f64 fpr[32] = {}; // Floating Point Registers
@@ -153,7 +165,7 @@ public:
 	u64 start_time{0}; // Sleep start timepoint
 	const char* last_function{}; // Last function name for diagnosis, optimized for speed.
 
-	const std::string m_name; // Thread name
+	lf_value<std::string> ppu_name; // Thread name
 
 	be_t<u64>* get_stack_arg(s32 i, u64 align = alignof(u64));
 	void exec_task();
diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp
index f74986c6be..aaedc088a0 100644
--- a/rpcs3/Emu/Cell/RawSPUThread.cpp
+++ b/rpcs3/Emu/Cell/RawSPUThread.cpp
@@ -9,39 +9,7 @@
 // Originally, SPU MFC registers are accessed externally in a concurrent manner (don't mix with channels, SPU MFC channels are isolated)
 thread_local spu_mfc_cmd g_tls_mfc[8] = {};
 
-void RawSPUThread::cpu_task()
-{
-	// get next PC and SPU Interrupt status
-	pc = npc.exchange(0);
-
-	set_interrupt_status((pc & 1) != 0);
-
-	pc &= 0x3fffc;
-
-	SPUThread::cpu_task();
-
-	// save next PC and current SPU Interrupt status
-	npc = pc | (interrupts_enabled);
-}
-
-void RawSPUThread::on_init(const std::shared_ptr<void>& _this)
-{
-	if (!offset)
-	{
-		// Install correct SPU index and LS address
-		const_cast<u32&>(index) = id;
-		const_cast<u32&>(offset) = verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, 0x40000));
-
-		cpu_thread::on_init(_this);
-	}
-}
-
-RawSPUThread::RawSPUThread(const std::string& name)
-	: SPUThread(name, 0, nullptr)
-{
-}
-
-bool RawSPUThread::read_reg(const u32 addr, u32& value)
+bool spu_thread::read_reg(const u32 addr, u32& value)
 {
 	const u32 offset = addr - RAW_SPU_BASE_ADDR - index * RAW_SPU_OFFSET - RAW_SPU_PROB_OFFSET;
 
@@ -101,7 +69,7 @@ bool RawSPUThread::read_reg(const u32 addr, u32& value)
 	return false;
 }
 
-bool RawSPUThread::write_reg(const u32 addr, const u32 value)
+bool spu_thread::write_reg(const u32 addr, const u32 value)
 {
 	auto try_start = [this]()
 	{
@@ -116,7 +84,8 @@ bool RawSPUThread::write_reg(const u32 addr, const u32 value)
 			return true;
 		}))
 		{
-			run();
+			state -= cpu_flag::stop;
+			thread_ctrl::notify(static_cast<named_thread<spu_thread>&>(*this));
 		}
 	};
 
@@ -291,7 +260,11 @@ bool RawSPUThread::write_reg(const u32 addr, const u32 value)
 
 void spu_load_exec(const spu_exec_object& elf)
 {
-	auto spu = idm::make_ptr<RawSPUThread>("TEST_SPU");
+	auto ls0 = vm::cast(vm::falloc(RAW_SPU_BASE_ADDR, 0x40000, vm::spu));
+	auto spu = idm::make_ptr<named_thread<spu_thread>>("TEST_SPU", ls0, nullptr, 0, "");
+
+	spu_thread::g_raw_spu_ctr++;
+	spu_thread::g_raw_spu_id[0] = spu->id;
 
 	for (const auto& prog : elf.progs)
 	{
@@ -301,6 +274,5 @@ void spu_load_exec(const spu_exec_object& elf)
 		}
 	}
 
-	spu->cpu_init();
 	spu->npc = elf.header.e_entry;
 }
diff --git a/rpcs3/Emu/Cell/RawSPUThread.h b/rpcs3/Emu/Cell/RawSPUThread.h
index 668db24678..921db8f1f6 100644
--- a/rpcs3/Emu/Cell/RawSPUThread.h
+++ b/rpcs3/Emu/Cell/RawSPUThread.h
@@ -1,20 +1,3 @@
 #pragma once
 
 #include "SPUThread.h"
-
-class RawSPUThread final : public SPUThread
-{
-	void cpu_task() override;
-
-public:
-	static const u32 id_base = 0;
-	static const u32 id_step = 1;
-	static const u32 id_count = 5;
-
-	void on_init(const std::shared_ptr<void>&) override;
-
-	RawSPUThread(const std::string& name);
-
-	bool read_reg(const u32 addr, u32& value);
-	bool write_reg(const u32 addr, const u32 value);
-};
diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
index 03402567c0..2d3a230de5 100644
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
@@ -16,11 +16,11 @@
 
 #include "SPUASMJITRecompiler.h"
 
-#define SPU_OFF_128(x, ...) asmjit::x86::oword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
-#define SPU_OFF_64(x, ...) asmjit::x86::qword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
-#define SPU_OFF_32(x, ...) asmjit::x86::dword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
-#define SPU_OFF_16(x, ...) asmjit::x86::word_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
-#define SPU_OFF_8(x, ...) asmjit::x86::byte_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
+#define SPU_OFF_128(x, ...) asmjit::x86::oword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
+#define SPU_OFF_64(x, ...) asmjit::x86::qword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
+#define SPU_OFF_32(x, ...) asmjit::x86::dword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
+#define SPU_OFF_16(x, ...) asmjit::x86::word_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
+#define SPU_OFF_8(x, ...) asmjit::x86::byte_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
 
 extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast; // TODO: avoid
 const spu_decoder<spu_recompiler> s_spu_decoder;
@@ -1177,12 +1177,12 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data)
 	return XmmConst(v128::fromV(data));
 }
 
-static void check_state_ret(SPUThread& _spu, void*, u8*)
+static void check_state_ret(spu_thread& _spu, void*, u8*)
 {
 	// MSVC workaround (TCO)
 }
 
-static void check_state(SPUThread* _spu, spu_function_t _ret)
+static void check_state(spu_thread* _spu, spu_function_t _ret)
 {
 	if (_spu->state && _spu->check_state())
 	{
@@ -1209,7 +1209,7 @@ void spu_recompiler::branch_fixed(u32 target)
 		return;
 	}
 
-	c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
+	c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&spu_thread::jit_dispatcher) + target * 2));
 	c->mov(SPU_OFF_32(pc), target);
 	c->cmp(SPU_OFF_32(state), 0);
 	c->jnz(label_stop);
@@ -1251,7 +1251,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
 	}
 	else if (op.e)
 	{
-		auto _throw = [](SPUThread* _spu)
+		auto _throw = [](spu_thread* _spu)
 		{
 			fmt::throw_exception("SPU Interrupts not implemented (mask=0x%x)" HERE, +_spu->ch_event_mask);
 		};
@@ -1270,7 +1270,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
 		c->jmp(no_intr);
 		c->bind(fail);
 		c->mov(SPU_OFF_32(pc), *addr);
-		c->jmp(imm_ptr<void(*)(SPUThread*)>(_throw));
+		c->jmp(imm_ptr<void(*)(spu_thread*)>(_throw));
 
 		// Save addr in srr0 and disable interrupts
 		c->bind(intr);
@@ -1292,7 +1292,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
 	if (!jt && g_cfg.core.spu_block_size != spu_block_size_type::giga)
 	{
 		// Simply external call (return or indirect call)
-		c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
+		c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher)));
 	}
 	else
 	{
@@ -1311,7 +1311,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
 		c->lea(x86::r10, x86::qword_ptr(instr_table));
 		c->cmp(qw1->r32(), end - start);
 		c->lea(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
-		c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
+		c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher)));
 		c->cmovae(x86::r10, *qw1);
 		c->mov(x86::r10, x86::qword_ptr(x86::r10));
 	}
@@ -1321,7 +1321,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
 		// Get stack pointer, try to use native return address (check SPU return address)
 		c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
 		c->and_(qw1->r32(), 0x3fff0);
-		c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)));
+		c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror)));
 		c->cmp(x86::dword_ptr(*qw1, 8), *addr);
 		c->cmove(x86::r10, x86::qword_ptr(*qw1));
 	}
@@ -1352,7 +1352,7 @@ void spu_recompiler::branch_set_link(u32 target)
 			// Get stack pointer, write native and SPU return addresses into the stack mirror
 			c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
 			c->and_(qw1->r32(), 0x3fff0);
-			c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)));
+			c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror)));
 			c->lea(x86::r10, x86::qword_ptr(ret));
 			c->mov(x86::qword_ptr(*qw1, 0), x86::r10);
 			c->mov(x86::qword_ptr(*qw1, 8), target);
@@ -1365,7 +1365,7 @@ void spu_recompiler::branch_set_link(u32 target)
 				c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
 				c->and_(qw1->r32(), 0x3fff0);
 				c->pcmpeqd(x86::xmm0, x86::xmm0);
-				c->movdqa(x86::dqword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)), x86::xmm0);
+				c->movdqa(x86::dqword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror)), x86::xmm0);
 				c->jmp(target);
 			});
 		}
@@ -1374,7 +1374,7 @@ void spu_recompiler::branch_set_link(u32 target)
 
 void spu_recompiler::fall(spu_opcode_t op)
 {
-	auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func, spu_function_t _ret)
+	auto gate = [](spu_thread* _spu, u32 opcode, spu_inter_func_t _func, spu_function_t _ret)
 	{
 		if (!_func(*_spu, {opcode}))
 		{
@@ -1391,7 +1391,7 @@ void spu_recompiler::fall(spu_opcode_t op)
 	c->mov(*ls, op.opcode);
 	c->mov(*qw0, asmjit::imm_ptr(asmjit::Internal::ptr_cast<void*>(g_spu_interpreter_fast.decode(op.opcode))));
 	c->lea(*qw1, asmjit::x86::qword_ptr(next));
-	c->jmp(asmjit::imm_ptr<void(*)(SPUThread*, u32, spu_inter_func_t, spu_function_t)>(gate));
+	c->jmp(asmjit::imm_ptr<void(*)(spu_thread*, u32, spu_inter_func_t, spu_function_t)>(gate));
 	c->align(asmjit::kAlignCode, 16);
 	c->bind(next);
 }
@@ -1442,13 +1442,13 @@ void spu_recompiler::get_events()
 
 		if (utils::has_avx())
 		{
-			c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 0));
+			c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 0));
 			c->vxorps(x86::ymm1, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 0));
-			c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 32));
+			c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 32));
 			c->vxorps(x86::ymm2, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 32));
-			c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 64));
+			c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 64));
 			c->vxorps(x86::ymm3, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 64));
-			c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 96));
+			c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 96));
 			c->vxorps(x86::ymm4, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 96));
 			c->vorps(x86::ymm0, x86::ymm1, x86::ymm2);
 			c->vorps(x86::ymm1, x86::ymm3, x86::ymm4);
@@ -1460,11 +1460,11 @@ void spu_recompiler::get_events()
 		else
 		{
 			c->movaps(x86::xmm0, x86::dqword_ptr(*qw0, *addr));
-			c->xorps(x86::xmm0, x86::dqword_ptr(*cpu, offset32(&SPUThread::rdata) + 0));
+			c->xorps(x86::xmm0, x86::dqword_ptr(*cpu, offset32(&spu_thread::rdata) + 0));
 			for (u32 i = 16; i < 128; i += 16)
 			{
 				c->movaps(x86::xmm1, x86::dqword_ptr(*qw0, *addr, 0, i));
-				c->xorps(x86::xmm1, x86::dqword_ptr(*cpu, offset32(&SPUThread::rdata) + i));
+				c->xorps(x86::xmm1, x86::dqword_ptr(*cpu, offset32(&spu_thread::rdata) + i));
 				c->orps(x86::xmm0, x86::xmm1);
 			}
 
@@ -1495,7 +1495,7 @@ void spu_recompiler::get_events()
 	// Check decrementer event (unlikely)
 	after.emplace_back([=]
 	{
-		auto sub = [](SPUThread* _spu, spu_function_t _ret)
+		auto sub = [](spu_thread* _spu, spu_function_t _ret)
 		{
 			if ((_spu->ch_dec_value - (get_timebased_time() - _spu->ch_dec_start_timestamp)) >> 31)
 			{
@@ -1508,7 +1508,7 @@ void spu_recompiler::get_events()
 
 		c->bind(tcheck);
 		c->lea(*ls, x86::qword_ptr(label2));
-		c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
+		c->jmp(imm_ptr<void(*)(spu_thread*, spu_function_t)>(sub));
 	});
 
 	// Check whether SPU_EVENT_TM is already set
@@ -1527,13 +1527,13 @@ void spu_recompiler::get_events()
 
 	after.emplace_back([=]
 	{
-		auto _throw = [](SPUThread* _spu)
+		auto _throw = [](spu_thread* _spu)
 		{
 			fmt::throw_exception("SPU Events not implemented (mask=0x%x)" HERE, +_spu->ch_event_mask);
 		};
 
 		c->bind(fail);
-		c->jmp(imm_ptr<void(*)(SPUThread*)>(_throw));
+		c->jmp(imm_ptr<void(*)(spu_thread*)>(_throw));
 	});
 
 	// Load active events into addr
@@ -1547,18 +1547,18 @@ void spu_recompiler::get_events()
 
 void spu_recompiler::UNK(spu_opcode_t op)
 {
-	auto gate = [](SPUThread* _spu, u32 op)
+	auto gate = [](spu_thread* _spu, u32 op)
 	{
 		fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op);
 	};
 
 	c->mov(SPU_OFF_32(pc), m_pos);
 	c->mov(*ls, op.opcode);
-	c->jmp(asmjit::imm_ptr<void(*)(SPUThread*, u32)>(gate));
+	c->jmp(asmjit::imm_ptr<void(*)(spu_thread*, u32)>(gate));
 	m_pos = -1;
 }
 
-void spu_stop(SPUThread* _spu, u32 code, spu_function_t _ret)
+void spu_stop(spu_thread* _spu, u32 code, spu_function_t _ret)
 {
 	if (!_spu->stop_and_signal(code))
 	{
@@ -1619,12 +1619,12 @@ void spu_recompiler::MFSPR(spu_opcode_t op)
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
 }
 
-static void spu_rdch_ret(SPUThread& spu, void*, u32)
+static void spu_rdch_ret(spu_thread& spu, void*, u32)
 {
 	// MSVC workaround (TCO)
 }
 
-static void spu_rdch(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32))
+static void spu_rdch(spu_thread* _spu, u32 ch, void(*_ret)(spu_thread&, void*, u32))
 {
 	const s64 result = _spu->get_ch_value(ch);
 
@@ -1733,7 +1733,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
 	{
 		LOG_WARNING(SPU, "[0x%x] RDCH: RdDec", m_pos);
 
-		auto sub1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
+		auto sub1 = [](spu_thread* _spu, v128* _res, spu_function_t _ret)
 		{
 			const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
 
@@ -1744,7 +1744,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
 			_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
 		};
 
-		auto sub2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
+		auto sub2 = [](spu_thread* _spu, v128* _res, spu_function_t _ret)
 		{
 			const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
 
@@ -1752,7 +1752,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
 			_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
 		};
 
-		using ftype = void (*)(SPUThread*, v128*, spu_function_t);
+		using ftype = void (*)(spu_thread*, v128*, spu_function_t);
 
 		asmjit::Label next = c->newLabel();
 		c->mov(SPU_OFF_32(pc), m_pos);
@@ -1817,7 +1817,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
 	c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
 }
 
-static void spu_rchcnt(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32 res))
+static void spu_rchcnt(spu_thread* _spu, u32 ch, void(*_ret)(spu_thread&, void*, u32 res))
 {
 	// Put result into the third argument
 	const u32 res = _spu->get_ch_count(ch);
@@ -2565,12 +2565,12 @@ void spu_recompiler::MTSPR(spu_opcode_t op)
 	// Check SPUInterpreter for notes.
 }
 
-static void spu_wrch_ret(SPUThread& _spu, void*, u8*)
+static void spu_wrch_ret(spu_thread& _spu, void*, u8*)
 {
 	// MSVC workaround (TCO)
 }
 
-static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret)
+static void spu_wrch(spu_thread* _spu, u32 ch, u32 value, spu_function_t _ret)
 {
 	if (!_spu->set_ch_value(ch, value))
 	{
@@ -2580,7 +2580,7 @@ static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret)
 	_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
 }
 
-static void spu_wrch_mfc(SPUThread* _spu, spu_function_t _ret)
+static void spu_wrch_mfc(spu_thread* _spu, spu_function_t _ret)
 {
 	if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
 	{
@@ -2744,7 +2744,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
 	}
 	case MFC_WrListStallAck:
 	{
-		auto sub = [](SPUThread* _spu, spu_function_t _ret)
+		auto sub = [](spu_thread* _spu, spu_function_t _ret)
 		{
 			_spu->do_mfc(true);
 			_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
@@ -2756,14 +2756,14 @@ void spu_recompiler::WRCH(spu_opcode_t op)
 		c->btr(SPU_OFF_32(ch_stall_mask), qw0->r32());
 		c->jnc(ret);
 		c->lea(*ls, x86::qword_ptr(ret));
-		c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
+		c->jmp(imm_ptr<void(*)(spu_thread*, spu_function_t)>(sub));
 		c->align(kAlignCode, 16);
 		c->bind(ret);
 		return;
 	}
 	case SPU_WrDec:
 	{
-		auto sub = [](SPUThread* _spu, spu_function_t _ret)
+		auto sub = [](spu_thread* _spu, spu_function_t _ret)
 		{
 			_spu->ch_dec_start_timestamp = get_timebased_time();
 			_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
@@ -2771,7 +2771,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
 
 		Label ret = c->newLabel();
 		c->lea(*ls, x86::qword_ptr(ret));
-		c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
+		c->jmp(imm_ptr<void(*)(spu_thread*, spu_function_t)>(sub));
 		c->align(kAlignCode, 16);
 		c->bind(ret);
 		c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
@@ -3113,7 +3113,7 @@ void spu_recompiler::CBX(spu_opcode_t op)
 	const XmmLink& vr = XmmAlloc();
 	c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
-	c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x03);
+	c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x03);
 }
 
 void spu_recompiler::CHX(spu_opcode_t op)
@@ -3126,7 +3126,7 @@ void spu_recompiler::CHX(spu_opcode_t op)
 	const XmmLink& vr = XmmAlloc();
 	c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
-	c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x0203);
+	c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x0203);
 }
 
 void spu_recompiler::CWX(spu_opcode_t op)
@@ -3139,7 +3139,7 @@ void spu_recompiler::CWX(spu_opcode_t op)
 	const XmmLink& vr = XmmAlloc();
 	c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
-	c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x00010203);
+	c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x00010203);
 }
 
 void spu_recompiler::CDX(spu_opcode_t op)
@@ -3153,7 +3153,7 @@ void spu_recompiler::CDX(spu_opcode_t op)
 	c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
 	c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
-	c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), *qw0);
+	c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
 }
 
 void spu_recompiler::ROTQBI(spu_opcode_t op)
@@ -3292,7 +3292,7 @@ void spu_recompiler::CBD(spu_opcode_t op)
 	const XmmLink& vr = XmmAlloc();
 	c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
-	c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x03);
+	c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x03);
 }
 
 void spu_recompiler::CHD(spu_opcode_t op)
@@ -3316,7 +3316,7 @@ void spu_recompiler::CHD(spu_opcode_t op)
 	const XmmLink& vr = XmmAlloc();
 	c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
-	c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x0203);
+	c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x0203);
 }
 
 void spu_recompiler::CWD(spu_opcode_t op)
@@ -3340,7 +3340,7 @@ void spu_recompiler::CWD(spu_opcode_t op)
 	const XmmLink& vr = XmmAlloc();
 	c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
-	c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x00010203);
+	c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x00010203);
 }
 
 void spu_recompiler::CDD(spu_opcode_t op)
@@ -3365,7 +3365,7 @@ void spu_recompiler::CDD(spu_opcode_t op)
 	c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
 	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
 	c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
-	c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), *qw0);
+	c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
 }
 
 void spu_recompiler::ROTQBII(spu_opcode_t op)
diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp
index 6f5a6e0355..62d918a111 100644
--- a/rpcs3/Emu/Cell/SPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp
@@ -60,7 +60,7 @@ namespace asmjit
 			c.shl(x86::eax, I + 4);
 		}
 
-		const auto ptr = x86::oword_ptr(spu, x86::rax, 0, ::offset32(&SPUThread::gpr));
+		const auto ptr = x86::oword_ptr(spu, x86::rax, 0, offsetof(spu_thread, gpr));
 
 		if (utils::has_avx())
 		{
@@ -85,13 +85,13 @@ namespace asmjit
 	}
 }
 
-bool spu_interpreter::UNK(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::UNK(spu_thread& spu, spu_opcode_t op)
 {
 	fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op.opcode);
 }
 
 
-void spu_interpreter::set_interrupt_status(SPUThread& spu, spu_opcode_t op)
+void spu_interpreter::set_interrupt_status(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.e)
 	{
@@ -115,37 +115,37 @@ void spu_interpreter::set_interrupt_status(SPUThread& spu, spu_opcode_t op)
 }
 
 
-bool spu_interpreter::STOP(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::STOP(spu_thread& spu, spu_opcode_t op)
 {
 	return spu.stop_and_signal(op.opcode & 0x3fff);
 }
 
-bool spu_interpreter::LNOP(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::LNOP(spu_thread& spu, spu_opcode_t op)
 {
 	return true;
 }
 
 // This instruction must be used following a store instruction that modifies the instruction stream.
-bool spu_interpreter::SYNC(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SYNC(spu_thread& spu, spu_opcode_t op)
 {
 	_mm_mfence();
 	return true;
 }
 
 // This instruction forces all earlier load, store, and channel instructions to complete before proceeding.
-bool spu_interpreter::DSYNC(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::DSYNC(spu_thread& spu, spu_opcode_t op)
 {
 	_mm_mfence();
 	return true;
 }
 
-bool spu_interpreter::MFSPR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MFSPR(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].clear(); // All SPRs read as zero. TODO: check it.
 	return true;
 }
 
-bool spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::RDCH(spu_thread& spu, spu_opcode_t op)
 {
 	const s64 result = spu.get_ch_value(op.ra);
 
@@ -158,43 +158,43 @@ bool spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::RCHCNT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::RCHCNT(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::from32r(spu.get_ch_count(op.ra));
 	return true;
 }
 
-bool spu_interpreter::SF(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SF(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::sub32(spu.gpr[op.rb], spu.gpr[op.ra]);
 	return true;
 }
 
-bool spu_interpreter::OR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::OR(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = spu.gpr[op.ra] | spu.gpr[op.rb];
 	return true;
 }
 
-bool spu_interpreter::BG(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BG(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_add_epi32(sse_cmpgt_epu32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi), _mm_set1_epi32(1));
 	return true;
 }
 
-bool spu_interpreter::SFH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SFH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::sub16(spu.gpr[op.rb], spu.gpr[op.ra]);
 	return true;
 }
 
-bool spu_interpreter::NOR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::NOR(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = ~(spu.gpr[op.ra] | spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter::ABSDB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ABSDB(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -202,7 +202,7 @@ bool spu_interpreter::ABSDB(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROT(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -214,7 +214,7 @@ bool spu_interpreter::ROT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTM(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTM(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -227,7 +227,7 @@ bool spu_interpreter::ROTM(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTMA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTMA(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -240,7 +240,7 @@ bool spu_interpreter::ROTMA(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::SHL(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHL(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -253,7 +253,7 @@ bool spu_interpreter::SHL(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTH(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -265,7 +265,7 @@ bool spu_interpreter::ROTH(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTHM(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTHM(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -278,7 +278,7 @@ bool spu_interpreter::ROTHM(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTMAH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTMAH(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -291,7 +291,7 @@ bool spu_interpreter::ROTMAH(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::SHLH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHLH(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra];
 	const auto b = spu.gpr[op.rb];
@@ -304,7 +304,7 @@ bool spu_interpreter::SHLH(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const s32 n = op.i7 & 0x1f;
@@ -312,25 +312,25 @@ bool spu_interpreter::ROTI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTMI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTMI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_srli_epi32(spu.gpr[op.ra].vi, 0-op.i7 & 0x3f);
 	return true;
 }
 
-bool spu_interpreter::ROTMAI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTMAI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_srai_epi32(spu.gpr[op.ra].vi, 0-op.i7 & 0x3f);
 	return true;
 }
 
-bool spu_interpreter::SHLI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHLI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_slli_epi32(spu.gpr[op.ra].vi, op.i7 & 0x3f);
 	return true;
 }
 
-bool spu_interpreter::ROTHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTHI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const s32 n = op.i7 & 0xf;
@@ -338,37 +338,37 @@ bool spu_interpreter::ROTHI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTHMI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTHMI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_srli_epi16(spu.gpr[op.ra].vi, 0-op.i7 & 0x1f);
 	return true;
 }
 
-bool spu_interpreter::ROTMAHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTMAHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_srai_epi16(spu.gpr[op.ra].vi, 0-op.i7 & 0x1f);
 	return true;
 }
 
-bool spu_interpreter::SHLHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHLHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_slli_epi16(spu.gpr[op.ra].vi, op.i7 & 0x1f);
 	return true;
 }
 
-bool spu_interpreter::A(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::A(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::add32(spu.gpr[op.ra], spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter::AND(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::AND(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = spu.gpr[op.ra] & spu.gpr[op.rb];
 	return true;
 }
 
-bool spu_interpreter::CG(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CG(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x7fffffff));
 	const auto b = _mm_xor_si128(spu.gpr[op.rb].vi, _mm_set1_epi32(0x80000000));
@@ -376,36 +376,36 @@ bool spu_interpreter::CG(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::AH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::AH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::add16(spu.gpr[op.ra], spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter::NAND(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::NAND(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = ~(spu.gpr[op.ra] & spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter::AVGB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::AVGB(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_avg_epu8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter::MTSPR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MTSPR(spu_thread& spu, spu_opcode_t op)
 {
 	// SPR writes are ignored. TODO: check it.
 	return true;
 }
 
-bool spu_interpreter::WRCH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::WRCH(spu_thread& spu, spu_opcode_t op)
 {
 	return spu.set_ch_value(op.ra, spu.gpr[op.rt]._u32[3]);
 }
 
-bool spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BIZ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.rt]._u32[3] == 0)
 	{
@@ -416,7 +416,7 @@ bool spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BINZ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.rt]._u32[3] != 0)
 	{
@@ -427,7 +427,7 @@ bool spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BIHZ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.rt]._u16[6] == 0)
 	{
@@ -438,7 +438,7 @@ bool spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BIHNZ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.rt]._u16[6] != 0)
 	{
@@ -449,25 +449,25 @@ bool spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::STOPD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::STOPD(spu_thread& spu, spu_opcode_t op)
 {
 	return spu.stop_and_signal(0x3fff);
 }
 
-bool spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::STQX(spu_thread& spu, spu_opcode_t op)
 {
 	spu._ref<v128>((spu.gpr[op.ra]._u32[3] + spu.gpr[op.rb]._u32[3]) & 0x3fff0) = spu.gpr[op.rt];
 	return true;
 }
 
-bool spu_interpreter::BI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]);
 	set_interrupt_status(spu, op);
 	return false;
 }
 
-bool spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BISL(spu_thread& spu, spu_opcode_t op)
 {
 	const u32 target = spu_branch_target(spu.gpr[op.ra]._u32[3]);
 	spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
@@ -476,43 +476,43 @@ bool spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op)
 	return false;
 }
 
-bool spu_interpreter::IRET(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::IRET(spu_thread& spu, spu_opcode_t op)
 {
 	spu.pc = spu_branch_target(spu.srr0);
 	set_interrupt_status(spu, op);
 	return false;
 }
 
-bool spu_interpreter::BISLED(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BISLED(spu_thread& spu, spu_opcode_t op)
 {
 	fmt::throw_exception("Unimplemented instruction" HERE);
 	return true;
 }
 
-bool spu_interpreter::HBR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HBR(spu_thread& spu, spu_opcode_t op)
 {
 	return true;
 }
 
-bool spu_interpreter::GB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::GB(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::from32r(_mm_movemask_ps(_mm_castsi128_ps(_mm_slli_epi32(spu.gpr[op.ra].vi, 31))));
 	return true;
 }
 
-bool spu_interpreter::GBH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::GBH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_packs_epi16(_mm_slli_epi16(spu.gpr[op.ra].vi, 15), _mm_setzero_si128())));
 	return true;
 }
 
-bool spu_interpreter::GBB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::GBB(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(spu.gpr[op.ra].vi, 7)));
 	return true;
 }
 
-bool spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::FSM(spu_thread& spu, spu_opcode_t op)
 {
 	const auto bits = _mm_shuffle_epi32(spu.gpr[op.ra].vi, 0xff);
 	const auto mask = _mm_set_epi32(8, 4, 2, 1);
@@ -520,7 +520,7 @@ bool spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::FSMH(spu_thread& spu, spu_opcode_t op)
 {
 	const auto vsrc = spu.gpr[op.ra].vi;
 	const auto bits = _mm_shuffle_epi32(_mm_unpackhi_epi16(vsrc, vsrc), 0xaa);
@@ -529,7 +529,7 @@ bool spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::FSMB(spu_thread& spu, spu_opcode_t op)
 {
 	const auto vsrc = spu.gpr[op.ra].vi;
 	const auto bits = _mm_shuffle_epi32(_mm_shufflehi_epi16(_mm_unpackhi_epi8(vsrc, vsrc), 0x50), 0xfa);
@@ -538,26 +538,26 @@ bool spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_fast::FREST(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FREST(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vf = _mm_rcp_ps(spu.gpr[op.ra].vf);
 	return true;
 }
 
-bool spu_interpreter_fast::FRSQEST(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FRSQEST(spu_thread& spu, spu_opcode_t op)
 {
 	const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
 	spu.gpr[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(spu.gpr[op.ra].vf, mask));
 	return true;
 }
 
-bool spu_interpreter::LQX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::LQX(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = spu._ref<v128>((spu.gpr[op.ra]._u32[3] + spu.gpr[op.rb]._u32[3]) & 0x3fff0);
 	return true;
 }
 
-bool spu_interpreter::ROTQBYBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQBYBI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(32) const __m128i buf[2]{a, a};
@@ -565,7 +565,7 @@ bool spu_interpreter::ROTQBYBI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQMBYBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQMBYBI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()};
@@ -573,7 +573,7 @@ bool spu_interpreter::ROTQMBYBI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::SHLQBYBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHLQBYBI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a};
@@ -581,7 +581,7 @@ bool spu_interpreter::SHLQBYBI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CBX(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
 	{
@@ -594,7 +594,7 @@ bool spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CHX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CHX(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
 	{
@@ -607,7 +607,7 @@ bool spu_interpreter::CHX(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CWX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CWX(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
 	{
@@ -620,7 +620,7 @@ bool spu_interpreter::CWX(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CDX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CDX(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
 	{
@@ -633,7 +633,7 @@ bool spu_interpreter::CDX(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQBI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const s32 n = spu.gpr[op.rb]._s32[3] & 0x7;
@@ -641,7 +641,7 @@ bool spu_interpreter::ROTQBI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQMBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQMBI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const s32 n = -spu.gpr[op.rb]._s32[3] & 0x7;
@@ -649,7 +649,7 @@ bool spu_interpreter::ROTQMBI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::SHLQBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHLQBI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const s32 n = spu.gpr[op.rb]._u32[3] & 0x7;
@@ -657,7 +657,7 @@ bool spu_interpreter::SHLQBI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQBY(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQBY(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(32) const __m128i buf[2]{a, a};
@@ -665,7 +665,7 @@ bool spu_interpreter::ROTQBY(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQMBY(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQMBY(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()};
@@ -673,7 +673,7 @@ bool spu_interpreter::ROTQMBY(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::SHLQBY(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHLQBY(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a};
@@ -681,13 +681,13 @@ bool spu_interpreter::SHLQBY(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ORX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ORX(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::from32r(spu.gpr[op.ra]._u32[0] | spu.gpr[op.ra]._u32[1] | spu.gpr[op.ra]._u32[2] | spu.gpr[op.ra]._u32[3]);
 	return true;
 }
 
-bool spu_interpreter::CBD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CBD(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
 	{
@@ -700,7 +700,7 @@ bool spu_interpreter::CBD(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CHD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CHD(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
 	{
@@ -713,7 +713,7 @@ bool spu_interpreter::CHD(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CWD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CWD(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
 	{
@@ -726,7 +726,7 @@ bool spu_interpreter::CWD(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CDD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CDD(spu_thread& spu, spu_opcode_t op)
 {
 	if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF))
 	{
@@ -739,7 +739,7 @@ bool spu_interpreter::CDD(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQBII(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQBII(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const s32 n = op.i7 & 0x7;
@@ -747,7 +747,7 @@ bool spu_interpreter::ROTQBII(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQMBII(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQMBII(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const s32 n = 0-op.i7 & 0x7;
@@ -755,7 +755,7 @@ bool spu_interpreter::ROTQMBII(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::SHLQBII(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHLQBII(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const s32 n = op.i7 & 0x7;
@@ -763,7 +763,7 @@ bool spu_interpreter::SHLQBII(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQBYI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQBYI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(32) const __m128i buf[2]{a, a};
@@ -771,7 +771,7 @@ bool spu_interpreter::ROTQBYI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ROTQMBYI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ROTQMBYI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()};
@@ -779,7 +779,7 @@ bool spu_interpreter::ROTQMBYI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::SHLQBYI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SHLQBYI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a};
@@ -787,42 +787,42 @@ bool spu_interpreter::SHLQBYI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::NOP(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::NOP(spu_thread& spu, spu_opcode_t op)
 {
 	return true;
 }
 
-bool spu_interpreter::CGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CGT(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter::XOR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::XOR(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = spu.gpr[op.ra] ^ spu.gpr[op.rb];
 	return true;
 }
 
-bool spu_interpreter::CGTH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CGTH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter::EQV(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::EQV(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = ~(spu.gpr[op.ra] ^ spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter::CGTB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CGTB(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter::SUMB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SUMB(spu_thread& spu, spu_opcode_t op)
 {
 	const auto m1 = _mm_set1_epi16(0xff);
 	const auto m2 = _mm_set1_epi32(0xffff);
@@ -842,7 +842,7 @@ bool spu_interpreter::SUMB(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::HGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HGT(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.ra]._s32[3] > spu.gpr[op.rb]._s32[3])
 	{
@@ -851,7 +851,7 @@ bool spu_interpreter::HGT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CLZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CLZ(spu_thread& spu, spu_opcode_t op)
 {
 	for (u32 i = 0; i < 4; i++)
 	{
@@ -860,20 +860,20 @@ bool spu_interpreter::CLZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::XSWD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::XSWD(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt]._s64[0] = spu.gpr[op.ra]._s32[0];
 	spu.gpr[op.rt]._s64[1] = spu.gpr[op.ra]._s32[2];
 	return true;
 }
 
-bool spu_interpreter::XSHW(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::XSHW(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(spu.gpr[op.ra].vi, 16), 16);
 	return true;
 }
 
-bool spu_interpreter::CNTB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CNTB(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const auto mask1 = _mm_set1_epi8(0x55);
@@ -886,25 +886,25 @@ bool spu_interpreter::CNTB(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::XSBH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::XSBH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_srai_epi16(_mm_slli_epi16(spu.gpr[op.ra].vi, 8), 8);
 	return true;
 }
 
-bool spu_interpreter::CLGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CLGT(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = sse_cmpgt_epu32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter::ANDC(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ANDC(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::andnot(spu.gpr[op.rb], spu.gpr[op.ra]);
 	return true;
 }
 
-bool spu_interpreter_fast::FCGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FCGT(spu_thread& spu, spu_opcode_t op)
 {
 	// IMPL NOTES:
 	// if (v is inf) v = (inf - 1) i.e nearest normal value to inf with mantissa bits left intact
@@ -943,25 +943,25 @@ bool spu_interpreter_fast::FCGT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::DFCGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::DFCGT(spu_thread& spu, spu_opcode_t op)
 {
 	fmt::throw_exception("Unexpected instruction" HERE);
 	return true;
 }
 
-bool spu_interpreter_fast::FA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FA(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::addfs(spu.gpr[op.ra], spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter_fast::FS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FS(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::subfs(spu.gpr[op.ra], spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter_fast::FM(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FM(spu_thread& spu, spu_opcode_t op)
 {
 	const auto zero = _mm_set1_ps(0.f);
 	const auto sign_bits = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
@@ -991,19 +991,19 @@ bool spu_interpreter_fast::FM(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CLGTH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CLGTH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = sse_cmpgt_epu16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter::ORC(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ORC(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = spu.gpr[op.ra] | ~spu.gpr[op.rb];
 	return true;
 }
 
-bool spu_interpreter_fast::FCMGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FCMGT(spu_thread& spu, spu_opcode_t op)
 {
 	//IMPL NOTES: See FCGT
 
@@ -1030,7 +1030,7 @@ bool spu_interpreter_fast::FCMGT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::DFCMGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::DFCMGT(spu_thread& spu, spu_opcode_t op)
 {
 	const auto mask = _mm_castsi128_pd(_mm_set1_epi64x(0x7fffffffffffffff));
 	const auto ra = _mm_and_pd(spu.gpr[op.ra].vd, mask);
@@ -1039,31 +1039,31 @@ bool spu_interpreter::DFCMGT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_fast::DFA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::DFA(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::addfd(spu.gpr[op.ra], spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter_fast::DFS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::DFS(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::subfd(spu.gpr[op.ra], spu.gpr[op.rb]);
 	return true;
 }
 
-bool spu_interpreter_fast::DFM(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::DFM(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vd = _mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd);
 	return true;
 }
 
-bool spu_interpreter::CLGTB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CLGTB(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = sse_cmpgt_epu8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter::HLGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HLGT(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.ra]._u32[3] > spu.gpr[op.rb]._u32[3])
 	{
@@ -1072,37 +1072,37 @@ bool spu_interpreter::HLGT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_fast::DFMA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::DFMA(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vd = _mm_add_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd);
 	return true;
 }
 
-bool spu_interpreter_fast::DFMS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::DFMS(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vd = _mm_sub_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd);
 	return true;
 }
 
-bool spu_interpreter_fast::DFNMS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::DFNMS(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vd = _mm_sub_pd(spu.gpr[op.rt].vd, _mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd));
 	return true;
 }
 
-bool spu_interpreter_fast::DFNMA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::DFNMA(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vd = _mm_xor_pd(_mm_add_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd), _mm_set1_pd(-0.0));
 	return true;
 }
 
-bool spu_interpreter::CEQ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CEQ(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpeq_epi32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter::MPYHHU(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYHHU(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const auto b = spu.gpr[op.rb].vi;
@@ -1110,19 +1110,19 @@ bool spu_interpreter::MPYHHU(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::ADDX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ADDX(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::add32(v128::add32(spu.gpr[op.ra], spu.gpr[op.rb]), spu.gpr[op.rt] & v128::from32p(1));
 	return true;
 }
 
-bool spu_interpreter::SFX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SFX(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = v128::sub32(v128::sub32(spu.gpr[op.rb], spu.gpr[op.ra]), v128::andnot(spu.gpr[op.rt], v128::from32p(1)));
 	return true;
 }
 
-bool spu_interpreter::CGX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CGX(spu_thread& spu, spu_opcode_t op)
 {
 	for (s32 i = 0; i < 4; i++)
 	{
@@ -1132,7 +1132,7 @@ bool spu_interpreter::CGX(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::BGX(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BGX(spu_thread& spu, spu_opcode_t op)
 {
 	for (s32 i = 0; i < 4; i++)
 	{
@@ -1142,13 +1142,13 @@ bool spu_interpreter::BGX(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::MPYHHA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYHHA(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_add_epi32(spu.gpr[op.rt].vi, _mm_madd_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), _mm_srli_epi32(spu.gpr[op.rb].vi, 16)));
 	return true;
 }
 
-bool spu_interpreter::MPYHHAU(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYHHAU(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const auto b = spu.gpr[op.rb].vi;
@@ -1156,94 +1156,94 @@ bool spu_interpreter::MPYHHAU(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_fast::FSCRRD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FSCRRD(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].clear();
 	return true;
 }
 
-bool spu_interpreter_fast::FESD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FESD(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vf;
 	spu.gpr[op.rt].vd = _mm_cvtps_pd(_mm_shuffle_ps(a, a, 0x8d));
 	return true;
 }
 
-bool spu_interpreter_fast::FRDS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FRDS(spu_thread& spu, spu_opcode_t op)
 {
 	const auto t = _mm_cvtpd_ps(spu.gpr[op.ra].vd);
 	spu.gpr[op.rt].vf = _mm_shuffle_ps(t, t, 0x72);
 	return true;
 }
 
-bool spu_interpreter_fast::FSCRWR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FSCRWR(spu_thread& spu, spu_opcode_t op)
 {
 	return true;
 }
 
-bool spu_interpreter::DFTSV(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::DFTSV(spu_thread& spu, spu_opcode_t op)
 {
 	fmt::throw_exception("Unexpected instruction" HERE);
 	return true;
 }
 
-bool spu_interpreter_fast::FCEQ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FCEQ(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vf = _mm_cmpeq_ps(spu.gpr[op.rb].vf, spu.gpr[op.ra].vf);
 	return true;
 }
 
-bool spu_interpreter::DFCEQ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::DFCEQ(spu_thread& spu, spu_opcode_t op)
 {
 	fmt::throw_exception("Unexpected instruction" HERE);
 	return true;
 }
 
-bool spu_interpreter::MPY(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPY(spu_thread& spu, spu_opcode_t op)
 {
 	const auto mask = _mm_set1_epi32(0xffff);
 	spu.gpr[op.rt].vi = _mm_madd_epi16(_mm_and_si128(spu.gpr[op.ra].vi, mask), _mm_and_si128(spu.gpr[op.rb].vi, mask));
 	return true;
 }
 
-bool spu_interpreter::MPYH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), spu.gpr[op.rb].vi), 16);
 	return true;
 }
 
-bool spu_interpreter::MPYHH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYHH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_madd_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), _mm_srli_epi32(spu.gpr[op.rb].vi, 16));
 	return true;
 }
 
-bool spu_interpreter::MPYS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYS(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(_mm_mulhi_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi), 16), 16);
 	return true;
 }
 
-bool spu_interpreter::CEQH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CEQH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpeq_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter_fast::FCMEQ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FCMEQ(spu_thread& spu, spu_opcode_t op)
 {
 	const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
 	spu.gpr[op.rt].vf = _mm_cmpeq_ps(_mm_and_ps(spu.gpr[op.rb].vf, mask), _mm_and_ps(spu.gpr[op.ra].vf, mask));
 	return true;
 }
 
-bool spu_interpreter::DFCMEQ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::DFCMEQ(spu_thread& spu, spu_opcode_t op)
 {
 	fmt::throw_exception("Unexpected instruction" HERE);
 	return true;
 }
 
-bool spu_interpreter::MPYU(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYU(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const auto b = spu.gpr[op.rb].vi;
@@ -1251,13 +1251,13 @@ bool spu_interpreter::MPYU(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CEQB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CEQB(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpeq_epi8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi);
 	return true;
 }
 
-bool spu_interpreter_fast::FI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FI(spu_thread& spu, spu_opcode_t op)
 {
 	// TODO
 	const auto mask_se = _mm_castsi128_ps(_mm_set1_epi32(0xff800000)); // sign and exponent mask
@@ -1271,7 +1271,7 @@ bool spu_interpreter_fast::FI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::HEQ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HEQ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.ra]._s32[3] == spu.gpr[op.rb]._s32[3])
 	{
@@ -1281,14 +1281,14 @@ bool spu_interpreter::HEQ(SPUThread& spu, spu_opcode_t op)
 }
 
 
-bool spu_interpreter_fast::CFLTS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::CFLTS(spu_thread& spu, spu_opcode_t op)
 {
 	const auto scaled = _mm_mul_ps(spu.gpr[op.ra].vf, g_spu_imm.scale[173 - op.i8]);
 	spu.gpr[op.rt].vi = _mm_xor_si128(_mm_cvttps_epi32(scaled), _mm_castps_si128(_mm_cmpge_ps(scaled, _mm_set1_ps(0x80000000))));
 	return true;
 }
 
-bool spu_interpreter_fast::CFLTU(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::CFLTU(spu_thread& spu, spu_opcode_t op)
 {
 	const auto scaled1 = _mm_max_ps(_mm_mul_ps(spu.gpr[op.ra].vf, g_spu_imm.scale[173 - op.i8]), _mm_set1_ps(0.0f));
 	const auto scaled2 = _mm_and_ps(_mm_sub_ps(scaled1, _mm_set1_ps(0x80000000)), _mm_cmpge_ps(scaled1, _mm_set1_ps(0x80000000)));
@@ -1296,13 +1296,13 @@ bool spu_interpreter_fast::CFLTU(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_fast::CSFLT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::CSFLT(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vf = _mm_mul_ps(_mm_cvtepi32_ps(spu.gpr[op.ra].vi), g_spu_imm.scale[op.i8 - 155]);
 	return true;
 }
 
-bool spu_interpreter_fast::CUFLT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::CUFLT(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(a, 31)), _mm_set1_ps(0x80000000));
@@ -1311,7 +1311,7 @@ bool spu_interpreter_fast::CUFLT(SPUThread& spu, spu_opcode_t op)
 }
 
 
-bool spu_interpreter::BRZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BRZ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.rt]._u32[3] == 0)
 	{
@@ -1321,13 +1321,13 @@ bool spu_interpreter::BRZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::STQA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::STQA(spu_thread& spu, spu_opcode_t op)
 {
 	spu._ref<v128>(spu_ls_target(0, op.i16)) = spu.gpr[op.rt];
 	return true;
 }
 
-bool spu_interpreter::BRNZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BRNZ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.rt]._u32[3] != 0)
 	{
@@ -1337,7 +1337,7 @@ bool spu_interpreter::BRNZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::BRHZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BRHZ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.rt]._u16[6] == 0)
 	{
@@ -1347,7 +1347,7 @@ bool spu_interpreter::BRHZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::BRHNZ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BRHNZ(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.rt]._u16[6] != 0)
 	{
@@ -1357,25 +1357,25 @@ bool spu_interpreter::BRHNZ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::STQR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::STQR(spu_thread& spu, spu_opcode_t op)
 {
 	spu._ref<v128>(spu_ls_target(spu.pc, op.i16)) = spu.gpr[op.rt];
 	return true;
 }
 
-bool spu_interpreter::BRA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BRA(spu_thread& spu, spu_opcode_t op)
 {
 	spu.pc = spu_branch_target(0, op.i16);
 	return false;
 }
 
-bool spu_interpreter::LQA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::LQA(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = spu._ref<v128>(spu_ls_target(0, op.i16));
 	return true;
 }
 
-bool spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BRASL(spu_thread& spu, spu_opcode_t op)
 {
 	const u32 target = spu_branch_target(0, op.i16);
 	spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
@@ -1383,13 +1383,13 @@ bool spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op)
 	return false;
 }
 
-bool spu_interpreter::BR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BR(spu_thread& spu, spu_opcode_t op)
 {
 	spu.pc = spu_branch_target(spu.pc, op.i16);
 	return false;
 }
 
-bool spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::FSMBI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto vsrc = _mm_set_epi32(0, 0, 0, op.i16);
 	const auto bits = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_unpacklo_epi8(vsrc, vsrc), 0x50), 0x50);
@@ -1398,7 +1398,7 @@ bool spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::BRSL(spu_thread& spu, spu_opcode_t op)
 {
 	const u32 target = spu_branch_target(spu.pc, op.i16);
 	spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4));
@@ -1406,146 +1406,146 @@ bool spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op)
 	return false;
 }
 
-bool spu_interpreter::LQR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::LQR(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = spu._ref<v128>(spu_ls_target(spu.pc, op.i16));
 	return true;
 }
 
-bool spu_interpreter::IL(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::IL(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_set1_epi32(op.si16);
 	return true;
 }
 
-bool spu_interpreter::ILHU(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ILHU(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_set1_epi32(op.i16 << 16);
 	return true;
 }
 
-bool spu_interpreter::ILH(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ILH(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_set1_epi16(op.i16);
 	return true;
 }
 
-bool spu_interpreter::IOHL(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::IOHL(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.rt].vi, _mm_set1_epi32(op.i16));
 	return true;
 }
 
 
-bool spu_interpreter::ORI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ORI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10));
 	return true;
 }
 
-bool spu_interpreter::ORHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ORHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10));
 	return true;
 }
 
-bool spu_interpreter::ORBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ORBI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8));
 	return true;
 }
 
-bool spu_interpreter::SFI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SFI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_sub_epi32(_mm_set1_epi32(op.si10), spu.gpr[op.ra].vi);
 	return true;
 }
 
-bool spu_interpreter::SFHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SFHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_sub_epi16(_mm_set1_epi16(op.si10), spu.gpr[op.ra].vi);
 	return true;
 }
 
-bool spu_interpreter::ANDI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ANDI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10));
 	return true;
 }
 
-bool spu_interpreter::ANDHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ANDHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10));
 	return true;
 }
 
-bool spu_interpreter::ANDBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ANDBI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8));
 	return true;
 }
 
-bool spu_interpreter::AI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::AI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_add_epi32(_mm_set1_epi32(op.si10), spu.gpr[op.ra].vi);
 	return true;
 }
 
-bool spu_interpreter::AHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::AHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_add_epi16(_mm_set1_epi16(op.si10), spu.gpr[op.ra].vi);
 	return true;
 }
 
-bool spu_interpreter::STQD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::STQD(spu_thread& spu, spu_opcode_t op)
 {
 	spu._ref<v128>((spu.gpr[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0) = spu.gpr[op.rt];
 	return true;
 }
 
-bool spu_interpreter::LQD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::LQD(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt] = spu._ref<v128>((spu.gpr[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0);
 	return true;
 }
 
-bool spu_interpreter::XORI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::XORI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10));
 	return true;
 }
 
-bool spu_interpreter::XORHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::XORHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10));
 	return true;
 }
 
-bool spu_interpreter::XORBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::XORBI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8));
 	return true;
 }
 
-bool spu_interpreter::CGTI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CGTI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi32(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10));
 	return true;
 }
 
-bool spu_interpreter::CGTHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CGTHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi16(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10));
 	return true;
 }
 
-bool spu_interpreter::CGTBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CGTBI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi8(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8));
 	return true;
 }
 
-bool spu_interpreter::HGTI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HGTI(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.ra]._s32[3] > op.si10)
 	{
@@ -1554,25 +1554,25 @@ bool spu_interpreter::HGTI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CLGTI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CLGTI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi32(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80000000)), _mm_set1_epi32(op.si10 ^ 0x80000000));
 	return true;
 }
 
-bool spu_interpreter::CLGTHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CLGTHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi16(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80008000)), _mm_set1_epi16(op.si10 ^ 0x8000));
 	return true;
 }
 
-bool spu_interpreter::CLGTBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CLGTBI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpgt_epi8(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80808080)), _mm_set1_epi8(op.i8 ^ 0x80));
 	return true;
 }
 
-bool spu_interpreter::HLGTI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HLGTI(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.ra]._u32[3] > static_cast<u32>(op.si10))
 	{
@@ -1581,13 +1581,13 @@ bool spu_interpreter::HLGTI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::MPYI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_madd_epi16(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10 & 0xffff));
 	return true;
 }
 
-bool spu_interpreter::MPYUI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYUI(spu_thread& spu, spu_opcode_t op)
 {
 	const auto a = spu.gpr[op.ra].vi;
 	const auto i = _mm_set1_epi32(op.si10 & 0xffff);
@@ -1595,25 +1595,25 @@ bool spu_interpreter::MPYUI(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter::CEQI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CEQI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpeq_epi32(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10));
 	return true;
 }
 
-bool spu_interpreter::CEQHI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CEQHI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpeq_epi16(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10));
 	return true;
 }
 
-bool spu_interpreter::CEQBI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::CEQBI(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_cmpeq_epi8(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8));
 	return true;
 }
 
-bool spu_interpreter::HEQI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HEQI(spu_thread& spu, spu_opcode_t op)
 {
 	if (spu.gpr[op.ra]._s32[3] == op.si10)
 	{
@@ -1623,30 +1623,30 @@ bool spu_interpreter::HEQI(SPUThread& spu, spu_opcode_t op)
 }
 
 
-bool spu_interpreter::HBRA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HBRA(spu_thread& spu, spu_opcode_t op)
 {
 	return true;
 }
 
-bool spu_interpreter::HBRR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::HBRR(spu_thread& spu, spu_opcode_t op)
 {
 	return true;
 }
 
-bool spu_interpreter::ILA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::ILA(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt].vi = _mm_set1_epi32(op.i18);
 	return true;
 }
 
 
-bool spu_interpreter::SELB(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::SELB(spu_thread& spu, spu_opcode_t op)
 {
 	spu.gpr[op.rt4] = (spu.gpr[op.rc] & spu.gpr[op.rb]) | v128::andnot(spu.gpr[op.rc], spu.gpr[op.ra]);
 	return true;
 }
 
-static bool SHUFB_(SPUThread& spu, spu_opcode_t op)
+static bool SHUFB_(spu_thread& spu, spu_opcode_t op)
 {
 	__m128i ab[2]{spu.gpr[op.rb].vi, spu.gpr[op.ra].vi};
 	v128 c = spu.gpr[op.rc];
@@ -1741,14 +1741,14 @@ const spu_inter_func_t spu_interpreter::SHUFB = !utils::has_ssse3() ? &SHUFB_ :
 	c.dq(0x0f0f0f0f0f0f0f0f);
 });
 
-bool spu_interpreter::MPYA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter::MPYA(spu_thread& spu, spu_opcode_t op)
 {
 	const auto mask = _mm_set1_epi32(0xffff);
 	spu.gpr[op.rt4].vi = _mm_add_epi32(spu.gpr[op.rc].vi, _mm_madd_epi16(_mm_and_si128(spu.gpr[op.ra].vi, mask), _mm_and_si128(spu.gpr[op.rb].vi, mask)));
 	return true;
 }
 
-bool spu_interpreter_fast::FNMS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FNMS(spu_thread& spu, spu_opcode_t op)
 {
 	const u32 test_bits = 0x7f800000;
 	auto mask = _mm_set1_ps((f32&)test_bits);
@@ -1765,7 +1765,7 @@ bool spu_interpreter_fast::FNMS(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_fast::FMA(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FMA(spu_thread& spu, spu_opcode_t op)
 {
 	const u32 test_bits = 0x7f800000;
 	auto mask = _mm_set1_ps((f32&)test_bits);
@@ -1782,7 +1782,7 @@ bool spu_interpreter_fast::FMA(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_fast::FMS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_fast::FMS(spu_thread& spu, spu_opcode_t op)
 {
 	const u32 test_bits = 0x7f800000;
 	auto mask = _mm_set1_ps((f32&)test_bits);
@@ -1864,7 +1864,7 @@ inline bool isdenormal(double x)
 	return std::fpclassify(x) == FP_SUBNORMAL;
 }
 
-bool spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FREST(spu_thread& spu, spu_opcode_t op)
 {
 	fesetround(FE_TOWARDZERO);
 	for (int i = 0; i < 4; i++)
@@ -1885,7 +1885,7 @@ bool spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FRSQEST(spu_thread& spu, spu_opcode_t op)
 {
 	fesetround(FE_TOWARDZERO);
 	for (int i = 0; i < 4; i++)
@@ -1906,7 +1906,7 @@ bool spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FCGT(spu_thread& spu, spu_opcode_t op)
 {
 	for (int i = 0; i < 4; i++)
 	{
@@ -1930,7 +1930,7 @@ bool spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub)
+static void FA_FS(spu_thread& spu, spu_opcode_t op, bool sub)
 {
 	fesetround(FE_TOWARDZERO);
 	for (int w = 0; w < 4; w++)
@@ -2015,11 +2015,11 @@ static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub)
 	}
 }
 
-bool spu_interpreter_precise::FA(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, op, false); return true; }
+bool spu_interpreter_precise::FA(spu_thread& spu, spu_opcode_t op) { FA_FS(spu, op, false); return true; }
 
-bool spu_interpreter_precise::FS(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, op, true); return true; }
+bool spu_interpreter_precise::FS(spu_thread& spu, spu_opcode_t op) { FA_FS(spu, op, true); return true; }
 
-bool spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FM(spu_thread& spu, spu_opcode_t op)
 {
 	fesetround(FE_TOWARDZERO);
 	for (int w = 0; w < 4; w++)
@@ -2093,7 +2093,7 @@ bool spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::FCMGT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FCMGT(spu_thread& spu, spu_opcode_t op)
 {
 	for (int i = 0; i < 4; i++)
 	{
@@ -2122,7 +2122,7 @@ enum DoubleOp
 	DFASM_M,
 };
 
-static void DFASM(SPUThread& spu, spu_opcode_t op, DoubleOp operation)
+static void DFASM(spu_thread& spu, spu_opcode_t op, DoubleOp operation)
 {
 	for (int i = 0; i < 2; i++)
 	{
@@ -2176,13 +2176,13 @@ static void DFASM(SPUThread& spu, spu_opcode_t op, DoubleOp operation)
 	}
 }
 
-bool spu_interpreter_precise::DFA(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_A); return true; }
+bool spu_interpreter_precise::DFA(spu_thread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_A); return true; }
 
-bool spu_interpreter_precise::DFS(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_S); return true; }
+bool spu_interpreter_precise::DFS(spu_thread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_S); return true; }
 
-bool spu_interpreter_precise::DFM(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_M); return true; }
+bool spu_interpreter_precise::DFM(spu_thread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_M); return true; }
 
-static void DFMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub)
+static void DFMA(spu_thread& spu, spu_opcode_t op, bool neg, bool sub)
 {
 	for (int i = 0; i < 2; i++)
 	{
@@ -2238,21 +2238,21 @@ static void DFMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub)
 	}
 }
 
-bool spu_interpreter_precise::DFMA(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, false); return true; }
+bool spu_interpreter_precise::DFMA(spu_thread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, false); return true; }
 
-bool spu_interpreter_precise::DFMS(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, true); return true; }
+bool spu_interpreter_precise::DFMS(spu_thread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, true); return true; }
 
-bool spu_interpreter_precise::DFNMS(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, true); return true; }
+bool spu_interpreter_precise::DFNMS(spu_thread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, true); return true; }
 
-bool spu_interpreter_precise::DFNMA(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, false); return true; }
+bool spu_interpreter_precise::DFNMA(spu_thread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, false); return true; }
 
-bool spu_interpreter_precise::FSCRRD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FSCRRD(spu_thread& spu, spu_opcode_t op)
 {
 	spu.fpscr.Read(spu.gpr[op.rt]);
 	return true;
 }
 
-bool spu_interpreter_precise::FESD(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FESD(spu_thread& spu, spu_opcode_t op)
 {
 	for (int i = 0; i < 2; i++)
 	{
@@ -2277,7 +2277,7 @@ bool spu_interpreter_precise::FESD(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FRDS(spu_thread& spu, spu_opcode_t op)
 {
 	for (int i = 0; i < 2; i++)
 	{
@@ -2307,13 +2307,13 @@ bool spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::FSCRWR(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FSCRWR(spu_thread& spu, spu_opcode_t op)
 {
 	spu.fpscr.Write(spu.gpr[op.ra]);
 	return true;
 }
 
-bool spu_interpreter_precise::FCEQ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FCEQ(spu_thread& spu, spu_opcode_t op)
 {
 	for (int i = 0; i < 4; i++)
 	{
@@ -2329,7 +2329,7 @@ bool spu_interpreter_precise::FCEQ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::FCMEQ(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FCMEQ(spu_thread& spu, spu_opcode_t op)
 {
 	for (int i = 0; i < 4; i++)
 	{
@@ -2345,14 +2345,14 @@ bool spu_interpreter_precise::FCMEQ(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::FI(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::FI(spu_thread& spu, spu_opcode_t op)
 {
 	// TODO
 	spu.gpr[op.rt] = spu.gpr[op.rb];
 	return true;
 }
 
-bool spu_interpreter_precise::CFLTS(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::CFLTS(spu_thread& spu, spu_opcode_t op)
 {
 	const int scale = 173 - (op.i8 & 0xff); //unsigned immediate
 	for (int i = 0; i < 4; i++)
@@ -2375,7 +2375,7 @@ bool spu_interpreter_precise::CFLTS(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::CFLTU(spu_thread& spu, spu_opcode_t op)
 {
 	const int scale = 173 - (op.i8 & 0xff); //unsigned immediate
 	for (int i = 0; i < 4; i++)
@@ -2398,7 +2398,7 @@ bool spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::CSFLT(spu_thread& spu, spu_opcode_t op)
 {
 	fesetround(FE_TOWARDZERO);
 	const int scale = 155 - (op.i8 & 0xff); //unsigned immediate
@@ -2422,7 +2422,7 @@ bool spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-bool spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op)
+bool spu_interpreter_precise::CUFLT(spu_thread& spu, spu_opcode_t op)
 {
 	fesetround(FE_TOWARDZERO);
 	const int scale = 155 - (op.i8 & 0xff); //unsigned immediate
@@ -2446,7 +2446,7 @@ bool spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op)
 	return true;
 }
 
-static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub)
+static void FMA(spu_thread& spu, spu_opcode_t op, bool neg, bool sub)
 {
 	fesetround(FE_TOWARDZERO);
 	for (int w = 0; w < 4; w++)
@@ -2591,11 +2591,11 @@ static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub)
 	}
 }
 
-bool spu_interpreter_precise::FNMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, true, true); return true; }
+bool spu_interpreter_precise::FNMS(spu_thread& spu, spu_opcode_t op) { ::FMA(spu, op, true, true); return true; }
 
-bool spu_interpreter_precise::FMA(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, false); return true; }
+bool spu_interpreter_precise::FMA(spu_thread& spu, spu_opcode_t op) { ::FMA(spu, op, false, false); return true; }
 
-bool spu_interpreter_precise::FMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, true); return true; }
+bool spu_interpreter_precise::FMS(spu_thread& spu, spu_opcode_t op) { ::FMA(spu, op, false, true); return true; }
 
 extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise{};
 
diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h
index 19cdb2cd50..86bfb57c92 100644
--- a/rpcs3/Emu/Cell/SPUInterpreter.h
+++ b/rpcs3/Emu/Cell/SPUInterpreter.h
@@ -2,248 +2,248 @@
 
 #include "SPUOpcodes.h"
 
-class SPUThread;
+class spu_thread;
 
-using spu_inter_func_t = bool(*)(SPUThread& spu, spu_opcode_t op);
+using spu_inter_func_t = bool(*)(spu_thread& spu, spu_opcode_t op);
 
 struct spu_interpreter
 {
-	static bool UNK(SPUThread&, spu_opcode_t);
-	static void set_interrupt_status(SPUThread&, spu_opcode_t);
+	static bool UNK(spu_thread&, spu_opcode_t);
+	static void set_interrupt_status(spu_thread&, spu_opcode_t);
 
-	static bool STOP(SPUThread&, spu_opcode_t);
-	static bool LNOP(SPUThread&, spu_opcode_t);
-	static bool SYNC(SPUThread&, spu_opcode_t);
-	static bool DSYNC(SPUThread&, spu_opcode_t);
-	static bool MFSPR(SPUThread&, spu_opcode_t);
-	static bool RDCH(SPUThread&, spu_opcode_t);
-	static bool RCHCNT(SPUThread&, spu_opcode_t);
-	static bool SF(SPUThread&, spu_opcode_t);
-	static bool OR(SPUThread&, spu_opcode_t);
-	static bool BG(SPUThread&, spu_opcode_t);
-	static bool SFH(SPUThread&, spu_opcode_t);
-	static bool NOR(SPUThread&, spu_opcode_t);
-	static bool ABSDB(SPUThread&, spu_opcode_t);
-	static bool ROT(SPUThread&, spu_opcode_t);
-	static bool ROTM(SPUThread&, spu_opcode_t);
-	static bool ROTMA(SPUThread&, spu_opcode_t);
-	static bool SHL(SPUThread&, spu_opcode_t);
-	static bool ROTH(SPUThread&, spu_opcode_t);
-	static bool ROTHM(SPUThread&, spu_opcode_t);
-	static bool ROTMAH(SPUThread&, spu_opcode_t);
-	static bool SHLH(SPUThread&, spu_opcode_t);
-	static bool ROTI(SPUThread&, spu_opcode_t);
-	static bool ROTMI(SPUThread&, spu_opcode_t);
-	static bool ROTMAI(SPUThread&, spu_opcode_t);
-	static bool SHLI(SPUThread&, spu_opcode_t);
-	static bool ROTHI(SPUThread&, spu_opcode_t);
-	static bool ROTHMI(SPUThread&, spu_opcode_t);
-	static bool ROTMAHI(SPUThread&, spu_opcode_t);
-	static bool SHLHI(SPUThread&, spu_opcode_t);
-	static bool A(SPUThread&, spu_opcode_t);
-	static bool AND(SPUThread&, spu_opcode_t);
-	static bool CG(SPUThread&, spu_opcode_t);
-	static bool AH(SPUThread&, spu_opcode_t);
-	static bool NAND(SPUThread&, spu_opcode_t);
-	static bool AVGB(SPUThread&, spu_opcode_t);
-	static bool MTSPR(SPUThread&, spu_opcode_t);
-	static bool WRCH(SPUThread&, spu_opcode_t);
-	static bool BIZ(SPUThread&, spu_opcode_t);
-	static bool BINZ(SPUThread&, spu_opcode_t);
-	static bool BIHZ(SPUThread&, spu_opcode_t);
-	static bool BIHNZ(SPUThread&, spu_opcode_t);
-	static bool STOPD(SPUThread&, spu_opcode_t);
-	static bool STQX(SPUThread&, spu_opcode_t);
-	static bool BI(SPUThread&, spu_opcode_t);
-	static bool BISL(SPUThread&, spu_opcode_t);
-	static bool IRET(SPUThread&, spu_opcode_t);
-	static bool BISLED(SPUThread&, spu_opcode_t);
-	static bool HBR(SPUThread&, spu_opcode_t);
-	static bool GB(SPUThread&, spu_opcode_t);
-	static bool GBH(SPUThread&, spu_opcode_t);
-	static bool GBB(SPUThread&, spu_opcode_t);
-	static bool FSM(SPUThread&, spu_opcode_t);
-	static bool FSMH(SPUThread&, spu_opcode_t);
-	static bool FSMB(SPUThread&, spu_opcode_t);
-	static bool LQX(SPUThread&, spu_opcode_t);
-	static bool ROTQBYBI(SPUThread&, spu_opcode_t);
-	static bool ROTQMBYBI(SPUThread&, spu_opcode_t);
-	static bool SHLQBYBI(SPUThread&, spu_opcode_t);
-	static bool CBX(SPUThread&, spu_opcode_t);
-	static bool CHX(SPUThread&, spu_opcode_t);
-	static bool CWX(SPUThread&, spu_opcode_t);
-	static bool CDX(SPUThread&, spu_opcode_t);
-	static bool ROTQBI(SPUThread&, spu_opcode_t);
-	static bool ROTQMBI(SPUThread&, spu_opcode_t);
-	static bool SHLQBI(SPUThread&, spu_opcode_t);
-	static bool ROTQBY(SPUThread&, spu_opcode_t);
-	static bool ROTQMBY(SPUThread&, spu_opcode_t);
-	static bool SHLQBY(SPUThread&, spu_opcode_t);
-	static bool ORX(SPUThread&, spu_opcode_t);
-	static bool CBD(SPUThread&, spu_opcode_t);
-	static bool CHD(SPUThread&, spu_opcode_t);
-	static bool CWD(SPUThread&, spu_opcode_t);
-	static bool CDD(SPUThread&, spu_opcode_t);
-	static bool ROTQBII(SPUThread&, spu_opcode_t);
-	static bool ROTQMBII(SPUThread&, spu_opcode_t);
-	static bool SHLQBII(SPUThread&, spu_opcode_t);
-	static bool ROTQBYI(SPUThread&, spu_opcode_t);
-	static bool ROTQMBYI(SPUThread&, spu_opcode_t);
-	static bool SHLQBYI(SPUThread&, spu_opcode_t);
-	static bool NOP(SPUThread&, spu_opcode_t);
-	static bool CGT(SPUThread&, spu_opcode_t);
-	static bool XOR(SPUThread&, spu_opcode_t);
-	static bool CGTH(SPUThread&, spu_opcode_t);
-	static bool EQV(SPUThread&, spu_opcode_t);
-	static bool CGTB(SPUThread&, spu_opcode_t);
-	static bool SUMB(SPUThread&, spu_opcode_t);
-	static bool HGT(SPUThread&, spu_opcode_t);
-	static bool CLZ(SPUThread&, spu_opcode_t);
-	static bool XSWD(SPUThread&, spu_opcode_t);
-	static bool XSHW(SPUThread&, spu_opcode_t);
-	static bool CNTB(SPUThread&, spu_opcode_t);
-	static bool XSBH(SPUThread&, spu_opcode_t);
-	static bool CLGT(SPUThread&, spu_opcode_t);
-	static bool ANDC(SPUThread&, spu_opcode_t);
-	static bool CLGTH(SPUThread&, spu_opcode_t);
-	static bool ORC(SPUThread&, spu_opcode_t);
-	static bool CLGTB(SPUThread&, spu_opcode_t);
-	static bool HLGT(SPUThread&, spu_opcode_t);
-	static bool CEQ(SPUThread&, spu_opcode_t);
-	static bool MPYHHU(SPUThread&, spu_opcode_t);
-	static bool ADDX(SPUThread&, spu_opcode_t);
-	static bool SFX(SPUThread&, spu_opcode_t);
-	static bool CGX(SPUThread&, spu_opcode_t);
-	static bool BGX(SPUThread&, spu_opcode_t);
-	static bool MPYHHA(SPUThread&, spu_opcode_t);
-	static bool MPYHHAU(SPUThread&, spu_opcode_t);
-	static bool MPY(SPUThread&, spu_opcode_t);
-	static bool MPYH(SPUThread&, spu_opcode_t);
-	static bool MPYHH(SPUThread&, spu_opcode_t);
-	static bool MPYS(SPUThread&, spu_opcode_t);
-	static bool CEQH(SPUThread&, spu_opcode_t);
-	static bool MPYU(SPUThread&, spu_opcode_t);
-	static bool CEQB(SPUThread&, spu_opcode_t);
-	static bool HEQ(SPUThread&, spu_opcode_t);
-	static bool BRZ(SPUThread&, spu_opcode_t);
-	static bool STQA(SPUThread&, spu_opcode_t);
-	static bool BRNZ(SPUThread&, spu_opcode_t);
-	static bool BRHZ(SPUThread&, spu_opcode_t);
-	static bool BRHNZ(SPUThread&, spu_opcode_t);
-	static bool STQR(SPUThread&, spu_opcode_t);
-	static bool BRA(SPUThread&, spu_opcode_t);
-	static bool LQA(SPUThread&, spu_opcode_t);
-	static bool BRASL(SPUThread&, spu_opcode_t);
-	static bool BR(SPUThread&, spu_opcode_t);
-	static bool FSMBI(SPUThread&, spu_opcode_t);
-	static bool BRSL(SPUThread&, spu_opcode_t);
-	static bool LQR(SPUThread&, spu_opcode_t);
-	static bool IL(SPUThread&, spu_opcode_t);
-	static bool ILHU(SPUThread&, spu_opcode_t);
-	static bool ILH(SPUThread&, spu_opcode_t);
-	static bool IOHL(SPUThread&, spu_opcode_t);
-	static bool ORI(SPUThread&, spu_opcode_t);
-	static bool ORHI(SPUThread&, spu_opcode_t);
-	static bool ORBI(SPUThread&, spu_opcode_t);
-	static bool SFI(SPUThread&, spu_opcode_t);
-	static bool SFHI(SPUThread&, spu_opcode_t);
-	static bool ANDI(SPUThread&, spu_opcode_t);
-	static bool ANDHI(SPUThread&, spu_opcode_t);
-	static bool ANDBI(SPUThread&, spu_opcode_t);
-	static bool AI(SPUThread&, spu_opcode_t);
-	static bool AHI(SPUThread&, spu_opcode_t);
-	static bool STQD(SPUThread&, spu_opcode_t);
-	static bool LQD(SPUThread&, spu_opcode_t);
-	static bool XORI(SPUThread&, spu_opcode_t);
-	static bool XORHI(SPUThread&, spu_opcode_t);
-	static bool XORBI(SPUThread&, spu_opcode_t);
-	static bool CGTI(SPUThread&, spu_opcode_t);
-	static bool CGTHI(SPUThread&, spu_opcode_t);
-	static bool CGTBI(SPUThread&, spu_opcode_t);
-	static bool HGTI(SPUThread&, spu_opcode_t);
-	static bool CLGTI(SPUThread&, spu_opcode_t);
-	static bool CLGTHI(SPUThread&, spu_opcode_t);
-	static bool CLGTBI(SPUThread&, spu_opcode_t);
-	static bool HLGTI(SPUThread&, spu_opcode_t);
-	static bool MPYI(SPUThread&, spu_opcode_t);
-	static bool MPYUI(SPUThread&, spu_opcode_t);
-	static bool CEQI(SPUThread&, spu_opcode_t);
-	static bool CEQHI(SPUThread&, spu_opcode_t);
-	static bool CEQBI(SPUThread&, spu_opcode_t);
-	static bool HEQI(SPUThread&, spu_opcode_t);
-	static bool HBRA(SPUThread&, spu_opcode_t);
-	static bool HBRR(SPUThread&, spu_opcode_t);
-	static bool ILA(SPUThread&, spu_opcode_t);
-	static bool SELB(SPUThread&, spu_opcode_t);
+	static bool STOP(spu_thread&, spu_opcode_t);
+	static bool LNOP(spu_thread&, spu_opcode_t);
+	static bool SYNC(spu_thread&, spu_opcode_t);
+	static bool DSYNC(spu_thread&, spu_opcode_t);
+	static bool MFSPR(spu_thread&, spu_opcode_t);
+	static bool RDCH(spu_thread&, spu_opcode_t);
+	static bool RCHCNT(spu_thread&, spu_opcode_t);
+	static bool SF(spu_thread&, spu_opcode_t);
+	static bool OR(spu_thread&, spu_opcode_t);
+	static bool BG(spu_thread&, spu_opcode_t);
+	static bool SFH(spu_thread&, spu_opcode_t);
+	static bool NOR(spu_thread&, spu_opcode_t);
+	static bool ABSDB(spu_thread&, spu_opcode_t);
+	static bool ROT(spu_thread&, spu_opcode_t);
+	static bool ROTM(spu_thread&, spu_opcode_t);
+	static bool ROTMA(spu_thread&, spu_opcode_t);
+	static bool SHL(spu_thread&, spu_opcode_t);
+	static bool ROTH(spu_thread&, spu_opcode_t);
+	static bool ROTHM(spu_thread&, spu_opcode_t);
+	static bool ROTMAH(spu_thread&, spu_opcode_t);
+	static bool SHLH(spu_thread&, spu_opcode_t);
+	static bool ROTI(spu_thread&, spu_opcode_t);
+	static bool ROTMI(spu_thread&, spu_opcode_t);
+	static bool ROTMAI(spu_thread&, spu_opcode_t);
+	static bool SHLI(spu_thread&, spu_opcode_t);
+	static bool ROTHI(spu_thread&, spu_opcode_t);
+	static bool ROTHMI(spu_thread&, spu_opcode_t);
+	static bool ROTMAHI(spu_thread&, spu_opcode_t);
+	static bool SHLHI(spu_thread&, spu_opcode_t);
+	static bool A(spu_thread&, spu_opcode_t);
+	static bool AND(spu_thread&, spu_opcode_t);
+	static bool CG(spu_thread&, spu_opcode_t);
+	static bool AH(spu_thread&, spu_opcode_t);
+	static bool NAND(spu_thread&, spu_opcode_t);
+	static bool AVGB(spu_thread&, spu_opcode_t);
+	static bool MTSPR(spu_thread&, spu_opcode_t);
+	static bool WRCH(spu_thread&, spu_opcode_t);
+	static bool BIZ(spu_thread&, spu_opcode_t);
+	static bool BINZ(spu_thread&, spu_opcode_t);
+	static bool BIHZ(spu_thread&, spu_opcode_t);
+	static bool BIHNZ(spu_thread&, spu_opcode_t);
+	static bool STOPD(spu_thread&, spu_opcode_t);
+	static bool STQX(spu_thread&, spu_opcode_t);
+	static bool BI(spu_thread&, spu_opcode_t);
+	static bool BISL(spu_thread&, spu_opcode_t);
+	static bool IRET(spu_thread&, spu_opcode_t);
+	static bool BISLED(spu_thread&, spu_opcode_t);
+	static bool HBR(spu_thread&, spu_opcode_t);
+	static bool GB(spu_thread&, spu_opcode_t);
+	static bool GBH(spu_thread&, spu_opcode_t);
+	static bool GBB(spu_thread&, spu_opcode_t);
+	static bool FSM(spu_thread&, spu_opcode_t);
+	static bool FSMH(spu_thread&, spu_opcode_t);
+	static bool FSMB(spu_thread&, spu_opcode_t);
+	static bool LQX(spu_thread&, spu_opcode_t);
+	static bool ROTQBYBI(spu_thread&, spu_opcode_t);
+	static bool ROTQMBYBI(spu_thread&, spu_opcode_t);
+	static bool SHLQBYBI(spu_thread&, spu_opcode_t);
+	static bool CBX(spu_thread&, spu_opcode_t);
+	static bool CHX(spu_thread&, spu_opcode_t);
+	static bool CWX(spu_thread&, spu_opcode_t);
+	static bool CDX(spu_thread&, spu_opcode_t);
+	static bool ROTQBI(spu_thread&, spu_opcode_t);
+	static bool ROTQMBI(spu_thread&, spu_opcode_t);
+	static bool SHLQBI(spu_thread&, spu_opcode_t);
+	static bool ROTQBY(spu_thread&, spu_opcode_t);
+	static bool ROTQMBY(spu_thread&, spu_opcode_t);
+	static bool SHLQBY(spu_thread&, spu_opcode_t);
+	static bool ORX(spu_thread&, spu_opcode_t);
+	static bool CBD(spu_thread&, spu_opcode_t);
+	static bool CHD(spu_thread&, spu_opcode_t);
+	static bool CWD(spu_thread&, spu_opcode_t);
+	static bool CDD(spu_thread&, spu_opcode_t);
+	static bool ROTQBII(spu_thread&, spu_opcode_t);
+	static bool ROTQMBII(spu_thread&, spu_opcode_t);
+	static bool SHLQBII(spu_thread&, spu_opcode_t);
+	static bool ROTQBYI(spu_thread&, spu_opcode_t);
+	static bool ROTQMBYI(spu_thread&, spu_opcode_t);
+	static bool SHLQBYI(spu_thread&, spu_opcode_t);
+	static bool NOP(spu_thread&, spu_opcode_t);
+	static bool CGT(spu_thread&, spu_opcode_t);
+	static bool XOR(spu_thread&, spu_opcode_t);
+	static bool CGTH(spu_thread&, spu_opcode_t);
+	static bool EQV(spu_thread&, spu_opcode_t);
+	static bool CGTB(spu_thread&, spu_opcode_t);
+	static bool SUMB(spu_thread&, spu_opcode_t);
+	static bool HGT(spu_thread&, spu_opcode_t);
+	static bool CLZ(spu_thread&, spu_opcode_t);
+	static bool XSWD(spu_thread&, spu_opcode_t);
+	static bool XSHW(spu_thread&, spu_opcode_t);
+	static bool CNTB(spu_thread&, spu_opcode_t);
+	static bool XSBH(spu_thread&, spu_opcode_t);
+	static bool CLGT(spu_thread&, spu_opcode_t);
+	static bool ANDC(spu_thread&, spu_opcode_t);
+	static bool CLGTH(spu_thread&, spu_opcode_t);
+	static bool ORC(spu_thread&, spu_opcode_t);
+	static bool CLGTB(spu_thread&, spu_opcode_t);
+	static bool HLGT(spu_thread&, spu_opcode_t);
+	static bool CEQ(spu_thread&, spu_opcode_t);
+	static bool MPYHHU(spu_thread&, spu_opcode_t);
+	static bool ADDX(spu_thread&, spu_opcode_t);
+	static bool SFX(spu_thread&, spu_opcode_t);
+	static bool CGX(spu_thread&, spu_opcode_t);
+	static bool BGX(spu_thread&, spu_opcode_t);
+	static bool MPYHHA(spu_thread&, spu_opcode_t);
+	static bool MPYHHAU(spu_thread&, spu_opcode_t);
+	static bool MPY(spu_thread&, spu_opcode_t);
+	static bool MPYH(spu_thread&, spu_opcode_t);
+	static bool MPYHH(spu_thread&, spu_opcode_t);
+	static bool MPYS(spu_thread&, spu_opcode_t);
+	static bool CEQH(spu_thread&, spu_opcode_t);
+	static bool MPYU(spu_thread&, spu_opcode_t);
+	static bool CEQB(spu_thread&, spu_opcode_t);
+	static bool HEQ(spu_thread&, spu_opcode_t);
+	static bool BRZ(spu_thread&, spu_opcode_t);
+	static bool STQA(spu_thread&, spu_opcode_t);
+	static bool BRNZ(spu_thread&, spu_opcode_t);
+	static bool BRHZ(spu_thread&, spu_opcode_t);
+	static bool BRHNZ(spu_thread&, spu_opcode_t);
+	static bool STQR(spu_thread&, spu_opcode_t);
+	static bool BRA(spu_thread&, spu_opcode_t);
+	static bool LQA(spu_thread&, spu_opcode_t);
+	static bool BRASL(spu_thread&, spu_opcode_t);
+	static bool BR(spu_thread&, spu_opcode_t);
+	static bool FSMBI(spu_thread&, spu_opcode_t);
+	static bool BRSL(spu_thread&, spu_opcode_t);
+	static bool LQR(spu_thread&, spu_opcode_t);
+	static bool IL(spu_thread&, spu_opcode_t);
+	static bool ILHU(spu_thread&, spu_opcode_t);
+	static bool ILH(spu_thread&, spu_opcode_t);
+	static bool IOHL(spu_thread&, spu_opcode_t);
+	static bool ORI(spu_thread&, spu_opcode_t);
+	static bool ORHI(spu_thread&, spu_opcode_t);
+	static bool ORBI(spu_thread&, spu_opcode_t);
+	static bool SFI(spu_thread&, spu_opcode_t);
+	static bool SFHI(spu_thread&, spu_opcode_t);
+	static bool ANDI(spu_thread&, spu_opcode_t);
+	static bool ANDHI(spu_thread&, spu_opcode_t);
+	static bool ANDBI(spu_thread&, spu_opcode_t);
+	static bool AI(spu_thread&, spu_opcode_t);
+	static bool AHI(spu_thread&, spu_opcode_t);
+	static bool STQD(spu_thread&, spu_opcode_t);
+	static bool LQD(spu_thread&, spu_opcode_t);
+	static bool XORI(spu_thread&, spu_opcode_t);
+	static bool XORHI(spu_thread&, spu_opcode_t);
+	static bool XORBI(spu_thread&, spu_opcode_t);
+	static bool CGTI(spu_thread&, spu_opcode_t);
+	static bool CGTHI(spu_thread&, spu_opcode_t);
+	static bool CGTBI(spu_thread&, spu_opcode_t);
+	static bool HGTI(spu_thread&, spu_opcode_t);
+	static bool CLGTI(spu_thread&, spu_opcode_t);
+	static bool CLGTHI(spu_thread&, spu_opcode_t);
+	static bool CLGTBI(spu_thread&, spu_opcode_t);
+	static bool HLGTI(spu_thread&, spu_opcode_t);
+	static bool MPYI(spu_thread&, spu_opcode_t);
+	static bool MPYUI(spu_thread&, spu_opcode_t);
+	static bool CEQI(spu_thread&, spu_opcode_t);
+	static bool CEQHI(spu_thread&, spu_opcode_t);
+	static bool CEQBI(spu_thread&, spu_opcode_t);
+	static bool HEQI(spu_thread&, spu_opcode_t);
+	static bool HBRA(spu_thread&, spu_opcode_t);
+	static bool HBRR(spu_thread&, spu_opcode_t);
+	static bool ILA(spu_thread&, spu_opcode_t);
+	static bool SELB(spu_thread&, spu_opcode_t);
 	static const spu_inter_func_t SHUFB;
-	static bool MPYA(SPUThread&, spu_opcode_t);
-	static bool DFCGT(SPUThread&, spu_opcode_t);
-	static bool DFCMGT(SPUThread&, spu_opcode_t);
-	static bool DFTSV(SPUThread&, spu_opcode_t);
-	static bool DFCEQ(SPUThread&, spu_opcode_t);
-	static bool DFCMEQ(SPUThread&, spu_opcode_t);
+	static bool MPYA(spu_thread&, spu_opcode_t);
+	static bool DFCGT(spu_thread&, spu_opcode_t);
+	static bool DFCMGT(spu_thread&, spu_opcode_t);
+	static bool DFTSV(spu_thread&, spu_opcode_t);
+	static bool DFCEQ(spu_thread&, spu_opcode_t);
+	static bool DFCMEQ(spu_thread&, spu_opcode_t);
 };
 
 struct spu_interpreter_fast final : spu_interpreter
 {
-	static bool FREST(SPUThread&, spu_opcode_t);
-	static bool FRSQEST(SPUThread&, spu_opcode_t);
-	static bool FCGT(SPUThread&, spu_opcode_t);
-	static bool FA(SPUThread&, spu_opcode_t);
-	static bool FS(SPUThread&, spu_opcode_t);
-	static bool FM(SPUThread&, spu_opcode_t);
-	static bool FCMGT(SPUThread&, spu_opcode_t);
-	static bool DFA(SPUThread&, spu_opcode_t);
-	static bool DFS(SPUThread&, spu_opcode_t);
-	static bool DFM(SPUThread&, spu_opcode_t);
-	static bool DFMA(SPUThread&, spu_opcode_t);
-	static bool DFMS(SPUThread&, spu_opcode_t);
-	static bool DFNMS(SPUThread&, spu_opcode_t);
-	static bool DFNMA(SPUThread&, spu_opcode_t);
-	static bool FSCRRD(SPUThread&, spu_opcode_t);
-	static bool FESD(SPUThread&, spu_opcode_t);
-	static bool FRDS(SPUThread&, spu_opcode_t);
-	static bool FSCRWR(SPUThread&, spu_opcode_t);
-	static bool FCEQ(SPUThread&, spu_opcode_t);
-	static bool FCMEQ(SPUThread&, spu_opcode_t);
-	static bool FI(SPUThread&, spu_opcode_t);
-	static bool CFLTS(SPUThread&, spu_opcode_t);
-	static bool CFLTU(SPUThread&, spu_opcode_t);
-	static bool CSFLT(SPUThread&, spu_opcode_t);
-	static bool CUFLT(SPUThread&, spu_opcode_t);
-	static bool FNMS(SPUThread&, spu_opcode_t);
-	static bool FMA(SPUThread&, spu_opcode_t);
-	static bool FMS(SPUThread&, spu_opcode_t);
+	static bool FREST(spu_thread&, spu_opcode_t);
+	static bool FRSQEST(spu_thread&, spu_opcode_t);
+	static bool FCGT(spu_thread&, spu_opcode_t);
+	static bool FA(spu_thread&, spu_opcode_t);
+	static bool FS(spu_thread&, spu_opcode_t);
+	static bool FM(spu_thread&, spu_opcode_t);
+	static bool FCMGT(spu_thread&, spu_opcode_t);
+	static bool DFA(spu_thread&, spu_opcode_t);
+	static bool DFS(spu_thread&, spu_opcode_t);
+	static bool DFM(spu_thread&, spu_opcode_t);
+	static bool DFMA(spu_thread&, spu_opcode_t);
+	static bool DFMS(spu_thread&, spu_opcode_t);
+	static bool DFNMS(spu_thread&, spu_opcode_t);
+	static bool DFNMA(spu_thread&, spu_opcode_t);
+	static bool FSCRRD(spu_thread&, spu_opcode_t);
+	static bool FESD(spu_thread&, spu_opcode_t);
+	static bool FRDS(spu_thread&, spu_opcode_t);
+	static bool FSCRWR(spu_thread&, spu_opcode_t);
+	static bool FCEQ(spu_thread&, spu_opcode_t);
+	static bool FCMEQ(spu_thread&, spu_opcode_t);
+	static bool FI(spu_thread&, spu_opcode_t);
+	static bool CFLTS(spu_thread&, spu_opcode_t);
+	static bool CFLTU(spu_thread&, spu_opcode_t);
+	static bool CSFLT(spu_thread&, spu_opcode_t);
+	static bool CUFLT(spu_thread&, spu_opcode_t);
+	static bool FNMS(spu_thread&, spu_opcode_t);
+	static bool FMA(spu_thread&, spu_opcode_t);
+	static bool FMS(spu_thread&, spu_opcode_t);
 };
 
 struct spu_interpreter_precise final : spu_interpreter
 {
-	static bool FREST(SPUThread&, spu_opcode_t);
-	static bool FRSQEST(SPUThread&, spu_opcode_t);
-	static bool FCGT(SPUThread&, spu_opcode_t);
-	static bool FA(SPUThread&, spu_opcode_t);
-	static bool FS(SPUThread&, spu_opcode_t);
-	static bool FM(SPUThread&, spu_opcode_t);
-	static bool FCMGT(SPUThread&, spu_opcode_t);
-	static bool DFA(SPUThread&, spu_opcode_t);
-	static bool DFS(SPUThread&, spu_opcode_t);
-	static bool DFM(SPUThread&, spu_opcode_t);
-	static bool DFMA(SPUThread&, spu_opcode_t);
-	static bool DFMS(SPUThread&, spu_opcode_t);
-	static bool DFNMS(SPUThread&, spu_opcode_t);
-	static bool DFNMA(SPUThread&, spu_opcode_t);
-	static bool FSCRRD(SPUThread&, spu_opcode_t);
-	static bool FESD(SPUThread&, spu_opcode_t);
-	static bool FRDS(SPUThread&, spu_opcode_t);
-	static bool FSCRWR(SPUThread&, spu_opcode_t);
-	static bool FCEQ(SPUThread&, spu_opcode_t);
-	static bool FCMEQ(SPUThread&, spu_opcode_t);
-	static bool FI(SPUThread&, spu_opcode_t);
-	static bool CFLTS(SPUThread&, spu_opcode_t);
-	static bool CFLTU(SPUThread&, spu_opcode_t);
-	static bool CSFLT(SPUThread&, spu_opcode_t);
-	static bool CUFLT(SPUThread&, spu_opcode_t);
-	static bool FNMS(SPUThread&, spu_opcode_t);
-	static bool FMA(SPUThread&, spu_opcode_t);
-	static bool FMS(SPUThread&, spu_opcode_t);
+	static bool FREST(spu_thread&, spu_opcode_t);
+	static bool FRSQEST(spu_thread&, spu_opcode_t);
+	static bool FCGT(spu_thread&, spu_opcode_t);
+	static bool FA(spu_thread&, spu_opcode_t);
+	static bool FS(spu_thread&, spu_opcode_t);
+	static bool FM(spu_thread&, spu_opcode_t);
+	static bool FCMGT(spu_thread&, spu_opcode_t);
+	static bool DFA(spu_thread&, spu_opcode_t);
+	static bool DFS(spu_thread&, spu_opcode_t);
+	static bool DFM(spu_thread&, spu_opcode_t);
+	static bool DFMA(spu_thread&, spu_opcode_t);
+	static bool DFMS(spu_thread&, spu_opcode_t);
+	static bool DFNMS(spu_thread&, spu_opcode_t);
+	static bool DFNMA(spu_thread&, spu_opcode_t);
+	static bool FSCRRD(spu_thread&, spu_opcode_t);
+	static bool FESD(spu_thread&, spu_opcode_t);
+	static bool FRDS(spu_thread&, spu_opcode_t);
+	static bool FSCRWR(spu_thread&, spu_opcode_t);
+	static bool FCEQ(spu_thread&, spu_opcode_t);
+	static bool FCMEQ(spu_thread&, spu_opcode_t);
+	static bool FI(spu_thread&, spu_opcode_t);
+	static bool CFLTS(spu_thread&, spu_opcode_t);
+	static bool CFLTU(spu_thread&, spu_opcode_t);
+	static bool CSFLT(spu_thread&, spu_opcode_t);
+	static bool CUFLT(spu_thread&, spu_opcode_t);
+	static bool FNMS(spu_thread&, spu_opcode_t);
+	static bool FMA(spu_thread&, spu_opcode_t);
+	static bool FMS(spu_thread&, spu_opcode_t);
 };
diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index 9bd8b5abe4..0fa85f78c8 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -211,7 +211,7 @@ spu_recompiler_base::~spu_recompiler_base()
 {
 }
 
-void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
+void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
 {
 	// If code verification failed from a patched patchpoint, clear it with a single NOP
 	if (rip)
@@ -255,7 +255,7 @@ void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
 	}
 }
 
-void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip)
+void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
 {
 	// Compile (TODO: optimize search of the existing functions)
 	const auto func = verify(HERE, spu.jit->compile(spu.jit->block(spu._ptr<u32>(0), spu.pc)));
@@ -1692,7 +1692,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
 			const auto cblock = m_ir->GetInsertBlock();
 			const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
 			m_ir->SetInsertPoint(result);
-			m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&SPUThread::pc));
+			m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&spu_thread::pc));
 			tail(add_function(target));
 			m_ir->SetInsertPoint(cblock);
 			return result;
@@ -1708,8 +1708,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
 			const auto cblock = m_ir->GetInsertBlock();
 			const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
 			m_ir->SetInsertPoint(result);
-			m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&SPUThread::pc));
-			const auto addr = m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&SPUThread::jit_dispatcher) + target * 2));
+			m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&spu_thread::pc));
+			const auto addr = m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher) + target * 2));
 			const auto type = llvm::FunctionType::get(get_type<void>(), {get_type<u8*>(), get_type<u8*>(), get_type<u32>()}, false)->getPointerTo()->getPointerTo();
 			tail(m_ir->CreateLoad(m_ir->CreateBitCast(addr, type)));
 			m_ir->SetInsertPoint(cblock);
@@ -1789,15 +1789,15 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
 	{
 		if (index < 128)
 		{
-			return ::offset32(&SPUThread::gpr, index);
+			return ::offset32(&spu_thread::gpr, index);
 		}
 
 		switch (index)
 		{
-		case s_reg_mfc_eal: return ::offset32(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::eal);
-		case s_reg_mfc_lsa: return ::offset32(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::lsa);
-		case s_reg_mfc_tag: return ::offset32(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::tag);
-		case s_reg_mfc_size: return ::offset32(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::size);
+		case s_reg_mfc_eal: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::eal);
+		case s_reg_mfc_lsa: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::lsa);
+		case s_reg_mfc_tag: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::tag);
+		case s_reg_mfc_size: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::size);
 		default:
 			fmt::throw_exception("get_reg_offset(%u): invalid register index" HERE, index);
 		}
@@ -2183,19 +2183,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
 
 	void update_pc()
 	{
-		m_ir->CreateStore(m_ir->getInt32(m_pos), spu_ptr<u32>(&SPUThread::pc))->setVolatile(true);
+		m_ir->CreateStore(m_ir->getInt32(m_pos), spu_ptr<u32>(&spu_thread::pc))->setVolatile(true);
 	}
 
 	// Call cpu_thread::check_state if necessary and return or continue (full check)
 	void check_state(u32 addr)
 	{
-		const auto pstate = spu_ptr<u32>(&SPUThread::state);
+		const auto pstate = spu_ptr<u32>(&spu_thread::state);
 		const auto _body = llvm::BasicBlock::Create(m_context, "", m_function);
 		const auto check = llvm::BasicBlock::Create(m_context, "", m_function);
 		const auto stop  = llvm::BasicBlock::Create(m_context, "", m_function);
 		m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(pstate), m_ir->getInt32(0)), _body, check);
 		m_ir->SetInsertPoint(check);
-		m_ir->CreateStore(m_ir->getInt32(addr), spu_ptr<u32>(&SPUThread::pc));
+		m_ir->CreateStore(m_ir->getInt32(addr), spu_ptr<u32>(&spu_thread::pc));
 		m_ir->CreateCondBr(call(&exec_check_state, m_thread), stop, _body);
 		m_ir->SetInsertPoint(stop);
 		m_ir->CreateRetVoid();
@@ -2386,7 +2386,7 @@ public:
 		const auto label_stop = BasicBlock::Create(m_context, "", m_function);
 
 		// Emit state check
-		const auto pstate = spu_ptr<u32>(&SPUThread::state);
+		const auto pstate = spu_ptr<u32>(&spu_thread::state);
 		m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(pstate, true), m_ir->getInt32(0)), label_stop, label_test);
 
 		// Emit code check
@@ -2482,7 +2482,7 @@ public:
 
 		// Increase block counter with statistics
 		m_ir->SetInsertPoint(label_body);
-		const auto pbcount = spu_ptr<u64>(&SPUThread::block_counter);
+		const auto pbcount = spu_ptr<u64>(&spu_thread::block_counter);
 		m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(pbcount), m_ir->getInt64(check_iterations)), pbcount);
 
 		// Call the entry function chunk
@@ -2497,7 +2497,7 @@ public:
 
 		if (g_cfg.core.spu_verification)
 		{
-			const auto pbfail = spu_ptr<u64>(&SPUThread::block_failure);
+			const auto pbfail = spu_ptr<u64>(&spu_thread::block_failure);
 			m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(pbfail), m_ir->getInt64(1)), pbfail);
 			tail(&spu_recompiler_base::dispatch, m_thread, m_ir->getInt32(0), m_ir->getInt32(0));
 		}
@@ -3001,13 +3001,13 @@ public:
 		return fn;
 	}
 
-	static bool exec_check_state(SPUThread* _spu)
+	static bool exec_check_state(spu_thread* _spu)
 	{
 		return _spu->check_state();
 	}
 
 	template <spu_inter_func_t F>
-	static void exec_fall(SPUThread* _spu, spu_opcode_t op)
+	static void exec_fall(spu_thread* _spu, spu_opcode_t op)
 	{
 		if (F(*_spu, op))
 		{
@@ -3022,7 +3022,7 @@ public:
 		call(&exec_fall<F>, m_thread, m_ir->getInt32(op.opcode));
 	}
 
-	static void exec_unk(SPUThread* _spu, u32 op)
+	static void exec_unk(spu_thread* _spu, u32 op)
 	{
 		fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op);
 	}
@@ -3034,7 +3034,7 @@ public:
 		tail(&exec_unk, m_thread, m_ir->getInt32(op_unk.opcode));
 	}
 
-	static bool exec_stop(SPUThread* _spu, u32 code)
+	static bool exec_stop(spu_thread* _spu, u32 code)
 	{
 		return _spu->stop_and_signal(code);
 	}
@@ -3053,7 +3053,7 @@ public:
 		if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
 		{
 			m_block->block_end = m_ir->GetInsertBlock();
-			m_ir->CreateStore(m_ir->getInt32(m_pos + 4), spu_ptr<u32>(&SPUThread::pc));
+			m_ir->CreateStore(m_ir->getInt32(m_pos + 4), spu_ptr<u32>(&spu_thread::pc));
 			m_ir->CreateRetVoid();
 		}
 	}
@@ -3063,18 +3063,18 @@ public:
 		STOP(spu_opcode_t{0x3fff});
 	}
 
-	static s64 exec_rdch(SPUThread* _spu, u32 ch)
+	static s64 exec_rdch(spu_thread* _spu, u32 ch)
 	{
 		return _spu->get_ch_value(ch);
 	}
 
-	static s64 exec_read_in_mbox(SPUThread* _spu)
+	static s64 exec_read_in_mbox(spu_thread* _spu)
 	{
 		// TODO
 		return _spu->get_ch_value(SPU_RdInMbox);
 	}
 
-	static u32 exec_read_dec(SPUThread* _spu)
+	static u32 exec_read_dec(spu_thread* _spu)
 	{
 		const u32 res = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
 
@@ -3086,7 +3086,7 @@ public:
 		return res;
 	}
 
-	static s64 exec_read_events(SPUThread* _spu)
+	static s64 exec_read_events(spu_thread* _spu)
 	{
 		if (const u32 events = _spu->get_events())
 		{
@@ -3139,7 +3139,7 @@ public:
 		{
 		case SPU_RdSRR0:
 		{
-			res.value = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::srr0));
+			res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::srr0));
 			break;
 		}
 		case SPU_RdInMbox:
@@ -3157,32 +3157,32 @@ public:
 		}
 		case MFC_RdTagStat:
 		{
-			res.value = get_rdch(op, ::offset32(&SPUThread::ch_tag_stat), false);
+			res.value = get_rdch(op, ::offset32(&spu_thread::ch_tag_stat), false);
 			break;
 		}
 		case MFC_RdTagMask:
 		{
-			res.value = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::ch_tag_mask));
+			res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_tag_mask));
 			break;
 		}
 		case SPU_RdSigNotify1:
 		{
-			res.value = get_rdch(op, ::offset32(&SPUThread::ch_snr1), true);
+			res.value = get_rdch(op, ::offset32(&spu_thread::ch_snr1), true);
 			break;
 		}
 		case SPU_RdSigNotify2:
 		{
-			res.value = get_rdch(op, ::offset32(&SPUThread::ch_snr2), true);
+			res.value = get_rdch(op, ::offset32(&spu_thread::ch_snr2), true);
 			break;
 		}
 		case MFC_RdAtomicStat:
 		{
-			res.value = get_rdch(op, ::offset32(&SPUThread::ch_atomic_stat), false);
+			res.value = get_rdch(op, ::offset32(&spu_thread::ch_atomic_stat), false);
 			break;
 		}
 		case MFC_RdListStallStat:
 		{
-			res.value = get_rdch(op, ::offset32(&SPUThread::ch_stall_stat), false);
+			res.value = get_rdch(op, ::offset32(&spu_thread::ch_stall_stat), false);
 			break;
 		}
 		case SPU_RdDec:
@@ -3192,7 +3192,7 @@ public:
 		}
 		case SPU_RdEventMask:
 		{
-			res.value = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::ch_event_mask));
+			res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_event_mask));
 			break;
 		}
 		case SPU_RdEventStat:
@@ -3210,7 +3210,7 @@ public:
 		}
 		case SPU_RdMachStat:
 		{
-			res.value = m_ir->CreateZExt(m_ir->CreateLoad(spu_ptr<u8>(&SPUThread::interrupts_enabled)), get_type<u32>());
+			res.value = m_ir->CreateZExt(m_ir->CreateLoad(spu_ptr<u8>(&spu_thread::interrupts_enabled)), get_type<u32>());
 			break;
 		}
 
@@ -3232,12 +3232,12 @@ public:
 		set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
 	}
 
-	static u32 exec_rchcnt(SPUThread* _spu, u32 ch)
+	static u32 exec_rchcnt(spu_thread* _spu, u32 ch)
 	{
 		return _spu->get_ch_count(ch);
 	}
 
-	static u32 exec_get_events(SPUThread* _spu)
+	static u32 exec_get_events(spu_thread* _spu)
 	{
 		return _spu->get_events();
 	}
@@ -3257,55 +3257,55 @@ public:
 		{
 		case SPU_WrOutMbox:
 		{
-			res.value = get_rchcnt(::offset32(&SPUThread::ch_out_mbox), true);
+			res.value = get_rchcnt(::offset32(&spu_thread::ch_out_mbox), true);
 			break;
 		}
 		case SPU_WrOutIntrMbox:
 		{
-			res.value = get_rchcnt(::offset32(&SPUThread::ch_out_intr_mbox), true);
+			res.value = get_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true);
 			break;
 		}
 		case MFC_RdTagStat:
 		{
-			res.value = get_rchcnt(::offset32(&SPUThread::ch_tag_stat));
+			res.value = get_rchcnt(::offset32(&spu_thread::ch_tag_stat));
 			break;
 		}
 		case MFC_RdListStallStat:
 		{
-			res.value = get_rchcnt(::offset32(&SPUThread::ch_stall_stat));
+			res.value = get_rchcnt(::offset32(&spu_thread::ch_stall_stat));
 			break;
 		}
 		case SPU_RdSigNotify1:
 		{
-			res.value = get_rchcnt(::offset32(&SPUThread::ch_snr1));
+			res.value = get_rchcnt(::offset32(&spu_thread::ch_snr1));
 			break;
 		}
 		case SPU_RdSigNotify2:
 		{
-			res.value = get_rchcnt(::offset32(&SPUThread::ch_snr2));
+			res.value = get_rchcnt(::offset32(&spu_thread::ch_snr2));
 			break;
 		}
 		case MFC_RdAtomicStat:
 		{
-			res.value = get_rchcnt(::offset32(&SPUThread::ch_atomic_stat));
+			res.value = get_rchcnt(::offset32(&spu_thread::ch_atomic_stat));
 			break;
 		}
 		case MFC_WrTagUpdate:
 		{
-			res.value = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::ch_tag_upd), true);
+			res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_tag_upd), true);
 			res.value = m_ir->CreateICmpEQ(res.value, m_ir->getInt32(0));
 			res.value = m_ir->CreateZExt(res.value, get_type<u32>());
 			break;
 		}
 		case MFC_Cmd:
 		{
-			res.value = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::mfc_size), true);
+			res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::mfc_size), true);
 			res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
 			break;
 		}
 		case SPU_RdInMbox:
 		{
-			res.value = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::ch_in_mbox), true);
+			res.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_in_mbox), true);
 			res.value = m_ir->CreateLShr(res.value, 8);
 			res.value = m_ir->CreateAnd(res.value, 7);
 			break;
@@ -3328,17 +3328,17 @@ public:
 		set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
 	}
 
-	static bool exec_wrch(SPUThread* _spu, u32 ch, u32 value)
+	static bool exec_wrch(spu_thread* _spu, u32 ch, u32 value)
 	{
 		return _spu->set_ch_value(ch, value);
 	}
 
-	static void exec_mfc(SPUThread* _spu)
+	static void exec_mfc(spu_thread* _spu)
 	{
 		return _spu->do_mfc();
 	}
 
-	static bool exec_mfc_cmd(SPUThread* _spu)
+	static bool exec_mfc_cmd(spu_thread* _spu)
 	{
 		return _spu->process_mfc_cmd(_spu->ch_mfc_cmd);
 	}
@@ -3351,7 +3351,7 @@ public:
 		{
 		case SPU_WrSRR0:
 		{
-			m_ir->CreateStore(val.value, spu_ptr<u32>(&SPUThread::srr0));
+			m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::srr0));
 			return;
 		}
 		case SPU_WrOutIntrMbox:
@@ -3367,7 +3367,7 @@ public:
 		case MFC_WrTagMask:
 		{
 			// TODO
-			m_ir->CreateStore(val.value, spu_ptr<u32>(&SPUThread::ch_tag_mask));
+			m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_tag_mask));
 			return;
 		}
 		case MFC_WrTagUpdate:
@@ -3376,11 +3376,11 @@ public:
 			{
 				const u64 upd = ci->getZExtValue();
 
-				const auto tag_mask  = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::ch_tag_mask));
-				const auto mfc_fence = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::mfc_fence));
+				const auto tag_mask  = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::ch_tag_mask));
+				const auto mfc_fence = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::mfc_fence));
 				const auto completed = m_ir->CreateAnd(tag_mask, m_ir->CreateNot(mfc_fence));
-				const auto upd_ptr   = spu_ptr<u32>(&SPUThread::ch_tag_upd);
-				const auto stat_ptr  = spu_ptr<u64>(&SPUThread::ch_tag_stat);
+				const auto upd_ptr   = spu_ptr<u32>(&spu_thread::ch_tag_upd);
+				const auto stat_ptr  = spu_ptr<u64>(&spu_thread::ch_tag_stat);
 				const auto stat_val  = m_ir->CreateOr(m_ir->CreateZExt(completed, get_type<u64>()), INT64_MIN);
 
 				if (upd == 0)
@@ -3424,7 +3424,7 @@ public:
 			}
 
 			LOG_WARNING(SPU, "[0x%x] MFC_EAH: $%u is not a zero constant", m_pos, +op.rt);
-			//m_ir->CreateStore(val.value, spu_ptr<u32>(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::eah));
+			//m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::eah));
 			return;
 		}
 		case MFC_EAL:
@@ -3468,8 +3468,8 @@ public:
 				const auto fail = llvm::BasicBlock::Create(m_context, "", m_function);
 				const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
 
-				const auto pf = spu_ptr<u32>(&SPUThread::mfc_fence);
-				const auto pb = spu_ptr<u32>(&SPUThread::mfc_barrier);
+				const auto pf = spu_ptr<u32>(&spu_thread::mfc_fence);
+				const auto pb = spu_ptr<u32>(&spu_thread::mfc_barrier);
 
 				switch (u64 cmd = ci->getZExtValue())
 				{
@@ -3494,7 +3494,7 @@ public:
 					m_ir->SetInsertPoint(fail);
 					m_ir->CreateUnreachable();
 					m_ir->SetInsertPoint(next);
-					m_ir->CreateStore(ci, spu_ptr<u8>(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::cmd));
+					m_ir->CreateStore(ci, spu_ptr<u8>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::cmd));
 					call(&exec_mfc_cmd, m_thread);
 					return;
 				}
@@ -3609,7 +3609,7 @@ public:
 				case MFC_EIEIO_CMD:
 				case MFC_SYNC_CMD:
 				{
-					const auto cond = m_ir->CreateIsNull(m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::mfc_size)));
+					const auto cond = m_ir->CreateIsNull(m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::mfc_size)));
 					m_ir->CreateCondBr(cond, exec, fail);
 					m_ir->SetInsertPoint(exec);
 					m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
@@ -3631,8 +3631,8 @@ public:
 				m_ir->SetInsertPoint(fail);
 
 				// Get MFC slot, redirect to invalid memory address
-				const auto slot = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::mfc_size));
-				const auto off0 = m_ir->CreateAdd(m_ir->CreateMul(slot, m_ir->getInt32(sizeof(spu_mfc_cmd))), m_ir->getInt32(::offset32(&SPUThread::mfc_queue)));
+				const auto slot = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::mfc_size));
+				const auto off0 = m_ir->CreateAdd(m_ir->CreateMul(slot, m_ir->getInt32(sizeof(spu_mfc_cmd))), m_ir->getInt32(::offset32(&spu_thread::mfc_queue)));
 				const auto ptr0 = m_ir->CreateGEP(m_thread, m_ir->CreateZExt(off0, get_type<u64>()));
 				const auto ptr1 = m_ir->CreateGEP(m_memptr, m_ir->getInt64(0xffdeadf0));
 				const auto pmfc = m_ir->CreateSelect(m_ir->CreateICmpULT(slot, m_ir->getInt32(16)), ptr0, ptr1);
@@ -3695,7 +3695,7 @@ public:
 				}
 				}
 
-				m_ir->CreateStore(m_ir->CreateAdd(slot, m_ir->getInt32(1)), spu_ptr<u32>(&SPUThread::mfc_size));
+				m_ir->CreateStore(m_ir->CreateAdd(slot, m_ir->getInt32(1)), spu_ptr<u32>(&spu_thread::mfc_size));
 				m_ir->CreateBr(next);
 				m_ir->SetInsertPoint(next);
 				return;
@@ -3708,7 +3708,7 @@ public:
 		case MFC_WrListStallAck:
 		{
 			const auto mask = eval(splat<u32>(1) << (val & 0x1f));
-			const auto _ptr = spu_ptr<u32>(&SPUThread::ch_stall_mask);
+			const auto _ptr = spu_ptr<u32>(&spu_thread::ch_stall_mask);
 			const auto _old = m_ir->CreateLoad(_ptr);
 			const auto _new = m_ir->CreateAnd(_old, m_ir->CreateNot(mask.value));
 			m_ir->CreateStore(_new, _ptr);
@@ -3723,18 +3723,18 @@ public:
 		}
 		case SPU_WrDec:
 		{
-			m_ir->CreateStore(call(&get_timebased_time), spu_ptr<u64>(&SPUThread::ch_dec_start_timestamp));
-			m_ir->CreateStore(val.value, spu_ptr<u32>(&SPUThread::ch_dec_value));
+			m_ir->CreateStore(call(&get_timebased_time), spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
+			m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_dec_value));
 			return;
 		}
 		case SPU_WrEventMask:
 		{
-			m_ir->CreateStore(val.value, spu_ptr<u32>(&SPUThread::ch_event_mask))->setVolatile(true);
+			m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_event_mask))->setVolatile(true);
 			return;
 		}
 		case SPU_WrEventAck:
 		{
-			m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::And, spu_ptr<u32>(&SPUThread::ch_event_stat), eval(~val).value, llvm::AtomicOrdering::Release);
+			m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::And, spu_ptr<u32>(&spu_thread::ch_event_stat), eval(~val).value, llvm::AtomicOrdering::Release);
 			return;
 		}
 		case 69:
@@ -3769,7 +3769,7 @@ public:
 		if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
 		{
 			m_block->block_end = m_ir->GetInsertBlock();
-			m_ir->CreateStore(m_ir->getInt32(m_pos + 4), spu_ptr<u32>(&SPUThread::pc));
+			m_ir->CreateStore(m_ir->getInt32(m_pos + 4), spu_ptr<u32>(&spu_thread::pc));
 			m_ir->CreateRetVoid();
 		}
 	}
@@ -5330,7 +5330,7 @@ public:
 		const auto halt = llvm::BasicBlock::Create(m_context, "", m_function);
 		m_ir->CreateCondBr(cond.value, halt, next);
 		m_ir->SetInsertPoint(halt);
-		const auto pstatus = spu_ptr<u32>(&SPUThread::status);
+		const auto pstatus = spu_ptr<u32>(&spu_thread::status);
 		const auto chalt = m_ir->getInt32(SPU_STATUS_STOPPED_BY_HALT);
 		m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, pstatus, chalt, llvm::AtomicOrdering::Release)->setVolatile(true);
 		const auto ptr = _ptr<u32>(m_memptr, 0xffdead00);
@@ -5391,7 +5391,7 @@ public:
 	}
 
 	// TODO
-	static u32 exec_check_interrupts(SPUThread* _spu, u32 addr)
+	static u32 exec_check_interrupts(spu_thread* _spu, u32 addr)
 	{
 		_spu->set_interrupt_status(true);
 
@@ -5464,18 +5464,18 @@ public:
 
 		if (op.d)
 		{
-			m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&SPUThread::interrupts_enabled))->setVolatile(true);
+			m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true);
 		}
 
-		m_ir->CreateStore(addr.value, spu_ptr<u32>(&SPUThread::pc));
+		m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
 		const auto type = llvm::FunctionType::get(get_type<void>(), {get_type<u8*>(), get_type<u8*>(), get_type<u32>()}, false)->getPointerTo()->getPointerTo();
-		const auto disp = m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&SPUThread::jit_dispatcher))), type);
+		const auto disp = m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher))), type);
 		const auto ad64 = m_ir->CreateZExt(addr.value, get_type<u64>());
 
 		if (ret && g_cfg.core.spu_block_size != spu_block_size_type::safe)
 		{
 			// Compare address stored in stack mirror with addr
-			const auto stack0 = eval(zext<u64>(sp) + ::offset32(&SPUThread::stack_mirror));
+			const auto stack0 = eval(zext<u64>(sp) + ::offset32(&spu_thread::stack_mirror));
 			const auto stack1 = eval(stack0 + 8);
 			const auto _ret = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), type));
 			const auto link = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack1.value), get_type<u64*>()));
@@ -5595,7 +5595,7 @@ public:
 
 			// Exit function on unexpected target
 			m_ir->SetInsertPoint(sw->getDefaultDest());
-			m_ir->CreateStore(addr.value, spu_ptr<u32>(&SPUThread::pc));
+			m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
 			m_ir->CreateRetVoid();
 		}
 		else
@@ -5617,7 +5617,7 @@ public:
 	{
 		m_block->block_end = m_ir->GetInsertBlock();
 		value_t<u32> srr0;
-		srr0.value = m_ir->CreateLoad(spu_ptr<u32>(&SPUThread::srr0));
+		srr0.value = m_ir->CreateLoad(spu_ptr<u32>(&spu_thread::srr0));
 		m_ir->CreateBr(add_block_indirect(op, srr0));
 	}
 
@@ -5716,7 +5716,7 @@ public:
 		{
 			// Store the return function chunk address at the stack mirror
 			const auto func = add_function(m_pos + 4);
-			const auto stack0 = eval(zext<u64>(extract(get_vr(1), 3) & 0x3fff0) + ::offset32(&SPUThread::stack_mirror));
+			const auto stack0 = eval(zext<u64>(extract(get_vr(1), 3) & 0x3fff0) + ::offset32(&spu_thread::stack_mirror));
 			const auto stack1 = eval(stack0 + 8);
 			m_ir->CreateStore(func, m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), func->getType()->getPointerTo()));
 			m_ir->CreateStore(m_ir->getInt64(m_pos + 4), m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack1.value), get_type<u64*>()));
diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h
index bcce0aebd8..f75ea57faa 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.h
+++ b/rpcs3/Emu/Cell/SPURecompiler.h
@@ -76,10 +76,10 @@ public:
 	virtual spu_function_t compile(std::vector<u32>&&) = 0;
 
 	// Default dispatch function fallback (second arg is unused)
-	static void dispatch(SPUThread&, void*, u8* rip);
+	static void dispatch(spu_thread&, void*, u8* rip);
 
 	// Target for the unresolved patch point (second arg is unused)
-	static void branch(SPUThread&, void*, u8* rip);
+	static void branch(spu_thread&, void*, u8* rip);
 
 	// Get the block at specified address
 	std::vector<u32> block(const be_t<u32>* ls, u32 lsa);
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 6c5eb16483..915267ef5a 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -140,7 +140,7 @@ namespace spu
 			u32 pc = 0;
 			bool active = false;
 
-			concurrent_execution_watchdog(SPUThread& spu)
+			concurrent_execution_watchdog(spu_thread& spu)
 				:pc(spu.pc)
 			{
 				if (g_cfg.core.preferred_spu_threads > 0)
@@ -391,35 +391,12 @@ spu_imm_table_t::spu_imm_table_t()
 	}
 }
 
-void SPUThread::on_spawn()
+std::string spu_thread::get_name() const
 {
-	if (g_cfg.core.thread_scheduler_enabled)
-	{
-		thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::spu));
-	}
-
-	if (g_cfg.core.lower_spu_priority)
-	{
-		thread_ctrl::set_native_priority(-1);
-	}
+	return fmt::format("%sSPU[0x%x] Thread (%s)", offset >= RAW_SPU_BASE_ADDR ? "Raw" : "", id, spu_name.get());
 }
 
-void SPUThread::on_init(const std::shared_ptr<void>& _this)
-{
-	if (!offset)
-	{
-		const_cast<u32&>(offset) = verify("SPU LS" HERE, vm::alloc(0x40000, vm::main));
-
-		cpu_thread::on_init(_this);
-	}
-}
-
-std::string SPUThread::get_name() const
-{
-	return fmt::format("%sSPU[0x%x] Thread (%s)", offset >= RAW_SPU_BASE_ADDR ? "Raw" : "", id, m_name);
-}
-
-std::string SPUThread::dump() const
+std::string spu_thread::dump() const
 {
 	std::string ret = cpu_thread::dump();
 
@@ -451,7 +428,7 @@ std::string SPUThread::dump() const
 	return ret;
 }
 
-void SPUThread::cpu_init()
+void spu_thread::cpu_init()
 {
 	gpr = {};
 	fpscr.Reset();
@@ -501,8 +478,15 @@ void SPUThread::cpu_init()
 
 extern thread_local std::string(*g_tls_log_prefix)();
 
-void SPUThread::cpu_task()
+void spu_thread::cpu_task()
 {
+	// Fetch the saved next PC from npc (resetting it to 0); bit 0 carries the SPU interrupt status
+	pc = npc.exchange(0);
+
+	set_interrupt_status((pc & 1) != 0);
+
+	pc &= 0x3fffc;
+
 	std::fesetround(FE_TOWARDZERO);
 
 	if (g_cfg.core.set_daz_and_ftz && g_cfg.core.spu_decoder != spu_decoder_type::precise)
@@ -513,9 +497,8 @@ void SPUThread::cpu_task()
 
 	g_tls_log_prefix = []
 	{
-		const auto cpu = static_cast<SPUThread*>(get_current_cpu_thread());
-
-		return fmt::format("%s [0x%05x]", cpu->get_name(), cpu->pc);
+		const auto cpu = static_cast<spu_thread*>(get_current_cpu_thread());
+		return fmt::format("%s [0x%05x]", thread_ctrl::get_name(), cpu->pc);
 	};
 
 	if (jit)
@@ -525,6 +508,9 @@ void SPUThread::cpu_task()
 			jit_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
 		}
 
+		// Save next PC for the next run; the current SPU interrupt status is OR-ed into bit 0
+		npc = pc | (interrupts_enabled);
+
 		// Print some stats
 		LOG_NOTICE(SPU, "Stats: Block Weight: %u (Retreats: %u);", block_counter, block_failure);
 		return;
@@ -548,7 +534,8 @@ void SPUThread::cpu_task()
 	{
 		if (UNLIKELY(state))
 		{
-			if (check_state()) return;
+			if (check_state())
+				break;
 
 			// Decode single instruction (may be step)
 			const u32 op = *reinterpret_cast<const be_t<u32>*>(base + pc);
@@ -606,29 +593,39 @@ void SPUThread::cpu_task()
 			break;
 		}
 	}
+
+	// Save next PC for the next run; the current SPU interrupt status is OR-ed into bit 0
+	npc = pc | (interrupts_enabled);
 }
 
-void SPUThread::cpu_mem()
+void spu_thread::cpu_mem()
 {
 	//vm::passive_lock(*this);
 }
 
-void SPUThread::cpu_unmem()
+void spu_thread::cpu_unmem()
 {
 	//state.test_and_set(cpu_flag::memory);
 }
 
-SPUThread::~SPUThread()
+spu_thread::~spu_thread()
 {
 	// Deallocate Local Storage
 	vm::dealloc_verbose_nothrow(offset);
+
+	// Deallocate RawSPU ID
+	if (!group && offset >= RAW_SPU_BASE_ADDR)
+	{
+		g_raw_spu_id[index] = 0;
+		g_raw_spu_ctr--;
+	}
 }
 
-SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
+spu_thread::spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::string_view name)
 	: cpu_thread(idm::last_id())
-	, m_name(name)
+	, spu_name(name)
 	, index(index)
-	, offset(0)
+	, offset(ls)
 	, group(group)
 {
 	if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
@@ -652,9 +649,14 @@ SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
 			std::memset(stack_mirror.data(), 0xff, sizeof(stack_mirror));
 		}
 	}
+
+	if (!group && offset >= RAW_SPU_BASE_ADDR)
+	{
+		cpu_init();
+	}
 }
 
-void SPUThread::push_snr(u32 number, u32 value)
+void spu_thread::push_snr(u32 number, u32 value)
 {
 	// Get channel
 	const auto channel = number & 1 ? &ch_snr2 : &ch_snr1;
@@ -670,7 +672,7 @@ void SPUThread::push_snr(u32 number, u32 value)
 	}
 }
 
-void SPUThread::do_dma_transfer(const spu_mfc_cmd& args)
+void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
 {
 	const bool is_get = (args.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_START_MASK)) == MFC_GET_CMD;
 
@@ -686,7 +688,7 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args)
 		if (eal < SYS_SPU_THREAD_BASE_LOW)
 		{
 			// RawSPU MMIO
-			auto thread = idm::get<RawSPUThread>((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET);
+			auto thread = idm::get<named_thread<spu_thread>>(find_raw_spu((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET));
 
 			if (!thread)
 			{
@@ -717,7 +719,7 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args)
 		}
 		else if (group && group->threads[index])
 		{
-			auto& spu = static_cast<SPUThread&>(*group->threads[index]);
+			auto& spu = static_cast<spu_thread&>(*group->threads[index]);
 
 			if (offset + args.size - 1 < 0x40000) // LS access
 			{
@@ -890,7 +892,7 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args)
 	}
 }
 
-bool SPUThread::do_dma_check(const spu_mfc_cmd& args)
+bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
 {
 	const u32 mask = 1u << args.tag;
 
@@ -940,7 +942,7 @@ bool SPUThread::do_dma_check(const spu_mfc_cmd& args)
 	return true;
 }
 
-bool SPUThread::do_list_transfer(spu_mfc_cmd& args)
+bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
 {
 	struct list_element
 	{
@@ -994,7 +996,7 @@ bool SPUThread::do_list_transfer(spu_mfc_cmd& args)
 	return true;
 }
 
-void SPUThread::do_putlluc(const spu_mfc_cmd& args)
+void spu_thread::do_putlluc(const spu_mfc_cmd& args)
 {
 	const u32 addr = args.eal & -128u;
 
@@ -1047,7 +1049,7 @@ void SPUThread::do_putlluc(const spu_mfc_cmd& args)
 	vm::reservation_notifier(addr, 128).notify_all();
 }
 
-void SPUThread::do_mfc(bool wait)
+void spu_thread::do_mfc(bool wait)
 {
 	u32 removed = 0;
 	u32 barrier = 0;
@@ -1149,17 +1151,17 @@ void SPUThread::do_mfc(bool wait)
 	}
 }
 
-u32 SPUThread::get_mfc_completed()
+u32 spu_thread::get_mfc_completed()
 {
 	return ch_tag_mask & ~mfc_fence;
 }
 
-bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
+bool spu_thread::process_mfc_cmd(spu_mfc_cmd args)
 {
 	// Stall infinitely if MFC queue is full
 	while (UNLIKELY(mfc_size >= 16))
 	{
-		if (state & cpu_flag::stop)
+		if (is_stopped())
 		{
 			return false;
 		}
@@ -1192,7 +1194,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
 
 			while (rdata == data && vm::reservation_acquire(raddr, 128) == rtime)
 			{
-				if (state & cpu_flag::stop)
+				if (is_stopped())
 				{
 					break;
 				}
@@ -1446,7 +1448,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
 		args.cmd, args.lsa, args.eal, args.tag, args.size);
 }
 
-u32 SPUThread::get_events(bool waiting)
+u32 spu_thread::get_events(bool waiting)
 {
 	const u32 mask1 = ch_event_mask;
 
@@ -1485,7 +1487,7 @@ u32 SPUThread::get_events(bool waiting)
 	});
 }
 
-void SPUThread::set_events(u32 mask)
+void spu_thread::set_events(u32 mask)
 {
 	if (mask & ~SPU_EVENT_IMPLEMENTED)
 	{
@@ -1502,7 +1504,7 @@ void SPUThread::set_events(u32 mask)
 	}
 }
 
-void SPUThread::set_interrupt_status(bool enable)
+void spu_thread::set_interrupt_status(bool enable)
 {
 	if (enable)
 	{
@@ -1520,7 +1522,7 @@ void SPUThread::set_interrupt_status(bool enable)
 	}
 }
 
-u32 SPUThread::get_ch_count(u32 ch)
+u32 spu_thread::get_ch_count(u32 ch)
 {
 	LOG_TRACE(SPU, "get_ch_count(ch=%d [%s])", ch, ch < 128 ? spu_ch_name[ch] : "???");
 
@@ -1542,7 +1544,7 @@ u32 SPUThread::get_ch_count(u32 ch)
 	fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???");
 }
 
-s64 SPUThread::get_ch_value(u32 ch)
+s64 spu_thread::get_ch_value(u32 ch)
 {
 	LOG_TRACE(SPU, "get_ch_value(ch=%d [%s])", ch, ch < 128 ? spu_ch_name[ch] : "???");
 
@@ -1557,7 +1559,7 @@ s64 SPUThread::get_ch_value(u32 ch)
 
 		while (!channel.try_pop(out))
 		{
-			if (state & cpu_flag::stop)
+			if (is_stopped())
 			{
 				return -1;
 			}
@@ -1595,7 +1597,7 @@ s64 SPUThread::get_ch_value(u32 ch)
 				return out;
 			}
 
-			if (state & cpu_flag::stop)
+			if (is_stopped())
 			{
 				return -1;
 			}
@@ -1699,7 +1701,7 @@ s64 SPUThread::get_ch_value(u32 ch)
 
 			while (res = get_events(), !res)
 			{
-				if (state & (cpu_flag::stop + cpu_flag::dbg_global_stop))
+				if (is_stopped())
 				{
 					return -1;
 				}
@@ -1712,7 +1714,7 @@ s64 SPUThread::get_ch_value(u32 ch)
 
 		while (res = get_events(true), !res)
 		{
-			if (state & cpu_flag::stop)
+			if (is_stopped())
 			{
 				return -1;
 			}
@@ -1734,7 +1736,7 @@ s64 SPUThread::get_ch_value(u32 ch)
 	fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???");
 }
 
-bool SPUThread::set_ch_value(u32 ch, u32 value)
+bool spu_thread::set_ch_value(u32 ch, u32 value)
 {
 	LOG_TRACE(SPU, "set_ch_value(ch=%d [%s], value=0x%x)", ch, ch < 128 ? spu_ch_name[ch] : "???", value);
 
@@ -1752,7 +1754,7 @@ bool SPUThread::set_ch_value(u32 ch, u32 value)
 		{
 			while (!ch_out_intr_mbox.try_push(value))
 			{
-				if (state & cpu_flag::stop)
+				if (is_stopped())
 				{
 					return false;
 				}
@@ -1898,7 +1900,7 @@ bool SPUThread::set_ch_value(u32 ch, u32 value)
 	{
 		while (!ch_out_mbox.try_push(value))
 		{
-			if (state & cpu_flag::stop)
+			if (is_stopped())
 			{
 				return false;
 			}
@@ -2043,7 +2045,7 @@ bool SPUThread::set_ch_value(u32 ch, u32 value)
 	fmt::throw_exception("Unknown/illegal channel (ch=%d [%s], value=0x%x)" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???", value);
 }
 
-bool SPUThread::stop_and_signal(u32 code)
+bool spu_thread::stop_and_signal(u32 code)
 {
 	LOG_TRACE(SPU, "stop_and_signal(code=0x%x)", code);
 
@@ -2087,7 +2089,7 @@ bool SPUThread::stop_and_signal(u32 code)
 		// HACK: wait for executable code
 		while (!_ref<u32>(pc))
 		{
-			if (state & cpu_flag::stop)
+			if (is_stopped())
 			{
 				return false;
 			}
@@ -2143,7 +2145,7 @@ bool SPUThread::stop_and_signal(u32 code)
 			// Check group status, wait if necessary
 			while (group->run_state >= SPU_THREAD_GROUP_STATUS_WAITING && group->run_state <= SPU_THREAD_GROUP_STATUS_SUSPENDED)
 			{
-				if (state & cpu_flag::stop)
+				if (is_stopped())
 				{
 					return false;
 				}
@@ -2212,7 +2214,7 @@ bool SPUThread::stop_and_signal(u32 code)
 
 		while (true)
 		{
-			if (state & cpu_flag::stop)
+			if (is_stopped())
 			{
 				return false;
 			}
@@ -2246,7 +2248,7 @@ bool SPUThread::stop_and_signal(u32 code)
 
 				if (thread.get() != this)
 				{
-					thread->notify();
+					thread_ctrl::notify(*thread);
 				}
 			}
 		}
@@ -2285,7 +2287,7 @@ bool SPUThread::stop_and_signal(u32 code)
 			if (thread && thread.get() != this)
 			{
 				thread->state += cpu_flag::stop;
-				thread->notify();
+				thread_ctrl::notify(*thread);
 			}
 		}
 
@@ -2329,7 +2331,7 @@ bool SPUThread::stop_and_signal(u32 code)
 	}
 }
 
-void SPUThread::halt()
+void spu_thread::halt()
 {
 	LOG_TRACE(SPU, "halt()");
 
@@ -2350,7 +2352,7 @@ void SPUThread::halt()
 	fmt::throw_exception("Halt" HERE);
 }
 
-void SPUThread::fast_call(u32 ls_addr)
+void spu_thread::fast_call(u32 ls_addr)
 {
 	// LS:0x0: this is originally the entry point of the interrupt handler, but interrupts are not implemented
 	_ref<u32>(0) = 0x00000002; // STOP 2
@@ -2378,3 +2380,6 @@ void SPUThread::fast_call(u32 ls_addr)
 	gpr[0]._u32[3] = old_lr;
 	gpr[1]._u32[3] = old_stack;
 }
+
+DECLARE(spu_thread::g_raw_spu_ctr){};
+DECLARE(spu_thread::g_raw_spu_id){};
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index baee513c58..7229888618 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -11,10 +11,8 @@ struct lv2_event_queue;
 struct lv2_spu_group;
 struct lv2_int_tag;
 
-class SPUThread;
-
 // JIT Block
-using spu_function_t = void(*)(SPUThread&, void*, u8*);
+using spu_function_t = void(*)(spu_thread&, void*, u8*);
 
 // SPU Channels
 enum : u32
@@ -500,24 +498,22 @@ public:
 	}
 };
 
-class SPUThread : public cpu_thread
+class spu_thread : public cpu_thread
 {
 public:
-	virtual void on_spawn() override;
-	virtual void on_init(const std::shared_ptr<void>&) override;
 	virtual std::string get_name() const override;
 	virtual std::string dump() const override;
-	virtual void cpu_task() override;
+	virtual void cpu_task() override final;
 	virtual void cpu_mem() override;
 	virtual void cpu_unmem() override;
-	virtual ~SPUThread() override;
+	virtual ~spu_thread() override;
 	void cpu_init();
 
 	static const u32 id_base = 0x02000000; // TODO (used to determine thread type)
 	static const u32 id_step = 1;
 	static const u32 id_count = 2048;
 
-	SPUThread(const std::string& name, u32 index, lv2_spu_group* group);
+	spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::string_view name);
 
 	u32 pc = 0;
 
@@ -578,7 +574,7 @@ public:
 	const u32 offset; // SPU LS offset
 	lv2_spu_group* const group; // SPU Thread Group
 
-	const std::string m_name; // Thread name
+	lf_value<std::string> spu_name; // Thread name
 
 	std::unique_ptr<class spu_recompiler_base> jit; // Recompiler instance
 
@@ -623,4 +619,20 @@ public:
 	{
 		return *_ptr<T>(lsa);
 	}
+
+	bool read_reg(const u32 addr, u32& value);
+	bool write_reg(const u32 addr, const u32 value);
+
+	static atomic_t<u32> g_raw_spu_ctr;
+	static atomic_t<u32> g_raw_spu_id[5];
+
+	// Returns the value stored in g_raw_spu_id[id], or u32(-1) when id is outside the table
+	static u32 find_raw_spu(u32 id)
+	{
+		if (UNLIKELY(id >= std::size(g_raw_spu_id)))
+		{
+			return -1;
+		}
+		return g_raw_spu_id[id];
+	}
 };
diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp
index 4d01b2816b..190594cc01 100644
--- a/rpcs3/Emu/Cell/lv2/lv2.cpp
+++ b/rpcs3/Emu/Cell/lv2/lv2.cpp
@@ -1002,13 +1002,13 @@ DECLARE(lv2_obj::g_ppu);
 DECLARE(lv2_obj::g_pending);
 DECLARE(lv2_obj::g_waiting);
 
-void lv2_obj::sleep_timeout(old_thread& thread, u64 timeout)
+void lv2_obj::sleep_timeout(cpu_thread& thread, u64 timeout)
 {
 	std::lock_guard lock(g_mutex);
 
 	const u64 start_time = get_system_time();
 
-	if (auto ppu = dynamic_cast<ppu_thread*>(&thread))
+	if (auto ppu = static_cast<ppu_thread*>(thread.id_type() == 1 ? &thread : nullptr))
 	{
 		LOG_TRACE(PPU, "sleep() - waiting (%zu)", g_pending.size());
 
@@ -1123,7 +1123,7 @@ void lv2_obj::awake(cpu_thread& cpu, u32 prio)
 	}
 
 	// Remove pending if necessary
-	if (!g_pending.empty() && cpu.get() == thread_ctrl::get_current())
+	if (!g_pending.empty() && &cpu == get_current_cpu_thread())
 	{
 		unqueue(g_pending, &cpu);
 	}
@@ -1165,7 +1165,7 @@ void lv2_obj::schedule_all()
 				target->state ^= (cpu_flag::signal + cpu_flag::suspend);
 				target->start_time = 0;
 
-				if (target->get() != thread_ctrl::get_current())
+				if (target != get_current_cpu_thread())
 				{
 					target->notify();
 				}
diff --git a/rpcs3/Emu/Cell/lv2/sys_cond.cpp b/rpcs3/Emu/Cell/lv2/sys_cond.cpp
index a0e4d09694..36a5d6f41a 100644
--- a/rpcs3/Emu/Cell/lv2/sys_cond.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_cond.cpp
@@ -241,6 +241,11 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout)
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
diff --git a/rpcs3/Emu/Cell/lv2/sys_event.cpp b/rpcs3/Emu/Cell/lv2/sys_event.cpp
index 5ea0742ac9..52b959afb7 100644
--- a/rpcs3/Emu/Cell/lv2/sys_event.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_event.cpp
@@ -57,7 +57,7 @@ bool lv2_event_queue::send(lv2_event event)
 	else
 	{
 		// Store event in In_MBox
-		auto& spu = static_cast<SPUThread&>(*sq.front());
+		auto& spu = static_cast<spu_thread&>(*sq.front());
 
 		// TODO: use protocol?
 		sq.pop_front();
@@ -180,7 +180,7 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode)
 			}
 			else
 			{
-				static_cast<SPUThread&>(*cpu).ch_in_mbox.set_values(1, CELL_ECANCELED);
+				static_cast<spu_thread&>(*cpu).ch_in_mbox.set_values(1, CELL_ECANCELED);
 				cpu->state += cpu_flag::signal;
 				cpu->notify();
 			}
@@ -271,6 +271,11 @@ error_code sys_event_queue_receive(ppu_thread& ppu, u32 equeue_id, vm::ptr<sys_e
 	// If cancelled, gpr[3] will be non-zero. Other registers must contain event data.
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
diff --git a/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp b/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp
index f3ff307498..9ce84fb138 100644
--- a/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_event_flag.cpp
@@ -165,6 +165,11 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
@@ -193,7 +198,11 @@ error_code sys_event_flag_wait(ppu_thread& ppu, u32 id, u64 bitptn, u32 mode, vm
 		}
 	}
 
-	ppu.test_state();
+	if (ppu.test_stopped())
+	{
+		return 0;
+	}
+
 	if (result) *result = ppu.gpr[6];
 	return not_an_error(ppu.gpr[3]);
 }
@@ -366,7 +375,11 @@ error_code sys_event_flag_cancel(ppu_thread& ppu, u32 id, vm::ptr<u32> num)
 		}
 	}
 
-	ppu.test_state();
+	if (ppu.test_stopped())
+	{
+		return 0;
+	}
+
 	if (num) *num = value;
 	return CELL_OK;
 }
diff --git a/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp b/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp
index 77c50e3c06..4185df50e9 100644
--- a/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp
@@ -8,8 +8,6 @@
 #include "Emu/Cell/PPUOpcodes.h"
 #include "sys_interrupt.h"
 
-
-
 LOG_CHANNEL(sys_interrupt);
 
 void lv2_int_serv::exec()
@@ -22,7 +20,7 @@ void lv2_int_serv::exec()
 		{ ppu_cmd::sleep, 0 }
 	});
 
-	thread->notify();
+	thread_ctrl::notify(*thread);
 }
 
 void lv2_int_serv::join()
@@ -35,8 +33,8 @@ void lv2_int_serv::join()
 		{ ppu_cmd::opcode, ppu_instructions::SC(0) },
 	});
 
-	thread->notify();
-	thread->join();
+	thread_ctrl::notify(*thread);
+	(*thread)();
 }
 
 error_code sys_interrupt_tag_destroy(u32 intrtag)
@@ -86,7 +84,7 @@ error_code _sys_interrupt_thread_establish(vm::ptr<u32> ih, u32 intrtag, u32 int
 		}
 
 		// Get interrupt thread
-		const auto it = idm::get_unlocked<ppu_thread>(intrthread);
+		const auto it = idm::get_unlocked<named_thread<ppu_thread>>(intrthread);
 
 		if (!it)
 		{
@@ -110,7 +108,8 @@ error_code _sys_interrupt_thread_establish(vm::ptr<u32> ih, u32 intrtag, u32 int
 
 		result = std::make_shared<lv2_int_serv>(it, arg1, arg2);
 		tag->handler = result;
-		it->run();
+		it->state -= cpu_flag::stop;
+		thread_ctrl::notify(*it);
 		return result;
 	});
 
@@ -131,7 +130,7 @@ error_code _sys_interrupt_thread_disestablish(ppu_thread& ppu, u32 ih, vm::ptr<u
 
 	if (!handler)
 	{
-		if (const auto thread = idm::withdraw<ppu_thread>(ih))
+		if (const auto thread = idm::withdraw<named_thread<ppu_thread>>(ih))
 		{
 			*r13 = thread->gpr[13];
 			return CELL_OK;
diff --git a/rpcs3/Emu/Cell/lv2/sys_interrupt.h b/rpcs3/Emu/Cell/lv2/sys_interrupt.h
index dca783b03e..6b87f59f56 100644
--- a/rpcs3/Emu/Cell/lv2/sys_interrupt.h
+++ b/rpcs3/Emu/Cell/lv2/sys_interrupt.h
@@ -15,11 +15,11 @@ struct lv2_int_serv final : lv2_obj
 {
 	static const u32 id_base = 0x0b000000;
 
-	const std::shared_ptr<ppu_thread> thread;
+	const std::shared_ptr<named_thread<ppu_thread>> thread;
 	const u64 arg1;
 	const u64 arg2;
 
-	lv2_int_serv(const std::shared_ptr<ppu_thread>& thread, u64 arg1, u64 arg2)
+	lv2_int_serv(const std::shared_ptr<named_thread<ppu_thread>>& thread, u64 arg1, u64 arg2)
 		: thread(thread)
 		, arg1(arg1)
 		, arg2(arg2)
diff --git a/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp b/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp
index 6bfe68a6fa..a4b65d1fc8 100644
--- a/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp
@@ -274,6 +274,11 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
@@ -290,7 +295,7 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id
 
 				cond->waiters--;
 
-				if (mutex->signaled.fetch_dec_sat())
+				if (mutex->signaled.try_dec())
 				{
 					ppu.gpr[3] = CELL_EDEADLK;
 					break;
diff --git a/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp b/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp
index 0708101be3..733a9acf23 100644
--- a/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp
@@ -112,6 +112,11 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout)
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
diff --git a/rpcs3/Emu/Cell/lv2/sys_mutex.cpp b/rpcs3/Emu/Cell/lv2/sys_mutex.cpp
index deaa413789..97e94aeeae 100644
--- a/rpcs3/Emu/Cell/lv2/sys_mutex.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_mutex.cpp
@@ -152,6 +152,11 @@ error_code sys_mutex_lock(ppu_thread& ppu, u32 mutex_id, u64 timeout)
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
diff --git a/rpcs3/Emu/Cell/lv2/sys_net.cpp b/rpcs3/Emu/Cell/lv2/sys_net.cpp
index fbe6662048..10076edd8e 100644
--- a/rpcs3/Emu/Cell/lv2/sys_net.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_net.cpp
@@ -112,7 +112,7 @@ static void network_clear_queue(ppu_thread& ppu)
 
 extern void network_thread_init()
 {
-	thread_ctrl::make_shared("Network Thread", []()
+	thread_ctrl::spawn("Network Thread", []()
 	{
 		std::vector<std::shared_ptr<lv2_socket>> socklist;
 		socklist.reserve(lv2_socket::id_count);
@@ -241,7 +241,7 @@ extern void network_thread_init()
 		CloseHandle(_eventh);
 		WSACleanup();
 #endif
-	})->detach();
+	});
 }
 
 lv2_socket::lv2_socket(lv2_socket::socket_type s)
@@ -338,6 +338,11 @@ s32 sys_net_bnet_accept(ppu_thread& ppu, s32 s, vm::ptr<sys_net_sockaddr> addr,
 	{
 		while (!ppu.state.test_and_reset(cpu_flag::signal))
 		{
+			if (ppu.is_stopped())
+			{
+				return 0;
+			}
+
 			thread_ctrl::wait();
 		}
 
@@ -546,6 +551,11 @@ s32 sys_net_bnet_connect(ppu_thread& ppu, s32 s, vm::ptr<sys_net_sockaddr> addr,
 	{
 		while (!ppu.state.test_and_reset(cpu_flag::signal))
 		{
+			if (ppu.is_stopped())
+			{
+				return 0;
+			}
+
 			thread_ctrl::wait();
 		}
 
@@ -946,6 +956,11 @@ s32 sys_net_bnet_recvfrom(ppu_thread& ppu, s32 s, vm::ptr<void> buf, u32 len, s3
 	{
 		while (!ppu.state.test_and_reset(cpu_flag::signal))
 		{
+			if (ppu.is_stopped())
+			{
+				return 0;
+			}
+
 			thread_ctrl::wait();
 		}
 
@@ -1099,6 +1114,11 @@ s32 sys_net_bnet_sendto(ppu_thread& ppu, s32 s, vm::cptr<void> buf, u32 len, s32
 	{
 		while (!ppu.state.test_and_reset(cpu_flag::signal))
 		{
+			if (ppu.is_stopped())
+			{
+				return 0;
+			}
+
 			thread_ctrl::wait();
 		}
 
@@ -1546,6 +1566,11 @@ s32 sys_net_bnet_poll(ppu_thread& ppu, vm::ptr<sys_net_pollfd> fds, s32 nfds, s3
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
@@ -1740,6 +1765,11 @@ s32 sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr<sys_net_fd_set> readf
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp
index e99ae658ef..066957f0c8 100644
--- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp
@@ -9,8 +9,6 @@
 #include "sys_event.h"
 #include "sys_mmapper.h"
 
-
-
 LOG_CHANNEL(sys_ppu_thread);
 
 void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode)
@@ -40,15 +38,15 @@ void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode)
 
 	if (jid == -1)
 	{
-		// Delete detached thread and unqueue
-		idm::remove<ppu_thread>(ppu.id);
+		// Detach detached thread, id will be removed on cleanup
+		static_cast<named_thread<ppu_thread>&>(ppu) = thread_state::detached;
 	}
 	else if (jid != 0)
 	{
 		std::lock_guard lock(id_manager::g_mutex);
 
 		// Schedule joiner and unqueue
-		lv2_obj::awake(*idm::check_unlocked<ppu_thread>(jid), -2);
+		lv2_obj::awake(*idm::check_unlocked<named_thread<ppu_thread>>(jid), -2);
 	}
 
 	// Unqueue
@@ -71,7 +69,7 @@ error_code sys_ppu_thread_join(ppu_thread& ppu, u32 thread_id, vm::ptr<u64> vptr
 
 	sys_ppu_thread.trace("sys_ppu_thread_join(thread_id=0x%x, vptr=*0x%x)", thread_id, vptr);
 
-	const auto thread = idm::get<ppu_thread>(thread_id, [&](ppu_thread& thread) -> CellError
+	const auto thread = idm::get<named_thread<ppu_thread>>(thread_id, [&](ppu_thread& thread) -> CellError
 	{
 		CellError result = thread.joiner.atomic_op([&](u32& value) -> CellError
 		{
@@ -120,17 +118,21 @@ error_code sys_ppu_thread_join(ppu_thread& ppu, u32 thread_id, vm::ptr<u64> vptr
 	}
 
 	// Wait for cleanup
-	thread->join();
+	(*thread.ptr)();
 
 	// Get the exit status from the register
 	if (vptr)
 	{
-		ppu.test_state();
+		if (ppu.test_stopped())
+		{
+			return 0;
+		}
+
 		*vptr = thread->gpr[3];
 	}
 
 	// Cleanup
-	idm::remove<ppu_thread>(thread->id);
+	idm::remove<named_thread<ppu_thread>>(thread->id);
 	return CELL_OK;
 }
 
@@ -138,7 +140,7 @@ error_code sys_ppu_thread_detach(u32 thread_id)
 {
 	sys_ppu_thread.trace("sys_ppu_thread_detach(thread_id=0x%x)", thread_id);
 
-	const auto thread = idm::check<ppu_thread>(thread_id, [&](ppu_thread& thread) -> CellError
+	const auto thread = idm::check<named_thread<ppu_thread>>(thread_id, [&](ppu_thread& thread) -> CellError
 	{
 		return thread.joiner.atomic_op([&](u32& value) -> CellError
 		{
@@ -180,7 +182,7 @@ error_code sys_ppu_thread_detach(u32 thread_id)
 
 	if (thread.ret == CELL_EAGAIN)
 	{
-		idm::remove<ppu_thread>(thread_id);
+		idm::remove<named_thread<ppu_thread>>(thread_id);
 	}
 
 	return CELL_OK;
@@ -202,7 +204,7 @@ error_code sys_ppu_thread_set_priority(ppu_thread& ppu, u32 thread_id, s32 prio)
 		return CELL_EINVAL;
 	}
 
-	const auto thread = idm::check<ppu_thread>(thread_id, [&](ppu_thread& thread)
+	const auto thread = idm::check<named_thread<ppu_thread>>(thread_id, [&](ppu_thread& thread)
 	{
 		if (thread.prio != prio && thread.prio.exchange(prio) != prio)
 		{
@@ -222,7 +224,7 @@ error_code sys_ppu_thread_get_priority(u32 thread_id, vm::ptr<s32> priop)
 {
 	sys_ppu_thread.trace("sys_ppu_thread_get_priority(thread_id=0x%x, priop=*0x%x)", thread_id, priop);
 
-	const auto thread = idm::check<ppu_thread>(thread_id, [&](ppu_thread& thread)
+	const auto thread = idm::check<named_thread<ppu_thread>>(thread_id, [&](ppu_thread& thread)
 	{
 		*priop = thread.prio;
 	});
@@ -249,7 +251,7 @@ error_code sys_ppu_thread_stop(u32 thread_id)
 {
 	sys_ppu_thread.todo("sys_ppu_thread_stop(thread_id=0x%x)", thread_id);
 
-	const auto thread = idm::get<ppu_thread>(thread_id);
+	const auto thread = idm::get<named_thread<ppu_thread>>(thread_id);
 
 	if (!thread)
 	{
@@ -263,7 +265,7 @@ error_code sys_ppu_thread_restart(u32 thread_id)
 {
 	sys_ppu_thread.todo("sys_ppu_thread_restart(thread_id=0x%x)", thread_id);
 
-	const auto thread = idm::get<ppu_thread>(thread_id);
+	const auto thread = idm::get<named_thread<ppu_thread>>(thread_id);
 
 	if (!thread)
 	{
@@ -273,10 +275,10 @@ error_code sys_ppu_thread_restart(u32 thread_id)
 	return CELL_OK;
 }
 
-error_code _sys_ppu_thread_create(vm::ptr<u64> thread_id, vm::ptr<ppu_thread_param_t> param, u64 arg, u64 unk, s32 prio, u32 stacksize, u64 flags, vm::cptr<char> threadname)
+error_code _sys_ppu_thread_create(vm::ptr<u64> thread_id, vm::ptr<ppu_thread_param_t> param, u64 arg, u64 unk, s32 prio, u32 _stacksz, u64 flags, vm::cptr<char> threadname)
 {
 	sys_ppu_thread.warning("_sys_ppu_thread_create(thread_id=*0x%x, param=*0x%x, arg=0x%llx, unk=0x%llx, prio=%d, stacksize=0x%x, flags=0x%llx, threadname=%s)",
-		thread_id, param, arg, unk, prio, stacksize, flags, threadname);
+		thread_id, param, arg, unk, prio, _stacksz, flags, threadname);
 
 	if (prio < 0 || prio > 3071)
 	{
@@ -288,33 +290,38 @@ error_code _sys_ppu_thread_create(vm::ptr<u64> thread_id, vm::ptr<ppu_thread_par
 		return CELL_EPERM;
 	}
 
-	const u32 tid = idm::import<ppu_thread>([&]()
+	// Compute actual stack size (4096-aligned, capped at 0x100000, 0x4000 if below 4096) and allocate exactly that much
+	const u32 stack_size = _stacksz >= 4096 ? ::align(std::min<u32>(_stacksz, 0x100000), 4096) : 0x4000;
+
+	const vm::addr_t stack_base{vm::alloc(stack_size, vm::stack, 4096)};
+
+	if (!stack_base)
 	{
-		auto ppu = std::make_shared<ppu_thread>(threadname ? threadname.get_ptr() : "", prio, stacksize);
+		return CELL_ENOMEM;
+	}
 
-		if ((flags & SYS_PPU_THREAD_CREATE_JOINABLE) != 0)
+	const u32 tid = idm::import<named_thread<ppu_thread>>([&]()
+	{
+		const u32 tid = idm::last_id();
+
+		std::string ppu_name;
+		std::string full_name = fmt::format("PPU[0x%x] Thread", tid);
+
+		if (threadname)
 		{
-			ppu->joiner = 0;
+			ppu_name = threadname.get_ptr();
+			fmt::append(full_name, " (%s)", ppu_name);
 		}
 
-		ppu->gpr[13] = param->tls.value();
+		ppu_thread_params p;
+		p.stack_addr = stack_base;
+		p.stack_size = stack_size;
+		p.tls_addr = param->tls;
+		p.entry = param->entry;
+		p.arg0 = arg;
+		p.arg1 = unk;
 
-		if ((flags & SYS_PPU_THREAD_CREATE_INTERRUPT) == 0)
-		{
-			// Initialize thread entry point
-			ppu->cmd_list
-			({
-				{ ppu_cmd::set_args, 2 }, arg, unk, // Actually unknown
-				{ ppu_cmd::lle_call, param->entry.value() },
-			});
-		}
-		else
-		{
-			// Save entry for further use (workaround)
-			ppu->gpr[2] = param->entry.value();
-		}
-
-		return ppu;
+		return std::make_shared<named_thread<ppu_thread>>(full_name, p, ppu_name, prio, 1 - static_cast<int>(flags & 3));
 	});
 
 	if (!tid)
@@ -330,7 +337,7 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id)
 {
 	sys_ppu_thread.trace("sys_ppu_thread_start(thread_id=0x%x)", thread_id);
 
-	const auto thread = idm::get<ppu_thread>(thread_id, [&](ppu_thread& thread)
+	const auto thread = idm::get<named_thread<ppu_thread>>(thread_id, [&](ppu_thread& thread)
 	{
 		lv2_obj::awake(thread, -2);
 	});
@@ -347,10 +354,10 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id)
 	}
 	else
 	{
-		thread->notify();
+		thread_ctrl::notify(*thread);
 
 		// Dirty hack for sound: confirm the creation of _mxr000 event queue
-		if (thread->m_name == "_cellsurMixerMain")
+		if (thread->ppu_name.get() == "_cellsurMixerMain"sv)
 		{
 			lv2_obj::sleep(ppu);
 
@@ -360,10 +367,18 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id)
 				return (eq.name == "_mxr000\0"_u64) || (eq.key == 0x8000cafe02460300);
 			}))
 			{
+				if (ppu.is_stopped())
+				{
+					return 0;
+				}
+
 				thread_ctrl::wait_for(50000);
 			}
 
-			ppu.test_state();
+			if (ppu.test_stopped())
+			{
+				return 0;
+			}
 		}
 	}
 
@@ -372,22 +387,26 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id)
 
 error_code sys_ppu_thread_rename(u32 thread_id, vm::cptr<char> name)
 {
-	sys_ppu_thread.todo("sys_ppu_thread_rename(thread_id=0x%x, name=%s)", thread_id, name);
+	sys_ppu_thread.warning("sys_ppu_thread_rename(thread_id=0x%x, name=%s)", thread_id, name);
 
-	const auto thread = idm::get<ppu_thread>(thread_id);
+	const auto thread = idm::get<named_thread<ppu_thread>>(thread_id);
 
 	if (!thread)
 	{
 		return CELL_ESRCH;
 	}
 
+	// thread_ctrl name is not changed (TODO)
+	thread->ppu_name.assign(name.get_ptr());
 	return CELL_OK;
 }
 
 error_code sys_ppu_thread_recover_page_fault(u32 thread_id)
 {
 	sys_ppu_thread.warning("sys_ppu_thread_recover_page_fault(thread_id=0x%x)", thread_id);
-	const auto thread = idm::get<ppu_thread>(thread_id);
+
+	const auto thread = idm::get<named_thread<ppu_thread>>(thread_id);
+
 	if (!thread)
 	{
 		return CELL_ESRCH;
@@ -421,7 +440,8 @@ error_code sys_ppu_thread_get_page_fault_context(u32 thread_id, vm::ptr<sys_ppu_
 {
 	sys_ppu_thread.todo("sys_ppu_thread_get_page_fault_context(thread_id=0x%x, ctxt=*0x%x)", thread_id, ctxt);
 
-	const auto thread = idm::get<ppu_thread>(thread_id);
+	const auto thread = idm::get<named_thread<ppu_thread>>(thread_id);
+
 	if (!thread)
 	{
 		return CELL_ESRCH;
diff --git a/rpcs3/Emu/Cell/lv2/sys_process.cpp b/rpcs3/Emu/Cell/lv2/sys_process.cpp
index a706e16f2c..42e6415fa6 100644
--- a/rpcs3/Emu/Cell/lv2/sys_process.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_process.cpp
@@ -246,7 +246,7 @@ void _sys_process_exit(ppu_thread& ppu, s32 status, u32 arg2, u32 arg3)
 		Emu.Stop();
 	});
 
-	thread_ctrl::eternalize();
+	ppu.state += cpu_flag::dbg_global_stop;
 }
 
 void _sys_process_exit2(ppu_thread& ppu, s32 status, vm::ptr<sys_exit2_param> arg, u32 arg_size, u32 arg4)
@@ -314,5 +314,5 @@ void _sys_process_exit2(ppu_thread& ppu, s32 status, vm::ptr<sys_exit2_param> ar
 		Emu.BootGame(path, true);
 	});
 
-	thread_ctrl::eternalize();
+	ppu.state += cpu_flag::dbg_global_stop;
 }
diff --git a/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp b/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp
index b1fa084896..31ffbc98d9 100644
--- a/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp
@@ -129,6 +129,11 @@ error_code sys_rwlock_rlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
@@ -318,6 +323,11 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout)
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
diff --git a/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp b/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp
index e38e5c46c1..a59d9680d5 100644
--- a/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp
@@ -123,6 +123,11 @@ error_code sys_semaphore_wait(ppu_thread& ppu, u32 sem_id, u64 timeout)
 
 	while (!ppu.state.test_and_reset(cpu_flag::signal))
 	{
+		if (ppu.is_stopped())
+		{
+			return 0;
+		}
+
 		if (timeout)
 		{
 			const u64 passed = get_system_time() - ppu.start_time;
diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp
index 1171c269cd..13202e07c3 100644
--- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp
@@ -15,8 +15,6 @@
 #include "sys_event.h"
 #include "sys_spu.h"
 
-
-
 LOG_CHANNEL(sys_spu);
 
 void sys_spu_image::load(const fs::file& stream)
@@ -233,11 +231,25 @@ error_code sys_spu_thread_initialize(vm::ptr<u32> thread, u32 group_id, u32 spu_
 		sys_spu.todo("Unimplemented SPU Thread options (0x%x)", option);
 	}
 
-	auto spu = idm::make_ptr<SPUThread>(thread_name, spu_num, group.get());
+	const vm::addr_t ls_addr{verify("SPU LS" HERE, vm::alloc(0x40000, vm::main))};
 
-	*thread = spu->id;
+	const u32 tid = idm::import<named_thread<spu_thread>>([&]()
+	{
+		const u32 tid = idm::last_id();
+
+		std::string full_name = fmt::format("SPU[0x%x] Thread", tid);
+
+		if (!thread_name.empty())
+		{
+			fmt::append(full_name, " (%s)", thread_name);
+		}
+
+		group->threads[spu_num] = std::make_shared<named_thread<spu_thread>>(full_name, ls_addr, group.get(), spu_num, thread_name);
+		return group->threads[spu_num];
+	});
+
+	*thread = tid;
 
-	group->threads[spu_num] = std::move(spu);
 	group->args[spu_num] = {arg->arg1, arg->arg2, arg->arg3, arg->arg4};
 	group->imgs[spu_num] = std::make_pair(*img, std::vector<sys_spu_segment>());
 	group->imgs[spu_num].second.assign(img->segs.get_ptr(), img->segs.get_ptr() + img->nsegs);
@@ -254,9 +266,9 @@ error_code sys_spu_thread_set_argument(u32 id, vm::ptr<sys_spu_thread_argument>
 {
 	sys_spu.warning("sys_spu_thread_set_argument(id=0x%x, arg=*0x%x)", id, arg);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -274,9 +286,9 @@ error_code sys_spu_thread_get_exit_status(u32 id, vm::ptr<u32> status)
 {
 	sys_spu.warning("sys_spu_thread_get_exit_status(id=0x%x, status=*0x%x)", id, status);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (UNLIKELY(!thread))
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -342,7 +354,7 @@ error_code sys_spu_thread_group_destroy(u32 id)
 	{
 		if (auto thread = std::move(ptr))
 		{
-			idm::remove<SPUThread>(thread->id);
+			idm::remove<named_thread<spu_thread>>(thread->id);
 		}
 	}
 
@@ -384,8 +396,8 @@ error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id)
 
 			sys_spu_image::deploy(thread->offset, img.second.data(), img.first.nsegs);
 
-			thread->pc = img.first.entry_point;
 			thread->cpu_init();
+			thread->npc = img.first.entry_point;
 			thread->gpr[3] = v128::from64(0, args[0]);
 			thread->gpr[4] = v128::from64(0, args[1]);
 			thread->gpr[5] = v128::from64(0, args[2]);
@@ -403,7 +415,8 @@ error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id)
 	{
 		if (thread)
 		{
-			thread->run();
+			thread->state -= cpu_flag::stop;
+			thread_ctrl::notify(*thread);
 		}
 	}
 
@@ -501,7 +514,7 @@ error_code sys_spu_thread_group_resume(u32 id)
 		if (thread)
 		{
 			thread->state -= cpu_flag::suspend;
-			thread->notify();
+			thread_ctrl::notify(*thread);
 		}
 	}
 
@@ -539,11 +552,11 @@ error_code sys_spu_thread_group_terminate(u32 id, s32 value)
 	sys_spu.warning("sys_spu_thread_group_terminate(id=0x%x, value=0x%x)", id, value);
 
 	// The id can be either SPU Thread Group or SPU Thread
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 	const auto _group = idm::get<lv2_spu_group>(id);
 	const auto group = thread ? thread->group : _group.get();
 
-	if (!group && !thread)
+	if (!group && (!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -581,7 +594,7 @@ error_code sys_spu_thread_group_terminate(u32 id, s32 value)
 		if (thread)
 		{
 			thread->state += cpu_flag::stop;
-			thread->notify();
+			thread_ctrl::notify(*thread);
 		}
 	}
 
@@ -627,6 +640,11 @@ error_code sys_spu_thread_group_join(ppu_thread& ppu, u32 id, vm::ptr<u32> cause
 
 		while ((group->join_state & ~SPU_TGJSF_IS_JOINING) == 0)
 		{
+			if (ppu.is_stopped())
+			{
+				return 0;
+			}
+
 			bool stopped = true;
 
 			for (auto& t : group->threads)
@@ -648,7 +666,6 @@ error_code sys_spu_thread_group_join(ppu_thread& ppu, u32 id, vm::ptr<u32> cause
 
 			// TODO
 			group->cv.wait(group->mutex, 1000);
-			thread_ctrl::test();
 		}
 
 		join_state = group->join_state;
@@ -657,7 +674,10 @@ error_code sys_spu_thread_group_join(ppu_thread& ppu, u32 id, vm::ptr<u32> cause
 		group->run_state = SPU_THREAD_GROUP_STATUS_INITIALIZED; // hack
 	}
 
-	ppu.test_state();
+	if (ppu.test_stopped())
+	{
+		return 0;
+	}
 
 	switch (join_state & ~SPU_TGJSF_IS_JOINING)
 	{
@@ -743,9 +763,9 @@ error_code sys_spu_thread_write_ls(u32 id, u32 lsa, u64 value, u32 type)
 {
 	sys_spu.trace("sys_spu_thread_write_ls(id=0x%x, lsa=0x%05x, value=0x%llx, type=%d)", id, lsa, value, type);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -780,9 +800,9 @@ error_code sys_spu_thread_read_ls(u32 id, u32 lsa, vm::ptr<u64> value, u32 type)
 {
 	sys_spu.trace("sys_spu_thread_read_ls(id=0x%x, lsa=0x%05x, value=*0x%x, type=%d)", id, lsa, value, type);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -817,9 +837,9 @@ error_code sys_spu_thread_write_spu_mb(u32 id, u32 value)
 {
 	sys_spu.warning("sys_spu_thread_write_spu_mb(id=0x%x, value=0x%x)", id, value);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -842,9 +862,9 @@ error_code sys_spu_thread_set_spu_cfg(u32 id, u64 value)
 {
 	sys_spu.warning("sys_spu_thread_set_spu_cfg(id=0x%x, value=0x%x)", id, value);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -863,9 +883,9 @@ error_code sys_spu_thread_get_spu_cfg(u32 id, vm::ptr<u64> value)
 {
 	sys_spu.warning("sys_spu_thread_get_spu_cfg(id=0x%x, value=*0x%x)", id, value);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -879,9 +899,9 @@ error_code sys_spu_thread_write_snr(u32 id, u32 number, u32 value)
 {
 	sys_spu.trace("sys_spu_thread_write_snr(id=0x%x, number=%d, value=0x%x)", id, number, value);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1016,10 +1036,10 @@ error_code sys_spu_thread_connect_event(u32 id, u32 eq, u32 et, u8 spup)
 {
 	sys_spu.warning("sys_spu_thread_connect_event(id=0x%x, eq=0x%x, et=%d, spup=%d)", id, eq, et, spup);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 	const auto queue = idm::get<lv2_obj, lv2_event_queue>(eq);
 
-	if (!thread || !queue)
+	if (UNLIKELY(!queue || !thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1048,9 +1068,9 @@ error_code sys_spu_thread_disconnect_event(u32 id, u32 et, u8 spup)
 {
 	sys_spu.warning("sys_spu_thread_disconnect_event(id=0x%x, et=%d, spup=%d)", id, et, spup);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1079,10 +1099,10 @@ error_code sys_spu_thread_bind_queue(u32 id, u32 spuq, u32 spuq_num)
 {
 	sys_spu.warning("sys_spu_thread_bind_queue(id=0x%x, spuq=0x%x, spuq_num=0x%x)", id, spuq, spuq_num);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 	const auto queue = idm::get<lv2_obj, lv2_event_queue>(spuq);
 
-	if (!thread || !queue)
+	if (UNLIKELY(!queue || !thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1123,9 +1143,9 @@ error_code sys_spu_thread_unbind_queue(u32 id, u32 spuq_num)
 {
 	sys_spu.warning("sys_spu_thread_unbind_queue(id=0x%x, spuq_num=0x%x)", id, spuq_num);
 
-	const auto thread = idm::get<SPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(id);
 
-	if (!thread)
+	if (UNLIKELY(!thread || !thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1251,16 +1271,27 @@ error_code sys_raw_spu_create(vm::ptr<u32> id, vm::ptr<void> attr)
 
 	// TODO: check number set by sys_spu_initialize()
 
-	const auto thread = idm::make_ptr<RawSPUThread>("");
-
-	if (!thread)
+	if (!spu_thread::g_raw_spu_ctr.try_inc(5))
 	{
 		return CELL_EAGAIN;
 	}
 
-	thread->cpu_init();
+	u32 index = 0;
 
-	*id = thread->index;
+	// Find free RawSPU ID: cycle through the 5 slots until try_inc(1) succeeds on one
+	while (!spu_thread::g_raw_spu_id[index].try_inc(1))
+	{
+		if (++index == 5)
+			index = 0;
+	}
+
+	const vm::addr_t ls_addr{verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, 0x40000, vm::spu))};
+
+	const u32 tid = idm::make<named_thread<spu_thread>>(fmt::format("RawSPU[0x%x] Thread", index), ls_addr, nullptr, index, "");
+
+	spu_thread::g_raw_spu_id[index] = verify("RawSPU ID" HERE, tid);
+
+	*id = index;
 
 	return CELL_OK;
 }
@@ -1269,9 +1300,9 @@ error_code sys_raw_spu_destroy(ppu_thread& ppu, u32 id)
 {
 	sys_spu.warning("sys_raw_spu_destroy(id=%d)", id);
 
-	const auto thread = idm::get<RawSPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-	if (!thread)
+	if (UNLIKELY(!thread || thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1320,8 +1351,7 @@ error_code sys_raw_spu_destroy(ppu_thread& ppu, u32 id)
 			idm::remove<lv2_obj, lv2_int_serv>(pair.second);
 	}
 
-	idm::remove<RawSPUThread>(thread->id);
-
+	idm::remove<named_thread<spu_thread>>(thread->id);
 	return CELL_OK;
 }
 
@@ -1340,9 +1370,9 @@ error_code sys_raw_spu_create_interrupt_tag(u32 id, u32 class_id, u32 hwthread,
 	{
 		std::shared_ptr<lv2_int_tag> result;
 
-		auto thread = idm::check_unlocked<RawSPUThread>(id);
+		auto thread = idm::check_unlocked<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-		if (!thread)
+		if (!thread || thread->group)
 		{
 			error = CELL_ESRCH;
 			return result;
@@ -1379,9 +1409,9 @@ error_code sys_raw_spu_set_int_mask(u32 id, u32 class_id, u64 mask)
 		return CELL_EINVAL;
 	}
 
-	const auto thread = idm::get<RawSPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-	if (!thread)
+	if (UNLIKELY(!thread || thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1400,9 +1430,9 @@ error_code sys_raw_spu_get_int_mask(u32 id, u32 class_id, vm::ptr<u64> mask)
 		return CELL_EINVAL;
 	}
 
-	const auto thread = idm::get<RawSPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-	if (!thread)
+	if (UNLIKELY(!thread || thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1421,9 +1451,9 @@ error_code sys_raw_spu_set_int_stat(u32 id, u32 class_id, u64 stat)
 		return CELL_EINVAL;
 	}
 
-	const auto thread = idm::get<RawSPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-	if (!thread)
+	if (UNLIKELY(!thread || thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1442,9 +1472,9 @@ error_code sys_raw_spu_get_int_stat(u32 id, u32 class_id, vm::ptr<u64> stat)
 		return CELL_EINVAL;
 	}
 
-	const auto thread = idm::get<RawSPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-	if (!thread)
+	if (UNLIKELY(!thread || thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1458,9 +1488,9 @@ error_code sys_raw_spu_read_puint_mb(u32 id, vm::ptr<u32> value)
 {
 	sys_spu.trace("sys_raw_spu_read_puint_mb(id=%d, value=*0x%x)", id, value);
 
-	const auto thread = idm::get<RawSPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-	if (!thread)
+	if (UNLIKELY(!thread || thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1479,9 +1509,9 @@ error_code sys_raw_spu_set_spu_cfg(u32 id, u32 value)
 		fmt::throw_exception("Unexpected value (0x%x)" HERE, value);
 	}
 
-	const auto thread = idm::get<RawSPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-	if (!thread)
+	if (UNLIKELY(!thread || thread->group))
 	{
 		return CELL_ESRCH;
 	}
@@ -1495,9 +1525,9 @@ error_code sys_raw_spu_get_spu_cfg(u32 id, vm::ptr<u32> value)
 {
 	sys_spu.trace("sys_raw_spu_get_spu_afg(id=%d, value=*0x%x)", id, value);
 
-	const auto thread = idm::get<RawSPUThread>(id);
+	const auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(id));
 
-	if (!thread)
+	if (UNLIKELY(!thread || thread->group))
 	{
 		return CELL_ESRCH;
 	}
diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.h b/rpcs3/Emu/Cell/lv2/sys_spu.h
index 7d192c3164..fd96649a48 100644
--- a/rpcs3/Emu/Cell/lv2/sys_spu.h
+++ b/rpcs3/Emu/Cell/lv2/sys_spu.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "sys_event.h"
+#include "Emu/Cell/SPUThread.h"
 
 enum : s32
 {
@@ -214,8 +215,6 @@ enum : u32
 	SPU_TGJSF_GROUP_EXIT = (1 << 2), // set if SPU Thread Group is terminated by sys_spu_thread_group_exit
 };
 
-class SPUThread;
-
 struct lv2_spu_group
 {
 	static const u32 id_base = 1; // Wrong?
@@ -236,7 +235,7 @@ struct lv2_spu_group
 	atomic_t<u32> join_state; // flags used to detect exit cause
 	cond_variable cv; // used to signal waiting PPU thread
 
-	std::array<std::shared_ptr<SPUThread>, 256> threads; // SPU Threads
+	std::array<std::shared_ptr<named_thread<spu_thread>>, 256> threads; // SPU Threads
 	std::array<std::pair<sys_spu_image, std::vector<sys_spu_segment>>, 256> imgs; // SPU Images
 	std::array<std::array<u64, 4>, 256> args; // SPU Thread Arguments
 
diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h
index ec4e5aeea8..f9f53ec5fb 100644
--- a/rpcs3/Emu/Cell/lv2/sys_sync.h
+++ b/rpcs3/Emu/Cell/lv2/sys_sync.h
@@ -114,7 +114,7 @@ struct lv2_obj
 	}
 
 	// Remove the current thread from the scheduling queue, register timeout
-	static void sleep_timeout(old_thread&, u64 timeout);
+	static void sleep_timeout(cpu_thread&, u64 timeout);
 
 	static void sleep(cpu_thread& thread, u64 timeout = 0)
 	{
@@ -224,7 +224,7 @@ private:
 	static std::deque<class cpu_thread*> g_pending;
 
 	// Scheduler queue for timeouts (wait until -> thread)
-	static std::deque<std::pair<u64, old_thread*>> g_waiting;
+	static std::deque<std::pair<u64, class cpu_thread*>> g_waiting;
 
 	static void schedule_all();
 };
diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.cpp b/rpcs3/Emu/Cell/lv2/sys_timer.cpp
index e59f00d4fa..9603e61e30 100644
--- a/rpcs3/Emu/Cell/lv2/sys_timer.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_timer.cpp
@@ -11,13 +11,11 @@
 
 #include <thread>
 
-
-
 LOG_CHANNEL(sys_timer);
 
 extern u64 get_system_time();
 
-void lv2_timer::on_task()
+void lv2_timer_context::operator()()
 {
 	while (!Emu.IsStopped())
 	{
@@ -50,7 +48,6 @@ void lv2_timer::on_task()
 			}
 
 			// TODO: use single global dedicated thread for busy waiting, no timer threads
-			lv2_obj::sleep_timeout(*this, next - _now);
 			thread_ctrl::wait_for(next - _now);
 		}
 		else if (_state == SYS_TIMER_STATE_STOP)
@@ -64,19 +61,17 @@ void lv2_timer::on_task()
 	}
 }
 
-void lv2_timer::on_stop()
+void lv2_timer_context::on_abort()
 {
 	// Signal thread using invalid state
 	state = -1;
-	notify();
-	join();
 }
 
 error_code sys_timer_create(vm::ptr<u32> timer_id)
 {
 	sys_timer.warning("sys_timer_create(timer_id=*0x%x)", timer_id);
 
-	if (const u32 id = idm::make<lv2_obj, lv2_timer>())
+	if (const u32 id = idm::make<lv2_obj, lv2_timer>("Timer Thread"))
 	{
 		*timer_id = id;
 		return CELL_OK;
@@ -155,7 +150,7 @@ error_code _sys_timer_start(u32 timer_id, u64 base_time, u64 period)
 
 	const auto timer = idm::check<lv2_obj, lv2_timer>(timer_id, [&](lv2_timer& timer) -> CellError
 	{
-		std::lock_guard lock(timer.mutex);
+		std::unique_lock lock(timer.mutex);
 
 		if (timer.state != SYS_TIMER_STATE_STOP)
 		{
@@ -171,7 +166,9 @@ error_code _sys_timer_start(u32 timer_id, u64 base_time, u64 period)
 		timer.expire = base_time ? base_time : start_time + period;
 		timer.period = period;
 		timer.state  = SYS_TIMER_STATE_RUN;
-		timer.notify();
+
+		lock.unlock();
+		thread_ctrl::notify(timer);
 		return {};
 	});
 
@@ -311,6 +308,11 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time)
 
 		while (sleep_time >= passed)
 		{
+			if (ppu.is_stopped())
+			{
+				return 0;
+			}
+
 			remaining = sleep_time - passed;
 
 			if (remaining > host_min_quantum)
diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.h b/rpcs3/Emu/Cell/lv2/sys_timer.h
index 15f058917f..b24a4d16ae 100644
--- a/rpcs3/Emu/Cell/lv2/sys_timer.h
+++ b/rpcs3/Emu/Cell/lv2/sys_timer.h
@@ -17,12 +17,12 @@ struct sys_timer_information_t
 	be_t<u32> pad;
 };
 
-struct lv2_timer final : public lv2_obj, public old_thread
+struct lv2_timer_context : lv2_obj
 {
 	static const u32 id_base = 0x11000000;
 
-	void on_task() override;
-	void on_stop() override;
+	void operator()();
+	void on_abort();
 
 	semaphore<> mutex;
 	atomic_t<u32> state{SYS_TIMER_STATE_STOP};
@@ -36,6 +36,8 @@ struct lv2_timer final : public lv2_obj, public old_thread
 	atomic_t<u64> period{0}; // Period (oneshot if 0)
 };
 
+using lv2_timer = named_thread<lv2_timer_context>;
+
 class ppu_thread;
 
 // Syscalls
diff --git a/rpcs3/Emu/IdManager.cpp b/rpcs3/Emu/IdManager.cpp
index 9f9f355c26..19f42c2224 100644
--- a/rpcs3/Emu/IdManager.cpp
+++ b/rpcs3/Emu/IdManager.cpp
@@ -1,5 +1,6 @@
 #include "stdafx.h"
 #include "IdManager.h"
+#include "Utilities/Thread.h"
 
 shared_mutex id_manager::g_mutex;
 
@@ -23,7 +24,7 @@ id_manager::id_map::pointer idm::allocate_id(const id_manager::id_key& info, u32
 		if (_next >= base && _next < base + step * count)
 		{
 			g_id = _next;
-			vec.emplace_back(id_manager::id_key(_next, info.type(), info.on_stop()), nullptr);
+			vec.emplace_back(id_manager::id_key(_next, info.type()), nullptr);
 			return &vec.back();
 		}
 	}
@@ -37,7 +38,7 @@ id_manager::id_map::pointer idm::allocate_id(const id_manager::id_key& info, u32
 		if (!ptr->second)
 		{
 			g_id = next;
-			ptr->first = id_manager::id_key(next, info.type(), info.on_stop());
+			ptr->first = id_manager::id_key(next, info.type());
 			return ptr;
 		}
 	}
@@ -60,12 +61,8 @@ void idm::clear()
 	{
 		for (auto& pair : map)
 		{
-			if (auto ptr = pair.second.get())
-			{
-				pair.first.on_stop()(ptr);
-				pair.second.reset();
-				pair.first = {};
-			}
+			pair.second.reset();
+			pair.first = {};
 		}
 
 		map.clear();
@@ -82,13 +79,8 @@ void fxm::init()
 void fxm::clear()
 {
 	// Call recorded finalization functions for all IDs
-	for (auto& pair : g_vec)
+	for (auto& val : g_vec)
 	{
-		if (auto ptr = pair.second.get())
-		{
-			pair.first(ptr);
-			pair.second.reset();
-			pair.first = nullptr;
-		}
+		val.reset();
 	}
 }
diff --git a/rpcs3/Emu/IdManager.h b/rpcs3/Emu/IdManager.h
index d7b093133c..7e536b9265 100644
--- a/rpcs3/Emu/IdManager.h
+++ b/rpcs3/Emu/IdManager.h
@@ -35,46 +35,6 @@ namespace id_manager
 		static_assert(u64{step} * count + base < UINT32_MAX, "ID traits: invalid object range");
 	};
 
-	// Optional object initialization function (called after ID registration)
-	template <typename T, typename = void>
-	struct on_init
-	{
-		static inline void func(T*, const std::shared_ptr<void>&)
-		{
-			// Forbid forward declarations
-			static constexpr auto size = sizeof(std::conditional_t<std::is_void<T>::value, void*, T>);
-		}
-	};
-
-	template <typename T>
-	struct on_init<T, decltype(std::declval<T>().on_init(std::declval<const std::shared_ptr<void>&>()))>
-	{
-		static inline void func(T* ptr, const std::shared_ptr<void>& _ptr)
-		{
-			if (ptr) ptr->on_init(_ptr);
-		}
-	};
-
-	// Optional object finalization function (called after ID removal)
-	template <typename T, typename = void>
-	struct on_stop
-	{
-		static inline void func(T*)
-		{
-			// Forbid forward declarations
-			static constexpr auto size = sizeof(std::conditional_t<std::is_void<T>::value, void*, T>);
-		}
-	};
-
-	template <typename T>
-	struct on_stop<T, decltype(std::declval<T>().on_stop())>
-	{
-		static inline void func(T* ptr)
-		{
-			if (ptr) ptr->on_stop();
-		}
-	};
-
 	// Correct usage testing
 	template <typename T, typename T2, typename = void>
 	struct id_verify : std::integral_constant<bool, std::is_base_of<T, T2>::value>
@@ -118,16 +78,6 @@ namespace id_manager
 		{
 			return add_type(0);
 		}
-
-		// Get type finalizer
-		template <typename T>
-		static inline auto get_stop()
-		{
-			return [](void* ptr) -> void
-			{
-				return id_manager::on_stop<T>::func(static_cast<T*>(ptr));
-			};
-		}
 	};
 
 	template <typename T>
@@ -138,15 +88,13 @@ namespace id_manager
 	{
 		u32 m_value;           // ID value
 		u32 m_type;            // True object type
-		void (*m_stop)(void*); // Finalizer
 
 	public:
 		id_key() = default;
 
-		id_key(u32 value, u32 type, void (*stop)(void*))
+		id_key(u32 value, u32 type)
 			: m_value(value)
 			, m_type(type)
-			, m_stop(stop)
 		{
 		}
 
@@ -160,11 +108,6 @@ namespace id_manager
 			return m_type;
 		}
 
-		auto on_stop() const
-		{
-			return m_stop;
-		}
-
 		operator u32() const
 		{
 			return m_value;
@@ -301,7 +244,7 @@ class idm
 		static_assert(id_manager::id_verify<T, Type>::value, "Invalid ID type combination");
 
 		// ID info
-		const id_manager::id_key info{get_type<T>(), get_type<Type>(), id_manager::typeinfo::get_stop<Type>()};
+		const id_manager::id_key info{get_type<T>(), get_type<Type>()};
 
 		// ID traits
 		using traits = id_manager::id_traits<Type>;
@@ -342,7 +285,6 @@ public:
 	{
 		if (auto pair = create_id<T, Make>([&] { return std::make_shared<Make>(std::forward<Args>(args)...); }))
 		{
-			id_manager::on_init<Make>::func(static_cast<Make*>(pair->second.get()), pair->second);
 			return {pair->second, static_cast<Make*>(pair->second.get())};
 		}
 
@@ -355,7 +297,6 @@ public:
 	{
 		if (auto pair = create_id<T, Make>([&] { return std::make_shared<Make>(std::forward<Args>(args)...); }))
 		{
-			id_manager::on_init<Make>::func(static_cast<Make*>(pair->second.get()), pair->second);
 			return pair->first;
 		}
 
@@ -368,7 +309,6 @@ public:
 	{
 		if (auto pair = create_id<T, Made>([&] { return ptr; }))
 		{
-			id_manager::on_init<Made>::func(static_cast<Made*>(pair->second.get()), pair->second);
 			return pair->first;
 		}
 
@@ -381,7 +321,6 @@ public:
 	{
 		if (auto pair = create_id<T, Made>(std::forward<F>(provider)))
 		{
-			id_manager::on_init<Made>::func(static_cast<Made*>(pair->second.get()), pair->second);
 			return pair->first;
 		}
 
@@ -572,7 +511,6 @@ public:
 			}
 		}
 
-		id_manager::on_stop<Get>::func(static_cast<Get*>(ptr.get()));
 		return true;
 	}
 
@@ -594,7 +532,6 @@ public:
 			}
 		}
 
-		id_manager::on_stop<Get>::func(static_cast<Get*>(ptr.get()));
 		return {ptr, static_cast<Get*>(ptr.get())};
 	}
 
@@ -612,8 +549,6 @@ public:
 			{
 				func(*_ptr);
 				std::shared_ptr<void> ptr = std::move(found->second);
-				lock.unlock();
-				id_manager::on_stop<Get>::func(static_cast<Get*>(ptr.get()));
 				return {ptr, static_cast<Get*>(ptr.get())};
 			}
 			else
@@ -627,8 +562,6 @@ public:
 				}
 
 				std::shared_ptr<void> ptr = std::move(found->second);
-				lock.unlock();
-				id_manager::on_stop<Get>::func(static_cast<Get*>(ptr.get()));
 				return {{ptr, static_cast<Get*>(ptr.get())}, std::move(ret)};
 			}
 		}
@@ -641,7 +574,7 @@ public:
 class fxm
 {
 	// Type Index -> Object. Use global since only one process is supported atm.
-	static std::vector<std::pair<void(*)(void*), std::shared_ptr<void>>> g_vec;
+	static std::vector<std::shared_ptr<void>> g_vec;
 
 	template <typename T>
 	static inline u32 get_type()
@@ -664,14 +597,12 @@ public:
 		{
 			std::lock_guard lock(id_manager::g_mutex);
 
-			auto& pair = g_vec[get_type<T>()];
+			auto& cur = g_vec[get_type<T>()];
 
-			if (!pair.second)
+			if (!cur)
 			{
 				ptr = std::make_shared<Make>(std::forward<Args>(args)...);
-
-				pair.first = id_manager::typeinfo::get_stop<T>();
-				pair.second = ptr;
+				cur = ptr;
 			}
 			else
 			{
@@ -679,7 +610,6 @@ public:
 			}
 		}
 
-		id_manager::on_init<T>::func(ptr.get(), ptr);
 		return ptr;
 	}
 
@@ -692,21 +622,13 @@ public:
 		{
 			std::lock_guard lock(id_manager::g_mutex);
 
-			auto& pair = g_vec[get_type<T>()];
+			auto& cur = g_vec[get_type<T>()];
 
 			ptr = std::make_shared<Make>(std::forward<Args>(args)...);
-			old = std::move(pair.second);
-
-			pair.first = id_manager::typeinfo::get_stop<T>();
-			pair.second = ptr;
+			old = std::move(cur);
+			cur = ptr;
 		}
 
-		if (old)
-		{
-			id_manager::on_stop<T>::func(static_cast<T*>(old.get()));
-		}
-
-		id_manager::on_init<T>::func(ptr.get(), ptr);
 		return ptr;
 	}
 
@@ -718,16 +640,15 @@ public:
 		{
 			std::lock_guard lock(id_manager::g_mutex);
 
-			auto& pair = g_vec[get_type<T>()];
+			auto& cur = g_vec[get_type<T>()];
 
-			if (!pair.second)
+			if (!cur)
 			{
 				ptr = provider();
 
 				if (ptr)
 				{
-					pair.first = id_manager::typeinfo::get_stop<T>();
-					pair.second = ptr;
+					cur = ptr;
 				}
 			}
 
@@ -737,7 +658,6 @@ public:
 			}
 		}
 
-		id_manager::on_init<T>::func(ptr.get(), ptr);
 		return ptr;
 	}
 
@@ -750,16 +670,14 @@ public:
 		{
 			std::lock_guard lock(id_manager::g_mutex);
 
-			auto& pair = g_vec[get_type<T>()];
+			auto& cur = g_vec[get_type<T>()];
 
 			ptr = provider();
 
 			if (ptr)
 			{
-				old = std::move(pair.second);
-
-				pair.first = id_manager::typeinfo::get_stop<T>();
-				pair.second = ptr;
+				old = std::move(cur);
+				cur = ptr;
 			}
 			else
 			{
@@ -767,12 +685,6 @@ public:
 			}
 		}
 
-		if (old)
-		{
-			id_manager::on_stop<T>::func(static_cast<T*>(old.get()));
-		}
-
-		id_manager::on_init<T>::func(ptr.get(), ptr);
 		return ptr;
 	}
 
@@ -784,22 +696,19 @@ public:
 		{
 			std::lock_guard lock(id_manager::g_mutex);
 
-			auto& pair = g_vec[get_type<T>()];
+			auto& old = g_vec[get_type<T>()];
 
-			if (auto& old = pair.second)
+			if (old)
 			{
 				return {old, static_cast<T*>(old.get())};
 			}
 			else
 			{
 				ptr = std::make_shared<Make>(std::forward<Args>(args)...);
-
-				pair.first = id_manager::typeinfo::get_stop<T>();
-				pair.second = ptr;
+				old = ptr;
 			}
 		}
 
-		id_manager::on_init<T>::func(ptr.get(), ptr);
 		return ptr;
 	}
 
@@ -807,7 +716,7 @@ public:
 	template <typename T>
 	static inline T* check_unlocked()
 	{
-		return static_cast<T*>(g_vec[get_type<T>()].second.get());
+		return static_cast<T*>(g_vec[get_type<T>()].get());
 	}
 
 	// Check whether the object exists
@@ -825,7 +734,7 @@ public:
 	{
 		reader_lock lock(id_manager::g_mutex);
 
-		auto& ptr = g_vec[get_type<T>()].second;
+		auto& ptr = g_vec[get_type<T>()];
 
 		return {ptr, static_cast<T*>(ptr.get())};
 	}
@@ -837,12 +746,7 @@ public:
 		std::shared_ptr<void> ptr;
 		{
 			std::lock_guard lock(id_manager::g_mutex);
-			ptr = std::move(g_vec[get_type<T>()].second);
-		}
-
-		if (ptr)
-		{
-			id_manager::on_stop<T>::func(static_cast<T*>(ptr.get()));
+			ptr = std::move(g_vec[get_type<T>()]);
 		}
 
 		return ptr.operator bool();
@@ -855,12 +759,7 @@ public:
 		std::shared_ptr<void> ptr;
 		{
 			std::lock_guard lock(id_manager::g_mutex);
-			ptr = std::move(g_vec[get_type<T>()].second);
-		}
-
-		if (ptr)
-		{
-			id_manager::on_stop<T>::func(static_cast<T*>(ptr.get()));
+			ptr = std::move(g_vec[get_type<T>()]);
 		}
 
 		return {ptr, static_cast<T*>(ptr.get())};
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 6c5cff0bf6..67fde64b31 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -127,11 +127,6 @@ namespace vm
 
 	void cleanup_unlock(cpu_thread& cpu) noexcept
 	{
-		if (g_tls_locked && cpu.get() == thread_ctrl::get_current())
-		{
-			g_tls_locked = nullptr;
-		}
-
 		for (u32 i = 0; i < g_locks.size(); i++)
 		{
 			if (g_locks[i] == &cpu)
@@ -225,7 +220,7 @@ namespace vm
 			{
 				while (cpu_thread* ptr = lock)
 				{
-					if (ptr->state & (cpu_flag::dbg_global_stop + cpu_flag::exit))
+					if (ptr->is_stopped())
 					{
 						break;
 					}
@@ -533,11 +528,21 @@ namespace vm
 			}
 		}
 
+		const u32 page_addr = addr + (this->flags & 0x10 ? 0x1000 : 0);
+		const u32 page_size = size - (this->flags & 0x10 ? 0x2000 : 0);
+
+		if (this->flags & 0x10)
+		{
+			// Mark overflow/underflow guard pages as allocated
+			verify(HERE), !g_pages[addr / 4096].flags.exchange(page_allocated);
+			verify(HERE), !g_pages[addr / 4096 + size / 4096 - 1].flags.exchange(page_allocated);
+		}
+
 		// Map "real" memory pages
-		_page_map(addr, flags, size, shm.get());
+		_page_map(page_addr, flags, page_size, shm.get());
 
 		// Add entry
-		m_map[addr] = std::move(shm);
+		m_map[addr] = std::make_pair(size, std::move(shm));
 
 		return true;
 	}
@@ -589,11 +594,11 @@ namespace vm
 			vm::writer_lock lock(0);
 
 			// Deallocate all memory
-			for (auto it = m_map.begin(), end = m_map.end(); it != end;)
+			for (auto it = m_map.begin(), end = m_map.end(); !m_common && it != end;)
 			{
 				const auto next = std::next(it);
-				const auto size = (next == end ? this->addr + this->size : next->first) - it->first;
-				_page_unmap(it->first, size, it->second.get());
+				const auto size = it->second.first;
+				_page_unmap(it->first, size, it->second.second.get());
 				it = next;
 			}
 
@@ -614,7 +619,7 @@ namespace vm
 		const u32 min_page_size = flags & 0x100 ? 0x1000 : 0x10000;
 
 		// Align to minimal page size
-		const u32 size = ::align(orig_size, min_page_size);
+		const u32 size = ::align(orig_size, min_page_size) + (flags & 0x10 ? 0x2000 : 0);
 
 		// Check alignment (it's page allocation, so passing small values there is just silly)
 		if (align < min_page_size || align != (0x80000000u >> utils::cntlz32(align, true)))
@@ -623,7 +628,7 @@ namespace vm
 		}
 
 		// Return if size is invalid
-		if (!size || size > this->size)
+		if (!orig_size || !size || size > this->size)
 		{
 			return 0;
 		}
@@ -654,7 +659,7 @@ namespace vm
 		{
 			if (try_alloc(addr, pflags, size, std::move(shm)))
 			{
-				return addr;
+				return addr + (flags & 0x10 ? 0x1000 : 0);
 			}
 		}
 
@@ -672,7 +677,7 @@ namespace vm
 		const u32 size = ::align(orig_size, min_page_size);
 
 		// return if addr or size is invalid
-		if (!size || size > this->size || addr < this->addr || addr + size - 1 > this->addr + this->size - 1)
+		if (!size || size > this->size || addr < this->addr || addr + size - 1 > this->addr + this->size - 1 || flags & 0x10)
 		{
 			return 0;
 		}
@@ -708,37 +713,42 @@ namespace vm
 
 	u32 block_t::dealloc(u32 addr, const std::shared_ptr<utils::shm>* src)
 	{
-		u32 result = 0;
 		{
 			vm::writer_lock lock(0);
 
-			const auto found = m_map.find(addr);
+			const auto found = m_map.find(addr - (flags & 0x10 ? 0x1000 : 0));
 
 			if (found == m_map.end())
 			{
 				return 0;
 			}
 
-			if (src && found->second.get() != src->get())
+			if (src && found->second.second.get() != src->get())
 			{
 				return 0;
 			}
 
-			// Approximate allocation size
-			const auto next = std::next(found);
-			const auto size = (next == m_map.end() ? this->addr + this->size : next->first) - found->first;
+			// Get allocation size (stored size minus the two guard pages, if present)
+			const auto size = found->second.first - (flags & 0x10 ? 0x2000 : 0);
+
+			if (flags & 0x10)
+			{
+				// Clear guard pages
+				verify(HERE), g_pages[addr / 4096 - 1].flags.exchange(0) == page_allocated;
+				verify(HERE), g_pages[addr / 4096 + size / 4096].flags.exchange(0) == page_allocated;
+			}
 
 			// Unmap "real" memory pages
-			result = _page_unmap(addr, size, found->second.get());
+			verify(HERE), size == _page_unmap(addr, size, found->second.second.get());
 
 			// Remove entry
 			m_map.erase(found);
-		}
 
-		return result;
+			return size;
+		}
 	}
 
-	std::pair<const u32, std::shared_ptr<utils::shm>> block_t::get(u32 addr, u32 size)
+	std::pair<u32, std::shared_ptr<utils::shm>> block_t::get(u32 addr, u32 size)
 	{
 		if (addr < this->addr || std::max<u32>(size, addr - this->addr + size) >= this->size)
 		{
@@ -769,12 +779,12 @@ namespace vm
 		}
 
 		// Range check
-		if (std::max<u32>(size, addr - found->first + size) > found->second->size())
+		if (std::max<u32>(size, addr - found->first + size) > found->second.second->size())
 		{
 			return {addr, nullptr};
 		}
 
-		return *found;
+		return {found->first, found->second.second};
 	}
 
 	u32 block_t::imp_used(const vm::writer_lock&)
@@ -783,7 +793,7 @@ namespace vm
 
 		for (auto& entry : m_map)
 		{
-			result += entry.second->size();
+			result += entry.second.first - (flags & 0x10 ? 0x2000 : 0);
 		}
 
 		return result;
@@ -967,7 +977,7 @@ namespace vm
 				std::make_shared<block_t>(0x20000000, 0x10000000, 0x201), // user 64k pages
 				nullptr, // user 1m pages
 				std::make_shared<block_t>(0xC0000000, 0x10000000), // video
-				std::make_shared<block_t>(0xD0000000, 0x10000000, 0x101), // stack
+				std::make_shared<block_t>(0xD0000000, 0x10000000, 0x111), // stack
 				std::make_shared<block_t>(0xE0000000, 0x20000000), // SPU reserved
 			};
 		}
diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h
index d27ee13bc0..170de102da 100644
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@@ -146,7 +146,7 @@ namespace vm
 	class block_t final
 	{
 		// Mapped regions: addr -> shm handle
-		std::map<u32, std::shared_ptr<utils::shm>> m_map;
+		std::map<u32, std::pair<u32, std::shared_ptr<utils::shm>>> m_map;
 
 		// Common mapped region for special cases
 		std::shared_ptr<utils::shm> m_common;
@@ -173,7 +173,7 @@ namespace vm
 		u32 dealloc(u32 addr, const std::shared_ptr<utils::shm>* = nullptr);
 
 		// Get memory at specified address (if size = 0, addr assumed exact)
-		std::pair<const u32, std::shared_ptr<utils::shm>> get(u32 addr, u32 size = 0);
+		std::pair<u32, std::shared_ptr<utils::shm>> get(u32 addr, u32 size = 0);
 
 		// Internal
 		u32 imp_used(const vm::writer_lock&);
diff --git a/rpcs3/Emu/RSX/Capture/rsx_replay.cpp b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp
index 8b3a23c173..c2e11ab610 100644
--- a/rpcs3/Emu/RSX/Capture/rsx_replay.cpp
+++ b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp
@@ -7,6 +7,7 @@
 #include "Emu/RSX/GSRender.h"
 
 #include <map>
+#include <exception>
 
 namespace rsx
 {
@@ -42,7 +43,7 @@ namespace rsx
 	{
 		u32 fifo_size = 4;
 
-		// run through replay commands to figure out how big command buffer needs to be 
+		// run through replay commands to figure out how big command buffer needs to be
 		for (const auto& rc : frame->replay_commands)
 		{
 			const u32 count = (rc.rsx_command.first >> 18) & 0x7ff;
@@ -203,7 +204,7 @@ namespace rsx
 		}
 	}
 
-	void rsx_replay_thread::cpu_task()
+	void rsx_replay_thread::on_task()
 	{
 		be_t<u32> context_id = allocate_context();
 
@@ -284,7 +285,18 @@ namespace rsx
 			// random pause to not destroy gpu
 			std::this_thread::sleep_for(10ms);
 		}
+	}
 
-		state += cpu_flag::exit;
+	void rsx_replay_thread::operator()()
+	{
+		try
+		{
+			on_task();
+		}
+		catch (const std::exception& e)
+		{
+			LOG_FATAL(RSX, "%s thrown: %s", typeid(e).name(), e.what());
+			Emu.Pause();
+		}
 	}
 }
diff --git a/rpcs3/Emu/RSX/Capture/rsx_replay.h b/rpcs3/Emu/RSX/Capture/rsx_replay.h
index c2bfdded30..1bccfb5924 100644
--- a/rpcs3/Emu/RSX/Capture/rsx_replay.h
+++ b/rpcs3/Emu/RSX/Capture/rsx_replay.h
@@ -210,7 +210,7 @@ namespace rsx
 	};
 
 
-	class rsx_replay_thread : public ppu_thread
+	class rsx_replay_thread
 	{
 		struct rsx_context
 		{
@@ -236,9 +236,12 @@ namespace rsx
 
 	public:
 		rsx_replay_thread(std::unique_ptr<frame_capture_data>&& frame_data)
-			: ppu_thread("Rsx Capture Replay Thread"), frame(std::move(frame_data)) {};
+			:frame(std::move(frame_data))
+		{
+		}
 
-		virtual void cpu_task() override;
+		void on_task();
+		void operator()();
 	private:
 		be_t<u32> allocate_context();
 		std::vector<u32> alloc_write_fifo(be_t<u32> context_id);
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
index ad843903bf..9263f77d78 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
@@ -134,6 +134,11 @@ namespace
 	}
 }
 
+u64 D3D12GSRender::get_cycles()
+{
+	return thread_ctrl::get_cycles(static_cast<named_thread<D3D12GSRender>&>(*this));
+}
+
 D3D12GSRender::D3D12GSRender()
 	: GSRender()
 	, m_d3d12_lib()
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
index 5478f7763e..5d2f3a08cb 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
+++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
@@ -121,6 +121,7 @@ private:
 	ComPtr<ID3D12DescriptorHeap> m_current_sampler_descriptors;
 
 public:
+	u64 get_cycles() override final;
 	D3D12GSRender();
 	virtual ~D3D12GSRender();
 
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 7f5b0e868b..71a39e7c5b 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -22,6 +22,11 @@ namespace
 	}
 }
 
+u64 GLGSRender::get_cycles()
+{
+	return thread_ctrl::get_cycles(static_cast<named_thread<GLGSRender>&>(*this));
+}
+
 GLGSRender::GLGSRender() : GSRender()
 {
 	m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.6"));
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h
index 29b359e655..8d5eb06f58 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.h
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.h
@@ -346,6 +346,7 @@ private:
 	std::vector<u8> m_scratch_buffer;
 
 public:
+	u64 get_cycles() override final;
 	GLGSRender();
 
 private:
diff --git a/rpcs3/Emu/RSX/Null/NullGSRender.cpp b/rpcs3/Emu/RSX/Null/NullGSRender.cpp
index 48f2f51f3d..464ca7581a 100644
--- a/rpcs3/Emu/RSX/Null/NullGSRender.cpp
+++ b/rpcs3/Emu/RSX/Null/NullGSRender.cpp
@@ -2,6 +2,11 @@
 #include "NullGSRender.h"
 #include "Emu/System.h"
 
+u64 NullGSRender::get_cycles() // Forwards to thread_ctrl::get_cycles(); assumes *this is the base of a named_thread<NullGSRender>
+{
+	return thread_ctrl::get_cycles(static_cast<named_thread<NullGSRender>&>(*this));
+}
+
 NullGSRender::NullGSRender() : GSRender()
 {
 }
diff --git a/rpcs3/Emu/RSX/Null/NullGSRender.h b/rpcs3/Emu/RSX/Null/NullGSRender.h
index 34adb02ada..2bfdd2e247 100644
--- a/rpcs3/Emu/RSX/Null/NullGSRender.h
+++ b/rpcs3/Emu/RSX/Null/NullGSRender.h
@@ -1,11 +1,12 @@
 #pragma once
 #include "Emu/RSX/GSRender.h"
 
-class NullGSRender final : public GSRender
+class NullGSRender : public GSRender // 'final' dropped; presumably so named_thread<NullGSRender> can derive from it (TODO confirm)
 {
 public:
+	u64 get_cycles() override final; // Defined in NullGSRender.cpp; casts *this to named_thread<NullGSRender>
 	NullGSRender();
 
 private:
-	bool do_method(u32 cmd, u32 value) override;
+	bool do_method(u32 cmd, u32 value) override final; // 'final' moved from the class to the individual overrides
 };
diff --git a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp
index 12e9e7f5e3..88245bc780 100644
--- a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp
+++ b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp
@@ -230,7 +230,6 @@ namespace rsx
 
 				u32 ppus{0};
 				u32 spus{0};
-				u32 rawspus{0};
 
 				f32 cpu_usage{-1.f};
 				u32 total_threads{0};
@@ -260,16 +259,20 @@ namespace rsx
 				}
 				case detail_level::medium:
 				{
-					ppus = idm::select<ppu_thread>([&ppu_cycles](u32, ppu_thread& ppu) { ppu_cycles += ppu.get()->get_cycles(); });
+					ppus = idm::select<named_thread<ppu_thread>>([&ppu_cycles](u32, named_thread<ppu_thread>& ppu)
+					{
+						ppu_cycles += thread_ctrl::get_cycles(ppu);
+					});
 
-					spus = idm::select<SPUThread>([&spu_cycles](u32, SPUThread& spu) { spu_cycles += spu.get()->get_cycles(); });
-
-					rawspus = idm::select<RawSPUThread>([&spu_cycles](u32, RawSPUThread& rawspu) { spu_cycles += rawspu.get()->get_cycles(); });
+					spus = idm::select<named_thread<spu_thread>>([&spu_cycles](u32, named_thread<spu_thread>& spu)
+					{
+						spu_cycles += thread_ctrl::get_cycles(spu);
+					});
 
 					if (!rsx_thread)
 						rsx_thread = fxm::get<GSRender>();
 
-					rsx_cycles += rsx_thread->get()->get_cycles();
+					rsx_cycles += rsx_thread->get_cycles();
 
 					total_cycles = ppu_cycles + spu_cycles + rsx_cycles;
 					cpu_usage = m_cpu_stats.get_usage();
@@ -329,7 +332,7 @@ namespace rsx
 					                         " Total : %04.1f %% (%2u)\n\n"
 					                         "%s\n"
 					                         " RSX   : %02u %%",
-					    fps, frametime, std::string(title1_high.size(), ' '), ppu_usage, ppus, spu_usage, spus + rawspus, rsx_usage, cpu_usage, total_threads, std::string(title2.size(), ' '), rsx_load);
+					    fps, frametime, std::string(title1_high.size(), ' '), ppu_usage, ppus, spu_usage, spus, rsx_usage, cpu_usage, total_threads, std::string(title2.size(), ' '), rsx_load);
 					break;
 				}
 				}
diff --git a/rpcs3/Emu/RSX/Overlays/overlays.h b/rpcs3/Emu/RSX/Overlays/overlays.h
index e70d118b4e..f6353187c1 100644
--- a/rpcs3/Emu/RSX/Overlays/overlays.h
+++ b/rpcs3/Emu/RSX/Overlays/overlays.h
@@ -977,13 +977,13 @@ namespace rsx
 				this->on_close = on_close;
 				if (interactive)
 				{
-					thread_ctrl::make_shared("dialog input thread", [&]
+					thread_ctrl::spawn("dialog input thread", [&]
 					{
 						if (auto error = run_input_loop())
 						{
 							LOG_ERROR(RSX, "Dialog input loop exited with error code=%d", error);
 						}
-					})->detach();
+					});
 				}
 
 				return CELL_OK;
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index c371b2853a..2d1a7d7bc1 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -23,6 +23,7 @@
 #include <sstream>
 #include <thread>
 #include <unordered_set>
+#include <exception>
 #include <fenv.h>
 
 class GSRender;
@@ -367,13 +368,36 @@ namespace rsx
 		}
 	}
 
-	void thread::on_spawn()
+	void thread::operator()() // Thread body (presumably invoked by named_thread<>): wait for startup, run on_task(), then on_exit()
 	{
-		m_rsx_thread = std::this_thread::get_id();
+		try
+		{
+			// Wait for startup: poll until m_rsx_thread_exiting is cleared (done after on_init_rsx() elsewhere in this file) (TODO)
+			while (m_rsx_thread_exiting)
+			{
+				thread_ctrl::wait_for(1000);
+
+				if (Emu.IsStopped())
+				{
+					return; // Emulator stopped before startup completed: on_task() never ran, so on_exit() is skipped too
+				}
+			}
+
+			on_task();
+		}
+		catch (const std::exception& e) // Only std::exception-derived errors are handled here; anything else propagates
+		{
+			LOG_FATAL(RSX, "%s thrown: %s", typeid(e).name(), e.what());
+			Emu.Pause();
+		}
+
+		on_exit(); // Reached after on_task() returns normally and after a caught exception
 	}
 
 	void thread::on_task()
 	{
+		m_rsx_thread = std::this_thread::get_id();
+
 		if (supports_native_ui)
 		{
 			m_overlay_manager = fxm::make_always<rsx::overlays::display_manager>();
@@ -406,7 +430,7 @@ namespace rsx
 
 		last_flip_time = get_system_time() - 1000000;
 
-		thread_ctrl::spawn(m_vblank_thread, "VBlank Thread", [this]()
+		named_thread vblank_thread("VBlank Thread", [this]()
 		{
 			const u64 start_time = get_system_time();
 
@@ -428,7 +452,7 @@ namespace rsx
 							{ ppu_cmd::sleep, 0 }
 						});
 
-						intr_thread->notify();
+						thread_ctrl::notify(*intr_thread);
 					}
 
 					continue;
@@ -441,7 +465,7 @@ namespace rsx
 			}
 		});
 
-		thread_ctrl::spawn(m_decompiler_thread, "RSX Decompiler Thread", [this]
+		named_thread decompiler_thread ("RSX Decompiler Thread", [this]
 		{
 			if (g_cfg.video.disable_asynchronous_shader_compiler)
 			{
@@ -1000,22 +1024,6 @@ namespace rsx
 	void thread::on_exit()
 	{
 		m_rsx_thread_exiting = true;
-		if (m_vblank_thread)
-		{
-			m_vblank_thread->join();
-			m_vblank_thread.reset();
-		}
-
-		if (m_decompiler_thread)
-		{
-			m_decompiler_thread->join();
-			m_decompiler_thread.reset();
-		}
-	}
-
-	std::string thread::get_name() const
-	{
-		return "rsx::thread";
 	}
 
 	void thread::fill_scale_offset_data(void *buffer, bool flip_y) const
@@ -2179,10 +2187,8 @@ namespace rsx
 
 		memset(display_buffers, 0, sizeof(display_buffers));
 
-		m_rsx_thread_exiting = false;
-
 		on_init_rsx();
-		start_thread(fxm::get<GSRender>());
+		m_rsx_thread_exiting = false;
 	}
 
 	GcmTileInfo *thread::find_tile(u32 offset, u32 location)
@@ -2908,7 +2914,7 @@ namespace rsx
 				{ ppu_cmd::sleep, 0 }
 			});
 
-			intr_thread->notify();
+			thread_ctrl::notify(*intr_thread);
 		}
 
 		sys_rsx_context_attribute(0x55555555, 0xFEC, buffer, 0, 0, 0);
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index a3a69a087b..af6753cceb 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -361,11 +361,8 @@ namespace rsx
 
 	struct sampled_image_descriptor_base;
 
-	class thread : public old_thread
+	class thread
 	{
-		std::shared_ptr<thread_base> m_vblank_thread;
-		std::shared_ptr<thread_base> m_decompiler_thread;
-
 		u64 timestamp_ctrl = 0;
 		u64 timestamp_subvalue = 0;
 
@@ -434,7 +431,7 @@ namespace rsx
 		void capture_frame(const std::string &name);
 
 	public:
-		std::shared_ptr<class ppu_thread> intr_thread;
+		std::shared_ptr<named_thread<class ppu_thread>> intr_thread;
 
 		// I hate this flag, but until hle is closer to lle, its needed
 		bool isHLE{ false };
@@ -516,13 +513,14 @@ namespace rsx
 		bool zcull_rendering_enabled = false;
 		bool zcull_pixel_cnt_enabled = false;
 
+		void operator()();
+		virtual u64 get_cycles() = 0;
+
 	protected:
 		thread();
 		virtual ~thread();
-
-		virtual void on_spawn() override;
-		virtual void on_task() override;
-		virtual void on_exit() override;
+		virtual void on_task();
+		virtual void on_exit();
 
 		/**
 		 * Execute a backend local task queue
@@ -534,11 +532,6 @@ namespace rsx
 		virtual bool on_decompiler_task() { return false; }
 
 	public:
-		virtual std::string get_name() const override;
-
-		virtual void on_init(const std::shared_ptr<void>&) override {} // disable start() (TODO)
-		virtual void on_stop() override {} // disable join()
-
 		virtual void begin();
 		virtual void end();
 
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index ca8a2824aa..8757e2437a 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -502,6 +502,11 @@ namespace
 	}
 }
 
+u64 VKGSRender::get_cycles() // Forwards to thread_ctrl::get_cycles(); assumes *this is the base of a named_thread<VKGSRender>
+{
+	return thread_ctrl::get_cycles(static_cast<named_thread<VKGSRender>&>(*this));
+}
+
 VKGSRender::VKGSRender() : GSRender()
 {
 	u32 instance_handle = m_thread_context.createInstance("RPCS3");
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h
index 9b5ebdfe2b..ac30ccc049 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.h
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.h
@@ -360,7 +360,7 @@ private:
 	s64 m_flip_time = 0;
 
 	std::vector<u8> m_draw_buffers;
-	
+
 	shared_mutex m_flush_queue_mutex;
 	flush_request_task m_flush_requests;
 
@@ -377,6 +377,7 @@ private:
 #endif
 
 public:
+	u64 get_cycles() override final;
 	VKGSRender();
 	~VKGSRender();
 
diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
index fd46fcb4ad..db9fa82280 100644
--- a/rpcs3/Emu/RSX/rsx_methods.cpp
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -1133,7 +1133,7 @@ namespace rsx
 				{ ppu_cmd::sleep, 0 }
 			});
 
-			rsx->intr_thread->notify();
+			thread_ctrl::notify(*rsx->intr_thread);
 		}
 	}
 
diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp
index afdc7d2159..5874a963c8 100644
--- a/rpcs3/Emu/System.cpp
+++ b/rpcs3/Emu/System.cpp
@@ -486,9 +486,7 @@ bool Emulator::BootRsxCapture(const std::string& path)
 	GetCallbacks().on_run();
 	m_state = system_state::running;
 
-	auto&& rsxcapture = idm::make_ptr<ppu_thread, rsx::rsx_replay_thread>(std::move(frame));
-	rsxcapture->run();
-
+	fxm::make<rsx::rsx_replay_thread>(std::move(frame));
 	return true;
 }
 
@@ -736,7 +734,7 @@ void Emulator::Load(bool add_only)
 			// Workaround for analyser glitches
 			vm::falloc(0x10000, 0xf0000, vm::main);
 
-			return thread_ctrl::make_shared("SPRX Loader", [this]
+			return thread_ctrl::spawn("SPRX Loader", [this]
 			{
 				std::vector<std::string> dir_queue;
 				dir_queue.emplace_back(m_path + '/');
@@ -744,7 +742,7 @@ void Emulator::Load(bool add_only)
 				std::vector<std::pair<std::string, u64>> file_queue;
 				file_queue.reserve(2000);
 
-				std::queue<std::shared_ptr<thread_base>> thread_queue;
+				std::queue<named_thread<std::function<void()>>> thread_queue;
 				const uint max_threads = std::thread::hardware_concurrency();
 
 				// Initialize progress dialog
@@ -820,12 +818,12 @@ void Emulator::Load(bool add_only)
 								std::this_thread::sleep_for(10ms);
 							}
 
-							thread_queue.emplace(thread_ctrl::make_shared("Worker " + std::to_string(thread_queue.size()), [_prx = std::move(prx)]
+							thread_queue.emplace("Worker " + std::to_string(thread_queue.size()), [_prx = std::move(prx)]
 							{
 								ppu_initialize(*_prx);
 								ppu_unload_prx(*_prx);
 								g_progr_fdone++;
-							}));
+							});
 
 							continue;
 						}
@@ -846,7 +844,7 @@ void Emulator::Load(bool add_only)
 				{
 					Emu.Stop();
 				});
-			})->detach();
+			});
 		}
 
 		// Detect boot location
@@ -1237,12 +1235,12 @@ void Emulator::Run()
 
 	auto on_select = [](u32, cpu_thread& cpu)
 	{
-		cpu.run();
+		cpu.state -= cpu_flag::stop;
+		cpu.notify();
 	};
 
-	idm::select<ppu_thread>(on_select);
-	idm::select<RawSPUThread>(on_select);
-	idm::select<SPUThread>(on_select);
+	idm::select<named_thread<ppu_thread>>(on_select);
+	idm::select<named_thread<spu_thread>>(on_select);
 
 #ifdef WITH_GDB_DEBUGGER
 	// Initialize debug server at the end of emu run sequence
@@ -1273,9 +1271,8 @@ bool Emulator::Pause()
 		cpu.state += cpu_flag::dbg_global_pause;
 	};
 
-	idm::select<ppu_thread>(on_select);
-	idm::select<RawSPUThread>(on_select);
-	idm::select<SPUThread>(on_select);
+	idm::select<named_thread<ppu_thread>>(on_select);
+	idm::select<named_thread<spu_thread>>(on_select);
 	return true;
 }
 
@@ -1338,9 +1335,8 @@ void Emulator::Resume()
 		cpu.notify();
 	};
 
-	idm::select<ppu_thread>(on_select);
-	idm::select<RawSPUThread>(on_select);
-	idm::select<SPUThread>(on_select);
+	idm::select<named_thread<ppu_thread>>(on_select);
+	idm::select<named_thread<spu_thread>>(on_select);
 	GetCallbacks().on_resume();
 }
 
@@ -1369,23 +1365,14 @@ void Emulator::Stop(bool restart)
 	fxm::remove<GDBDebugServer>();
 #endif
 
-	auto e_stop = std::make_exception_ptr(cpu_flag::dbg_global_stop);
-
 	auto on_select = [&](u32, cpu_thread& cpu)
 	{
 		cpu.state += cpu_flag::dbg_global_stop;
-
-		// Can't normally be null.
-		// Hack for a possible vm deadlock on thread creation.
-		if (auto thread = cpu.get())
-		{
-			thread->set_exception(e_stop);
-		}
+		cpu.notify();
 	};
 
-	idm::select<ppu_thread>(on_select);
-	idm::select<RawSPUThread>(on_select);
-	idm::select<SPUThread>(on_select);
+	idm::select<named_thread<ppu_thread>>(on_select);
+	idm::select<named_thread<spu_thread>>(on_select);
 
 	LOG_NOTICE(GENERAL, "All threads signaled...");
 
diff --git a/rpcs3/rpcs3_app.cpp b/rpcs3/rpcs3_app.cpp
index 559acdb38d..00a43c6961 100644
--- a/rpcs3/rpcs3_app.cpp
+++ b/rpcs3/rpcs3_app.cpp
@@ -234,13 +234,13 @@ void rpcs3_app::InitializeCallbacks()
 	{
 		switch (video_renderer type = g_cfg.video.renderer)
 		{
-		case video_renderer::null: return std::make_shared<NullGSRender>();
-		case video_renderer::opengl: return std::make_shared<GLGSRender>();
+		case video_renderer::null: return std::make_shared<named_thread<NullGSRender>>("rsx::thread");
+		case video_renderer::opengl: return std::make_shared<named_thread<GLGSRender>>("rsx::thread");
 #if defined(_WIN32) || defined(HAVE_VULKAN)
-		case video_renderer::vulkan: return std::make_shared<VKGSRender>();
+		case video_renderer::vulkan: return std::make_shared<named_thread<VKGSRender>>("rsx::thread");
 #endif
 #ifdef _MSC_VER
-		case video_renderer::dx12: return std::make_shared<D3D12GSRender>();
+		case video_renderer::dx12: return std::make_shared<named_thread<D3D12GSRender>>("rsx::thread");
 #endif
 		default: fmt::throw_exception("Invalid video renderer: %s" HERE, type);
 		}
diff --git a/rpcs3/rpcs3qt/breakpoint_list.cpp b/rpcs3/rpcs3qt/breakpoint_list.cpp
index e76059f39f..08aba88a86 100644
--- a/rpcs3/rpcs3qt/breakpoint_list.cpp
+++ b/rpcs3/rpcs3qt/breakpoint_list.cpp
@@ -17,7 +17,7 @@ breakpoint_list::breakpoint_list(QWidget* parent, breakpoint_handler* handler) :
 	connect(this, &QListWidget::customContextMenuRequested, this, &breakpoint_list::OnBreakpointListRightClicked);
 }
 
-/** 
+/**
 * It's unfortunate I need a method like this to sync these.  Should ponder a cleaner way to do this.
 */
 void breakpoint_list::UpdateCPUData(std::weak_ptr<cpu_thread> cpu, std::shared_ptr<CPUDisAsm> disasm)
@@ -60,7 +60,7 @@ void breakpoint_list::AddBreakpoint(u32 pc)
 	m_breakpoint_handler->AddBreakpoint(pc);
 
 	const auto cpu = this->cpu.lock();
-	const u32 cpu_offset = cpu->id_type() != 1 ? static_cast<SPUThread&>(*cpu).offset : 0;
+	const u32 cpu_offset = cpu->id_type() != 1 ? static_cast<spu_thread&>(*cpu).offset : 0;
 	m_disasm->offset = (u8*)vm::base(cpu_offset);
 
 	m_disasm->disasm(m_disasm->dump_pc = pc);
diff --git a/rpcs3/rpcs3qt/debugger_frame.cpp b/rpcs3/rpcs3qt/debugger_frame.cpp
index 3ddc90a7ed..68c56869af 100644
--- a/rpcs3/rpcs3qt/debugger_frame.cpp
+++ b/rpcs3/rpcs3qt/debugger_frame.cpp
@@ -258,7 +258,7 @@ u32 debugger_frame::GetPc() const
 		return 0;
 	}
 
-	return cpu->id_type() == 1 ? static_cast<ppu_thread*>(cpu.get())->cia : static_cast<SPUThread*>(cpu.get())->pc;
+	return cpu->id_type() == 1 ? static_cast<ppu_thread*>(cpu.get())->cia : static_cast<spu_thread*>(cpu.get())->pc;
 }
 
 void debugger_frame::UpdateUI()
@@ -340,9 +340,8 @@ void debugger_frame::UpdateUnitList()
 	{
 		const QSignalBlocker blocker(m_choice_units);
 
-		idm::select<ppu_thread>(on_select);
-		idm::select<RawSPUThread>(on_select);
-		idm::select<SPUThread>(on_select);
+		idm::select<named_thread<ppu_thread>>(on_select);
+		idm::select<named_thread<spu_thread>>(on_select);
 	}
 
 	OnSelectUnit();
@@ -369,21 +368,16 @@ void debugger_frame::OnSelectUnit()
 			return data == &cpu;
 		};
 
-		if (auto ppu = idm::select<ppu_thread>(on_select))
+		if (auto ppu = idm::select<named_thread<ppu_thread>>(on_select))
 		{
 			m_disasm = std::make_unique<PPUDisAsm>(CPUDisAsm_InterpreterMode);
 			cpu = ppu.ptr;
 		}
-		else if (auto spu1 = idm::select<SPUThread>(on_select))
+		else if (auto spu1 = idm::select<named_thread<spu_thread>>(on_select))
 		{
 			m_disasm = std::make_unique<SPUDisAsm>(CPUDisAsm_InterpreterMode);
 			cpu = spu1.ptr;
 		}
-		else if (auto rspu = idm::select<RawSPUThread>(on_select))
-		{
-			m_disasm = std::make_unique<SPUDisAsm>(CPUDisAsm_InterpreterMode);
-			cpu = rspu.ptr;
-		}
 	}
 
 	m_debugger_list->UpdateCPUData(this->cpu, m_disasm);
@@ -540,7 +534,7 @@ u64 debugger_frame::EvaluateExpression(const QString& expression)
 	}
 	else
 	{
-		auto spu = static_cast<SPUThread*>(thread.get());
+		auto spu = static_cast<spu_thread*>(thread.get());
 
 		for (int i = 0; i < 128; ++i)
 		{
diff --git a/rpcs3/rpcs3qt/debugger_list.cpp b/rpcs3/rpcs3qt/debugger_list.cpp
index 5aa6ca590b..d099d0be7b 100644
--- a/rpcs3/rpcs3qt/debugger_list.cpp
+++ b/rpcs3/rpcs3qt/debugger_list.cpp
@@ -37,7 +37,7 @@ u32 debugger_list::GetPc() const
 		return 0;
 	}
 
-	return cpu->id_type() == 1 ? static_cast<ppu_thread*>(cpu.get())->cia : static_cast<SPUThread*>(cpu.get())->pc;
+	return cpu->id_type() == 1 ? static_cast<ppu_thread*>(cpu.get())->cia : static_cast<spu_thread*>(cpu.get())->pc;
 }
 
 u32 debugger_list::GetCenteredAddress(u32 address) const
@@ -73,7 +73,7 @@ void debugger_list::ShowAddress(u32 addr)
 	else
 	{
 		const bool is_spu = cpu->id_type() != 1;
-		const u32 cpu_offset = is_spu ? static_cast<SPUThread&>(*cpu).offset : 0;
+		const u32 cpu_offset = is_spu ? static_cast<spu_thread&>(*cpu).offset : 0;
 		const u32 address_limits = is_spu ? 0x3ffff : ~0;
 		m_pc &= address_limits;
 		m_disasm->offset = (u8*)vm::base(cpu_offset);
@@ -90,7 +90,7 @@ void debugger_list::ShowAddress(u32 addr)
 
 			item(i)->setText((IsBreakpoint(m_pc) ? ">>> " : "    ") + qstr(m_disasm->last_opcode));
 
-			if (cpu->state & cpu_state_pause && m_pc == GetPc())
+			if (cpu->is_paused() && m_pc == GetPc())
 			{
 				item(i)->setTextColor(m_text_color_pc);
 				item(i)->setBackgroundColor(m_color_pc);
diff --git a/rpcs3/rpcs3qt/instruction_editor_dialog.cpp b/rpcs3/rpcs3qt/instruction_editor_dialog.cpp
index fca254a64c..0076400ef3 100644
--- a/rpcs3/rpcs3qt/instruction_editor_dialog.cpp
+++ b/rpcs3/rpcs3qt/instruction_editor_dialog.cpp
@@ -17,7 +17,7 @@ instruction_editor_dialog::instruction_editor_dialog(QWidget *parent, u32 _pc, c
 	setMinimumSize(300, sizeHint().height());
 
 	const auto cpu = _cpu.get();
-	m_cpu_offset = cpu->id_type() != 1 ? static_cast<SPUThread&>(*cpu).offset : 0;
+	m_cpu_offset = cpu->id_type() != 1 ? static_cast<spu_thread&>(*cpu).offset : 0;
 	QString instruction = qstr(fmt::format("%08x", vm::read32(m_cpu_offset + m_pc).value()));
 
 	QVBoxLayout* vbox_panel(new QVBoxLayout());
diff --git a/rpcs3/rpcs3qt/kernel_explorer.cpp b/rpcs3/rpcs3qt/kernel_explorer.cpp
index 69f2ce20ae..28d83812af 100644
--- a/rpcs3/rpcs3qt/kernel_explorer.cpp
+++ b/rpcs3/rpcs3qt/kernel_explorer.cpp
@@ -261,18 +261,18 @@ void kernel_explorer::Update()
 
 	lv2_types.emplace_back(l_addTreeChild(root, "PPU Threads"));
 
-	idm::select<ppu_thread>([&](u32 id, ppu_thread& ppu)
+	idm::select<named_thread<ppu_thread>>([&](u32 id, ppu_thread& ppu)
 	{
 		lv2_types.back().count++;
-		l_addTreeChild(lv2_types.back().node, qstr(fmt::format("PPU Thread: ID = 0x%08x '%s'", id, ppu.get_name())));
+		l_addTreeChild(lv2_types.back().node, qstr(fmt::format("PPU Thread: ID = 0x%08x '%s'", id, ppu.ppu_name.get())));
 	});
 
 	lv2_types.emplace_back(l_addTreeChild(root, "SPU Threads"));
 
-	idm::select<SPUThread>([&](u32 id, SPUThread& spu)
+	idm::select<named_thread<spu_thread>>([&](u32 id, spu_thread& spu)
 	{
 		lv2_types.back().count++;
-		l_addTreeChild(lv2_types.back().node, qstr(fmt::format("SPU Thread: ID = 0x%08x '%s'", id, spu.get_name())));
+		l_addTreeChild(lv2_types.back().node, qstr(fmt::format("SPU Thread: ID = 0x%08x '%s'", id, spu.spu_name.get())));
 	});
 
 	lv2_types.emplace_back(l_addTreeChild(root, "SPU Thread Groups"));
diff --git a/rpcs3/rpcs3qt/register_editor_dialog.cpp b/rpcs3/rpcs3qt/register_editor_dialog.cpp
index a957ae8bf5..0242a160db 100644
--- a/rpcs3/rpcs3qt/register_editor_dialog.cpp
+++ b/rpcs3/rpcs3qt/register_editor_dialog.cpp
@@ -111,7 +111,7 @@ void register_editor_dialog::updateRegister(const QString& text)
 	}
 	else
 	{
-		auto& spu = *static_cast<SPUThread*>(cpu.get());
+		auto& spu = *static_cast<spu_thread*>(cpu.get());
 
 		std::string::size_type first_brk = reg.find('[');
 		if (first_brk != std::string::npos)
@@ -179,7 +179,7 @@ void register_editor_dialog::OnOkay(const std::shared_ptr<cpu_thread>& _cpu)
 	}
 	else
 	{
-		auto& spu = *static_cast<SPUThread*>(cpu);
+		auto& spu = *static_cast<spu_thread*>(cpu);
 
 		while (value.length() < 32) value = "0" + value;
 		const auto first_brk = reg.find('[');