diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 03e49b8aa0..6cb0686122 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -187,7 +187,7 @@ u64 PPUThread::FastCall2(u32 addr, u32 rtoc)
 	LR = Emu.m_ppu_thr_stop;
 	SetCurrentNamedThread(this);
 
-	Task();
+	CPUThread::Task();
 
 	m_status = old_status;
 	PC = old_PC;
@@ -202,4 +202,16 @@ u64 PPUThread::FastCall2(u32 addr, u32 rtoc)
 void PPUThread::FastStop()
 {
 	m_status = Stopped;
-}
\ No newline at end of file
+}
+
+void PPUThread::Task()
+{
+	if (m_custom_task)
+	{
+		m_custom_task(*this);
+	}
+	else
+	{
+		CPUThread::Task();
+	}
+}
diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h
index 0fb4e2867b..34021a2d6a 100644
--- a/rpcs3/Emu/Cell/PPUThread.h
+++ b/rpcs3/Emu/Cell/PPUThread.h
@@ -470,9 +470,6 @@ struct FPRdouble
 
 class PPUThread : public PPCThread
 {
-public:
-	u32 owned_mutexes;
-
 public:
 	PPCdouble FPR[32]; //Floating Point Register
 	FPSCRhdr FPSCR; //Floating Point Status and Control Register
@@ -556,6 +553,9 @@ public:
 	u64 R_ADDR; // reservation address
 	u64 R_VALUE; // reservation value (BE)
 
+	u32 owned_mutexes;
+	std::function<void(PPUThread& CPU)> m_custom_task;
+
 public:
 	PPUThread();
 	virtual ~PPUThread();
@@ -785,17 +785,18 @@ public:
 	virtual void InitRegs();
+	virtual void Task();
 	u64 GetStackArg(s32 i);
 	u64 FastCall2(u32 addr, u32 rtoc);
 	void FastStop();
-
-	virtual void DoReset() override;
 	virtual void DoRun() override;
+
+protected:
+	virtual void DoReset() override;
 	virtual void DoPause() override;
 	virtual void DoResume() override;
 	virtual void DoStop() override;
 
-protected:
 	virtual void Step() override
 	{
 		//if(++cycle > 20)
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index f7dc30ca73..0ff0af3c6a 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -50,7 +50,15 @@ void SPUThread::Task()
 	const int round = std::fegetround();
 	std::fesetround(FE_TOWARDZERO);
 
-	CPUThread::Task();
+	if (m_custom_task)
+	{
+		m_custom_task(*this);
+	}
+	else
+	{
+		CPUThread::Task();
+	}
+
 	if (std::fegetround() != FE_TOWARDZERO)
 	{
 		LOG_ERROR(Log::SPU, "Rounding mode has changed(%d)", std::fegetround());
@@ -68,7 +76,7 @@ void SPUThread::DoReset()
 
 void SPUThread::InitRegs()
 {
-	GPR[1]._u32[3] = 0x40000 - 120;
+	GPR[1]._u32[3] = 0x3FFF0; // initial stack frame pointer
 
 	cfg.Reset();
@@ -138,6 +146,23 @@ void SPUThread::DoClose()
 	}
 }
 
+void SPUThread::FastCall(u32 ls_addr)
+{
+	// doesn't touch thread status (instead of PPUThread::FastCall2);
+	// can't be called from another thread (because it doesn't make sense);
+	// FastStop-like routine is not defined (TODO);
+
+	auto old_PC = PC;
+	auto old_stack = GPR[1]; // only saved and restored (may be wrong)
+
+	PC = ls_addr;
+
+	CPUThread::Task();
+
+	PC = old_PC;
+	GPR[1] = old_stack;
+}
+
 void SPUThread::WriteSNR(bool number, u32 value)
 {
 	if (cfg.value & ((u64)1 << (u64)number))
@@ -272,7 +297,7 @@ void SPUThread::ListCmd(u32 lsa, u64 ea, u16 tag, u16 size, u32 cmd, MFCReg& MFC
 		auto rec = vm::ptr::make(dmac.ls_offset + list_addr + i * 8);
 
 		u32 size = rec->ts;
-		if (size < 16 && size != 1 && size != 2 && size != 4 && size != 8)
+		if (!(rec->s.ToBE() & se16(0x8000)) && size < 16 && size != 1 && size != 2 && size != 4 && size != 8)
 		{
 			LOG_ERROR(Log::SPU, "DMA List: invalid transfer size(%d)", size);
 			result = MFC_PPU_DMA_CMD_SEQUENCE_ERROR;
 		}
 
 		u32 addr = rec->ea;
-		ProcessCmd(cmd, tag, lsa | (addr & 0xf), addr, size);
-		if (Ini.HLELogging.GetValue() || rec->s)
+		if (size)
+			ProcessCmd(cmd, tag, lsa | (addr & 0xf), addr, size);
+
+		if (Ini.HLELogging.GetValue() || rec->s.ToBE())
 			LOG_NOTICE(Log::SPU, "*** list element(%d/%d): s = 0x%x, ts = 0x%x, low ea = 0x%x (lsa = 0x%x)",
 				i, list_size, (u16)rec->s, (u16)rec->ts, (u32)rec->ea, lsa | (addr & 0xf));
 
-		lsa += std::max(size, (u32)16);
+		if (size)
+			lsa += std::max(size, (u32)16);
 
 		if (rec->s.ToBE() & se16(0x8000))
 		{
@@ -454,7 +482,7 @@ void SPUThread::EnqMfcCmd(MFCReg& MFCArgs)
 			}
 			else // store unconditional
 			{
-				if (R_ADDR)
+				if (R_ADDR) // may be wrong
 				{
 					m_events |= SPU_EVENT_LR;
 				}
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index ca17de309d..eba9dd6a0c 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -503,6 +503,8 @@ public:
 	void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); }
 	void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); }
 
+	std::function<void(SPUThread& CPU)> m_custom_task;
+
 public:
 	SPUThread(CPUThreadType type = CPU_THREAD_SPU);
 	virtual ~SPUThread();
@@ -560,6 +562,7 @@ public:
 	virtual void InitRegs();
 	virtual void Task();
+	void FastCall(u32 ls_addr);
 
 protected:
 	virtual void DoReset();
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
index 0039cfd3b1..6c4ef20f46 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
@@ -44,8 +44,8 @@ s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr po
 	if (sdk_ver == -1) sdk_ver = 0x460000;
 
 	u8 _port = 0x3f;
-	u8 port_start = 0x10;
 	u64 port_mask = 0;
+
 	if (isDynamic == 0)
 	{
 		_port = *port;
@@ -53,18 +53,18 @@ s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr po
 		{
 			return CELL_SPURS_CORE_ERROR_INVAL;
 		}
-		if (sdk_ver <= 0x17ffff && _port > 0xf)
+		if (sdk_ver > 0x17ffff && _port > 0xf)
 		{
 			return CELL_SPURS_CORE_ERROR_PERM;
 		}
-		port_start = _port;
 	}
 
-	for (u32 i = port_start + 1; i < _port; i++)
+	for (u32 i = isDynamic ? 0x10 : _port; i <= _port; i++)
 	{
-		port_mask |= 1ull << (i - 1);
+		port_mask |= 1ull << (i);
 	}
 
+	assert(port_mask); // zero mask will return CELL_EINVAL
 	if (s32 res = sys_spu_thread_group_connect_event_all_threads(spurs->m.spuTG, queue, port_mask, port))
 	{
 		if (res == CELL_EISCONN)
@@ -78,7 +78,6 @@ s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr po
 	{
 		spurs->m.spups |= be_t<u64>::make(1ull << *port); // atomic bitwise or
 	}
-
 	return CELL_OK;
 #endif
 }
@@ -141,7 +140,7 @@ s64 spursInit(
 		return CELL_SPURS_CORE_ERROR_PERM;
 	}
 
-	const bool isSecond = flags & SAF_SECOND_VERSION;
+	const bool isSecond = (flags & SAF_SECOND_VERSION) != 0;
 	memset(spurs.get_ptr(), 0, CellSpurs::size1 + isSecond * CellSpurs::size2);
 	spurs->m.revision = revision;
 	spurs->m.sdkVersion = sdkVersion;
@@ -198,10 +197,9 @@ s64 spursInit(
 	spurs->m.spuPriority = spuPriority;
 #ifdef PRX_DEBUG
 	assert(spu_image_import(spurs->m.spuImg, vm::read32(libsre_rtoc - (isSecond ? 0x7E94 : 0x7E98)), 1) == CELL_OK);
+#else
+	spurs->m.spuImg.addr = Memory.Alloc(0x40000, 4096);
 #endif
-	//char str1[0x80];
-	//memcpy(str1, prefix, prefixSize); // strcpy
-	//memcpy(str1 + prefixSize, "CellSpursKernelGroup", 21); // strcat
 
 	s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
 	if (flags & SAF_SPU_TGT_EXCLUSIVE_NON_CONTEXT)
@@ -222,10 +220,17 @@ s64 spursInit(
 	spurs->m.spuTG = tg->m_id;
 
 	name += "CellSpursKernel0";
-	for (s32 i = 0; i < nSpus; i++, name[name.size() - 1]++)
+	for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++)
 	{
-		auto spu = spu_thread_initialize(tg, i, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, u64(i) << 32, spurs.addr(), 0, 0);
-		spurs->m.spus[i] = spu->GetId();
+		spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num, isSecond](SPUThread& CPU)
+		{
+#ifdef PRX_DEBUG
+			CPU.GPR[3]._u32[3] = num;
+			CPU.GPR[4]._u64[1] = spurs.addr();
+			return CPU.FastCall(CPU.PC);
+#endif
+
+		})->GetId();
 	}
 
 	if (flags & SAF_SPU_PRINTF_ENABLED)
@@ -261,22 +266,24 @@ s64 spursInit(
 	name = std::string(prefix, prefixSize);
 
-	PPUThread* ppu0 = nullptr;
+	spurs->m.ppu0 = ppu_thread_create(0, 0, ppuPriority, 0x4000, true, false, name + "SpursHdlr0", [spurs](PPUThread& CPU)
+	{
 #ifdef PRX_DEBUG
-	ppu0 = ppu_thread_create(vm::read32(libsre_rtoc - 0x7E60), spurs.addr(), ppuPriority, 0x4000, true, false, name + "SpursHdlr0");
+		return cb_call<void, vm::ptr<CellSpurs>>(CPU, libsre + 0x9214, libsre_rtoc, spurs);
 #endif
-	assert(ppu0);
-	spurs->m.ppu0 = ppu0->GetId();
-	PPUThread* ppu1 = nullptr;
+	})->GetId();
+
+	spurs->m.ppu1 = ppu_thread_create(0, 0, ppuPriority, 0x8000, true, false, name + "SpursHdlr1", [spurs](PPUThread& CPU)
+	{
 #ifdef PRX_DEBUG
-	ppu1 = ppu_thread_create(vm::read32(libsre_rtoc - 0x7E24), spurs.addr(), ppuPriority, 0x8000, true, false, name + "SpursHdlr1");
+		return cb_call<void, vm::ptr<CellSpurs>>(CPU, libsre + 0xB40C, libsre_rtoc, spurs);
 #endif
-	assert(ppu1);
-	spurs->m.ppu1 = ppu1->GetId();
+
+	})->GetId();
 
 	// enable exception event handler
-	if (spurs->m.enableEH.compare_and_swap(be_t<u32>::make(0), be_t<u32>::make(1)).ToBE() == 0)
+	if (spurs->m.enableEH.compare_and_swap_test(be_t<u32>::make(0), be_t<u32>::make(1)))
 	{
 		assert(sys_spu_thread_group_connect_event(spurs->m.spuTG, spurs->m.queue, SYS_SPU_THREAD_GROUP_EVENT_EXCEPTION) == CELL_OK);
 	}
@@ -291,12 +298,10 @@ s64 spursInit(
 
 	if (flags & SAF_SYSTEM_WORKLOAD_ENABLED) // initialize system workload
 	{
-		s32 res;
+		s32 res = CELL_OK;
 #ifdef PRX_DEBUG
 		res = cb_call<s32, vm::ptr<CellSpurs>, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0x10428, libsre_rtoc,
 			spurs, Memory.RealToVirtualAddr(swlPriority), swlMaxSpu, swlIsPreem);
-#else
-		res = -1;
 #endif
 		assert(res == CELL_OK);
 	}
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
index 84e1bf877f..8f06a48660 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
@@ -367,6 +367,7 @@ s32 cellSyncRwmTryRead(vm::ptr rwm, vm::ptr buffer)
 	{
 		return res;
 	}
+
 	memcpy(buffer.get_ptr(), rwm->m_buffer.get_ptr(), (u32)rwm->m_size);
 
 	return rwm->data.atomic_op(CELL_OK, syncRwmReadEndOp);
@@ -520,7 +521,8 @@ s32 cellSyncQueuePush(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -559,7 +561,8 @@ s32 cellSyncQueueTryPush(vm::ptr queue, vm::ptr buffe
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	if (s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -610,7 +613,8 @@ s32 cellSyncQueuePop(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -649,7 +653,8 @@ s32 cellSyncQueueTryPop(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	if (s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -694,7 +699,8 @@ s32 cellSyncQueuePeek(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -730,7 +736,8 @@ s32 cellSyncQueueTryPeek(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	if (s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -759,9 +766,10 @@ s32 cellSyncQueueSize(vm::ptr queue)
 		return CELL_SYNC_ERROR_ALIGN;
 	}
 
-	const u32 count = (u32)queue->data.read_relaxed().m_v2 & 0xffffff;
+	const auto data = queue->data.read_relaxed();
+	const u32 count = (u32)data.m_v2 & 0xffffff;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && count <= depth);
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && count <= depth);
 
 	return count;
 }
@@ -780,7 +788,8 @@ s32 cellSyncQueueClear(vm::ptr queue)
 	}
 
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	// TODO: optimize if possible
 	while (queue->data.atomic_op(CELL_OK, [depth](CellSyncQueue::data_t& queue) -> s32
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp
index 3e504cffc9..0c33f0e25d 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp
+++ b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp
@@ -147,7 +147,7 @@ s32 sys_ppu_thread_restart(u64 thread_id)
 	return CELL_OK;
 }
 
-PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool is_joinable, bool is_interrupt, const std::string& name)
+PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool is_joinable, bool is_interrupt, const std::string& name, std::function<void(PPUThread& CPU)> task)
 {
 	PPUThread& new_thread = *(PPUThread*)&Emu.GetCPU().AddThread(CPU_THREAD_PPU);
 
@@ -159,6 +159,7 @@ PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool i
 	new_thread.m_has_interrupt = false;
 	new_thread.m_is_interrupt = is_interrupt;
 	new_thread.SetName(name);
+	new_thread.m_custom_task = task;
 
 	sys_ppu_thread.Notice("*** New PPU Thread [%s] (%s, entry=0x%x): id = %d", name.c_str(), is_interrupt ? "interrupt" :
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.h b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.h
index b5d8540d32..c76c49b461 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.h
+++ b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.h
@@ -15,7 +15,7 @@ enum ppu_thread_flags : u64
 };
 
 // Aux
-PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool is_joinable, bool is_interrupt, const std::string& name);
+PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool is_joinable, bool is_interrupt, const std::string& name, std::function<void(PPUThread& CPU)> task = nullptr);
 
 // SysCalls
 void sys_ppu_thread_exit(PPUThread& CPU, u64 errorcode);
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
index 26909c9408..2f790c5ecf 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
+++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
@@ -59,7 +59,7 @@ s32 sys_spu_image_open(vm::ptr img, vm::ptr path)
 	return CELL_OK;
 }
 
-SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image& img, const std::string& name, u32 option, u64 a1, u64 a2, u64 a3, u64 a4)
+SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image& img, const std::string& name, u32 option, u64 a1, u64 a2, u64 a3, u64 a4, std::function<void(SPUThread& CPU)> task)
 {
 	if (option)
 	{
@@ -77,6 +77,7 @@ SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image
 	new_thread.SetOffset(spu_offset);
 	new_thread.SetEntry(spu_ep);
 	new_thread.SetName(name);
+	new_thread.m_custom_task = task;
 	new_thread.Run();
 	new_thread.GPR[3] = u128::from64(0, a1);
 	new_thread.GPR[4] = u128::from64(0, a2);
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.h b/rpcs3/Emu/SysCalls/lv2/sys_spu.h
index 526334fe36..203138e8c8 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_spu.h
+++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.h
@@ -158,7 +158,7 @@ class SPUThread;
 // Aux
 s32 spu_image_import(sys_spu_image& img, u32 src, u32 type);
 SpuGroupInfo* spu_thread_group_create(const std::string& name, u32 num, s32 prio, s32 type, u32 container);
-SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image& img, const std::string& name, u32 option, u64 a1, u64 a2, u64 a3, u64 a4);
+SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image& img, const std::string& name, u32 option, u64 a1, u64 a2, u64 a3, u64 a4, std::function<void(SPUThread& CPU)> task = nullptr);
 
 // SysCalls
 s32 sys_spu_initialize(u32 max_usable_spu, u32 max_raw_spu);
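Usage sketch (illustrative, not part of the patch): the new task argument is stored in m_custom_task, and PPUThread::Task() / SPUThread::Task() run it in place of the interpreter whenever it is set. The helper below mirrors the SpursHdlr0 creation in spursInit(); the function name spursCreateHandlerThread and the HLE body are hypothetical, only the call pattern comes from the diff above.

// Hypothetical sketch of the new ppu_thread_create overload (assumes the same
// headers as cellSpurs.cpp); only the creation pattern is taken from the diff.
u32 spursCreateHandlerThread(vm::ptr<CellSpurs> spurs, s32 ppuPriority, const std::string& name)
{
	// entry = 0: no guest entry point is dispatched, the lambda becomes the thread body
	return ppu_thread_create(0, 0, ppuPriority, 0x4000, true, false, name + "SpursHdlr0", [spurs](PPUThread& CPU)
	{
		// executed by PPUThread::Task() via m_custom_task instead of CPUThread::Task();
		// an HLE implementation of the SPURS handler loop would go here
	})->GetId();
}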