From 2c70f5168a89e081f2bfde4482df94eef2426c7c Mon Sep 17 00:00:00 2001 From: S Gopal Rajagopal Date: Mon, 2 Feb 2015 09:02:38 +0530 Subject: [PATCH] SPURS: Reorder, rename and some cleanup --- rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp | 4 +- rpcs3/Emu/SysCalls/Modules/cellSpurs.h | 18 +- rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 1599 ++++++++++--------- 3 files changed, 826 insertions(+), 795 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index b8088daaff..18eed2c944 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -26,7 +26,7 @@ extern u32 libsre; extern u32 libsre_rtoc; #endif -bool spursKernelMain(SPUThread & spu); +bool spursKernelEntry(SPUThread & spu); s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id); s64 _cellSpursSendSignal(vm::ptr taskset, u32 taskID); @@ -182,7 +182,7 @@ s64 spursInit( { auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0); #ifndef PRX_DEBUG_XXX - spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelMain); + spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelEntry); #endif spurs->m.spus[num] = spu->GetId(); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index f1a4793e13..21f011eda2 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -105,8 +105,8 @@ enum SPURSKernelInterfaces CELL_SPURS_KERNEL_DMA_TAG_ID = 31, CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818, CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848, - CELL_SPURS_KERNEL1_YIELD_ADDR = 0x808, - CELL_SPURS_KERNEL2_YIELD_ADDR = 0x838, + CELL_SPURS_KERNEL1_EXIT_ADDR = 0x808, + CELL_SPURS_KERNEL2_EXIT_ADDR = 0x838, CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290, CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290, }; @@ -886,8 +886,8 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; -// The SPURS kernel data store. This resides at 0x100 of the LS. -struct SpursKernelMgmtData +// The SPURS kernel context. This resides at 0x100 of the LS. +struct SpursKernelContext { u8 tempArea[0x80]; // 0x100 u8 wklLocContention[0x10]; // 0x180 @@ -900,7 +900,7 @@ struct SpursKernelMgmtData vm::bptr wklCurrentAddr; // 0x1D0 be_t wklCurrentUniqueId; // 0x1D8 be_t wklCurrentId; // 0x1DC - be_t yieldToKernelAddr; // 0x1E0 + be_t exitToKernelAddr; // 0x1E0 be_t selectWorkloadAddr; // 0x1E4 u8 moduleId[2]; // 0x1E8 u8 sysSrvInitialised; // 0x1EA @@ -923,10 +923,10 @@ struct SpursKernelMgmtData be_t guid[4]; // 0x280 }; -static_assert(sizeof(SpursKernelMgmtData) == 0x190, "Incorrect size for SpursKernelMgmtData"); +static_assert(sizeof(SpursKernelContext) == 0x190, "Incorrect size for SpursKernelContext"); -// The SPURS taskset policy module data store. This resides at 0x2700 of the LS. -struct SpursTasksetPmMgmtData +// The SPURS taskset policy module context. This resides at 0x2700 of the LS. 
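+// (Note: only the names change in this patch; the member offsets annotated below
+// and the 0x900 size assertion are untouched.)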
+struct SpursTasksetContext { u8 tempAreaTaskset[0x80]; // 0x2700 u8 tempAreaTaskInfo[0x30]; // 0x2780 @@ -956,7 +956,7 @@ struct SpursTasksetPmMgmtData u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 }; -static_assert(sizeof(SpursTasksetPmMgmtData) == 0x900, "Incorrect size for SpursTasksetPmMgmtData"); +static_assert(sizeof(SpursTasksetContext) == 0x900, "Incorrect size for SpursTasksetContext"); s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); s64 spursWakeUp(PPUThread& CPU, vm::ptr spurs); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index c7902f7240..451bfef848 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -13,7 +13,7 @@ // // SPURS utility functions // -void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId); u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); void cellSpursModuleExit(SPUThread & spu); @@ -27,36 +27,44 @@ void spursHalt(SPUThread & spu); // bool spursKernel1SelectWorkload(SPUThread & spu); bool spursKernel2SelectWorkload(SPUThread & spu); +void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus); +bool spursKernelWorkloadExit(SPUThread & spu); +bool spursKernelEntry(SPUThread & spu); // -// SPURS system service workload functions +// SPURS System Service functions // -void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4); -void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet); -void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); -void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); -bool spursSysServiceWorkloadEntry(SPUThread & spu); +bool spursSysServiceEntry(SPUThread & spu); +// TODO: Exit +void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceMain(SPUThread & spu, u32 pollStatus); +void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt); +// TODO: Deactivate workload +void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelContext * ctxt, u32 wklShutdownBitSet); +void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4); +// TODO: Deactivate trace +// TODO: System workload entry +void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt); // -// SPURS taskset policy module functions +// SPURS Taskset Policy Module functions // -s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); -s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); -void spursTasksetExit(SPUThread & spu); +bool spursTasksetEntry(SPUThread & spu); +bool spursTasksetSyscallEntry(SPUThread & spu); +void 
spursTasksetResumeTask(SPUThread & spu); void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); -void spursTasksetDispatch(SPUThread & spu); +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); bool spursTasksetPollStatus(SPUThread & spu); -void spursTasksetInit(SPUThread & spu, u32 pollStatus); -void spursTasksetResumeTask(SPUThread & spu); -s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetExit(SPUThread & spu); void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args); +s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetDispatch(SPUThread & spu); s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args); -bool spursTasksetEntry(SPUThread & spu); +void spursTasksetInit(SPUThread & spu, u32 pollStatus); +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); extern Module *cellSpurs; @@ -65,16 +73,16 @@ extern Module *cellSpurs; ////////////////////////////////////////////////////////////////////////////// /// Output trace information -void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId) { // TODO: Implement this } /// Check for execution right requests u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); spu.GPR[3]._u32[3] = 1; - if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { + if (ctxt->spurs->m.flags1 & SF1_32_WORKLOADS) { spursKernel2SelectWorkload(spu); } else { spursKernel1SelectWorkload(spu); @@ -86,13 +94,13 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { } u32 wklId = result >> 32; - return wklId == mgmt->wklCurrentId ? 0 : 1; + return wklId == ctxt->wklCurrentId ? 0 : 1; } /// Exit current workload void cellSpursModuleExit(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); - spu.SetBranch(mgmt->yieldToKernelAddr); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + spu.SetBranch(ctxt->exitToKernelAddr); } /// Execute a DMA operation @@ -134,7 +142,7 @@ u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { return rv._u32[3]; } -// Halt the SPU +/// Halt the SPU void spursHalt(SPUThread & spu) { spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT); spu.Stop(); @@ -146,7 +154,7 @@ void spursHalt(SPUThread & spu) { /// Select a workload to run bool spursKernel1SelectWorkload(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. 
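(For orientation, since the following hunks implement the other half of this contract: a policy module runs until cellSpursModulePollStatus reports that the kernel would now schedule a different workload, then leaves through cellSpursModuleExit. A minimal sketch of that loop, assuming the HLE conventions of this file; spursModuleMain is an illustrative name, not a function in this patch:)

// Illustrative sketch only. cellSpursModulePollStatus returns 1 when the workload
// selected by spursKernel1/2SelectWorkload differs from ctxt->wklCurrentId, i.e.
// when a context switch is pending; cellSpursModuleExit then branches to the
// kernel's exit stub (ctxt->exitToKernelAddr).
void spursModuleMain(SPUThread & spu) {
    while (true) {
        // ... execute one slice of this workload ...
        u32 pollStatus;
        if (cellSpursModulePollStatus(spu, &pollStatus)) {
            cellSpursModuleExit(spu); // SetBranch to exitToKernelAddr
            return;                   // control resumes in spursKernelWorkloadExit
        }
    }
}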
@@ -158,20 +166,20 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { do { // DMA and lock the first 0x80 bytes of spurs - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD]; u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - contention[i] = spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i]; + contention[i] = spurs->m.wklCurrentContention[i] - ctxt->wklLocContention[i]; // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably // to prevent unnecessary jumps to the kernel if (isPoll) { - pendingContention[i] = spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i]; - if (i != mgmt->wklCurrentId) { + pendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + if (i != ctxt->wklCurrentId) { contention[i] += pendingContention[i]; } } @@ -180,19 +188,19 @@ wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; pollStatus = 0; - // The system service workload has the highest priority. Select the system service workload if + // The system service has the highest priority. Select the system service if // the system service message bit for this SPU is set. - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + ctxt->spuIdling = 0; + if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit - spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum)); } } else { // Calculate the scheduling weight for each workload u16 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i); + u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i); u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); @@ -206,7 +214,7 @@ // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) // OR the workload must be signalled // OR the workload flag is 0 and the workload is configured as the workload flag receiver - if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { + if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) { if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) { // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority: // 1. 
Workload signal set or workload flag or ready count > contention // 2. Workload priority of the SPU // 3. Is the workload the last selected workload // 4. Minimum contention of the workload // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight) // 6. Is the workload executable same as the currently loaded executable // 7. The workload id (lesser the number, more the weight) u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0; - weight |= (u16)(mgmt->priority[i] & 0x7F) << 16; - weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00; + weight |= (u16)(ctxt->priority[i] & 0x7F) << 16; + weight |= i == ctxt->wklCurrentId ? 0x80 : 0x00; weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00; weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2; - weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00; + weight |= ctxt->wklUniqueId[i] == ctxt->wklCurrentId ? 0x02 : 0x00; weight |= 0x01; // In case of a tie the lower numbered workload is chosen @@ -237,9 +245,9 @@ } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { + if (!isPoll || wklSelectedId == ctxt->wklCurrentId) { // Clear workload signal for the selected workload spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); @@ -260,16 +268,16 @@ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { spurs->m.wklCurrentContention[i] = contention[i]; - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocContention[wklSelectedId] = 1; + ctxt->wklLocContention[wklSelectedId] = 1; } - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { + ctxt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != ctxt->wklCurrentId) { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { @@ -278,14 +286,14 @@ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { spurs->m.wklPendingContention[i] = pendingContention[i]; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - mgmt->wklLocPendingContention[wklSelectedId] = 1; + ctxt->wklLocPendingContention[wklSelectedId] = 1; } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; @@ -295,7 +303,7 @@ /// Select a workload to run bool spursKernel2SelectWorkload(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function // is called by the SPURS kernel and set 
to true if called by cellSpursModulePollStatus. @@ -307,22 +315,22 @@ do { // DMA and lock the first 0x80 bytes of spurs - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD2]; u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2]; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F]; + contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - ctxt->wklLocContention[i & 0x0F]; contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4; // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably // to prevent unnecessary jumps to the kernel if (isPoll) { - pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; + pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - ctxt->wklLocPendingContention[i & 0x0F]; pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; - if (i != mgmt->wklCurrentId) { + if (i != ctxt->wklCurrentId) { contention[i] += pendingContention[i]; } } @@ -331,22 +339,22 @@ wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; pollStatus = 0; - // The system service workload has the highest priority. Select the system service workload if + // The system service has the highest priority. Select the system service if // the system service message bit for this SPU is set. - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { // Not sure what this does. Possibly mark the SPU as in use. - mgmt->spuIdling = 0; - if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + ctxt->spuIdling = 0; + if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit - spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum)); } } else { // Calculate the scheduling weight for each workload u8 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { auto j = i & 0x0F; - u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 
1 : 0 : 0; @@ -365,7 +373,7 @@ // The current workload is given a slightly higher weight presumably to reduce the number of context switches. // In case of a tie the lower numbered workload is chosen. u8 weight = priority << 4; - if (mgmt->wklCurrentId == i) { + if (ctxt->wklCurrentId == i) { weight |= 0x04; } @@ -381,9 +389,9 @@ } // Not sure what this does. Possibly mark the SPU as idle/in use. - mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; + ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0; - if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { + if (!isPoll || wklSelectedId == ctxt->wklCurrentId) { // Clear workload signal for the selected workload spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); @@ -404,13 +412,13 @@ for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); - mgmt->wklLocContention[i] = 0; - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } - mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; - mgmt->wklCurrentId = wklSelectedId; - } else if (wklSelectedId != mgmt->wklCurrentId) { + ctxt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; + ctxt->wklCurrentId = wklSelectedId; + } else if (wklSelectedId != ctxt->wklCurrentId) { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { @@ -419,12 +427,12 @@ for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); - mgmt->wklLocPendingContention[i] = 0; + ctxt->wklLocPendingContention[i] = 0; } - mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; + ctxt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; @@ -432,81 +440,27 @@ return true; } -/// SPURS kernel main -bool spursKernelMain(SPUThread & spu) { - SpursKernelMgmtData * mgmt = vm::get_ptr(spu.ls_offset + 0x100); +/// SPURS kernel dispatch workload +void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? 
true : false; - bool isKernel2; - u32 pollStatus; - if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) { - // Entry point of SPURS kernel - // Save arguments - mgmt->spuNum = spu.GPR[3]._u32[3]; - mgmt->spurs.set(spu.GPR[4]._u64[1]); + auto pollStatus = (u32)(spu.GPR[3]._u64[1]); + auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - - memset(mgmt, 0, sizeof(SpursKernelMgmtData)); - - // Initialise the SPURS management area to its initial values - mgmt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; - mgmt->wklCurrentUniqueId = 0x20; - mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; - mgmt->yieldToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_YIELD_ADDR : CELL_SPURS_KERNEL1_YIELD_ADDR; - mgmt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR; - if (!isKernel2) { - mgmt->x1F0 = 0xF0020000; - mgmt->x200 = 0x20000; - mgmt->guid[0] = 0x423A3A02; - mgmt->guid[1] = 0x43F43A82; - mgmt->guid[2] = 0x43F26502; - mgmt->guid[3] = 0x420EB382; - } else { - mgmt->guid[0] = 0x43A08402; - mgmt->guid[1] = 0x43FB0A82; - mgmt->guid[2] = 0x435E9302; - mgmt->guid[3] = 0x43A3C982; - } - - // Register SPURS kernel HLE functions - spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant - spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain); - spu.RegisterHleFunction(mgmt->yieldToKernelAddr, spursKernelMain); - spu.RegisterHleFunction(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload); - - // DMA in the system service workload info - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfoSysSrv), 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID/*tag*/); - spursDmaWaitForCompletion(spu, 0x80000000); - pollStatus = 0; - } else if (spu.PC == mgmt->yieldToKernelAddr) { - isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; - - // Select next workload to run - spu.GPR[3].clear(); - if (isKernel2) { - spursKernel2SelectWorkload(spu); - } else { - spursKernel1SelectWorkload(spu); - } - - pollStatus = (u32)(spu.GPR[3]._u64[1]); - auto wid = (u32)(spu.GPR[3]._u64[1] >> 32); - - // DMA in the workload info for the selected workload - auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) : - wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) : - offsetof(CellSpurs, m.wklInfoSysSrv); - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - spursDmaWaitForCompletion(spu, 0x80000000); - } else { - assert(0); - } + // DMA in the workload info for the selected workload + auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) : + wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? 
offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) : + offsetof(CellSpurs, m.wklInfoSysSrv); + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + spursDmaWaitForCompletion(spu, 0x80000000); + // Load the workload to LS auto wklInfo = vm::get_ptr(spu.ls_offset + 0x3FFE0); - if (mgmt->wklCurrentAddr != wklInfo->addr) { + if (ctxt->wklCurrentAddr != wklInfo->addr) { switch (wklInfo->addr.addr()) { case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: - spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry); + spu.RegisterHleFunction(0xA00, spursSysServiceEntry); break; case SPURS_IMG_ADDR_TASKSET_PM: spu.RegisterHleFunction(0xA00, spursTasksetEntry); @@ -517,22 +471,81 @@ bool spursKernelMain(SPUThread & spu) { break; } - mgmt->wklCurrentAddr = wklInfo->addr; - mgmt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); + ctxt->wklCurrentAddr = wklInfo->addr; + ctxt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed(); } if (!isKernel2) { - mgmt->moduleId[0] = 0; - mgmt->moduleId[1] = 0; + ctxt->moduleId[0] = 0; + ctxt->moduleId[1] = 0; } // Run workload - spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr; + spu.GPR[0]._u32[3] = ctxt->exitToKernelAddr; spu.GPR[1]._u32[3] = 0x3FFB0; spu.GPR[3]._u32[3] = 0x100; spu.GPR[4]._u64[1] = wklInfo->arg; spu.GPR[5]._u32[3] = pollStatus; spu.SetBranch(0xA00); +} + +/// SPURS kernel workload exit +bool spursKernelWorkloadExit(SPUThread & spu) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + + // Select next workload to run + spu.GPR[3].clear(); + if (isKernel2) { + spursKernel2SelectWorkload(spu); + } else { + spursKernel1SelectWorkload(spu); + } + + spursKernelDispatchWorkload(spu, spu.GPR[3]._u64[1]); + return false; +} + +/// SPURS kernel entry point +bool spursKernelEntry(SPUThread & spu) { + SpursKernelContext * ctxt = vm::get_ptr(spu.ls_offset + 0x100); + + // Save arguments + ctxt->spuNum = spu.GPR[3]._u32[3]; + ctxt->spurs.set(spu.GPR[4]._u64[1]); + + auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; + + memset(ctxt, 0, sizeof(SpursKernelContext)); + + // Initialise the SPURS context to its initial values + ctxt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID; + ctxt->wklCurrentUniqueId = 0x20; + ctxt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + ctxt->exitToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_EXIT_ADDR : CELL_SPURS_KERNEL1_EXIT_ADDR; + ctxt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR; + if (!isKernel2) { + ctxt->x1F0 = 0xF0020000; + ctxt->x200 = 0x20000; + ctxt->guid[0] = 0x423A3A02; + ctxt->guid[1] = 0x43F43A82; + ctxt->guid[2] = 0x43F26502; + ctxt->guid[3] = 0x420EB382; + } else { + ctxt->guid[0] = 0x43A08402; + ctxt->guid[1] = 0x43FB0A82; + ctxt->guid[2] = 0x435E9302; + ctxt->guid[3] = 0x43A3C982; + } + + // Register SPURS kernel HLE functions + spu.UnregisterHleFunctions(0, 0x40000/*LS_BOTTOM*/); + spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelEntry); + spu.RegisterHleFunction(ctxt->exitToKernelAddr, spursKernelWorkloadExit); + spu.RegisterHleFunction(ctxt->selectWorkloadAddr, isKernel2 ? 
spursKernel2SelectWorkload : spursKernel1SelectWorkload); + + // Start the system service + spursKernelDispatchWorkload(spu, ((u64)CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) << 32); return false; } @@ -540,120 +553,337 @@ bool spursKernelMain(SPUThread & spu) { // SPURS system workload functions ////////////////////////////////////////////////////////////////////////////// -/// Restore scheduling parameters after a workload has been preempted by the system service workload -void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) { - u8 wklId; +/// Entry point of the system service +bool spursSysServiceEntry(SPUThread & spu) { + auto ctxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); + auto arg = spu.GPR[4]._u64[1]; + auto pollStatus = spu.GPR[5]._u32[3]; - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + if (ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + spursSysServiceMain(spu, pollStatus); + } else { + // TODO: If we reach here it means the current workload was preempted to start the + // system workload. Need to implement this. + } - if (spurs->m.sysSrvWorkload[mgmt->spuNum] == 0xFF) { - return; - } + return false; +} - wklId = spurs->m.sysSrvWorkload[mgmt->spuNum]; - spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF; - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); +/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled +void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { + // Monitor only lock line reservation lost events + spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); - spursSysServiceUpdateWorkload(spu, mgmt); - - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + bool shouldExit; + while (true) { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - if (wklId >= CELL_SPURS_MAX_WORKLOAD) { - spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; - spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); - } else { - spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; - spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); + // Find the number of SPUs that are idling in this SPURS instance + u32 nIdlingSpus = 0; + for (u32 i = 0; i < 8; i++) { + if (spurs->m.spuIdling & (1 << i)) { + nIdlingSpus++; + } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace - // uses the current worload id to determine the workload to which the trace belongs - auto wklIdSaved = mgmt->wklCurrentId; - mgmt->wklCurrentId = wklId; + bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true: false; + bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? 
true : false; + shouldExit = allSpusIdle && exitIfNoWork; - // Trace - STOP: GUID + // Check if any workloads can be scheduled + bool foundReadyWorkload = false; + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + foundReadyWorkload = true; + } else { + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { + u32 j = i & 0x0F; + u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; + u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + + if (runnable && priority > 0 && maxContention > contention) { + if (wklFlag || wklSignal || readyCount > contention) { + foundReadyWorkload = true; + break; + } + } + } + } else { + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + u8 runnable = ctxt->wklRunnable1 & (0x8000 >> i); + u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); + u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); + u8 requestCount = readyCount + idleSpuCount; + + if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) { + if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) { + foundReadyWorkload = true; + break; + } + } + } + } + } + + bool spuIdling = spurs->m.spuIdling & (1 << ctxt->spuNum) ? true : false; + if (foundReadyWorkload && shouldExit == false) { + spurs->m.spuIdling &= ~(1 << ctxt->spuNum); + } else { + spurs->m.spuIdling |= 1 << ctxt->spuNum; + } + + // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events. + if (spuIdling && shouldExit == false && foundReadyWorkload == false) { + // The system service blocks by making a reservation and waiting on the lock line reservation lost event. 
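+ // (Reading SPU_RdEventStat blocks until an enabled event is pending - here only
+ // SPU_EVENT_LR, the lock line reservation lost event - and the SPU_WrEventAck
+ // write acknowledges it so the GETLLAR at the top of the loop can establish a
+ // fresh reservation on the next iteration.)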
+ u128 r; + spu.ReadChannel(r, SPU_RdEventStat); + spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR)); + } + + auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + if (dmaSuccess && (shouldExit || foundReadyWorkload)) { + break; + } + } + + if (shouldExit) { + // TODO: exit spu thread group + } +} + +/// Main function for the system service +void spursSysServiceMain(SPUThread & spu, u32 pollStatus) { + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + + if (ctxt->spurs.addr() % CellSpurs::align) { + spursHalt(spu); + return; + } + + // Initialise the system service if this is the first time it's being started on this SPU + if (ctxt->sysSrvInitialised == 0) { + ctxt->sysSrvInitialised = 1; + + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + // Halt if already initialised + if (spurs->m.sysSrvOnSpu & (1 << ctxt->spuNum)) { + spursHalt(spu); + return; + } + + spurs->m.sysSrvOnSpu |= 1 << ctxt->spuNum; + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + ctxt->traceBuffer = 0; + ctxt->traceMsgCount = -1; + spursSysServiceTraceUpdate(spu, ctxt, 1, 1, 0); + spursSysServiceCleanupAfterSystemWorkload(spu, ctxt); + + // Trace - SERVICE: INIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); + } + + // Trace - START: Module='SYS ' CellSpursTracePacket pkt; memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = SPURS_GUID_SYS_WKL; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + pkt.header.tag = CELL_SPURS_TRACE_TAG_START; + memcpy(pkt.data.start.module, "SYS ", 4); + pkt.data.start.level = 1; // Policy module + pkt.data.start.ls = 0xA00 >> 2; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); - mgmt->wklCurrentId = wklIdSaved; -} + while (true) { + // Process requests for the system service + spursSysServiceProcessRequests(spu, ctxt); -/// Update the trace count for this SPU in CellSpurs -void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt) { - if (mgmt->traceBuffer) { - auto traceInfo = vm::ptr::make((u32)(mgmt->traceBuffer - (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4))); - traceInfo->count[mgmt->spuNum] = mgmt->traceMsgCount; +poll: + if (cellSpursModulePollStatus(spu, nullptr)) { + // Trace - SERVICE: EXIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); + + // Trace - STOP: GUID + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = SPURS_GUID_SYS_WKL; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); + + spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); + break; + } + + // If we reach here it means that either there are more system service messages to be processed + // or there are no workloads that can be scheduled. 
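+ // (spuIdling is the flag set by spursKernel1/2SelectWorkload: 0 when the system
+ // service was entered because its message bit was set, 1 when it was entered as
+ // a fallback because no other workload was ready.)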
+ + // If the SPU is not idling then process the remaining system service messages + if (ctxt->spuIdling == 0) { + continue; + } + + // If we reach here it means that the SPU is idling + + // Trace - SERVICE: WAIT + CellSpursTracePacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; + pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); + + spursSysServiceIdleHandler(spu, ctxt); + goto poll; } } -/// Update trace control in SPU from CellSpurs -void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) { - bool notify; +/// Process any requests +void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) { + bool updateTrace = false; + bool updateWorkload = false; + bool terminate = false; - u8 sysSrvMsgUpdateTrace; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; - spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum); - spurs->m.xCC &= ~(1 << mgmt->spuNum); - spurs->m.xCC |= arg2 << mgmt->spuNum; - - notify = false; - if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) { - spurs->m.xCD = 0; - notify = true; + // Terminate request + if (spurs->m.sysSrvMsgTerminate & (1 << ctxt->spuNum)) { + spurs->m.sysSrvOnSpu &= ~(1 << ctxt->spuNum); + terminate = true; } - if (arg4 && spurs->m.xCD != 0) { - spurs->m.xCD = 0; - notify = true; - } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - // Get trace parameters from CellSpurs and store them in the LS - if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); - - if (mgmt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { - spursSysServiceUpdateTraceCount(spu, mgmt); - } else { - spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, mgmt->dmaTagId); - auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); - mgmt->traceMsgCount = traceBuffer->count[mgmt->spuNum]; + // Update workload message + if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << ctxt->spuNum)) { + spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << ctxt->spuNum); + updateWorkload = true; } - mgmt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[mgmt->spuNum] << 4); - mgmt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0]; - if (mgmt->traceBuffer == 0) { - mgmt->traceMsgCount = 0; + // Update trace message + if (spurs->m.sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) { + updateTrace = true; } + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Process update workload message + if (updateWorkload) { + spursSysServiceActivateWorkload(spu, ctxt); } - if (notify) { - auto spurs = 
vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0); + // Process update trace message + if (updateTrace) { + spursSysServiceTraceUpdate(spu, ctxt, 1, 0, 0); + } + + // Process terminate request + if (terminate) { + // TODO: Rest of the terminate processing } } -/// Update events in CellSpurs -void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet) { +/// Activate a workload +void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) { + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + } + + u32 wklShutdownBitSet = 0; + ctxt->wklRunnable1 = 0; + ctxt->wklRunnable2 = 0; + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000); + + // Copy the priority of the workload for this SPU and its unique id to the LS + ctxt->priority[i] = wklInfo1[i].priority[ctxt->spuNum] == 0 ? 0 : 0x10 - wklInfo1[i].priority[ctxt->spuNum]; + ctxt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); + + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200); + + // Copy the priority of the workload for this SPU to the LS + if (wklInfo2[i].priority[ctxt->spuNum]) { + ctxt->priority[i] |= (0x10 - wklInfo2[i].priority[ctxt->spuNum]) << 4; + } + } + } + + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + // Update workload status and runnable flag based on the workload state + auto wklStatus = spurs->m.wklStatus1[i]; + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus1[i] |= 1 << ctxt->spuNum; + ctxt->wklRunnable1 |= 0x8000 >> i; + } else { + spurs->m.wklStatus1[i] &= ~(1 << ctxt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) { + spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x80000000u >> i; + } + } + + if (spurs->m.flags1 & SF1_32_WORKLOADS) { + // Update workload status and runnable flag based on the workload state + wklStatus = spurs->m.wklStatus2[i]; + if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { + spurs->m.wklStatus2[i] |= 1 << ctxt->spuNum; + ctxt->wklRunnable2 |= 0x8000 >> i; + } else { + spurs->m.wklStatus2[i] &= ~(1 << ctxt->spuNum); + } + + // If the workload is shutting down and if this is the last SPU from which it is being removed then + // add it to the shutdown bit set + if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { + if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) { + spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); + wklShutdownBitSet |= 0x8000 >> i; + } + } + } + } + } 
while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + if (wklShutdownBitSet) { + spursSysServiceUpdateShutdownCompletionEvents(spu, ctxt, wklShutdownBitSet); + } +} + +/// Update shutdown completion events +void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelContext * ctxt, u32 wklShutdownBitSet) { // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed // workloads that have a shutdown completion hook registered u32 wklNotifyBitSet; u8 spuPort; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); wklNotifyBitSet = 0; @@ -673,334 +903,117 @@ void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 } } } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); if (wklNotifyBitSet) { // TODO: sys_spu_thread_send_event(spuPort, 0, wklNotifyMask); } } -/// Update workload information in the SPU from CellSpurs -void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) { - auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - spursDma(spu, MFC_GET_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - } - - u32 wklShutdownBitSet = 0; - mgmt->wklRunnable1 = 0; - mgmt->wklRunnable2 = 0; - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000); - - // Copy the priority of the workload for this SPU and its unique id to the LS - mgmt->priority[i] = wklInfo1[i].priority[mgmt->spuNum] == 0 ? 
0 : 0x10 - wklInfo1[i].priority[mgmt->spuNum]; - mgmt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed(); - - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200); - - // Copy the priority of the workload for this SPU to the LS - if (wklInfo2[i].priority[mgmt->spuNum]) { - mgmt->priority[i] |= (0x10 - wklInfo2[i].priority[mgmt->spuNum]) << 4; - } - } - } - - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - // Update workload status and runnable flag based on the workload state - auto wklStatus = spurs->m.wklStatus1[i]; - if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable1 |= 0x8000 >> i; - } else { - spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum); - } - - // If the workload is shutting down and if this is the last SPU from which it is being removed then - // add it to the shutdown bit set - if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) { - spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x80000000u >> i; - } - } - - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - // Update workload status and runnable flag based on the workload state - wklStatus = spurs->m.wklStatus2[i]; - if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) { - spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum; - mgmt->wklRunnable2 |= 0x8000 >> i; - } else { - spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum); - } - - // If the workload is shutting down and if this is the last SPU from which it is being removed then - // add it to the shutdown bit set - if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) { - if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) { - spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE); - wklShutdownBitSet |= 0x8000 >> i; - } - } - } - } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - if (wklShutdownBitSet) { - spursSysServiceUpdateEvent(spu, mgmt, wklShutdownBitSet); +/// Update the trace count for this SPU +void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt) { + if (ctxt->traceBuffer) { + auto traceInfo = vm::ptr::make((u32)(ctxt->traceBuffer - (ctxt->spurs->m.traceStartIndex[ctxt->spuNum] << 4))); + traceInfo->count[ctxt->spuNum] = ctxt->traceMsgCount; } } -/// Process any messages -void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) { - bool updateTrace = false; - bool updateWorkload = false; - bool terminate = false; +/// Update trace control +void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4) { + bool notify; + u8 sysSrvMsgUpdateTrace; do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - // Terminate request - if 
(spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) { - spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum); - terminate = true; + sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; + spurs->m.sysSrvMsgUpdateTrace &= ~(1 << ctxt->spuNum); + spurs->m.xCC &= ~(1 << ctxt->spuNum); + spurs->m.xCC |= arg2 << ctxt->spuNum; + + notify = false; + if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) { + spurs->m.xCD = 0; + notify = true; } - // Update workload message - if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) { - spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum); - updateWorkload = true; + if (arg4 && spurs->m.xCD != 0) { + spurs->m.xCD = 0; + notify = true; + } + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + // Get trace parameters from CellSpurs and store them in the LS + if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) || (arg3 != 0)) { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); + + if (ctxt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { + spursSysServiceTraceSaveCount(spu, ctxt); + } else { + spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, ctxt->dmaTagId); + auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); + ctxt->traceMsgCount = traceBuffer->count[ctxt->spuNum]; } - // Update trace message - if (spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) { - updateTrace = true; + ctxt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[ctxt->spuNum] << 4); + ctxt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0]; + if (ctxt->traceBuffer == 0) { + ctxt->traceMsgCount = 0; } - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - // Process update workload message - if (updateWorkload) { - spursSysServiceUpdateWorkload(spu, mgmt); } - // Process update trace message - if (updateTrace) { - spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0); - } - - // Process terminate request - if (terminate) { - // TODO: Rest of the terminate processing + if (notify) { + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0); } } -/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled -void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) { - // Monitor only lock line reservation lost events - spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); +/// Restore state after executing the system workload +void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt) { + u8 wklId; - bool shouldExit; - while (true) { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + + if (spurs->m.sysSrvWorkload[ctxt->spuNum] == 0xFF) { + return; + } + + wklId = spurs->m.sysSrvWorkload[ctxt->spuNum]; + 
spurs->m.sysSrvWorkload[ctxt->spuNum] = 0xFF; + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + spursSysServiceActivateWorkload(spu, ctxt); + + do { + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - // Find the number of SPUs that are idling in this SPURS instance - u32 nIdlingSpus = 0; - for (u32 i = 0; i < 8; i++) { - if (spurs->m.spuIdling & (1 << i)) { - nIdlingSpus++; - } - } + if (wklId >= CELL_SPURS_MAX_WORKLOAD) { + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; + spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1); + } else { - bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true: false; - bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false; - shouldExit = allSpusIdle && exitIfNoWork; + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; + spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); } - // Check if any workloads can be scheduled - bool foundReadyWorkload = false; - if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { - foundReadyWorkload = true; + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); } else { - if (spurs->m.flags1 & SF1_32_WORKLOADS) { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { - u32 j = i & 0x0F; - u8 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); - u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; - u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; - u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4; - u8 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); - u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace + // uses the current workload id to determine the workload to which the trace belongs + auto wklIdSaved = ctxt->wklCurrentId; + ctxt->wklCurrentId = wklId; - if (runnable && priority > 0 && maxContention > contention) { - if (wklFlag || wklSignal || readyCount > contention) { - foundReadyWorkload = true; - break; - } - } - } - } else { - for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { - u8 runnable = mgmt->wklRunnable1 & (0x8000 >> i); - u8 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i); - u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; - u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed(); - u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? 
CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed(); - u8 requestCount = readyCount + idleSpuCount; - - if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) { - if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) { - foundReadyWorkload = true; - break; - } - } - } - } + spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; + spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); } + } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - bool spuIdling = spurs->m.spuIdling & (1 << mgmt->spuNum) ? true : false; - if (foundReadyWorkload && shouldExit == false) { - spurs->m.spuIdling &= ~(1 << mgmt->spuNum); - } else { - spurs->m.spuIdling |= 1 << mgmt->spuNum; - } + // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace + // uses the current worload id to determine the workload to which the trace belongs + auto wklIdSaved = ctxt->wklCurrentId; + ctxt->wklCurrentId = wklId; - // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events. - if (spuIdling && shouldExit == false && foundReadyWorkload == false) { - // The system service blocks by making a reservation and waiting on the lock line reservation lost event. - u128 r; - spu.ReadChannel(r, SPU_RdEventStat); - spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR)); - } - - auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - if (dmaSuccess && (shouldExit || foundReadyWorkload)) { - break; - } - } - - if (shouldExit) { - // TODO: exit spu thread group - } -} - -/// Main function for the system service workload -void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) { - auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); - - if (mgmt->spurs.addr() % CellSpurs::align) { - spursHalt(spu); - return; - } - - // Initialise the system service if this is the first time its being started on this SPU - if (mgmt->sysSrvInitialised == 0) { - mgmt->sysSrvInitialised = 1; - - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - - do { - spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - CellSpurs * spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); - - // Halt if already initialised - if (spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) { - spursHalt(spu); - return; - } - - spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum; - } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); - - mgmt->traceBuffer = 0; - mgmt->traceMsgCount = -1; - spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0); - spursSysServiceCleanupAfterPreemption(spu, mgmt); - - // Trace - SERVICE: INIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - } - - // Trace - START: Module='SYS ' + // Trace - STOP: GUID CellSpursTracePacket pkt; memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_START; - memcpy(pkt.data.start.module, "SYS ", 
4); - pkt.data.start.level = 1; // Policy module - pkt.data.start.ls = 0xA00 >> 2; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); + pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; + pkt.data.stop = SPURS_GUID_SYS_WKL; + cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); - while (true) { - // Process messages for the system service workload - spursSysServiceProcessMessages(spu, mgmt); - -poll: - if (cellSpursModulePollStatus(spu, nullptr)) { - // Trace - SERVICE: EXIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - // Trace - STOP: GUID - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP; - pkt.data.stop = SPURS_GUID_SYS_WKL; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId); - break; - } - - // If we reach here it means that either there are more system service messages to be processed - // or there are no workloads that can be scheduled. - - // If the SPU is not idling then process the remaining system service messages - if (mgmt->spuIdling == 0) { - continue; - } - - // If we reach here it means that the SPU is idling - - // Trace - SERVICE: WAIT - CellSpursTracePacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE; - pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT; - cellSpursModulePutTrace(&pkt, mgmt->dmaTagId); - - spursSysServiceWaitOrExit(spu, mgmt); - goto poll; - } -} - -/// Entry point of the system service workload -bool spursSysServiceWorkloadEntry(SPUThread & spu) { - auto mgmt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]); - auto arg = spu.GPR[4]._u64[1]; - auto pollStatus = spu.GPR[5]._u32[3]; - - if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { - spursSysServiceWorkloadMain(spu, pollStatus); - } else { - // TODO: If we reach here it means the current workload was preempted to start the - // system service workload. Need to implement this. 
-    }
-
-    return false;
+    ctxt->wklCurrentId = wklIdSaved;
 }

 //////////////////////////////////////////////////////////////////////////////
@@ -1018,14 +1031,98 @@ enum SpursTasksetRequest {
     SPURS_TASKSET_REQUEST_RECV_WKL_FLAG = 6,
 };

+/// Taskset PM entry point
+bool spursTasksetEntry(SPUThread & spu) {
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);
+    auto kernelCtxt = vm::get_ptr<SpursKernelContext>(spu.ls_offset + spu.GPR[3]._u32[3]);
+
+    auto arg = spu.GPR[4]._u64[1];
+    auto pollStatus = spu.GPR[5]._u32[3];
+
+    // Initialise memory and save args
+    memset(ctxt, 0, sizeof(*ctxt));
+    ctxt->taskset.set(arg);
+    memcpy(ctxt->moduleId, "SPURSTASK MODULE", sizeof(ctxt->moduleId));
+    ctxt->kernelMgmtAddr = spu.GPR[3]._u32[3];
+    ctxt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR;
+    ctxt->spuNum = kernelCtxt->spuNum;
+    ctxt->dmaTagId = kernelCtxt->dmaTagId;
+    ctxt->taskId = 0xFFFFFFFF;
+
+    // Register SPURS taskset policy module HLE functions
+    spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000/*LS_BOTTOM*/);
+    spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
+    spu.RegisterHleFunction(ctxt->syscallAddr, spursTasksetSyscallEntry);
+
+    // Initialise the taskset policy module
+    spursTasksetInit(spu, pollStatus);
+
+    // Dispatch
+    spursTasksetDispatch(spu);
+    return false;
+}
+
+/// Entry point into the Taskset PM for task syscalls
+bool spursTasksetSyscallEntry(SPUThread & spu) {
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);
+
+    // Save task context
+    ctxt->savedContextLr = spu.GPR[0];
+    ctxt->savedContextSp = spu.GPR[1];
+    for (auto i = 0; i < 48; i++) {
+        ctxt->savedContextR80ToR127[i] = spu.GPR[80 + i];
+    }
+
+    // Handle the syscall
+    spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]);
+
+    // Resume the previously executing task if the syscall did not cause a context switch
+    if (spu.m_is_branch == false) {
+        spursTasksetResumeTask(spu);
+    }
+
+    return false;
+}
+
+/// Resume a task
+void spursTasksetResumeTask(SPUThread & spu) {
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);
+
+    // Restore task context
+    spu.GPR[0] = ctxt->savedContextLr;
+    spu.GPR[1] = ctxt->savedContextSp;
+    for (auto i = 0; i < 48; i++) {
+        spu.GPR[80 + i] = ctxt->savedContextR80ToR127[i];
+    }
+
+    spu.SetBranch(spu.GPR[0]._u32[3]);
+}
+
+/// Start a task
+void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);
+    auto taskset = vm::get_ptr<CellSpursTaskset>(spu.ls_offset + 0x2700);
+
+    spu.GPR[2].clear();
+    spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]);
+    spu.GPR[4]._u64[1] = taskset->m.args;
+    spu.GPR[4]._u64[0] = taskset->m.spurs.addr();
+    for (auto i = 5; i < 128; i++) {
+        spu.GPR[i].clear();
+    }
+
+    spu.SetBranch(ctxt->savedContextLr.value()._u32[3]);
+}
+
+/// Process a request and update the state of the taskset
 s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) {
-    auto kernelMgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
+    auto kernelCtxt = vm::get_ptr<SpursKernelContext>(spu.ls_offset + 0x100);
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);

     s32 rc = CELL_OK;
     s32 numNewlyReadyTasks;
     do {
-        spursDma(spu, MFC_GETLLAR_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/);
         auto taskset = vm::get_ptr<CellSpursTaskset>(spu.ls_offset + 0x2700);

         // Verify taskset state is valid
@@ -1055,27 +1152,27 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
         switch (request) {
         case SPURS_TASKSET_REQUEST_POLL_SIGNAL:
-            rc = signalled._bit[mgmt->taskId] ? 1 : 0;
-            signalled._bit[mgmt->taskId] = false;
+            rc = signalled._bit[ctxt->taskId] ? 1 : 0;
+            signalled._bit[ctxt->taskId] = false;
             break;
         case SPURS_TASKSET_REQUEST_DESTROY_TASK:
             numNewlyReadyTasks--;
-            running._bit[mgmt->taskId] = false;
-            enabled._bit[mgmt->taskId] = false;
-            signalled._bit[mgmt->taskId] = false;
-            ready._bit[mgmt->taskId] = false;
+            running._bit[ctxt->taskId] = false;
+            enabled._bit[ctxt->taskId] = false;
+            signalled._bit[ctxt->taskId] = false;
+            ready._bit[ctxt->taskId] = false;
             break;
         case SPURS_TASKSET_REQUEST_YIELD_TASK:
-            running._bit[mgmt->taskId] = false;
-            waiting._bit[mgmt->taskId] = true;
+            running._bit[ctxt->taskId] = false;
+            waiting._bit[ctxt->taskId] = true;
             break;
         case SPURS_TASKSET_REQUEST_WAIT_SIGNAL:
-            if (signalled._bit[mgmt->taskId]) {
+            if (signalled._bit[ctxt->taskId]) {
                 numNewlyReadyTasks--;
-                running._bit[mgmt->taskId] = false;
-                waiting._bit[mgmt->taskId] = true;
-                signalled._bit[mgmt->taskId] = false;
-                ready._bit[mgmt->taskId] = false;
+                running._bit[ctxt->taskId] = false;
+                waiting._bit[ctxt->taskId] = true;
+                signalled._bit[ctxt->taskId] = false;
+                ready._bit[ctxt->taskId] = false;
             }
             break;
         case SPURS_TASKSET_REQUEST_POLL:
@@ -1093,9 +1190,9 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
                 rc = 0;
             } else if (taskset->m.wkl_flag_wait_task == 0x80) {
                 // No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag.
-                taskset->m.wkl_flag_wait_task = mgmt->taskId;
-                running._bit[mgmt->taskId] = false;
-                waiting._bit[mgmt->taskId] = true;
+                taskset->m.wkl_flag_wait_task = ctxt->taskId;
+                running._bit[ctxt->taskId] = false;
+                waiting._bit[ctxt->taskId] = true;
                 rc = 1;
                 numNewlyReadyTasks--;
             } else {
@@ -1132,8 +1229,8 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
             *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0;
             if (selectedTaskId != CELL_SPURS_MAX_TASK) {
                 taskset->m.last_scheduled_task = selectedTaskId;
-                running._bit[mgmt->taskId] = true;
-                waiting._bit[mgmt->taskId] = false;
+                running._bit[ctxt->taskId] = true;
+                waiting._bit[ctxt->taskId] = false;
             }
             break;
         case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG:
@@ -1159,101 +1256,138 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
         taskset->m.enabled = enabled;
         taskset->m.signalled = signalled;
         taskset->m.ready = ready;
-    } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);

     // Increment the ready count of the workload by the number of tasks that have become ready
     do {
-        spursDma(spu, MFC_GETLLAR_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        spursDma(spu, MFC_GETLLAR_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
         auto spurs = vm::get_ptr<CellSpurs>(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));

-        s32 readyCount = kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed();
+        s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].read_relaxed();
         readyCount += numNewlyReadyTasks;
         readyCount = readyCount < 0 ? 0 : readyCount > 0xFF ? 0xFF : readyCount;

-        if (kernelMgmt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) {
-            spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(readyCount);
+        if (kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) {
+            spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].write_relaxed(readyCount);
         } else {
-            spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(readyCount);
+            spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].write_relaxed(readyCount);
         }
-    } while (spursDma(spu, MFC_PUTLLC_CMD, kernelMgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+    } while (spursDma(spu, MFC_PUTLLC_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);

     return rc;
 }

-s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) {
-    if (elfAddr == 0 || (elfAddr & 0x0F) != 0) {
-        return CELL_SPURS_TASK_ERROR_INVAL;
+/// Process pollStatus received from the SPURS kernel
+void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) {
+    if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) {
+        spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr);
     }
-
-    vfsStreamMemory stream(elfAddr);
-    loader::handlers::elf32 loader;
-    auto rc = loader.init(stream);
-    if (rc != loader::handler::ok) {
-        return CELL_SPURS_TASK_ERROR_NOEXEC;
-    }
-
-    u32 _lowestLoadAddr = CELL_SPURS_TASK_BOTTOM;
-    for (auto & phdr : loader.m_phdrs) {
-        if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) {
-            break;
-        }
-
-        if (phdr.data_be.p_type == 1/*PT_LOAD*/) {
-            if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) {
-                if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP ||
-                    phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) {
-                    return CELL_SPURS_TASK_ERROR_FAULT;
-                }
-
-                _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? phdr.data_be.p_vaddr : _lowestLoadAddr;
-            }
-        }
-    }
-
-    loader.load_data(spu.ls_offset, skipWriteableSegments);
-    *entryPoint = loader.m_ehdr.data_be.e_entry;
-    if (*lowestLoadAddr) {
-        *lowestLoadAddr = _lowestLoadAddr;
-    }
-
-    return CELL_OK;
 }

+/// Check execution rights
+bool spursTasksetPollStatus(SPUThread & spu) {
+    u32 pollStatus;
+
+    if (cellSpursModulePollStatus(spu, &pollStatus)) {
+        return true;
+    }
+
+    spursTasksetProcessPollStatus(spu, pollStatus);
+    return false;
+}
+
+/// Exit the Taskset PM
 void spursTasksetExit(SPUThread & spu) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);

     // Trace - STOP
     CellSpursTracePacket pkt;
     memset(&pkt, 0, sizeof(pkt));
     pkt.header.tag = 0x54; // Its not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_STOP
     pkt.data.stop = SPURS_GUID_TASKSET_PM;
-    cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);

     // Not sure why this check exists. Perhaps to check for memory corruption.
-    if (memcmp(mgmt->moduleId, "SPURSTASK MODULE", 16) != 0) {
+    if (memcmp(ctxt->moduleId, "SPURSTASK MODULE", 16) != 0) {
         spursHalt(spu);
     }

     cellSpursModuleExit(spu);
 }

-void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
-    auto taskset = vm::get_ptr<CellSpursTaskset>(spu.ls_offset + 0x2700);
+/// Invoked when a task exits
+void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) {
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);

-    spu.GPR[2].clear();
-    spu.GPR[3] = u128::from64(taskArgs.u64[0], taskArgs.u64[1]);
-    spu.GPR[4]._u64[1] = taskset->m.args;
-    spu.GPR[4]._u64[0] = taskset->m.spurs.addr();
-    for (auto i = 5; i < 128; i++) {
-        spu.GPR[i].clear();
-    }
+    spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0);
+    spursDmaWaitForCompletion(spu, 1);

-    spu.SetBranch(mgmt->savedContextLr.value()._u32[3]);
+    spu.GPR[3]._u64[1] = ctxt->taskset.addr();
+    spu.GPR[4]._u32[3] = taskId;
+    spu.GPR[5]._u32[3] = exitCode;
+    spu.GPR[6]._u64[1] = args;
+    spu.FastCall(0x10000);
 }

+/// Save the context of a task
+s32 spursTasketSaveTaskContext(SPUThread & spu) {
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);
+    auto taskInfo = vm::get_ptr<CellSpursTaskset::TaskInfo>(spu.ls_offset + 0x2780);
+
+    spursDmaWaitForCompletion(spu, 0xFFFFFFFF);
+
+    if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F;
+    u32 lsBlocks = 0;
+    for (auto i = 0; i < 128; i++) {
+        if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) {
+            lsBlocks++;
+        }
+    }
+
+    if (lsBlocks > allocLsBlocks) {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    // Make sure the stack area is specified in the ls pattern
+    for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) {
+        if ((taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) == 0) {
+            return CELL_SPURS_TASK_ERROR_STAT;
+        }
+    }
+
+    // Get the processor context
+    u128 r;
+    spu.FPSCR.Read(r);
+    ctxt->savedContextFpscr = r;
+    spu.ReadChannel(r, SPU_RdEventMask);
+    ctxt->savedSpuWriteEventMask = r._u32[3];
+    spu.ReadChannel(r, MFC_RdTagMask);
+    ctxt->savedWriteTagGroupQueryMask = r._u32[3];
+
+    // Store the processor context
+    u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull;
+    spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId);
+
+    // Save LS context
+    for (auto i = 6; i < 128; i++) {
+        bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false;
+        if (shouldStore) {
+            // TODO: Combine DMA requests for consecutive blocks into a single request
+            spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId);
+        }
+    }
+
+    spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+    return CELL_OK;
+}
+
+/// Taskset dispatcher
 void spursTasksetDispatch(SPUThread & spu) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);
     auto taskset = vm::get_ptr<CellSpursTaskset>(spu.ls_offset + 0x2700);

     u32 taskId;
@@ -1264,11 +1398,11 @@ void spursTasksetDispatch(SPUThread & spu) {
         return;
     }

-    mgmt->taskId = taskId;
+    ctxt->taskId = taskId;

     // DMA in the task info for the selected task
-    spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), mgmt->dmaTagId);
-    spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId);
+    spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), ctxt->dmaTagId);
+    spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
     auto taskInfo = vm::get_ptr<CellSpursTaskset::TaskInfo>(spu.ls_offset + 0x2780);
     auto elfAddr = taskInfo->elf_addr.addr().value();
     taskInfo->elf_addr.set(taskInfo->elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull);
@@ -1283,7 +1417,7 @@ void spursTasksetDispatch(SPUThread & spu) {

     if (isWaiting == 0) {
         // If we reach here it means that the task is being started and not being resumed
-        mgmt->guidAddr = CELL_SPURS_TASK_TOP;
+        ctxt->guidAddr = CELL_SPURS_TASK_TOP;

         u32 entryPoint;
         u32 lowestLoadAddr;
@@ -1292,17 +1426,17 @@ void spursTasksetDispatch(SPUThread & spu) {
             return;
         }

-        spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId);
+        spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);

-        mgmt->savedContextLr.value()._u32[3] = entryPoint;
-        mgmt->guidAddr = lowestLoadAddr;
-        mgmt->tasksetMgmtAddr = 0x2700;
-        mgmt->x2FC0 = 0;
-        mgmt->taskExitCode = isWaiting;
-        mgmt->x2FD4 = elfAddr & 5; // TODO: Figure this out
+        ctxt->savedContextLr.value()._u32[3] = entryPoint;
+        ctxt->guidAddr = lowestLoadAddr;
+        ctxt->tasksetMgmtAddr = 0x2700;
+        ctxt->x2FC0 = 0;
+        ctxt->taskExitCode = isWaiting;
+        ctxt->x2FD4 = elfAddr & 5; // TODO: Figure this out

         if ((elfAddr & 5) == 1) {
-            spursDma(spu, MFC_GET_CMD, mgmt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, mgmt->dmaTagId);
+            spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, ctxt->dmaTagId);
         }

         // Trace - GUID
@@ -1340,16 +1474,16 @@ void spursTasksetDispatch(SPUThread & spu) {
             bool shouldLoad = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false;
             if (shouldLoad) {
                 // TODO: Combine DMA requests for consecutive blocks into a single request
-                spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId);
+                spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId);
             }
         }

-        spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId);
+        spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);

         // Restore saved registers
-        spu.FPSCR.Write(mgmt->savedContextFpscr.value());
-        spu.WriteChannel(MFC_WrTagMask, u128::from32r(mgmt->savedWriteTagGroupQueryMask));
-        spu.WriteChannel(SPU_WrEventMask, u128::from32r(mgmt->savedSpuWriteEventMask));
+        spu.FPSCR.Write(ctxt->savedContextFpscr.value());
+        spu.WriteChannel(MFC_WrTagMask, u128::from32r(ctxt->savedWriteTagGroupQueryMask));
+        spu.WriteChannel(SPU_WrEventMask, u128::from32r(ctxt->savedSpuWriteEventMask));

         // Trace - GUID
         memset(&pkt, 0, sizeof(pkt));
@@ -1368,125 +1502,9 @@ void spursTasksetDispatch(SPUThread & spu) {
     }
 }

-void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) {
-    if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) {
-        spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr);
-    }
-}
-
-bool spursTasksetPollStatus(SPUThread & spu) {
-    u32 pollStatus;
-
-    if (cellSpursModulePollStatus(spu, &pollStatus)) {
-        return true;
-    }
-
-    spursTasksetProcessPollStatus(spu, pollStatus);
-    return false;
-}
-
-void spursTasksetInit(SPUThread & spu, u32 pollStatus) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
-    auto kernelMgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
-
-    kernelMgmt->moduleId[0] = 'T';
-    kernelMgmt->moduleId[1] = 'K';
-
-    // Trace - START: Module='TKST'
-    CellSpursTracePacket pkt;
-    memset(&pkt, 0, sizeof(pkt));
-    pkt.header.tag = 0x52; // Its not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START
-    memcpy(pkt.data.start.module, "TKST", 4);
-    pkt.data.start.level = 2;
-    pkt.data.start.ls = 0xA00 >> 2;
-    cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
-
-    spursTasksetProcessPollStatus(spu, pollStatus);
-}
-
-void spursTasksetResumeTask(SPUThread & spu) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
-
-    // Restore task context
-    spu.GPR[0] = mgmt->savedContextLr;
-    spu.GPR[1] = mgmt->savedContextSp;
-    for (auto i = 0; i < 48; i++) {
-        spu.GPR[80 + i] = mgmt->savedContextR80ToR127[i];
-    }
-
-    spu.SetBranch(spu.GPR[0]._u32[3]);
-}
-
-s32 spursTasketSaveTaskContext(SPUThread & spu) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
-    auto taskInfo = vm::get_ptr<CellSpursTaskset::TaskInfo>(spu.ls_offset + 0x2780);
-
-    spursDmaWaitForCompletion(spu, 0xFFFFFFFF);
-
-    if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) {
-        return CELL_SPURS_TASK_ERROR_STAT;
-    }
-
-    u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F;
-    u32 lsBlocks = 0;
-    for (auto i = 0; i < 128; i++) {
-        if (taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) {
-            lsBlocks++;
-        }
-    }
-
-    if (lsBlocks > allocLsBlocks) {
-        return CELL_SPURS_TASK_ERROR_STAT;
-    }
-
-    // Make sure the stack is area is specified in the ls pattern
-    for (auto i = (mgmt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) {
-        if ((taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i)) == 0) {
-            return CELL_SPURS_TASK_ERROR_STAT;
-        }
-    }
-
-    // Get the processor context
-    u128 r;
-    spu.FPSCR.Read(r);
-    mgmt->savedContextFpscr = r;
-    spu.ReadChannel(r, SPU_RdEventMask);
-    mgmt->savedSpuWriteEventMask = r._u32[3];
-    spu.ReadChannel(r, MFC_RdTagMask);
-    mgmt->savedWriteTagGroupQueryMask = r._u32[3];
-
-    // Store the processor context
-    u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull;
-    spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, mgmt->dmaTagId);
-
-    // Save LS context
-    for (auto i = 6; i < 128; i++) {
-        bool shouldStore = taskInfo->ls_pattern.u64[i < 64 ? 1 : 0] & (0x8000000000000000ull >> i) ? true : false;
-        if (shouldStore) {
-            // TODO: Combine DMA requests for consecutive blocks into a single request
-            spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, mgmt->dmaTagId);
-        }
-    }
-
-    spursDmaWaitForCompletion(spu, 1 << mgmt->dmaTagId);
-    return CELL_OK;
-}
-
-void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
-
-    spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0);
-    spursDmaWaitForCompletion(spu, 1);
-
-    spu.GPR[3]._u64[1] = mgmt->taskset.addr();
-    spu.GPR[4]._u32[3] = taskId;
-    spu.GPR[5]._u32[3] = exitCode;
-    spu.GPR[6]._u64[1] = args;
-    spu.FastCall(0x10000);
-}
-
+/// Process a syscall request
 s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);
     auto taskset = vm::get_ptr<CellSpursTaskset>(spu.ls_offset + 0x2700);

     // If the 0x10 bit is set in syscallNum then its the 2nd version of the
@@ -1501,14 +1519,14 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
     u32 incident = 0;
     switch (syscallNum) {
     case CELL_SPURS_TASK_SYSCALL_EXIT:
-        if (mgmt->x2FD4 == 4 || (mgmt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out
-            if (mgmt->x2FD4 != 4) {
+        if (ctxt->x2FD4 == 4 || (ctxt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out
+            if (ctxt->x2FD4 != 4) {
                 spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_DESTROY_TASK, nullptr, nullptr);
             }

-            auto addr = mgmt->x2FD4 == 4 ? taskset->m.x78 : mgmt->x2FC0;
-            auto args = mgmt->x2FD4 == 4 ? 0 : mgmt->x2FC8;
-            spursTasksetOnTaskExit(spu, addr, mgmt->taskId, mgmt->taskExitCode, args);
+            auto addr = ctxt->x2FD4 == 4 ? taskset->m.x78 : ctxt->x2FC0;
+            auto args = ctxt->x2FD4 == 4 ? 0 : ctxt->x2FC8;
+            spursTasksetOnTaskExit(spu, addr, ctxt->taskId, ctxt->taskExitCode, args);
         }

         incident = CELL_SPURS_TRACE_TASK_EXIT;
@@ -1561,11 +1579,11 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
     memset(&pkt, 0, sizeof(pkt));
     pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK;
     pkt.data.task.incident = incident;
-    pkt.data.task.taskId = mgmt->taskId;
-    cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
+    pkt.data.task.taskId = ctxt->taskId;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);

     // Clear the GUID of the task
-    memset(vm::get_ptr<void>(spu.ls_offset + mgmt->guidAddr), 0, 0x10);
+    memset(vm::get_ptr<void>(spu.ls_offset + ctxt->guidAddr), 0, 0x10);

     if (spursTasksetPollStatus(spu)) {
         spursTasksetExit(spu);
@@ -1577,49 +1595,62 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
     return rc;
 }

-bool spursTasksetEntry(SPUThread & spu) {
-    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
+/// Initialise the Taskset PM
+void spursTasksetInit(SPUThread & spu, u32 pollStatus) {
+    auto ctxt = vm::get_ptr<SpursTasksetContext>(spu.ls_offset + 0x2700);
+    auto kernelCtxt = vm::get_ptr<SpursKernelContext>(spu.ls_offset + 0x100);

-    if (spu.PC == CELL_SPURS_TASKSET_PM_ENTRY_ADDR) {
-        // Called from kernel
-        auto kernelMgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + spu.GPR[3]._u32[3]);
-        auto arg = spu.GPR[4]._u64[1];
-        auto pollStatus = spu.GPR[5]._u32[3];
+    kernelCtxt->moduleId[0] = 'T';
+    kernelCtxt->moduleId[1] = 'K';

-        // Initialise memory and save args
-        memset(mgmt, 0, sizeof(*mgmt));
-        mgmt->taskset.set(arg);
-        memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16);
-        mgmt->kernelMgmtAddr = spu.GPR[3]._u32[3];
-        mgmt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR;
-        mgmt->spuNum = kernelMgmt->spuNum;
-        mgmt->dmaTagId = kernelMgmt->dmaTagId;
-        mgmt->taskId = 0xFFFFFFFF;
+    // Trace - START: Module='TKST'
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = 0x52; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START
+    memcpy(pkt.data.start.module, "TKST", 4);
+    pkt.data.start.level = 2;
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);

-        // Register SPURS takset policy module HLE functions
-        spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000); // TODO: use a symbolic constant
-        spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
-        spu.RegisterHleFunction(mgmt->syscallAddr, spursTasksetEntry);
+    spursTasksetProcessPollStatus(spu, pollStatus);
+}

-        spursTasksetInit(spu, pollStatus);
-        spursTasksetDispatch(spu);
-    } else if (spu.PC == CELL_SPURS_TASKSET_PM_SYSCALL_ADDR) {
-        // Save task context
-        mgmt->savedContextLr = spu.GPR[0];
-        mgmt->savedContextSp = spu.GPR[1];
-        for (auto i = 0; i < 48; i++) {
-            mgmt->savedContextR80ToR127[i] = spu.GPR[80 + i];
-        }
-
-        spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]);
-
-        // Resume the previously executing task if the syscall did not cause a context switch
-        if (spu.m_is_branch == false) {
-            spursTasksetResumeTask(spu);
-        }
-    } else {
-        assert(0);
+/// Load an ELF
+s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) {
+    if (elfAddr == 0 || (elfAddr & 0x0F) != 0) {
+        return CELL_SPURS_TASK_ERROR_INVAL;
     }

-    return false;
+    vfsStreamMemory stream(elfAddr);
+    loader::handlers::elf32 loader;
+    auto rc = loader.init(stream);
+    if (rc != loader::handler::ok) {
+        return CELL_SPURS_TASK_ERROR_NOEXEC;
+    }
+
+    u32 _lowestLoadAddr = CELL_SPURS_TASK_BOTTOM;
+    for (auto & phdr : loader.m_phdrs) {
+        if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) {
+            break;
+        }
+
+        if (phdr.data_be.p_type == 1/*PT_LOAD*/) {
+            if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) {
+                if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP ||
+                    phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) {
+                    return CELL_SPURS_TASK_ERROR_FAULT;
+                }
+
+                _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? phdr.data_be.p_vaddr : _lowestLoadAddr;
+            }
+        }
+    }
+
+    loader.load_data(spu.ls_offset, skipWriteableSegments);
+    *entryPoint = loader.m_ehdr.data_be.e_entry;
+    if (*lowestLoadAddr) {
+        *lowestLoadAddr = _lowestLoadAddr;
+    }
+
+    return CELL_OK;
 }