diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index fb6e83aece..73114841f6 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -3869,6 +3869,50 @@ public:
 
 	void SHUFB(spu_opcode_t op)
 	{
+		if (auto ii = llvm::dyn_cast_or_null<llvm::InsertElementInst>(m_block->reg[op.rc]))
+		{
+			// Detect if the mask comes from a CWD-like constant generation instruction
+			auto c0 = llvm::dyn_cast<llvm::Constant>(ii->getOperand(0));
+
+			if (c0 && get_const_vector(c0, m_pos, op.rc) != v128::from64(0x18191a1b1c1d1e1f, 0x1011121314151617))
+			{
+				c0 = nullptr;
+			}
+
+			auto c1 = llvm::dyn_cast<llvm::ConstantInt>(ii->getOperand(1));
+
+			llvm::Type* vtype = nullptr;
+			llvm::Value* _new = nullptr;
+
+			// Optimization: emit SHUFB as a simple vector insert
+			if (c0 && c1 && c1->getType() == get_type<u64>() && c1->getZExtValue() == 0x01020304050607)
+			{
+				vtype = get_type<u64[2]>();
+				_new  = extract(get_vr<u64[2]>(op.ra), 1).value;
+			}
+			else if (c0 && c1 && c1->getType() == get_type<u32>() && c1->getZExtValue() == 0x010203)
+			{
+				vtype = get_type<u32[4]>();
+				_new  = extract(get_vr<u32[4]>(op.ra), 3).value;
+			}
+			else if (c0 && c1 && c1->getType() == get_type<u16>() && c1->getZExtValue() == 0x0203)
+			{
+				vtype = get_type<u16[8]>();
+				_new  = extract(get_vr<u16[8]>(op.ra), 6).value;
+			}
+			else if (c0 && c1 && c1->getType() == get_type<u8>() && c1->getZExtValue() == 0x03)
+			{
+				vtype = get_type<u8[16]>();
+				_new  = extract(get_vr<u8[16]>(op.ra), 12).value;
+			}
+
+			if (vtype && _new)
+			{
+				set_vr(op.rt4, m_ir->CreateInsertElement(get_vr(op.rb, vtype), _new, ii->getOperand(2)));
+				return;
+			}
+		}
+
 		const auto c = get_vr<u8[16]>(op.rc);
 
 		if (auto ci = llvm::dyn_cast<llvm::Constant>(c.value))