mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-07 03:40:07 +00:00
SPU: Copy with memcpy() instead of hand-rolled SSE2
In a very unscientific benchmark, spu_thread::do_dma_transfer() went from taking 2.27% of my CPU time to 0.07%, while __memmove_avx_unaligned_erms() went from 1.47% to 2.88%. Combined, that is 3.74% before versus 2.95% after, a net saving of about 0.8%.
This commit is contained in:
parent
5261886449
commit
425e032a62
@ -1433,6 +1433,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||
|
||||
auto lock = vm::passive_lock(eal & -128, ::align(eal + size, 128));
|
||||
|
||||
#ifdef __GNUG__
|
||||
std::memcpy(dst, src, size);
|
||||
#else
|
||||
while (size >= 128)
|
||||
{
|
||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||
@ -1450,6 +1453,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||
src += 16;
|
||||
size -= 16;
|
||||
}
|
||||
#endif
|
||||
|
||||
lock->release(0);
|
||||
break;
|
||||
@ -1483,6 +1487,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||
}
|
||||
default:
|
||||
{
|
||||
#ifdef __GNUG__
|
||||
std::memcpy(dst, src, size);
|
||||
#else
|
||||
while (size >= 128)
|
||||
{
|
||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||
@ -1500,6 +1507,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||
src += 16;
|
||||
size -= 16;
|
||||
}
|
||||
#endif
|
||||
|
||||
break;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user