Use RTM instructions (skylake+)

This commit is contained in:
Nekotekina 2017-07-18 20:03:47 +03:00
parent 0fa148e65e
commit b24eb621ae
8 changed files with 190 additions and 21 deletions

View File

@ -46,5 +46,16 @@ std::string utils::get_system_info()
#endif
fmt::append(result, "%s | %d Threads | %.2f GiB RAM", brand, num_proc, mem_total / (1024.0f * 1024 * 1024));
if (has_avx())
{
result += " | AVX";
}
if (has_rtm())
{
result += " | TSX";
}
return result;
}

View File

@ -28,7 +28,26 @@ namespace utils
/// Reports whether the CPU advertises the feature bits required to use
/// hardware transactions.
///
/// Both bits of mask 0x4800 (0x800 | 0x4000) must be set in element [1] of
/// `get_cpuid(7, 0)`, and element [0] of `get_cpuid(0, 0)` must be at least 7.
/// NOTE(review): assumes get_cpuid() returns the registers in EAX, EBX, ECX,
/// EDX order, so [1] is EBX, where bit 11 is RTM and bit 14 is MPX — confirm
/// against get_cpuid's definition.
///
/// @return true only when both feature bits are present.
inline bool has_rtm()
{
	// Check RTM and MPX extensions in order to filter out TSX on Haswell CPUs.
	// The earlier RTM-only test (mask 0x800) is intentionally gone: it returned
	// first and made this stricter check unreachable.
	return get_cpuid(0, 0)[0] >= 0x7 && (get_cpuid(7, 0)[1] & 0x4800) == 0x4800;
}
/// Attempts to start a hardware transaction with `_xbegin()`.
///
/// Keeps re-issuing `_xbegin()` for as long as the returned abort status has
/// the `_XABORT_RETRY` flag set.
///
/// @return true once `_xbegin()` reports `_XBEGIN_STARTED`; false as soon as
///         an abort status without `_XABORT_RETRY` is observed.
inline bool transaction_enter()
{
	for (;;)
	{
		const auto xstatus = _xbegin();

		if (xstatus == _XBEGIN_STARTED)
		{
			// Transaction is open
			return true;
		}

		if ((xstatus & _XABORT_RETRY) == 0)
		{
			// Abort status does not suggest retrying: give up
			return false;
		}
	}
}
std::string get_system_info();

View File

@ -95,7 +95,7 @@ if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--allow-multiple-definition")
endif()
add_compile_options(-msse -msse2 -mcx16 -mssse3)
add_compile_options(-msse -msse2 -mcx16 -mssse3 -mrtm)
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# This fixes 'some' of the st11range issues. See issue #2516

View File

@ -1,9 +1,12 @@
#include "stdafx.h"
#include "Utilities/sysinfo.h"
#include "Emu/Memory/vm.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/lv2/sys_sync.h"
#include "MFC.h"
const bool s_use_rtm = utils::has_rtm();
template <>
void fmt_class_string<MFC>::format(std::string& out, u64 arg)
{
@ -145,10 +148,25 @@ void mfc_thread::cpu_task()
vm::reservation_acquire(cmd.eal, 128);
// Store unconditionally
vm::writer_lock lock(0);
data = to_write;
vm::reservation_update(cmd.eal, 128);
vm::notify(cmd.eal, 128);
if (s_use_rtm && utils::transaction_enter())
{
if (!vm::reader_lock{vm::try_to_lock})
{
_xabort(0);
}
data = to_write;
vm::reservation_update(cmd.eal, 128);
vm::notify(cmd.eal, 128);
_xend();
}
else
{
vm::writer_lock lock(0);
data = to_write;
vm::reservation_update(cmd.eal, 128);
vm::notify(cmd.eal, 128);
}
}
else if (cmd.cmd & MFC_LIST_MASK)
{

View File

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "Utilities/VirtualMemory.h"
#include "Utilities/sysinfo.h"
#include "Crypto/sha1.h"
#include "Emu/Memory/Memory.h"
#include "Emu/System.h"
@ -53,6 +54,8 @@
#include <cfenv>
#include "Utilities/GSL.h"
const bool s_use_rtm = utils::has_rtm();
extern u64 get_system_time();
namespace vm { using namespace ps3; }
@ -825,6 +828,26 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
return false;
}
if (s_use_rtm && utils::transaction_enter())
{
if (!vm::reader_lock{vm::try_to_lock})
{
_xabort(0);
}
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast<u32>(ppu.rdata), reg_value);
if (result)
{
vm::reservation_update(addr, sizeof(u32));
vm::notify(addr, sizeof(u32));
}
_xend();
ppu.raddr = 0;
return result;
}
vm::writer_lock lock(0);
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast<u32>(ppu.rdata), reg_value);
@ -849,6 +872,26 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
return false;
}
if (s_use_rtm && utils::transaction_enter())
{
if (!vm::reader_lock{vm::try_to_lock})
{
_xabort(0);
}
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u64)) && data.compare_and_swap_test(ppu.rdata, reg_value);
if (result)
{
vm::reservation_update(addr, sizeof(u64));
vm::notify(addr, sizeof(u64));
}
_xend();
ppu.raddr = 0;
return result;
}
vm::writer_lock lock(0);
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u64)) && data.compare_and_swap_test(ppu.rdata, reg_value);

View File

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "Utilities/lockless.h"
#include "Utilities/sysinfo.h"
#include "Emu/Memory/Memory.h"
#include "Emu/System.h"
@ -22,6 +23,8 @@
#include <atomic>
#include <thread>
const bool s_use_rtm = utils::has_rtm();
#ifdef _MSC_VER
bool operator ==(const u128& lhs, const u128& rhs)
{
@ -604,9 +607,22 @@ void SPUThread::process_mfc_cmd()
thread_ctrl::wait_for(100);
}
}
else if (s_use_rtm && utils::transaction_enter())
{
if (!vm::reader_lock{vm::try_to_lock})
{
_xabort(0);
}
rtime = vm::reservation_acquire(raddr, 128);
rdata = data;
_xend();
_ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ffff) = rdata;
return ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
}
else
{
// Fast path
rdata = data;
_mm_lfence();
}
@ -637,15 +653,36 @@ void SPUThread::process_mfc_cmd()
if (raddr == ch_mfc_cmd.eal && rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
{
// TODO: vm::check_addr
vm::writer_lock lock;
if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
if (s_use_rtm && utils::transaction_enter())
{
data = to_write;
result = true;
if (!vm::reader_lock{vm::try_to_lock})
{
_xabort(0);
}
vm::reservation_update(raddr, 128);
vm::notify(raddr, 128);
if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
{
data = to_write;
result = true;
vm::reservation_update(raddr, 128);
vm::notify(raddr, 128);
}
_xend();
}
else
{
vm::writer_lock lock;
if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
{
data = to_write;
result = true;
vm::reservation_update(raddr, 128);
vm::notify(raddr, 128);
}
}
}
@ -681,6 +718,23 @@ void SPUThread::process_mfc_cmd()
// Store unconditionally
// TODO: vm::check_addr
if (s_use_rtm && utils::transaction_enter())
{
if (!vm::reader_lock{vm::try_to_lock})
{
_xabort(0);
}
data = to_write;
vm::reservation_update(ch_mfc_cmd.eal, 128);
vm::notify(ch_mfc_cmd.eal, 128);
_xend();
ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS);
return;
}
vm::writer_lock lock(0);
data = to_write;
vm::reservation_update(ch_mfc_cmd.eal, 128);

View File

@ -27,9 +27,9 @@
namespace vm
{
static u8* memory_reserve_4GiB(std::uintptr_t addr = 0)
static u8* memory_reserve_4GiB(std::uintptr_t _addr = 0)
{
for (u64 addr = 0x100000000;; addr += 0x100000000)
for (u64 addr = _addr + 0x100000000;; addr += 0x100000000)
{
if (auto ptr = utils::memory_reserve(0x100000000, (void*)addr))
{

View File

@ -1,10 +1,13 @@
#include "stdafx.h"
#include "Utilities/sysinfo.h"
#include "Emu/Memory/Memory.h"
#include "Emu/System.h"
#include "ARMv7Thread.h"
#include "ARMv7Interpreter.h"
const bool s_use_rtm = utils::has_rtm();
using namespace arm_code::arm_encoding_alias;
#define ARG(arg, ...) const u32 arg = args::arg::extract(__VA_ARGS__);
@ -2091,13 +2094,34 @@ void arm_interpreter::STREX(ARMv7Thread& cpu, const u32 op, const u32 cond)
return;
}
vm::writer_lock lock(0);
bool result;
const bool result = cpu.rtime == vm::reservation_acquire(addr, cpu.rtime) && data.compare_and_swap_test(cpu.rdata, value);
if (result)
if (s_use_rtm && utils::transaction_enter())
{
vm::reservation_update(addr, sizeof(u32));
if (!vm::reader_lock{vm::try_to_lock})
{
_xabort(0);
}
result = cpu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(cpu.rdata, value);
if (result)
{
vm::reservation_update(addr, sizeof(u32));
}
_xend();
}
else
{
vm::writer_lock lock(0);
result = cpu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(cpu.rdata, value);
if (result)
{
vm::reservation_update(addr, sizeof(u32));
}
}
cpu.raddr = 0;