diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 61619466a3..2796fa72b4 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -7,6 +7,7 @@ #include "Emu/Cell/RawSPUThread.h" #include "Emu/SysCalls/SysCalls.h" #include "Thread.h" +#include "range.h" #ifdef _WIN32 #include <windows.h> @@ -902,7 +903,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) { LOG_ERROR(MEMORY, "Invalid or unsupported instruction (op=%d, reg=%d, d_size=%lld, a_size=0x%llx, i_size=%lld)", op, reg, d_size, a_size, i_size); report_opcode(); - return false; + return true; } switch (op) @@ -914,7 +915,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) if (reg - X64R_XMM0 >= 16) { LOG_ERROR(MEMORY, "X64OP_STORE: d_size=16, reg=%d", reg); - return false; + return true; } std::memcpy(vm::base_priv(addr), XMMREG(context, reg - X64R_XMM0), 16); @@ -924,7 +925,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) u64 reg_value; if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value)) { - return false; + return true; } std::memcpy(vm::base_priv(addr), ®_value, d_size); @@ -935,13 +936,13 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) if (d_size > 8) { LOG_ERROR(MEMORY, "X64OP_MOVS: d_size=%lld", d_size); - return false; + return true; } if (vm::base(addr) != (void*)RDI(context)) { LOG_ERROR(MEMORY, "X64OP_MOVS: rdi=0x%llx, rsi=0x%llx, addr=0x%x", (u64)RDI(context), (u64)RSI(context), addr); - return false; + return true; } u32 a_addr = addr; @@ -958,7 +959,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) if (EFLAGS(context) & 0x400 /* direction flag */) { LOG_ERROR(MEMORY, "X64OP_MOVS TODO: reversed direction"); - return false; + return true; //RSI(context) -= d_size; //RDI(context) -= d_size; //a_addr -= (u32)d_size; @@ -990,19 +991,19 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* 
context) if (d_size > 8) { LOG_ERROR(MEMORY, "X64OP_STOS: d_size=%lld", d_size); - return false; + return true; } if (vm::base(addr) != (void*)RDI(context)) { LOG_ERROR(MEMORY, "X64OP_STOS: rdi=0x%llx, addr=0x%x", (u64)RDI(context), addr); - return false; + return true; } u64 value; if (!get_x64_reg_value(context, X64R_RAX, d_size, i_size, value)) { - return false; + return true; } u32 a_addr = addr; @@ -1016,7 +1017,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) if (EFLAGS(context) & 0x400 /* direction flag */) { LOG_ERROR(MEMORY, "X64OP_STOS TODO: reversed direction"); - return false; + return true; //RDI(context) -= d_size; //a_addr -= (u32)d_size; } @@ -1046,7 +1047,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) u64 reg_value; if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value)) { - return false; + return true; } switch (d_size) @@ -1055,12 +1056,12 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) case 2: reg_value = sync_lock_test_and_set((u16*)vm::base_priv(addr), (u16)reg_value); break; case 4: reg_value = sync_lock_test_and_set((u32*)vm::base_priv(addr), (u32)reg_value); break; case 8: reg_value = sync_lock_test_and_set((u64*)vm::base_priv(addr), (u64)reg_value); break; - default: return false; + default: return true; } if (!put_x64_reg_value(context, reg, d_size, reg_value)) { - return false; + return true; } break; } @@ -1069,7 +1070,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) u64 reg_value, old_value, cmp_value; if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value) || !get_x64_reg_value(context, X64R_RAX, d_size, i_size, cmp_value)) { - return false; + return true; } switch (d_size) @@ -1078,7 +1079,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) case 2: old_value = sync_val_compare_and_swap((u16*)vm::base_priv(addr), (u16)cmp_value, (u16)reg_value); break; 
case 4: old_value = sync_val_compare_and_swap((u32*)vm::base_priv(addr), (u32)cmp_value, (u32)reg_value); break; case 8: old_value = sync_val_compare_and_swap((u64*)vm::base_priv(addr), (u64)cmp_value, (u64)reg_value); break; - default: return false; + default: return true; } if (!put_x64_reg_value(context, X64R_RAX, d_size, old_value) || !set_x64_cmp_flags(context, d_size, cmp_value, old_value)) @@ -1092,7 +1093,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) u64 value; if (!get_x64_reg_value(context, reg, d_size, i_size, value)) { - return false; + return true; } switch (d_size) @@ -1101,12 +1102,12 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) case 2: value &= sync_fetch_and_and((u16*)vm::base_priv(addr), (u16)value); break; case 4: value &= sync_fetch_and_and((u32*)vm::base_priv(addr), (u32)value); break; case 8: value &= sync_fetch_and_and((u64*)vm::base_priv(addr), (u64)value); break; - default: return false; + default: return true; } if (!set_x64_cmp_flags(context, d_size, value, 0)) { - return false; + return true; } break; } @@ -1114,7 +1115,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) { LOG_ERROR(MEMORY, "Invalid or unsupported operation (op=%d, reg=%d, d_size=%lld, a_size=0x%llx, i_size=%lld)", op, reg, d_size, a_size, i_size); report_opcode(); - return false; + return true; } } @@ -1148,17 +1149,30 @@ void prepare_throw_access_violation(x64_context* context, const char* cause, u32 static LONG exception_handler(PEXCEPTION_POINTERS pExp) { - const u64 addr64 = pExp->ExceptionRecord->ExceptionInformation[1] - (u64)vm::base(0); - const bool is_writing = pExp->ExceptionRecord->ExceptionInformation[0] != 0; + if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) + { + const range<u64> vm_range = range<u64>((u64)vm::base(0)).size(1ull << 32); + const u64 address = pExp->ExceptionRecord->ExceptionInformation[1]; - if 
(pExp->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && addr64 < 0x100000000ull && thread_ctrl::get_current() && handle_access_violation((u32)addr64, is_writing, pExp->ContextRecord))
+		if (vm_range.contains(address))
+		{
+			u32 vaddress = u32(address - vm_range.begin());
+
+			const bool is_writing = pExp->ExceptionRecord->ExceptionInformation[0] != 0;
+
+			if (handle_access_violation(vaddress, is_writing, pExp->ContextRecord))
+			{
+				return EXCEPTION_CONTINUE_EXECUTION;
+			}
+		}
+	}
+
+	if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_NONCONTINUABLE_EXCEPTION)
 	{
 		return EXCEPTION_CONTINUE_EXECUTION;
 	}
-	else
-	{
-		return EXCEPTION_CONTINUE_SEARCH;
-	}
+
+	return EXCEPTION_CONTINUE_SEARCH;
 }
 
 static LONG exception_filter(PEXCEPTION_POINTERS pExp)
@@ -1245,18 +1259,20 @@ static void signal_handler(int sig, siginfo_t* info, void* uct)
 	const bool is_writing = context->uc_mcontext.gregs[REG_ERR] & 0x2;
 #endif
 
-	const u64 addr64 = (u64)info->si_addr - (u64)vm::base(0);
+	const range<u64> vm_range = range<u64>((u64)vm::base(0)).size(1ull << 32);
+	const u64 address = (u64)info->si_addr;
+
 	const auto cause = is_writing ? "writing" : "reading";
 
-	// TODO: Exception specific informative messages
-
-	if (addr64 < 0x100000000ull && thread_ctrl::get_current())
+	if (vm_range.contains(address))
 	{
+		u32 vaddress = u32(address - vm_range.begin());
+
 		// Try to process access violation
-		if (!handle_access_violation((u32)addr64, is_writing, context))
+		if (!handle_access_violation(vaddress, is_writing, context))
 		{
 			// Setup throw_access_violation() call on the context
-			prepare_throw_access_violation(context, cause, (u32)addr64);
+			prepare_throw_access_violation(context, cause, vaddress);
 		}
 	}
 	else
diff --git a/Utilities/range.h b/Utilities/range.h
new file mode 100644
index 0000000000..3759c82333
--- /dev/null
+++ b/Utilities/range.h
@@ -0,0 +1,169 @@
+#pragma once
+#include <algorithm>
+#include <cstddef>
+
+// Half-open interval [begin, end) over an arithmetic Type.
+template<typename Type>
+class range
+{
+	Type m_begin;
+	Type m_end;
+
+public:
+	using type = Type;
+
+	constexpr range(Type begin, Type end)
+		: m_begin(begin), m_end(end)
+	{
+	}
+
+	// Single-element range [point, point + 1).
+	constexpr range(Type point)
+		: m_begin(point), m_end(point + 1)
+	{
+	}
+
+
+	constexpr range()
+		: m_begin{}
+		, m_end{}
+	{
+	}
+
+	range& set(Type begin, Type end)
+	{
+		m_begin = begin;
+		m_end = end;
+
+		return *this;
+	}
+
+	// Takes a const reference so const ranges and temporaries can be copied from.
+	range& set(const range& other)
+	{
+		return set(other.begin(), other.end());
+	}
+
+	range& begin(Type value)
+	{
+		m_begin = value;
+		return *this;
+	}
+
+	range& end(Type value)
+	{
+		m_end = value;
+		return *this;
+	}
+
+	// Moves the end so the range spans `value` elements starting at the current begin.
+	range& size(Type value)
+	{
+		m_end = m_begin + value;
+		return *this;
+	}
+
+	// Grows this range to the smallest range that covers both itself and `other`.
+	void extend(const range& other)
+	{
+		m_begin = std::min(m_begin, other.m_begin);
+		m_end = std::max(m_end, other.m_end); // max, not min: the union must reach the furthest end
+	}
+
+	// True when begin does not exceed end (an empty range is still valid).
+	constexpr bool valid() const
+	{
+		return m_begin <= m_end;
+	}
+
+	constexpr Type begin() const
+	{
+		return m_begin;
+	}
+
+	constexpr Type end() const
+	{
+		return m_end;
+	}
+
+	constexpr Type size() const
+	{
+		return m_end - m_begin;
+	}
+
+	// True when point lies in [begin, end).
+	constexpr bool contains(Type point) const
+	{
+		return point >= m_begin && point < m_end;
+	}
+
+	// Strict interval overlap test: ranges that merely touch end-to-begin do not overlap.
+	constexpr bool overlaps(const range& rhs) const
+	{
+		return m_begin < rhs.m_end && m_end > rhs.m_begin;
+	}
+
+	constexpr bool operator == (const range& rhs) const
+	{
+		return m_begin == rhs.m_begin && m_end == rhs.m_end;
+	}
+
+	constexpr bool operator != (const range& rhs) const
+	{
+		return m_begin != rhs.m_begin || m_end != rhs.m_end;
+	}
+
+	constexpr range operator / (Type rhs) const
+	{
+		return{ m_begin / rhs, m_end / rhs };
+	}
+
+	constexpr range operator * (Type rhs) const
+	{
+		return{ m_begin * rhs, m_end * rhs };
+	}
+
+	constexpr range operator + (Type rhs) const
+	{
+		return{ m_begin + rhs, m_end + rhs };
+	}
+
+	constexpr range operator - (Type rhs) const
+	{
+		return{ m_begin - rhs, m_end - rhs };
+	}
+
+	range& operator /= (Type rhs)
+	{
+		m_begin /= rhs;
+		m_end /= rhs;
+		return *this;
+	}
+
+	range& operator *= (Type rhs)
+	{
+		m_begin *= rhs;
+		m_end *= rhs;
+		return *this;
+	}
+
+	range& operator += (Type rhs)
+	{
+		m_begin += rhs;
+		m_end += rhs;
+		return *this;
+	}
+
+	range& operator -= (Type rhs)
+	{
+		m_begin -= rhs;
+		m_end -= rhs;
+		return *this;
+	}
+
+	// Intersection of the two ranges; when they do not overlap the result is empty or not valid().
+	constexpr range operator &(const range& rhs) const
+	{
+		return{ std::max(m_begin, rhs.m_begin), std::min(m_end, rhs.m_end) };
+	}
+};
diff --git a/Utilities/types.h b/Utilities/types.h
index 1e7de850e5..8f468ffffb 100644
--- a/Utilities/types.h
+++ b/Utilities/types.h
@@ -24,11 +24,11 @@ using s32 = std::int32_t;
 using s64 = std::int64_t;
 
 #define DECLARE_ENUM_CLASS_BITWISE_OPERATORS(type) \
-	inline type operator |(type lhs, type rhs) \
+	inline constexpr type operator |(type lhs, type rhs) \
 	{ \
 		return type(std::underlying_type_t<type>(lhs) | std::underlying_type_t<type>(rhs)); \
 	} \
-	inline type operator &(type lhs, type rhs) \
+	inline constexpr type operator &(type lhs, type rhs) \
 	{ \
 		return type(std::underlying_type_t<type>(lhs) & std::underlying_type_t<type>(rhs)); \
 	} \
@@ -40,7 +40,7 @@ using s64 = std::int64_t;
 	{ \
 		return lhs = lhs & rhs; \
 	} \
-	inline type operator ~(type lhs) \
+	inline constexpr type 
operator ~(type lhs) \ { \ return type(~std::underlying_type_t<type>(lhs)); \ } \ diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 78ad6d1099..7acc6fd7be 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -561,8 +561,7 @@ void GLGSRender::end() m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); } } - - if (draw_command == rsx::draw_command::array) + else if (draw_command == rsx::draw_command::array) { for (const auto &first_count : first_count_commands) { @@ -1232,7 +1231,7 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer) tmp.create(src_info.target); __glcheck scale_texture(tmp, src_info.format.internal_format, { (int)convert_w, (int)convert_h }, - src_texture.view(), { (int)src_x + int(in_x), (int)src_y + int(in_y) }, { int(src_x + in_w), int(src_y + in_h) }); + src_texture.view(), { (int)src_x + int(in_x) * 0, (int)src_y + int(in_y)*0 }, { int(src_x + in_w), int(src_y + in_h) }); src_id = tmp.id(); diff --git a/rpcs3/Emu/RSX/GL/gl_helpers.cpp b/rpcs3/Emu/RSX/GL/gl_helpers.cpp index 1692543da8..e6bf8c0c71 100644 --- a/rpcs3/Emu/RSX/GL/gl_helpers.cpp +++ b/rpcs3/Emu/RSX/GL/gl_helpers.cpp @@ -2,7 +2,11 @@ #include "gl_helpers.h" #ifdef _WIN32 +#pragma warning(push) +#pragma warning(disable: 4091) #include <DbgHelp.h> +#pragma warning(pop) + #pragma comment(lib, "Dbghelp.lib") #endif diff --git a/rpcs3/Emu/RSX/GL/gl_texture_cache.cpp b/rpcs3/Emu/RSX/GL/gl_texture_cache.cpp index 04c06f1cf8..f70a2d700a 100644 --- a/rpcs3/Emu/RSX/GL/gl_texture_cache.cpp +++ b/rpcs3/Emu/RSX/GL/gl_texture_cache.cpp @@ -25,9 +25,8 @@ namespace gl }; std::vector<capability_texture> found_textures; - u32 texture_size = info.size(); - m_parent_region->for_each(info.start_address, texture_size, [&](cached_texture& texture) + m_parent_region->for_each(info.range(), [&](cached_texture& texture) { if ((texture.m_state & cache_entry_state::local_synchronized) == 
cache_entry_state::invalid) { @@ -124,7 +123,7 @@ namespace gl { //read from host //flush all local textures at region - m_parent_region->for_each(info.start_address, texture_size, [](cached_texture& texture) + m_parent_region->for_each(info.range(), [](cached_texture& texture) { texture.sync(gl::cache_buffers::host); //texture.invalidate(gl::cache_buffers::local); @@ -387,7 +386,7 @@ namespace gl if ((buffers & cache_buffers::host) != cache_buffers::none) { m_state &= ~cache_entry_state::host_synchronized; - m_parent_region->for_each(info.start_address, info.size(), [this](cached_texture& texture) + m_parent_region->for_each(info.range(), [this](cached_texture& texture) { if (std::addressof(texture) != this) { @@ -522,21 +521,14 @@ namespace gl } } - void protected_region::for_each(u32 start_address, u32 size, std::function<void(cached_texture& texture)> callback) + void protected_region::for_each(range<u32> range, std::function<void(cached_texture& texture)> callback) { for (auto &entry : m_textures) { - if (entry.first.start_address >= start_address + size) + if (range.overlaps({ entry.first.start_address, entry.first.start_address + entry.first.size() })) { - continue; + callback(entry.second); } - - if (entry.first.start_address + entry.first.size() <= start_address) - { - continue; - } - - callback(entry.second); } } @@ -558,7 +550,7 @@ namespace gl if (m_current_protection != flags) { //LOG_WARNING(RSX, "protected region [0x%x, 0x%x)", start_address, start_address + size()); - vm::page_protect(start_address, size(), 0, m_current_protection & ~flags, flags); + vm::page_protect(begin(), size(), 0, m_current_protection & ~flags, flags); m_current_protection = flags; } } @@ -584,7 +576,7 @@ namespace gl } //LOG_WARNING(RSX, "unprotected region [0x%x, 0x%x)", start_address, start_address + size()); - vm::page_protect(start_address, size(), 0, flags, 0); + vm::page_protect(begin(), size(), 0, flags, 0); m_current_protection &= ~flags; } @@ -612,17 +604,7 @@ 
namespace gl } } - if (region.start_address < start_address) - { - pages_count += (start_address - region.start_address) / vm::page_size; - start_address = region.start_address; - } - else - { - //[start_address, region.start_address + region.pages_count * vm::page_size) - - pages_count = (region.start_address + region.pages_count * vm::page_size - start_address) / vm::page_size; - } + extend(region); } cached_texture& protected_region::add(const texture_info& info) @@ -669,40 +651,39 @@ namespace gl cached_texture &texture_cache::entry(const texture_info &info, cache_buffers sync) { - u32 aligned_address; - u32 aligned_size; + range<u32> aligned_range; const bool accurate_cache = false; if (accurate_cache) { - aligned_address = info.start_address & ~(vm::page_size - 1); - aligned_size = align(info.start_address - aligned_address + info.size(), vm::page_size); + aligned_range.begin(info.start_address & ~(vm::page_size - 1)); + aligned_range.size(align(info.start_address - aligned_range.begin() + info.size(), vm::page_size)); } else { - aligned_size = info.size() & ~(vm::page_size - 1); + u32 aligned_size = info.size() & ~(vm::page_size - 1); if (!aligned_size) { - aligned_address = info.start_address & ~(vm::page_size - 1); - aligned_size = align(info.size() + info.start_address - aligned_address, vm::page_size); + aligned_range.begin(info.start_address & ~(vm::page_size - 1)); + aligned_range.size(align(info.size() + info.start_address - aligned_range.begin(), vm::page_size)); } else { - aligned_address = align(info.start_address, vm::page_size); + aligned_range.begin(align(info.start_address, vm::page_size)); + aligned_range.size(aligned_size); } } - std::vector<std::list<protected_region>::iterator> regions = find_regions(aligned_address, aligned_size); + std::vector<std::list<protected_region>::iterator> regions = find_regions(aligned_range); protected_region *region; if (regions.empty()) { m_protected_regions.emplace_back(); region = 
&m_protected_regions.back(); - region->pages_count = aligned_size / vm::page_size; - region->start_address = aligned_address; + region->set(aligned_range); } else { @@ -714,14 +695,14 @@ namespace gl m_protected_regions.erase(regions[index]); } - if (region->start_address > aligned_address) + if (region->begin() > aligned_range.begin()) { - region->pages_count += (region->start_address - aligned_address) / vm::page_size; - region->start_address = aligned_address; + region->end(region->end() + (region->begin() - aligned_range.begin())); + region->begin(aligned_range.begin()); } - u32 new_pages_count = (aligned_address + aligned_size - region->start_address) / vm::page_size; - region->pages_count = std::max(region->pages_count, new_pages_count); + u32 new_size = aligned_range.end() - region->begin(); + region->size(std::max(region->size(), new_size)); } cached_texture *result = region->find(info); @@ -740,12 +721,7 @@ namespace gl { for (auto& entry : m_protected_regions) { - if (entry.start_address > address) - { - continue; - } - - if (address >= entry.start_address && address < entry.start_address + entry.size()) + if (entry.contains(address)) { return &entry; } @@ -754,23 +730,16 @@ namespace gl return nullptr; } - std::vector<std::list<protected_region>::iterator> texture_cache::find_regions(u32 address, u32 size) + std::vector<std::list<protected_region>::iterator> texture_cache::find_regions(range<u32> range) { std::vector<std::list<protected_region>::iterator> result; for (auto it = m_protected_regions.begin(); it != m_protected_regions.end(); ++it) { - if (it->start_address >= address + size) + if (it->overlaps(range)) { - continue; + result.push_back(it); } - - if (it->start_address + it->size() <= address) - { - continue; - } - - result.push_back(it); } return result; diff --git a/rpcs3/Emu/RSX/GL/gl_texture_cache.h b/rpcs3/Emu/RSX/GL/gl_texture_cache.h index 59c909c3f1..b6138d9d1b 100644 --- a/rpcs3/Emu/RSX/GL/gl_texture_cache.h +++ 
b/rpcs3/Emu/RSX/GL/gl_texture_cache.h @@ -1,7 +1,8 @@ #pragma once #include <vector> -#include "Utilities/types.h" #include "gl_helpers.h" +#include <Utilities/types.h> +#include <Utilities/range.h> namespace gl { @@ -74,6 +75,11 @@ namespace gl { return compressed_size ? compressed_size : height * pitch * depth; } + + range<u32> range() const + { + return{ start_address, start_address + size() }; + } }; struct protected_region; @@ -116,24 +122,16 @@ namespace gl friend protected_region; }; - struct protected_region + struct protected_region : range<u32> { - u32 start_address; - u32 pages_count; - private: std::unordered_map<texture_info, cached_texture, fnv_1a_hasher, bitwise_equals> m_textures; u32 m_current_protection = 0; public: - u32 size() const - { - return pages_count * vm::page_size; - } - cache_access requires_protection() const; void for_each(std::function<void(cached_texture& texture)> callback); - void for_each(u32 start_address, u32 size, std::function<void(cached_texture& texture)> callback); + void for_each(range<u32> range, std::function<void(cached_texture& texture)> callback); void protect(); void unprotect(cache_access access = cache_access::read_write); bool empty() const; @@ -154,7 +152,7 @@ namespace gl public: cached_texture &entry(const texture_info &info, cache_buffers sync = cache_buffers::none); protected_region *find_region(u32 address); - std::vector<std::list<protected_region>::iterator> find_regions(u32 address, u32 size); + std::vector<std::list<protected_region>::iterator> find_regions(range<u32> range); void update_protection(); void clear(); }; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 2954ecb4b6..8dac3aada0 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -353,23 +353,15 @@ namespace rsx } }); - // TODO: exit condition - while (true) + loop([this] { - CHECK_EMU_STATUS; - be_t<u32> get = ctrl->get; be_t<u32> put = ctrl->put; if (put == get || !Emu.IsRunning()) { 
do_internal_task(); - continue; - } - - if (m_internal_task_waiters.load(std::memory_order_relaxed)) - { - do_internal_task(); + return; } const u32 cmd = ReadIO32(get); @@ -380,7 +372,7 @@ namespace rsx u32 offs = cmd & 0x1fffffff; //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); ctrl->get = offs; - continue; + return; } if (cmd & CELL_GCM_METHOD_FLAG_CALL) { @@ -388,7 +380,7 @@ namespace rsx u32 offs = cmd & ~3; //LOG_WARNING(RSX, "rsx call(0x%x) #0x%x - 0x%x", offs, cmd, get); ctrl->get = offs; - continue; + return; } if (cmd == CELL_GCM_METHOD_FLAG_RETURN) { @@ -396,13 +388,13 @@ namespace rsx m_call_stack.pop(); //LOG_WARNING(RSX, "rsx return(0x%x)", get); ctrl->get = get; - continue; + return; } if (cmd == 0) //nop { ctrl->get = get + 4; - continue; + return; } auto args = vm::ptr<u32>::make((u32)RSXIOMem.RealAddr(get + 4)); @@ -433,7 +425,7 @@ namespace rsx } ctrl->get = get + (count + 1) * 4; - } + }); } std::string thread::get_name() const @@ -548,7 +540,7 @@ namespace rsx void thread::invoke(std::function<void()> callback) { - if (get_thread_ctrl() == thread_ctrl::get_current()) + if (is_current()) { callback(); } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index e0f1e6b631..72d0545448 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -10,6 +10,7 @@ #include "Utilities/Thread.h" #include "Utilities/Timer.h" #include "Utilities/convert.h" +#include "Emu/System.h" extern u64 get_system_time(); @@ -412,5 +413,39 @@ namespace rsx u32 ReadIO32(u32 addr); void WriteIO32(u32 addr, u32 value); + + template<typename Type> + force_inline auto loop(Type function) -> std::enable_if_t<!std::is_same<decltype(function()), void>::value, void> + { + while (function()) + { + CHECK_EMU_STATUS; + + if (m_internal_task_waiters.load(std::memory_order_relaxed)) + { + do_internal_task(); + } + else + { + std::this_thread::sleep_for(1ms); + } + } + } + + 
template<typename Type> + force_inline auto loop(Type function) -> std::enable_if_t<std::is_same<decltype(function()), void>::value, void> + { + while (true) + { + CHECK_EMU_STATUS; + + if (m_internal_task_waiters.load(std::memory_order_relaxed)) + { + do_internal_task(); + } + + function(); + } + } }; } diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 7f08ccdb9b..530f2f3fc9 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -19,6 +19,36 @@ namespace rsx template<> struct vertex_data_type_from_element_type<u8> { static const vertex_base_type type = vertex_base_type::ub; }; template<> struct vertex_data_type_from_element_type<u16> { static const vertex_base_type type = vertex_base_type::s1; }; + std::atomic<uint> operations_in_progress{ 0 }; + + struct scoped_operation + { + ~scoped_operation() + { + --operations_in_progress; + } + }; + + force_inline void async_operation(std::function<void()> function) + { + ++operations_in_progress; + + std::thread([function = std::move(function)]() + { + scoped_operation operation; + function(); + }).detach(); + } + + std::vector<std::shared_ptr<thread_ctrl>> threads_storage; + + void wait_for_operations_end(thread* rsx) + { + rsx->loop([] { return operations_in_progress > 0; }); + + threads_storage.clear(); + } + namespace nv406e { force_inline void set_reference(thread* rsx, u32 arg) @@ -29,13 +59,7 @@ namespace rsx force_inline void semaphore_acquire(thread* rsx, u32 arg) { //TODO: dma - while (vm::ps3::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg) - { - if (Emu.IsStopped()) - break; - - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } + rsx->loop([=] { return vm::ps3::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg; }); } force_inline void semaphore_release(thread* rsx, u32 arg) @@ -55,6 +79,8 @@ namespace rsx force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg) { + 
wait_for_operations_end(rsx); + //TODO: dma vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff)); @@ -217,6 +243,8 @@ namespace rsx force_inline void set_begin_end(thread* rsx, u32 arg) { + wait_for_operations_end(rsx); + if (arg) { rsx->draw_inline_vertex_array = false; @@ -323,7 +351,7 @@ namespace rsx namespace nv3089 { - never_inline void image_in(thread *rsx, u32 arg) + force_inline void image_in(thread *rsx, u32 arg) { u32 operation = method_registers[NV3089_SET_OPERATION]; @@ -344,6 +372,14 @@ namespace rsx u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24; u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT]; + u32 context_surface = method_registers[NV3089_SET_CONTEXT_SURFACE]; + + const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET]; + const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE]; + + f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX]; + f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY]; + f32 in_x = (arg & 0xffff) / 16.f; f32 in_y = (arg >> 16) / 16.f; @@ -362,16 +398,13 @@ namespace rsx LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown operation (%d)", operation); } - const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET]; - const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE]; - u32 dst_offset; u32 dst_dma = 0; u16 dst_color_format; u32 out_pitch = 0; u32 out_aligment = 64; - switch (method_registers[NV3089_SET_CONTEXT_SURFACE]) + switch (context_surface) { case CELL_GCM_CONTEXT_SURFACE2D: dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]; @@ -395,12 +428,6 @@ namespace rsx u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 
2 : 4; - u32 in_offset = u32(in_x) * u32(in_bpp + in_pitch * in_y); - u32 out_offset = out_x * out_bpp + out_pitch * out_y; - - tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);//get_address(src_offset, src_dma); - u32 dst_address = get_address(dst_offset + out_offset, dst_dma); - if (out_pitch == 0) { out_pitch = out_bpp * out_w; @@ -421,10 +448,13 @@ namespace rsx clip_h = out_h; } - //LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address); + u32 in_offset = u32(in_x * in_bpp) + u32(in_pitch * in_y); + u32 out_offset = out_x * out_bpp + out_pitch * out_y; - u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr; - u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset); + tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);//get_address(src_offset, src_dma); + u32 dst_address = get_address(dst_offset + out_offset, dst_dma); + + //LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address); if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 && dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8) @@ -442,14 +472,9 @@ namespace rsx // method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]), // method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]); - std::unique_ptr<u8[]> temp1, temp2; - AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 
AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; - f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX]; - f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY]; - u32 convert_w = (u32)(scale_x * in_w); u32 convert_h = (u32)(scale_y * in_h); @@ -459,146 +484,158 @@ namespace rsx bool need_convert = out_format != in_format || scale_x != 1.0 || scale_y != 1.0; - u32 slice_h = clip_h; + u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16; + u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24; - if (src_region.tile) + // 0 indicates height of 1 pixel + if (!sw_height_log2) { - if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2) - { - slice_h *= 2; - } - - u32 size = slice_h * in_pitch; - - if (size > src_region.tile->size - src_region.base) - { - u32 diff = size - (src_region.tile->size - src_region.base); - slice_h -= (diff + in_pitch - 1) / in_pitch; - } + sw_height_log2 = 1; } - if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D) + async_operation([=] { - if (need_convert || need_clip) - { - if (need_clip) - { - if (need_convert) - { - convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); + u8* pixels_src = src_region.tile ? 
src_region.ptr + src_region.base : src_region.ptr; + u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset); - clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); + std::unique_ptr<u8[]> temp1, temp2; + + u32 slice_h = clip_h; + + if (src_region.tile) + { + if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2) + { + slice_h *= 2; + } + + u32 size = slice_h * in_pitch; + + if (size > src_region.tile->size - src_region.base) + { + u32 diff = size - (src_region.tile->size - src_region.base); + slice_h -= (diff + in_pitch - 1) / in_pitch; + } + } + + if (context_surface != CELL_GCM_CONTEXT_SWIZZLE2D) + { + if (need_convert || need_clip) + { + if (need_clip) + { + if (need_convert) + { + convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); + + clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); + } + else + { + clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); + } } else { - clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); + convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); } + } else { - convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? 
true : false); + if (out_pitch != in_pitch || out_pitch != out_bpp * out_w || in_pitch != in_bpp * in_w) + { + for (u32 y = 0; y < out_h; ++y) + { + u8 *dst = pixels_dst + out_pitch * y; + u8 *src = pixels_src + in_pitch * y; + + std::memmove(dst, src, out_w * out_bpp); + } + } + else + { + std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h); + } } } else { - if (out_pitch != in_pitch || out_pitch != out_bpp * out_w || in_pitch != in_bpp * in_w) + if (need_convert || need_clip) { - for (u32 y = 0; y < out_h; ++y) + if (need_clip) { - u8 *dst = pixels_dst + out_pitch * y; - u8 *src = pixels_src + in_pitch * y; + if (need_convert) + { + convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); - std::memmove(dst, src, out_w * out_bpp); - } - } - else - { - std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h); - } - } - } - else - { - if (need_convert || need_clip) - { - if (need_clip) - { - if (need_convert) - { - convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); - - clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); + clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); + } + else + { + clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); + } } else { - clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); + convert_scale_image(temp2, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? 
true : false); } + + pixels_src = temp2.get(); } - else + + // swizzle based on destination size + u16 sw_width = 1 << sw_width_log2; + u16 sw_height = 1 << sw_height_log2; + + temp2.reset(new u8[out_bpp * sw_width * sw_height]); + + u8* linear_pixels = pixels_src; + u8* swizzled_pixels = temp2.get(); + + std::unique_ptr<u8[]> sw_temp; + + // Check and pad texture out if we are given non square texture for swizzle to be correct + if (sw_width != out_w || sw_height != out_h) { - convert_scale_image(temp2, out_format, out_w, out_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false); + sw_temp.reset(new u8[out_bpp * sw_width * sw_height]); + + switch (out_bpp) + { + case 1: + pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + break; + case 2: + pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + break; + case 4: + pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + break; + } + + linear_pixels = sw_temp.get(); } - pixels_src = temp2.get(); - } - - u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16; - u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24; - - // 0 indicates height of 1 pixel - sw_height_log2 = sw_height_log2 == 0 ? 
1 : sw_height_log2; - - // swizzle based on destination size - u16 sw_width = 1 << sw_width_log2; - u16 sw_height = 1 << sw_height_log2; - - temp2.reset(new u8[out_bpp * sw_width * sw_height]); - - u8* linear_pixels = pixels_src; - u8* swizzled_pixels = temp2.get(); - - std::unique_ptr<u8[]> sw_temp; - - // Check and pad texture out if we are given non square texture for swizzle to be correct - if (sw_width != out_w || sw_height != out_h) - { - sw_temp.reset(new u8[out_bpp * sw_width * sw_height]); - switch (out_bpp) { case 1: - pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); break; case 2: - pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); break; case 4: - pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); break; } - linear_pixels = sw_temp.get(); + std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height); } - - switch (out_bpp) - { - case 1: - convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); - break; - case 2: - convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); - break; - case 4: - convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); - break; - } - - std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height); - } + }); } } @@ -630,22 +667,32 @@ namespace rsx out_pitch = line_length; } - u8 *dst = (u8*)vm::base(get_address(method_registers[NV0039_OFFSET_OUT], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT])); - const u8 *src = (u8*)vm::base(get_address(method_registers[NV0039_OFFSET_IN], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN])); - if 
(in_pitch == out_pitch && out_pitch == line_length) + u32 src_offset = method_registers[NV0039_OFFSET_IN]; + u32 src_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN]; + + u32 dst_offset = method_registers[NV0039_OFFSET_OUT]; + u32 dst_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT]; + + async_operation([=] { - std::memcpy(dst, src, line_length * line_count); - } - else - { - for (u32 i = 0; i < line_count; ++i) + u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma)); + const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma)); + + if (in_pitch == out_pitch && out_pitch == line_length) { - std::memcpy(dst, src, line_length); - dst += out_pitch; - src += in_pitch; + std::memcpy(dst, src, line_length * line_count); } - } + else + { + for (u32 i = 0; i < line_count; ++i) + { + std::memcpy(dst, src, line_length); + dst += out_pitch; + src += in_pitch; + } + } + }); } } diff --git a/rpcs3/Emu/SysCalls/Modules/cellGame.cpp b/rpcs3/Emu/SysCalls/Modules/cellGame.cpp index 08a535e449..f810fe4bf5 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellGame.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellGame.cpp @@ -640,26 +640,31 @@ s32 cellGameThemeInstallFromBuffer() s32 cellDiscGameGetBootDiscInfo() { + return 0; throw EXCEPTION(""); } s32 cellDiscGameRegisterDiscChangeCallback() { + return 0; throw EXCEPTION(""); } s32 cellDiscGameUnregisterDiscChangeCallback() { + return 0; throw EXCEPTION(""); } s32 cellGameRegisterDiscChangeCallback() { + return 0; throw EXCEPTION(""); } s32 cellGameUnregisterDiscChangeCallback() { + return 0; throw EXCEPTION(""); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp index c01f8d4ddf..6ca1c97b1a 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp @@ -1591,6 +1591,8 @@ s32 _cellSyncLFQueueDetachLv2EventQueue(vm::ptr<u32> spus, u32 num, vm::ptr<Cell throw EXCEPTION(""); } +static const u32 cellSyncMutexTryLock_id = 
get_function_id("cellSyncMutexTryLock"); + Module<> cellSync("cellSync", []() { // setup error handler @@ -1624,13 +1626,17 @@ Module<> cellSync("cellSync", []() // analyse error code if (u32 code = (value & 0xffffff00) == 0x80410100 ? static_cast<u32>(value) : 0) { - cellSync.error("%s() -> %s (0x%x)", func->name, get_error(code), code); + //CELL_SYNC_ERROR_BUSY is ok for cellSyncMutexTryLock + if (code != CELL_SYNC_ERROR_BUSY || func->id != cellSyncMutexTryLock_id) + { + cellSync.error("%s() -> %s (0x%x)", func->name, get_error(code), code); + } } }; REG_FUNC(cellSync, cellSyncMutexInitialize); REG_FUNC(cellSync, cellSyncMutexLock); - REG_FUNC(cellSync, cellSyncMutexTryLock); + REG_FNID(cellSync, cellSyncMutexTryLock_id, cellSyncMutexTryLock); REG_FUNC(cellSync, cellSyncMutexUnlock); REG_FUNC(cellSync, cellSyncBarrierInitialize); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 1861bd7ce0..0f65b4356e 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -377,6 +377,7 @@ <ClInclude Include="..\Utilities\GNU.h" /> <ClInclude Include="..\Utilities\Log.h" /> <ClInclude Include="..\Utilities\File.h" /> + <ClInclude Include="..\Utilities\range.h" /> <ClInclude Include="..\Utilities\rPlatform.h" /> <ClInclude Include="..\Utilities\rTime.h" /> <ClInclude Include="..\Utilities\rXml.h" /> diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index d3b2a9660c..c833a5b049 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -1794,5 +1794,8 @@ <ClInclude Include="Emu\RSX\Common\surface_store.h"> <Filter>Emu\GPU\RSX\Common</Filter> </ClInclude> + <ClInclude Include="..\Utilities\range.h"> + <Filter>Utilities</Filter> + </ClInclude> </ItemGroup> </Project> \ No newline at end of file diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index a20ed4fc71..ca1c070e4f 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -85,7 +85,7 @@ <OutDir>$(SolutionDir)bin\</OutDir> 
<LinkIncremental>false</LinkIncremental> <RunCodeAnalysis>false</RunCodeAnalysis> - <LibraryPath>$(SolutionDir)$(Platform)\$(Configuration) Library\;$(LibraryPath)</LibraryPath> + <LibraryPath>$(SolutionDir)$(Platform)\Release\;$(SolutionDir)$(Platform)\Release Library\;$(LibraryPath)</LibraryPath> </PropertyGroup> <ItemDefinitionGroup> <PreBuildEvent>