From 82a32fcb5ab11bc9a6680d58ad7461947e0873fe Mon Sep 17 00:00:00 2001 From: DHrpcs3 Date: Tue, 15 Mar 2016 23:00:05 +0300 Subject: [PATCH] Implemented writing swizzled textures Optimized protected_region::combine Implemented conditional async mode for nv3089::image_in & nv0039::buffer_notify --- Utilities/range.h | 3 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 29 ++++-- rpcs3/Emu/RSX/GL/gl_texture_cache.cpp | 133 ++++++++++++++++++++++++-- rpcs3/Emu/RSX/GL/gl_texture_cache.h | 4 +- rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp | 7 +- rpcs3/Emu/RSX/rsx_methods.cpp | 29 +++--- 6 files changed, 173 insertions(+), 32 deletions(-) diff --git a/Utilities/range.h b/Utilities/range.h index 3759c82333..581e0f1b8d 100644 --- a/Utilities/range.h +++ b/Utilities/range.h @@ -57,10 +57,11 @@ public: m_end = m_begin + value; return *this; } + void extend(const range& other) { m_begin = std::min(m_begin, other.m_begin); - m_end = std::min(m_end, other.m_end); + m_end = std::max(m_end, other.m_end); } constexpr bool valid() const diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 7acc6fd7be..c37f1f3e59 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1059,7 +1059,6 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer) return false; } - u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4; @@ -1204,6 +1203,15 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer) dst_info.swizzled = context_surface == CELL_GCM_CONTEXT_SWIZZLE2D; + if (dst_info.swizzled) + { + u8 sw_width_log2 = rsx::method_registers[NV309E_SET_FORMAT] >> 16; + u8 sw_height_log2 = rsx::method_registers[NV309E_SET_FORMAT] >> 24; + + dst_info.log2_width = sw_width_log2 ? sw_width_log2 : 1; + dst_info.log2_height = sw_height_log2 ? sw_height_log2 : 1; + } + switch (dst_color_format) { case CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5: @@ -1443,6 +1451,7 @@ gl::texture_info surface_info(rsx::thread &rsx, rsx::surface_color_format format { gl::texture_info info{}; info.format = gl::get_texture_format(surface_format_to_texture_format(format)); + info.format.flags &= gl::texture_flags::allow_swizzle; rsx::tiled_region region = rsx.get_tiled_address(offset, location); @@ -1510,24 +1519,29 @@ void GLGSRender::init_buffers(bool skip_reading) m_surface.width = clip_width * m_surface.width_mult + clip_x; m_surface.height = clip_height * m_surface.height_mult + clip_y; + bool swizzled_surface = m_surface.type == CELL_GCM_SURFACE_SWIZZLE; + rsx::for_each_active_color_surface([&](int index) { u32 offset = rsx::method_registers[mr_color_offset[index]]; u32 location = rsx::method_registers[mr_color_dma[index]]; u32 pitch = rsx::method_registers[mr_color_pitch[index]]; - bool swizzled = m_surface.type == CELL_GCM_SURFACE_SWIZZLE; gl::texture_info info = surface_info(*this, m_surface.color_format, offset, location, m_surface.width, m_surface.height, pitch); - info.swizzled = swizzled; + info.swizzled = swizzled_surface; + + if (swizzled_surface) + { + info.log2_width = m_surface.log2width; + info.log2_height = m_surface.log2height; + } cached_color_buffers[index] = &texture_cache.entry(info, skip_reading ? gl::cache_buffers::none : gl::cache_buffers::local); draw_fbo.color[index] = cached_color_buffers[index]->view(); }); { - u32 offset = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET]; - u32 location = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]; u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z] & ~63; int bpp; @@ -1542,13 +1556,16 @@ void GLGSRender::init_buffers(bool skip_reading) break; } - if (pitch && pitch < bpp * m_surface.width) + if (swizzled_surface || (pitch && pitch < bpp * m_surface.width)) { __glcheck draw_fbo.depth_stencil = null_texture; cached_depth_buffer = nullptr; } else { + u32 offset = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET]; + u32 location = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]; + if (!pitch) { pitch = m_surface.width * bpp; diff --git a/rpcs3/Emu/RSX/GL/gl_texture_cache.cpp b/rpcs3/Emu/RSX/GL/gl_texture_cache.cpp index f70a2d700a..d62f8ad7a6 100644 --- a/rpcs3/Emu/RSX/GL/gl_texture_cache.cpp +++ b/rpcs3/Emu/RSX/GL/gl_texture_cache.cpp @@ -333,19 +333,59 @@ namespace gl } else { - if (info.swizzled && (info.format.flags & texture_flags::allow_swizzle) != texture_flags::none) - { - //TODO - LOG_ERROR(RSX, "writing swizzled texture[0x%x] to host buffer", info.start_address); - } - gl::pixel_pack_settings{} .row_length(info.pitch / info.format.bpp) .aligment(1) .swap_bytes((info.format.flags & gl::texture_flags::swap_bytes) != gl::texture_flags::none) .apply(); - __glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, vm::base_priv(info.start_address)); + if (info.swizzled && (info.format.flags & texture_flags::allow_swizzle) != texture_flags::none) + { + std::unique_ptr linear_pixels(new u8[info.size()]); + + __glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, linear_pixels.get()); + + u16 sw_width = 1 << info.log2_width; + u16 sw_height = 1 << info.log2_height; + + // Check and pad texture out if we are given non square texture for swizzle to be correct + if (sw_width != info.width || sw_height != info.height) + { + std::unique_ptr sw_temp(new u8[info.format.bpp * sw_width * sw_height]); + + switch (info.format.bpp) + { + case 1: + rsx::pad_texture(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height); + break; + case 2: + rsx::pad_texture(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height); + break; + case 4: + rsx::pad_texture(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height); + break; + } + + linear_pixels = std::move(sw_temp); + } + + switch (info.format.bpp) + { + case 1: + rsx::convert_linear_swizzle(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false); + break; + case 2: + rsx::convert_linear_swizzle(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false); + break; + case 4: + rsx::convert_linear_swizzle(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false); + break; + } + } + else + { + __glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, vm::base_priv(info.start_address)); + } } ignore(gl::cache_buffers::all); @@ -590,20 +630,93 @@ namespace gl //TODO } + void set_page_protection(range range, u8 protect) + { + vm::page_protect(range.begin(), range.size(), 0, ~protect & (vm::page_readable | vm::page_writable), protect); + } + void protected_region::combine(protected_region& region) { - region.unprotect(); - unprotect(); + cache_access new_protection = region.requires_protection(); for (auto &texture : region.m_textures) { texture.second.parent(this); + if (!m_textures.emplace(texture).second) { throw EXCEPTION(""); } + + new_protection |= texture.second.requires_protection(); } + u8 new_protection_flags = 0; + + if ((new_protection & cache_access::read) != cache_access::none) + { + new_protection_flags |= vm::page_readable; + } + + if ((new_protection & cache_access::write) != cache_access::none) + { + new_protection_flags |= vm::page_writable; + } + + if (m_current_protection != new_protection_flags && region.m_current_protection != new_protection_flags) + { + if (begin() < region.begin()) + { + set_page_protection({ begin(), region.end() }, new_protection_flags); + } + else + { + set_page_protection({ region.begin(), end() }, new_protection_flags); + } + } + else if (m_current_protection != new_protection_flags) + { + if (begin() < region.begin()) + { + set_page_protection({ begin(), region.begin() }, new_protection_flags); + } + else + { + set_page_protection({ region.end(), end() }, new_protection_flags); + } + } + else if (region.m_current_protection != new_protection_flags) + { + if (begin() < region.begin()) + { + set_page_protection({ end(), region.end() }, new_protection_flags); + } + else + { + set_page_protection({ region.begin(), begin() }, new_protection_flags); + } + } + else + { + if (begin() < region.begin()) + { + if (u32 diff = region.begin() - end()) + { + set_page_protection({ end(), end() + diff }, new_protection_flags); + } + } + else + { + if (u32 diff = begin() - region.end()) + { + set_page_protection({ region.end(), region.end() + diff }, new_protection_flags); + } + + set_page_protection({ region.begin(), begin() }, new_protection_flags); + } + } + + m_current_protection = new_protection_flags; extend(region); } @@ -667,7 +780,7 @@ namespace gl if (!aligned_size) { aligned_range.begin(info.start_address & ~(vm::page_size - 1)); - aligned_range.size(align(info.size() + info.start_address - aligned_range.begin(), vm::page_size)); + aligned_range.size(align(info.start_address - aligned_range.begin() + info.size(), vm::page_size)); } else { diff --git a/rpcs3/Emu/RSX/GL/gl_texture_cache.h b/rpcs3/Emu/RSX/GL/gl_texture_cache.h index b6138d9d1b..0af70e6da1 100644 --- a/rpcs3/Emu/RSX/GL/gl_texture_cache.h +++ b/rpcs3/Emu/RSX/GL/gl_texture_cache.h @@ -70,6 +70,8 @@ namespace gl bool swizzled; float lod_bias; u32 start_address; + u32 log2_width; + u32 log2_height; u32 size() const { @@ -126,7 +128,7 @@ namespace gl { private: std::unordered_map m_textures; - u32 m_current_protection = 0; + u8 m_current_protection = 0; public: cache_access requires_protection() const; diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp index 43c58c0891..7dd11f9e17 100644 --- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp +++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp @@ -186,6 +186,8 @@ void rsx::gl_texture::bind(gl::texture_cache& cache, rsx::texture& tex) info.lod_bias = tex.bias(); } + gl::texture_flags flags = gl::texture_flags::none; + if (is_compressed) { info.format.type = gl::texture::type::ubyte; @@ -226,15 +228,18 @@ void rsx::gl_texture::bind(gl::texture_cache& cache, rsx::texture& tex) throw EXCEPTION("unimplemented texture format 0x%x", format); } + info.swizzled = info.swizzled && (found->second.flags & gl::texture_flags::allow_swizzle) != gl::texture_flags::none; info.format = found->second; info.pitch = std::max(info.width * info.format.bpp, tex.pitch()); + flags = found->second.flags; + info.format.flags &= gl::texture_flags::allow_swizzle; remap = info.format.remap.data(); } __glcheck cache.entry(info, gl::cache_buffers::local).bind(tex.index()); - if ((info.format.flags & gl::texture_flags::allow_remap) != gl::texture_flags::none) + if ((flags & gl::texture_flags::allow_remap) != gl::texture_flags::none) { u8 remap_a = tex.remap() & 0x3; u8 remap_r = (tex.remap() >> 2) & 0x3; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 530f2f3fc9..10d845544a 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -29,15 +29,22 @@ namespace rsx } }; - force_inline void async_operation(std::function function) + force_inline void async_operation(bool call_async, std::function function) { - ++operations_in_progress; - - std::thread([function = std::move(function)]() + if (call_async) + { + ++operations_in_progress; + + std::thread([function = std::move(function)]() + { + scoped_operation operation; + function(); + }).detach(); + } + else { - scoped_operation operation; function(); - }).detach(); + } } std::vector> threads_storage; @@ -493,7 +500,7 @@ namespace rsx sw_height_log2 = 1; } - async_operation([=] + async_operation(need_clip || need_convert || (in_w + in_h) > 128, [=] { u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr; u8* pixels_dst = vm::ps3::_ptr(dst_address + out_offset); @@ -592,10 +599,8 @@ namespace rsx u16 sw_width = 1 << sw_width_log2; u16 sw_height = 1 << sw_height_log2; - temp2.reset(new u8[out_bpp * sw_width * sw_height]); - u8* linear_pixels = pixels_src; - u8* swizzled_pixels = temp2.get(); + u8* swizzled_pixels = pixels_dst; std::unique_ptr sw_temp; @@ -632,8 +637,6 @@ namespace rsx convert_linear_swizzle(linear_pixels, swizzled_pixels, sw_width, sw_height, false); break; } - - std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height); } }); } @@ -674,7 +677,7 @@ namespace rsx u32 dst_offset = method_registers[NV0039_OFFSET_OUT]; u32 dst_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT]; - async_operation([=] + async_operation(line_count * line_length > 64, [=] { u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma)); const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma));