mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-12-25 00:15:18 +00:00
Implemented writing swizzled textures
Optimized protected_region::combine
Implemented conditional async mode for nv3089::image_in & nv0039::buffer_notify
This commit is contained in:
parent
53e3833aa1
commit
82a32fcb5a
@ -57,10 +57,11 @@ public:
|
||||
m_end = m_begin + value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void extend(const range& other)
|
||||
{
|
||||
m_begin = std::min(m_begin, other.m_begin);
|
||||
m_end = std::min(m_end, other.m_end);
|
||||
m_end = std::max(m_end, other.m_end);
|
||||
}
|
||||
|
||||
constexpr bool valid() const
|
||||
|
@ -1059,7 +1059,6 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
|
||||
u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
|
||||
|
||||
@ -1204,6 +1203,15 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer)
|
||||
|
||||
dst_info.swizzled = context_surface == CELL_GCM_CONTEXT_SWIZZLE2D;
|
||||
|
||||
if (dst_info.swizzled)
|
||||
{
|
||||
u8 sw_width_log2 = rsx::method_registers[NV309E_SET_FORMAT] >> 16;
|
||||
u8 sw_height_log2 = rsx::method_registers[NV309E_SET_FORMAT] >> 24;
|
||||
|
||||
dst_info.log2_width = sw_width_log2 ? sw_width_log2 : 1;
|
||||
dst_info.log2_height = sw_height_log2 ? sw_height_log2 : 1;
|
||||
}
|
||||
|
||||
switch (dst_color_format)
|
||||
{
|
||||
case CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5:
|
||||
@ -1443,6 +1451,7 @@ gl::texture_info surface_info(rsx::thread &rsx, rsx::surface_color_format format
|
||||
{
|
||||
gl::texture_info info{};
|
||||
info.format = gl::get_texture_format(surface_format_to_texture_format(format));
|
||||
info.format.flags &= gl::texture_flags::allow_swizzle;
|
||||
|
||||
rsx::tiled_region region = rsx.get_tiled_address(offset, location);
|
||||
|
||||
@ -1510,24 +1519,29 @@ void GLGSRender::init_buffers(bool skip_reading)
|
||||
m_surface.width = clip_width * m_surface.width_mult + clip_x;
|
||||
m_surface.height = clip_height * m_surface.height_mult + clip_y;
|
||||
|
||||
bool swizzled_surface = m_surface.type == CELL_GCM_SURFACE_SWIZZLE;
|
||||
|
||||
rsx::for_each_active_color_surface([&](int index)
|
||||
{
|
||||
u32 offset = rsx::method_registers[mr_color_offset[index]];
|
||||
u32 location = rsx::method_registers[mr_color_dma[index]];
|
||||
u32 pitch = rsx::method_registers[mr_color_pitch[index]];
|
||||
bool swizzled = m_surface.type == CELL_GCM_SURFACE_SWIZZLE;
|
||||
|
||||
gl::texture_info info = surface_info(*this, m_surface.color_format, offset, location, m_surface.width, m_surface.height, pitch);
|
||||
|
||||
info.swizzled = swizzled;
|
||||
info.swizzled = swizzled_surface;
|
||||
|
||||
if (swizzled_surface)
|
||||
{
|
||||
info.log2_width = m_surface.log2width;
|
||||
info.log2_height = m_surface.log2height;
|
||||
}
|
||||
|
||||
cached_color_buffers[index] = &texture_cache.entry(info, skip_reading ? gl::cache_buffers::none : gl::cache_buffers::local);
|
||||
draw_fbo.color[index] = cached_color_buffers[index]->view();
|
||||
});
|
||||
|
||||
{
|
||||
u32 offset = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET];
|
||||
u32 location = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA];
|
||||
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z] & ~63;
|
||||
|
||||
int bpp;
|
||||
@ -1542,13 +1556,16 @@ void GLGSRender::init_buffers(bool skip_reading)
|
||||
break;
|
||||
}
|
||||
|
||||
if (pitch && pitch < bpp * m_surface.width)
|
||||
if (swizzled_surface || (pitch && pitch < bpp * m_surface.width))
|
||||
{
|
||||
__glcheck draw_fbo.depth_stencil = null_texture;
|
||||
cached_depth_buffer = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 offset = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET];
|
||||
u32 location = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA];
|
||||
|
||||
if (!pitch)
|
||||
{
|
||||
pitch = m_surface.width * bpp;
|
||||
|
@ -333,19 +333,59 @@ namespace gl
|
||||
}
|
||||
else
|
||||
{
|
||||
if (info.swizzled && (info.format.flags & texture_flags::allow_swizzle) != texture_flags::none)
|
||||
{
|
||||
//TODO
|
||||
LOG_ERROR(RSX, "writing swizzled texture[0x%x] to host buffer", info.start_address);
|
||||
}
|
||||
|
||||
gl::pixel_pack_settings{}
|
||||
.row_length(info.pitch / info.format.bpp)
|
||||
.aligment(1)
|
||||
.swap_bytes((info.format.flags & gl::texture_flags::swap_bytes) != gl::texture_flags::none)
|
||||
.apply();
|
||||
|
||||
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, vm::base_priv(info.start_address));
|
||||
if (info.swizzled && (info.format.flags & texture_flags::allow_swizzle) != texture_flags::none)
|
||||
{
|
||||
std::unique_ptr<u8[]> linear_pixels(new u8[info.size()]);
|
||||
|
||||
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, linear_pixels.get());
|
||||
|
||||
u16 sw_width = 1 << info.log2_width;
|
||||
u16 sw_height = 1 << info.log2_height;
|
||||
|
||||
// Check and pad texture out if we are given non square texture for swizzle to be correct
|
||||
if (sw_width != info.width || sw_height != info.height)
|
||||
{
|
||||
std::unique_ptr<u8[]> sw_temp(new u8[info.format.bpp * sw_width * sw_height]);
|
||||
|
||||
switch (info.format.bpp)
|
||||
{
|
||||
case 1:
|
||||
rsx::pad_texture<u8>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
|
||||
break;
|
||||
case 2:
|
||||
rsx::pad_texture<u16>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
|
||||
break;
|
||||
case 4:
|
||||
rsx::pad_texture<u32>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
|
||||
break;
|
||||
}
|
||||
|
||||
linear_pixels = std::move(sw_temp);
|
||||
}
|
||||
|
||||
switch (info.format.bpp)
|
||||
{
|
||||
case 1:
|
||||
rsx::convert_linear_swizzle<u8>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
|
||||
break;
|
||||
case 2:
|
||||
rsx::convert_linear_swizzle<u16>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
|
||||
break;
|
||||
case 4:
|
||||
rsx::convert_linear_swizzle<u32>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, vm::base_priv(info.start_address));
|
||||
}
|
||||
}
|
||||
|
||||
ignore(gl::cache_buffers::all);
|
||||
@ -590,20 +630,93 @@ namespace gl
|
||||
//TODO
|
||||
}
|
||||
|
||||
// Forwards a protection change for the pages spanned by `range` to
// vm::page_protect. `protect` is passed through unchanged as the last
// argument; the argument before it carries the readable/writable bits that
// are NOT present in `protect`.
// NOTE(review): which of those two arguments vm::page_protect sets and which
// it clears is not visible from this file - confirm against its declaration.
void set_page_protection(range<u32> range, u8 protect)
{
	const auto access_mask = vm::page_readable | vm::page_writable;
	const auto complement = ~protect & access_mask;

	vm::page_protect(range.begin(), range.size(), 0, complement, protect);
}
|
||||
|
||||
// Merges `region` into this region.
//
// Every texture owned by `region` is re-parented to this region and copied
// into m_textures; a key that already exists here is treated as a fatal
// inconsistency. The page protection needed by the merged region is then
// computed, and only the address spans whose protection differs from what is
// already recorded in m_current_protection (for either region) are passed to
// set_page_protection. Finally the recorded protection is updated and this
// region's address range is extended to cover `region`.
//
// Neither region is unprotected up front: doing so would make the
// m_current_protection comparisons below meaningless and force every page to
// be re-protected on each merge.
void protected_region::combine(protected_region& region)
{
	// NOTE(review): the requirement is seeded from `region` only; this
	// region's own textures are not consulted here - confirm that is intended.
	cache_access new_protection = region.requires_protection();

	for (auto &texture : region.m_textures)
	{
		texture.second.parent(this);

		if (!m_textures.emplace(texture).second)
		{
			throw EXCEPTION("");
		}

		new_protection |= texture.second.requires_protection();
	}

	// Translate the cache access requirement into vm page flags.
	u8 new_protection_flags = 0;

	if ((new_protection & cache_access::read) != cache_access::none)
	{
		new_protection_flags |= vm::page_readable;
	}

	if ((new_protection & cache_access::write) != cache_access::none)
	{
		new_protection_flags |= vm::page_writable;
	}

	// Relative placement of the two regions; neither range changes until
	// extend() at the end, so this can be evaluated once.
	const bool this_is_lower = begin() < region.begin();

	const bool this_outdated = m_current_protection != new_protection_flags;
	const bool other_outdated = region.m_current_protection != new_protection_flags;

	if (this_outdated && other_outdated)
	{
		// Both regions need new flags: cover the whole combined span.
		if (this_is_lower)
		{
			set_page_protection({ begin(), region.end() }, new_protection_flags);
		}
		else
		{
			set_page_protection({ region.begin(), end() }, new_protection_flags);
		}
	}
	else if (this_outdated)
	{
		// Only this region needs new flags: cover it up to the edge of `region`.
		if (this_is_lower)
		{
			set_page_protection({ begin(), region.begin() }, new_protection_flags);
		}
		else
		{
			set_page_protection({ region.end(), end() }, new_protection_flags);
		}
	}
	else if (other_outdated)
	{
		// Only `region` needs new flags: cover it up to the edge of this region.
		if (this_is_lower)
		{
			set_page_protection({ end(), region.end() }, new_protection_flags);
		}
		else
		{
			set_page_protection({ region.begin(), begin() }, new_protection_flags);
		}
	}
	else
	{
		// Both regions already carry the right flags: only a gap between them
		// (if any) has to be brought in line.
		// NOTE(review): the subtractions are unsigned, so they assume the two
		// regions do not overlap - confirm against callers.
		if (this_is_lower)
		{
			if (u32 diff = region.begin() - end())
			{
				set_page_protection({ end(), end() + diff }, new_protection_flags);
			}
		}
		else
		{
			if (u32 diff = begin() - region.end())
			{
				set_page_protection({ region.end(), region.end() + diff }, new_protection_flags);
			}

			// NOTE(review): this span contains the gap handled just above plus
			// all of `region`, which already has these flags; the mirrored
			// branch has no such call. Looks redundant - confirm before removing.
			set_page_protection({ region.begin(), begin() }, new_protection_flags);
		}
	}

	m_current_protection = new_protection_flags;
	extend(region);
}
|
||||
|
||||
@ -667,7 +780,7 @@ namespace gl
|
||||
if (!aligned_size)
|
||||
{
|
||||
aligned_range.begin(info.start_address & ~(vm::page_size - 1));
|
||||
aligned_range.size(align(info.size() + info.start_address - aligned_range.begin(), vm::page_size));
|
||||
aligned_range.size(align(info.start_address - aligned_range.begin() + info.size(), vm::page_size));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -70,6 +70,8 @@ namespace gl
|
||||
bool swizzled;
|
||||
float lod_bias;
|
||||
u32 start_address;
|
||||
u32 log2_width;
|
||||
u32 log2_height;
|
||||
|
||||
u32 size() const
|
||||
{
|
||||
@ -126,7 +128,7 @@ namespace gl
|
||||
{
|
||||
private:
|
||||
std::unordered_map<texture_info, cached_texture, fnv_1a_hasher, bitwise_equals> m_textures;
|
||||
u32 m_current_protection = 0;
|
||||
u8 m_current_protection = 0;
|
||||
|
||||
public:
|
||||
cache_access requires_protection() const;
|
||||
|
@ -186,6 +186,8 @@ void rsx::gl_texture::bind(gl::texture_cache& cache, rsx::texture& tex)
|
||||
info.lod_bias = tex.bias();
|
||||
}
|
||||
|
||||
gl::texture_flags flags = gl::texture_flags::none;
|
||||
|
||||
if (is_compressed)
|
||||
{
|
||||
info.format.type = gl::texture::type::ubyte;
|
||||
@ -226,15 +228,18 @@ void rsx::gl_texture::bind(gl::texture_cache& cache, rsx::texture& tex)
|
||||
throw EXCEPTION("unimplemented texture format 0x%x", format);
|
||||
}
|
||||
|
||||
info.swizzled = info.swizzled && (found->second.flags & gl::texture_flags::allow_swizzle) != gl::texture_flags::none;
|
||||
info.format = found->second;
|
||||
info.pitch = std::max(info.width * info.format.bpp, tex.pitch());
|
||||
flags = found->second.flags;
|
||||
info.format.flags &= gl::texture_flags::allow_swizzle;
|
||||
|
||||
remap = info.format.remap.data();
|
||||
}
|
||||
|
||||
__glcheck cache.entry(info, gl::cache_buffers::local).bind(tex.index());
|
||||
|
||||
if ((info.format.flags & gl::texture_flags::allow_remap) != gl::texture_flags::none)
|
||||
if ((flags & gl::texture_flags::allow_remap) != gl::texture_flags::none)
|
||||
{
|
||||
u8 remap_a = tex.remap() & 0x3;
|
||||
u8 remap_r = (tex.remap() >> 2) & 0x3;
|
||||
|
@ -29,15 +29,22 @@ namespace rsx
|
||||
}
|
||||
};
|
||||
|
||||
// Runs `function` either on a detached worker thread or inline on the
// calling thread.
//
// call_async - when true, operations_in_progress is incremented and
//              `function` is moved into a detached std::thread; when false,
//              `function` is simply invoked before this call returns.
// function   - the work to execute.
force_inline void async_operation(bool call_async, std::function<void()> function)
{
	if (call_async)
	{
		// Counted before the thread is spawned, so the operation is already
		// visible as in progress by the time this call returns.
		++operations_in_progress;

		std::thread([function = std::move(function)]()
		{
			// NOTE(review): presumably balances the increment above when it
			// goes out of scope - confirm against scoped_operation.
			scoped_operation operation;
			function();
		}).detach();
	}
	else
	{
		// Synchronous path: nothing was counted, so no scoped_operation here.
		function();
	}
}
|
||||
|
||||
std::vector<std::shared_ptr<thread_ctrl>> threads_storage;
|
||||
@ -493,7 +500,7 @@ namespace rsx
|
||||
sw_height_log2 = 1;
|
||||
}
|
||||
|
||||
async_operation([=]
|
||||
async_operation(need_clip || need_convert || (in_w + in_h) > 128, [=]
|
||||
{
|
||||
u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
|
||||
u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset);
|
||||
@ -592,10 +599,8 @@ namespace rsx
|
||||
u16 sw_width = 1 << sw_width_log2;
|
||||
u16 sw_height = 1 << sw_height_log2;
|
||||
|
||||
temp2.reset(new u8[out_bpp * sw_width * sw_height]);
|
||||
|
||||
u8* linear_pixels = pixels_src;
|
||||
u8* swizzled_pixels = temp2.get();
|
||||
u8* swizzled_pixels = pixels_dst;
|
||||
|
||||
std::unique_ptr<u8[]> sw_temp;
|
||||
|
||||
@ -632,8 +637,6 @@ namespace rsx
|
||||
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
|
||||
break;
|
||||
}
|
||||
|
||||
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
|
||||
}
|
||||
});
|
||||
}
|
||||
@ -674,7 +677,7 @@ namespace rsx
|
||||
u32 dst_offset = method_registers[NV0039_OFFSET_OUT];
|
||||
u32 dst_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT];
|
||||
|
||||
async_operation([=]
|
||||
async_operation(line_count * line_length > 64, [=]
|
||||
{
|
||||
u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma));
|
||||
const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma));
|
||||
|
Loading…
Reference in New Issue
Block a user