rsx: Improve present image scanning

- Adds support for partial (letterboxed) source images by taking insets
into account.
- Bugfix for potential access violation when capturing screenshot on
vulkan
This commit is contained in:
kd-11 2020-01-17 22:44:59 +03:00 committed by kd-11
parent 7453e46a7c
commit bad4d1ff05
2 changed files with 152 additions and 170 deletions

View File

@ -5,46 +5,34 @@ GLuint GLGSRender::get_present_source(gl::present_surface_info* info, const rsx:
{
GLuint image = GL_NONE;
if (auto render_target_texture = m_rtts.get_color_surface_at(info->address))
// Check the surface store first
gl::command_context cmd = { gl_state };
const auto format_bpp = get_format_block_size_in_bytes(info->format);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd,
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read);
if (!overlap_info.empty())
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
const auto& section = overlap_info.back();
auto surface = gl::as_rtt(section.surface);
if (section.base_address >= info->address)
{
image = render_target_texture->raw_handle();
}
else
{
gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
// Check for intentional 'borders'
const u32 inset_offset = section.base_address - info->address;
const u32 inset_y = inset_offset / info->pitch;
const u32 inset_x = (inset_offset % info->pitch) / format_bpp;
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
const u32 full_width = surface->get_surface_width(rsx::surface_metrics::samples) + inset_x + inset_x;
const u32 full_height = surface->get_surface_height(rsx::surface_metrics::samples) + inset_y + inset_y;
if (full_width == info->width && full_height == info->height)
{
// Confirmed to be the newest data source in that range
image = render_target_texture->raw_handle();
}
}
surface->read_barrier(cmd);
image = section.surface->get_surface(rsx::surface_access::read)->id();
if (image)
{
const auto buffer_width = rsx::apply_resolution_scale(info->width, true);
const auto buffer_height = rsx::apply_resolution_scale(info->height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
info->width, info->height,
avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
info->width = render_target_texture->width();
info->height = render_target_texture->height();
}
else
{
info->width = buffer_width;
info->height = buffer_height;
info->width = rsx::apply_resolution_scale(full_width - (inset_x + inset_x), true);
info->height = rsx::apply_resolution_scale(full_height - (inset_y + inset_y), true);
}
}
}
@ -118,7 +106,24 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
gl::screen.bind();
gl::screen.clear(gl::buffers::color);
// Calculate blit coordinates
GLuint image_to_flip = GL_NONE;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
// Find the source image
gl::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
image_to_flip = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
}
// Calculate blit coordinates
coordi aspect_ratio;
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
@ -143,20 +148,8 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
aspect_ratio.size = new_size;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
// Find the source image
gl::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
const GLuint image = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
if (image_to_flip)
{
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
@ -167,9 +160,9 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
pack_settings.apply();
if (gl::get_driver_caps().ARB_dsa_supported)
glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data());
glGetTextureImage(image_to_flip, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data());
else
glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data());
glGetTextureImageEXT(image_to_flip, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data());
if (GLenum err; (err = glGetError()) != GL_NO_ERROR)
LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err);
@ -184,7 +177,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
// Blit source image to the screen
m_flip_fbo.recreate();
m_flip_fbo.bind();
m_flip_fbo.color = image;
m_flip_fbo.color = image_to_flip;
m_flip_fbo.read_buffer(m_flip_fbo.color);
m_flip_fbo.draw_buffer(m_flip_fbo.color);
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
@ -195,7 +188,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
const bool limited_range = !g_cfg.video.full_rgb_range_output;
gl::screen.bind();
m_video_output_pass.run(areau(aspect_ratio), image, gamma, limited_range);
m_video_output_pass.run(areau(aspect_ratio), image_to_flip, gamma, limited_range);
}
}

View File

@ -265,51 +265,40 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
{
vk::image* image_to_flip = nullptr;
if (auto render_target_texture = m_rtts.get_color_surface_at(info->address))
// Check the surface store first
const auto format_bpp = get_format_block_size_in_bytes(info->format);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer,
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read);
if (!overlap_info.empty())
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image_to_flip = render_target_texture;
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image_to_flip = render_target_texture;
}
}
const auto& section = overlap_info.back();
auto surface = vk::as_rtt(section.surface);
if (image_to_flip)
if (section.base_address >= info->address)
{
const auto buffer_width = rsx::apply_resolution_scale(info->width, true);
const auto buffer_height = rsx::apply_resolution_scale(info->height, true);
// Check for intentional 'borders'
const u32 inset_offset = section.base_address - info->address;
const u32 inset_y = inset_offset / info->pitch;
const u32 inset_x = (inset_offset % info->pitch) / format_bpp;
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
info->width, info->height,
avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
const u32 full_width = surface->get_surface_width(rsx::surface_metrics::samples) + inset_x + inset_x;
const u32 full_height = surface->get_surface_height(rsx::surface_metrics::samples) + inset_y + inset_y;
info->width = render_target_texture->width();
info->height = render_target_texture->height();
}
else
if (full_width == info->width && full_height == info->height)
{
info->width = buffer_width;
info->height = buffer_height;
surface->read_barrier(*m_current_command_buffer);
image_to_flip = section.surface->get_surface(rsx::surface_access::read);
info->width = rsx::apply_resolution_scale(full_width - (inset_x + inset_x), true);
info->height = rsx::apply_resolution_scale(full_height - (inset_y + inset_y), true);
}
}
}
else if (auto surface = m_texture_cache.find_texture_from_dimensions<true>(info->address, info->format, info->width, info->height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
// Hack - this should be the first location to check for output
// The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture();
}
@ -426,32 +415,23 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
buffer_pitch = buffer_width * 4;
}
coordi aspect_ratio;
sizei csize = m_swapchain_dims;
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
// Scan memory for required data. This is done early to optimize waiting for the driver image acquire below.
vk::image* image_to_flip = nullptr;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
vk::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
image_to_flip = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
}
aspect_ratio.size = new_size;
//Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
// Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
verify(HERE), m_current_frame->present_image == UINT32_MAX;
verify(HERE), m_current_frame->swap_command_buffer == nullptr;
@ -463,14 +443,14 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
case VK_TIMEOUT:
case VK_NOT_READY:
{
//In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images.
//This means that any acquired images have to be released
//before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available
//This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image
//Found on AMD Crimson 17.7.2
// In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images.
// This means that any acquired images have to be released
// before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available
// This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image
// Found on AMD Crimson 17.7.2
//Whatever returned from status, this is now a spin
// Whatever returned from status, this is now a spin
timeout = 0ull;
check_present_status();
continue;
@ -488,26 +468,35 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
}
}
//Confirm that the driver did not silently fail
// Confirm that the driver did not silently fail
verify(HERE), m_current_frame->present_image != UINT32_MAX;
//Blit contents to screen..
vk::image* image_to_flip = nullptr;
// Calculate output dimensions. Done after swapchain acquisition in case it was recreated.
coordi aspect_ratio;
sizei csize = m_swapchain_dims;
sizei new_size = csize;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
vk::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
image_to_flip = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
}
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
// Blit contents to screen..
VkImage target_image = m_swapchain->get_image(m_current_frame->present_image);
const auto present_layout = m_swapchain->get_optimal_present_layout();
@ -558,11 +547,48 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
direct_fbo->release();
}
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
const size_t sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
VkBufferImageCopy copy_info;
copy_info.bufferOffset = 0;
copy_info.bufferRowLength = 0;
copy_info.bufferImageHeight = 0;
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.baseArrayLayer = 0;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.mipLevel = 0;
copy_info.imageOffset.x = 0;
copy_info.imageOffset.y = 0;
copy_info.imageOffset.z = 0;
copy_info.imageExtent.width = buffer_width;
copy_info.imageExtent.height = buffer_height;
copy_info.imageExtent.depth = 1;
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info);
image_to_flip->pop_layout(*m_current_command_buffer);
flush_command_queue(true);
auto src = sshot_vkbuf.map(0, sshot_size);
std::vector<u8> sshot_frame(sshot_size);
memcpy(sshot_frame.data(), src, sshot_size);
sshot_vkbuf.unmap();
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
}
else
{
//No draw call was issued!
//TODO: Upload raw bytes from cpu for rendering
// No draw call was issued!
// TODO: Upload raw bytes from cpu for rendering
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
@ -570,43 +596,6 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
const size_t sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
VkBufferImageCopy copy_info;
copy_info.bufferOffset = 0;
copy_info.bufferRowLength = 0;
copy_info.bufferImageHeight = 0;
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.baseArrayLayer = 0;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.mipLevel = 0;
copy_info.imageOffset.x = 0;
copy_info.imageOffset.y = 0;
copy_info.imageOffset.z = 0;
copy_info.imageExtent.width = buffer_width;
copy_info.imageExtent.height = buffer_height;
copy_info.imageExtent.depth = 1;
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info);
image_to_flip->pop_layout(*m_current_command_buffer);
flush_command_queue(true);
auto src = sshot_vkbuf.map(0, sshot_size);
std::vector<u8> sshot_frame(sshot_size);
memcpy(sshot_frame.data(), src, sshot_size);
sshot_vkbuf.unmap();
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible());
if (g_cfg.video.overlay || has_overlay)
{