gl/vk: Bug fixes and improvements (#2206)

* gl: Only bind attrib textures on thread startup

* gl: Persistent mapped buffers

* gl: Fix emulated primitives in an inlined array

* gl: Do not re-update program information every draw call

* gl/vk: s1 type is signed normalized not unsigned normalized

* gl/rsx: Allow disabling of persistent buffers for debugging

gl: Large heap size is more practical

gl: Fix a bug with legacy opengl buffers

* gl/rsx: Allow emulation of unsupported attribute formats

* gl: Fix typos and remove dprints

gl: cleanup debug prints

* ui: Move the GL legacy buffer toggle to the left pane

* vk/gl: Fix cmp type, its range is [-1,1] not [0,1] SNORM_INT
This commit is contained in:
kd-11 2016-10-18 10:57:28 +03:00 committed by raven02
parent 8454949eea
commit 2c803dbe66
11 changed files with 378 additions and 143 deletions

View File

@ -2,12 +2,14 @@
#include "Utilities/Config.h"
#include "Emu/Memory/Memory.h"
#include "GLGSRender.h"
#include "GLVertexProgram.h"
#include "../rsx_methods.h"
#include "../Common/BufferUtils.h"
#include "../rsx_utils.h"
extern cfg::bool_entry g_cfg_rsx_debug_output;
extern cfg::bool_entry g_cfg_rsx_overlay;
extern cfg::bool_entry g_cfg_rsx_gl_legacy_buffers;
#define DUMP_VERTEX_DATA 0
@ -380,8 +382,18 @@ void GLGSRender::end()
return;
}
if (manually_flush_ring_buffers)
{
//Use approximations to reserve space. This path is mostly for debug purposes anyway
u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
u32 approx_working_buffer_size = approx_vertex_count * 256;
//Allocate 256K heap if we have no approximation at this time (inlined array)
m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
}
draw_fbo.bind();
m_program->use();
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
@ -452,6 +464,12 @@ void GLGSRender::end()
m_program->validate();
}
if (manually_flush_ring_buffers)
{
m_attrib_ring_buffer->unmap();
m_index_ring_buffer->unmap();
}
if (indexed_draw_info)
{
if (__glcheck enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
@ -507,17 +525,39 @@ void GLGSRender::on_init_thread()
glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment);
m_vao.create();
for (gl::texture &tex : m_gl_attrib_buffers)
const u32 texture_index_offset =
rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count;
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
auto &tex = m_gl_attrib_buffers[index];
tex.create();
tex.set_target(gl::texture::target::textureBuffer);
glActiveTexture(GL_TEXTURE0 + texture_index_offset + index);
tex.bind();
}
m_attrib_ring_buffer.create(gl::buffer::target::texture, 16 * 0x100000);
m_uniform_ring_buffer.create(gl::buffer::target::uniform, 16 * 0x100000);
m_index_ring_buffer.create(gl::buffer::target::element_array, 0x100000);
if (g_cfg_rsx_gl_legacy_buffers)
{
LOG_WARNING(RSX, "Using legacy openGL buffers.");
manually_flush_ring_buffers = true;
m_vao.element_array_buffer = m_index_ring_buffer;
m_attrib_ring_buffer.reset(new gl::legacy_ring_buffer());
m_uniform_ring_buffer.reset(new gl::legacy_ring_buffer());
m_index_ring_buffer.reset(new gl::legacy_ring_buffer());
}
else
{
m_attrib_ring_buffer.reset(new gl::ring_buffer());
m_uniform_ring_buffer.reset(new gl::ring_buffer());
m_index_ring_buffer.reset(new gl::ring_buffer());
}
m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
m_uniform_ring_buffer->create(gl::buffer::target::uniform, 64 * 0x100000);
m_index_ring_buffer->create(gl::buffer::target::element_array, 16 * 0x100000);
m_vao.element_array_buffer = *m_index_ring_buffer;
m_gl_texture_cache.initialize_rtt_cache();
m_text_printer.init();
}
@ -553,9 +593,9 @@ void GLGSRender::on_exit()
tex.remove();
}
m_attrib_ring_buffer.remove();
m_uniform_ring_buffer.remove();
m_index_ring_buffer.remove();
m_attrib_ring_buffer->remove();
m_uniform_ring_buffer->remove();
m_index_ring_buffer->remove();
m_text_printer.close();
@ -656,6 +696,18 @@ bool GLGSRender::load_program()
RSXVertexProgram vertex_program = get_current_vertex_program();
RSXFragmentProgram fragment_program = get_current_fragment_program();
for (auto &vtx : vertex_program.rsx_vertex_inputs)
{
auto &array_info = rsx::method_registers.vertex_arrays_info[vtx.location];
if (array_info.type() == rsx::vertex_base_type::s1 ||
array_info.type() == rsx::vertex_base_type::cmp)
{
//Some vendors do not support GL_x_SNORM buffer textures
verify(HERE), vtx.flags == 0;
vtx.flags |= GL_VP_FORCE_ATTRIB_SCALING | GL_VP_ATTRIB_S16_INT;
}
}
for (int i = 0; i < 16; ++i)
{
auto &tex = rsx::method_registers.fragment_textures[i];
@ -677,13 +729,55 @@ bool GLGSRender::load_program()
}
}
auto old_program = m_program;
m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr);
m_program->use();
if (old_program == m_program && !m_transform_constants_dirty)
{
//This path is taken a lot so the savings are tangible
struct scale_offset_layout
{
u16 clip_w, clip_h;
float scale_x, offset_x, scale_y, offset_y, scale_z, offset_z;
float fog0, fog1;
u32 alpha_tested;
float alpha_ref;
}
tmp = {};
tmp.clip_w = rsx::method_registers.surface_clip_width();
tmp.clip_h = rsx::method_registers.surface_clip_height();
tmp.scale_x = rsx::method_registers.viewport_scale_x();
tmp.offset_x = rsx::method_registers.viewport_offset_x();
tmp.scale_y = rsx::method_registers.viewport_scale_y();
tmp.offset_y = rsx::method_registers.viewport_offset_y();
tmp.scale_z = rsx::method_registers.viewport_scale_z();
tmp.offset_z = rsx::method_registers.viewport_offset_z();
tmp.fog0 = rsx::method_registers.fog_params_0();
tmp.fog1 = rsx::method_registers.fog_params_1();
tmp.alpha_tested = rsx::method_registers.alpha_test_enabled();
tmp.alpha_ref = rsx::method_registers.alpha_ref();
size_t old_hash = m_transform_buffer_hash;
m_transform_buffer_hash = 0;
u8 *data = reinterpret_cast<u8*>(&tmp);
for (int i = 0; i < sizeof(tmp); ++i)
m_transform_buffer_hash ^= std::hash<char>()(data[i]);
if (old_hash == m_transform_buffer_hash)
return true;
}
m_transform_constants_dirty = false;
u32 fragment_constants_size = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
fragment_constants_size = std::max(32U, fragment_constants_size);
u32 max_buffer_sz = 512 + 8192 + align(fragment_constants_size, m_uniform_buffer_offset_align);
m_uniform_ring_buffer.reserve_and_map(max_buffer_sz);
if (manually_flush_ring_buffers)
m_uniform_ring_buffer->reserve_storage_on_heap(align(max_buffer_sz, 512));
u8 *buf;
u32 scale_offset_offset;
@ -691,7 +785,7 @@ bool GLGSRender::load_program()
u32 fragment_constants_offset;
// Scale offset
auto mapping = m_uniform_ring_buffer.alloc_from_reserve(512);
auto mapping = m_uniform_ring_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first);
scale_offset_offset = mapping.second;
fill_scale_offset_data(buf, false);
@ -707,7 +801,7 @@ bool GLGSRender::load_program()
memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float));
// Vertex constants
mapping = m_uniform_ring_buffer.alloc_from_reserve(8192);
mapping = m_uniform_ring_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first);
vertex_constants_offset = mapping.second;
fill_vertex_program_constants_data(buf);
@ -715,21 +809,22 @@ bool GLGSRender::load_program()
// Fragment constants
if (fragment_constants_size)
{
mapping = m_uniform_ring_buffer.alloc_from_reserve(fragment_constants_size);
mapping = m_uniform_ring_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first);
fragment_constants_offset = mapping.second;
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) }, fragment_program);
}
m_uniform_ring_buffer.unmap();
m_uniform_ring_buffer.bind_range(0, scale_offset_offset, 512);
m_uniform_ring_buffer.bind_range(1, vertex_constants_offset, 8192);
m_uniform_ring_buffer->bind_range(0, scale_offset_offset, 512);
m_uniform_ring_buffer->bind_range(1, vertex_constants_offset, 8192);
if (fragment_constants_size)
{
m_uniform_ring_buffer.bind_range(2, fragment_constants_offset, fragment_constants_size);
m_uniform_ring_buffer->bind_range(2, fragment_constants_offset, fragment_constants_size);
}
if (manually_flush_ring_buffers)
m_uniform_ring_buffer->unmap();
return true;
}

View File

@ -26,19 +26,25 @@ private:
gl::gl_texture_cache m_gl_texture_cache;
gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count];
gl::ring_buffer m_attrib_ring_buffer;
gl::ring_buffer m_uniform_ring_buffer;
gl::ring_buffer m_index_ring_buffer;
std::unique_ptr<gl::ring_buffer> m_attrib_ring_buffer;
std::unique_ptr<gl::ring_buffer> m_uniform_ring_buffer;
std::unique_ptr<gl::ring_buffer> m_index_ring_buffer;
u32 m_draw_calls = 0;
u32 m_begin_time = 0;
u32 m_draw_time = 0;
u32 m_vertex_upload_time = 0;
u32 m_textures_upload_time = 0;
//Compare to see if transform matrix have changed
size_t m_transform_buffer_hash = 0;
GLint m_min_texbuffer_alignment = 256;
GLint m_uniform_buffer_offset_align = 256;
bool manually_flush_ring_buffers = false;
gl::text_writer m_text_printer;
public:

View File

@ -6,6 +6,7 @@
#include <vector>
#include <memory>
#include <unordered_map>
#include <algorithm>
#include "OpenGL.h"
#include "../GCM.h"
@ -383,7 +384,7 @@ namespace gl
read_write = GL_READ_WRITE
};
private:
protected:
GLuint m_id = GL_NONE;
GLsizeiptr m_size = 0;
target m_target = target::array;
@ -587,89 +588,210 @@ namespace gl
class ring_buffer : public buffer
{
u32 m_data_loc = 0;
protected:
u32 m_mapped_block_size = 0;
u32 m_mapped_block_offset;
u32 m_mapped_reserve_offset;
u32 m_mapped_bytes_available;
void *m_mapped_base = nullptr;
u32 m_data_loc = 0;
u32 m_limit = 0;
void *m_memory_mapping = nullptr;
GLsync m_fence = nullptr;
//Blocks until the fence placed at the start of the current heap cycle is signalled, then deletes it.
//Called before the ring buffer wraps so we do not overwrite data the GPU may still be reading.
void wait_for_sync()
{
verify(HERE), m_fence != nullptr;
bool done = false;
while (!done)
{
//Poll the fence with a 1us (1000ns) timeout; GL_SYNC_FLUSH_COMMANDS_BIT guarantees the fence command reaches the GPU
GLenum err = glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, 1000);
switch (err)
{
default:
//Unexpected status (e.g. GL_WAIT_FAILED); log it, then deliberately fall through
//to the signalled cases so we do not spin forever on a broken fence
LOG_ERROR(RSX, "err Returned 0x%X", err);
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
}
}
glDeleteSync(m_fence);
m_fence = nullptr;
}
public:
std::pair<void*, u32> alloc_and_map(u32 alloc_size)
virtual void recreate(GLsizeiptr size, const void* data = nullptr)
{
alloc_size = align(alloc_size, 0x100);
buffer::bind();
u32 limit = m_data_loc + alloc_size;
if (limit > buffer::size())
if (m_id)
{
if (alloc_size > buffer::size())
{
buffer::data(alloc_size);
}
m_data_loc = 0;
wait_for_sync();
remove();
}
buffer::create();
void *ptr = glMapBufferRange((GLenum)buffer::current_target(), m_data_loc, alloc_size,
GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT);
glBindBuffer((GLenum)m_target, m_id);
glBufferStorage((GLenum)m_target, size, data, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
m_memory_mapping = glMapBufferRange((GLenum)m_target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
verify(HERE), m_memory_mapping != nullptr;
m_data_loc = 0;
m_limit = size;
}
void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
{
m_target = target_;
recreate(size, data_);
}
virtual std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment)
{
u32 offset = m_data_loc;
m_data_loc += alloc_size;
return std::make_pair(ptr, offset);
}
if (m_data_loc) offset = align(offset, alignment);
void unmap()
{
buffer::unmap();
m_mapped_block_size = 0;
m_mapped_base = 0;
}
void reserve_and_map(u32 max_size)
{
max_size = align(max_size, 0x1000);
auto mapping = alloc_and_map(max_size);
m_mapped_base = mapping.first;
m_mapped_block_offset = mapping.second;
m_mapped_reserve_offset = 0;
m_mapped_bytes_available = max_size;
}
std::pair<void*, u32> alloc_from_reserve(u32 size, u32 alignment = 16)
{
size = align(size, alignment);
if (m_mapped_bytes_available < size || !m_mapped_base)
if ((offset + alloc_size) > m_limit)
{
if (m_mapped_base)
{
//This doesn't really work for some reason, probably since the caller should bind the target
//before making this call as the block may be reallocated
LOG_ERROR(RSX, "reserved allocation exceeded. check for corruption!");
unmap();
}
reserve_and_map((size > 4096) ? size : 4096);
//TODO: Measure the stall here
wait_for_sync();
m_data_loc = 0;
offset = 0;
}
verify(HERE), m_mapped_bytes_available >= size;
if (!m_data_loc)
{
verify(HERE), m_fence == nullptr;
m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
void *ptr = (char*)m_mapped_base + m_mapped_reserve_offset;
u32 offset = m_mapped_reserve_offset + m_mapped_block_offset;
m_mapped_reserve_offset += size;
m_mapped_bytes_available -= size;
verify(HERE), (offset & (alignment - 1)) == 0;
return std::make_pair(ptr, offset);
//Align data loc to 256; allows some "guard" region so we don't trample our own data inadvertently
m_data_loc = align(offset + alloc_size, 256);
return std::make_pair(((char*)m_memory_mapping) + offset, offset);
}
virtual void remove()
{
if (m_memory_mapping)
{
glBindBuffer((GLenum)m_target, m_id);
glUnmapBuffer((GLenum)m_target);
m_memory_mapping = nullptr;
m_data_loc = 0;
m_limit = 0;
}
glDeleteBuffers(1, &m_id);
m_id = 0;
}
virtual void reserve_storage_on_heap(u32 alloc_size) {}
virtual void unmap() {}
void bind_range(u32 index, u32 offset, u32 size) const
{
glBindBufferRange((GLenum)current_target(), index, id(), offset, size);
}
};
//Fallback ring buffer for drivers without persistent/coherent mapping (ARB_buffer_storage).
//Emulates the heap interface with explicit glMapBufferRange/glUnmapBuffer: callers must
//bracket a batch of allocations with reserve_storage_on_heap() ... unmap().
class legacy_ring_buffer : public ring_buffer
{
u32 m_mapped_bytes = 0; //Bytes still available in the currently mapped window
u32 m_mapping_offset = 0; //Offset of the mapped window within the buffer object
public:
//Recreates backing storage with plain glBufferData (no persistent mapping).
void recreate(GLsizeiptr size, const void* data = nullptr) override
{
if (m_id)
remove();
buffer::create();
buffer::data(size, data);
m_memory_mapping = nullptr;
m_data_loc = 0;
m_limit = size;
}
//Sets the bind target then allocates storage; mirrors ring_buffer::create.
void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
{
m_target = target_;
recreate(size, data_);
}
//Maps a window of at least alloc_size bytes (rounded up to 256) starting at the
//current write position. Must not be called while a mapping is already active.
void reserve_storage_on_heap(u32 alloc_size) override
{
verify (HERE), m_memory_mapping == nullptr;
u32 offset = m_data_loc;
if (m_data_loc) offset = align(offset, 256);
//Wrap to the start if the request does not fit; buffer::data orphans the old
//storage so the unsynchronized map below cannot race pending GPU reads
if ((offset + alloc_size) > m_limit)
{
buffer::data(m_limit, nullptr);
m_data_loc = 0;
}
glBindBuffer((GLenum)m_target, m_id);
m_memory_mapping = glMapBufferRange((GLenum)m_target, m_data_loc, align(alloc_size, 256), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
m_mapped_bytes = align(alloc_size, 256);
m_mapping_offset = m_data_loc;
verify(HERE), m_mapped_bytes >= alloc_size;
}
//Sub-allocates from the mapped window. Returns {write pointer, offset within the
//buffer object}; remaps a larger window if the reservation was exceeded.
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override
{
u32 offset = m_data_loc;
if (m_data_loc) offset = align(offset, alignment);
u32 padding = (offset - m_data_loc);
u32 real_size = padding + alloc_size;
if (real_size > m_mapped_bytes)
{
//Missed allocation. We take a performance hit on doing this.
//Overallocate slightly for the next allocation if requested size is too small
unmap();
reserve_storage_on_heap(std::max(real_size, 4096U));
offset = m_data_loc;
if (m_data_loc) offset = align(offset, alignment);
padding = (offset - m_data_loc);
real_size = padding + alloc_size;
}
m_data_loc = offset + alloc_size;
m_mapped_bytes -= real_size;
//Returned pointer is relative to the mapped window, not the buffer start
u32 local_offset = (offset - m_mapping_offset);
return std::make_pair(((char*)m_memory_mapping) + local_offset, offset);
}
//Releases the buffer object and resets window bookkeeping.
void remove() override
{
ring_buffer::remove();
m_mapped_bytes = 0;
}
//Flushes the current window back to the GL; must be called before drawing
//with data written since the last reserve_storage_on_heap().
void unmap() override
{
buffer::bind();
buffer::unmap();
m_memory_mapping = nullptr;
m_mapped_bytes = 0;
m_mapping_offset = 0;
}
};
class vao
{
template<buffer::target BindId, uint GetStateId>

View File

@ -176,6 +176,14 @@ OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT);
//ARB_Copy_Image
OPENGL_PROC(PFNGLCOPYIMAGESUBDATAPROC, CopyImageSubData);
//ARB_Buffer_Storage
OPENGL_PROC(PFNGLBUFFERSTORAGEPROC, BufferStorage);
//ARB_sync
OPENGL_PROC(PFNGLFENCESYNCPROC, FenceSync);
OPENGL_PROC(PFNGLCLIENTWAITSYNCPROC, ClientWaitSync);
OPENGL_PROC(PFNGLDELETESYNCPROC, DeleteSync);
//KHR_debug
OPENGL_PROC(PFNGLDEBUGMESSAGECALLBACKPROC, DebugMessageCallback);

View File

@ -22,13 +22,16 @@ namespace
u32 to_gl_internal_type(rsx::vertex_base_type type, u8 size)
{
/**
* The buffer texture spec only allows fetches aligned to 8, 16, 32, etc...
* NOTE 1. The buffer texture spec only allows fetches aligned to 8, 16, 32, etc...
* This rules out most 3-component formats, except for the 32-wide RGB32F, RGB32I, RGB32UI
*
* NOTE 2. While s1 &amp; cmp types are signed normalized 16-bit integers, some GPU vendors don't support texture buffer access
* using these formats. Pass a 16 bit unnormalized integer and convert it in the vertex shader
*/
const u32 vec1_types[] = { GL_R16, GL_R32F, GL_R16F, GL_R8, GL_R16I, GL_R16, GL_R8UI };
const u32 vec2_types[] = { GL_RG16, GL_RG32F, GL_RG16F, GL_RG8, GL_RG16I, GL_RG16, GL_RG8UI };
const u32 vec3_types[] = { GL_RGBA16, GL_RGB32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16, GL_RGBA8UI }; //VEC3 COMPONENTS NOT SUPPORTED!
const u32 vec4_types[] = { GL_RGBA16, GL_RGBA32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16, GL_RGBA8UI };
const u32 vec1_types[] = { GL_R16I, GL_R32F, GL_R16F, GL_R8, GL_R16I, GL_RGBA16I, GL_R8UI };
const u32 vec2_types[] = { GL_RG16I, GL_RG32F, GL_RG16F, GL_RG8, GL_RG16I, GL_RGBA16I, GL_RG8UI };
const u32 vec3_types[] = { GL_RGBA16I, GL_RGB32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16I, GL_RGBA8UI }; //VEC3 COMPONENTS NOT SUPPORTED!
const u32 vec4_types[] = { GL_RGBA16I, GL_RGBA32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16I, GL_RGBA8UI };
const u32* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types };
@ -152,7 +155,7 @@ namespace
}
u32 first = 0;
auto mapping = dst.alloc_and_map(vertex_draw_count * sizeof(u16));
auto mapping = dst.alloc_from_heap(vertex_draw_count * sizeof(u16), 256);
char *mapped_buffer = (char *)mapping.first;
for (const auto &pair : first_count_commands)
@ -163,7 +166,6 @@ namespace
first += pair.second;
}
dst.unmap();
return std::make_tuple(vertex_draw_count, mapping.second);
}
@ -201,12 +203,10 @@ namespace
struct vertex_buffer_visitor
{
vertex_buffer_visitor(u32 vtx_cnt, u32 texture_idx_offset, gl::ring_buffer& heap,
gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset)
vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset)
: vertex_count(vtx_cnt)
, m_attrib_ring_info(heap)
, m_program(prog)
, texture_index_offset(texture_idx_offset)
, m_gl_attrib_buffers(attrib_buffer)
, m_min_texbuffer_alignment(min_texbuffer_offset)
{
@ -226,7 +226,7 @@ namespace
auto& texture = m_gl_attrib_buffers[vertex_array.index];
u32 buffer_offset = 0;
auto mapping = m_attrib_ring_info.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment);
gsl::byte* dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
gsl::span<gsl::byte> dest_span(dst, data_size);
@ -236,10 +236,6 @@ namespace
write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size));
texture.copy_from(m_attrib_ring_info, gl_type, buffer_offset, data_size);
//Link texture to uniform
glActiveTexture(GL_TEXTURE0 + texture_index_offset + vertex_array.index);
texture.bind();
}
void operator()(const rsx::vertex_array_register& vertex_register)
@ -257,15 +253,11 @@ namespace
auto& texture = m_gl_attrib_buffers[vertex_register.index];
auto mapping = m_attrib_ring_info.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment);
u8 *dst = static_cast<u8*>(mapping.first);
memcpy(dst, vertex_register.data.data(), element_size);
texture.copy_from(m_attrib_ring_info, gl_type, mapping.second, data_size);
//Link texture to uniform
glActiveTexture(GL_TEXTURE0 + texture_index_offset + vertex_register.index);
texture.bind();
break;
}
default:
@ -282,7 +274,6 @@ namespace
u32 vertex_count;
gl::ring_buffer& m_attrib_ring_info;
gl::glsl::program* m_program;
u32 texture_index_offset;
gl::texture* m_gl_attrib_buffers;
GLint m_min_texbuffer_alignment;
};
@ -325,13 +316,13 @@ namespace
rsx::method_registers.current_draw_clause.first_count_commands,
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset);
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size);
return std::make_tuple(index_count,
std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer));
}
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset);
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size);
return std::make_tuple(vertex_count, std::optional<std::tuple<GLenum, u32>>());
}
@ -351,7 +342,7 @@ namespace
index_count = (u32)get_index_count(rsx::method_registers.current_draw_clause.primitive, vertex_count);
u32 max_size = index_count * type_size;
auto mapping = m_index_ring_buffer.alloc_and_map(max_size);
auto mapping = m_index_ring_buffer.alloc_from_heap(max_size, 256);
void* ptr = mapping.first;
u32 offset_in_index_buffer = mapping.second;
@ -359,9 +350,7 @@ namespace
command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive,
rsx::method_registers.current_draw_clause.first_count_commands, vertex_count);
m_index_ring_buffer.unmap();
upload_vertex_buffers(0, max_index, max_vertex_attrib_size, texture_index_offset);
upload_vertex_buffers(0, max_index, max_vertex_attrib_size);
return std::make_tuple(index_count, std::make_tuple(get_index_type(type), offset_in_index_buffer));
}
@ -370,13 +359,13 @@ namespace
const rsx::draw_inlined_array& command)
{
// We need to go through array to determine vertex count so upload it
u32 vertex_count = upload_inline_array(max_vertex_attrib_size, texture_index_offset);
u32 vertex_count = upload_inline_array(max_vertex_attrib_size);
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) {
u32 offset_in_index_buffer;
u32 index_count;
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
rsx::method_registers.current_draw_clause.first_count_commands,
{ std::make_pair(0, vertex_count) },
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
return std::make_tuple(index_count,
std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer));
@ -385,8 +374,6 @@ namespace
}
private:
const u32 texture_index_offset =
rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count;
u32 max_vertex_attrib_size = 0;
gl::ring_buffer& m_index_ring_buffer;
gl::ring_buffer& m_attrib_ring_buffer;
@ -397,21 +384,18 @@ namespace
std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)>
get_vertex_buffers;
void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size,
const u32& texture_index_offset)
void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size)
{
u32 verts_allocated = max_index - min_index + 1;
__glcheck m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size);
vertex_buffer_visitor visitor(verts_allocated, texture_index_offset, m_attrib_ring_buffer,
vertex_buffer_visitor visitor(verts_allocated, m_attrib_ring_buffer,
m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment);
const auto& vertex_buffers =
get_vertex_buffers(rsx::method_registers, {{min_index, verts_allocated}});
for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo);
m_attrib_ring_buffer.unmap();
}
u32 upload_inline_array(const u32& max_vertex_attrib_size, const u32& texture_index_offset)
u32 upload_inline_array(const u32& max_vertex_attrib_size)
{
u32 stride = 0;
u32 offsets[rsx::limits::vertex_count] = {0};
@ -427,7 +411,6 @@ namespace
u32 vertex_draw_count =
(u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) /
stride;
m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size);
for (int index = 0; index < rsx::limits::vertex_count; ++index) {
auto& vertex_info = rsx::method_registers.vertex_arrays_info[index];
@ -435,7 +418,7 @@ namespace
int location;
if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue;
if (!vertex_info.size()) // disabled, bind a null sampler
if (!vertex_info.size())
continue;
const u32 element_size =
@ -447,7 +430,7 @@ namespace
u8* src =
reinterpret_cast<u8*>(rsx::method_registers.current_draw_clause.inline_vertex_array.data());
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
auto mapping = m_attrib_ring_buffer.alloc_from_heap(data_size, m_min_texbuffer_alignment);
u8* dst = static_cast<u8*>(mapping.first);
src += offsets[index];
@ -469,11 +452,6 @@ namespace
}
texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size);
// Link texture to uniform
glActiveTexture(GL_TEXTURE0 + texture_index_offset + index);
texture.bind();
m_attrib_ring_buffer.unmap();
}
return vertex_draw_count;
}
@ -483,7 +461,7 @@ namespace
std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_buffer()
{
std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now();
auto result = std::apply_visitor(draw_command_visitor(m_index_ring_buffer, m_attrib_ring_buffer,
auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer,
m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
[this](const auto& state, const auto& list) {
return this->get_vertex_buffers(state, list);

View File

@ -122,7 +122,7 @@ void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
{
if (attrib.location == std::get<0>(item))
{
if (attrib.int_type) is_int = true;
if (attrib.int_type || attrib.flags & GL_VP_SINT_MASK) is_int = true;
break;
}
}
@ -247,9 +247,18 @@ void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector<rs
if (real_input.int_type)
vecType = " ivec4 ";
std::string scale = "";
if (real_input.flags & GL_VP_SINT_MASK)
{
if (real_input.flags & GL_VP_ATTRIB_S16_INT)
scale = " / 32767.";
else
scale = " / 2147483647.";
}
if (!real_input.is_array)
{
OS << vecType << PI.name << " = texelFetch(" << PI.name << "_buffer, 0);" << std::endl;
OS << vecType << PI.name << " = texelFetch(" << PI.name << "_buffer, 0)" << scale << ";" << std::endl;
return;
}
@ -257,19 +266,21 @@ void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector<rs
{
if (real_input.is_modulo)
{
OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID %" << real_input.frequency << ");" << std::endl;
OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID %" << real_input.frequency << ")" << scale << ";" << std::endl;
return;
}
OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID /" << real_input.frequency << ");" << std::endl;
OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID /" << real_input.frequency << ")" << scale << ";" << std::endl;
return;
}
OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID).rgba;" << std::endl;
OS << vecType << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID)" << scale << ";" << std::endl;
return;
}
OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID).rgba;" << std::endl;
LOG_WARNING(RSX, "Vertex input %s does not have a matching vertex_input declaration", PI.name.c_str());
OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID);" << std::endl;
}
void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)

View File

@ -4,6 +4,15 @@
#include "Utilities/Thread.h"
#include "OpenGL.h"
//Backend hint flags stored in rsx_vertex_input::flags. Used to emulate SNORM attribute
//formats on vendors whose buffer textures lack them: the attribute is fetched as a raw
//signed integer and rescaled to [-1,1] in the generated vertex shader.
enum
{
GL_VP_FORCE_ATTRIB_SCALING = 1, //Scale vertex read result
GL_VP_ATTRIB_S16_INT = (1 << 1), //Attrib is a signed 16-bit integer (shader divides by 32767)
GL_VP_ATTRIB_S32_INT = (1 << 2), //Attrib is a signed 32-bit integer (shader divides by 2147483647)
GL_VP_SINT_MASK = (GL_VP_ATTRIB_S16_INT|GL_VP_ATTRIB_S32_INT) //Any signed-int emulation flag set
};
struct GLVertexDecompilerThread : public VertexProgramDecompiler
{
std::string &m_shader;

View File

@ -27,6 +27,7 @@ cfg::bool_entry g_cfg_rsx_log_programs(cfg::root.video, "Log shader programs");
cfg::bool_entry g_cfg_rsx_vsync(cfg::root.video, "VSync");
cfg::bool_entry g_cfg_rsx_debug_output(cfg::root.video, "Debug output");
cfg::bool_entry g_cfg_rsx_overlay(cfg::root.video, "Debug overlay");
cfg::bool_entry g_cfg_rsx_gl_legacy_buffers(cfg::root.video, "Use Legacy OpenGL Buffers (Debug)");
bool user_asked_for_frame_capture = false;
rsx::frame_capture_data frame_debug;
@ -799,7 +800,7 @@ namespace rsx
rsx::method_registers.vertex_arrays_info[index].frequency(),
!!((modulo_mask >> index) & 0x1),
true,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type())});
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0});
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
@ -809,7 +810,7 @@ namespace rsx
rsx::method_registers.register_vertex_info[index].frequency,
!!((modulo_mask >> index) & 0x1),
false,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type())});
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0});
}
}
return result;

View File

@ -211,10 +211,12 @@ struct rsx_vertex_input
bool is_modulo; // either modulo frequency or divide frequency
bool is_array; // false if "reg value"
bool int_type;
u32 flags; //Initially zero, to be optionally filled by the backend
bool operator==(const rsx_vertex_input other) const
{
return location == other.location && size == other.size && frequency == other.frequency && is_modulo == other.is_modulo && is_array == other.is_array && int_type == other.int_type;
return location == other.location && size == other.size && frequency == other.frequency && is_modulo == other.is_modulo &&
is_array == other.is_array && int_type == other.int_type && flags == other.flags;
}
};

View File

@ -41,10 +41,10 @@ namespace vk
* Set up buffer fetches to only work on 4-component access. This is hardware dependent so we use 4-component access to avoid branching based on IHV implementation
* AMD GCN 1.0 for example does not support RGB32 formats for texel buffers
*/
const VkFormat vec1_types[] = { VK_FORMAT_R16_UNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16_UNORM, VK_FORMAT_R8_UINT };
const VkFormat vec2_types[] = { VK_FORMAT_R16G16_UNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R8G8_UINT };
const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R8G8B8A8_UINT }; //VEC3 COMPONENTS NOT SUPPORTED!
const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R8G8B8A8_UINT };
const VkFormat vec1_types[] = { VK_FORMAT_R16_SNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8_UINT };
const VkFormat vec2_types[] = { VK_FORMAT_R16G16_SNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8_UINT };
const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT }; //VEC3 COMPONENTS NOT SUPPORTED!
const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT };
const VkFormat* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types };

View File

@ -307,6 +307,7 @@ SettingsDialog::SettingsDialog(wxWindow* parent)
wxCheckBox* chbox_gs_vsync = new wxCheckBox(p_graphics, wxID_ANY, "VSync");
wxCheckBox* chbox_gs_debug_output = new wxCheckBox(p_graphics, wxID_ANY, "Debug Output");
wxCheckBox* chbox_gs_overlay = new wxCheckBox(p_graphics, wxID_ANY, "Debug overlay");
wxCheckBox* chbox_gs_gl_legacy_buffers = new wxCheckBox(p_graphics, wxID_ANY, "Use Legacy OpenGL Buffers");
wxCheckBox* chbox_audio_dump = new wxCheckBox(p_audio, wxID_ANY, "Dump to file");
wxCheckBox* chbox_audio_conv = new wxCheckBox(p_audio, wxID_ANY, "Convert to 16 bit");
wxCheckBox* chbox_hle_exitonstop = new wxCheckBox(p_misc, wxID_ANY, "Exit RPCS3 when process finishes");
@ -382,6 +383,7 @@ SettingsDialog::SettingsDialog(wxWindow* parent)
pads.emplace_back(std::make_unique<checkbox_pad>(cfg_location{ "Video", "VSync" }, chbox_gs_vsync));
pads.emplace_back(std::make_unique<checkbox_pad>(cfg_location{ "Video", "Debug output" }, chbox_gs_debug_output));
pads.emplace_back(std::make_unique<checkbox_pad>(cfg_location{ "Video", "Debug overlay" }, chbox_gs_overlay));
pads.emplace_back(std::make_unique<checkbox_pad>(cfg_location{ "Video", "Use Legacy OpenGL Buffers (Debug)" }, chbox_gs_gl_legacy_buffers));
pads.emplace_back(std::make_unique<combobox_pad>(cfg_location{ "Audio", "Renderer" }, cbox_audio_out));
pads.emplace_back(std::make_unique<checkbox_pad>(cfg_location{ "Audio", "Dump to file" }, chbox_audio_dump));
@ -467,6 +469,7 @@ SettingsDialog::SettingsDialog(wxWindow* parent)
s_subpanel_graphics1->Add(chbox_gs_read_color, wxSizerFlags().Border(wxALL, 5).Expand());
s_subpanel_graphics1->Add(chbox_gs_dump_depth, wxSizerFlags().Border(wxALL, 5).Expand());
s_subpanel_graphics1->Add(chbox_gs_read_depth, wxSizerFlags().Border(wxALL, 5).Expand());
s_subpanel_graphics1->Add(chbox_gs_gl_legacy_buffers, wxSizerFlags().Border(wxALL, 5).Expand());
s_subpanel_graphics2->Add(s_round_gs_aspect, wxSizerFlags().Border(wxALL, 5).Expand());
s_subpanel_graphics2->Add(s_round_gs_frame_limit, wxSizerFlags().Border(wxALL, 5).Expand());
s_subpanel_graphics2->AddSpacer(68);