Add vertex texture buffers for VS input

Support vertex instancing in vertex shader using VertexID

Relax OpenGL requirements by removing 4.5 features

Use EXT version of TexBufferRange; Implement buffer copy using TexBuffer

Apply travis workaround by danilaml

Fix vertex upload in case of inlined array
This commit is contained in:
kd-11 2016-01-28 20:01:10 +03:00
parent 1e1c2007a3
commit 7b889a10cc
8 changed files with 211 additions and 29 deletions

View File

@ -33,8 +33,10 @@ git:
before_install:
# shutdown services on Travis, which may have a memory impact
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then
echo "yes" | sudo apt-add-repository 'deb http://repos.codelite.org/wx3.0/ubuntu/ precise universe';
sudo apt-get install libwxgtk3.0-dev;
sudo apt-add-repository -y ppa:libreoffice/ppa;
sudo apt-get update;
sudo apt-get install libglew-dev;
fi;
- if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$CXX" = "g++" ]; then
export CXX="g++-5" CC="gcc-5" CXXFLAGS="-Wno-format-security";
@ -73,7 +75,7 @@ addons:
- cmake
- libopenal-dev
- freeglut3-dev
- libglew-dev
# - libglew-dev apt version is too old
- libc6-dev
- llvm-3.6
- llvm-3.6-dev

View File

@ -62,7 +62,7 @@ void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_
*c_dst++ = *c_src++;
}
if (vertex_array_desc.size * sizeof(u16) < element_size)
*c_dst++ = 0x3800;
*c_dst++ = 0x3c00;
break;
}

View File

@ -30,6 +30,25 @@ namespace
}
throw EXCEPTION("Unknow depth format");
}
/**
 * Maps an RSX vertex attribute format to the sized GL internal format used when the
 * attribute data is exposed to the vertex shader through a buffer texture.
 *
 * @param type  Base type of the attribute; used as the index into the per-size tables.
 *              NOTE(review): the table order is assumed to match the declaration order
 *              of rsx::vertex_base_type - confirm against the enum.
 * @param size  Component count of the attribute; must be in the range 1-4.
 * @return      The GL internal format enum for the (type, size) pair.
 * @throws      EXCEPTION if type is past ub256 or size is outside 1-4.
 */
u32 to_gl_internal_type(rsx::vertex_base_type type, u8 size)
{
	/**
	 * The buffer texture spec only allows fetches aligned to 8, 16, 32, etc...
	 * This rules out most 3-component formats, except for the 32-wide RGB32F, RGB32I, RGB32UI
	 */
	static const u32 vec1_types[] = { GL_R16, GL_R32F, GL_R16F, GL_R8, GL_R32I, GL_R16F, GL_R8 };
	static const u32 vec2_types[] = { GL_RG16, GL_RG32F, GL_RG16F, GL_RG8, GL_RG32I, GL_RG16F, GL_RG8 };
	static const u32 vec3_types[] = { GL_RGBA16, GL_RGB32F, GL_RGBA16F, GL_RGBA8, GL_RGB32I, GL_RGBA16F, GL_RGBA8 }; //VEC3 COMPONENTS NOT SUPPORTED! (4-component formats stand in for the non 32-bit types)
	static const u32 vec4_types[] = { GL_RGBA16, GL_RGBA32F, GL_RGBA16F, GL_RGBA8, GL_RGBA32I, GL_RGBA16F, GL_RGBA8 };

	// Slot 0 is a placeholder so that the component count can be used directly as the index
	static const u32* const vec_selectors[] = { nullptr, vec1_types, vec2_types, vec3_types, vec4_types };

	if (type > rsx::vertex_base_type::ub256)
		throw EXCEPTION("OpenGL error: unknown vertex base type 0x%X.", (u32)type);

	// Without this check a size of 0 dereferences the null placeholder and a size above 4 reads past the selector table
	if (size < 1 || size > 4)
		throw EXCEPTION("OpenGL error: unsupported vertex attribute size %u.", (u32)size);

	return vec_selectors[size][static_cast<int>(type)];
}
}
GLGSRender::GLGSRender() : GSRender(frame_type::OpenGL)
@ -397,11 +416,11 @@ void GLGSRender::end()
};
u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK];
m_vao.bind();
std::vector<u8> vertex_index_array;
vertex_draw_count = 0;
u32 min_index, max_index;
if (draw_command == rsx::draw_command::indexed)
{
rsx::index_array_type type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4);
@ -426,6 +445,7 @@ void GLGSRender::end()
if (draw_command == rsx::draw_command::inlined_array)
{
vertex_arrays_data.resize(inline_vertex_array.size() * sizeof(u32));
write_inline_array_to_buffer(vertex_arrays_data.data());
u32 offset = 0;
for (int index = 0; index < rsx::limits::vertex_count; ++index)
@ -436,12 +456,25 @@ void GLGSRender::end()
continue;
int location;
if (!m_program->attribs.has_location(reg_table[index], &location))
if (!m_program->uniforms.has_location(reg_table[index] + "_buffer", &location))
continue;
__glcheck m_program->attribs[location] =
(m_vao + offset)
.config(gl_types(vertex_info.type), vertex_info.size, gl_normalized(vertex_info.type));
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size);
const u32 data_size = element_size * vertex_draw_count;
auto &buffer = m_gl_attrib_buffers[index].buffer;
auto &texture = m_gl_attrib_buffers[index].texture;
buffer->data(data_size, nullptr);
buffer->sub_data(0, data_size, vertex_arrays_data.data()+offset);
//Attach buffer to texture
texture->copy_from(*buffer, gl_type);
//Link texture to uniform
m_program->uniforms.texture(location, index +rsx::limits::vertex_count, *texture);
offset += rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
}
}
@ -463,7 +496,7 @@ void GLGSRender::end()
continue;
int location;
if (!m_program->attribs.has_location(reg_table[index], &location))
if (!m_program->uniforms.has_location(reg_table[index]+"_buffer", &location))
continue;
if (vertex_arrays_info[index].size > 0)
@ -495,29 +528,48 @@ void GLGSRender::end()
vertex_arrays_offsets[index] = gsl::narrow<u32>(position);
vertex_arrays_data.resize(position + size);
memcpy(vertex_arrays_data.data() + position, vertex_array.data(), size);
const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size);
const u32 data_size = element_size * vertex_draw_count;
__glcheck m_program->attribs[location] =
(m_vao + vertex_arrays_offsets[index])
.config(gl_types(vertex_info.type), vertex_info.size, gl_normalized(vertex_info.type));
auto &buffer = m_gl_attrib_buffers[index].buffer;
auto &texture = m_gl_attrib_buffers[index].texture;
buffer->data(data_size, nullptr);
buffer->sub_data(0, data_size, vertex_array.data());
//Attach buffer to texture
texture->copy_from(*buffer, gl_type);
//Link texture to uniform
m_program->uniforms.texture(location, index + rsx::limits::vertex_count, *texture);
}
else if (register_vertex_info[index].size > 0)
{
//Untested!
auto &vertex_data = register_vertex_data[index];
auto &vertex_info = register_vertex_info[index];
switch (vertex_info.type)
{
case rsx::vertex_base_type::f:
switch (register_vertex_info[index].size)
{
case 1: apply_attrib_array<f32, 1>(*m_program, location, vertex_data); break;
case 2: apply_attrib_array<f32, 2>(*m_program, location, vertex_data); break;
case 3: apply_attrib_array<f32, 3>(*m_program, location, vertex_data); break;
case 4: apply_attrib_array<f32, 4>(*m_program, location, vertex_data); break;
}
break;
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size);
const u32 data_size = vertex_data.size();
auto &buffer = m_gl_attrib_buffers[index].buffer;
auto &texture = m_gl_attrib_buffers[index].texture;
buffer->data(data_size, nullptr);
buffer->sub_data(0, data_size, vertex_data.data());
//Attach buffer to texture
texture->copy_from(*buffer, gl_type);
//Link texture to uniform
m_program->uniforms.texture(location, index + rsx::limits::vertex_count, *texture);
break;
}
default:
LOG_ERROR(RSX, "bad non array vertex data format (type = %d, size = %d)", vertex_info.type, vertex_info.size);
break;
@ -525,7 +577,11 @@ void GLGSRender::end()
}
}
}
m_vbo.data(vertex_arrays_data.size(), vertex_arrays_data.data());
// glDraw* will fail without at least attrib0 defined if we are on compatibility profile
// Someone should really test AMD behaviour here, Nvidia is too permissive. There is no buffer currently bound, but on NV it works ok
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 2, GL_FLOAT, false, 0, 0);
if (draw_command == rsx::draw_command::indexed)
{
@ -609,6 +665,18 @@ void GLGSRender::on_init_thread()
m_vao.array_buffer = m_vbo;
m_vao.element_array_buffer = m_ebo;
for (texture_buffer_pair &attrib_buffer : m_gl_attrib_buffers)
{
gl::texture *&tex = attrib_buffer.texture;
tex = new gl::texture(gl::texture::target::textureBuffer);
tex->create();
tex->set_target(gl::texture::target::textureBuffer);
gl::buffer *&buf = attrib_buffer.buffer;
buf = new gl::buffer();
buf->create();
}
}
void GLGSRender::on_exit()
@ -649,6 +717,19 @@ void GLGSRender::on_exit()
if (m_fragment_constants_buffer)
m_fragment_constants_buffer.remove();
for (texture_buffer_pair &attrib_buffer : m_gl_attrib_buffers)
{
gl::texture *&tex = attrib_buffer.texture;
tex->remove();
delete tex;
tex = nullptr;
gl::buffer *&buf = attrib_buffer.buffer;
buf->remove();
delete buf;
buf = nullptr;
}
}
void nv4097_clear_surface(u32 arg, GLGSRender* renderer)

View File

@ -22,6 +22,13 @@ private:
rsx::surface_info m_surface;
/**
 * GL objects backing one vertex attribute stream: a buffer object holding the raw
 * attribute data and the buffer texture through which the vertex shader reads it.
 *
 * The objects are allocated in on_init_thread() and deleted in on_exit(). Both pointers
 * default to nullptr so an entry that was never initialized (or was already released)
 * holds a well-defined value instead of an indeterminate one.
 */
struct texture_buffer_pair
{
	gl::texture *texture = nullptr;
	gl::buffer *buffer = nullptr;
}
m_gl_attrib_buffers[rsx::limits::vertex_count];
public:
gl::fbo draw_fbo;

View File

@ -166,6 +166,10 @@ OPENGL_PROC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus);
OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase);
//Texture Buffers
OPENGL_PROC(PFNGLTEXBUFFERPROC, TexBuffer);
OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT);
//KHR_debug
OPENGL_PROC(PFNGLDEBUGMESSAGECONTROLARBPROC, DebugMessageControlARB);
OPENGL_PROC(PFNGLDEBUGMESSAGEINSERTARBPROC, DebugMessageInsertARB);

View File

@ -27,7 +27,7 @@ std::string GLVertexDecompilerThread::compareFunction(COMPARE f, const std::stri
void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
{
OS << "#version 420" << std::endl << std::endl;
OS << "#version 430" << std::endl << std::endl;
OS << "layout(std140, binding = 0) uniform ScaleOffsetBuffer" << std::endl;
OS << "{" << std::endl;
OS << " mat4 scaleOffsetMat;" << std::endl;
@ -36,10 +36,33 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector<ParamType>& inputs)
{
for (const ParamType PT : inputs)
std::vector<std::tuple<size_t, std::string>> input_data;
for (const ParamType &PT : inputs)
{
for (const ParamItem &PI : PT.items)
OS << /*"layout(location = " << PI.location << ") "*/ "in " << PT.type << " " << PI.name << ";" << std::endl;
{
input_data.push_back(std::make_tuple(PI.location, PI.name));
}
}
/**
* It is important that the locations are in the order in which the vertex attributes are expected.
* If this order is not adhered to, channels may be swapped, leading to corruption
*/
std::sort(input_data.begin(), input_data.end());
int location = 1;
for (const std::tuple<size_t, std::string> item : input_data)
{
for (const ParamType &PT : inputs)
{
for (const ParamItem &PI : PT.items)
{
if (PI.name == std::get<1>(item))
OS << "layout(location=" << location++ << ")" << " uniform samplerBuffer" << " " << PI.name << "_buffer;" << std::endl;
}
}
}
}
@ -101,6 +124,37 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
}
}
void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector<rsx_vertex_input> &inputs)
{
for (const auto &real_input : inputs)
{
if (real_input.location != PI.location)
continue;
if (!real_input.is_array)
{
OS << " vec4 " << PI.name << " = texelFetch(" << PI.name << "_buffer, 0);" << std::endl;
return;
}
if (real_input.frequency > 1)
{
if (real_input.is_modulo)
{
OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID %" << real_input.frequency << ");" << std::endl;
return;
}
OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID /" << real_input.frequency << ");" << std::endl;
return;
}
OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID);" << std::endl;
return;
}
OS << " vec4 " << PI.name << " = vec4(0., 0., 0., 1.);" << std::endl;
}
void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
@ -118,6 +172,12 @@ void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
OS << ";" << std::endl;
}
}
for (const ParamType &PT : m_parr.params[PF_PARAM_IN])
{
for (const ParamItem &PI : PT.items)
add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs);
}
}
void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS)

View File

@ -19,10 +19,13 @@ protected:
virtual void insertOutputs(std::stringstream &OS, const std::vector<ParamType> &outputs) override;
virtual void insertMainStart(std::stringstream &OS) override;
virtual void insertMainEnd(std::stringstream &OS) override;
const RSXVertexProgram &rsx_vertex_program;
public:
// Builds a decompiler for the vertex program `prog`.
// `prog` is forwarded to the VertexProgramDecompiler base and is also kept in
// rsx_vertex_program, which is a reference member, so `prog` has to outlive this object.
// `shader` initializes m_shader (presumably the output string for the generated GLSL - confirm
// against the member declaration).
// NOTE(review): `parr` is not used by the initializer list visible here.
GLVertexDecompilerThread(const RSXVertexProgram &prog, std::string& shader, ParamArray& parr)
: VertexProgramDecompiler(prog)
, m_shader(shader)
, rsx_vertex_program(prog)
{
}

View File

@ -830,7 +830,8 @@ namespace gl
{
texture1D = GL_TEXTURE_1D,
texture2D = GL_TEXTURE_2D,
texture3D = GL_TEXTURE_3D
texture3D = GL_TEXTURE_3D,
textureBuffer = GL_TEXTURE_BUFFER
};
enum class channel_type
@ -863,9 +864,10 @@ namespace gl
GLenum pname;
switch (new_binding.get_target())
{
case target::texture1D: pname = GL_TEXTURE_1D_BINDING_EXT; break;
case target::texture2D: pname = GL_TEXTURE_2D_BINDING_EXT; break;
case target::texture3D: pname = GL_TEXTURE_3D_BINDING_EXT; break;
case target::texture1D: pname = GL_TEXTURE_BINDING_1D; break;
case target::texture2D: pname = GL_TEXTURE_BINDING_2D; break;
case target::texture3D: pname = GL_TEXTURE_BINDING_3D; break;
case target::textureBuffer: pname = GL_TEXTURE_BINDING_BUFFER; break;
}
glGetIntegerv(pname, &m_last_binding);
@ -1131,6 +1133,29 @@ namespace gl
__glcheck glTexSubImage2D((GLenum)get_target(), level(), 0, 0, width(), height(), (GLenum)format, (GLenum)type, src);
}
/**
 * Attaches a range of the buffer object `buf` to this buffer texture.
 *
 * @param buf             buffer object that supplies the texel data
 * @param gl_format_type  GL sized internal format used to interpret the buffer contents
 * @param offset          start of the range inside the buffer; a value of 0 delegates to the
 *                        whole-buffer overload, in which case `length` is not consulted
 * @param length          size of the range, passed through to glTextureBufferRangeEXT
 * @throws EXCEPTION when this texture is not a buffer texture, or when a non-zero offset is
 *         requested and glTextureBufferRangeEXT was not loaded
 */
void copy_from(buffer &buf, u32 gl_format_type, u32 offset, u32 length)
{
	if (get_target() != target::textureBuffer)
		throw EXCEPTION("OpenGL error: texture cannot copy from buffer");

	if (offset == 0)
	{
		// No starting offset requested: attach the buffer as a whole
		copy_from(buf, gl_format_type);
	}
	else if (glTextureBufferRangeEXT != nullptr)
	{
		// Direct-state-access entry point: takes the texture id explicitly, no bind needed
		__glcheck glTextureBufferRangeEXT(id(), (GLenum)target::textureBuffer, gl_format_type, buf.id(), offset, length);
	}
	else
	{
		throw EXCEPTION("OpenGL error: partial buffer access for textures is unsupported on your system");
	}
}
/**
 * Attaches the whole buffer object `buf` to this buffer texture.
 *
 * @param buf             buffer object that supplies the texel data
 * @param gl_format_type  GL sized internal format used to interpret the buffer contents
 */
void copy_from(buffer &buf, u32 gl_format_type)
{
	const GLenum buffer_target = (GLenum)target::textureBuffer;

	// glTexBuffer acts on the texture bound to the target; the guard is presumably what binds
	// this texture and restores the previous binding on scope exit - confirm in save_binding_state
	save_binding_state binding_guard(*this);
	__glcheck glTexBuffer(buffer_target, gl_format_type, buf.id());
}
void copy_from(const buffer& buf, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings)
{
buffer::save_binding_state save_buffer(buffer::target::pixel_unpack, buf);