OpenGL renderer: use correct MVP matrix. Cleanup

Simplified gl::ring_buffer helper
This commit is contained in:
DHrpcs3 2016-06-21 00:38:38 +03:00
parent c0487a634e
commit 3b5cd4845e
9 changed files with 299 additions and 124 deletions

View File

@ -453,6 +453,10 @@ void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w)
_mm_stream_si128((__m128i*)dst, vector);
}
void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w)
{
stream_vector(dst, (u32&)x, (u32&)y, (u32&)z, (u32&)w);
}
void stream_vector_from_memory(void *dst, void *src)
{
const __m128i &vector = _mm_loadu_si128((__m128i*)src);

View File

@ -49,6 +49,7 @@ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst,
/**
* Stream a 128-bit vector (four 32-bit components x, y, z, w) to dst.
* The f32 overload forwards the bit patterns of its arguments to the u32 overload.
*/
void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w);
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w);
/**

View File

@ -1,12 +1,11 @@
#include "stdafx.h"
#include "Utilities/Config.h"
#include "Emu/Memory/Memory.h"
#include "Emu/System.h"
#include "GLGSRender.h"
#include "rsx_gl_cache.h"
#include "../rsx_utils.h"
#include "../rsx_methods.h"
#include "../Common/BufferUtils.h"
#include "../rsx_utils.h"
extern cfg::bool_entry g_cfg_rsx_debug_output;
extern cfg::bool_entry g_cfg_rsx_overlay;
@ -153,7 +152,8 @@ void GLGSRender::begin()
__glcheck glStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL], rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL],
rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]);
if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE]) {
if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE])
{
__glcheck glStencilMaskSeparate(GL_BACK, rsx::method_registers[NV4097_SET_BACK_STENCIL_MASK]);
__glcheck glStencilFuncSeparate(GL_BACK, rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC],
rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC_REF], rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC_MASK]);
@ -230,8 +230,6 @@ void GLGSRender::begin()
__glcheck glCullFace(rsx::method_registers[NV4097_SET_CULL_FACE]);
}
glDisable(GL_CULL_FACE);
__glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE] ^ 1);
__glcheck enable(rsx::method_registers[NV4097_SET_POLY_SMOOTH_ENABLE], GL_POLYGON_SMOOTH);
@ -248,7 +246,7 @@ void GLGSRender::begin()
}
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
m_begin_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
m_draw_calls++;
}
@ -291,8 +289,8 @@ void GLGSRender::end()
int location;
if (m_program->uniforms.has_location("texture" + std::to_string(i), &location))
{
glProgramUniform1i(m_program->id(), location, texture_index);
m_gl_textures[i].init(texture_index, textures[i]);
__glcheck glProgramUniform1i(m_program->id(), location, texture_index);
__glcheck m_gl_textures[i].init(texture_index, textures[i]);
texture_index++;
@ -322,26 +320,38 @@ void GLGSRender::end()
*/
}
__glcheck 0;
u32 offset_in_index_buffer = set_vertex_buffer();
m_vao.bind();
std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now();
if (g_cfg_rsx_debug_output)
{
m_program->validate();
}
if (draw_command == rsx::draw_command::indexed)
{
rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4);
if (indexed_type == rsx::index_array_type::u32)
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_INT, (GLvoid *)(offset_in_index_buffer));
if (indexed_type == rsx::index_array_type::u16)
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(offset_in_index_buffer));
{
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_INT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
}
else if (indexed_type == rsx::index_array_type::u16)
{
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
}
else
{
throw std::logic_error("bad index array type");
}
}
else if (!is_primitive_native(draw_mode))
{
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(offset_in_index_buffer));
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
}
else
{
@ -349,7 +359,7 @@ void GLGSRender::end()
}
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
write_buffers();
@ -377,8 +387,7 @@ void GLGSRender::set_viewport()
rsx::window_origin shader_window_origin = rsx::to_window_origin((shader_window >> 12) & 0xf);
//TODO
if (true || shader_window_origin == rsx::window_origin::bottom)
if (shader_window_origin == rsx::window_origin::bottom)
{
__glcheck glViewport(viewport_x, viewport_y, viewport_w, viewport_h);
__glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
@ -387,11 +396,13 @@ void GLGSRender::set_viewport()
{
u16 shader_window_height = shader_window & 0xfff;
__glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h - 1, viewport_w, viewport_h);
__glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h - 1, scissor_w, scissor_h);
__glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h + 1, viewport_w, viewport_h);
__glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h + 1, scissor_w, scissor_h);
}
glEnable(GL_SCISSOR_TEST);
__glcheck 0;
}
void GLGSRender::on_init_thread()
@ -406,6 +417,7 @@ void GLGSRender::on_init_thread()
LOG_NOTICE(RSX, "%s", (const char*)glGetString(GL_VENDOR));
glEnable(GL_VERTEX_PROGRAM_POINT_SIZE);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_offset_align);
glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment);
m_vao.create();
@ -415,11 +427,11 @@ void GLGSRender::on_init_thread()
tex.set_target(gl::texture::target::textureBuffer);
}
m_attrib_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000, gl::buffer::target::texture));
m_uniform_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000, gl::buffer::target::uniform));
m_index_ring_buffer.reset(new gl::ring_buffer(0x100000, gl::buffer::target::element_array));
m_attrib_ring_buffer.create(gl::buffer::target::texture, 16 * 0x100000);
m_uniform_ring_buffer.create(gl::buffer::target::uniform, 16 * 0x100000);
m_index_ring_buffer.create(gl::buffer::target::element_array, 0x100000);
m_vao.element_array_buffer = m_index_ring_buffer->get_buffer();
m_vao.element_array_buffer = m_index_ring_buffer;
m_gl_texture_cache.initialize_rtt_cache();
}
@ -446,15 +458,18 @@ void GLGSRender::on_exit()
tex.remove();
}
m_attrib_ring_buffer->destroy();
m_uniform_ring_buffer->destroy();
m_index_ring_buffer->destroy();
m_attrib_ring_buffer.remove();
m_uniform_ring_buffer.remove();
m_index_ring_buffer.remove();
}
void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
{
//LOG_NOTICE(Log::RSX, "nv4097_clear_surface(0x%x)", arg);
if (!rsx::method_registers[NV4097_SET_SURFACE_FORMAT]) return;
if (!rsx::method_registers[NV4097_SET_SURFACE_FORMAT])
{
return;
}
if ((arg & 0xf3) == 0)
{
@ -475,9 +490,10 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
GLbitfield mask = 0;
rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7);
if (arg & 0x1)
{
rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7);
u32 max_depth_value = get_max_depth_value(surface_depth_format);
u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8;
@ -487,7 +503,7 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
mask |= GLenum(gl::buffers::depth);
}
if (arg & 0x2)
if (surface_depth_format == rsx::surface_depth_format::z24s8 && arg & 0x2)
{
u8 clear_stencil = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] & 0xff;
@ -535,50 +551,111 @@ bool GLGSRender::do_method(u32 cmd, u32 arg)
return true;
}
//binding 0
// CPU-side image of the matrix uniform block. load_program() reserves
// sizeof(glsl_matrix_buffer) bytes in the uniform ring buffer, lets
// fill_matrix_buffer() populate them and binds the range at index 0.
// NOTE(review): member order/size presumably has to match the block declared by
// the generated GLSL — confirm against the shader generator.
struct alignas(4) glsl_matrix_buffer
{
// Filled by rsx::fill_viewport_matrix (viewport scale/offset registers).
float viewport_matrix[4][4];
// Filled by rsx::fill_window_matrix (shader window origin and window offset).
float window_matrix[4][4];
// Filled by rsx::fill_scale_offset_matrix from the viewport rectangle registers.
float normalize_matrix[4][4];
};
//binding 1
// CPU-side image of the vertex constants uniform block. load_program() reserves
// sizeof(glsl_vertex_constants_buffer) bytes, passes them to
// fill_vertex_program_constants_data() and binds the range at index 1.
struct alignas(4) glsl_vertex_constants_buffer
{
// 468 four-component float vectors.
// NOTE(review): 468 is presumably the vertex program constant register count — TODO confirm.
float vc[468][4];
};
//binding 2
// Upper-bound layout for the fragment constants uniform block.
// NOTE(review): in the visible part of load_program() the range bound at index 2 is
// sized from the decompiled shader's constant count, not from sizeof of this struct;
// this type does not appear to be referenced there — confirm whether it is still needed.
struct alignas(4) glsl_fragment_constants_buffer
{
// 2048 four-component float vectors.
float fc[2048][4];
};
// Populates all three matrices of the binding-0 uniform block.
//
// viewport_matrix and window_matrix are delegated to the rsx helpers (transpose
// flag set). normalize_matrix is a scale/offset matrix derived from the viewport
// rectangle registers: x in [left, right] is mapped to [-1, 1], y is mapped the
// same way over the (possibly window-flipped) vertical extent, and z is mapped
// with scale 2 and offset -1.
// A zero-sized viewport yields a division by zero in the scale/offset terms;
// no guard is applied here.
static void fill_matrix_buffer(glsl_matrix_buffer *buffer)
{
	rsx::fill_viewport_matrix(buffer->viewport_matrix, true);
	rsx::fill_window_matrix(buffer->window_matrix, true);

	// Viewport rectangle: low 16 bits hold the origin, high 16 bits the extent.
	const u32 horizontal_reg = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL];
	const u32 vertical_reg = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL];

	const f32 origin_x = f32(horizontal_reg & 0xffff);
	const f32 origin_y = f32(vertical_reg & 0xffff);
	const f32 extent_w = f32(horizontal_reg >> 16);
	const f32 extent_h = f32(vertical_reg >> 16);

	// Shader window: bits 12..15 select the origin, the low 12 bits hold the height.
	const u32 window_reg = rsx::method_registers[NV4097_SET_SHADER_WINDOW];
	const rsx::window_origin origin = rsx::to_window_origin((window_reg >> 12) & 0xf);
	const u16 window_height = window_reg & 0xfff;

	const bool bottom_origin = (origin == rsx::window_origin::bottom);

	const f32 x_lo = origin_x;
	const f32 x_hi = origin_x + extent_w;

	// With a bottom origin the vertical range is mirrored inside the window height.
	const f32 y_a = bottom_origin ? window_height - (origin_y + extent_h) + 1 : origin_y;
	const f32 y_b = bottom_origin ? window_height - origin_y + 1 : origin_y + extent_h;

	const f32 norm_scale_x = 2.0f / (x_hi - x_lo);
	const f32 norm_offset_x = -(x_hi + x_lo) / (x_hi - x_lo);

	f32 norm_scale_y = 2.0f / (y_a - y_b);
	f32 norm_offset_y = -(y_a + y_b) / (y_a - y_b);

	// A top origin inverts the vertical axis of the normalisation.
	if (origin == rsx::window_origin::top)
	{
		norm_scale_y = -norm_scale_y;
		norm_offset_y = -norm_offset_y;
	}

	const f32 norm_scale_z = 2.0f;
	const f32 norm_offset_z = -1.0f;

	rsx::fill_scale_offset_matrix(buffer->normalize_matrix, true,
		norm_offset_x, norm_offset_y, norm_offset_z,
		norm_scale_x, norm_scale_y, norm_scale_z);
}
bool GLGSRender::load_program()
{
rsx::program_info info = programs_cache.get(get_raw_program(), rsx::decompile_language::glsl);
m_program = (gl::glsl::program*)info.program;
m_program->use();
// u32 fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
u32 fragment_constants_sz = info.fragment_shader.decompiled->constants.size() * sizeof(f32) * 4;
fragment_constants_sz = std::max(32U, fragment_constants_sz);
u32 max_buffer_sz = 8192 + 512 + fragment_constants_sz;
u32 fragment_constants_count = info.fragment_shader.decompiled->constants.size();
u32 fragment_constants_size = fragment_constants_count * sizeof(rsx::fragment_program::ucode_instr);
u32 is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]);
u8 alpha_ref_raw = (u8)(rsx::method_registers[NV4097_SET_ALPHA_REF] & 0xFF);
float alpha_ref = alpha_ref_raw / 255.f;
u32 max_buffer_sz =
align(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align) +
align(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align) +
align(fragment_constants_size, m_uniform_buffer_offset_align);
m_uniform_ring_buffer.reserve_and_map(max_buffer_sz);
u8 *buf;
u32 scale_offset_offset;
u32 vertex_constants_offset;
u32 fragment_constants_offset;
m_uniform_ring_buffer->reserve_and_map(max_buffer_sz);
auto mapping = m_uniform_ring_buffer->alloc_from_reserve(512);
buf = static_cast<u8*>(mapping.first);
scale_offset_offset = mapping.second;
fill_scale_offset_data(buf, false);
memcpy(buf + 16 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float));
memcpy(buf + 17 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float));
memcpy(buf + 18 * sizeof(float), &is_alpha_tested, sizeof(u32));
memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float));
mapping = m_uniform_ring_buffer->alloc_from_reserve(512 * 16);
buf = static_cast<u8*>(mapping.first);
vertex_constants_offset = mapping.second;
fill_vertex_program_constants_data(buf);
mapping = m_uniform_ring_buffer->alloc_from_reserve(fragment_constants_sz);
buf = static_cast<u8*>(mapping.first);
fragment_constants_offset = mapping.second;
// fill fragment constants
if (!info.fragment_shader.decompiled->constants.empty())
{
auto mapping = m_uniform_ring_buffer.alloc_from_reserve(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align);
fill_matrix_buffer((glsl_matrix_buffer *)mapping.first);
scale_offset_offset = mapping.second;
}
{
auto mapping = m_uniform_ring_buffer.alloc_from_reserve(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align);
fill_vertex_program_constants_data(mapping.first);
vertex_constants_offset = mapping.second;
}
if (fragment_constants_size)
{
auto mapping = m_uniform_ring_buffer.alloc_from_reserve(fragment_constants_size, m_uniform_buffer_offset_align);
fragment_constants_offset = mapping.second;
u32 buffer_offset = 0;
static const __m128i mask = _mm_set_epi8(
@ -587,31 +664,63 @@ bool GLGSRender::load_program()
0x6, 0x7, 0x4, 0x5,
0x2, 0x3, 0x0, 0x1);
auto ucode = (const rsx::fragment_program::ucode_instr*)info.fragment_shader.decompiled->raw->ucode_ptr;
auto ucode = (const rsx::fragment_program::ucode_instr *)info.fragment_shader.decompiled->raw->ucode_ptr;
for (const auto& constant : info.fragment_shader.decompiled->constants)
{
const void *data = ucode + (u32)(constant.id / (sizeof(f32) * 4));
const void *data = ucode + u32(constant.id / sizeof(rsx::fragment_program::ucode_instr));
const __m128i &vector = _mm_loadu_si128((const __m128i*)data);
const __m128i &shuffled_vector = _mm_shuffle_epi8(vector, mask);
_mm_stream_si128((__m128i*)((char*)buf + buffer_offset), shuffled_vector);
_mm_stream_si128((__m128i*)((char*)mapping.first + buffer_offset), shuffled_vector);
//float x = ((float*)((char*)buf + buffer_offset))[0];
//float y = ((float*)((char*)buf + buffer_offset))[1];
//float z = ((float*)((char*)buf + buffer_offset))[2];
//float w = ((float*)((char*)buf + buffer_offset))[3];
//float x = ((float*)((char*)mapping.first + buffer_offset))[0];
//float y = ((float*)((char*)mapping.first + buffer_offset))[1];
//float z = ((float*)((char*)mapping.first + buffer_offset))[2];
//float w = ((float*)((char*)mapping.first + buffer_offset))[3];
//LOG_WARNING(RSX, "fc%u = {%g, %g, %g, %g}", constant.id, x, y, z, w);
buffer_offset += 4 * sizeof(f32);
}
}
m_uniform_ring_buffer->unmap();
m_uniform_ring_buffer.unmap();
glBindBufferRange(GL_UNIFORM_BUFFER, 0, m_uniform_ring_buffer->get_buffer().id(), scale_offset_offset, 512);
glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_ring_buffer->get_buffer().id(), vertex_constants_offset, 512 * 16);
glBindBufferRange(GL_UNIFORM_BUFFER, 2, m_uniform_ring_buffer->get_buffer().id(), fragment_constants_offset, fragment_constants_sz);
/*
{
m_uniform_ring_buffer.bind();
auto buffer_range = m_uniform_ring_buffer.allocate(
align(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align) +
align(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align));
gl::allocator allocator{ m_uniform_ring_buffer, buffer_range };
matrix_buffer_range = allocator.allocate(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align);
vertex_constants_buffer_range = allocator.allocate(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align);
glsl_matrix_buffer *buffer = allocator.get<glsl_matrix_buffer>(matrix_buffer_range);
fill_scale_offset_data(buffer, false);
fill_matrix_buffer(buffer);
fill_vertex_program_constants_data(allocator.get<glsl_vertex_constants_buffer>(vertex_constants_buffer_range));
if (contains_fragment_constants)
{
//fragment_constants_buffer_range = allocator.allocate(info.fragment_shader.decompiled->constants.size() * sizeof(f32) * 4);
}
}
if (contains_fragment_constants)
{
//m_uniform_ring_buffer.bind_range(2, fragment_constants_buffer_range);
}
*/
m_uniform_ring_buffer.bind_range(0, scale_offset_offset, sizeof(glsl_matrix_buffer));
m_uniform_ring_buffer.bind_range(1, vertex_constants_offset, sizeof(glsl_vertex_constants_buffer));
m_uniform_ring_buffer.bind_range(2, fragment_constants_offset, fragment_constants_size);
__glcheck 0;
return true;
}

View File

@ -26,9 +26,9 @@ private:
gl::gl_texture_cache m_gl_texture_cache;
gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count];
std::unique_ptr<gl::ring_buffer> m_attrib_ring_buffer;
std::unique_ptr<gl::ring_buffer> m_uniform_ring_buffer;
std::unique_ptr<gl::ring_buffer> m_index_ring_buffer;
gl::ring_buffer m_attrib_ring_buffer;
gl::ring_buffer m_uniform_ring_buffer;
gl::ring_buffer m_index_ring_buffer;
u32 m_draw_calls = 0;
u32 m_begin_time = 0;
@ -36,6 +36,7 @@ private:
u32 m_vertex_upload_time = 0;
GLint m_min_texbuffer_alignment = 256;
GLint m_uniform_buffer_offset_align = 256;
public:
gl::fbo draw_fbo;

View File

@ -47,7 +47,7 @@ namespace gl
}
}
};
#define __glcheck gl::__glcheck_impl_t{ __FILE__, __FUNCTION__, __LINE__ },
#define __glcheck ::gl::__glcheck_impl_t{ __FILE__, __FUNCTION__, __LINE__ },
#else
#define __glcheck
#endif
@ -496,6 +496,11 @@ namespace gl
glBindBuffer((GLenum)target_, m_id);
}
void bind() const
{
bind(current_target());
}
target current_target() const
{
return m_target;
@ -580,12 +585,9 @@ namespace gl
}
};
class ring_buffer
class ring_buffer : public buffer
{
buffer storage_buffer;
buffer::target m_target;
u32 m_data_loc = 0;
u32 m_size;
u32 m_mapped_block_size = 0;
u32 m_mapped_block_offset;
@ -594,50 +596,39 @@ namespace gl
void *m_mapped_base = nullptr;
public:
ring_buffer(u32 initial_size, buffer::target target)
std::pair<void*, u32> alloc_and_map(u32 alloc_size)
{
storage_buffer.create();
storage_buffer.data(initial_size);
m_size = initial_size;
m_target = target;
}
alloc_size = align(alloc_size, 0x100);
void destroy()
{
storage_buffer.remove();
}
std::pair<void*, u32> alloc_and_map(u32 size)
{
size = (size + 255) & ~255;
glBindBuffer((GLenum)m_target, storage_buffer.id());
u32 limit = m_data_loc + size;
if (limit > m_size)
buffer::bind();
u32 limit = m_data_loc + alloc_size;
if (limit > buffer::size())
{
if (size > m_size)
m_size = size;
if (alloc_size > buffer::size())
{
buffer::data(alloc_size);
}
storage_buffer.data(m_size, nullptr);
m_data_loc = 0;
}
void *ptr = glMapBufferRange((GLenum)m_target, m_data_loc, size, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT);
void *ptr = glMapBufferRange((GLenum)buffer::current_target(), m_data_loc, alloc_size,
GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT);
u32 offset = m_data_loc;
m_data_loc += size;
m_data_loc += alloc_size;
return std::make_pair(ptr, offset);
}
void unmap()
{
glUnmapBuffer((GLenum)m_target);
buffer::unmap();
m_mapped_block_size = 0;
m_mapped_base = 0;
}
void reserve_and_map(u32 max_size)
{
max_size = (max_size + 4095) & ~4095;
max_size = align(max_size, 0x1000);
auto mapping = alloc_and_map(max_size);
m_mapped_base = mapping.first;
m_mapped_block_offset = mapping.second;
@ -647,8 +638,7 @@ namespace gl
std::pair<void*, u32> alloc_from_reserve(u32 size, u32 alignment = 16)
{
alignment -= 1;
size = (size + alignment) & ~alignment;
size = align(size, alignment);
if (m_mapped_bytes_available < size || !m_mapped_base)
{
@ -670,13 +660,13 @@ namespace gl
m_mapped_reserve_offset += size;
m_mapped_bytes_available -= size;
EXPECTS((offset & alignment) == 0);
EXPECTS((offset & (alignment - 1)) == 0);
return std::make_pair(ptr, offset);
}
buffer& get_buffer()
void bind_range(u32 index, u32 offset, u32 size) const
{
return storage_buffer;
glBindBufferRange((GLenum)current_target(), index, id(), offset, size);
}
};

View File

@ -247,17 +247,16 @@ rsx::complete_shader glsl_complete_shader(const rsx::decompiled_shader &shader,
result.code += "out vec4 wpos;\n";
// TODO
if (1)
if (0)
{
finalize += "\tgl_Position = o0;\n";
finalize += "\tgl_Position = gl_Position * viewport_matrix;\n";
}
else
{
finalize +=
" wpos = window_matrix * viewport_matrix * vec4(o0.xyz, 1.0);\n"
" gl_Position = normalize_matrix * vec4(wpos.xyz, 1.0);\n"
" gl_Position.w = wpos.w = o0.w;\n";
" gl_Position.w = o0.w;\n";
}
for (std::size_t index = 0; index < 16; ++index)

View File

@ -203,14 +203,14 @@ u32 GLGSRender::set_vertex_buffer()
vertex_draw_count = (u32)get_index_count(draw_mode, gsl::narrow<int>(vertex_draw_count));
u32 block_sz = vertex_draw_count * type_size;
auto mapping = m_index_ring_buffer->alloc_and_map(block_sz);
auto mapping = m_index_ring_buffer.alloc_and_map(block_sz);
void *ptr = mapping.first;
offset_in_index_buffer = mapping.second;
gsl::span<gsl::byte> dst{ reinterpret_cast<gsl::byte*>(ptr), gsl::narrow<u32>(block_sz) };
std::tie(min_index, max_index) = write_index_array_data_to_buffer(dst, type, draw_mode, first_count_commands);
m_index_ring_buffer->unmap();
m_index_ring_buffer.unmap();
}
if (draw_command == rsx::draw_command::inlined_array)
@ -228,7 +228,7 @@ u32 GLGSRender::set_vertex_buffer()
}
vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride;
m_attrib_ring_buffer->reserve_and_map(vertex_draw_count * max_vertex_attrib_size);
m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size);
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
@ -253,7 +253,7 @@ u32 GLGSRender::set_vertex_buffer()
auto &texture = m_gl_attrib_buffers[index];
u8 *src = reinterpret_cast<u8*>(inline_vertex_array.data());
auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment);
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
u8 *dst = static_cast<u8*>(mapping.first);
src += offsets[index];
@ -276,13 +276,13 @@ u32 GLGSRender::set_vertex_buffer()
dst += element_size;
}
texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size);
texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture);
if (!is_primitive_native(draw_mode))
{
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode, *m_index_ring_buffer);
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode, m_index_ring_buffer);
}
}
}
@ -298,7 +298,7 @@ u32 GLGSRender::set_vertex_buffer()
if (draw_command == rsx::draw_command::array || draw_command == rsx::draw_command::indexed)
{
u32 verts_allocated = std::max(vertex_draw_count, max_index + 1);
m_attrib_ring_buffer->reserve_and_map(verts_allocated * max_vertex_attrib_size);
m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size);
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
@ -337,7 +337,7 @@ u32 GLGSRender::set_vertex_buffer()
if (draw_command == rsx::draw_command::array)
{
auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment);
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
gsl::byte *dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
@ -354,7 +354,7 @@ u32 GLGSRender::set_vertex_buffer()
if (draw_command == rsx::draw_command::indexed)
{
data_size = (max_index + 1) * element_size;
auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment);
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
gsl::byte *dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
@ -364,7 +364,7 @@ u32 GLGSRender::set_vertex_buffer()
write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size));
}
texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, buffer_offset, data_size);
texture.copy_from(m_attrib_ring_buffer, gl_type, buffer_offset, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture);
@ -385,11 +385,11 @@ u32 GLGSRender::set_vertex_buffer()
auto &texture = m_gl_attrib_buffers[index];
auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment);
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
u8 *dst = static_cast<u8*>(mapping.first);
memcpy(dst, vertex_data.data(), data_size);
texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size);
texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture);
@ -411,11 +411,11 @@ u32 GLGSRender::set_vertex_buffer()
if (draw_command == rsx::draw_command::array && !is_primitive_native(draw_mode))
{
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(first_count_commands, draw_mode, *m_index_ring_buffer);
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(first_count_commands, draw_mode, m_index_ring_buffer);
}
}
m_attrib_ring_buffer->unmap();
m_attrib_ring_buffer.unmap();
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();

View File

@ -1,5 +1,8 @@
#include "stdafx.h"
#include "rsx_utils.h"
#include "rsx_methods.h"
#include "Emu/RSX/GCM.h"
#include "Common/BufferUtils.h"
extern "C"
{
@ -42,4 +45,66 @@ namespace rsx
dst.reset(new u8[clip_h * dst_pitch]);
clip_image(dst.get(), src, clip_x, clip_y, clip_w, clip_h, bpp, src_pitch, dst_pitch);
}
// Writes a 4x4 float matrix (four consecutive 16-byte rows) that scales by
// (scale_x, scale_y, scale_z) and translates by (offset_x, offset_y, offset_z).
//
// transpose == false: the offsets occupy the fourth component of rows 0..2.
// transpose == true:  the offsets occupy the first three components of row 3.
//
// NOTE(review): rows are written through stream_vector; its u32 overload uses
// _mm_stream_si128, which expects a 16-byte-aligned destination — confirm that
// every caller passes suitably aligned memory.
void fill_scale_offset_matrix(void *dest_, bool transpose,
	float offset_x, float offset_y, float offset_z,
	float scale_x, float scale_y, float scale_z)
{
	char *const rows = static_cast<char*>(dest_);
	const auto row_stride = 4 * sizeof(f32);

	if (!transpose)
	{
		stream_vector(rows + row_stride * 0, scale_x, 0.f, 0.f, offset_x);
		stream_vector(rows + row_stride * 1, 0.f, scale_y, 0.f, offset_y);
		stream_vector(rows + row_stride * 2, 0.f, 0.f, scale_z, offset_z);
		stream_vector(rows + row_stride * 3, 0.f, 0.f, 0.f, 1.f);
		return;
	}

	stream_vector(rows + row_stride * 0, scale_x, 0.f, 0.f, 0.f);
	stream_vector(rows + row_stride * 1, 0.f, scale_y, 0.f, 0.f);
	stream_vector(rows + row_stride * 2, 0.f, 0.f, scale_z, 0.f);
	stream_vector(rows + row_stride * 3, offset_x, offset_y, offset_z, 1.f);
}
void fill_window_matrix(void *dest, bool transpose)
{
u32 shader_window = method_registers[NV4097_SET_SHADER_WINDOW];
u16 height = shader_window & 0xfff;
window_origin origin = to_window_origin((shader_window >> 12) & 0xf);
window_pixel_center pixelCenter = to_window_pixel_center(shader_window >> 16);
f32 offset_x = f32(method_registers[NV4097_SET_WINDOW_OFFSET] & 0xffff);
f32 offset_y = f32(method_registers[NV4097_SET_WINDOW_OFFSET] >> 16);
f32 scale_y = 1.0;
if (origin == window_origin::bottom)
{
offset_y = height - offset_y + 1;
scale_y = -1.0f;
}
if (false && pixelCenter == window_pixel_center::half)
{
offset_x += 0.5f;
offset_y += 0.5f;
}
fill_scale_offset_matrix(dest, transpose, offset_x, offset_y, 0.0f, 1.0f, scale_y, 1.0f);
}
void fill_viewport_matrix(void *buffer, bool transpose)
{
f32 offset_x = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 0];
f32 offset_y = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 1];
f32 offset_z = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];
f32 scale_x = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 0];
f32 scale_y = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 1];
f32 scale_z = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
fill_scale_offset_matrix(buffer, transpose, offset_x, offset_y, offset_z, scale_x, scale_y, scale_z);
}
}

View File

@ -114,4 +114,10 @@ namespace rsx
void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
// Writes a 4x4 float scale/offset matrix (four 16-byte rows) to dest_.
// With transpose == false the offsets go into the fourth component of rows 0..2;
// with transpose == true they go into the first three components of row 3.
void fill_scale_offset_matrix(void *dest_, bool transpose,
float offset_x, float offset_y, float offset_z,
float scale_x, float scale_y, float scale_z);
// Writes the window matrix built from NV4097_SET_SHADER_WINDOW and
// NV4097_SET_WINDOW_OFFSET to dest (see fill_scale_offset_matrix for transpose).
void fill_window_matrix(void *dest, bool transpose);
// Writes the viewport matrix built from the NV4097_SET_VIEWPORT_OFFSET and
// NV4097_SET_VIEWPORT_SCALE registers to buffer (see fill_scale_offset_matrix for transpose).
void fill_viewport_matrix(void *buffer, bool transpose);
}