mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-14 10:21:21 +00:00
OpenGL renderer: use correct MVP matrix. Cleanup
Simplified gl::ring_buffer helper
This commit is contained in:
parent
c0487a634e
commit
3b5cd4845e
@ -453,6 +453,10 @@ void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w)
|
||||
_mm_stream_si128((__m128i*)dst, vector);
|
||||
}
|
||||
|
||||
void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w)
|
||||
{
|
||||
stream_vector(dst, (u32&)x, (u32&)y, (u32&)z, (u32&)w);
|
||||
}
|
||||
void stream_vector_from_memory(void *dst, void *src)
|
||||
{
|
||||
const __m128i &vector = _mm_loadu_si128((__m128i*)src);
|
||||
|
@ -49,6 +49,7 @@ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst,
|
||||
/**
|
||||
* Stream a 128-bit vector to dst.
|
||||
*/
|
||||
void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w);
|
||||
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w);
|
||||
|
||||
/**
|
||||
|
@ -1,12 +1,11 @@
|
||||
#include "stdafx.h"
|
||||
#include "Utilities/Config.h"
|
||||
#include "Emu/Memory/Memory.h"
|
||||
#include "Emu/System.h"
|
||||
#include "GLGSRender.h"
|
||||
#include "rsx_gl_cache.h"
|
||||
#include "../rsx_utils.h"
|
||||
#include "../rsx_methods.h"
|
||||
#include "../Common/BufferUtils.h"
|
||||
#include "../rsx_utils.h"
|
||||
|
||||
extern cfg::bool_entry g_cfg_rsx_debug_output;
|
||||
extern cfg::bool_entry g_cfg_rsx_overlay;
|
||||
@ -153,7 +152,8 @@ void GLGSRender::begin()
|
||||
__glcheck glStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL], rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL],
|
||||
rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]);
|
||||
|
||||
if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE]) {
|
||||
if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE])
|
||||
{
|
||||
__glcheck glStencilMaskSeparate(GL_BACK, rsx::method_registers[NV4097_SET_BACK_STENCIL_MASK]);
|
||||
__glcheck glStencilFuncSeparate(GL_BACK, rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC],
|
||||
rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC_REF], rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC_MASK]);
|
||||
@ -230,8 +230,6 @@ void GLGSRender::begin()
|
||||
__glcheck glCullFace(rsx::method_registers[NV4097_SET_CULL_FACE]);
|
||||
}
|
||||
|
||||
glDisable(GL_CULL_FACE);
|
||||
|
||||
__glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE] ^ 1);
|
||||
|
||||
__glcheck enable(rsx::method_registers[NV4097_SET_POLY_SMOOTH_ENABLE], GL_POLYGON_SMOOTH);
|
||||
@ -248,7 +246,7 @@ void GLGSRender::begin()
|
||||
}
|
||||
|
||||
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
|
||||
m_begin_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
m_draw_calls++;
|
||||
}
|
||||
|
||||
@ -291,8 +289,8 @@ void GLGSRender::end()
|
||||
int location;
|
||||
if (m_program->uniforms.has_location("texture" + std::to_string(i), &location))
|
||||
{
|
||||
glProgramUniform1i(m_program->id(), location, texture_index);
|
||||
m_gl_textures[i].init(texture_index, textures[i]);
|
||||
__glcheck glProgramUniform1i(m_program->id(), location, texture_index);
|
||||
__glcheck m_gl_textures[i].init(texture_index, textures[i]);
|
||||
|
||||
texture_index++;
|
||||
|
||||
@ -322,26 +320,38 @@ void GLGSRender::end()
|
||||
*/
|
||||
}
|
||||
|
||||
__glcheck 0;
|
||||
|
||||
u32 offset_in_index_buffer = set_vertex_buffer();
|
||||
m_vao.bind();
|
||||
|
||||
std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now();
|
||||
|
||||
if (g_cfg_rsx_debug_output)
|
||||
{
|
||||
m_program->validate();
|
||||
}
|
||||
|
||||
if (draw_command == rsx::draw_command::indexed)
|
||||
{
|
||||
rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4);
|
||||
|
||||
if (indexed_type == rsx::index_array_type::u32)
|
||||
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_INT, (GLvoid *)(offset_in_index_buffer));
|
||||
if (indexed_type == rsx::index_array_type::u16)
|
||||
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(offset_in_index_buffer));
|
||||
{
|
||||
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_INT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
|
||||
}
|
||||
else if (indexed_type == rsx::index_array_type::u16)
|
||||
{
|
||||
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::logic_error("bad index array type");
|
||||
}
|
||||
}
|
||||
else if (!is_primitive_native(draw_mode))
|
||||
{
|
||||
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(offset_in_index_buffer));
|
||||
__glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -349,7 +359,7 @@ void GLGSRender::end()
|
||||
}
|
||||
|
||||
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
|
||||
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
|
||||
write_buffers();
|
||||
|
||||
@ -377,8 +387,7 @@ void GLGSRender::set_viewport()
|
||||
|
||||
rsx::window_origin shader_window_origin = rsx::to_window_origin((shader_window >> 12) & 0xf);
|
||||
|
||||
//TODO
|
||||
if (true || shader_window_origin == rsx::window_origin::bottom)
|
||||
if (shader_window_origin == rsx::window_origin::bottom)
|
||||
{
|
||||
__glcheck glViewport(viewport_x, viewport_y, viewport_w, viewport_h);
|
||||
__glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
|
||||
@ -387,11 +396,13 @@ void GLGSRender::set_viewport()
|
||||
{
|
||||
u16 shader_window_height = shader_window & 0xfff;
|
||||
|
||||
__glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h - 1, viewport_w, viewport_h);
|
||||
__glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h - 1, scissor_w, scissor_h);
|
||||
__glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h + 1, viewport_w, viewport_h);
|
||||
__glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h + 1, scissor_w, scissor_h);
|
||||
}
|
||||
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
|
||||
__glcheck 0;
|
||||
}
|
||||
|
||||
void GLGSRender::on_init_thread()
|
||||
@ -406,6 +417,7 @@ void GLGSRender::on_init_thread()
|
||||
LOG_NOTICE(RSX, "%s", (const char*)glGetString(GL_VENDOR));
|
||||
|
||||
glEnable(GL_VERTEX_PROGRAM_POINT_SIZE);
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_offset_align);
|
||||
glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment);
|
||||
m_vao.create();
|
||||
|
||||
@ -415,11 +427,11 @@ void GLGSRender::on_init_thread()
|
||||
tex.set_target(gl::texture::target::textureBuffer);
|
||||
}
|
||||
|
||||
m_attrib_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000, gl::buffer::target::texture));
|
||||
m_uniform_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000, gl::buffer::target::uniform));
|
||||
m_index_ring_buffer.reset(new gl::ring_buffer(0x100000, gl::buffer::target::element_array));
|
||||
m_attrib_ring_buffer.create(gl::buffer::target::texture, 16 * 0x100000);
|
||||
m_uniform_ring_buffer.create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_index_ring_buffer.create(gl::buffer::target::element_array, 0x100000);
|
||||
|
||||
m_vao.element_array_buffer = m_index_ring_buffer->get_buffer();
|
||||
m_vao.element_array_buffer = m_index_ring_buffer;
|
||||
m_gl_texture_cache.initialize_rtt_cache();
|
||||
}
|
||||
|
||||
@ -446,15 +458,18 @@ void GLGSRender::on_exit()
|
||||
tex.remove();
|
||||
}
|
||||
|
||||
m_attrib_ring_buffer->destroy();
|
||||
m_uniform_ring_buffer->destroy();
|
||||
m_index_ring_buffer->destroy();
|
||||
m_attrib_ring_buffer.remove();
|
||||
m_uniform_ring_buffer.remove();
|
||||
m_index_ring_buffer.remove();
|
||||
}
|
||||
|
||||
void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
|
||||
{
|
||||
//LOG_NOTICE(Log::RSX, "nv4097_clear_surface(0x%x)", arg);
|
||||
if (!rsx::method_registers[NV4097_SET_SURFACE_FORMAT]) return;
|
||||
if (!rsx::method_registers[NV4097_SET_SURFACE_FORMAT])
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if ((arg & 0xf3) == 0)
|
||||
{
|
||||
@ -475,9 +490,10 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
|
||||
|
||||
GLbitfield mask = 0;
|
||||
|
||||
rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7);
|
||||
|
||||
if (arg & 0x1)
|
||||
{
|
||||
rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7);
|
||||
u32 max_depth_value = get_max_depth_value(surface_depth_format);
|
||||
|
||||
u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8;
|
||||
@ -487,7 +503,7 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
|
||||
mask |= GLenum(gl::buffers::depth);
|
||||
}
|
||||
|
||||
if (arg & 0x2)
|
||||
if (surface_depth_format == rsx::surface_depth_format::z24s8 && arg & 0x2)
|
||||
{
|
||||
u8 clear_stencil = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] & 0xff;
|
||||
|
||||
@ -535,50 +551,111 @@ bool GLGSRender::do_method(u32 cmd, u32 arg)
|
||||
return true;
|
||||
}
|
||||
|
||||
//binding 0
|
||||
// CPU-side image of the matrix uniform range: three 4x4 float matrices.
// fill_matrix_buffer() writes all three members (each with transpose = true),
// and load_program() binds sizeof(glsl_matrix_buffer) bytes at index 0.
struct alignas(4) glsl_matrix_buffer
|
||||
{
|
||||
// Written by rsx::fill_viewport_matrix (NV4097_SET_VIEWPORT_SCALE / _OFFSET registers).
float viewport_matrix[4][4];
|
||||
// Written by rsx::fill_window_matrix (NV4097_SET_SHADER_WINDOW / NV4097_SET_WINDOW_OFFSET registers).
float window_matrix[4][4];
|
||||
// Written by rsx::fill_scale_offset_matrix with scale/offset derived from the viewport rectangle.
float normalize_matrix[4][4];
|
||||
};
|
||||
|
||||
//binding 1
|
||||
// CPU-side image of the vertex constants uniform range.
// load_program() reserves sizeof(glsl_vertex_constants_buffer) bytes, passes the
// mapping to fill_vertex_program_constants_data() and binds the range at index 1.
struct alignas(4) glsl_vertex_constants_buffer
|
||||
{
|
||||
// 468 constants of four floats each.
float vc[468][4];
|
||||
};
|
||||
|
||||
//binding 2
|
||||
// Layout of the fragment constants uniform range (index 2).
// NOTE(review): the visible load_program() code does not use sizeof() of this
// struct; it sizes the range from the decompiled shader's constant count
// (fragment_constants_size) - confirm whether this type is still needed.
struct alignas(4) glsl_fragment_constants_buffer
|
||||
{
|
||||
// Room for 2048 constants of four floats each.
float fc[2048][4];
|
||||
};
|
||||
|
||||
/**
 * Fill every matrix of a glsl_matrix_buffer (all written with transpose = true).
 *
 * viewport_matrix and window_matrix are delegated to the rsx helpers.
 * normalize_matrix is a scale/offset matrix built from the viewport rectangle
 * (NV4097_SET_VIEWPORT_HORIZONTAL / _VERTICAL) and the shader window register:
 *   x: scale = 2 / (right - left),  offset = -(right + left) / (right - left)
 *   y: scale = 2 / (top - bottom),  offset = -(top + bottom) / (top - bottom)
 *   z: scale = 2,                   offset = -1
 * For a bottom window origin the vertical edges are measured from the shader
 * window height; for a top origin the y scale and offset are negated.
 *
 * NOTE(review): a viewport width or height of zero makes the divisions below
 * produce inf/NaN - confirm whether callers can hit that.
 */
static void fill_matrix_buffer(glsl_matrix_buffer *buffer)
{
	rsx::fill_viewport_matrix(buffer->viewport_matrix, true);
	rsx::fill_window_matrix(buffer->window_matrix, true);

	const u32 horizontal_reg = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL];
	const u32 vertical_reg = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL];

	// Low 16 bits hold the origin, high 16 bits hold the extent.
	const f32 rect_x = f32(horizontal_reg & 0xffff);
	const f32 rect_y = f32(vertical_reg & 0xffff);
	const f32 rect_w = f32(horizontal_reg >> 16);
	const f32 rect_h = f32(vertical_reg >> 16);

	const u32 window_reg = rsx::method_registers[NV4097_SET_SHADER_WINDOW];
	const rsx::window_origin origin = rsx::to_window_origin((window_reg >> 12) & 0xf);
	const u16 window_height = window_reg & 0xfff;
	const bool bottom_origin = origin == rsx::window_origin::bottom;

	const f32 left = rect_x;
	const f32 right = rect_x + rect_w;

	// With a bottom origin the vertical edges are flipped against the window height.
	const f32 top = bottom_origin ? window_height - (rect_y + rect_h) + 1 : rect_y;
	const f32 bottom = bottom_origin ? window_height - rect_y + 1 : rect_y + rect_h;

	const f32 span_x = right - left;
	const f32 span_y = top - bottom;

	const f32 scale_x = 2.0f / span_x;
	const f32 offset_x = -(right + left) / span_x;

	f32 scale_y = 2.0f / span_y;
	f32 offset_y = -(top + bottom) / span_y;

	if (origin == rsx::window_origin::top)
	{
		scale_y = -scale_y;
		offset_y = -offset_y;
	}

	rsx::fill_scale_offset_matrix(buffer->normalize_matrix, true,
		offset_x, offset_y, -1.0f,
		scale_x, scale_y, 2.0f);
}
|
||||
|
||||
bool GLGSRender::load_program()
|
||||
{
|
||||
rsx::program_info info = programs_cache.get(get_raw_program(), rsx::decompile_language::glsl);
|
||||
m_program = (gl::glsl::program*)info.program;
|
||||
m_program->use();
|
||||
|
||||
// u32 fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
|
||||
u32 fragment_constants_sz = info.fragment_shader.decompiled->constants.size() * sizeof(f32) * 4;
|
||||
fragment_constants_sz = std::max(32U, fragment_constants_sz);
|
||||
u32 max_buffer_sz = 8192 + 512 + fragment_constants_sz;
|
||||
u32 fragment_constants_count = info.fragment_shader.decompiled->constants.size();
|
||||
u32 fragment_constants_size = fragment_constants_count * sizeof(rsx::fragment_program::ucode_instr);
|
||||
|
||||
u32 is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]);
|
||||
u8 alpha_ref_raw = (u8)(rsx::method_registers[NV4097_SET_ALPHA_REF] & 0xFF);
|
||||
float alpha_ref = alpha_ref_raw / 255.f;
|
||||
u32 max_buffer_sz =
|
||||
align(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align) +
|
||||
align(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align) +
|
||||
align(fragment_constants_size, m_uniform_buffer_offset_align);
|
||||
|
||||
m_uniform_ring_buffer.reserve_and_map(max_buffer_sz);
|
||||
|
||||
u8 *buf;
|
||||
u32 scale_offset_offset;
|
||||
u32 vertex_constants_offset;
|
||||
u32 fragment_constants_offset;
|
||||
|
||||
m_uniform_ring_buffer->reserve_and_map(max_buffer_sz);
|
||||
auto mapping = m_uniform_ring_buffer->alloc_from_reserve(512);
|
||||
buf = static_cast<u8*>(mapping.first);
|
||||
scale_offset_offset = mapping.second;
|
||||
|
||||
fill_scale_offset_data(buf, false);
|
||||
memcpy(buf + 16 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float));
|
||||
memcpy(buf + 17 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float));
|
||||
memcpy(buf + 18 * sizeof(float), &is_alpha_tested, sizeof(u32));
|
||||
memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float));
|
||||
|
||||
mapping = m_uniform_ring_buffer->alloc_from_reserve(512 * 16);
|
||||
buf = static_cast<u8*>(mapping.first);
|
||||
vertex_constants_offset = mapping.second;
|
||||
|
||||
fill_vertex_program_constants_data(buf);
|
||||
|
||||
mapping = m_uniform_ring_buffer->alloc_from_reserve(fragment_constants_sz);
|
||||
buf = static_cast<u8*>(mapping.first);
|
||||
fragment_constants_offset = mapping.second;
|
||||
|
||||
// fill fragment constants
|
||||
if (!info.fragment_shader.decompiled->constants.empty())
|
||||
{
|
||||
auto mapping = m_uniform_ring_buffer.alloc_from_reserve(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align);
|
||||
fill_matrix_buffer((glsl_matrix_buffer *)mapping.first);
|
||||
scale_offset_offset = mapping.second;
|
||||
}
|
||||
|
||||
{
|
||||
auto mapping = m_uniform_ring_buffer.alloc_from_reserve(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align);
|
||||
fill_vertex_program_constants_data(mapping.first);
|
||||
vertex_constants_offset = mapping.second;
|
||||
}
|
||||
|
||||
if (fragment_constants_size)
|
||||
{
|
||||
auto mapping = m_uniform_ring_buffer.alloc_from_reserve(fragment_constants_size, m_uniform_buffer_offset_align);
|
||||
fragment_constants_offset = mapping.second;
|
||||
|
||||
u32 buffer_offset = 0;
|
||||
|
||||
static const __m128i mask = _mm_set_epi8(
|
||||
@ -587,31 +664,63 @@ bool GLGSRender::load_program()
|
||||
0x6, 0x7, 0x4, 0x5,
|
||||
0x2, 0x3, 0x0, 0x1);
|
||||
|
||||
auto ucode = (const rsx::fragment_program::ucode_instr*)info.fragment_shader.decompiled->raw->ucode_ptr;
|
||||
auto ucode = (const rsx::fragment_program::ucode_instr *)info.fragment_shader.decompiled->raw->ucode_ptr;
|
||||
|
||||
for (const auto& constant : info.fragment_shader.decompiled->constants)
|
||||
{
|
||||
const void *data = ucode + (u32)(constant.id / (sizeof(f32) * 4));
|
||||
const void *data = ucode + u32(constant.id / sizeof(rsx::fragment_program::ucode_instr));
|
||||
const __m128i &vector = _mm_loadu_si128((const __m128i*)data);
|
||||
const __m128i &shuffled_vector = _mm_shuffle_epi8(vector, mask);
|
||||
_mm_stream_si128((__m128i*)((char*)buf + buffer_offset), shuffled_vector);
|
||||
_mm_stream_si128((__m128i*)((char*)mapping.first + buffer_offset), shuffled_vector);
|
||||
|
||||
//float x = ((float*)((char*)buf + buffer_offset))[0];
|
||||
//float y = ((float*)((char*)buf + buffer_offset))[1];
|
||||
//float z = ((float*)((char*)buf + buffer_offset))[2];
|
||||
//float w = ((float*)((char*)buf + buffer_offset))[3];
|
||||
//float x = ((float*)((char*)mapping.first + buffer_offset))[0];
|
||||
//float y = ((float*)((char*)mapping.first + buffer_offset))[1];
|
||||
//float z = ((float*)((char*)mapping.first + buffer_offset))[2];
|
||||
//float w = ((float*)((char*)mapping.first + buffer_offset))[3];
|
||||
|
||||
//LOG_WARNING(RSX, "fc%u = {%g, %g, %g, %g}", constant.id, x, y, z, w);
|
||||
buffer_offset += 4 * sizeof(f32);
|
||||
}
|
||||
}
|
||||
|
||||
m_uniform_ring_buffer->unmap();
|
||||
m_uniform_ring_buffer.unmap();
|
||||
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 0, m_uniform_ring_buffer->get_buffer().id(), scale_offset_offset, 512);
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_ring_buffer->get_buffer().id(), vertex_constants_offset, 512 * 16);
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 2, m_uniform_ring_buffer->get_buffer().id(), fragment_constants_offset, fragment_constants_sz);
|
||||
/*
|
||||
{
|
||||
|
||||
m_uniform_ring_buffer.bind();
|
||||
|
||||
auto buffer_range = m_uniform_ring_buffer.allocate(
|
||||
align(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align) +
|
||||
align(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align));
|
||||
|
||||
gl::allocator allocator{ m_uniform_ring_buffer, buffer_range };
|
||||
|
||||
matrix_buffer_range = allocator.allocate(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align);
|
||||
vertex_constants_buffer_range = allocator.allocate(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align);
|
||||
|
||||
glsl_matrix_buffer *buffer = allocator.get<glsl_matrix_buffer>(matrix_buffer_range);
|
||||
fill_scale_offset_data(buffer, false);
|
||||
fill_matrix_buffer(buffer);
|
||||
fill_vertex_program_constants_data(allocator.get<glsl_vertex_constants_buffer>(vertex_constants_buffer_range));
|
||||
|
||||
if (contains_fragment_constants)
|
||||
{
|
||||
//fragment_constants_buffer_range = allocator.allocate(info.fragment_shader.decompiled->constants.size() * sizeof(f32) * 4);
|
||||
}
|
||||
}
|
||||
|
||||
if (contains_fragment_constants)
|
||||
{
|
||||
//m_uniform_ring_buffer.bind_range(2, fragment_constants_buffer_range);
|
||||
}
|
||||
*/
|
||||
|
||||
m_uniform_ring_buffer.bind_range(0, scale_offset_offset, sizeof(glsl_matrix_buffer));
|
||||
m_uniform_ring_buffer.bind_range(1, vertex_constants_offset, sizeof(glsl_vertex_constants_buffer));
|
||||
m_uniform_ring_buffer.bind_range(2, fragment_constants_offset, fragment_constants_size);
|
||||
|
||||
__glcheck 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -26,9 +26,9 @@ private:
|
||||
gl::gl_texture_cache m_gl_texture_cache;
|
||||
|
||||
gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count];
|
||||
std::unique_ptr<gl::ring_buffer> m_attrib_ring_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_uniform_ring_buffer;
|
||||
std::unique_ptr<gl::ring_buffer> m_index_ring_buffer;
|
||||
gl::ring_buffer m_attrib_ring_buffer;
|
||||
gl::ring_buffer m_uniform_ring_buffer;
|
||||
gl::ring_buffer m_index_ring_buffer;
|
||||
|
||||
u32 m_draw_calls = 0;
|
||||
u32 m_begin_time = 0;
|
||||
@ -36,6 +36,7 @@ private:
|
||||
u32 m_vertex_upload_time = 0;
|
||||
|
||||
GLint m_min_texbuffer_alignment = 256;
|
||||
GLint m_uniform_buffer_offset_align = 256;
|
||||
|
||||
public:
|
||||
gl::fbo draw_fbo;
|
||||
|
@ -47,7 +47,7 @@ namespace gl
|
||||
}
|
||||
}
|
||||
};
|
||||
#define __glcheck gl::__glcheck_impl_t{ __FILE__, __FUNCTION__, __LINE__ },
|
||||
#define __glcheck ::gl::__glcheck_impl_t{ __FILE__, __FUNCTION__, __LINE__ },
|
||||
#else
|
||||
#define __glcheck
|
||||
#endif
|
||||
@ -496,6 +496,11 @@ namespace gl
|
||||
glBindBuffer((GLenum)target_, m_id);
|
||||
}
|
||||
|
||||
void bind() const
|
||||
{
|
||||
bind(current_target());
|
||||
}
|
||||
|
||||
target current_target() const
|
||||
{
|
||||
return m_target;
|
||||
@ -580,12 +585,9 @@ namespace gl
|
||||
}
|
||||
};
|
||||
|
||||
class ring_buffer
|
||||
class ring_buffer : public buffer
|
||||
{
|
||||
buffer storage_buffer;
|
||||
buffer::target m_target;
|
||||
u32 m_data_loc = 0;
|
||||
u32 m_size;
|
||||
|
||||
u32 m_mapped_block_size = 0;
|
||||
u32 m_mapped_block_offset;
|
||||
@ -594,50 +596,39 @@ namespace gl
|
||||
void *m_mapped_base = nullptr;
|
||||
|
||||
public:
|
||||
ring_buffer(u32 initial_size, buffer::target target)
|
||||
std::pair<void*, u32> alloc_and_map(u32 alloc_size)
|
||||
{
|
||||
storage_buffer.create();
|
||||
storage_buffer.data(initial_size);
|
||||
m_size = initial_size;
|
||||
m_target = target;
|
||||
}
|
||||
alloc_size = align(alloc_size, 0x100);
|
||||
|
||||
void destroy()
|
||||
{
|
||||
storage_buffer.remove();
|
||||
}
|
||||
|
||||
std::pair<void*, u32> alloc_and_map(u32 size)
|
||||
{
|
||||
size = (size + 255) & ~255;
|
||||
|
||||
glBindBuffer((GLenum)m_target, storage_buffer.id());
|
||||
u32 limit = m_data_loc + size;
|
||||
if (limit > m_size)
|
||||
buffer::bind();
|
||||
u32 limit = m_data_loc + alloc_size;
|
||||
if (limit > buffer::size())
|
||||
{
|
||||
if (size > m_size)
|
||||
m_size = size;
|
||||
if (alloc_size > buffer::size())
|
||||
{
|
||||
buffer::data(alloc_size);
|
||||
}
|
||||
|
||||
storage_buffer.data(m_size, nullptr);
|
||||
m_data_loc = 0;
|
||||
}
|
||||
|
||||
void *ptr = glMapBufferRange((GLenum)m_target, m_data_loc, size, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
void *ptr = glMapBufferRange((GLenum)buffer::current_target(), m_data_loc, alloc_size,
|
||||
GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
u32 offset = m_data_loc;
|
||||
m_data_loc += size;
|
||||
m_data_loc += alloc_size;
|
||||
return std::make_pair(ptr, offset);
|
||||
}
|
||||
|
||||
void unmap()
|
||||
{
|
||||
glUnmapBuffer((GLenum)m_target);
|
||||
buffer::unmap();
|
||||
m_mapped_block_size = 0;
|
||||
m_mapped_base = 0;
|
||||
}
|
||||
|
||||
void reserve_and_map(u32 max_size)
|
||||
{
|
||||
max_size = (max_size + 4095) & ~4095;
|
||||
max_size = align(max_size, 0x1000);
|
||||
auto mapping = alloc_and_map(max_size);
|
||||
m_mapped_base = mapping.first;
|
||||
m_mapped_block_offset = mapping.second;
|
||||
@ -647,8 +638,7 @@ namespace gl
|
||||
|
||||
std::pair<void*, u32> alloc_from_reserve(u32 size, u32 alignment = 16)
|
||||
{
|
||||
alignment -= 1;
|
||||
size = (size + alignment) & ~alignment;
|
||||
size = align(size, alignment);
|
||||
|
||||
if (m_mapped_bytes_available < size || !m_mapped_base)
|
||||
{
|
||||
@ -670,13 +660,13 @@ namespace gl
|
||||
m_mapped_reserve_offset += size;
|
||||
m_mapped_bytes_available -= size;
|
||||
|
||||
EXPECTS((offset & alignment) == 0);
|
||||
EXPECTS((offset & (alignment - 1)) == 0);
|
||||
return std::make_pair(ptr, offset);
|
||||
}
|
||||
|
||||
buffer& get_buffer()
|
||||
void bind_range(u32 index, u32 offset, u32 size) const
|
||||
{
|
||||
return storage_buffer;
|
||||
glBindBufferRange((GLenum)current_target(), index, id(), offset, size);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -247,17 +247,16 @@ rsx::complete_shader glsl_complete_shader(const rsx::decompiled_shader &shader,
|
||||
result.code += "out vec4 wpos;\n";
|
||||
|
||||
// TODO
|
||||
if (1)
|
||||
if (0)
|
||||
{
|
||||
finalize += "\tgl_Position = o0;\n";
|
||||
finalize += "\tgl_Position = gl_Position * viewport_matrix;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
finalize +=
|
||||
" wpos = window_matrix * viewport_matrix * vec4(o0.xyz, 1.0);\n"
|
||||
" gl_Position = normalize_matrix * vec4(wpos.xyz, 1.0);\n"
|
||||
" gl_Position.w = wpos.w = o0.w;\n";
|
||||
" gl_Position.w = o0.w;\n";
|
||||
}
|
||||
|
||||
for (std::size_t index = 0; index < 16; ++index)
|
||||
|
@ -203,14 +203,14 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
vertex_draw_count = (u32)get_index_count(draw_mode, gsl::narrow<int>(vertex_draw_count));
|
||||
u32 block_sz = vertex_draw_count * type_size;
|
||||
|
||||
auto mapping = m_index_ring_buffer->alloc_and_map(block_sz);
|
||||
auto mapping = m_index_ring_buffer.alloc_and_map(block_sz);
|
||||
void *ptr = mapping.first;
|
||||
offset_in_index_buffer = mapping.second;
|
||||
|
||||
gsl::span<gsl::byte> dst{ reinterpret_cast<gsl::byte*>(ptr), gsl::narrow<u32>(block_sz) };
|
||||
std::tie(min_index, max_index) = write_index_array_data_to_buffer(dst, type, draw_mode, first_count_commands);
|
||||
|
||||
m_index_ring_buffer->unmap();
|
||||
m_index_ring_buffer.unmap();
|
||||
}
|
||||
|
||||
if (draw_command == rsx::draw_command::inlined_array)
|
||||
@ -228,7 +228,7 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
}
|
||||
|
||||
vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride;
|
||||
m_attrib_ring_buffer->reserve_and_map(vertex_draw_count * max_vertex_attrib_size);
|
||||
m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size);
|
||||
|
||||
for (int index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
@ -253,7 +253,7 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
auto &texture = m_gl_attrib_buffers[index];
|
||||
|
||||
u8 *src = reinterpret_cast<u8*>(inline_vertex_array.data());
|
||||
auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment);
|
||||
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
|
||||
u8 *dst = static_cast<u8*>(mapping.first);
|
||||
|
||||
src += offsets[index];
|
||||
@ -276,13 +276,13 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
dst += element_size;
|
||||
}
|
||||
|
||||
texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size);
|
||||
texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size);
|
||||
|
||||
//Link texture to uniform
|
||||
m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture);
|
||||
if (!is_primitive_native(draw_mode))
|
||||
{
|
||||
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode, *m_index_ring_buffer);
|
||||
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode, m_index_ring_buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -298,7 +298,7 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
if (draw_command == rsx::draw_command::array || draw_command == rsx::draw_command::indexed)
|
||||
{
|
||||
u32 verts_allocated = std::max(vertex_draw_count, max_index + 1);
|
||||
m_attrib_ring_buffer->reserve_and_map(verts_allocated * max_vertex_attrib_size);
|
||||
m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size);
|
||||
|
||||
for (int index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
@ -337,7 +337,7 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
|
||||
if (draw_command == rsx::draw_command::array)
|
||||
{
|
||||
auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment);
|
||||
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
|
||||
gsl::byte *dst = static_cast<gsl::byte*>(mapping.first);
|
||||
buffer_offset = mapping.second;
|
||||
|
||||
@ -354,7 +354,7 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
if (draw_command == rsx::draw_command::indexed)
|
||||
{
|
||||
data_size = (max_index + 1) * element_size;
|
||||
auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment);
|
||||
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
|
||||
gsl::byte *dst = static_cast<gsl::byte*>(mapping.first);
|
||||
buffer_offset = mapping.second;
|
||||
|
||||
@ -364,7 +364,7 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size));
|
||||
}
|
||||
|
||||
texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, buffer_offset, data_size);
|
||||
texture.copy_from(m_attrib_ring_buffer, gl_type, buffer_offset, data_size);
|
||||
|
||||
//Link texture to uniform
|
||||
m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture);
|
||||
@ -385,11 +385,11 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
|
||||
auto &texture = m_gl_attrib_buffers[index];
|
||||
|
||||
auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment);
|
||||
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
|
||||
u8 *dst = static_cast<u8*>(mapping.first);
|
||||
|
||||
memcpy(dst, vertex_data.data(), data_size);
|
||||
texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size);
|
||||
texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size);
|
||||
|
||||
//Link texture to uniform
|
||||
m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture);
|
||||
@ -411,11 +411,11 @@ u32 GLGSRender::set_vertex_buffer()
|
||||
|
||||
if (draw_command == rsx::draw_command::array && !is_primitive_native(draw_mode))
|
||||
{
|
||||
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(first_count_commands, draw_mode, *m_index_ring_buffer);
|
||||
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(first_count_commands, draw_mode, m_index_ring_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
m_attrib_ring_buffer->unmap();
|
||||
m_attrib_ring_buffer.unmap();
|
||||
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
|
||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
|
||||
|
@ -1,5 +1,8 @@
|
||||
#include "stdafx.h"
|
||||
#include "rsx_utils.h"
|
||||
#include "rsx_methods.h"
|
||||
#include "Emu/RSX/GCM.h"
|
||||
#include "Common/BufferUtils.h"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
@ -42,4 +45,66 @@ namespace rsx
|
||||
dst.reset(new u8[clip_h * dst_pitch]);
|
||||
clip_image(dst.get(), src, clip_x, clip_y, clip_w, clip_h, bpp, src_pitch, dst_pitch);
|
||||
}
|
||||
|
||||
void fill_scale_offset_matrix(void *dest_, bool transpose,
|
||||
float offset_x, float offset_y, float offset_z,
|
||||
float scale_x, float scale_y, float scale_z)
|
||||
{
|
||||
char *dest = (char*)dest_;
|
||||
|
||||
if (transpose)
|
||||
{
|
||||
stream_vector(dest + 4 * sizeof(f32) * 0, scale_x, 0, 0, 0);
|
||||
stream_vector(dest + 4 * sizeof(f32) * 1, 0, scale_y, 0, 0);
|
||||
stream_vector(dest + 4 * sizeof(f32) * 2, 0, 0, scale_z, 0);
|
||||
stream_vector(dest + 4 * sizeof(f32) * 3, offset_x, offset_y, offset_z, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
stream_vector(dest + 4 * sizeof(f32) * 0, scale_x, 0, 0, offset_x);
|
||||
stream_vector(dest + 4 * sizeof(f32) * 1, 0, scale_y, 0, offset_y);
|
||||
stream_vector(dest + 4 * sizeof(f32) * 2, 0, 0, scale_z, offset_z);
|
||||
stream_vector(dest + 4 * sizeof(f32) * 3, 0.f, 0.f, 0.f, 1.f);
|
||||
}
|
||||
}
|
||||
|
||||
void fill_window_matrix(void *dest, bool transpose)
|
||||
{
|
||||
u32 shader_window = method_registers[NV4097_SET_SHADER_WINDOW];
|
||||
|
||||
u16 height = shader_window & 0xfff;
|
||||
window_origin origin = to_window_origin((shader_window >> 12) & 0xf);
|
||||
window_pixel_center pixelCenter = to_window_pixel_center(shader_window >> 16);
|
||||
|
||||
f32 offset_x = f32(method_registers[NV4097_SET_WINDOW_OFFSET] & 0xffff);
|
||||
f32 offset_y = f32(method_registers[NV4097_SET_WINDOW_OFFSET] >> 16);
|
||||
f32 scale_y = 1.0;
|
||||
|
||||
if (origin == window_origin::bottom)
|
||||
{
|
||||
offset_y = height - offset_y + 1;
|
||||
scale_y = -1.0f;
|
||||
}
|
||||
|
||||
if (false && pixelCenter == window_pixel_center::half)
|
||||
{
|
||||
offset_x += 0.5f;
|
||||
offset_y += 0.5f;
|
||||
}
|
||||
|
||||
fill_scale_offset_matrix(dest, transpose, offset_x, offset_y, 0.0f, 1.0f, scale_y, 1.0f);
|
||||
}
|
||||
|
||||
void fill_viewport_matrix(void *buffer, bool transpose)
|
||||
{
|
||||
f32 offset_x = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 0];
|
||||
f32 offset_y = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 1];
|
||||
f32 offset_z = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 2];
|
||||
|
||||
f32 scale_x = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 0];
|
||||
f32 scale_y = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 1];
|
||||
f32 scale_z = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 2];
|
||||
|
||||
fill_scale_offset_matrix(buffer, transpose, offset_x, offset_y, offset_z, scale_x, scale_y, scale_z);
|
||||
}
|
||||
}
|
||||
|
@ -114,4 +114,10 @@ namespace rsx
|
||||
|
||||
void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
|
||||
void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch);
|
||||
|
||||
void fill_scale_offset_matrix(void *dest_, bool transpose,
|
||||
float offset_x, float offset_y, float offset_z,
|
||||
float scale_x, float scale_y, float scale_z);
|
||||
void fill_window_matrix(void *dest, bool transpose);
|
||||
void fill_viewport_matrix(void *buffer, bool transpose);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user