mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 03:32:55 +00:00
rsx: Batch vertex program load methods
This commit is contained in:
parent
85c4321c24
commit
d97e9f7b4a
@ -77,8 +77,10 @@ namespace
|
||||
X = X << 5;
|
||||
return{ X, Y, Z, 1 };
|
||||
}
|
||||
}
|
||||
|
||||
inline void stream_data_to_memory_swapped_u32(void *dst, const void *src, u32 vertex_count, u8 stride)
|
||||
template <bool unaligned>
|
||||
void stream_data_to_memory_swapped_u32(void *dst, const void *src, u32 vertex_count, u8 stride)
|
||||
{
|
||||
const __m128i mask = _mm_set_epi8(
|
||||
0xC, 0xD, 0xE, 0xF,
|
||||
@ -99,7 +101,15 @@ namespace
|
||||
{
|
||||
const __m128i vector = _mm_loadu_si128(src_ptr);
|
||||
const __m128i shuffled_vector = ssse3_shuffle_epi8(vector, mask);
|
||||
_mm_stream_si128(dst_ptr, shuffled_vector);
|
||||
|
||||
if constexpr (!unaligned)
|
||||
{
|
||||
_mm_stream_si128(dst_ptr, shuffled_vector);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_storeu_si128(dst_ptr, shuffled_vector);
|
||||
}
|
||||
|
||||
src_ptr++;
|
||||
dst_ptr++;
|
||||
@ -112,7 +122,15 @@ namespace
|
||||
const __m128i vec0 = _mm_loadu_si128(src_ptr);
|
||||
const __m128i vec1 = _mm_or_si128(_mm_slli_epi16(vec0, 8), _mm_srli_epi16(vec0, 8));
|
||||
const __m128i vec2 = _mm_or_si128(_mm_slli_epi32(vec1, 16), _mm_srli_epi32(vec1, 16));
|
||||
_mm_stream_si128(dst_ptr, vec2);
|
||||
|
||||
if constexpr (!unaligned)
|
||||
{
|
||||
_mm_stream_si128(dst_ptr, vec2);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_storeu_si128(dst_ptr, vec2);
|
||||
}
|
||||
|
||||
src_ptr++;
|
||||
dst_ptr++;
|
||||
@ -129,6 +147,11 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
template void stream_data_to_memory_swapped_u32<false>(void *, const void *, u32, u8);
|
||||
template void stream_data_to_memory_swapped_u32<true>(void*, const void*, u32, u8);
|
||||
|
||||
namespace
|
||||
{
|
||||
inline void stream_data_to_memory_swapped_u16(void *dst, const void *src, u32 vertex_count, u8 stride)
|
||||
{
|
||||
const __m128i mask = _mm_set_epi8(
|
||||
|
@ -55,3 +55,11 @@ void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w);
|
||||
* Stream a 128 bits vector from src to dst.
|
||||
*/
|
||||
void stream_vector_from_memory(void *dst, void *src);
|
||||
|
||||
/**
|
||||
* Stream and swap data in u32 units.
|
||||
*/
|
||||
template <bool unaligned = false>
|
||||
void stream_data_to_memory_swapped_u32(void *dst, const void *src, u32 vertex_count, u8 stride);
|
||||
|
||||
|
||||
|
@ -524,32 +524,44 @@ namespace rsx
|
||||
rsx::frame_capture_data::replay_command replay_cmd;
|
||||
replay_cmd.rsx_command = std::make_pair((reg << 2) | (1u << 18), value);
|
||||
|
||||
frame_capture.replay_commands.push_back(replay_cmd);
|
||||
auto it = frame_capture.replay_commands.back();
|
||||
auto& commands = frame_capture.replay_commands;
|
||||
commands.push_back(replay_cmd);
|
||||
|
||||
switch (reg)
|
||||
{
|
||||
case NV3089_IMAGE_IN:
|
||||
capture::capture_image_in(this, it);
|
||||
capture::capture_image_in(this, commands.back());
|
||||
break;
|
||||
case NV0039_BUFFER_NOTIFY:
|
||||
capture::capture_buffer_notify(this, it);
|
||||
capture::capture_buffer_notify(this, commands.back());
|
||||
break;
|
||||
default:
|
||||
{
|
||||
// Use legacy logic for NV308A_COLOR - enqueue leading command with count
|
||||
static constexpr std::array<std::pair<u32, u32>, 2> ranges
|
||||
{{
|
||||
{NV308A_COLOR, 0x700},
|
||||
{NV4097_SET_TRANSFORM_PROGRAM, 32}
|
||||
}};
|
||||
|
||||
// Use legacy logic - enqueue leading command with count
|
||||
// Then enqueue each command arg alone with a no-op command
|
||||
if (reg >= NV308A_COLOR && reg < NV308A_COLOR + 0x700)
|
||||
for (const auto& range : ranges)
|
||||
{
|
||||
const u32 remaining = std::min<u32>(fifo_ctrl->get_remaining_args_count(), (NV308A_COLOR + 0x700) - reg);
|
||||
|
||||
it.rsx_command.first = (fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) | (reg << 2) | (remaining << 18);
|
||||
|
||||
for (u32 i = 0; i < remaining && fifo_ctrl->get_pos() + (i + 1) * 4 != (ctrl->put & ~3); i++)
|
||||
if (reg >= range.first && reg < range.first + range.second)
|
||||
{
|
||||
replay_cmd.rsx_command = std::make_pair(0, vm::read32(fifo_ctrl->get_current_arg_ptr() + (i + 1) * 4));
|
||||
const u32 remaining = std::min<u32>(fifo_ctrl->get_remaining_args_count() + 1,
|
||||
(fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) ? UINT32_MAX : (range.first + range.second) - reg);
|
||||
|
||||
frame_capture.replay_commands.push_back(replay_cmd);
|
||||
commands.back().rsx_command.first = (fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) | (reg << 2) | (remaining << 18);
|
||||
|
||||
for (u32 i = 1; i < remaining && fifo_ctrl->get_pos() + (i - 1) * 4 != (ctrl->put & ~3); i++)
|
||||
{
|
||||
replay_cmd.rsx_command = std::make_pair(0, vm::read32(fifo_ctrl->get_current_arg_ptr() + (i * 4)));
|
||||
|
||||
commands.push_back(replay_cmd);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "rsx_decode.h"
|
||||
#include "Emu/Cell/PPUCallback.h"
|
||||
#include "Emu/Cell/lv2/sys_rsx.h"
|
||||
#include "Emu/RSX/Common/BufferUtils.h"
|
||||
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
@ -450,17 +451,30 @@ namespace rsx
|
||||
{
|
||||
static void impl(thread* rsx, u32 _reg, u32 arg)
|
||||
{
|
||||
if (rsx::method_registers.transform_program_load() >= 512)
|
||||
// Get real args count
|
||||
const u32 count = std::min<u32>({rsx->fifo_ctrl->get_remaining_args_count() + 1,
|
||||
static_cast<u32>(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos() - 4)) / 4), 32 - index});
|
||||
|
||||
const u32 load_pos = rsx::method_registers.transform_program_load();
|
||||
|
||||
u32 rcount = count;
|
||||
|
||||
if (const u32 max = load_pos * 4 + rcount + (index % 4);
|
||||
max > 512 * 4)
|
||||
{
|
||||
// PS3 seems to allow exceeding the program buffer by upto 32 instructions before crashing
|
||||
// Discard the "excess" instructions to not overflow our transform program buffer
|
||||
// TODO: Check if the instructions in the overflow area are executed by PS3
|
||||
rsx_log.warning("Program buffer overflow!");
|
||||
return;
|
||||
rcount -= max - (512 * 4);
|
||||
}
|
||||
|
||||
method_registers.commit_4_transform_program_instructions(index);
|
||||
stream_data_to_memory_swapped_u32<true>(&rsx::method_registers.transform_program[load_pos * 4 + index % 4]
|
||||
, vm::base(rsx->fifo_ctrl->get_current_arg_ptr()), rcount, 4);
|
||||
|
||||
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty;
|
||||
rsx::method_registers.transform_program_load_set(load_pos + ((rcount + index % 4) / 4));
|
||||
rsx->fifo_ctrl->skip_methods(count - 1);
|
||||
}
|
||||
};
|
||||
|
||||
@ -2994,7 +3008,7 @@ namespace rsx
|
||||
bind_range<NV4097_SET_VERTEX_DATA2S_M, 1, 16, nv4097::set_vertex_data2s_m>();
|
||||
bind_range<NV4097_SET_VERTEX_DATA4S_M, 1, 32, nv4097::set_vertex_data4s_m>();
|
||||
bind_range<NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant>();
|
||||
bind_range<NV4097_SET_TRANSFORM_PROGRAM + 3, 4, 32 / 4, nv4097::set_transform_program>();
|
||||
bind_range<NV4097_SET_TRANSFORM_PROGRAM, 1, 32, nv4097::set_transform_program>();
|
||||
bind<NV4097_GET_REPORT, nv4097::get_report>();
|
||||
bind<NV4097_CLEAR_REPORT_VALUE, nv4097::clear_report_value>();
|
||||
bind<NV4097_SET_SURFACE_CLIP_HORIZONTAL, nv4097::set_surface_dirty_bit>();
|
||||
|
@ -1610,20 +1610,14 @@ namespace rsx
|
||||
return u16(registers[NV308A_SIZE_OUT] & 0xFFFF);
|
||||
}
|
||||
|
||||
u32 transform_program_load()
|
||||
u32 transform_program_load() const
|
||||
{
|
||||
return registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD];
|
||||
}
|
||||
|
||||
void commit_4_transform_program_instructions(u32 index)
|
||||
void transform_program_load_set(u32 value)
|
||||
{
|
||||
u32& load = registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD];
|
||||
|
||||
transform_program[load * 4] = registers[NV4097_SET_TRANSFORM_PROGRAM + index * 4];
|
||||
transform_program[load * 4 + 1] = registers[NV4097_SET_TRANSFORM_PROGRAM + index * 4 + 1];
|
||||
transform_program[load * 4 + 2] = registers[NV4097_SET_TRANSFORM_PROGRAM + index * 4 + 2];
|
||||
transform_program[load * 4 + 3] = registers[NV4097_SET_TRANSFORM_PROGRAM + index * 4 + 3];
|
||||
load++;
|
||||
registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD] = value;
|
||||
}
|
||||
|
||||
u32 transform_constant_load()
|
||||
|
Loading…
x
Reference in New Issue
Block a user