diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 468656c542..b9f46006c7 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -435,7 +435,7 @@ namespace } } -void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::span src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride) +void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::span src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness) { verify(HERE), (vector_element_count > 0); const u32 src_read_stride = rsx::get_vertex_type_size_on_host(type, vector_element_count); @@ -460,12 +460,15 @@ void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::s #if !DEBUG_VERTEX_STREAMING - if (real_count >= count || real_count == 1) + if (swap_endianness) { - if (attribute_src_stride == dst_stride && src_read_stride == dst_stride) - use_stream_no_stride = true; - else - use_stream_with_stride = true; + if (real_count >= count || real_count == 1) + { + if (attribute_src_stride == dst_stride && src_read_stride == dst_stride) + use_stream_no_stride = true; + else + use_stream_with_stride = true; + } } #endif @@ -492,8 +495,10 @@ void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::s stream_data_to_memory_swapped_u16(raw_dst_span.data(), src_ptr.data(), count, attribute_src_stride); else if (use_stream_with_stride) stream_data_to_memory_swapped_u16_non_continuous(raw_dst_span.data(), src_ptr.data(), count, dst_stride, attribute_src_stride); - else + else if (swap_endianness) copy_whole_attribute_array, u16>((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count); + else + copy_whole_attribute_array((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count); return; } @@ -503,8 +508,10 @@ void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::s stream_data_to_memory_swapped_u32(raw_dst_span.data(), src_ptr.data(), count, attribute_src_stride); else if (use_stream_with_stride) stream_data_to_memory_swapped_u32_non_continuous(raw_dst_span.data(), src_ptr.data(), count, dst_stride, attribute_src_stride); - else + else if (swap_endianness) copy_whole_attribute_array, u32>((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count); + else + copy_whole_attribute_array((void *)raw_dst_span.data(), (void *)src_ptr.data(), vector_element_count, dst_stride, attribute_src_stride, count, real_count); return; } @@ -513,10 +520,11 @@ void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::s gsl::span dst_span = as_span_workaround(raw_dst_span); for (u32 i = 0; i < count; ++i) { - be_t src_value; - memcpy(&src_value, - src_ptr.subspan(attribute_src_stride * i).data(), - sizeof(be_t)); + u32 src_value; + memcpy(&src_value, src_ptr.subspan(attribute_src_stride * i).data(), sizeof(u32)); + + if (swap_endianness) src_value = se_storage::swap(src_value); + const auto& decoded_vector = decode_cmp_vector(src_value); dst_span[i * dst_stride / sizeof(u16)] = decoded_vector[0]; dst_span[i * dst_stride / sizeof(u16) + 1] = decoded_vector[1]; diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index bdf231b935..01d98e6cd7 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -10,7 +10,7 @@ * Write count vertex attributes from src_ptr. * src_ptr array layout is deduced from the type, vector element count and src_stride arguments. */ -void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::span src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride); +void write_vertex_array_data_to_buffer(gsl::span raw_dst_span, gsl::span src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness); /* * If primitive mode is not supported and need to be emulated (using an index buffer) returns false. diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 78ba8ddf35..f1225eea4a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -167,7 +167,7 @@ namespace gsl::span mapped_buffer_span = { (gsl::byte*)mapped_buffer, gsl::narrow_cast(buffer_size)}; write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count, - vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size); + vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size, vertex_array.is_be); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index f91247d0ac..324a932aa1 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1292,7 +1292,7 @@ namespace rsx { const rsx::data_array_format_info& info = state.vertex_arrays_info[index]; result.push_back(vertex_array_buffer{info.type(), info.size(), info.stride(), - get_raw_vertex_buffer(info, state.vertex_data_base_offset(), vertex_ranges), index}); + get_raw_vertex_buffer(info, state.vertex_data_base_offset(), vertex_ranges), index, true}); continue; } @@ -1302,7 +1302,7 @@ namespace rsx const u8 element_size = info.size * sizeof(u32); gsl::span vertex_src = { (const gsl::byte*)vertex_push_buffers[index].data.data(), vertex_push_buffers[index].vertex_count * element_size }; - result.push_back(vertex_array_buffer{ info.type, info.size, element_size, vertex_src, index }); + result.push_back(vertex_array_buffer{ info.type, info.size, element_size, vertex_src, index, false }); continue; } @@ -2416,7 +2416,7 @@ namespace rsx s32 size = 0; s32 attributes = 0; - bool is_be_type = true; + bool swap_u8_types = false; if (layout.attribute_placement[index] == attribute_buffer_placement::transient) { @@ -2429,12 +2429,14 @@ namespace rsx attributes = layout.interleaved_blocks[0].attribute_stride; attributes |= default_frequency_mask | volatile_storage_mask; - is_be_type = false; + // [NPEA90002] Grass is rendered via inline array + // Expects swapped bytes for u8 types + swap_u8_types = true; } else { - //Data is either from an immediate render or register input - //Immediate data overrides register input + // Data is either from an immediate render or register input + // Immediate data overrides register input if (rsx::method_registers.current_draw_clause.is_immediate_draw && vertex_push_buffers[index].vertex_count > 1) @@ -2446,11 +2448,13 @@ namespace rsx attributes = rsx::get_vertex_type_size_on_host(type, size); attributes |= default_frequency_mask | volatile_storage_mask; - is_be_type = true; + // RDR intro contains text passed via immediate render mode + // Expects swapped bytes for u8 types + swap_u8_types = true; } else { - //Register + // Register const auto& info = rsx::method_registers.register_vertex_info[index]; type = info.type; size = info.size; @@ -2458,7 +2462,8 @@ namespace rsx attributes = rsx::get_vertex_type_size_on_host(type, size); attributes |= volatile_storage_mask; - is_be_type = false; + // Resistance intro expects u8 types in native order + // swap_u8_types = false; } } } @@ -2478,8 +2483,10 @@ namespace rsx { case 0: case 1: + { attributes |= default_frequency_mask; break; + } default: { if (modulo_mask & (1 << index)) @@ -2487,25 +2494,31 @@ namespace rsx attributes |= repeating_frequency_mask; attributes |= (frequency << 13) & input_divisor_mask; + break; } } } } //end attribute placement check + // If data is passed via registers, it is already received in little endian + const bool is_be_type = (layout.attribute_placement[index] != attribute_buffer_placement::transient); + bool to_swap_bytes = is_be_type; + switch (type) { case rsx::vertex_base_type::cmp: + // Compressed 4 components into one 4-byte value size = 1; - //fall through - default: - if (is_be_type) attributes |= swap_storage_mask; break; case rsx::vertex_base_type::ub: case rsx::vertex_base_type::ub256: - if (!is_be_type) attributes |= swap_storage_mask; + // These are single byte formats, but inverted order (BGRA vs ARGB) when passed via registers + to_swap_bytes = swap_u8_types; break; } + if (to_swap_bytes) attributes |= swap_storage_mask; + buffer[index * 4 + 0] = static_cast(type); buffer[index * 4 + 1] = size; buffer[index * 4 + 2] = offset_in_block[index]; @@ -2529,7 +2542,7 @@ namespace rsx return; } - //NOTE: Order is important! Transient ayout is always push_buffers followed by register data + //NOTE: Order is important! Transient layout is always push_buffers followed by register data if (draw_call.is_immediate_draw) { //NOTE: It is possible for immediate draw to only contain index data, so vertex data can be in persistent memory diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 011321d0e8..afe80ee3d1 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -138,6 +138,7 @@ namespace rsx u8 stride; gsl::span data; u8 index; + bool is_be; }; struct vertex_array_register diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 3a35f34155..b4cd0af0ef 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -226,7 +226,10 @@ namespace rsx const auto vtype = vertex_data_type_from_element_type::type; if (rsx->in_begin_end) + { + // Update to immediate mode register/array, aliasing with the register view rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg); + } auto& info = rsx::method_registers.register_vertex_info[attribute_index]; diff --git a/rpcs3/Emu/RSX/rsx_vertex_data.h b/rpcs3/Emu/RSX/rsx_vertex_data.h index 0aa7fb53bc..252020b1d9 100644 --- a/rpcs3/Emu/RSX/rsx_vertex_data.h +++ b/rpcs3/Emu/RSX/rsx_vertex_data.h @@ -106,26 +106,7 @@ struct push_buffer_vertex_info attribute_mask |= element_mask; void* dst = data.data() + ((vertex_count - 1) * vertex_size) + sub_index; - - //NOTE: Endianness on wide types is converted to BE here because unified upload code assumes input in BE - //TODO: Implement fast LE source inputs and remove the byteswap - switch (type) - { - case vertex_base_type::f: - *(u32*)dst = se_storage::swap(arg); - break; - case vertex_base_type::ub: - case vertex_base_type::ub256: - *(u32*)dst = arg; - break; - case vertex_base_type::s1: - case vertex_base_type::s32k: - ((u16*)dst)[0] = se_storage::swap((u16)(arg & 0xffff)); - ((u16*)dst)[1] = se_storage::swap((u16)(arg >> 16)); - break; - default: - fmt::throw_exception("Unsupported vertex base type %d", (u8)type); - } + *(u32*)dst = arg; } };