From 5a4babb0fdbd13d87c0862bd6d90bdd9bfa59b40 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Tue, 5 Jan 2016 18:26:44 +0200 Subject: [PATCH 01/13] Implemented basic tiled regions support --- rpcs3/Emu/RSX/RSXThread.cpp | 136 ++++++++++++++++++++++++++++++++++++ rpcs3/Emu/RSX/RSXThread.h | 14 ++++ 2 files changed, 150 insertions(+) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 8c11798d31..d40b67cd9f 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -934,6 +934,108 @@ namespace rsx return 1; } } + + void tiled_address::write(const void *src, u32 width, u32 height, u32 pitch) + { + if (!tile) + { + memcpy(ptr, src, height * pitch); + return; + } + + u32 offset_x = base % tile->pitch; + u32 offset_y = base / tile->pitch; + + switch (tile->comp) + { + case CELL_GCM_COMPMODE_C32_2X1: + case CELL_GCM_COMPMODE_DISABLED: + for (int y = 0; y < height; ++y) + { + memcpy(ptr + (offset_y + y) * tile->pitch + offset_x, (u8*)src + pitch * y, pitch); + } + break; + /* + case CELL_GCM_COMPMODE_C32_2X1: + for (u32 y = 0; y < height; ++y) + { + for (u32 x = 0; x < width; ++x) + { + u32 value = *(u32*)((u8*)src + pitch * y + x * sizeof(u32)); + + *(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value; + *(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value; + } + } + break; + */ + case CELL_GCM_COMPMODE_C32_2X2: + for (u32 y = 0; y < height; ++y) + { + for (u32 x = 0; x < width; ++x) + { + u32 value = *(u32*)((u8*)src + pitch * y + x * sizeof(u32)); + + *(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value; + *(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value; + *(u32*)(ptr + (offset_y + y * 2 + 1) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value; + *(u32*)(ptr + (offset_y + y * 2 + 1) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value; + } + } + break; + default: + throw; + } + } + + void tiled_address::read(void *dst, u32 width, u32 height, u32 pitch) + { + if (!tile) + { + memcpy(dst, ptr, height * pitch); + return; + } + + u32 offset_x = base % tile->pitch; + u32 offset_y = base / tile->pitch; + + switch (tile->comp) + { + case CELL_GCM_COMPMODE_C32_2X1: + case CELL_GCM_COMPMODE_DISABLED: + for (int y = 0; y < height; ++y) + { + memcpy((u8*)dst + pitch * y, ptr + (offset_y + y) * tile->pitch + offset_x, pitch); + } + break; + /* + case CELL_GCM_COMPMODE_C32_2X1: + for (u32 y = 0; y < height; ++y) + { + for (u32 x = 0; x < width; ++x) + { + u32 value = *(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)); + + *(u32*)((u8*)dst + pitch * y + x * sizeof(u32)) = value; + } + } + break; + */ + case CELL_GCM_COMPMODE_C32_2X2: + for (u32 y = 0; y < height; ++y) + { + for (u32 x = 0; x < width; ++x) + { + u32 value = *(u32*)(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)); + + *(u32*)((u8*)dst + pitch * y + x * sizeof(u32)) = value; + } + } + break; + default: + throw; + } + } void thread::load_vertex_data(u32 first, u32 count) { @@ -1341,6 +1443,40 @@ namespace rsx start(); } + GcmTileInfo *thread::find_tile(u32 offset, u32 location) + { + for (GcmTileInfo &tile : tiles) + { + if (!tile.binded || tile.location != location) + { + continue; + } + + if (offset >= tile.offset && offset < tile.offset + tile.size) + { + return &tile; + } + } + + return nullptr; + } + + tiled_address thread::get_tiled_address(u32 offset, u32 location) + { + u32 address = get_address(offset, location); + + GcmTileInfo *tile = find_tile(offset, location); + u32 base = 0; + + if (tile) + { + base = offset - tile->offset; + address = get_address(tile->offset, location); + } + + return{ address, base, tile, (u8*)vm::base(address) }; + } + u32 thread::ReadIO32(u32 addr) { u32 value; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index b383233ff0..0cecb914ed 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -215,6 +215,17 @@ namespace rsx u32 get_address(u32 offset, u32 location); + struct tiled_address + { + u32 address; + u32 base; + GcmTileInfo *tile; + u8 *ptr; + + void write(const void *src, u32 width, u32 height, u32 pitch); + void read(void *dst, u32 width, u32 height, u32 pitch); + }; + template<typename T> void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight) { @@ -504,6 +515,9 @@ namespace rsx void reset(); void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress); + tiled_address get_tiled_address(u32 offset, u32 location); + GcmTileInfo *find_tile(u32 offset, u32 location); + u32 ReadIO32(u32 addr); void WriteIO32(u32 addr, u32 value); }; From 836d14c8afecc21cf360083d3b7415c8cb289125 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Tue, 5 Jan 2016 18:42:54 +0200 Subject: [PATCH 02/13] rsx::pad_texture & rsx::convert_linear_swizzle moved to rsx_utils Added rsx::convert_scale_image & rsx::clip_image to rsx_utils --- rpcs3/Emu/RSX/RSXThread.cpp | 1 + rpcs3/Emu/RSX/RSXThread.h | 86 ---------------------------- rpcs3/Emu/RSX/rsx_utils.cpp | 45 +++++++++++++++ rpcs3/Emu/RSX/rsx_utils.h | 104 ++++++++++++++++++++++++++++++++++ rpcs3/emucore.vcxproj | 2 + rpcs3/emucore.vcxproj.filters | 6 ++ 6 files changed, 158 insertions(+), 86 deletions(-) create mode 100644 rpcs3/Emu/RSX/rsx_utils.cpp create mode 100644 rpcs3/Emu/RSX/rsx_utils.h diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index d40b67cd9f..9903bd1ffd 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -10,6 +10,7 @@ #include "Emu/SysCalls/lv2/sys_time.h" #include "Common/BufferUtils.h" +#include "rsx_utils.h" extern "C" { diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 0cecb914ed..620d989d1f 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -226,92 +226,6 @@ namespace rsx void read(void *dst, u32 width, u32 height, u32 pitch); }; - template<typename T> - void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight) - { - T *src, *dst; - src = static_cast<T*>(inputPixels); - dst = static_cast<T*>(outputPixels); - - for (u16 h = 0; h < inputHeight; ++h) - { - const u32 padded_pos = h * outputWidth; - const u32 pos = h * inputWidth; - for (u16 w = 0; w < inputWidth; ++w) - { - dst[padded_pos + w] = src[pos + w]; - } - } - } - - /* Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels - * - Input can be swizzled or linear, bool flag handles conversion to and from - * - It will handle any width and height that are a power of 2, square or non square - * Restriction: It has mixed results if the height or width is not a power of 2 - */ - template<typename T> - void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled) - { - u32 log2width, log2height; - - log2width = log2(width); - log2height = log2(height); - - // Max mask possible for square texture - u32 x_mask = 0x55555555; - u32 y_mask = 0xAAAAAAAA; - - // We have to limit the masks to the lower of the two dimensions to allow for non-square textures - u32 limit_mask = (log2width < log2height) ? log2width : log2height; - // double the limit mask to account for bits in both x and y - limit_mask = 1 << (limit_mask << 1); - - //x_mask, bits above limit are 1's for x-carry - x_mask = (x_mask | ~(limit_mask - 1)); - //y_mask. bits above limit are 0'd, as we use a different method for y-carry over - y_mask = (y_mask & (limit_mask - 1)); - - u32 offs_y = 0; - u32 offs_x = 0; - u32 offs_x0 = 0; //total y-carry offset for x - u32 y_incr = limit_mask; - - T *src, *dst; - - if (!inputIsSwizzled) - { - for (int y = 0; y < height; ++y) - { - src = static_cast<T*>(inputPixels) + y*width; - dst = static_cast<T*>(outputPixels) + offs_y; - offs_x = offs_x0; - for (int x = 0; x < width; ++x) - { - dst[offs_x] = src[x]; - offs_x = (offs_x - x_mask) & x_mask; - } - offs_y = (offs_y - y_mask) & y_mask; - if (offs_y == 0) offs_x0 += y_incr; - } - } - else - { - for (int y = 0; y < height; ++y) - { - src = static_cast<T*>(inputPixels) + offs_y; - dst = static_cast<T*>(outputPixels) + y*width; - offs_x = offs_x0; - for (int x = 0; x < width; ++x) - { - dst[x] = src[offs_x]; - offs_x = (offs_x - x_mask) & x_mask; - } - offs_y = (offs_y - y_mask) & y_mask; - if (offs_y == 0) offs_x0 += y_incr; - } - } - } - struct surface_info { u8 log2height; diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp new file mode 100644 index 0000000000..25a9b5ecf2 --- /dev/null +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ -0,0 +1,45 @@ +#include "stdafx.h" +#include "rsx_utils.h" + +extern "C" +{ +#include "libswscale/swscale.h" +} + +namespace rsx +{ + void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch, + const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear) + { + std::unique_ptr<SwsContext, void(*)(SwsContext*)> sws(sws_getContext(src_width, src_height, src_format, + dst_width, dst_height, dst_format, bilinear ? SWS_FAST_BILINEAR : SWS_POINT, NULL, NULL, NULL), sws_freeContext); + + sws_scale(sws.get(), &src, &src_pitch, 0, src_slice_h, &dst, &dst_pitch); + } + + void convert_scale_image(std::unique_ptr<u8[]>& dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch, + const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear) + { + dst.reset(new u8[dst_pitch * dst_height]); + convert_scale_image(dst.get(), dst_format, dst_width, dst_height, dst_pitch, + src, src_format, src_width, src_height, src_pitch, src_slice_h, bilinear); + } + + void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch) + { + for (int y = 0; y < clip_h; ++y) + { + u8 *dst_row = dst + y * dst_pitch; + const u8 *src_row = src + (y + clip_y) * src_pitch + clip_x * bpp; + + std::memmove(dst_row, src_row, clip_w * bpp); + } + } + + void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src, + int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch) + { + dst.reset(new u8[clip_h * dst_pitch]); + clip_image(dst.get(), src, clip_x, clip_y, clip_w, clip_h, bpp, src_pitch, dst_pitch); + } +} diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h new file mode 100644 index 0000000000..33ad93be5f --- /dev/null +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -0,0 +1,104 @@ +#pragma once + +extern "C" +{ +#include <libavutil/pixfmt.h> +} + +namespace rsx +{ + template<typename T> + void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight) + { + T *src, *dst; + src = static_cast<T*>(inputPixels); + dst = static_cast<T*>(outputPixels); + + for (u16 h = 0; h < inputHeight; ++h) + { + const u32 padded_pos = h * outputWidth; + const u32 pos = h * inputWidth; + for (u16 w = 0; w < inputWidth; ++w) + { + dst[padded_pos + w] = src[pos + w]; + } + } + } + + /* Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels + * - Input can be swizzled or linear, bool flag handles conversion to and from + * - It will handle any width and height that are a power of 2, square or non square + * Restriction: It has mixed results if the height or width is not a power of 2 + */ + template<typename T> + void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled) + { + u32 log2width, log2height; + + log2width = log2(width); + log2height = log2(height); + + // Max mask possible for square texture + u32 x_mask = 0x55555555; + u32 y_mask = 0xAAAAAAAA; + + // We have to limit the masks to the lower of the two dimensions to allow for non-square textures + u32 limit_mask = (log2width < log2height) ? log2width : log2height; + // double the limit mask to account for bits in both x and y + limit_mask = 1 << (limit_mask << 1); + + //x_mask, bits above limit are 1's for x-carry + x_mask = (x_mask | ~(limit_mask - 1)); + //y_mask. bits above limit are 0'd, as we use a different method for y-carry over + y_mask = (y_mask & (limit_mask - 1)); + + u32 offs_y = 0; + u32 offs_x = 0; + u32 offs_x0 = 0; //total y-carry offset for x + u32 y_incr = limit_mask; + + T *src, *dst; + + if (!inputIsSwizzled) + { + for (int y = 0; y < height; ++y) + { + src = static_cast<T*>(inputPixels) + y*width; + dst = static_cast<T*>(outputPixels) + offs_y; + offs_x = offs_x0; + for (int x = 0; x < width; ++x) + { + dst[offs_x] = src[x]; + offs_x = (offs_x - x_mask) & x_mask; + } + offs_y = (offs_y - y_mask) & y_mask; + if (offs_y == 0) offs_x0 += y_incr; + } + } + else + { + for (int y = 0; y < height; ++y) + { + src = static_cast<T*>(inputPixels) + offs_y; + dst = static_cast<T*>(outputPixels) + y*width; + offs_x = offs_x0; + for (int x = 0; x < width; ++x) + { + dst[x] = src[offs_x]; + offs_x = (offs_x - x_mask) & x_mask; + } + offs_y = (offs_y - y_mask) & y_mask; + if (offs_y == 0) offs_x0 += y_incr; + } + } + } + + void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch, + const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear); + + void convert_scale_image(std::unique_ptr<u8[]>& dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch, + const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear); + + void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch); + void clip_image(std::unique_ptr<u8[]>& dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch); +} diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index ddfa06f7bb..a9d013b1f7 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -129,6 +129,7 @@ <ClCompile Include="Emu\RSX\Common\VertexProgramDecompiler.cpp" /> <ClCompile Include="Emu\RSX\GCM.cpp" /> <ClCompile Include="Emu\RSX\Null\NullGSRender.cpp" /> + <ClCompile Include="Emu\RSX\rsx_utils.cpp" /> <ClCompile Include="Emu\state.cpp" /> <ClCompile Include="Emu\SysCalls\lv2\sys_dbg.cpp" /> <ClCompile Include="Emu\SysCalls\lv2\sys_fs.cpp" /> @@ -574,6 +575,7 @@ <ClInclude Include="Emu\Memory\vm_ptr.h" /> <ClInclude Include="Emu\Memory\vm_ref.h" /> <ClInclude Include="Emu\Memory\vm_var.h" /> + <ClInclude Include="Emu\RSX\rsx_utils.h" /> <ClInclude Include="Emu\state.h" /> <ClInclude Include="Emu\SysCalls\Callback.h" /> <ClInclude Include="Emu\SysCalls\CB_FUNC.h" /> diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 30963300e8..c0cdafaaf1 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -927,6 +927,9 @@ <ClCompile Include="Emu\RSX\GCM.cpp"> <Filter>Emu\GPU\RSX</Filter> </ClCompile> + <ClCompile Include="Emu\RSX\rsx_utils.cpp"> + <Filter>Emu\GPU\RSX</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="Crypto\aes.h"> @@ -1773,5 +1776,8 @@ <ClInclude Include="..\Utilities\BitField.h"> <Filter>Utilities</Filter> </ClInclude> + <ClInclude Include="Emu\RSX\rsx_utils.h"> + <Filter>Emu\GPU\RSX</Filter> + </ClInclude> </ItemGroup> </Project> \ No newline at end of file From 11ccc498fd02f8bbb1f38dc600179016681f7be2 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Tue, 5 Jan 2016 18:46:28 +0200 Subject: [PATCH 03/13] Reimplemented nv3089::image_in --- rpcs3/Emu/RSX/RSXThread.cpp | 264 ++++++++++++++++++++---------------- 1 file changed, 148 insertions(+), 116 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 9903bd1ffd..c68ec9421e 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -12,11 +12,6 @@ #include "Common/BufferUtils.h" #include "rsx_utils.h" -extern "C" -{ -#include "libswscale/swscale.h" -} - #define CMD_DEBUG 0 bool user_asked_for_frame_capture = false; @@ -354,34 +349,36 @@ namespace rsx { never_inline void image_in(u32 arg) { - const u16 src_height = method_registers[NV3089_IMAGE_IN_SIZE] >> 16; - const u16 src_pitch = method_registers[NV3089_IMAGE_IN_FORMAT]; - const u8 src_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16; - const u8 src_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24; - const u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT]; - const u32 operation = method_registers[NV3089_SET_OPERATION]; + u32 operation = method_registers[NV3089_SET_OPERATION]; - const u16 out_w = method_registers[NV3089_IMAGE_OUT_SIZE]; - const u16 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16; + u32 clip_x = method_registers[NV3089_CLIP_POINT] & 0xffff; + u32 clip_y = method_registers[NV3089_CLIP_POINT] >> 16; + u32 clip_w = method_registers[NV3089_CLIP_SIZE] & 0xffff; + u32 clip_h = method_registers[NV3089_CLIP_SIZE] >> 16; - // handle weird RSX quirk, doesn't report less than 16 pixels width in some cases - u16 src_width = method_registers[NV3089_IMAGE_IN_SIZE]; - if (src_width == 16 && out_w < 16 && method_registers[NV3089_DS_DX] == (1 << 20)) + u32 out_x = method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff; + u32 out_y = method_registers[NV3089_IMAGE_OUT_POINT] >> 16; + u32 out_w = method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff; + u32 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16; + + u16 in_w = method_registers[NV3089_IMAGE_IN_SIZE]; + u16 in_h = method_registers[NV3089_IMAGE_IN_SIZE] >> 16; + u16 in_pitch = method_registers[NV3089_IMAGE_IN_FORMAT]; + u8 in_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16; + u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24; + u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT]; + + f32 in_x = (method_registers[NV3089_IMAGE_IN] & 0xffff) / 16.f; + f32 in_y = (method_registers[NV3089_IMAGE_IN] >> 16) / 16.f; + + if (in_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER) { - src_width = out_w; + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", in_origin); } - const u16 u = method_registers[NV3089_IMAGE_IN]; // inX (currently ignored) - const u16 v = method_registers[NV3089_IMAGE_IN] >> 16; // inY (currently ignored) - - if (src_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER) + if (in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH) { - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", src_origin); - } - - if (src_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && src_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH) - { - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", src_inter); + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", in_inter); } if (operation != CELL_GCM_TRANSFER_OPERATION_SRCCOPY) @@ -395,6 +392,8 @@ namespace rsx u32 dst_offset; u32 dst_dma = 0; u16 dst_color_format; + u32 out_pitch = 0; + u32 out_aligment = 64; switch (method_registers[NV3089_SET_CONTEXT_SURFACE]) { @@ -402,6 +401,8 @@ namespace rsx dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]; dst_offset = method_registers[NV3062_SET_OFFSET_DESTIN]; dst_color_format = method_registers[NV3062_SET_COLOR_FORMAT]; + out_pitch = method_registers[NV3062_SET_PITCH] >> 16; + out_aligment = method_registers[NV3062_SET_PITCH] & 0xffff; break; case CELL_GCM_CONTEXT_SWIZZLE2D: @@ -412,19 +413,39 @@ namespace rsx default: LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", method_registers[NV3089_SET_CONTEXT_SURFACE]); - break; - } - - if (!dst_dma) - { - LOG_ERROR(RSX, "dst_dma not set"); return; } - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_offset, dst_offset); + u32 src_address = get_address(src_offset, src_dma); + u32 dst_address = get_address(dst_offset, dst_dma); - u8* pixels_src = vm::_ptr<u8>(get_address(src_offset, src_dma)); - u8* pixels_dst = vm::_ptr<u8>(get_address(dst_offset, dst_dma)); + u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel + u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4; + + if (out_pitch == 0) + { + out_pitch = out_bpp * out_w; + } + + if (in_pitch == 0) + { + in_pitch = in_bpp * in_w; + } + + if (clip_w > out_w) + { + clip_w = out_w; + } + + if (clip_h > out_h) + { + clip_h = out_h; + } + + //LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address); + + u8* pixels_src = vm::_ptr<u8>(src_address); + u8* pixels_dst = vm::_ptr<u8>(dst_address); if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 && dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8) @@ -438,38 +459,103 @@ namespace rsx LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", src_color_format); } - LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x", - method_registers[NV3089_IMAGE_IN_SIZE], src_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]), - method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]); - - const u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel - const u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4; + //LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x", + // method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]), + // method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]); std::unique_ptr<u8[]> temp1, temp2; - // resize/convert if necessary - if (in_bpp != out_bpp && src_width != out_w && src_height != out_h) + AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; + AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; + + u32 out_offset = out_x * out_bpp + out_pitch * out_y; + + bool need_clip = method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || method_registers[NV3089_CLIP_POINT]; + bool need_convert = out_format != in_format || out_w != in_w || out_h != in_h; + + u32 slice_h = (u32)(clip_h * (method_registers[NV3089_DS_DX] / 1048576.f)); + + if (slice_h) { - temp1.reset(new u8[out_bpp * out_w * out_h]); - - AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; - AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; - - std::unique_ptr<SwsContext, void(*)(SwsContext*)> sws(sws_getContext(src_width, src_height, in_format, out_w, out_h, out_format, - src_inter ? SWS_FAST_BILINEAR : SWS_POINT, NULL, NULL, NULL), sws_freeContext); - - int in_line = in_bpp * src_width; - u8* out_ptr = temp1.get(); - int out_line = out_bpp * out_w; - - sws_scale(sws.get(), &pixels_src, &in_line, 0, src_height, &out_ptr, &out_line); - - pixels_src = out_ptr; // use resized image as a source + if (clip_h < out_h) + { + --slice_h; + } + } + else + { + slice_h = clip_h; } - // Not sure if swizzle should be after clipping or not - if (method_registers[NV3089_SET_CONTEXT_SURFACE] == CELL_GCM_CONTEXT_SWIZZLE2D) + if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D) { + if (need_convert || need_clip) + { + if (need_clip) + { + if (need_convert) + { + convert_scale_image(temp1, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); + + clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); + } + else + { + clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); + } + } + else + { + convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); + } + } + else + { + if (out_pitch != in_pitch || out_pitch != out_bpp * out_w) + { + for (u32 y = 0; y < out_h; ++y) + { + u8 *dst = pixels_dst + out_x * out_bpp + out_pitch * (y + out_y); + u8 *src = pixels_src + in_pitch * y; + + std::memmove(dst, src, out_w * out_bpp); + } + } + else + { + std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h); + } + } + } + else + { + if (need_convert || need_clip) + { + if (need_clip) + { + if (need_convert) + { + convert_scale_image(temp1, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); + + clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); + } + else + { + clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); + } + } + else + { + convert_scale_image(temp2, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false); + } + + pixels_src = temp2.get(); + } + u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16; u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24; @@ -480,8 +566,6 @@ namespace rsx u16 sw_width = 1 << sw_width_log2; u16 sw_height = 1 << sw_height_log2; - std::unique_ptr<u8[]> sw_temp; - temp2.reset(new u8[out_bpp * sw_width * sw_height]); u8* linear_pixels = pixels_src; @@ -490,7 +574,7 @@ namespace rsx // Check and pad texture out if we are given non square texture for swizzle to be correct if (sw_width != out_w || sw_height != out_h) { - sw_temp.reset(new u8[out_bpp * sw_width * sw_height]); + std::unique_ptr<u8[]> sw_temp(new u8[out_bpp * sw_width * sw_height]); switch (out_bpp) { @@ -521,59 +605,7 @@ namespace rsx break; } - //pixels_src = swizzled_pixels; - - // TODO: Handle Clipping/Image out when swizzled std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height); - return; - } - - // clip if necessary - if (method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_OUT_SIZE] || - method_registers[NV3089_CLIP_POINT] || method_registers[NV3089_IMAGE_OUT_POINT]) - { - // Note: There are cases currently where the if statement above is true, but this for loop doesn't hit, leading to nothing getting copied to pixels_dst - // Currently it seems needed to avoid some errors/crashes - for (s32 y = (method_registers[NV3089_CLIP_POINT] >> 16), dst_y = (method_registers[NV3089_IMAGE_OUT_POINT] >> 16); y < out_h; y++, dst_y++) - { - if (dst_y >= 0 && dst_y < method_registers[NV3089_IMAGE_OUT_SIZE] >> 16) - { - // destination line - u8* dst_line = pixels_dst + dst_y * out_bpp * (method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff) - + std::min<s32>(std::max<s32>(method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff, 0), method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff); - - size_t dst_max = std::min<s32>( - std::max<s32>((s32)(method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff) - (method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff), 0), - method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff) * out_bpp; - - if (y >= 0 && y < std::min<s32>(method_registers[NV3089_CLIP_SIZE] >> 16, out_h)) - { - // source line - u8* src_line = pixels_src + y * out_bpp * out_w + - std::min<s32>(std::max<s32>(method_registers[NV3089_CLIP_POINT] & 0xffff, 0), method_registers[NV3089_CLIP_SIZE] & 0xffff); - size_t src_max = std::min<s32>( - std::max<s32>((s32)(method_registers[NV3089_CLIP_SIZE] & 0xffff) - (method_registers[NV3089_CLIP_POINT] & 0xffff), 0), - method_registers[NV3089_CLIP_SIZE] & 0xffff) * out_bpp; - - std::pair<u8*, size_t> - z0 = { src_line + 0, std::min<size_t>(dst_max, std::max<s64>(0, method_registers[NV3089_CLIP_POINT] & 0xffff)) }, - d0 = { src_line + z0.second, std::min<size_t>(dst_max - z0.second, src_max) }, - z1 = { src_line + d0.second, dst_max - z0.second - d0.second }; - - std::memset(z0.first, 0, z0.second); - std::memcpy(d0.first, src_line, d0.second); - std::memset(z1.first, 0, z1.second); - } - else - { - std::memset(dst_line, 0, dst_max); - } - } - } - } - else - { - std::memcpy(pixels_dst, pixels_src, out_w * out_h * out_bpp); } } } From 509bbddac1c7dc58311eceabc0860c080d19f600 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Tue, 5 Jan 2016 22:27:41 +0200 Subject: [PATCH 04/13] OpenGL renderer: use pitch as image row length Fixed rsx_utils code style --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 33 +++++++- rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp | 115 ++++++++++++++++++---------- rpcs3/Emu/RSX/rsx_utils.h | 49 +++++++----- 3 files changed, 132 insertions(+), 65 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 46edb5f7e5..7fb1ed6811 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -948,6 +948,14 @@ static const u32 mr_color_dma[rsx::limits::color_buffers_count] = NV4097_SET_CONTEXT_DMA_COLOR_D }; +static const u32 mr_color_pitch[rsx::limits::color_buffers_count] = +{ + NV4097_SET_SURFACE_PITCH_A, + NV4097_SET_SURFACE_PITCH_B, + NV4097_SET_SURFACE_PITCH_C, + NV4097_SET_SURFACE_PITCH_D +}; + void GLGSRender::read_buffers() { if (!draw_fbo) @@ -963,7 +971,16 @@ void GLGSRender::read_buffers() { for (int i = index; i < index + count; ++i) { - u32 color_address = rsx::get_address(rsx::method_registers[mr_color_offset[i]], rsx::method_registers[mr_color_dma[i]]); + u32 offset = rsx::method_registers[mr_color_offset[i]]; + u32 location = rsx::method_registers[mr_color_dma[i]]; + u32 pitch = rsx::method_registers[mr_color_pitch[i]]; + + if (pitch <= 64) + continue; + + m_draw_tex_color[i].pixel_unpack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count)); + + u32 color_address = rsx::get_address(offset, location); __glcheck m_draw_tex_color[i].copy_from(vm::base(color_address), color_format.format, color_format.type); } }; @@ -1065,7 +1082,16 @@ void GLGSRender::write_buffers() //}, gl::buffer::access::read); - u32 color_address = rsx::get_address(rsx::method_registers[mr_color_offset[i]], rsx::method_registers[mr_color_dma[i]]); + u32 offset = rsx::method_registers[mr_color_offset[i]]; + u32 location = rsx::method_registers[mr_color_dma[i]]; + u32 pitch = rsx::method_registers[mr_color_pitch[i]]; + + if (pitch <= 64) + continue; + + m_draw_tex_color[i].pixel_pack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count)); + + u32 color_address = rsx::get_address(offset, location); __glcheck m_draw_tex_color[i].copy_to(vm::base(color_address), color_format.format, color_format.type); } }; @@ -1173,8 +1199,7 @@ void GLGSRender::flip(int buffer) .type(gl::texture::type::uint_8_8_8_8) .format(gl::texture::format::bgra); - m_flip_tex_color.pixel_unpack_settings().aligment(1); - m_flip_tex_color.pixel_pack_settings().aligment(1); + m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch); __glcheck m_flip_fbo.recreate(); __glcheck m_flip_fbo.color = m_flip_tex_color; diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp index 6653eace01..baad5818f7 100644 --- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp +++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp @@ -4,11 +4,45 @@ #include "../GCM.h" #include "../RSXThread.h" #include "../RSXTexture.h" +#include "../rsx_utils.h" namespace rsx { namespace gl { + static const int gl_tex_min_filter[] = + { + GL_NEAREST, // unused + GL_NEAREST, + GL_LINEAR, + GL_NEAREST_MIPMAP_NEAREST, + GL_LINEAR_MIPMAP_NEAREST, + GL_NEAREST_MIPMAP_LINEAR, + GL_LINEAR_MIPMAP_LINEAR, + GL_NEAREST, // CELL_GCM_TEXTURE_CONVOLUTION_MIN + }; + + static const int gl_tex_mag_filter[] = + { + GL_NEAREST, // unused + GL_NEAREST, + GL_LINEAR, + GL_NEAREST, // unused + GL_LINEAR // CELL_GCM_TEXTURE_CONVOLUTION_MAG + }; + + static const int gl_tex_zfunc[] = + { + GL_NEVER, + GL_LESS, + GL_EQUAL, + GL_LEQUAL, + GL_GREATER, + GL_NOTEQUAL, + GL_GEQUAL, + GL_ALWAYS, + }; + void texture::create() { if (m_id) @@ -59,7 +93,9 @@ namespace rsx void texture::init(rsx::texture& tex) { if (!m_id) + { create(); + } bind(); @@ -69,8 +105,10 @@ namespace rsx //TODO: safe init - int format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); + u32 full_format = tex.format(); + + u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + bool is_swizzled = ~full_format & CELL_GCM_TEXTURE_LN; const u8* pixels = vm::ps3::_ptr<u8>(texaddr); u8 *unswizzledPixels; @@ -78,10 +116,16 @@ namespace rsx // NOTE: This must be in ARGB order in all forms below. const GLint *glRemap = glRemapStandard; + ::gl::pixel_pack_settings().apply(); + ::gl::pixel_unpack_settings().apply(); + + u32 pitch = tex.pitch(); + switch (format) { case CELL_GCM_TEXTURE_B8: // One 8-bit fixed-point number { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BLUE, GL_UNSIGNED_BYTE, pixels); static const GLint swizzleMaskB8[] = { GL_BLUE, GL_BLUE, GL_BLUE, GL_BLUE }; @@ -92,6 +136,7 @@ namespace rsx case CELL_GCM_TEXTURE_A1R5G5B5: { glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); // TODO: texture swizzling glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, pixels); @@ -101,6 +146,7 @@ namespace rsx case CELL_GCM_TEXTURE_A4R4G4B4: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4, pixels); // We read it in as R4G4B4A4, so we need to remap each component. @@ -111,6 +157,7 @@ namespace rsx case CELL_GCM_TEXTURE_R5G6B5: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, tex.width(), tex.height(), 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, pixels); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); @@ -119,6 +166,8 @@ namespace rsx case CELL_GCM_TEXTURE_A8R8G8B8: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); + if (is_swizzled) { u32 *src, *dst; @@ -167,6 +216,7 @@ namespace rsx case CELL_GCM_TEXTURE_G8B8: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_UNSIGNED_BYTE, pixels); static const GLint swizzleMaskG8B8[] = { GL_RED, GL_GREEN, GL_RED, GL_GREEN }; @@ -176,6 +226,8 @@ namespace rsx case CELL_GCM_TEXTURE_R6G5B5: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); + // TODO: Probably need to actually unswizzle if is_swizzled. const u32 numPixels = tex.width() * tex.height(); unswizzledPixels = (u8 *)malloc(numPixels * 4); @@ -196,30 +248,36 @@ namespace rsx case CELL_GCM_TEXTURE_DEPTH24_D8: // 24-bit unsigned fixed-point number and 8 bits of garbage { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, pixels); break; } case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // 24-bit unsigned float and 8 bits of garbage { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_FLOAT, pixels); break; } case CELL_GCM_TEXTURE_DEPTH16: // 16-bit unsigned fixed-point number { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_SHORT, pixels); break; } case CELL_GCM_TEXTURE_DEPTH16_FLOAT: // 16-bit unsigned float { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, tex.width(), tex.height(), 0, GL_DEPTH_COMPONENT, GL_FLOAT, pixels); break; } case CELL_GCM_TEXTURE_X16: // A 16-bit fixed-point number { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RED, GL_UNSIGNED_SHORT, pixels); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); @@ -231,6 +289,7 @@ namespace rsx case CELL_GCM_TEXTURE_Y16_X16: // Two 16-bit fixed-point numbers { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_UNSIGNED_SHORT, pixels); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); @@ -241,6 +300,7 @@ namespace rsx case CELL_GCM_TEXTURE_R5G5B5A1: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1, pixels); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); @@ -249,6 +309,7 @@ namespace rsx case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: // Four fp16 values { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 8); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_HALF_FLOAT, pixels); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); @@ -257,12 +318,16 @@ namespace rsx case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: // Four fp32 values { - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_FLOAT, pixels); + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 16); + glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RGBA, GL_FLOAT, pixels); + glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); break; } case CELL_GCM_TEXTURE_X32_FLOAT: // One 32-bit floating-point number { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RED, GL_FLOAT, pixels); static const GLint swizzleMaskX32_FLOAT[] = { GL_RED, GL_ONE, GL_ONE, GL_ONE }; @@ -272,9 +337,9 @@ namespace rsx case CELL_GCM_TEXTURE_D1R5G5B5: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 2); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); - // TODO: Texture swizzling glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, pixels); @@ -287,6 +352,7 @@ namespace rsx case CELL_GCM_TEXTURE_D8R8G8B8: // 8 bits of garbage and three unsigned 8-bit fixed-point numbers { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, pixels); static const GLint swizzleMaskX32_D8R8G8B8[] = { GL_ONE, GL_RED, GL_GREEN, GL_BLUE }; @@ -297,6 +363,7 @@ namespace rsx case CELL_GCM_TEXTURE_Y16_X16_FLOAT: // Two fp16 values { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width(), tex.height(), 0, GL_RG, GL_HALF_FLOAT, pixels); glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); @@ -308,6 +375,8 @@ namespace rsx case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); + const u32 numPixels = tex.width() * tex.height(); unswizzledPixels = (u8 *)malloc(numPixels * 4); // TODO: Speed. @@ -332,6 +401,8 @@ namespace rsx case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: { + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / 4); + const u32 numPixels = tex.width() * tex.height(); unswizzledPixels = (u8 *)malloc(numPixels * 4); // TODO: Speed. @@ -385,18 +456,6 @@ namespace rsx glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, glRemap[3]); } - static const int gl_tex_zfunc[] = - { - GL_NEVER, - GL_LESS, - GL_EQUAL, - GL_LEQUAL, - GL_GREATER, - GL_NOTEQUAL, - GL_GEQUAL, - GL_ALWAYS, - }; - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, gl_wrap(tex.wrap_s())); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, gl_wrap(tex.wrap_t())); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_R, gl_wrap(tex.wrap_r())); @@ -407,34 +466,10 @@ namespace rsx glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, (tex.min_lod() >> 8)); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (tex.max_lod() >> 8)); - - - static const int gl_tex_min_filter[] = - { - GL_NEAREST, // unused - GL_NEAREST, - GL_LINEAR, - GL_NEAREST_MIPMAP_NEAREST, - GL_LINEAR_MIPMAP_NEAREST, - GL_NEAREST_MIPMAP_LINEAR, - GL_LINEAR_MIPMAP_LINEAR, - GL_NEAREST, // CELL_GCM_TEXTURE_CONVOLUTION_MIN - }; - - static const int gl_tex_mag_filter[] = { - GL_NEAREST, // unused - GL_NEAREST, - GL_LINEAR, - GL_NEAREST, // unused - GL_LINEAR // CELL_GCM_TEXTURE_CONVOLUTION_MAG - }; - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, gl_tex_min_filter[tex.min_filter()]); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, gl_tex_mag_filter[tex.mag_filter()]); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_aniso(tex.max_aniso())); - //Unbind(); - if (is_swizzled && format == CELL_GCM_TEXTURE_A8R8G8B8) { free(unswizzledPixels); diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 33ad93be5f..7fdff4d2dd 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -8,17 +8,16 @@ extern "C" namespace rsx { template<typename T> - void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight) + void pad_texture(void* input_pixels, void* output_pixels, u16 input_width, u16 input_height, u16 output_width, u16 output_height) { - T *src, *dst; - src = static_cast<T*>(inputPixels); - dst = static_cast<T*>(outputPixels); + T *src = static_cast<T*>(input_pixels); + T *dst = static_cast<T*>(output_pixels); for (u16 h = 0; h < inputHeight; ++h) { - const u32 padded_pos = h * outputWidth; - const u32 pos = h * inputWidth; - for (u16 w = 0; w < inputWidth; ++w) + const u32 padded_pos = h * output_width; + const u32 pos = h * input_width; + for (u16 w = 0; w < input_width; ++w) { dst[padded_pos + w] = src[pos + w]; } @@ -31,12 +30,10 @@ namespace rsx * Restriction: It has mixed results if the height or width is not a power of 2 */ template<typename T> - void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled) + void convert_linear_swizzle(void* input_pixels, void* output_pixels, u16 width, u16 height, bool input_is_swizzled) { - u32 log2width, log2height; - - log2width = log2(width); - log2height = log2(height); + u32 log2width = log2(width); + u32 log2height = log2(height); // Max mask possible for square texture u32 x_mask = 0x55555555; @@ -57,38 +54,48 @@ namespace rsx u32 offs_x0 = 0; //total y-carry offset for x u32 y_incr = limit_mask; - T *src, *dst; - - if (!inputIsSwizzled) + if (!input_is_swizzled) { for (int y = 0; y < height; ++y) { - src = static_cast<T*>(inputPixels) + y*width; - dst = static_cast<T*>(outputPixels) + offs_y; + T *src = static_cast<T*>(input_pixels) + y * width; + T *dst = static_cast<T*>(output_pixels) + offs_y; offs_x = offs_x0; + for (int x = 0; x < width; ++x) { dst[offs_x] = src[x]; offs_x = (offs_x - x_mask) & x_mask; } + offs_y = (offs_y - y_mask) & y_mask; - if (offs_y == 0) offs_x0 += y_incr; + + if (offs_y == 0) + { + offs_x0 += y_incr; + } } } else { for (int y = 0; y < height; ++y) { - src = static_cast<T*>(inputPixels) + offs_y; - dst = static_cast<T*>(outputPixels) + y*width; + T *src = static_cast<T*>(input_pixels) + offs_y; + T *dst = static_cast<T*>(output_pixels) + y * width; offs_x = offs_x0; + for (int x = 0; x < width; ++x) { dst[x] = src[offs_x]; offs_x = (offs_x - x_mask) & x_mask; } + offs_y = (offs_y - y_mask) & y_mask; - if (offs_y == 0) offs_x0 += y_incr; + + if (offs_y == 0) + { + offs_x0 += y_incr; + } } } } From dbccf5fbad69dc9fd12cf35518662a79821eb61d Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Tue, 5 Jan 2016 22:38:18 +0200 Subject: [PATCH 05/13] gl: fixed multiple textures binding --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 6 ++++-- rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp | 4 ++-- rpcs3/Emu/RSX/GL/rsx_gl_texture.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 7fb1ed6811..1e553ce533 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -325,13 +325,15 @@ void GLGSRender::end() for (int i = 0; i < rsx::limits::textures_count; ++i) { if (!textures[i].enabled()) + { continue; + } int location; if (m_program->uniforms.has_location("tex" + std::to_string(i), &location)) { - __glcheck m_gl_textures[i].init(textures[i]); - __glcheck m_program->uniforms.texture(location, i, gl::texture_view(gl::texture::target::texture2D, m_gl_textures[i].id())); + __glcheck m_gl_textures[i].init(i, textures[i]); + glProgramUniform1i(m_program->id(), location, i); } } diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp index baad5818f7..132378acdd 100644 --- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp +++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.cpp @@ -51,7 +51,6 @@ namespace rsx } glGenTextures(1, &m_id); - bind(); } int texture::gl_wrap(int wrap) @@ -90,13 +89,14 @@ namespace rsx return 1.0f; } - void texture::init(rsx::texture& tex) + void texture::init(int index, rsx::texture& tex) { if (!m_id) { create(); } + glActiveTexture(GL_TEXTURE0 + index); bind(); const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_texture.h b/rpcs3/Emu/RSX/GL/rsx_gl_texture.h index ace0b86c2e..1b66efa05a 100644 --- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.h +++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.h @@ -34,7 +34,7 @@ namespace rsx return (v << 2) | (v >> 4); } - void init(rsx::texture& tex); + void init(int index, rsx::texture& tex); void bind(); void unbind(); void remove(); From ba12c489ec65e6070e78fcea5bc9ef719c2969b9 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Tue, 5 Jan 2016 22:55:43 +0200 Subject: [PATCH 06/13] gl: using tiled region for read/write color buffers and flip gl: fixed flip buffer row length compilation fixes --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 1 + rpcs3/Emu/RSX/GL/GLGSRender.cpp | 67 ++++++++++++++++++++++++--- rpcs3/Emu/RSX/RSXThread.cpp | 6 +-- rpcs3/Emu/RSX/RSXThread.h | 4 +- rpcs3/Emu/RSX/rsx_utils.h | 2 +- 5 files changed, 67 insertions(+), 13 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 99a68b1d09..a54297475b 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -2,6 +2,7 @@ #include "Emu/Memory/vm.h" #include "TextureUtils.h" #include "../RSXThread.h" +#include "../rsx_utils.h" #define MAX2(a, b) ((a) > (b)) ? (a) : (b) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 1e553ce533..7dab5f83e3 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -971,6 +971,10 @@ void GLGSRender::read_buffers() auto read_color_buffers = [&](int index, int count) { + u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + + for (int i = index; i < index + count; ++i) { u32 offset = rsx::method_registers[mr_color_offset[i]]; @@ -982,8 +986,19 @@ void GLGSRender::read_buffers() m_draw_tex_color[i].pixel_unpack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count)); - u32 color_address = rsx::get_address(offset, location); - __glcheck m_draw_tex_color[i].copy_from(vm::base(color_address), color_format.format, color_format.type); + rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); + + if (!color_buffer.tile) + { + __glcheck m_draw_tex_color[i].copy_from(color_buffer.ptr, color_format.format, color_format.type); + } + else + { + std::unique_ptr<u8[]> buffer(new u8[pitch * height]); + color_buffer.read(buffer.get(), width, height, pitch); + + __glcheck m_draw_tex_color[i].copy_from(buffer.get(), color_format.format, color_format.type); + } } }; @@ -1016,6 +1031,12 @@ void GLGSRender::read_buffers() if (rpcs3::state.config.rsx.opengl.read_depth_buffer) { + //TODO: use pitch + u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z]; + + if (pitch <= 64) + return; + auto depth_format = surface_depth_format_to_gl(m_surface.depth_format); int pixel_size = m_surface.depth_format == CELL_GCM_SURFACE_Z16 ? 2 : 4; @@ -1065,6 +1086,9 @@ void GLGSRender::write_buffers() auto write_color_buffers = [&](int index, int count) { + u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16; + u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16; + for (int i = index; i < index + count; ++i) { //TODO: swizzle @@ -1093,8 +1117,20 @@ void GLGSRender::write_buffers() m_draw_tex_color[i].pixel_pack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count)); - u32 color_address = rsx::get_address(offset, location); - __glcheck m_draw_tex_color[i].copy_to(vm::base(color_address), color_format.format, color_format.type); + rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); + + if (!color_buffer.tile) + { + __glcheck m_draw_tex_color[i].copy_to(color_buffer.ptr, color_format.format, color_format.type); + } + else + { + std::unique_ptr<u8[]> buffer(new u8[pitch * height]); + + __glcheck m_draw_tex_color[i].copy_to(buffer.get(), color_format.format, color_format.type); + + color_buffer.write(buffer.get(), width, height, pitch); + } } }; @@ -1127,6 +1163,12 @@ void GLGSRender::write_buffers() if (rpcs3::state.config.rsx.opengl.write_depth_buffer) { + //TODO: use pitch + u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z]; + + if (pitch <= 64) + return; + auto depth_format = surface_depth_format_to_gl(m_surface.depth_format); gl::buffer pbo_depth; @@ -1169,7 +1211,9 @@ void GLGSRender::flip(int buffer) u32 buffer_width = gcm_buffers[buffer].width; u32 buffer_height = gcm_buffers[buffer].height; u32 buffer_pitch = gcm_buffers[buffer].pitch; - u32 buffer_address = rsx::get_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); + + rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL); + bool skip_read = false; if (draw_fbo && !rpcs3::state.config.rsx.opengl.write_color_buffers) @@ -1201,7 +1245,7 @@ void GLGSRender::flip(int buffer) .type(gl::texture::type::uint_8_8_8_8) .format(gl::texture::format::bgra); - m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch); + m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4); __glcheck m_flip_fbo.recreate(); __glcheck m_flip_fbo.color = m_flip_tex_color; @@ -1218,7 +1262,16 @@ void GLGSRender::flip(int buffer) glDisable(GL_LOGIC_OP); glDisable(GL_CULL_FACE); - __glcheck m_flip_tex_color.copy_from(vm::base(buffer_address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + if (buffer_region.tile) + { + std::unique_ptr<u8> temp(new u8[buffer_height * buffer_pitch]); + buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch); + __glcheck m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + } + else + { + __glcheck m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8); + } } areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height }); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index c68ec9421e..d7846797e3 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -968,7 +968,7 @@ namespace rsx } } - void tiled_address::write(const void *src, u32 width, u32 height, u32 pitch) + void tiled_region::write(const void *src, u32 width, u32 height, u32 pitch) { if (!tile) { @@ -1021,7 +1021,7 @@ namespace rsx } } - void tiled_address::read(void *dst, u32 width, u32 height, u32 pitch) + void tiled_region::read(void *dst, u32 width, u32 height, u32 pitch) { if (!tile) { @@ -1494,7 +1494,7 @@ namespace rsx return nullptr; } - tiled_address thread::get_tiled_address(u32 offset, u32 location) + tiled_region thread::get_tiled_address(u32 offset, u32 location) { u32 address = get_address(offset, location); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 620d989d1f..e4537aa13f 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -215,7 +215,7 @@ namespace rsx u32 get_address(u32 offset, u32 location); - struct tiled_address + struct tiled_region { u32 address; u32 base; @@ -429,7 +429,7 @@ namespace rsx void reset(); void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress); - tiled_address get_tiled_address(u32 offset, u32 location); + tiled_region get_tiled_address(u32 offset, u32 location); GcmTileInfo *find_tile(u32 offset, u32 location); u32 ReadIO32(u32 addr); diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 7fdff4d2dd..bb8b08ed45 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -13,7 +13,7 @@ namespace rsx T *src = static_cast<T*>(input_pixels); T *dst = static_cast<T*>(output_pixels); - for (u16 h = 0; h < inputHeight; ++h) + for (u16 h = 0; h < input_height; ++h) { const u32 padded_pos = h * output_width; const u32 pos = h * input_width; From 48919330d7a57cb97d1283c7365c0f23d7c20068 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Tue, 5 Jan 2016 23:29:49 +0200 Subject: [PATCH 07/13] rsx methods moved from rsx thread --- rpcs3/Emu/RSX/Common/BufferUtils.cpp | 1 + rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 1 + rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 1 + rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 1 + rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp | 1 + rpcs3/Emu/RSX/GL/GLGSRender.cpp | 1 + rpcs3/Emu/RSX/RSXTexture.cpp | 1 + rpcs3/Emu/RSX/RSXThread.cpp | 831 +----------------- rpcs3/Emu/RSX/RSXThread.h | 63 -- rpcs3/emucore.vcxproj | 2 + rpcs3/emucore.vcxproj.filters | 12 +- 11 files changed, 19 insertions(+), 896 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index af3b0a9684..591da7b89b 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "BufferUtils.h" +#include "../rsx_methods.h" #define MIN2(x, y) ((x) < (y)) ? (x) : (y) #define MAX2(x, y) ((x) > (y)) ? (x) : (y) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 28bda5f5a5..fc9c3b23a7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -6,6 +6,7 @@ #include "d3dx12.h" #include "../Common/BufferUtils.h" #include "D3D12Formats.h" +#include "../rsx_methods.h" namespace { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index dd8629fb7f..f6d50187d1 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -10,6 +10,7 @@ #include <d3d11on12.h> #include "Emu/state.h" #include "D3D12Formats.h" +#include "../rsx_methods.h" PFN_D3D12_CREATE_DEVICE wrapD3D12CreateDevice; PFN_D3D12_GET_DEBUG_INTERFACE wrapD3D12GetDebugInterface; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 89392c0b96..477b05acf5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -5,6 +5,7 @@ #include "D3D12GSRender.h" #include "Emu/state.h" #include "D3D12Formats.h" +#include "../rsx_methods.h" #define TO_STRING(x) #x diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp index 95f50089bd..c940631c59 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.cpp @@ -7,6 +7,7 @@ #include "Emu/System.h" #include "Emu/state.h" #include "Emu/RSX/GSRender.h" +#include "../rsx_methods.h" #include "D3D12.h" #include "D3D12GSRender.h" diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 7dab5f83e3..95f5ac3348 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -4,6 +4,7 @@ #include "Emu/System.h" #include "Emu/state.h" #include "GLGSRender.h" +#include "../rsx_methods.h" #define DUMP_VERTEX_DATA 0 diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp index 017b4e9146..a6eafa09b6 100644 --- a/rpcs3/Emu/RSX/RSXTexture.cpp +++ b/rpcs3/Emu/RSX/RSXTexture.cpp @@ -2,6 +2,7 @@ #include "Emu/Memory/Memory.h" #include "RSXThread.h" #include "RSXTexture.h" +#include "rsx_methods.h" namespace rsx { diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index d7846797e3..fd5e3f8554 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -10,7 +10,7 @@ #include "Emu/SysCalls/lv2/sys_time.h" #include "Common/BufferUtils.h" -#include "rsx_utils.h" +#include "rsx_methods.h" #define CMD_DEBUG 0 @@ -19,835 +19,6 @@ frame_capture_data frame_debug; namespace rsx { - using rsx_method_t = void(*)(thread*, u32); - - u32 method_registers[0x10000 >> 2]; - rsx_method_t methods[0x10000 >> 2]{}; - - template<typename Type> struct vertex_data_type_from_element_type; - template<> struct vertex_data_type_from_element_type<float> { enum { type = CELL_GCM_VERTEX_F }; }; - template<> struct vertex_data_type_from_element_type<f16> { enum { type = CELL_GCM_VERTEX_SF }; }; - template<> struct vertex_data_type_from_element_type<u8> { enum { type = CELL_GCM_VERTEX_UB }; }; - template<> struct vertex_data_type_from_element_type<u16> { enum { type = CELL_GCM_VERTEX_S1 }; }; - - namespace nv406e - { - force_inline void set_reference(thread* rsx, u32 arg) - { - rsx->ctrl->ref.exchange(arg); - } - - force_inline void semaphore_acquire(thread* rsx, u32 arg) - { - //TODO: dma - while (vm::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg) - { - if (Emu.IsStopped()) - break; - - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } - } - - force_inline void semaphore_release(thread* rsx, u32 arg) - { - //TODO: dma - vm::write32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET], arg); - } - } - - namespace nv4097 - { - force_inline void texture_read_semaphore_release(thread* rsx, u32 arg) - { - //TODO: dma - vm::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], arg); - } - - force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg) - { - //TODO: dma - vm::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], - (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff)); - } - - //fire only when all data passed to rsx cmd buffer - template<u32 id, u32 index, int count, typename type> - force_inline void set_vertex_data_impl(thread* rsx, u32 arg) - { - static const size_t element_size = (count * sizeof(type)); - static const size_t element_size_in_words = element_size / sizeof(u32); - - auto& info = rsx->register_vertex_info[index]; - - info.type = vertex_data_type_from_element_type<type>::type; - info.size = count; - info.frequency = 0; - info.stride = 0; - - auto& entry = rsx->register_vertex_data[index]; - - //find begin of data - size_t begin = id + index * element_size_in_words; - - size_t position = 0;//entry.size(); - entry.resize(position + element_size); - - memcpy(entry.data() + position, method_registers + begin, element_size); - } - - template<u32 index> - struct set_vertex_data4ub_m - { - force_inline static void impl(thread* rsx, u32 arg) - { - set_vertex_data_impl<NV4097_SET_VERTEX_DATA4UB_M, index, 4, u8>(rsx, arg); - } - }; - - template<u32 index> - struct set_vertex_data1f_m - { - force_inline static void impl(thread* rsx, u32 arg) - { - set_vertex_data_impl<NV4097_SET_VERTEX_DATA1F_M, index, 1, f32>(rsx, arg); - } - }; - - template<u32 index> - struct set_vertex_data2f_m - { - force_inline static void impl(thread* rsx, u32 arg) - { - set_vertex_data_impl<NV4097_SET_VERTEX_DATA2F_M, index, 2, f32>(rsx, arg); - } - }; - - template<u32 index> - struct set_vertex_data3f_m - { - force_inline static void impl(thread* rsx, u32 arg) - { - set_vertex_data_impl<NV4097_SET_VERTEX_DATA3F_M, index, 3, f32>(rsx, arg); - } - }; - - template<u32 index> - struct set_vertex_data4f_m - { - force_inline static void impl(thread* rsx, u32 arg) - { - set_vertex_data_impl<NV4097_SET_VERTEX_DATA4F_M, index, 4, f32>(rsx, arg); - } - }; - - template<u32 index> - struct set_vertex_data2s_m - { - force_inline static void impl(thread* rsx, u32 arg) - { - set_vertex_data_impl<NV4097_SET_VERTEX_DATA2S_M, index, 2, u16>(rsx, arg); - } - }; - - template<u32 index> - struct set_vertex_data4s_m - { - force_inline static void impl(thread* rsx, u32 arg) - { - set_vertex_data_impl<NV4097_SET_VERTEX_DATA4S_M, index, 4, u16>(rsx, arg); - } - }; - - template<u32 index> - struct set_vertex_data_array_format - { - force_inline static void impl(thread* rsx, u32 arg) - { - auto& info = rsx->vertex_arrays_info[index]; - info.unpack_array(arg); - } - }; - - force_inline void draw_arrays(thread* rsx, u32 arg) - { - rsx->draw_command = thread::Draw_command::draw_command_array; - u32 first = arg & 0xffffff; - u32 count = (arg >> 24) + 1; - - rsx->load_vertex_data(first, count); - } - - force_inline void draw_index_array(thread* rsx, u32 arg) - { - rsx->draw_command = thread::Draw_command::draw_command_indexed; - u32 first = arg & 0xffffff; - u32 count = (arg >> 24) + 1; - - rsx->load_vertex_data(first, count); - rsx->load_vertex_index_data(first, count); - } - - force_inline void draw_inline_array(thread* rsx, u32 arg) - { - rsx->draw_command = thread::Draw_command::draw_command_inlined_array; - rsx->draw_inline_vertex_array = true; - rsx->inline_vertex_array.push_back(arg); - } - - template<u32 index> - struct set_transform_constant - { - force_inline static void impl(thread* rsxthr, u32 arg) - { - u32 load = method_registers[NV4097_SET_TRANSFORM_CONSTANT_LOAD]; - - static const size_t count = 4; - static const size_t size = count * sizeof(f32); - - size_t reg = index / 4; - size_t subreg = index % 4; - - memcpy(rsxthr->transform_constants[load + reg].rgba + subreg, method_registers + NV4097_SET_TRANSFORM_CONSTANT + reg * count + subreg, sizeof(f32)); - } - }; - - template<u32 index> - struct set_transform_program - { - force_inline static void impl(thread* rsx, u32 arg) - { - u32& load = method_registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD]; - - static const size_t count = 4; - static const size_t size = count * sizeof(u32); - - memcpy(rsx->transform_program + load++ * count, method_registers + NV4097_SET_TRANSFORM_PROGRAM + index * count, size); - } - }; - - force_inline void set_begin_end(thread* rsx, u32 arg) - { - if (arg) - { - rsx->draw_inline_vertex_array = false; - rsx->inline_vertex_array.clear(); - rsx->begin(); - return; - } - - if (!rsx->vertex_draw_count) - { - bool has_array = false; - - for (int i = 0; i < rsx::limits::vertex_count; ++i) - { - if (rsx->vertex_arrays_info[i].size > 0) - { - has_array = true; - break; - } - } - - if (!has_array) - { - u32 min_count = ~0; - - for (int i = 0; i < rsx::limits::vertex_count; ++i) - { - if (!rsx->register_vertex_info[i].size) - continue; - - u32 count = u32(rsx->register_vertex_data[i].size()) / - rsx::get_vertex_type_size(rsx->register_vertex_info[i].type) * rsx->register_vertex_info[i].size; - - if (count < min_count) - min_count = count; - } - - if (min_count && min_count < ~0) - { - rsx->vertex_draw_count = min_count; - } - } - } - - rsx->end(); - rsx->vertex_draw_count = 0; - } - - force_inline void get_report(thread* rsx, u32 arg) - { - u8 type = arg >> 24; - u32 offset = arg & 0xffffff; - - //TODO: use DMA - vm::ptr<CellGcmReportData> result = { rsx->local_mem_addr + offset, vm::addr }; - - result->timer = rsx->timestamp(); - - switch (type) - { - case CELL_GCM_ZPASS_PIXEL_CNT: - case CELL_GCM_ZCULL_STATS: - case CELL_GCM_ZCULL_STATS1: - case CELL_GCM_ZCULL_STATS2: - case CELL_GCM_ZCULL_STATS3: - result->value = 0; - LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type); - break; - - default: - result->value = 0; - LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type); - break; - } - - //result->padding = 0; - } - - force_inline void clear_report_value(thread* rsx, u32 arg) - { - switch (arg) - { - case CELL_GCM_ZPASS_PIXEL_CNT: - LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT"); - break; - case CELL_GCM_ZCULL_STATS: - LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS"); - break; - default: - LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg); - break; - } - } - } - - namespace nv308a - { - template<u32 index> - struct color - { - force_inline static void impl(u32 arg) - { - u32 point = method_registers[NV308A_POINT]; - u16 x = point; - u16 y = point >> 16; - - if (y) - { - LOG_ERROR(RSX, "%s: y is not null (0x%x)", __FUNCTION__, y); - } - - u32 address = get_address(method_registers[NV3062_SET_OFFSET_DESTIN] + (x << 2) + index * 4, method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]); - vm::write32(address, arg); - } - }; - } - - namespace nv3089 - { - never_inline void image_in(u32 arg) - { - u32 operation = method_registers[NV3089_SET_OPERATION]; - - u32 clip_x = method_registers[NV3089_CLIP_POINT] & 0xffff; - u32 clip_y = method_registers[NV3089_CLIP_POINT] >> 16; - u32 clip_w = method_registers[NV3089_CLIP_SIZE] & 0xffff; - u32 clip_h = method_registers[NV3089_CLIP_SIZE] >> 16; - - u32 out_x = method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff; - u32 out_y = method_registers[NV3089_IMAGE_OUT_POINT] >> 16; - u32 out_w = method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff; - u32 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16; - - u16 in_w = method_registers[NV3089_IMAGE_IN_SIZE]; - u16 in_h = method_registers[NV3089_IMAGE_IN_SIZE] >> 16; - u16 in_pitch = method_registers[NV3089_IMAGE_IN_FORMAT]; - u8 in_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16; - u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24; - u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT]; - - f32 in_x = (method_registers[NV3089_IMAGE_IN] & 0xffff) / 16.f; - f32 in_y = (method_registers[NV3089_IMAGE_IN] >> 16) / 16.f; - - if (in_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER) - { - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", in_origin); - } - - if (in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH) - { - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", in_inter); - } - - if (operation != CELL_GCM_TRANSFER_OPERATION_SRCCOPY) - { - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown operation (%d)", operation); - } - - const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET]; - const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE]; - - u32 dst_offset; - u32 dst_dma = 0; - u16 dst_color_format; - u32 out_pitch = 0; - u32 out_aligment = 64; - - switch (method_registers[NV3089_SET_CONTEXT_SURFACE]) - { - case CELL_GCM_CONTEXT_SURFACE2D: - dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]; - dst_offset = method_registers[NV3062_SET_OFFSET_DESTIN]; - dst_color_format = method_registers[NV3062_SET_COLOR_FORMAT]; - out_pitch = method_registers[NV3062_SET_PITCH] >> 16; - out_aligment = method_registers[NV3062_SET_PITCH] & 0xffff; - break; - - case CELL_GCM_CONTEXT_SWIZZLE2D: - dst_dma = method_registers[NV309E_SET_CONTEXT_DMA_IMAGE]; - dst_offset = method_registers[NV309E_SET_OFFSET]; - dst_color_format = method_registers[NV309E_SET_FORMAT]; - break; - - default: - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", method_registers[NV3089_SET_CONTEXT_SURFACE]); - return; - } - - u32 src_address = get_address(src_offset, src_dma); - u32 dst_address = get_address(dst_offset, dst_dma); - - u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel - u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4; - - if (out_pitch == 0) - { - out_pitch = out_bpp * out_w; - } - - if (in_pitch == 0) - { - in_pitch = in_bpp * in_w; - } - - if (clip_w > out_w) - { - clip_w = out_w; - } - - if (clip_h > out_h) - { - clip_h = out_h; - } - - //LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address); - - u8* pixels_src = vm::_ptr<u8>(src_address); - u8* pixels_dst = vm::_ptr<u8>(dst_address); - - if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 && - dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8) - { - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", dst_color_format); - } - - if (src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 && - src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_A8R8G8B8) - { - LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", src_color_format); - } - - //LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x", - // method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]), - // method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]); - - std::unique_ptr<u8[]> temp1, temp2; - - AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; - AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; - - u32 out_offset = out_x * out_bpp + out_pitch * out_y; - - bool need_clip = method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || method_registers[NV3089_CLIP_POINT]; - bool need_convert = out_format != in_format || out_w != in_w || out_h != in_h; - - u32 slice_h = (u32)(clip_h * (method_registers[NV3089_DS_DX] / 1048576.f)); - - if (slice_h) - { - if (clip_h < out_h) - { - --slice_h; - } - } - else - { - slice_h = clip_h; - } - - if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D) - { - if (need_convert || need_clip) - { - if (need_clip) - { - if (need_convert) - { - convert_scale_image(temp1, out_format, out_w, out_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); - - clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); - } - else - { - clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); - } - } - else - { - convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); - } - } - else - { - if (out_pitch != in_pitch || out_pitch != out_bpp * out_w) - { - for (u32 y = 0; y < out_h; ++y) - { - u8 *dst = pixels_dst + out_x * out_bpp + out_pitch * (y + out_y); - u8 *src = pixels_src + in_pitch * y; - - std::memmove(dst, src, out_w * out_bpp); - } - } - else - { - std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h); - } - } - } - else - { - if (need_convert || need_clip) - { - if (need_clip) - { - if (need_convert) - { - convert_scale_image(temp1, out_format, out_w, out_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); - - clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); - } - else - { - clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); - } - } - else - { - convert_scale_image(temp2, out_format, out_w, out_h, out_pitch, - pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false); - } - - pixels_src = temp2.get(); - } - - u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16; - u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24; - - // 0 indicates height of 1 pixel - sw_height_log2 = sw_height_log2 == 0 ? 1 : sw_height_log2; - - // swizzle based on destination size - u16 sw_width = 1 << sw_width_log2; - u16 sw_height = 1 << sw_height_log2; - - temp2.reset(new u8[out_bpp * sw_width * sw_height]); - - u8* linear_pixels = pixels_src; - u8* swizzled_pixels = temp2.get(); - - // Check and pad texture out if we are given non square texture for swizzle to be correct - if (sw_width != out_w || sw_height != out_h) - { - std::unique_ptr<u8[]> sw_temp(new u8[out_bpp * sw_width * sw_height]); - - switch (out_bpp) - { - case 1: - pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); - break; - case 2: - pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); - break; - case 4: - pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); - break; - } - - linear_pixels = sw_temp.get(); - } - - switch (out_bpp) - { - case 1: - convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); - break; - case 2: - convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); - break; - case 4: - convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); - break; - } - - std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height); - } - } - } - - namespace nv0039 - { - force_inline void buffer_notify(u32 arg) - { - const u32 inPitch = method_registers[NV0039_PITCH_IN]; - const u32 outPitch = method_registers[NV0039_PITCH_OUT]; - const u32 lineLength = method_registers[NV0039_LINE_LENGTH_IN]; - const u32 lineCount = method_registers[NV0039_LINE_COUNT]; - const u8 outFormat = method_registers[NV0039_FORMAT] >> 8; - const u8 inFormat = method_registers[NV0039_FORMAT]; - const u32 notify = arg; - - // The existing GCM commands use only the value 0x1 for inFormat and outFormat - if (inFormat != 0x01 || outFormat != 0x01) - { - LOG_ERROR(RSX, "NV0039_OFFSET_IN: Unsupported format: inFormat=%d, outFormat=%d", inFormat, outFormat); - } - - if (lineCount == 1 && !inPitch && !outPitch && !notify) - { - std::memcpy( - vm::base(get_address(method_registers[NV0039_OFFSET_OUT], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT])), - vm::base(get_address(method_registers[NV0039_OFFSET_IN], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN])), - lineLength); - } - else - { - LOG_ERROR(RSX, "NV0039_OFFSET_IN: bad offset(in=0x%x, out=0x%x), pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x", - method_registers[NV0039_OFFSET_IN], method_registers[NV0039_OFFSET_OUT], inPitch, outPitch, lineLength, lineCount, inFormat, outFormat, notify); - } - } - } - - void flip_command(thread* rsx, u32 arg) - { - if (user_asked_for_frame_capture) - { - rsx->capture_current_frame = true; - user_asked_for_frame_capture = false; - frame_debug.reset(); - } - else if (rsx->capture_current_frame) - { - rsx->capture_current_frame = false; - Emu.Pause(); - } - - rsx->gcm_current_buffer = arg; - rsx->flip(arg); - // After each flip PS3 system is executing a routine that changes registers value to some default. - // Some game use this default state (SH3). - rsx->reset(); - - rsx->last_flip_time = get_system_time() - 1000000; - rsx->gcm_current_buffer = arg; - rsx->flip_status = 0; - - if (rsx->flip_handler) - { - Emu.GetCallbackManager().Async([func = rsx->flip_handler](PPUThread& ppu) - { - func(ppu, 1); - }); - } - - rsx->sem_flip.post_and_wait(); - - //sync - double limit; - switch (rpcs3::state.config.rsx.frame_limit.value()) - { - case rsx_frame_limit::_50: limit = 50.; break; - case rsx_frame_limit::_59_94: limit = 59.94; break; - case rsx_frame_limit::_30: limit = 30.; break; - case rsx_frame_limit::_60: limit = 60.; break; - case rsx_frame_limit::Auto: limit = rsx->fps_limit; break; //TODO - - case rsx_frame_limit::Off: - default: - return; - } - - std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec()))); - rsx->timer_sync.Start(); - rsx->local_transform_constants.clear(); - } - - void user_command(thread* rsx, u32 arg) - { - if (rsx->user_handler) - { - Emu.GetCallbackManager().Async([func = rsx->user_handler, arg](PPUThread& ppu) - { - func(ppu, arg); - }); - } - else - { - throw EXCEPTION("User handler not set"); - } - } - - struct __rsx_methods_t - { - using rsx_impl_method_t = void(*)(u32); - - template<rsx_method_t impl_func> - force_inline static void call_impl_func(thread *rsx, u32 arg) - { - impl_func(rsx, arg); - } - - template<rsx_impl_method_t impl_func> - force_inline static void call_impl_func(thread *rsx, u32 arg) - { - impl_func(arg); - } - - template<int id, typename T, T impl_func> - static void wrapper(thread *rsx, u32 arg) - { - // try process using gpu - if (rsx->do_method(id, arg)) - { - if (rsx->capture_current_frame && id == NV4097_CLEAR_SURFACE) - rsx->capture_frame("clear"); - return; - } - - // not handled by renderer - // try process using cpu - if (impl_func != nullptr) - call_impl_func<impl_func>(rsx, arg); - } - - template<int id, int step, int count, template<u32> class T, int index = 0> - struct bind_range_impl_t - { - force_inline static void impl() - { - bind_range_impl_t<id + step, step, count, T, index + 1>::impl(); - bind<id, T<index>::impl>(); - } - }; - - template<int id, int step, int count, template<u32> class T> - struct bind_range_impl_t<id, step, count, T, count> - { - force_inline static void impl() - { - } - }; - - template<int id, int step, int count, template<u32> class T, int index = 0> - force_inline static void bind_range() - { - bind_range_impl_t<id, step, count, T, index>::impl(); - } - - [[noreturn]] never_inline static void bind_redefinition_error(int id) - { - throw EXCEPTION("RSX method implementation redefinition (0x%04x)", id); - } - - template<int id, typename T, T impl_func> - static void bind_impl() - { - if (methods[id]) - { - bind_redefinition_error(id); - } - - methods[id] = wrapper<id, T, impl_func>; - } - - template<int id, typename T, T impl_func> - static void bind_cpu_only_impl() - { - if (methods[id]) - { - bind_redefinition_error(id); - } - - methods[id] = call_impl_func<impl_func>; - } - - template<int id, rsx_impl_method_t impl_func> static void bind() { bind_impl<id, rsx_impl_method_t, impl_func>(); } - template<int id, rsx_method_t impl_func = nullptr> static void bind() { bind_impl<id, rsx_method_t, impl_func>(); } - - //do not try process on gpu - template<int id, rsx_impl_method_t impl_func> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_impl_method_t, impl_func>(); } - //do not try process on gpu - template<int id, rsx_method_t impl_func = nullptr> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_method_t, impl_func>(); } - - __rsx_methods_t() - { - // NV406E - bind_cpu_only<NV406E_SET_REFERENCE, nv406e::set_reference>(); - bind<NV406E_SEMAPHORE_ACQUIRE, nv406e::semaphore_acquire>(); - bind<NV406E_SEMAPHORE_RELEASE, nv406e::semaphore_release>(); - - // NV4097 - bind<NV4097_TEXTURE_READ_SEMAPHORE_RELEASE, nv4097::texture_read_semaphore_release>(); - bind<NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, nv4097::back_end_write_semaphore_release>(); - bind<NV4097_SET_BEGIN_END, nv4097::set_begin_end>(); - bind<NV4097_CLEAR_SURFACE>(); - bind<NV4097_DRAW_ARRAYS, nv4097::draw_arrays>(); - bind<NV4097_DRAW_INDEX_ARRAY, nv4097::draw_index_array>(); - bind<NV4097_INLINE_ARRAY, nv4097::draw_inline_array>(); - bind_range<NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 1, 16, nv4097::set_vertex_data_array_format>(); - bind_range<NV4097_SET_VERTEX_DATA4UB_M, 1, 16, nv4097::set_vertex_data4ub_m>(); - bind_range<NV4097_SET_VERTEX_DATA1F_M, 1, 16, nv4097::set_vertex_data1f_m>(); - bind_range<NV4097_SET_VERTEX_DATA2F_M + 1, 2, 16, nv4097::set_vertex_data2f_m>(); - bind_range<NV4097_SET_VERTEX_DATA3F_M + 2, 3, 16, nv4097::set_vertex_data3f_m>(); - bind_range<NV4097_SET_VERTEX_DATA4F_M + 3, 4, 16, nv4097::set_vertex_data4f_m>(); - bind_range<NV4097_SET_VERTEX_DATA2S_M, 1, 16, nv4097::set_vertex_data2s_m>(); - bind_range<NV4097_SET_VERTEX_DATA4S_M + 1, 2, 16, nv4097::set_vertex_data4s_m>(); - bind_range<NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant>(); - bind_range<NV4097_SET_TRANSFORM_PROGRAM + 3, 4, 128, nv4097::set_transform_program>(); - bind_cpu_only<NV4097_GET_REPORT, nv4097::get_report>(); - bind_cpu_only<NV4097_CLEAR_REPORT_VALUE, nv4097::clear_report_value>(); - - //NV308A - bind_range<NV308A_COLOR, 1, 256, nv308a::color>(); - bind_range<NV308A_COLOR + 256, 1, 512, nv308a::color, 256>(); - - //NV3089 - bind<NV3089_IMAGE_IN, nv3089::image_in>(); - - //NV0039 - bind<NV0039_BUFFER_NOTIFY, nv0039::buffer_notify>(); - - // custom methods - bind_cpu_only<GCM_FLIP_COMMAND, flip_command>(); - bind_cpu_only<GCM_SET_USER_COMMAND, user_command>(); - } - } __rsx_methods; - std::string shaders_cache::path_to_root() { return fs::get_executable_dir() + "data/"; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index e4537aa13f..600719b90a 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -148,69 +148,6 @@ namespace rsx static std::string path_to_root(); }; - //TODO - union alignas(4) method_registers_t - { - u8 _u8[0x10000]; - u32 _u32[0x10000 >> 2]; -/* - struct alignas(4) - { - u8 pad[NV4097_SET_TEXTURE_OFFSET - 4]; - - struct alignas(4) texture_t - { - u32 offset; - - union format_t - { - u32 _u32; - - struct - { - u32: 1; - u32 location : 1; - u32 cubemap : 1; - u32 border_type : 1; - u32 dimension : 4; - u32 format : 8; - u32 mipmap : 16; - }; - } format; - - union address_t - { - u32 _u32; - - struct - { - u32 wrap_s : 4; - u32 aniso_bias : 4; - u32 wrap_t : 4; - u32 unsigned_remap : 4; - u32 wrap_r : 4; - u32 gamma : 4; - u32 signed_remap : 4; - u32 zfunc : 4; - }; - } address; - - u32 control0; - u32 control1; - u32 filter; - u32 image_rect; - u32 border_color; - } textures[limits::textures_count]; - }; -*/ - u32& operator[](int index) - { - return _u32[index >> 2]; - } - }; - - extern u32 method_registers[0x10000 >> 2]; - u32 get_vertex_type_size(u32 type); u32 get_address(u32 offset, u32 location); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index a9d013b1f7..b7481ccaf9 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -129,6 +129,7 @@ <ClCompile Include="Emu\RSX\Common\VertexProgramDecompiler.cpp" /> <ClCompile Include="Emu\RSX\GCM.cpp" /> <ClCompile Include="Emu\RSX\Null\NullGSRender.cpp" /> + <ClCompile Include="Emu\RSX\rsx_methods.cpp" /> <ClCompile Include="Emu\RSX\rsx_utils.cpp" /> <ClCompile Include="Emu\state.cpp" /> <ClCompile Include="Emu\SysCalls\lv2\sys_dbg.cpp" /> @@ -575,6 +576,7 @@ <ClInclude Include="Emu\Memory\vm_ptr.h" /> <ClInclude Include="Emu\Memory\vm_ref.h" /> <ClInclude Include="Emu\Memory\vm_var.h" /> + <ClInclude Include="Emu\RSX\rsx_methods.h" /> <ClInclude Include="Emu\RSX\rsx_utils.h" /> <ClInclude Include="Emu\state.h" /> <ClInclude Include="Emu\SysCalls\Callback.h" /> diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index c0cdafaaf1..659e72fb71 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -377,9 +377,6 @@ <ClCompile Include="Loader\TRP.cpp"> <Filter>Loader</Filter> </ClCompile> - <ClCompile Include="stdafx.cpp"> - <Filter>Source Files</Filter> - </ClCompile> <ClCompile Include="..\Utilities\StrFmt.cpp"> <Filter>Utilities</Filter> </ClCompile> @@ -930,6 +927,12 @@ <ClCompile Include="Emu\RSX\rsx_utils.cpp"> <Filter>Emu\GPU\RSX</Filter> </ClCompile> + <ClCompile Include="Emu\RSX\rsx_methods.cpp"> + <Filter>Emu\GPU\RSX</Filter> + </ClCompile> + <ClCompile Include="stdafx.cpp"> + <Filter>Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="Crypto\aes.h"> @@ -1779,5 +1782,8 @@ <ClInclude Include="Emu\RSX\rsx_utils.h"> <Filter>Emu\GPU\RSX</Filter> </ClInclude> + <ClInclude Include="Emu\RSX\rsx_methods.h"> + <Filter>Emu\GPU\RSX</Filter> + </ClInclude> </ItemGroup> </Project> \ No newline at end of file From c1be0cf3bf0e208823be259c41122a0060f7528c Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Tue, 5 Jan 2016 23:32:25 +0200 Subject: [PATCH 08/13] Added missed files --- rpcs3/Emu/RSX/rsx_methods.cpp | 839 ++++++++++++++++++++++++++++++++++ rpcs3/Emu/RSX/rsx_methods.h | 69 +++ 2 files changed, 908 insertions(+) create mode 100644 rpcs3/Emu/RSX/rsx_methods.cpp create mode 100644 rpcs3/Emu/RSX/rsx_methods.h diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp new file mode 100644 index 0000000000..7bbc95723f --- /dev/null +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -0,0 +1,839 @@ +#include "stdafx.h" +#include "rsx_methods.h" +#include "RSXThread.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/state.h" +#include "rsx_utils.h" +#include "Emu/SysCalls/Callback.h" +#include "Emu/SysCalls/CB_FUNC.h" + +namespace rsx +{ + u32 method_registers[0x10000 >> 2]; + rsx_method_t methods[0x10000 >> 2]{}; + + template<typename Type> struct vertex_data_type_from_element_type; + template<> struct vertex_data_type_from_element_type<float> { enum { type = CELL_GCM_VERTEX_F }; }; + template<> struct vertex_data_type_from_element_type<f16> { enum { type = CELL_GCM_VERTEX_SF }; }; + template<> struct vertex_data_type_from_element_type<u8> { enum { type = CELL_GCM_VERTEX_UB }; }; + template<> struct vertex_data_type_from_element_type<u16> { enum { type = CELL_GCM_VERTEX_S1 }; }; + + namespace nv406e + { + force_inline void set_reference(thread* rsx, u32 arg) + { + rsx->ctrl->ref.exchange(arg); + } + + force_inline void semaphore_acquire(thread* rsx, u32 arg) + { + //TODO: dma + while (vm::ps3::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg) + { + if (Emu.IsStopped()) + break; + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + + force_inline void semaphore_release(thread* rsx, u32 arg) + { + //TODO: dma + vm::ps3::write32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET], arg); + } + } + + namespace nv4097 + { + force_inline void texture_read_semaphore_release(thread* rsx, u32 arg) + { + //TODO: dma + vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], arg); + } + + force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg) + { + //TODO: dma + vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET], + (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff)); + } + + //fire only when all data passed to rsx cmd buffer + template<u32 id, u32 index, int count, typename type> + force_inline void set_vertex_data_impl(thread* rsx, u32 arg) + { + static const size_t element_size = (count * sizeof(type)); + static const size_t element_size_in_words = element_size / sizeof(u32); + + auto& info = rsx->register_vertex_info[index]; + + info.type = vertex_data_type_from_element_type<type>::type; + info.size = count; + info.frequency = 0; + info.stride = 0; + + auto& entry = rsx->register_vertex_data[index]; + + //find begin of data + size_t begin = id + index * element_size_in_words; + + size_t position = 0;//entry.size(); + entry.resize(position + element_size); + + memcpy(entry.data() + position, method_registers + begin, element_size); + } + + template<u32 index> + struct set_vertex_data4ub_m + { + force_inline static void impl(thread* rsx, u32 arg) + { + set_vertex_data_impl<NV4097_SET_VERTEX_DATA4UB_M, index, 4, u8>(rsx, arg); + } + }; + + template<u32 index> + struct set_vertex_data1f_m + { + force_inline static void impl(thread* rsx, u32 arg) + { + set_vertex_data_impl<NV4097_SET_VERTEX_DATA1F_M, index, 1, f32>(rsx, arg); + } + }; + + template<u32 index> + struct set_vertex_data2f_m + { + force_inline static void impl(thread* rsx, u32 arg) + { + set_vertex_data_impl<NV4097_SET_VERTEX_DATA2F_M, index, 2, f32>(rsx, arg); + } + }; + + template<u32 index> + struct set_vertex_data3f_m + { + force_inline static void impl(thread* rsx, u32 arg) + { + set_vertex_data_impl<NV4097_SET_VERTEX_DATA3F_M, index, 3, f32>(rsx, arg); + } + }; + + template<u32 index> + struct set_vertex_data4f_m + { + force_inline static void impl(thread* rsx, u32 arg) + { + set_vertex_data_impl<NV4097_SET_VERTEX_DATA4F_M, index, 4, f32>(rsx, arg); + } + }; + + template<u32 index> + struct set_vertex_data2s_m + { + force_inline static void impl(thread* rsx, u32 arg) + { + set_vertex_data_impl<NV4097_SET_VERTEX_DATA2S_M, index, 2, u16>(rsx, arg); + } + }; + + template<u32 index> + struct set_vertex_data4s_m + { + force_inline static void impl(thread* rsx, u32 arg) + { + set_vertex_data_impl<NV4097_SET_VERTEX_DATA4S_M, index, 4, u16>(rsx, arg); + } + }; + + template<u32 index> + struct set_vertex_data_array_format + { + force_inline static void impl(thread* rsx, u32 arg) + { + auto& info = rsx->vertex_arrays_info[index]; + info.unpack_array(arg); + } + }; + + force_inline void draw_arrays(thread* rsx, u32 arg) + { + rsx->draw_command = thread::Draw_command::draw_command_array; + u32 first = arg & 0xffffff; + u32 count = (arg >> 24) + 1; + + rsx->load_vertex_data(first, count); + } + + force_inline void draw_index_array(thread* rsx, u32 arg) + { + rsx->draw_command = thread::Draw_command::draw_command_indexed; + u32 first = arg & 0xffffff; + u32 count = (arg >> 24) + 1; + + rsx->load_vertex_data(first, count); + rsx->load_vertex_index_data(first, count); + } + + force_inline void draw_inline_array(thread* rsx, u32 arg) + { + rsx->draw_command = thread::Draw_command::draw_command_inlined_array; + rsx->draw_inline_vertex_array = true; + rsx->inline_vertex_array.push_back(arg); + } + + template<u32 index> + struct set_transform_constant + { + force_inline static void impl(thread* rsxthr, u32 arg) + { + u32 load = method_registers[NV4097_SET_TRANSFORM_CONSTANT_LOAD]; + + static const size_t count = 4; + static const size_t size = count * sizeof(f32); + + size_t reg = index / 4; + size_t subreg = index % 4; + + memcpy(rsxthr->transform_constants[load + reg].rgba + subreg, method_registers + NV4097_SET_TRANSFORM_CONSTANT + reg * count + subreg, sizeof(f32)); + } + }; + + template<u32 index> + struct set_transform_program + { + force_inline static void impl(thread* rsx, u32 arg) + { + u32& load = method_registers[NV4097_SET_TRANSFORM_PROGRAM_LOAD]; + + static const size_t count = 4; + static const size_t size = count * sizeof(u32); + + memcpy(rsx->transform_program + load++ * count, method_registers + NV4097_SET_TRANSFORM_PROGRAM + index * count, size); + } + }; + + force_inline void set_begin_end(thread* rsx, u32 arg) + { + if (arg) + { + rsx->draw_inline_vertex_array = false; + rsx->inline_vertex_array.clear(); + rsx->begin(); + return; + } + + if (!rsx->vertex_draw_count) + { + bool has_array = false; + + for (int i = 0; i < rsx::limits::vertex_count; ++i) + { + if (rsx->vertex_arrays_info[i].size > 0) + { + has_array = true; + break; + } + } + + if (!has_array) + { + u32 min_count = ~0; + + for (int i = 0; i < rsx::limits::vertex_count; ++i) + { + if (!rsx->register_vertex_info[i].size) + continue; + + u32 count = u32(rsx->register_vertex_data[i].size()) / + rsx::get_vertex_type_size(rsx->register_vertex_info[i].type) * rsx->register_vertex_info[i].size; + + if (count < min_count) + min_count = count; + } + + if (min_count && min_count < ~0) + { + rsx->vertex_draw_count = min_count; + } + } + } + + rsx->end(); + rsx->vertex_draw_count = 0; + } + + force_inline void get_report(thread* rsx, u32 arg) + { + u8 type = arg >> 24; + u32 offset = arg & 0xffffff; + + //TODO: use DMA + vm::ps3::ptr<CellGcmReportData> result = { rsx->local_mem_addr + offset, vm::addr }; + + result->timer = rsx->timestamp(); + + switch (type) + { + case CELL_GCM_ZPASS_PIXEL_CNT: + case CELL_GCM_ZCULL_STATS: + case CELL_GCM_ZCULL_STATS1: + case CELL_GCM_ZCULL_STATS2: + case CELL_GCM_ZCULL_STATS3: + result->value = 0; + LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type); + break; + + default: + result->value = 0; + LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type); + break; + } + + //result->padding = 0; + } + + force_inline void clear_report_value(thread* rsx, u32 arg) + { + switch (arg) + { + case CELL_GCM_ZPASS_PIXEL_CNT: + LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT"); + break; + case CELL_GCM_ZCULL_STATS: + LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS"); + break; + default: + LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg); + break; + } + } + } + + namespace nv308a + { + template<u32 index> + struct color + { + force_inline static void impl(u32 arg) + { + u32 point = method_registers[NV308A_POINT]; + u16 x = point; + u16 y = point >> 16; + + if (y) + { + LOG_ERROR(RSX, "%s: y is not null (0x%x)", __FUNCTION__, y); + } + + u32 address = get_address(method_registers[NV3062_SET_OFFSET_DESTIN] + (x << 2) + index * 4, method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]); + vm::ps3::write32(address, arg); + } + }; + } + + namespace nv3089 + { + never_inline void image_in(u32 arg) + { + u32 operation = method_registers[NV3089_SET_OPERATION]; + + u32 clip_x = method_registers[NV3089_CLIP_POINT] & 0xffff; + u32 clip_y = method_registers[NV3089_CLIP_POINT] >> 16; + u32 clip_w = method_registers[NV3089_CLIP_SIZE] & 0xffff; + u32 clip_h = method_registers[NV3089_CLIP_SIZE] >> 16; + + u32 out_x = method_registers[NV3089_IMAGE_OUT_POINT] & 0xffff; + u32 out_y = method_registers[NV3089_IMAGE_OUT_POINT] >> 16; + u32 out_w = method_registers[NV3089_IMAGE_OUT_SIZE] & 0xffff; + u32 out_h = method_registers[NV3089_IMAGE_OUT_SIZE] >> 16; + + u16 in_w = method_registers[NV3089_IMAGE_IN_SIZE]; + u16 in_h = method_registers[NV3089_IMAGE_IN_SIZE] >> 16; + u16 in_pitch = method_registers[NV3089_IMAGE_IN_FORMAT]; + u8 in_origin = method_registers[NV3089_IMAGE_IN_FORMAT] >> 16; + u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24; + u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT]; + + f32 in_x = (method_registers[NV3089_IMAGE_IN] & 0xffff) / 16.f; + f32 in_y = (method_registers[NV3089_IMAGE_IN] >> 16) / 16.f; + + if (in_origin != CELL_GCM_TRANSFER_ORIGIN_CORNER) + { + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown origin (%d)", in_origin); + } + + if (in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_ZOH && in_inter != CELL_GCM_TRANSFER_INTERPOLATOR_FOH) + { + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown inter (%d)", in_inter); + } + + if (operation != CELL_GCM_TRANSFER_OPERATION_SRCCOPY) + { + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown operation (%d)", operation); + } + + const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET]; + const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE]; + + u32 dst_offset; + u32 dst_dma = 0; + u16 dst_color_format; + u32 out_pitch = 0; + u32 out_aligment = 64; + + switch (method_registers[NV3089_SET_CONTEXT_SURFACE]) + { + case CELL_GCM_CONTEXT_SURFACE2D: + dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN]; + dst_offset = method_registers[NV3062_SET_OFFSET_DESTIN]; + dst_color_format = method_registers[NV3062_SET_COLOR_FORMAT]; + out_pitch = method_registers[NV3062_SET_PITCH] >> 16; + out_aligment = method_registers[NV3062_SET_PITCH] & 0xffff; + break; + + case CELL_GCM_CONTEXT_SWIZZLE2D: + dst_dma = method_registers[NV309E_SET_CONTEXT_DMA_IMAGE]; + dst_offset = method_registers[NV309E_SET_OFFSET]; + dst_color_format = method_registers[NV309E_SET_FORMAT]; + break; + + default: + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", method_registers[NV3089_SET_CONTEXT_SURFACE]); + return; + } + + u32 src_address = get_address(src_offset, src_dma); + u32 dst_address = get_address(dst_offset, dst_dma); + + u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel + u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4; + + if (out_pitch == 0) + { + out_pitch = out_bpp * out_w; + } + + if (in_pitch == 0) + { + in_pitch = in_bpp * in_w; + } + + if (clip_w > out_w) + { + clip_w = out_w; + } + + if (clip_h > out_h) + { + clip_h = out_h; + } + + //LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address); + + u8* pixels_src = vm::ps3::_ptr<u8>(src_address); + u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address); + + if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 && + dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8) + { + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", dst_color_format); + } + + if (src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 && + src_color_format != CELL_GCM_TRANSFER_SCALE_FORMAT_A8R8G8B8) + { + LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", src_color_format); + } + + //LOG_WARNING(RSX, "NV3089_IMAGE_IN_SIZE: SIZE=0x%08x, pitch=0x%x, offset=0x%x, scaleX=%f, scaleY=%f, CLIP_SIZE=0x%08x, OUT_SIZE=0x%08x", + // method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]), + // method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]); + + std::unique_ptr<u8[]> temp1, temp2; + + AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; + AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; + + u32 out_offset = out_x * out_bpp + out_pitch * out_y; + + bool need_clip = method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || method_registers[NV3089_CLIP_POINT]; + bool need_convert = out_format != in_format || out_w != in_w || out_h != in_h; + + u32 slice_h = (u32)(clip_h * (method_registers[NV3089_DS_DX] / 1048576.f)); + + if (slice_h) + { + if (clip_h < out_h) + { + --slice_h; + } + } + else + { + slice_h = clip_h; + } + + if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D) + { + if (need_convert || need_clip) + { + if (need_clip) + { + if (need_convert) + { + convert_scale_image(temp1, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); + + clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); + } + else + { + clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); + } + } + else + { + convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); + } + } + else + { + if (out_pitch != in_pitch || out_pitch != out_bpp * out_w) + { + for (u32 y = 0; y < out_h; ++y) + { + u8 *dst = pixels_dst + out_x * out_bpp + out_pitch * (y + out_y); + u8 *src = pixels_src + in_pitch * y; + + std::memmove(dst, src, out_w * out_bpp); + } + } + else + { + std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h); + } + } + } + else + { + if (need_convert || need_clip) + { + if (need_clip) + { + if (need_convert) + { + convert_scale_image(temp1, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); + + clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); + } + else + { + clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch); + } + } + else + { + convert_scale_image(temp2, out_format, out_w, out_h, out_pitch, + pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false); + } + + pixels_src = temp2.get(); + } + + u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16; + u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24; + + // 0 indicates height of 1 pixel + sw_height_log2 = sw_height_log2 == 0 ? 1 : sw_height_log2; + + // swizzle based on destination size + u16 sw_width = 1 << sw_width_log2; + u16 sw_height = 1 << sw_height_log2; + + temp2.reset(new u8[out_bpp * sw_width * sw_height]); + + u8* linear_pixels = pixels_src; + u8* swizzled_pixels = temp2.get(); + + // Check and pad texture out if we are given non square texture for swizzle to be correct + if (sw_width != out_w || sw_height != out_h) + { + std::unique_ptr<u8[]> sw_temp(new u8[out_bpp * sw_width * sw_height]); + + switch (out_bpp) + { + case 1: + pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + break; + case 2: + pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + break; + case 4: + pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height); + break; + } + + linear_pixels = sw_temp.get(); + } + + switch (out_bpp) + { + case 1: + convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); + break; + case 2: + convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); + break; + case 4: + convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false); + break; + } + + std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height); + } + } + } + + namespace nv0039 + { + force_inline void buffer_notify(u32 arg) + { + const u32 inPitch = method_registers[NV0039_PITCH_IN]; + const u32 outPitch = method_registers[NV0039_PITCH_OUT]; + const u32 lineLength = method_registers[NV0039_LINE_LENGTH_IN]; + const u32 lineCount = method_registers[NV0039_LINE_COUNT]; + const u8 outFormat = method_registers[NV0039_FORMAT] >> 8; + const u8 inFormat = method_registers[NV0039_FORMAT]; + const u32 notify = arg; + + // The existing GCM commands use only the value 0x1 for inFormat and outFormat + if (inFormat != 0x01 || outFormat != 0x01) + { + LOG_ERROR(RSX, "NV0039_OFFSET_IN: Unsupported format: inFormat=%d, outFormat=%d", inFormat, outFormat); + } + + if (lineCount == 1 && !inPitch && !outPitch && !notify) + { + std::memcpy( + vm::base(get_address(method_registers[NV0039_OFFSET_OUT], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT])), + vm::base(get_address(method_registers[NV0039_OFFSET_IN], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN])), + lineLength); + } + else + { + LOG_ERROR(RSX, "NV0039_OFFSET_IN: bad offset(in=0x%x, out=0x%x), pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x", + method_registers[NV0039_OFFSET_IN], method_registers[NV0039_OFFSET_OUT], inPitch, outPitch, lineLength, lineCount, inFormat, outFormat, notify); + } + } + } + + void flip_command(thread* rsx, u32 arg) + { + if (user_asked_for_frame_capture) + { + rsx->capture_current_frame = true; + user_asked_for_frame_capture = false; + frame_debug.reset(); + } + else if (rsx->capture_current_frame) + { + rsx->capture_current_frame = false; + Emu.Pause(); + } + + rsx->gcm_current_buffer = arg; + rsx->flip(arg); + // After each flip PS3 system is executing a routine that changes registers value to some default. + // Some game use this default state (SH3). + rsx->reset(); + + rsx->last_flip_time = get_system_time() - 1000000; + rsx->gcm_current_buffer = arg; + rsx->flip_status = 0; + + if (rsx->flip_handler) + { + Emu.GetCallbackManager().Async([func = rsx->flip_handler](PPUThread& ppu) + { + func(ppu, 1); + }); + } + + rsx->sem_flip.post_and_wait(); + + //sync + double limit; + switch (rpcs3::state.config.rsx.frame_limit.value()) + { + case rsx_frame_limit::_50: limit = 50.; break; + case rsx_frame_limit::_59_94: limit = 59.94; break; + case rsx_frame_limit::_30: limit = 30.; break; + case rsx_frame_limit::_60: limit = 60.; break; + case rsx_frame_limit::Auto: limit = rsx->fps_limit; break; //TODO + + case rsx_frame_limit::Off: + default: + return; + } + + std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec()))); + rsx->timer_sync.Start(); + rsx->local_transform_constants.clear(); + } + + void user_command(thread* rsx, u32 arg) + { + if (rsx->user_handler) + { + Emu.GetCallbackManager().Async([func = rsx->user_handler, arg](PPUThread& ppu) + { + func(ppu, arg); + }); + } + else + { + throw EXCEPTION("User handler not set"); + } + } + + struct __rsx_methods_t + { + using rsx_impl_method_t = void(*)(u32); + + template<rsx_method_t impl_func> + force_inline static void call_impl_func(thread *rsx, u32 arg) + { + impl_func(rsx, arg); + } + + template<rsx_impl_method_t impl_func> + force_inline static void call_impl_func(thread *rsx, u32 arg) + { + impl_func(arg); + } + + template<int id, typename T, T impl_func> + static void wrapper(thread *rsx, u32 arg) + { + // try process using gpu + if (rsx->do_method(id, arg)) + { + if (rsx->capture_current_frame && id == NV4097_CLEAR_SURFACE) + rsx->capture_frame("clear"); + return; + } + + // not handled by renderer + // try process using cpu + if (impl_func != nullptr) + call_impl_func<impl_func>(rsx, arg); + } + + template<int id, int step, int count, template<u32> class T, int index = 0> + struct bind_range_impl_t + { + force_inline static void impl() + { + bind_range_impl_t<id + step, step, count, T, index + 1>::impl(); + bind<id, T<index>::impl>(); + } + }; + + template<int id, int step, int count, template<u32> class T> + struct bind_range_impl_t<id, step, count, T, count> + { + force_inline static void impl() + { + } + }; + + template<int id, int step, int count, template<u32> class T, int index = 0> + force_inline static void bind_range() + { + bind_range_impl_t<id, step, count, T, index>::impl(); + } + + [[noreturn]] never_inline static void bind_redefinition_error(int id) + { + throw EXCEPTION("RSX method implementation redefinition (0x%04x)", id); + } + + template<int id, typename T, T impl_func> + static void bind_impl() + { + if (methods[id]) + { + bind_redefinition_error(id); + } + + methods[id] = wrapper<id, T, impl_func>; + } + + template<int id, typename T, T impl_func> + static void bind_cpu_only_impl() + { + if (methods[id]) + { + bind_redefinition_error(id); + } + + methods[id] = call_impl_func<impl_func>; + } + + template<int id, rsx_impl_method_t impl_func> static void bind() { bind_impl<id, rsx_impl_method_t, impl_func>(); } + template<int id, rsx_method_t impl_func = nullptr> static void bind() { bind_impl<id, rsx_method_t, impl_func>(); } + + //do not try process on gpu + template<int id, rsx_impl_method_t impl_func> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_impl_method_t, impl_func>(); } + //do not try process on gpu + template<int id, rsx_method_t impl_func = nullptr> static void bind_cpu_only() { bind_cpu_only_impl<id, rsx_method_t, impl_func>(); } + + __rsx_methods_t() + { + // NV406E + bind_cpu_only<NV406E_SET_REFERENCE, nv406e::set_reference>(); + bind<NV406E_SEMAPHORE_ACQUIRE, nv406e::semaphore_acquire>(); + bind<NV406E_SEMAPHORE_RELEASE, nv406e::semaphore_release>(); + + // NV4097 + bind<NV4097_TEXTURE_READ_SEMAPHORE_RELEASE, nv4097::texture_read_semaphore_release>(); + bind<NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, nv4097::back_end_write_semaphore_release>(); + bind<NV4097_SET_BEGIN_END, nv4097::set_begin_end>(); + bind<NV4097_CLEAR_SURFACE>(); + bind<NV4097_DRAW_ARRAYS, nv4097::draw_arrays>(); + bind<NV4097_DRAW_INDEX_ARRAY, nv4097::draw_index_array>(); + bind<NV4097_INLINE_ARRAY, nv4097::draw_inline_array>(); + bind_range<NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 1, 16, nv4097::set_vertex_data_array_format>(); + bind_range<NV4097_SET_VERTEX_DATA4UB_M, 1, 16, nv4097::set_vertex_data4ub_m>(); + bind_range<NV4097_SET_VERTEX_DATA1F_M, 1, 16, nv4097::set_vertex_data1f_m>(); + bind_range<NV4097_SET_VERTEX_DATA2F_M + 1, 2, 16, nv4097::set_vertex_data2f_m>(); + bind_range<NV4097_SET_VERTEX_DATA3F_M + 2, 3, 16, nv4097::set_vertex_data3f_m>(); + bind_range<NV4097_SET_VERTEX_DATA4F_M + 3, 4, 16, nv4097::set_vertex_data4f_m>(); + bind_range<NV4097_SET_VERTEX_DATA2S_M, 1, 16, nv4097::set_vertex_data2s_m>(); + bind_range<NV4097_SET_VERTEX_DATA4S_M + 1, 2, 16, nv4097::set_vertex_data4s_m>(); + bind_range<NV4097_SET_TRANSFORM_CONSTANT, 1, 32, nv4097::set_transform_constant>(); + bind_range<NV4097_SET_TRANSFORM_PROGRAM + 3, 4, 128, nv4097::set_transform_program>(); + bind_cpu_only<NV4097_GET_REPORT, nv4097::get_report>(); + bind_cpu_only<NV4097_CLEAR_REPORT_VALUE, nv4097::clear_report_value>(); + + //NV308A + bind_range<NV308A_COLOR, 1, 256, nv308a::color>(); + bind_range<NV308A_COLOR + 256, 1, 512, nv308a::color, 256>(); + + //NV3089 + bind<NV3089_IMAGE_IN, nv3089::image_in>(); + + //NV0039 + bind<NV0039_BUFFER_NOTIFY, nv0039::buffer_notify>(); + + // custom methods + bind_cpu_only<GCM_FLIP_COMMAND, flip_command>(); + bind_cpu_only<GCM_SET_USER_COMMAND, user_command>(); + } + } __rsx_methods; +} diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h new file mode 100644 index 0000000000..ef34418dee --- /dev/null +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -0,0 +1,69 @@ +#pragma once + +namespace rsx +{ + //TODO + union alignas(4) method_registers_t + { + u8 _u8[0x10000]; + u32 _u32[0x10000 >> 2]; + /* + struct alignas(4) + { + u8 pad[NV4097_SET_TEXTURE_OFFSET - 4]; + + struct alignas(4) texture_t + { + u32 offset; + + union format_t + { + u32 _u32; + + struct + { + u32: 1; + u32 location : 1; + u32 cubemap : 1; + u32 border_type : 1; + u32 dimension : 4; + u32 format : 8; + u32 mipmap : 16; + }; + } format; + + union address_t + { + u32 _u32; + + struct + { + u32 wrap_s : 4; + u32 aniso_bias : 4; + u32 wrap_t : 4; + u32 unsigned_remap : 4; + u32 wrap_r : 4; + u32 gamma : 4; + u32 signed_remap : 4; + u32 zfunc : 4; + }; + } address; + + u32 control0; + u32 control1; + u32 filter; + u32 image_rect; + u32 border_color; + } textures[limits::textures_count]; + }; + */ + u32& operator[](int index) + { + return _u32[index >> 2]; + } + }; + + using rsx_method_t = void(*)(class thread*, u32); + extern u32 method_registers[0x10000 >> 2]; + extern rsx_method_t methods[0x10000 >> 2]; +} From 3ac9e0933fad212c11f47e29ad7b601061ce64e1 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Wed, 6 Jan 2016 01:15:35 +0200 Subject: [PATCH 09/13] gl: fixed nv4097_clear_surface & front face selection added window shader and clip plane constants to GCM.h --- rpcs3/Emu/RSX/GCM.h | 12 ++++- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 89 ++++++++++++++++++--------------- rpcs3/Emu/RSX/GL/GLGSRender.h | 4 +- 3 files changed, 61 insertions(+), 44 deletions(-) diff --git a/rpcs3/Emu/RSX/GCM.h b/rpcs3/Emu/RSX/GCM.h index 156b69837c..c83a009943 100644 --- a/rpcs3/Emu/RSX/GCM.h +++ b/rpcs3/Emu/RSX/GCM.h @@ -451,7 +451,17 @@ enum CELL_GCM_POLYGON_MODE_FILL = 0x1B02, CELL_GCM_TRUE = 1, - CELL_GCM_FALSE = 0 + CELL_GCM_FALSE = 0, + + CELL_GCM_WINDOW_ORIGIN_TOP = 0, + CELL_GCM_WINDOW_ORIGIN_BOTTOM = 1, + + CELL_GCM_WINDOW_PIXEL_CENTER_HALF = 0, + CELL_GCM_WINDOW_PIXEL_CENTER_INTEGER = 1, + + CELL_GCM_USER_CLIP_PLANE_DISABLE = 0, + CELL_GCM_USER_CLIP_PLANE_ENABLE_LT = 1, + CELL_GCM_USER_CLIP_PLANE_ENABLE_GE = 2, }; enum diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 95f5ac3348..937afc4146 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -65,24 +65,6 @@ void GLGSRender::begin() __glcheck glDepthMask(rsx::method_registers[NV4097_SET_DEPTH_MASK]); __glcheck glStencilMask(rsx::method_registers[NV4097_SET_STENCIL_MASK]); - int viewport_x = int(rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL] & 0xffff); - int viewport_y = int(rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL] & 0xffff); - int viewport_w = int(rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL] >> 16); - int viewport_h = int(rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL] >> 16); - glViewport(viewport_x, viewport_y, viewport_w, viewport_h); - - //scissor test is always enabled - glEnable(GL_SCISSOR_TEST); - - u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL]; - u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]; - u16 scissor_x = scissor_horizontal; - u16 scissor_w = scissor_horizontal >> 16; - u16 scissor_y = scissor_vertical; - u16 scissor_h = scissor_vertical >> 16; - - __glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h); - if (__glcheck enable(rsx::method_registers[NV4097_SET_DEPTH_TEST_ENABLE], GL_DEPTH_TEST)) { __glcheck glDepthFunc(rsx::method_registers[NV4097_SET_DEPTH_FUNC]); @@ -236,9 +218,10 @@ void GLGSRender::begin() if (__glcheck enable(rsx::method_registers[NV4097_SET_CULL_FACE_ENABLE], GL_CULL_FACE)) { __glcheck glCullFace(rsx::method_registers[NV4097_SET_CULL_FACE]); - __glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE]); } + __glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE] ^ 1); + __glcheck enable(rsx::method_registers[NV4097_SET_POLY_SMOOTH_ENABLE], GL_POLYGON_SMOOTH); //NV4097_SET_COLOR_KEY_COLOR @@ -480,6 +463,44 @@ void GLGSRender::end() rsx::thread::end(); } +void GLGSRender::set_viewport() +{ + u32 viewport_horizontal = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL]; + u32 viewport_vertical = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL]; + + u16 viewport_x = viewport_horizontal & 0xffff; + u16 viewport_y = viewport_vertical & 0xffff; + u16 viewport_w = viewport_horizontal >> 16; + u16 viewport_h = viewport_vertical >> 16; + + u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL]; + u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]; + u16 scissor_x = scissor_horizontal; + u16 scissor_w = scissor_horizontal >> 16; + u16 scissor_y = scissor_vertical; + u16 scissor_h = scissor_vertical >> 16; + + u32 shader_window = rsx::method_registers[NV4097_SET_SHADER_WINDOW]; + + u8 shader_window_origin = (shader_window >> 12) & 0xf; + + //TODO + if (true || shader_window_origin == CELL_GCM_WINDOW_ORIGIN_BOTTOM) + { + __glcheck glViewport(viewport_x, viewport_y, viewport_w, viewport_h); + __glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h); + } + else + { + u16 shader_window_height = shader_window & 0xfff; + + __glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h - 1, viewport_w, viewport_h); + __glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h - 1, scissor_w, scissor_h); + } + + glEnable(GL_SCISSOR_TEST); +} + void GLGSRender::on_init_thread() { GSRender::on_init_thread(); @@ -558,9 +579,6 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) return; } - renderer->draw_fbo.bind(); - glEnable(GL_SCISSOR_TEST); - /* u16 clear_x = rsx::method_registers[NV4097_SET_CLEAR_RECT_HORIZONTAL]; u16 clear_y = rsx::method_registers[NV4097_SET_CLEAR_RECT_VERTICAL]; @@ -569,14 +587,8 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) glScissor(clear_x, clear_y, clear_w, clear_h); */ - u32 scissor_horizontal = rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL]; - u32 scissor_vertical = rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]; - u16 scissor_x = scissor_horizontal; - u16 scissor_w = scissor_horizontal >> 16; - u16 scissor_y = scissor_vertical; - u16 scissor_h = scissor_vertical >> 16; - - glScissor(scissor_x, scissor_y, scissor_w, scissor_h); + renderer->init_buffers(true); + renderer->draw_fbo.bind(); GLbitfield mask = 0; @@ -616,8 +628,8 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) mask |= GLenum(gl::buffers::color); } - renderer->clear_surface_buffers = (gl::buffers)mask; - renderer->draw_fbo.clear((gl::buffers)mask); + glClear(mask); + renderer->write_buffers(); } using rsx_method_impl_t = void(*)(u32, GLGSRender*); @@ -811,7 +823,7 @@ std::pair<gl::texture::type, gl::texture::format> surface_depth_format_to_gl(int } } -void GLGSRender::init_buffers() +void GLGSRender::init_buffers(bool skip_reading) { u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; @@ -893,11 +905,13 @@ void GLGSRender::init_buffers() __glcheck m_draw_tex_depth_stencil.pixel_unpack_settings().aligment(1); } - if (clear_surface_buffers == gl::buffers::none) + if (!skip_reading) { read_buffers(); } + set_viewport(); + switch (rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]) { case CELL_GCM_SURFACE_TARGET_NONE: break; @@ -926,13 +940,6 @@ void GLGSRender::init_buffers() LOG_ERROR(RSX, "Bad surface color target: %d", rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]); break; } - - if (clear_surface_buffers != gl::buffers::none) - { - //draw_fbo.clear(clear_surface_buffers); - - clear_surface_buffers = gl::buffers::none; - } } static const u32 mr_color_offset[rsx::limits::color_buffers_count] = diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index b49d52a569..ec7babfe86 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -24,7 +24,6 @@ private: public: gl::fbo draw_fbo; - gl::buffers clear_surface_buffers = gl::buffers::none; private: GLProgramBuffer m_prog_buffer; @@ -53,9 +52,10 @@ private: public: bool load_program(); - void init_buffers(); + void init_buffers(bool skip_reading = false); void read_buffers(); void write_buffers(); + void set_viewport(); protected: void begin() override; From 8912b9880e18a33cfa0d3ae6f00d0f26536e5655 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Wed, 6 Jan 2016 01:25:17 +0200 Subject: [PATCH 10/13] rsx: initialize vertex textures on reset --- rpcs3/Emu/RSX/RSXTexture.cpp | 23 ----------------------- rpcs3/Emu/RSX/RSXTexture.h | 6 ------ rpcs3/Emu/RSX/RSXThread.cpp | 7 +++++++ 3 files changed, 7 insertions(+), 29 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp index a6eafa09b6..9ad938f6d6 100644 --- a/rpcs3/Emu/RSX/RSXTexture.cpp +++ b/rpcs3/Emu/RSX/RSXTexture.cpp @@ -279,24 +279,6 @@ namespace rsx return ((method_registers[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 8)] >> 16) & 0xffff); } - u8 vertex_texture::wrap_s() const - { - return 1; - //return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)]) & 0xf); - } - - u8 vertex_texture::wrap_t() const - { - return 1; - //return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 8) & 0xf); - } - - u8 vertex_texture::wrap_r() const - { - return 1; - //return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 16) & 0xf); - } - u8 vertex_texture::unsigned_remap() const { return ((method_registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 12) & 0xf); @@ -347,11 +329,6 @@ namespace rsx return ((method_registers[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 8)] >> 2) & 0x1); } - u32 vertex_texture::remap() const - { - return 0 | (1 << 2) | (2 << 4) | (3 << 6);//(method_registers[NV4097_SET_VERTEX_TEXTURE_CONTROL1 + (m_index * 8)]); - } - u16 vertex_texture::bias() const { return ((method_registers[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 8)]) & 0x1fff); diff --git a/rpcs3/Emu/RSX/RSXTexture.h b/rpcs3/Emu/RSX/RSXTexture.h index c5d2a5a4bb..a258006c95 100644 --- a/rpcs3/Emu/RSX/RSXTexture.h +++ b/rpcs3/Emu/RSX/RSXTexture.h @@ -86,9 +86,6 @@ namespace rsx u16 mipmap() const; // Address - u8 wrap_s() const; - u8 wrap_t() const; - u8 wrap_r() const; u8 unsigned_remap() const; u8 zfunc() const; u8 gamma() const; @@ -102,9 +99,6 @@ namespace rsx u8 max_aniso() const; bool alpha_kill_enabled() const; - // Control1 - u32 remap() const; - // Filter u16 bias() const; u8 min_filter() const; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index fd5e3f8554..7d4babcd01 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -624,13 +624,20 @@ namespace rsx // Reset vertex attrib array for (int i = 0; i < limits::vertex_count; i++) + { vertex_arrays_info[i].size = 0; + } // Construct Textures for (int i = 0; i < limits::textures_count; i++) { textures[i].init(i); } + + for (int i = 0; i < limits::vertex_textures_count; i++) + { + vertex_textures[i].init(i); + } } void thread::init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress) From f7e787958ece6bb0045fbc20e0cb0b3c8c325c8e Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Wed, 6 Jan 2016 02:24:57 +0200 Subject: [PATCH 11/13] fixed nv3089::image_in scale value --- rpcs3/Emu/RSX/RSXThread.cpp | 5 +++++ rpcs3/Emu/RSX/rsx_methods.cpp | 19 ++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 7d4babcd01..d3a016caa9 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -350,13 +350,18 @@ namespace rsx void thread::end() { vertex_index_array.clear(); + for (auto &vertex_array : vertex_arrays) + { vertex_array.clear(); + } transform_constants.clear(); if (capture_current_frame) + { capture_frame("Draw " + std::to_string(vertex_draw_count)); + } } void thread::on_task() diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 7bbc95723f..0c65d73769 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -459,10 +459,19 @@ namespace rsx u32 out_offset = out_x * out_bpp + out_pitch * out_y; - bool need_clip = method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || method_registers[NV3089_CLIP_POINT]; - bool need_convert = out_format != in_format || out_w != in_w || out_h != in_h; + f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX]; + f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY]; - u32 slice_h = (u32)(clip_h * (method_registers[NV3089_DS_DX] / 1048576.f)); + u32 slice_h = (u32)(clip_h * (1.0 / scale_y)); + + u32 convert_w = (u32)(scale_x * in_w); + u32 convert_h = (u32)(scale_y * in_h); + + bool need_clip = + method_registers[NV3089_CLIP_SIZE] != method_registers[NV3089_IMAGE_IN_SIZE] || + method_registers[NV3089_CLIP_POINT] || convert_w != out_w || convert_h != out_h; + + bool need_convert = out_format != in_format || scale_x != 1.0 || scale_y != 1.0; if (slice_h) { @@ -484,7 +493,7 @@ namespace rsx { if (need_convert) { - convert_scale_image(temp1, out_format, out_w, out_h, out_pitch, + convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch, pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); @@ -526,7 +535,7 @@ namespace rsx { if (need_convert) { - convert_scale_image(temp1, out_format, out_w, out_h, out_pitch, + convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch, pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false); clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch); From 6406cece578942e9e61b71776b16483a61b14653 Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Wed, 6 Jan 2016 03:37:38 +0200 Subject: [PATCH 12/13] nv3089::image_in: fixed reading from tiled regions --- rpcs3/Emu/RSX/rsx_methods.cpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 0c65d73769..6870c1830c 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -336,7 +336,7 @@ namespace rsx namespace nv3089 { - never_inline void image_in(u32 arg) + never_inline void image_in(thread *rsx, u32 arg) { u32 operation = method_registers[NV3089_SET_OPERATION]; @@ -405,7 +405,7 @@ namespace rsx return; } - u32 src_address = get_address(src_offset, src_dma); + tiled_region src_region = rsx->get_tiled_address(src_offset, src_dma & 0xf);//get_address(src_offset, src_dma); u32 dst_address = get_address(dst_offset, dst_dma); u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel @@ -433,7 +433,7 @@ namespace rsx //LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address); - u8* pixels_src = vm::ps3::_ptr<u8>(src_address); + u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr; u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address); if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 && @@ -462,8 +462,6 @@ namespace rsx f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX]; f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY]; - u32 slice_h = (u32)(clip_h * (1.0 / scale_y)); - u32 convert_w = (u32)(scale_x * in_w); u32 convert_h = (u32)(scale_y * in_h); @@ -473,16 +471,22 @@ namespace rsx bool need_convert = out_format != in_format || scale_x != 1.0 || scale_y != 1.0; - if (slice_h) + u32 slice_h = clip_h; + + if (src_region.tile) { - if (clip_h < out_h) + if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2) { - --slice_h; + slice_h *= 2; + } + + u32 size = slice_h * in_pitch; + + if (size > src_region.tile->size - src_region.base) + { + u32 diff = size - (src_region.tile->size - src_region.base); + slice_h -= diff / in_pitch + (diff % in_pitch ? 1 : 0); } - } - else - { - slice_h = clip_h; } if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D) From e9560da4e2bcafad4b988a5ccc3d1fe2b619f64e Mon Sep 17 00:00:00 2001 From: DHrpcs3 <dh.rpcs3@gmail.com> Date: Wed, 6 Jan 2016 13:47:05 +0200 Subject: [PATCH 13/13] nv3089::image_in: use in_x/in_y & out_x/out_y --- rpcs3/Emu/RSX/rsx_methods.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 6870c1830c..3529418999 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -405,12 +405,15 @@ namespace rsx return; } - tiled_region src_region = rsx->get_tiled_address(src_offset, src_dma & 0xf);//get_address(src_offset, src_dma); - u32 dst_address = get_address(dst_offset, dst_dma); - u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4; + u32 in_offset = u32(in_x * in_bpp + in_pitch * in_y); + u32 out_offset = out_x * out_bpp + out_pitch * out_y; + + tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);//get_address(src_offset, src_dma); + u32 dst_address = get_address(dst_offset + out_offset, dst_dma); + if (out_pitch == 0) { out_pitch = out_bpp * out_w; @@ -434,7 +437,7 @@ namespace rsx //LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address); u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr; - u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address); + u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset); if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 && dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8) @@ -457,8 +460,6 @@ namespace rsx AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB; - u32 out_offset = out_x * out_bpp + out_pitch * out_y; - f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX]; f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY]; @@ -519,7 +520,7 @@ namespace rsx { for (u32 y = 0; y < out_h; ++y) { - u8 *dst = pixels_dst + out_x * out_bpp + out_pitch * (y + out_y); + u8 *dst = pixels_dst + out_pitch * y; u8 *src = pixels_src + in_pitch * y; std::memmove(dst, src, out_w * out_bpp);