From e32c48d0dd87e5a9978af2e2841e7951903ff757 Mon Sep 17 00:00:00 2001 From: Megamouse Date: Sun, 21 Apr 2024 00:10:20 +0200 Subject: [PATCH] Camera: optimize some loops Factor out some conditions in RAW8 loop. Use floats in YUV loop. Replace pixel getters with simple pointers. --- rpcs3/Emu/Cell/Modules/cellGem.cpp | 46 +++++++------ rpcs3/rpcs3qt/qt_camera_video_sink.cpp | 93 ++++++++++++++++---------- rpcs3/rpcs3qt/qt_utils.cpp | 2 +- 3 files changed, 83 insertions(+), 58 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellGem.cpp b/rpcs3/Emu/Cell/Modules/cellGem.cpp index 616cf47072..c71e356cab 100644 --- a/rpcs3/Emu/Cell/Modules/cellGem.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGem.cpp @@ -22,6 +22,11 @@ LOG_CHANNEL(cellGem); +static inline constexpr u32 rgba(u8 r, u8 g, u8 b, u8 a) +{ + return ((r & 0xffu) << 24) | ((g & 0xffu) << 16) | ((b & 0xffu) << 8) | (a & 0xffu); +} + template <> void fmt_class_string::format(std::string& out, u64 arg) { @@ -493,42 +498,39 @@ void gem_config_data::operator()() { constexpr u32 in_pitch = 640; constexpr u32 out_pitch = 640 * 4; + u8* dst = vc_attribute.video_data_out.get_ptr(); for (u32 y = 0; y < 480 - 1; y += 2) { + const u8* src = &video_data_in[y * in_pitch]; + const u16* src0 = reinterpret_cast(src); + const u16* src1 = reinterpret_cast(src + in_pitch); + + u8* dst_row = dst + y * out_pitch; + u32* dst0 = reinterpret_cast(dst_row); + u32* dst1 = reinterpret_cast(dst_row + out_pitch); + for (u32 x = 0; x < 640 - 1; x += 2) { - const u32 in_offset = 1 * (y * 640 + x); - const u32 out_offset = 4 * (y * 640 + x); + const u16 top = *src0++; + const u16 bottom = *src1++; - const u8 b = video_data_in[in_offset + 0]; - const u8 g0 = video_data_in[in_offset + 1]; - const u8 g1 = video_data_in[in_offset + in_pitch + 0]; - const u8 r = video_data_in[in_offset + in_pitch + 1]; + const u8 b = (top & 0xFF); + const u8 g0 = ((top >> 8) & 0xFF); + const u8 g1 = (bottom & 0xFF); + const u8 r = ((bottom >> 8) & 0xFF); // Top-Left - 
vc_attribute.video_data_out[out_offset + 0] = r; // R - vc_attribute.video_data_out[out_offset + 1] = g0; // G - vc_attribute.video_data_out[out_offset + 2] = b; // B - vc_attribute.video_data_out[out_offset + 3] = 255; // A + *dst0++ = rgba(r, g0, b, 255); // Top-Right Pixel - vc_attribute.video_data_out[out_offset + 4] = r; // R - vc_attribute.video_data_out[out_offset + 5] = g0; // G - vc_attribute.video_data_out[out_offset + 6] = b; // B - vc_attribute.video_data_out[out_offset + 7] = 255; // A + *dst0++ = rgba(r, g0, b, 255); // Bottom-Left Pixel - vc_attribute.video_data_out[out_offset + out_pitch + 0] = r; // R - vc_attribute.video_data_out[out_offset + out_pitch + 1] = g1; // G - vc_attribute.video_data_out[out_offset + out_pitch + 2] = b; // B - vc_attribute.video_data_out[out_offset + out_pitch + 3] = 255; // A + *dst1++ = rgba(r, g1, b, 255); // Bottom-Right Pixel - vc_attribute.video_data_out[out_offset + out_pitch + 4] = r; // R - vc_attribute.video_data_out[out_offset + out_pitch + 5] = g1; // G - vc_attribute.video_data_out[out_offset + out_pitch + 6] = b; // B - vc_attribute.video_data_out[out_offset + out_pitch + 7] = 255; // A + *dst1++ = rgba(r, g1, b, 255); } } } diff --git a/rpcs3/rpcs3qt/qt_camera_video_sink.cpp b/rpcs3/rpcs3qt/qt_camera_video_sink.cpp index 2715c21cef..3ddbfc7478 100644 --- a/rpcs3/rpcs3qt/qt_camera_video_sink.cpp +++ b/rpcs3/rpcs3qt/qt_camera_video_sink.cpp @@ -98,6 +98,9 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame) // TODO: check if pixel format and bytes per pixel match and convert if necessary // TODO: implement or improve more conversions + const u32 width = std::min(image_buffer.width, image.width()); + const u32 height = std::min(image_buffer.height, image.height()); + switch (m_format) { case CELL_CAMERA_JPG: @@ -107,27 +110,46 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame) case CELL_CAMERA_RAW8: // The game seems to expect BGGR { // Let's use a very simple algorithm to convert the 
image to raw BGGR - const auto convert_to_bggr = [&image_buffer, &image](u32 y_begin, u32 y_end) + const auto convert_to_bggr = [&image_buffer, &image, width, height](u32 y_begin, u32 y_end) { - for (u32 y = y_begin; y < std::min(image_buffer.height, image.height()) && y < y_end; y++) - { - for (u32 x = 0; x < std::min(image_buffer.width, image.width()); x++) - { - u8& pixel = image_buffer.data[image_buffer.width * y + x]; - const bool is_left_pixel = (x % 2) == 0; - const bool is_top_pixel = (y % 2) == 0; + u8* dst = &image_buffer.data[image_buffer.width * y_begin]; - if (is_left_pixel && is_top_pixel) + for (u32 y = y_begin; y < height && y < y_end; y++) + { + const QRgb* src = reinterpret_cast(image.constScanLine(y)); + const bool is_top_pixel = (y % 2) == 0; + + // Split loops (roughly twice the performance by removing one condition) + if (is_top_pixel) + { + for (u32 x = 0; x < width; x++, dst++, src++) { - pixel = qBlue(image.pixel(x, y)); + const bool is_left_pixel = (x % 2) == 0; + + if (is_left_pixel) + { + *dst = qBlue(*src); + } + else + { + *dst = qGreen(*src); + } } - else if (is_left_pixel || is_top_pixel) + } + else + { + for (u32 x = 0; x < width; x++, dst++, src++) { - pixel = qGreen(image.pixel(x, y)); - } - else - { - pixel = qRed(image.pixel(x, y)); + const bool is_left_pixel = (x % 2) == 0; + + if (is_left_pixel) + { + *dst = qGreen(*src); + } + else + { + *dst = qRed(*src); + } } } } @@ -154,7 +176,7 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame) case CELL_CAMERA_V_Y1_U_Y0: { // Simple RGB to Y0_U_Y1_V conversion from stackoverflow. 
- const auto convert_to_yuv422 = [&image_buffer, &image, format = m_format](u32 y_begin, u32 y_end) + const auto convert_to_yuv422 = [&image_buffer, &image, width, height, format = m_format](u32 y_begin, u32 y_end) { constexpr int yuv_bytes_per_pixel = 2; const int yuv_pitch = image_buffer.width * yuv_bytes_per_pixel; @@ -164,32 +186,33 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame) const int y1_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 2 : 1; const int v_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 3 : 0; - for (u32 y = y_begin; y < std::min(image_buffer.height, image.height()) && y < y_end; y++) + for (u32 y = y_begin; y < height && y < y_end; y++) { + const QRgb* src = reinterpret_cast(image.constScanLine(y)); uint8_t* yuv_row_ptr = &image_buffer.data[y * yuv_pitch]; - for (u32 x = 0; x < std::min(image_buffer.width, image.width()) - 1; x += 2) + for (u32 x = 0; x < width - 1; x += 2) { - const QRgb pixel_1 = image.pixel(x, y); - const QRgb pixel_2 = image.pixel(x + 1, y); + const QRgb pixel_1 = *src++; + const QRgb pixel_2 = *src++; - const double r1 = qRed(pixel_1); - const double g1 = qGreen(pixel_1); - const double b1 = qBlue(pixel_1); - const double r2 = qRed(pixel_2); - const double g2 = qGreen(pixel_2); - const double b2 = qBlue(pixel_2); + const float r1 = qRed(pixel_1); + const float g1 = qGreen(pixel_1); + const float b1 = qBlue(pixel_1); + const float r2 = qRed(pixel_2); + const float g2 = qGreen(pixel_2); + const float b2 = qBlue(pixel_2); - const int y0 = (0.257 * r1) + (0.504 * g1) + (0.098 * b1) + 16.0; - const int u = -(0.148 * r1) - (0.291 * g1) + (0.439 * b1) + 128.0; - const int v = (0.439 * r1) - (0.368 * g1) - (0.071 * b1) + 128.0; - const int y1 = (0.257 * r2) + (0.504 * g2) + (0.098 * b2) + 16.0; + const int y0 = (0.257f * r1) + (0.504f * g1) + (0.098f * b1) + 16.0f; + const int u = -(0.148f * r1) - (0.291f * g1) + (0.439f * b1) + 128.0f; + const int v = (0.439f * r1) - (0.368f * g1) - (0.071f * b1) + 128.0f; + const 
int y1 = (0.257f * r2) + (0.504f * g2) + (0.098f * b2) + 16.0f; const int yuv_index = x * yuv_bytes_per_pixel; - yuv_row_ptr[yuv_index + y0_offset] = std::max(0, std::min(y0, 255)); - yuv_row_ptr[yuv_index + u_offset] = std::max(0, std::min( u, 255)); - yuv_row_ptr[yuv_index + y1_offset] = std::max(0, std::min(y1, 255)); - yuv_row_ptr[yuv_index + v_offset] = std::max(0, std::min( v, 255)); + yuv_row_ptr[yuv_index + y0_offset] = static_cast(std::clamp(y0, 0, 255)); + yuv_row_ptr[yuv_index + u_offset] = static_cast(std::clamp( u, 0, 255)); + yuv_row_ptr[yuv_index + y1_offset] = static_cast(std::clamp(y1, 0, 255)); + yuv_row_ptr[yuv_index + v_offset] = static_cast(std::clamp( v, 0, 255)); } } }; diff --git a/rpcs3/rpcs3qt/qt_utils.cpp b/rpcs3/rpcs3qt/qt_utils.cpp index fe67437eea..e041f5b9a6 100644 --- a/rpcs3/rpcs3qt/qt_utils.cpp +++ b/rpcs3/rpcs3qt/qt_utils.cpp @@ -271,7 +271,7 @@ namespace gui for (int y = 0; y < image.height(); ++y) { - QRgb* row = reinterpret_cast(image.scanLine(y)); + const QRgb* row = reinterpret_cast(image.constScanLine(y)); bool row_filled = false; for (int x = 0; x < image.width(); ++x)