mirror of
https://github.com/libretro/RetroArch
synced 2025-01-26 18:35:22 +00:00
Optimize rgb1555 -> argb8888 conversion in SSE2.
This commit is contained in:
parent
86b444cb97
commit
b9630e2a2c
@ -17,7 +17,69 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__)
|
||||
void conv_0rgb1555_argb8888(void *output_, const void *input_,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride)
|
||||
{
|
||||
const uint16_t *input = (const uint16_t*)input_;
|
||||
uint32_t *output = (uint32_t*)output_;
|
||||
|
||||
__m128i pix_mask_r = _mm_set1_epi16(0x1f << 10);
|
||||
__m128i pix_mask_gb = _mm_set1_epi16(0x1f << 5);
|
||||
__m128i mul15_mid = _mm_set1_epi16(0x4200);
|
||||
__m128i mul15_hi = _mm_set1_epi16(0x0210);
|
||||
__m128i a = _mm_set1_epi16(0x00ff);
|
||||
|
||||
int max_width = width - 7;
|
||||
|
||||
for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride >> 1)
|
||||
{
|
||||
int w;
|
||||
for (w = 0; w < max_width; w += 8)
|
||||
{
|
||||
__m128i in = _mm_loadu_si128((const __m128i*)(input + w));
|
||||
__m128i r = _mm_and_si128(in, pix_mask_r);
|
||||
__m128i g = _mm_and_si128(in, pix_mask_gb);
|
||||
__m128i b = _mm_and_si128(_mm_slli_epi16(in, 5), pix_mask_gb);
|
||||
|
||||
r = _mm_mulhi_epi16(r, mul15_hi);
|
||||
g = _mm_mulhi_epi16(g, mul15_mid);
|
||||
b = _mm_mulhi_epi16(b, mul15_mid);
|
||||
|
||||
__m128i res_lo_bg = _mm_unpacklo_epi8(b, g);
|
||||
__m128i res_hi_bg = _mm_unpackhi_epi8(b, g);
|
||||
__m128i res_lo_ra = _mm_unpacklo_epi8(r, a);
|
||||
__m128i res_hi_ra = _mm_unpackhi_epi8(r, a);
|
||||
|
||||
__m128i res_lo = _mm_or_si128(res_lo_bg, _mm_slli_si128(res_lo_ra, 2));
|
||||
__m128i res_hi = _mm_or_si128(res_hi_bg, _mm_slli_si128(res_hi_ra, 2));
|
||||
|
||||
_mm_storeu_si128((__m128i*)(output + w + 0), res_lo);
|
||||
_mm_storeu_si128((__m128i*)(output + w + 4), res_hi);
|
||||
}
|
||||
|
||||
for (; w < width; w++)
|
||||
{
|
||||
uint32_t col = input[w];
|
||||
uint32_t r = (col >> 10) & 0x1f;
|
||||
uint32_t g = (col >> 5) & 0x1f;
|
||||
uint32_t b = (col >> 0) & 0x1f;
|
||||
r = (r << 3) | (r >> 2);
|
||||
g = (g << 3) | (g >> 2);
|
||||
b = (b << 3) | (b >> 2);
|
||||
|
||||
output[w] = (0xff << 24) | (r << 16) | (g << 8) | (b << 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void conv_0rgb1555_argb8888(void *output_, const void *input_,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride)
|
||||
@ -41,6 +103,7 @@ void conv_0rgb1555_argb8888(void *output_, const void *input_,
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void conv_0rgb1555_bgr24(void *output_, const void *input_,
|
||||
int width, int height,
|
||||
|
Loading…
x
Reference in New Issue
Block a user