mirror of
https://github.com/libretro/RetroArch
synced 2025-02-06 00:39:53 +00:00
pixconv: add MMX version for conv_rgb565_argb8888
This commit is contained in:
parent
b617bb5d64
commit
1922d898af
@ -35,6 +35,8 @@
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#elif defined(__MMX__)
|
||||
#include <mmintrin.h>
|
||||
#endif
|
||||
|
||||
void conv_rgb565_0rgb1555(void *output_, const void *input_,
|
||||
@ -204,6 +206,16 @@ void conv_rgb565_argb8888(void *output_, const void *input_,
|
||||
const __m128i a = _mm_set1_epi16(0x00ff);
|
||||
|
||||
int max_width = width - 7;
|
||||
#elif defined(__MMX__)
|
||||
const __m64 pix_mask_r = _mm_set1_pi16(0x1f << 10);
|
||||
const __m64 pix_mask_g = _mm_set1_pi16(0x3f << 5);
|
||||
const __m64 pix_mask_b = _mm_set1_pi16(0x1f << 5);
|
||||
const __m64 mul16_r = _mm_set1_pi16(0x0210);
|
||||
const __m64 mul16_g = _mm_set1_pi16(0x2080);
|
||||
const __m64 mul16_b = _mm_set1_pi16(0x4200);
|
||||
const __m64 a = _mm_set1_pi16(0x00ff);
|
||||
|
||||
int max_width = width - 3;
|
||||
#endif
|
||||
|
||||
for (h = 0; h < height;
|
||||
@ -237,6 +249,35 @@ void conv_rgb565_argb8888(void *output_, const void *input_,
|
||||
_mm_storeu_si128((__m128i*)(output + w + 0), res_lo);
|
||||
_mm_storeu_si128((__m128i*)(output + w + 4), res_hi);
|
||||
}
|
||||
#elif defined(__MMX__)
|
||||
for (; w < max_width; w += 4)
|
||||
{
|
||||
__m64 res_lo, res_hi;
|
||||
__m64 res_lo_bg, res_hi_bg, res_lo_ra, res_hi_ra;
|
||||
const __m64 in = _mm_cvtsi64_m64(*((int64_t*)(input + w)));
|
||||
__m64 r = _mm_and_si64(_mm_srli_pi16(in, 1), pix_mask_r);
|
||||
__m64 g = _mm_and_si64(in, pix_mask_g);
|
||||
__m64 b = _mm_and_si64(_mm_slli_pi16(in, 5), pix_mask_b);
|
||||
|
||||
r = _mm_mulhi_pi16(r, mul16_r);
|
||||
g = _mm_mulhi_pi16(g, mul16_g);
|
||||
b = _mm_mulhi_pi16(b, mul16_b);
|
||||
|
||||
res_lo_bg = _mm_unpacklo_pi8(b, g);
|
||||
res_hi_bg = _mm_unpackhi_pi8(b, g);
|
||||
res_lo_ra = _mm_unpacklo_pi8(r, a);
|
||||
res_hi_ra = _mm_unpackhi_pi8(r, a);
|
||||
|
||||
res_lo = _mm_or_si64(res_lo_bg,
|
||||
_mm_slli_si64(res_lo_ra, 16));
|
||||
res_hi = _mm_or_si64(res_hi_bg,
|
||||
_mm_slli_si64(res_hi_ra, 16));
|
||||
|
||||
*((int64_t*)(output + w + 0)) = _mm_cvtm64_si64(res_lo);
|
||||
*((int64_t*)(output + w + 2)) = _mm_cvtm64_si64(res_hi);
|
||||
}
|
||||
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
for (; w < width; w++)
|
||||
@ -284,18 +325,18 @@ void conv_rgb565_abgr8888(void *output_, const void *input_,
|
||||
__m128i r = _mm_and_si128(_mm_srli_epi16(in, 1), pix_mask_r);
|
||||
__m128i g = _mm_and_si128(in, pix_mask_g);
|
||||
__m128i b = _mm_and_si128(_mm_slli_epi16(in, 5), pix_mask_b);
|
||||
r = _mm_mulhi_epi16(r, mul16_r);
|
||||
r = _mm_mulhi_epi16(r, mul16_r);
|
||||
g = _mm_mulhi_epi16(g, mul16_g);
|
||||
b = _mm_mulhi_epi16(b, mul16_b);
|
||||
res_lo_bg = _mm_unpacklo_epi8(b, g);
|
||||
res_lo_bg = _mm_unpacklo_epi8(b, g);
|
||||
res_hi_bg = _mm_unpackhi_epi8(b, g);
|
||||
res_lo_ra = _mm_unpacklo_epi8(r, a);
|
||||
res_hi_ra = _mm_unpackhi_epi8(r, a);
|
||||
res_lo = _mm_or_si128(res_lo_bg,
|
||||
res_lo = _mm_or_si128(res_lo_bg,
|
||||
_mm_slli_si128(res_lo_ra, 2));
|
||||
res_hi = _mm_or_si128(res_hi_bg,
|
||||
_mm_slli_si128(res_hi_ra, 2));
|
||||
_mm_storeu_si128((__m128i*)(output + w + 0), res_lo);
|
||||
_mm_storeu_si128((__m128i*)(output + w + 0), res_lo);
|
||||
_mm_storeu_si128((__m128i*)(output + w + 4), res_hi);
|
||||
}
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user