(VITA) Try to use NEON

This commit is contained in:
Francisco José García García 2016-10-17 18:39:10 +02:00
parent dd2c3ab1ee
commit 81e7a60759
8 changed files with 25 additions and 23 deletions

View File

@ -211,7 +211,7 @@ else ifeq ($(platform), vita)
WANT_ZLIB := 1
INCLUDE += -Ideps/libvita2d/include
INCLUDE += -Ideps/zlib
PLATCFLAGS := -mthumb -mfloat-abi=hard -fsingle-precision-constant \
PLATCFLAGS := -marm -mfloat-abi=hard -fsingle-precision-constant \
-mword-relocations -fno-unwind-tables -fno-asynchronous-unwind-tables -ftree-vectorize -fno-optimize-sibling-calls
LIBS += -lSceKernel_stub -lSceDisplay_stub -lSceGxm_stub -lSceNet_stub -lSceNetCtl_stub\
-lSceSysmodule_stub -lSceCtrl_stub -lSceTouch_stub -lSceAudio_stub -lSceFiber_stub\
@ -226,8 +226,10 @@ else ifeq ($(platform), vita)
deps/libvita2d/shader/compiled/texture_f_gxp.o \
deps/libvita2d/shader/compiled/texture_tint_f_gxp.o
PLATOBJS += libretro-common/conversion/s16_to_float_neon.o \
audio/drivers_resampler/sinc_resampler_neon.o \
audio/drivers_resampler/cc_resampler_neon.o
libretro-common/conversion/float_to_s16_neon.o \
memory/neon/memcpy-neon.o \
audio/drivers_resampler/sinc_resampler_neon.o \
audio/drivers_resampler/cc_resampler_neon.o
LIBDIRS += -L.
LDFLAGS += -Wl,-q
@ -404,7 +406,7 @@ all: $(EXT_TARGET)
%.velf: %.elf
ifneq ($(DEBUG), 1)
arm-vita-eabi-strip -g $<
arm-vita-eabi-strip -g $<
endif
vita-elf-create $< $@
vita-make-fself -s $@ eboot.bin

View File

@ -334,7 +334,7 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(out_buffer + 1, _mm_movehl_ps(sum, sum));
}
#elif defined(__ARM_NEON__) && !defined(VITA)
#elif defined(__ARM_NEON__)
#if SINC_COEFF_LERP
#error "NEON asm does not support SINC lerp."
@ -437,7 +437,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
}
/* Be SIMD-friendly. */
#if (defined(__AVX__) && ENABLE_AVX) || (defined(__ARM_NEON__)&& !defined(VITA))
#if (defined(__AVX__) && ENABLE_AVX) || (defined(__ARM_NEON__))
re->taps = (re->taps + 7) & ~7;
#else
re->taps = (re->taps + 3) & ~3;
@ -460,7 +460,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
init_sinc_table(re, cutoff, re->phase_table,
1 << PHASE_BITS, re->taps, SINC_COEFF_LERP);
#if defined(__ARM_NEON__) && !defined(VITA)
#if defined(__ARM_NEON__)
process_sinc_func = mask & RESAMPLER_SIMD_NEON
? process_sinc_neon : process_sinc_C;
#endif

View File

@ -66,7 +66,7 @@ vita2d_texture *vita2d_create_empty_texture_format(unsigned int w, unsigned int
}
/* Clear the texture */
memset(texture_data, 0, tex_size);
//memset(texture_data, 0, tex_size);
/* Create the gxm texture */
sceGxmTextureInitLinear(
@ -94,7 +94,7 @@ vita2d_texture *vita2d_create_empty_texture_format(unsigned int w, unsigned int
return NULL;
}
memset(texture_palette, 0, pal_size);
//memset(texture_palette, 0, pal_size);
sceGxmTextureSetPalette(&texture->gxm_tex, texture_palette);
} else {

View File

@ -33,6 +33,9 @@
#include "../../runloop.h"
#include "../video_coord_array.h"
/* memcpy-style copy routine defined outside this file: takes a destination,
 * a source and a length in bytes (callers below pass width * sizeof(pixel)).
 * NOTE(review): presumably implemented by memory/neon/memcpy-neon.o, which
 * the Vita Makefile adds to PLATOBJS - confirm. */
extern void *memcpy_neon(void *dst, const void *src, size_t n);
static void vita2d_gfx_set_viewport(void *data, unsigned viewport_width,
unsigned viewport_height, bool force_full, bool allow_rotate);
@ -139,7 +142,7 @@ static bool vita2d_gfx_frame(void *data, const void *frame,
if (frame)
{
if(!(vita->texture&&vita2d_texture_get_datap(vita->texture)==frame)){
unsigned i, j;
unsigned i;
unsigned int stride;
if ((width != vita->width || height != vita->height) && vita->texture)
@ -167,8 +170,7 @@ static bool vita2d_gfx_frame(void *data, const void *frame,
const uint32_t *frame32 = frame;
for (i = 0; i < height; i++)
for (j = 0; j < width; j++)
tex32[j + i*stride] = frame32[j + i*pitch];
memcpy_neon(&tex32[i*stride],&frame32[i*pitch],pitch*sizeof(uint32_t));
}
else
{
@ -178,8 +180,7 @@ static bool vita2d_gfx_frame(void *data, const void *frame,
const uint16_t *frame16 = frame;
for (i = 0; i < height; i++)
for (j = 0; j < width; j++)
tex16[j + i*stride] = frame16[j + i*pitch];
memcpy_neon(&tex16[i*stride],&frame16[i*pitch],width*sizeof(uint16_t));
}
}
}
@ -700,7 +701,7 @@ static void vita_set_texture_enable(void *data, bool state, bool full_screen)
static uintptr_t vita_load_texture(void *video_data, void *data,
bool threaded, enum texture_filter_type filter_type)
{
unsigned int stride, pitch, j, k;
unsigned int stride, pitch, j;
struct texture_image *image = (struct texture_image*)data;
struct vita2d_texture *texture = vita2d_create_empty_texture_format(image->width,
image->height,SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_ARGB);
@ -717,8 +718,7 @@ static uintptr_t vita_load_texture(void *video_data, void *data,
const uint32_t *frame32 = image->pixels;
pitch = image->width;
for (j = 0; j < image->height; j++)
for (k = 0; k < image->width; k++)
tex32[k + j*stride] = frame32[k + j*pitch];
memcpy_neon(&tex32[j*stride],&frame32[j*pitch],pitch*sizeof(uint32_t));
return (uintptr_t)texture;
}

View File

@ -122,7 +122,7 @@ void convert_float_to_s16_altivec(int16_t *out,
}
convert_float_to_s16_C(out, in, samples_in);
}
#elif defined(__ARM_NEON__) && !defined(VITA)
#elif defined(__ARM_NEON__)
/* Float -> signed 16-bit conversion routine declared here and defined outside
 * this file; only compiled in when __ARM_NEON__ is defined.
 * NOTE(review): presumably the assembly in
 * libretro-common/conversion/float_to_s16_neon - confirm. */
void convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
/**
* convert_float_to_s16_neon:
@ -210,7 +210,7 @@ void convert_float_to_s16_init_simd(void)
unsigned cpu = cpu_features_get();
(void)cpu;
#if defined(__ARM_NEON__) && !defined(VITA)
#if defined(__ARM_NEON__)
convert_float_to_s16_arm = (cpu & RETRO_SIMD_NEON) ?
convert_float_to_s16_neon : convert_float_to_s16_C;
#endif

View File

@ -122,7 +122,7 @@ void convert_s16_to_float_altivec(float *out,
convert_s16_to_float_C(out, in, samples_in, gain);
}
#elif defined(__ARM_NEON__) && !defined(VITA)
#elif defined(__ARM_NEON__)
/* Signed 16-bit -> float conversion routine declared here and defined outside
 * this file; only compiled in when __ARM_NEON__ is defined.
 * Avoid potential hard-float/soft-float ABI issues.
 * NOTE(review): presumably this is why gain is taken by pointer rather than
 * by value - confirm. */
void convert_s16_float_asm(float *out, const int16_t *in,
size_t samples, const float *gain);
@ -235,7 +235,7 @@ void convert_s16_to_float_init_simd(void)
unsigned cpu = cpu_features_get();
(void)cpu;
#if defined(__ARM_NEON__) && !defined(VITA)
#if defined(__ARM_NEON__)
convert_s16_to_float_arm = (cpu & RETRO_SIMD_NEON) ?
convert_s16_to_float_neon : convert_s16_to_float_C;
#endif

View File

@ -74,7 +74,7 @@ void convert_float_to_s16_SSE2(int16_t *out,
**/
void convert_float_to_s16_altivec(int16_t *out,
const float *in, size_t samples);
#elif defined(__ARM_NEON__) && !defined(VITA)
#elif defined(__ARM_NEON__)
#define convert_float_to_s16 convert_float_to_s16_arm
void (*convert_float_to_s16_arm)(int16_t *out,

View File

@ -65,7 +65,7 @@ void convert_s16_to_float_SSE2(float *out,
void convert_s16_to_float_altivec(float *out,
const int16_t *in, size_t samples, float gain);
#elif defined(__ARM_NEON__) && !defined(VITA)
#elif defined(__ARM_NEON__)
#define convert_s16_to_float convert_s16_to_float_arm
void (*convert_s16_to_float_arm)(float *out,