diff --git a/Makefile.griffin b/Makefile.griffin index abdd7b89a3..4930887373 100644 --- a/Makefile.griffin +++ b/Makefile.griffin @@ -211,7 +211,7 @@ else ifeq ($(platform), vita) WANT_ZLIB := 1 INCLUDE += -Ideps/libvita2d/include INCLUDE += -Ideps/zlib - PLATCFLAGS := -mthumb -mfloat-abi=hard -fsingle-precision-constant \ + PLATCFLAGS := -marm -mfloat-abi=hard -fsingle-precision-constant \ -mword-relocations -fno-unwind-tables -fno-asynchronous-unwind-tables -ftree-vectorize -fno-optimize-sibling-calls LIBS += -lSceKernel_stub -lSceDisplay_stub -lSceGxm_stub -lSceNet_stub -lSceNetCtl_stub\ -lSceSysmodule_stub -lSceCtrl_stub -lSceTouch_stub -lSceAudio_stub -lSceFiber_stub\ @@ -226,8 +226,10 @@ else ifeq ($(platform), vita) deps/libvita2d/shader/compiled/texture_f_gxp.o \ deps/libvita2d/shader/compiled/texture_tint_f_gxp.o PLATOBJS += libretro-common/conversion/s16_to_float_neon.o \ - audio/drivers_resampler/sinc_resampler_neon.o \ - audio/drivers_resampler/cc_resampler_neon.o + libretro-common/conversion/float_to_s16_neon.o \ + memory/neon/memcpy-neon.o \ + audio/drivers_resampler/sinc_resampler_neon.o \ + audio/drivers_resampler/cc_resampler_neon.o LIBDIRS += -L. LDFLAGS += -Wl,-q @@ -404,7 +406,7 @@ all: $(EXT_TARGET) %.velf: %.elf ifneq ($(DEBUG), 1) - arm-vita-eabi-strip -g $< + arm-vita-eabi-strip -g $< endif vita-elf-create $< $@ vita-make-fself -s $@ eboot.bin diff --git a/audio/drivers_resampler/sinc_resampler.c b/audio/drivers_resampler/sinc_resampler.c index 75a747b2a2..1192896caa 100644 --- a/audio/drivers_resampler/sinc_resampler.c +++ b/audio/drivers_resampler/sinc_resampler.c @@ -334,7 +334,7 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer) /* movehl { X, R, X, L } == { X, R, X, R } */ _mm_store_ss(out_buffer + 1, _mm_movehl_ps(sum, sum)); } -#elif defined(__ARM_NEON__) && !defined(VITA) +#elif defined(__ARM_NEON__) #if SINC_COEFF_LERP #error "NEON asm does not support SINC lerp." @@ -437,7 +437,7 @@ static void *resampler_sinc_new(const struct resampler_config *config, } /* Be SIMD-friendly. */ -#if (defined(__AVX__) && ENABLE_AVX) || (defined(__ARM_NEON__)&& !defined(VITA)) +#if (defined(__AVX__) && ENABLE_AVX) || (defined(__ARM_NEON__)) re->taps = (re->taps + 7) & ~7; #else re->taps = (re->taps + 3) & ~3; @@ -460,7 +460,7 @@ static void *resampler_sinc_new(const struct resampler_config *config, init_sinc_table(re, cutoff, re->phase_table, 1 << PHASE_BITS, re->taps, SINC_COEFF_LERP); -#if defined(__ARM_NEON__) && !defined(VITA) +#if defined(__ARM_NEON__) process_sinc_func = mask & RESAMPLER_SIMD_NEON ? process_sinc_neon : process_sinc_C; #endif diff --git a/deps/libvita2d/source/vita2d_texture.c b/deps/libvita2d/source/vita2d_texture.c index 86b30fbb26..9d43071fb2 100644 --- a/deps/libvita2d/source/vita2d_texture.c +++ b/deps/libvita2d/source/vita2d_texture.c @@ -66,7 +66,7 @@ vita2d_texture *vita2d_create_empty_texture_format(unsigned int w, unsigned int } /* Clear the texture */ - memset(texture_data, 0, tex_size); + //memset(texture_data, 0, tex_size); /* Create the gxm texture */ sceGxmTextureInitLinear( @@ -94,7 +94,7 @@ vita2d_texture *vita2d_create_empty_texture_format(unsigned int w, unsigned int return NULL; } - memset(texture_palette, 0, pal_size); + //memset(texture_palette, 0, pal_size); sceGxmTextureSetPalette(&texture->gxm_tex, texture_palette); } else { diff --git a/gfx/drivers/vita2d_gfx.c b/gfx/drivers/vita2d_gfx.c index 1b2cd19636..7ab33a1183 100644 --- a/gfx/drivers/vita2d_gfx.c +++ b/gfx/drivers/vita2d_gfx.c @@ -33,6 +33,9 @@ #include "../../runloop.h" #include "../video_coord_array.h" +extern void *memcpy_neon(void *dst, const void *src, size_t n); + + static void vita2d_gfx_set_viewport(void *data, unsigned viewport_width, unsigned viewport_height, bool force_full, bool allow_rotate); @@ -139,7 +142,7 @@ static bool vita2d_gfx_frame(void *data, const void *frame, if (frame) { if(!(vita->texture&&vita2d_texture_get_datap(vita->texture)==frame)){ - unsigned i, j; + unsigned i; unsigned int stride; if ((width != vita->width || height != vita->height) && vita->texture) @@ -167,8 +170,7 @@ static bool vita2d_gfx_frame(void *data, const void *frame, const uint32_t *frame32 = frame; for (i = 0; i < height; i++) - for (j = 0; j < width; j++) - tex32[j + i*stride] = frame32[j + i*pitch]; + memcpy_neon(&tex32[i*stride],&frame32[i*pitch],pitch*sizeof(uint32_t)); } else { @@ -178,8 +180,7 @@ static bool vita2d_gfx_frame(void *data, const void *frame, const uint16_t *frame16 = frame; for (i = 0; i < height; i++) - for (j = 0; j < width; j++) - tex16[j + i*stride] = frame16[j + i*pitch]; + memcpy_neon(&tex16[i*stride],&frame16[i*pitch],width*sizeof(uint16_t)); } } } @@ -700,7 +701,7 @@ static void vita_set_texture_enable(void *data, bool state, bool full_screen) static uintptr_t vita_load_texture(void *video_data, void *data, bool threaded, enum texture_filter_type filter_type) { - unsigned int stride, pitch, j, k; + unsigned int stride, pitch, j; struct texture_image *image = (struct texture_image*)data; struct vita2d_texture *texture = vita2d_create_empty_texture_format(image->width, image->height,SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_ARGB); @@ -717,8 +718,7 @@ static uintptr_t vita_load_texture(void *video_data, void *data, const uint32_t *frame32 = image->pixels; pitch = image->width; for (j = 0; j < image->height; j++) - for (k = 0; k < image->width; k++) - tex32[k + j*stride] = frame32[k + j*pitch]; + memcpy_neon(&tex32[j*stride],&frame32[j*pitch],pitch*sizeof(uint32_t)); return (uintptr_t)texture; } diff --git a/libretro-common/conversion/float_to_s16.c b/libretro-common/conversion/float_to_s16.c index 9ae0fab4f0..9f1fdfeb33 100644 --- a/libretro-common/conversion/float_to_s16.c +++ b/libretro-common/conversion/float_to_s16.c @@ -122,7 +122,7 @@ void convert_float_to_s16_altivec(int16_t *out, } convert_float_to_s16_C(out, in, samples_in); } -#elif defined(__ARM_NEON__) && !defined(VITA) +#elif defined(__ARM_NEON__) void convert_float_s16_asm(int16_t *out, const float *in, size_t samples); /** * convert_float_to_s16_neon: @@ -210,7 +210,7 @@ void convert_float_to_s16_init_simd(void) unsigned cpu = cpu_features_get(); (void)cpu; -#if defined(__ARM_NEON__) && !defined(VITA) +#if defined(__ARM_NEON__) convert_float_to_s16_arm = (cpu & RETRO_SIMD_NEON) ? convert_float_to_s16_neon : convert_float_to_s16_C; #endif diff --git a/libretro-common/conversion/s16_to_float.c b/libretro-common/conversion/s16_to_float.c index 2bc402b1c1..c9040c9d09 100644 --- a/libretro-common/conversion/s16_to_float.c +++ b/libretro-common/conversion/s16_to_float.c @@ -122,7 +122,7 @@ void convert_s16_to_float_altivec(float *out, convert_s16_to_float_C(out, in, samples_in, gain); } -#elif defined(__ARM_NEON__) && !defined(VITA) +#elif defined(__ARM_NEON__) /* Avoid potential hard-float/soft-float ABI issues. */ void convert_s16_float_asm(float *out, const int16_t *in, size_t samples, const float *gain); @@ -235,7 +235,7 @@ void convert_s16_to_float_init_simd(void) unsigned cpu = cpu_features_get(); (void)cpu; -#if defined(__ARM_NEON__) && !defined(VITA) +#if defined(__ARM_NEON__) convert_s16_to_float_arm = (cpu & RETRO_SIMD_NEON) ? convert_s16_to_float_neon : convert_s16_to_float_C; #endif diff --git a/libretro-common/include/conversion/float_to_s16.h b/libretro-common/include/conversion/float_to_s16.h index 3ee1aad112..0867233fcf 100644 --- a/libretro-common/include/conversion/float_to_s16.h +++ b/libretro-common/include/conversion/float_to_s16.h @@ -74,7 +74,7 @@ void convert_float_to_s16_SSE2(int16_t *out, **/ void convert_float_to_s16_altivec(int16_t *out, const float *in, size_t samples); -#elif defined(__ARM_NEON__) && !defined(VITA) +#elif defined(__ARM_NEON__) #define convert_float_to_s16 convert_float_to_s16_arm void (*convert_float_to_s16_arm)(int16_t *out, diff --git a/libretro-common/include/conversion/s16_to_float.h b/libretro-common/include/conversion/s16_to_float.h index b418a61396..46620e518a 100644 --- a/libretro-common/include/conversion/s16_to_float.h +++ b/libretro-common/include/conversion/s16_to_float.h @@ -65,7 +65,7 @@ void convert_s16_to_float_SSE2(float *out, void convert_s16_to_float_altivec(float *out, const int16_t *in, size_t samples, float gain); -#elif defined(__ARM_NEON__) && !defined(VITA) +#elif defined(__ARM_NEON__) #define convert_s16_to_float convert_s16_to_float_arm void (*convert_s16_to_float_arm)(float *out,