diff --git a/griffin/griffin.c b/griffin/griffin.c index fde82465a6..08abfd3cf3 100644 --- a/griffin/griffin.c +++ b/griffin/griffin.c @@ -27,11 +27,6 @@ #include "../msvc/msvc_compat.h" #endif -#ifdef __CELLOS_LV2__ -#include "../mem/altivec/altivec_mem.c" -#endif - - /*============================================================ CONSOLE EXTENSIONS ============================================================ */ diff --git a/mem/altivec/altivec_mem.c b/mem/altivec/altivec_mem.c deleted file mode 100644 index 9a2e837f6d..0000000000 --- a/mem/altivec/altivec_mem.c +++ /dev/null @@ -1,226 +0,0 @@ -/********************************************************************************* - * Copyright (C) 2008-2010 by Konstantinos Margaritis * - * All rights reserved. * - * * - * Redistribution and use in source and binary forms, with or without * - * modification, are permitted provided that the following conditions are met: * - * 1. Redistributions of source code must retain the above copyright * - * notice, this list of conditions and the following disclaimer. * - * 2. Redistributions in binary form must reproduce the above copyright * - * notice, this list of conditions and the following disclaimer in the * - * documentation and/or other materials provided with the distribution. * - * 3. Neither the name of the Codex nor the * - * names of its contributors may be used to endorse or promote products * - * derived from this software without specific prior written permission. * - * * - * THIS SOFTWARE IS PROVIDED BY CODEX ''AS IS'' AND ANY * - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * - * DISCLAIMED. IN NO EVENT SHALL CODEX BE LIABLE FOR ANY * - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - *********************************************************************************/ - -#include -#include -#include "altivec_mem.h" - -void *vec_memcpy(void *dstpp, const void *srcpp, size_t len) -{ - const uint8_t *src = srcpp; - uint8_t *dst = dstpp; - - if (len >= sizeof(word_t)) - { - // Prefetch some stuff - READ_PREFETCH_START1(src); - WRITE_PREFETCH_START2(dst); - - // Copy until dst is word aligned - int al = copy_fwd_until_dst_word_aligned(dst, src), l; - - if (al) - { - src += sizeof(word_t) - al; - dst += sizeof(word_t) - al; - len -= sizeof(word_t) - al; - } - - // Now dst is word aligned. We'll continue by word copying, but - // for this we have to know the word-alignment of src also. - int srcoffset = ((word_t)(src) % sizeof(word_t)), sh_l, sh_r; - sh_l = srcoffset * CHAR_BIT; - sh_r = CHAR_BIT * sizeof(word_t) - sh_l; - - // Take the word-aligned long pointers of src and dest. - word_t *dstl = (word_t *)(dst); - const word_t *srcl = (word_t *)(src - srcoffset); - - if (len >= SIMD_PACKETSIZE) - { - // While we're not 16-byte aligned, move in 4-byte long steps. - - al = (word_t)dstl % SIMD_PACKETSIZE; - if (al) - { - copy_fwd_until_dst_simd_aligned(dstl, srcl, srcoffset, al, sh_l, sh_r); - srcl += (SIMD_PACKETSIZE - al)/WORDS_IN_PACKET; - src = (uint8_t *) srcl + srcoffset; - dstl += (SIMD_PACKETSIZE - al)/WORDS_IN_PACKET; - len -= SIMD_PACKETSIZE - al; - } - - // Now, dst is 16byte aligned. We can use SIMD if len >= 16 - l = len / SIMD_PACKETSIZE; - len -= l * SIMD_PACKETSIZE; - if (((word_t)(src) % SIMD_PACKETSIZE) == 0) - copy_fwd_rest_blocks_aligned(dstl, src, l); - else - copy_fwd_rest_blocks_unaligned(dstl, src, srcoffset, sh_l, sh_r, l); - src += l*SIMD_PACKETSIZE; - dstl += l * WORDS_IN_PACKET; - srcl = (word_t *)(src - srcoffset); - } - // Stop the prefetching - PREFETCH_STOP1; - PREFETCH_STOP2; - //#endif - - // Copy the remaining bytes using word-copying - // Handle alignment as appropriate - l = len / sizeof(word_t); - len -= l * sizeof(word_t); - - if (srcoffset == 0) - { - copy_fwd_rest_words_aligned(dstl, srcl, l); - srcl += l; - src = (uint8_t *) srcl; - } - else - { - copy_fwd_rest_words_unaligned(dstl, srcl, sh_l, sh_r, l); - srcl += l; - src = (uint8_t *) srcl + srcoffset; - } - - dstl += l; - - // For the end copy we have to use char * pointers. - dst = (uint8_t *) dstl; - } - - // Copy the remaining bytes - copy_fwd_rest_bytes(dst, src, len); - - return dstpp; -} - -void *vec_memcpy_aligned(void *dstpp, const void *srcpp, size_t len) -{ - const uint8_t *src = srcpp; - uint8_t *dst = dstpp; - - if (len >= sizeof(word_t)) - { - // Prefetch some stuff - READ_PREFETCH_START1(src); - WRITE_PREFETCH_START2(dst); - - // Take the word-aligned long pointers of src and dest. - word_t *dstl = (word_t *)(dst); - const word_t *srcl = (word_t *)(src); - int l; - -#ifdef LIBFREEVEC_SIMD_ENGINE - if (len >= SIMD_PACKETSIZE) - { - l = len / SIMD_PACKETSIZE; - len -= l * SIMD_PACKETSIZE; - // Now, dst is 16byte aligned. We can use SIMD if len >= 16 - copy_fwd_rest_blocks_aligned(dstl, src, l); - } -#endif - - // Copy the remaining bytes using word-copying - // Handle alignment as appropriate - l = len / sizeof(word_t); - copy_fwd_rest_words_aligned(dstl, srcl, l); - srcl += l; - dstl += l; - len -= l * sizeof(word_t); - // For the end copy we have to use char * pointers. - src = (uint8_t *) srcl; - dst = (uint8_t *) dstl; - } - - // Stop the prefetching - PREFETCH_STOP1; - - PREFETCH_STOP2; - - // Copy the remaining bytes - copy_fwd_rest_bytes(dst, src, len); - - return dstpp; -} - -void *vec_memset(void *s, int p, size_t len) -{ - uint8_t* ptr = s; - uint8_t __attribute__ ((aligned(16))) P = p; - if (len >= sizeof(word_t)) - { - word_t pw = charmask(P); - - size_t al = ((size_t)ptr) % sizeof(word_t); - if (al) - { - memset_fwd_until_dst_word_aligned(ptr, P, al); - ptr += sizeof(word_t) - al; - len -= sizeof(word_t) - al; - } - - int l; - word_t *ptr_w = (word_t *)(ptr); - if (len >= SIMD_PACKETSIZE) - { - // ptr is now word (32/64bit) aligned, memset until ptr is SIMD aligned - al = (word_t) ptr_w % SIMD_PACKETSIZE; - if (al) - { - memset_fwd_until_simd_aligned(ptr_w, pw, al); - ptr_w += (SIMD_PACKETSIZE - al)/WORDS_IN_PACKET; - len -= SIMD_PACKETSIZE - al; - } - // ptr is now 128-bit aligned - // perform set using SIMD - - l = len / SIMD_PACKETSIZE; - len -= l * SIMD_PACKETSIZE; - memset_set_blocks(ptr_w, pw, P, l); - ptr_w += l * WORDS_IN_PACKET; - } - // memset the remaining words - l = len / sizeof(word_t); - len -= l * sizeof(word_t); - memset_rest_words(ptr_w, pw, l); - ptr_w += l; - ptr = (uint8_t *)ptr_w; - } - // Handle the remaining bytes - switch(len) - { - case 3: - *ptr++ = P; - case 2: - *ptr++ = P; - case 1: - *ptr++ = P; - } - return s; -} diff --git a/mem/altivec/altivec_mem.h b/mem/altivec/altivec_mem.h deleted file mode 100644 index 3b22113c9f..0000000000 --- a/mem/altivec/altivec_mem.h +++ /dev/null @@ -1,446 +0,0 @@ -/********************************************************************************* - * Copyright (C) 2008-2010 by Konstantinos Margaritis * - * All rights reserved. * - * * - * Redistribution and use in source and binary forms, with or without * - * modification, are permitted provided that the following conditions are met: * - * 1. Redistributions of source code must retain the above copyright * - * notice, this list of conditions and the following disclaimer. * - * 2. Redistributions in binary form must reproduce the above copyright * - * notice, this list of conditions and the following disclaimer in the * - * documentation and/or other materials provided with the distribution. * - * 3. Neither the name of the Codex nor the * - * names of its contributors may be used to endorse or promote products * - * derived from this software without specific prior written permission. * - * * - * THIS SOFTWARE IS PROVIDED BY CODEX ''AS IS'' AND ANY * - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * - * DISCLAIMED. IN NO EVENT SHALL CODEX BE LIABLE FOR ANY * - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - *********************************************************************************/ - -/* $Id$ */ - -#ifndef ALTIVEC_MEM_H -#define ALTIVEC_MEM_H - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(__SNC__) -#include -#else -#include -#endif - -#define himagic 0x80808080L -#define lomagic 0x01010101L -#define magic_bits32 0x07efefeff -#define magic_bits64 (((unsigned long int) 0x7efefefe << 32) | 0xfefefeff) - -#define charmask8(c) ((uint8_t)(c & 0xff)) -#define charmask16(c) (uint16_t)((charmask8(c)) | (charmask8(c) << 8)) -#define charmask32(c) (uint32_t)((charmask16(c)) | (charmask16(c) << 16)) -#define charmask64(c) (uint64_t)((charmask32(c)) | (charmask32(c) << 32)) - -#define QMAKESTR(x) #x -#define MAKESTR(x) QMAKESTR(x) -#define SMASH(x,y) x/y -#define MAKEINC(x) SMASH(x,MACROFILE) - -#define LIBFREEVEC_SIMD_MACROS_INC MAKEINC(LIBFREEVEC_SIMD_ENGINE) - -#define ptrdiff_t(a, b) ((word_t)(a)-(word_t)(b)) -#define CMP_LT_OR_GT(a, b) ((a) - (b)) - -#ifndef MIN -#define MIN(a,b) ((a) <= (b) ? (a) : (b)) -#endif - -#define DIFF(a, b) ((a)-(b)) - -#ifdef LSB_FIRST -#define MERGE_SHIFTED_WORDS(a, b, sl, sr) ((a) >> sl) | ((b) << sr) -#else -#define MERGE_SHIFTED_WORDS(a, b, sl, sr) ((a) << sl) | ((b) >> sr) -#endif - -#define DST_CHAN_1 1 -#define DST_CHAN_2 2 - -#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) - -//#define READ_PREFETCH_START1(addr) vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN_1) -//#define READ_PREFETCH_START2(addr) vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN_2) -//#define WRITE_PREFETCH_START1(addr) vec_dststt(addr, DST_CTRL(2,2,32), DST_CHAN_1) -//#define WRITE_PREFETCH_START2(addr) vec_dststt(addr, DST_CTRL(2,2,32), DST_CHAN_2) -//#define PREFETCH_STOP1 vec_dss(DST_CHAN_1) -//#define PREFETCH_STOP2 vec_dss(DST_CHAN_2) -#define READ_PREFETCH_START1(addr) -#define READ_PREFETCH_START2(addr) -#define WRITE_PREFETCH_START1(addr) -#define WRITE_PREFETCH_START2(addr) -#define PREFETCH_STOP1 -#define PREFETCH_STOP2 - -//#define LINUX64 - -#ifdef LINUX64 - -#define word_t uint64_t -#define LIBFREEVEC_SCALAR_MACROS_INC MAKEINC(scalar64) -#define charmask(c) charmask64(c) - -#define SIMD_PACKETSIZE 16 -#define WORDS_IN_PACKET 2 - -static inline int copy_fwd_until_dst_word_aligned(uint8_t *d, const uint8_t *s) -{ - int dstal = ((word_t)d) % sizeof(word_t); - - switch (dstal) - { - case 1: - *d++ = *s++; - - case 2: - *d++ = *s++; - - case 3: - *d++ = *s++; - - case 4: - *d++ = *s++; - - case 5: - *d++ = *s++; - - case 6: - *d++ = *s++; - - case 7: - *d = *s; - } - - return dstal; -} - -static inline void copy_fwd_rest_bytes(uint8_t *d, const uint8_t *s, size_t len) -{ - switch (len) - { - case 7: - *d++ = *s++; - - case 6: - *d++ = *s++; - - case 5: - *d++ = *s++; - - case 4: - *d++ = *s++; - - case 3: - *d++ = *s++; - - case 2: - *d++ = *s++; - - case 1: - *d = *s; - } -} - -static inline void copy_fwd_rest_words_aligned(word_t *d, const word_t *s, size_t l) -{ - while (l > 0) - { - *d++ = *s++; - l--; - } -} - -static inline void copy_fwd_rest_words_unaligned(word_t *d, const word_t *s, int sl, int sr, size_t l) -{ - while (l > 0) - { - *d++ = MERGE_SHIFTED_WORDS(*(s), *(s + 1), sl, sr); s++; - l--; - } -} - -static inline void copy_fwd_until_dst_simd_aligned(word_t *d, const word_t *s, - int srcoffset4, size_t dstal, int sl, int sr) -{ - if (srcoffset4 == 0) - { - if (dstal == 8) - *d++ = *s++; - } - else - { - if (dstal == 8) - { - *d = MERGE_SHIFTED_WORDS(*(s), *(s + 1), sl, sr); - d++; s++; - } - } -} - -static inline void memset_fwd_until_dst_word_aligned(uint8_t *ptr, uint8_t c, size_t al) -{ - switch(al) - { - case 1: - *ptr++ = c; - case 2: - *ptr++ = c; - case 3: - *ptr++ = c; - } -} - -static inline void memset_fwd_until_simd_aligned(word_t *ptr_w, word_t w, size_t al) -{ - switch (al) - { - case 4: - *ptr_w++ = w; - case 8: - *ptr_w++ = w; - case 12: - *ptr_w++ = w; - } -} - - -#else - -#define word_t uint32_t -#define LIBFREEVEC_SCALAR_MACROS_INC MAKEINC(scalar32) -#define charmask(c) charmask32(c) - -#define SIMD_PACKETSIZE 16 -#define WORDS_IN_PACKET 4 - -static inline int copy_fwd_until_dst_word_aligned(uint8_t *d, const uint8_t *s) -{ - size_t dstal = ((size_t)d) % sizeof(word_t); - - switch (dstal) - { - case 1: - *d++ = *s++; - case 2: - *d++ = *s++; - case 3: - *d = *s; - } - - return dstal; -} - -static inline void copy_fwd_rest_bytes(uint8_t *d, const uint8_t *s, size_t len) -{ - switch (len) - { - case 3: - *d++ = *s++; - case 2: - *d++ = *s++; - case 1: - *d++ = *s++; - } -} - -static inline void copy_fwd_rest_words_aligned(word_t *d, const word_t *s, size_t l) -{ - while (l > 0) - { - *d++ = *s++; - l--; - } -} - -static inline void copy_fwd_rest_words_unaligned(word_t *d, const word_t *s, int sl, int sr, size_t l) -{ - while (l > 0) - { - *d++ = MERGE_SHIFTED_WORDS(*(s), *(s + 1), sl, sr); s++; - l--; - } -} - -static inline void copy_fwd_until_dst_simd_aligned(word_t *d, const word_t *s, - int srcoffset4, size_t dstal, int sl, int sr) -{ - if (srcoffset4 == 0) - { - switch (dstal) - { - case 4: - *d++ = *s++; - case 8: - *d++ = *s++; - case 12: - *d++ = *s++; - } - } - else - { - switch (dstal) - { - case 4: - *d = MERGE_SHIFTED_WORDS(*(s), *(s + 1), sl, sr); - d++; s++; - case 8: - *d = MERGE_SHIFTED_WORDS(*(s), *(s + 1), sl, sr); - d++; s++; - case 12: - *d = MERGE_SHIFTED_WORDS(*(s), *(s + 1), sl, sr); - d++; s++; - } - } -} - -static inline void memset_fwd_until_dst_word_aligned(uint8_t *ptr, uint8_t c, size_t al) -{ - switch(al) - { - case 1: - *ptr++ = c; - case 2: - *ptr++ = c; - case 3: - *ptr++ = c; - } -} - -static inline void memset_fwd_until_simd_aligned(word_t *ptr_w, word_t w, size_t al) -{ - switch (al) - { - case 4: - *ptr_w++ = w; - case 8: - *ptr_w++ = w; - case 12: - *ptr_w++ = w; - } -} -#endif - -static inline void memset_rest_words(word_t *ptr_w, word_t w, size_t l) -{ - while (l--) - *ptr_w++ = w; -} - -static inline void copy_fwd_rest_blocks_aligned(word_t *d, const uint8_t *s, size_t blocks) -{ - __vector unsigned char v1, v2, v3, v4; - // Unroll blocks of 4 words - while (blocks > 4) - { - v1 = vec_ld(0, s); - v2 = vec_ld(16, s); - v3 = vec_ld(32, s); - v4 = vec_ld(48, s); - vec_st(v1, 0, (uint8_t *)d); - vec_st(v2, 16, (uint8_t *)d); - vec_st(v3, 32, (uint8_t *)d); - vec_st(v4, 48, (uint8_t *)d); - d += 16; s += 4 * SIMD_PACKETSIZE; - blocks -= 4; - } - - while (blocks > 0) - { - v1 = vec_ld(0, s); - vec_st(v1, 0, (uint8_t *)d); - d += 4; s += SIMD_PACKETSIZE; - blocks--; - } -} - -static inline void copy_fwd_rest_blocks_unaligned(word_t *d, const uint8_t *s, int srcoffset, int sl, int sr, size_t blocks) -{ - __vector unsigned char mask, MSQ1, LSQ1, LSQ2, LSQ3, LSQ4; - mask = vec_lvsl(0, s); - - // Unroll blocks of 4 words - while (blocks > 4) - { - MSQ1 = vec_ld(0, s); - LSQ1 = vec_ld(15, s); - LSQ2 = vec_ld(31, s); - LSQ3 = vec_ld(47, s); - LSQ4 = vec_ld(63, s); - vec_st(vec_perm(MSQ1, LSQ1, mask), 0, (uint8_t *)d); - vec_st(vec_perm(LSQ1, LSQ2, mask), 16, (uint8_t *)d); - vec_st(vec_perm(LSQ2, LSQ3, mask), 32, (uint8_t *)d); - vec_st(vec_perm(LSQ3, LSQ4, mask), 48, (uint8_t *)d); - d += 16; s += 4 * SIMD_PACKETSIZE; - blocks -= 4; - } - - while (blocks > 0) - { - MSQ1 = vec_ld(0, s); - LSQ1 = vec_ld(15, s); - vec_st(vec_perm(MSQ1, LSQ1, mask), 0, (uint8_t *)d); - d += 4; s += SIMD_PACKETSIZE; - blocks--; - } -} - -static inline __vector unsigned char simdpacket_set_from_byte(const uint8_t c) -{ - __vector unsigned char v = vec_lde(0, &c); - return vec_splat(v, 0); -} - -static inline void memset_set_blocks(word_t *ptr_w, word_t pw, uint8_t c, size_t blocks) -{ - __vector unsigned char vc = simdpacket_set_from_byte(c); - while (blocks > 4) - { - vec_st(vc, 0, (uint8_t *)ptr_w); - vec_st(vc, 16, (uint8_t *)ptr_w); - vec_st(vc, 32, (uint8_t *)ptr_w); - vec_st(vc, 48, (uint8_t *)ptr_w); - ptr_w += 4 * WORDS_IN_PACKET; - blocks -= 4; - } - - while (blocks--) - { - vec_st(vc, 0, (uint8_t *)ptr_w); - ptr_w += WORDS_IN_PACKET; - } -} - -extern void *vec_memcpy(void *dstpp, const void *srcpp, size_t len); -extern void *vec_memcpy_aligned(void *dstpp, const void *srcpp, size_t len); -extern void *vec_memset(void *s, int p, size_t len); - -#ifdef __cplusplus -} -#endif - -#define memcpy(dest, src, n) vec_memcpy(dest, src, n) -#define memset(s, c, n) vec_memset(s, c, n) - -#endif diff --git a/ps3/gcmgl/src/rgl_ps3.cpp b/ps3/gcmgl/src/rgl_ps3.cpp index 8801d13546..8badba96e0 100644 --- a/ps3/gcmgl/src/rgl_ps3.cpp +++ b/ps3/gcmgl/src/rgl_ps3.cpp @@ -4,8 +4,6 @@ #include -#include "../../../mem/altivec/altivec_mem.h" - #include "include/GmmAlloc.h" #include "include/rgl-typedefs.h" #include "include/rgl-inline.h" diff --git a/ps3/gcmgl/src/rgl_ps3_raster.cpp b/ps3/gcmgl/src/rgl_ps3_raster.cpp index 109640c485..8b472f4468 100644 --- a/ps3/gcmgl/src/rgl_ps3_raster.cpp +++ b/ps3/gcmgl/src/rgl_ps3_raster.cpp @@ -2,8 +2,6 @@ #include -#include "../../../mem/altivec/altivec_mem.h" - #include "include/GmmAlloc.h" #include "include/rgl-typedefs.h" #include "include/rgl-inline.h"