/* Math-NEON: Neon Optimised Math Library based on cmath Contact: lachlan.ts@gmail.com Copyright (C) 2009 Lachlan Tychsen - Smith aka Adventus This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #ifdef WIN32 #include #else #include #include #endif #define randf() (rand() / (RAND_MAX + 1.0f)) void LOG(const char *format, ...) { __gnuc_va_list arg; va_start(arg, format); char msg[512]; vsprintf(msg, format, arg); va_end(arg); FILE *log = fopen("ux0:/data/mathneon.log", "a+"); if (log != NULL) { fwrite(msg, 1, strlen(msg), log); fclose(log); } } struct test1_s { const char* name; float (*func)(float); //the function float (*bench)(float); //the function to benchmark against. float rng0, rng1; int num; float emaxabs; float xmaxabs; float emaxrel; float xmaxrel; float erms; int time; //time to execute num functions; }; struct test2_s { const char* name; float (*func)(float, float); //the function float (*bench)(float, float); //the function to benchmark against. float rng0, rng1; int num; float emaxabs; float xmaxabs; float emaxrel; float xmaxrel; float erms; int time; //time to execute num functions; }; float invsqrtf(float x){ return (1.0f / sqrtf(x)); } typedef struct test1_s test1_t; typedef struct test2_s test2_t; test1_t test1[51] = { {"sinf ", sinf, sinf, -M_PI, M_PI, 500000}, {"sinf_c ", sinf_c, sinf, -M_PI, M_PI, 500000}, {"sinf_neon ", sinf_neon, sinf, -M_PI, M_PI, 500000}, {"cosf ", cosf, cosf, -M_PI, M_PI, 500000}, {"cosf_c ", cosf_c, cosf, -M_PI, M_PI, 500000}, {"cosf_neon ", cosf_neon, cosf, -M_PI, M_PI, 500000}, {"tanf ", tanf, tanf, -M_PI_4, M_PI_4, 500000, 0, 0, 0}, {"tanf_c ", tanf_c, tanf, -M_PI_4, M_PI_4, 500000, 0, 0, 0}, {"tanf_neon ", tanf_neon, tanf, -M_PI_4, M_PI_4, 500000, 0, 0, 0}, {"asinf ", asinf, asinf, -1, 1, 500000, 0, 0, 0}, {"asinf_c ", asinf_c, asinf, -1, 1, 500000, 0, 0, 0}, {"asinf_neon ", asinf_neon, asinf, -1, 1, 500000, 0, 0, 0}, {"acosf ", acosf, acosf, -1, 1, 500000, 0, 0, 0}, {"acosf_c ", acosf_c, acosf, -1, 1, 500000, 0, 0, 0}, {"acosf_neon ", acosf_neon, acosf, -1, 1, 500000, 0, 0, 0}, {"atanf ", atanf, atanf, -1, 1, 500000, 0, 0, 0}, {"atanf_c ", atanf_c, atanf, -1, 1, 500000, 0, 0, 0}, {"atanf_neon ", atanf_neon, atanf, -1, 1, 500000, 0, 0, 0}, {"sinhf ", sinhf, sinhf, -M_PI, M_PI, 500000, 0, 0, 0}, {"sinhf_c ", sinhf_c, sinhf, -M_PI, M_PI, 500000, 0, 0, 0}, {"sinhf_neon ", sinhf_neon, sinhf, -M_PI, M_PI, 500000, 0, 0, 0}, {"coshf ", coshf, coshf, -M_PI, M_PI, 500000, 0, 0, 0}, {"coshf_c ", coshf_c, coshf, -M_PI, M_PI, 500000, 0, 0, 0}, {"coshf_neon ", coshf_neon, coshf, -M_PI, M_PI, 500000, 0, 0, 0}, {"tanhf ", tanhf, tanhf, -M_PI, M_PI, 500000, 0, 0, 0}, {"tanhf_c ", tanhf_c, tanhf, -M_PI, M_PI, 500000, 0, 0, 0}, {"tanhf_neon ", tanhf_neon, tanhf, -M_PI, M_PI, 500000, 0, 0, 0}, {"expf ", expf, expf, 0, 10, 500000, 0, 0, 0}, {"expf_c ", expf_c, expf, 0, 10, 500000, 0, 0, 0}, {"expf_neon ", expf_neon, expf, 0, 10, 500000, 0, 0, 0}, {"logf ", logf, logf, 1, 1000, 500000, 0, 0, 0}, {"logf_c ", logf_c, logf, 1, 1000, 500000, 0, 0, 0}, {"logf_neon ", logf_neon, logf, 1, 1000, 500000, 0, 0, 0}, {"log10f ", log10f, log10f, 1, 1000, 500000, 0, 0, 0}, {"log10f_c ", log10f_c, log10f, 1, 1000, 500000, 0, 0, 0}, {"log10f_neon ", log10f_neon,log10f, 1, 1000, 500000, 0, 0, 0}, {"floorf ", floorf, floorf, 1, 1000, 5000000, 0, 0, 0}, {"floorf_c ", floorf_c, floorf, 1, 1000, 5000000, 0, 0, 0}, {"floorf_neon", floorf_neon,floorf, 1, 1000, 5000000, 0, 0, 0}, {"ceilf ", ceilf, ceilf, 1, 1000, 5000000, 0, 0, 0}, {"ceilf_c ", ceilf_c, ceilf, 1, 1000, 5000000, 0, 0, 0}, {"ceilf_neon", ceilf_neon, ceilf, 1, 1000, 5000000, 0, 0, 0}, {"fabsf ", fabsf, fabsf, 1, 1000, 5000000, 0, 0, 0}, {"fabsf_c ", fabsf_c, fabsf, 1, 1000, 5000000, 0, 0, 0}, {"fabsf_neon", fabsf_neon, fabsf, 1, 1000, 5000000, 0, 0, 0}, {"sqrtf ", sqrtf, sqrtf, 1, 1000, 500000, 0, 0, 0}, {"sqrtf_c ", sqrtf_c, sqrtf, 1, 1000, 500000, 0, 0, 0}, {"sqrtf_neon ", sqrtf_neon, sqrtf, 1, 1000, 500000, 0, 0, 0}, {"invsqrtf ", invsqrtf, invsqrtf, 1, 1000, 500000, 0, 0, 0}, {"invsqrtf_c ", invsqrtf_c, invsqrtf, 1, 1000, 500000, 0, 0, 0}, {"invsqrtf_neon ", invsqrtf_neon, invsqrtf, 1, 1000, 500000, 0, 0, 0}, }; test2_t test2[9] = { {"atan2f ", atan2f, atan2f, 0.1, 10, 10000, 0, 0, 0}, {"atan2f_c ", atan2f_c, atan2f, 0.1, 10, 10000, 0, 0, 0}, {"atan2f_neon ", atan2f_neon,atan2f, 0.1, 10, 10000, 0, 0, 0}, {"powf ", powf, powf, 1, 10, 10000, 0, 0, 0}, {"powf_c ", powf_c, powf, 1, 10, 10000, 0, 0, 0}, {"powf_neon ", powf_neon, powf, 1, 10, 10000, 0, 0, 0}, {"fmodf ", fmodf, fmodf, 1, 10, 10000, 0, 0, 0}, {"fmodf_c ", fmodf_c, fmodf, 1, 10, 10000, 0, 0, 0}, {"fmodf_neon ", fmodf_neon, fmodf, 1, 10, 10000, 0, 0, 0}, }; void test_mathfunc1(test1_t *tst) { float x; float dx = (tst->rng1 - tst->rng0) / ((float)tst->num); #ifndef WIN32 struct rusage ru; #endif tst->emaxabs = tst->xmaxabs = 0; tst->emaxrel = tst->xmaxrel = 0; tst->erms = 0; for(x = tst->rng0; x < tst->rng1 ; x += dx){ float r = (tst->func)((float)x); float rr = (tst->bench)((float)x); float dr = fabs(r - rr); float drr = dr * (100.0f / rr); tst->erms += dr*dr; if (dr > tst->emaxabs){ tst->emaxabs = dr; tst->xmaxabs = x; } if (drr > tst->emaxrel){ tst->emaxrel = drr; tst->xmaxrel = x; } } tst->erms = sqrt(tst->erms / ((float) tst->num)); #ifdef WIN32 tst->time = (1000 * clock()) / (CLOCKS_PER_SEC / 1000); #else tst->time = sceKernelGetSystemTimeWide(); #endif for(x = tst->rng0; x < tst->rng1 ; x += dx){ (tst->func)((float)x); } #ifdef WIN32 tst->time = (1000 * clock()) / (CLOCKS_PER_SEC / 1000) - tst->time; #else tst->time = sceKernelGetSystemTimeWide(); #endif } void test_mathfunc2(test2_t *tst) { float x, y; float rng = tst->rng1 - tst->rng0; float d = (rng * rng) / ((float) tst->num); #ifndef WIN32 struct rusage ru; #endif tst->emaxabs = tst->xmaxabs = 0; tst->emaxrel = tst->xmaxrel = 0; for(y = (tst->rng0); y < (tst->rng1) ; y += d){ for(x = (tst->rng0); x < (tst->rng1); x += d){ float r = (tst->func)((float)x, y); float rr = (tst->bench)((float)x, y); float dr = fabs(r - rr); float drr = dr * (100.0f / rr); if (dr > tst->emaxabs){ tst->emaxabs = dr; tst->xmaxabs = x; } if (drr > tst->emaxrel && fabsf(rr) > 0.0001){ tst->emaxrel = drr; tst->xmaxrel = x; } } } #ifdef WIN32 tst->time = (1000 * clock()) / (CLOCKS_PER_SEC / 1000) ; #else tst->time = sceKernelGetSystemTimeWide(); #endif for(y = tst->rng0; y < tst->rng1 ; y += d){ for(x = tst->rng0; x < tst->rng1 ; x += d){ (tst->func)((float)x, (float)y); } } #ifdef WIN32 tst->time = (1000 * clock()) / (CLOCKS_PER_SEC / 1000) - tst->time; #else tst->time = sceKernelGetSystemTimeWide(); #endif } void test_vectorfunc() { float v0[4], v1[4], d[4]; for(int i=0;i<4;i++) { v0[i] = 10*randf() - 5; v1[i] = 10*randf() - 5; d[i] = 10*randf() - 5; } int testnum = 5000000; struct rusage ru; int v2t[3], v3t[3], v4t[3]; float r; LOG("\n"); //dot 2 v2t[0] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { r = dot2_c(v0, v1); }; v2t[1] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { r = dot2_neon(v0, v1); }; v2t[2] = sceKernelGetSystemTimeWide(); r = dot2_c(v0, v1); LOG("dot2_c = %f\n", r); r = dot2_neon(v0, v1); LOG("dot2_neon = %f\n", r); LOG("dot2: c=%i \t neon=%i \t rate=%.2f \n", v2t[1] - v2t[0], v2t[2] - v2t[1], (float)(v2t[1] - v2t[0]) / (float)(v2t[2] - v2t[1])); //normalize 2 v2t[0] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { normalize2_c(v0, d); }; v2t[1] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { normalize2_neon(v0, d); }; v2t[2] = sceKernelGetSystemTimeWide(); normalize2_c(v0, d); LOG("normalize2_c = [%.2f, %.2f]\n", d[0], d[1]); normalize2_neon(v0, d); LOG("normalize2_neon = [%.2f, %.2f]\n", d[0], d[1]); LOG("normalize2: c=%i \t neon=%i \t rate=%.2f \n", v2t[1] - v2t[0], v2t[2] - v2t[1], (float)(v2t[1] - v2t[0]) / (float)(v2t[2] - v2t[1])); LOG("\n"); //dot 3 v3t[0] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { r = dot3_c(v0, v1); }; v3t[1] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { r = dot3_neon(v0, v1); }; v3t[2] = sceKernelGetSystemTimeWide(); r = dot3_c(v0, v1); LOG("dot3_c = %f\n", r); r = dot3_neon(v0, v1); LOG("dot3_neon = %f\n", r); LOG("dot3: c=%i \t neon=%i \t rate=%.2f \n", v3t[1] - v3t[0], v3t[2] - v3t[1], (float)(v3t[1] - v3t[0]) / (float)(v3t[2] - v3t[1])); //normalize 3 v3t[0] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { normalize3_c(v0, d); }; v3t[1] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { normalize3_neon(v0, d); }; v3t[2] = sceKernelGetSystemTimeWide(); normalize3_c(v0, d); LOG("normalize3_c = [%.2f, %.2f, %.2f]\n", d[0], d[1], d[2]); normalize3_neon(v0, d); LOG("normalize3_neon = [%.2f, %.2f, %.2f]\n", d[0], d[1], d[2]); LOG("normalize3: c=%i \t neon=%i \t rate=%.2f \n", v3t[1] - v3t[0], v3t[2] - v3t[1], (float)(v3t[1] - v3t[0]) / (float)(v3t[2] - v3t[1])); //cross 3 v3t[0] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { cross3_c(v0, v1, d); }; v3t[1] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { cross3_neon(v0, v1, d); }; v3t[2] = sceKernelGetSystemTimeWide(); cross3_c(v0, v1, d); LOG("cross3_c = [%.2f, %.2f, %.2f]\n", d[0], d[1], d[2]); cross3_neon(v0, v1, d); LOG("cross3_neon = [%.2f, %.2f, %.2f]\n", d[0], d[1], d[2]); LOG("cross3: c=%i \t neon=%i \t rate=%.2f \n", v3t[1] - v3t[0], v3t[2] - v3t[1], (float)(v3t[1] - v3t[0]) / (float)(v3t[2] - v3t[1])); LOG("\n"); //dot 4 v4t[0] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { r = dot4_c(v0, v1); }; v4t[1] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { r = dot4_neon(v0, v1); }; v4t[2] = sceKernelGetSystemTimeWide(); r = dot4_c(v0, v1); LOG("dot4_c = %f\n", r); r = dot4_neon(v0, v1); LOG("dot4_neon = %f\n", r); LOG("dot4: c=%i \t neon=%i \t rate=%.2f \n", v4t[1] - v4t[0], v4t[2] - v4t[1], (float)(v4t[1] - v4t[0]) / (float)(v4t[2] - v4t[1])); //normalize 4 v4t[0] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { normalize4_c(v0, d); }; v4t[1] = sceKernelGetSystemTimeWide(); for(int i=0;i < testnum; i++) { normalize4_neon(v0, d); }; v4t[2] = sceKernelGetSystemTimeWide(); normalize4_c(v0, d); LOG("normalize4_c = [%.2f, %.2f, %.2f, %.2f]\n", d[0], d[1], d[2], d[3]); normalize4_neon(v0, d); LOG("normalize4_neon = [%.2f, %.2f, %.2f, %.2f]\n", d[0], d[1], d[2], d[3]); LOG("normalize4: c=%i \t neon=%i \t rate=%.2f \n", v4t[1] - v4t[0], v4t[2] - v4t[1], (float)(v4t[1] - v4t[0]) / (float)(v4t[2] - v4t[1])); LOG("\n"); } void test_matrixfunc() { float m0[16], m1[16], m2[16]; int m2t[3], m3t[3], m4t[3]; int i; int testnum = 1000000; struct rusage ru; for(int i=0;i<16;i++) { m0[i] = 10.0f * randf() - 5.0f; m1[i] = 10.0f * randf() - 5.0f; m2[i] = 10.0f * randf() - 5.0f; } //matmul2 m2t[0] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matmul2_c(m0, m1, m2); } m2t[1] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matmul2_neon(m0, m1, m2); } m2t[2] = sceKernelGetSystemTimeWide(); matmul2_c(m0, m1, m2); LOG("matmul2_c = \n"); LOG("\t\t\t|%.2f, %.2f|\n", m2[0], m2[2]); LOG("\t\t\t|%.2f, %.2f|\n", m2[1], m2[3]); matmul2_neon(m0, m1, m2); LOG("matmul2_neon = \n"); LOG("\t\t\t|%.2f, %.2f|\n", m2[0], m2[2]); LOG("\t\t\t|%.2f, %.2f|\n", m2[1], m2[3]); LOG("matmul2: c=%i \t neon=%i \t rate=%.2f \n", m2t[1] - m2t[0], m2t[2] - m2t[1], (float)(m2t[1] - m2t[0]) / (float)(m2t[2] - m2t[1])); //matvec2 m2t[0] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matvec2_c(m0, m1, m2); } m2t[1] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matvec2_neon(m0, m1, m2); } m2t[2] = sceKernelGetSystemTimeWide(); memset(m2, 0, 4*sizeof(float)); matvec2_c(m0, m1, m2); LOG("matvec2_c = |%.2f, %.2f|\n", m2[0], m2[1]); memset(m2, 0, 4*sizeof(float)); matvec2_neon(m0, m1, m2); LOG("matvec2_neon = |%.2f, %.2f|\n", m2[0], m2[1]); LOG("matvec2: c=%i \t neon=%i \t rate=%.2f \n", m2t[1] - m2t[0], m2t[2] - m2t[1], (float)(m2t[1] - m2t[0]) / (float)(m2t[2] - m2t[1])); //MAT3 m3t[0] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matmul3_c(m0, m1, m2); } m3t[1] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matmul3_neon(m0, m1, m2); } m3t[2] = sceKernelGetSystemTimeWide(); memset(m2, 0, 9*sizeof(float)); matmul3_c(m0, m1, m2); LOG("matmul3_c =\n"); LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[0], m2[3], m2[6]); LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[1], m2[4], m2[7]); LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[2], m2[5], m2[8]); memset(m2, 0, 9*sizeof(float)); matmul3_neon(m0, m1, m2); LOG("matmul3_neon =\n"); LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[0], m2[3], m2[6]); LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[1], m2[4], m2[7]); LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[2], m2[5], m2[8]); LOG("matmul3: c=%i \t neon=%i \t rate=%.2f \n", m3t[1] - m3t[0], m3t[2] - m3t[1], (float)(m3t[1] - m3t[0]) / (float)(m3t[2] - m3t[1])); //matvec3 m3t[0] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matvec3_c(m0, m1, m2); } m3t[1] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matvec3_neon(m0, m1, m2); } m3t[2] = sceKernelGetSystemTimeWide(); memset(m2, 0, 4*sizeof(float)); matvec3_c(m0, m1, m2); LOG("matvec3_c = |%.2f, %.2f, %.2f|\n", m2[0], m2[1], m2[2]); memset(m2, 0, 4*sizeof(float)); matvec3_neon(m0, m1, m2); LOG("matvec3_neon = |%.2f, %.2f, %.2f|\n", m2[0], m2[1], m2[2]); LOG("matvec3: c=%i \t neon=%i \t rate=%.2f \n", m3t[1] - m3t[0], m3t[2] - m3t[1], (float)(m3t[1] - m3t[0]) / (float)(m3t[2] - m3t[1])); //MAT4 m4t[0] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matmul4_c(m0, m1, m2); } m4t[1] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matmul4_neon(m0, m1, m2); } m4t[2] = sceKernelGetSystemTimeWide(); memset(m2, 0, 16*sizeof(float)); matmul4_c(m0, m1, m2); LOG("matmul4_c =\n"); LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[0], m2[4], m2[8], m2[12]); LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[1], m2[5], m2[9], m2[13]); LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[2], m2[6], m2[10], m2[14]); LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[3], m2[7], m2[11], m2[15]); memset(m2, 0, 16*sizeof(float)); matmul4_neon(m0, m1, m2); LOG("matmul4_neon =\n"); LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[0], m2[4], m2[8], m2[12]); LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[1], m2[5], m2[9], m2[13]); LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[2], m2[6], m2[10], m2[14]); LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[3], m2[7], m2[11], m2[15]); LOG("matmul4: c=%i \t neon=%i \t rate=%.2f \n", m4t[1] - m4t[0], m4t[2] - m4t[1], (float)(m4t[1] - m4t[0]) / (float)(m4t[2] - m4t[1])); //matvec4 m4t[0] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matvec4_c(m0, m1, m2); } m4t[1] = sceKernelGetSystemTimeWide(); for(i = 0; i < testnum; i++){ matvec4_neon(m0, m1, m2); } m4t[2] = sceKernelGetSystemTimeWide(); memset(m2, 0, 4*sizeof(float)); matvec4_c(m0, m1, m2); LOG("matvec4_c = |%.2f, %.2f, %.2f, %f|\n", m2[0], m2[1], m2[2], m2[3]); memset(m2, 0, 4*sizeof(float)); matvec4_neon(m0, m1, m2); LOG("matvec4_neon = |%.2f, %.2f, %.2f, %f|\n", m2[0], m2[1], m2[2], m2[3]); LOG("matvec4: c=%i \t neon=%i \t rate=%.2f \n", m4t[1] - m4t[0], m4t[2] - m4t[1], (float)(m4t[1] - m4t[0]) / (float)(m4t[2] - m4t[1])); } int main(int argc, char** argv) { int i, ii; #if 1 LOG("RUNFAST: Disabled \n"); #else LOG("RUNFAST: Enabled \n"); enable_runfast(); #endif srand(time(NULL)); #if 1 //test single argument functions: LOG("------------------------------------------------------------------------------------------------------\n"); LOG("MATRIX FUNCTION TESTS \n"); LOG("------------------------------------------------------------------------------------------------------\n"); test_matrixfunc(); test_vectorfunc(); LOG("------------------------------------------------------------------------------------------------------\n"); LOG("CMATH FUNCTION TESTS \n"); LOG("------------------------------------------------------------------------------------------------------\n"); LOG("Function\tRange\t\tNumber\tABS Max Error\tREL Max Error\tRMS Error\tTime\tRate\n"); LOG("------------------------------------------------------------------------------------------------------\n"); for(i = 0; i < 51; i++){ test_mathfunc1(&test1[i]); ii = i - (i % 3); LOG("%s\t", test1[i].name); LOG("[%.2f, %.2f]\t", test1[i].rng0, test1[i].rng1); LOG("%i\t", test1[i].num); LOG("%.2e\t", test1[i].emaxabs); LOG("%.2e%%\t", test1[i].emaxrel); LOG("%.2e\t", test1[i].erms); LOG("%i\t", test1[i].time); LOG("x%.2f\t", (float)test1[ii].time / test1[i].time); LOG("\n"); } for(i = 0; i < 9; i++){ test_mathfunc2(&test2[i]); ii = i - (i % 3); LOG("%s\t", test2[i].name); LOG("[%.2f, %.2f]\t", test2[i].rng0, test2[i].rng1); LOG("%i\t", test2[i].num); LOG("%.2e\t", test2[i].emaxabs); LOG("%.2e%%\t", test2[i].emaxrel); LOG("%.2e\t", test2[i].erms); LOG("%i\t", test2[i].time); LOG("x%.2f\t", (float)test2[ii].time / test2[i].time); LOG("\n"); } #else float x = 0; for(x = -M_PI_2; x < M_PI_2; x+= 0.01) { LOG("x=%.2f\t in=%.2f\t c=%.2f\t neon=%.2f \n", x, sinhf(x), sinhf_c(x), sinhf_neon(x)); } #endif return 0; }