From 56797b8841ab5a9a9d7a9ab82383567300361252 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20Jos=C3=A9=20Garc=C3=ADa=20Garc=C3=ADa?=
 <frangarcj@gmail.com>
Date: Sun, 31 May 2020 17:18:53 +0200
Subject: [PATCH 1/3] Squashed 'deps/vitaGL/' changes from
 9a6e4b3397..fb87308d15

fb87308d15 Added vglBindPackedAttribLocation.
7e933f6051 Added NO_DEBUG compile option.
28e8516718 Faster textures storing when format is the same as internal format.

git-subtree-dir: deps/vitaGL
git-subtree-split: fb87308d15a387d2549fb45d860b3d87ede8a0ca
---
 Makefile                 |  4 ++++
 source/custom_shaders.c  | 12 ++++++++----
 source/textures.c        |  9 ++++++---
 source/utils/gpu_utils.c | 25 +++++++++++++++++--------
 source/utils/gpu_utils.h |  2 +-
 source/vitaGL.h          |  1 +
 6 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/Makefile b/Makefile
index f5efeb0449..e4b82ce5fa 100644
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,10 @@ AR      = $(PREFIX)-gcc-ar
 CFLAGS  = -g -Wl,-q -O2 -ffast-math -mtune=cortex-a9 -mfpu=neon -flto -ftree-vectorize -DSTB_DXT_IMPLEMENTATION
 ASFLAGS = $(CFLAGS)
 
+ifeq ($(NO_DEBUG),1)
+CFLAGS  += -DSKIP_ERROR_HANDLING
+endif
+
 all: $(TARGET).a
 
 $(TARGET).a: $(OBJS)
diff --git a/source/custom_shaders.c b/source/custom_shaders.c
index d3d5beeb34..7ff792b2e4 100644
--- a/source/custom_shaders.c
+++ b/source/custom_shaders.c
@@ -357,8 +357,7 @@ void glUniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose, cons
  * ------------------------------
  */
 
-// Equivalent of glBindAttribLocation but for sceGxm architecture
-void vglBindAttribLocation(GLuint prog, GLuint index, const GLchar *name, const GLuint num, const GLenum type) {
+void vglBindPackedAttribLocation(GLuint prog, GLuint index, const GLchar *name, const GLuint num, const GLenum type, GLuint offset) {
 	// Grabbing passed program
 	program *p = &progs[prog - 1];
 	SceGxmVertexAttribute *attributes = &p->attr[index];
@@ -369,7 +368,7 @@ void vglBindAttribLocation(GLuint prog, GLuint index, const GLchar *name, const
 
 	// Setting stream index and offset values
 	attributes->streamIndex = index;
-	attributes->offset = 0;
+	attributes->offset = offset;
 
 	// Detecting attribute format and size
 	int bpe;
@@ -396,7 +395,12 @@ void vglBindAttribLocation(GLuint prog, GLuint index, const GLchar *name, const
 		p->attr_num = index + 1;
 }
 
-// Equivalent of glVertexAttribLocation but for sceGxm architecture
+// Equivalent of glBindAttribLocation but for sceGxm architecture
+void vglBindAttribLocation(GLuint prog, GLuint index, const GLchar *name, const GLuint num, const GLenum type) {
+	vglBindPackedAttribLocation(prog, index, name, num, type, 0);
+}
+
+// Equivalent of glVertexAttribPointer but for sceGxm architecture
 void vglVertexAttribPointer(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, GLuint count, const GLvoid *pointer) {
 #ifndef SKIP_ERROR_HANDLING
 	// Error handling
diff --git a/source/textures.c b/source/textures.c
index 8533e38f0f..a16d58cfec 100644
--- a/source/textures.c
+++ b/source/textures.c
@@ -101,6 +101,7 @@ void glTexImage2D(GLenum target, GLint level, GLint internalFormat, GLsizei widt
 
 	SceGxmTextureFormat tex_format;
 	uint8_t data_bpp = 0;
+	uint8_t fast_store = GL_FALSE;
 
 	// Support for legacy GL1.0 internalFormat
 	switch (internalFormat) {
@@ -157,7 +158,8 @@ void glTexImage2D(GLenum target, GLint level, GLint internalFormat, GLsizei widt
 		switch (type) {
 		case GL_UNSIGNED_BYTE:
 			data_bpp = 3;
-			read_cb = readRGB;
+			if (internalFormat == GL_RGB) fast_store = GL_TRUE;
+			else read_cb = readRGB;
 			break;
 		default:
 			error = GL_INVALID_ENUM;
@@ -168,7 +170,8 @@ void glTexImage2D(GLenum target, GLint level, GLint internalFormat, GLsizei widt
 		switch (type) {
 		case GL_UNSIGNED_BYTE:
 			data_bpp = 4;
-			read_cb = readRGBA;
+			if (internalFormat == GL_RGBA) fast_store = GL_TRUE;
+			else read_cb = readRGBA;
 			break;
 		case GL_UNSIGNED_SHORT_5_5_5_1:
 			data_bpp = 2;
@@ -236,7 +239,7 @@ void glTexImage2D(GLenum target, GLint level, GLint internalFormat, GLsizei widt
 		tex->type = internalFormat;
 		tex->write_cb = write_cb;
 		if (level == 0)
-			if (tex->write_cb) gpu_alloc_texture(width, height, tex_format, data, tex, data_bpp, read_cb, write_cb);
+			if (tex->write_cb) gpu_alloc_texture(width, height, tex_format, data, tex, data_bpp, read_cb, write_cb, fast_store);
 			else gpu_alloc_compressed_texture(width, height, tex_format, data, tex, data_bpp, read_cb);
 		else {
 			gpu_alloc_mipmaps(level, tex);
diff --git a/source/utils/gpu_utils.c b/source/utils/gpu_utils.c
index 17ff1fb82f..8497a8de7d 100644
--- a/source/utils/gpu_utils.c
+++ b/source/utils/gpu_utils.c
@@ -255,7 +255,7 @@ void gpu_free_texture(texture *tex) {
 	tex->valid = 0;
 }
 
-void gpu_alloc_texture(uint32_t w, uint32_t h, SceGxmTextureFormat format, const void *data, texture *tex, uint8_t src_bpp, uint32_t (*read_cb)(void *), void (*write_cb)(void *, uint32_t)) {
+void gpu_alloc_texture(uint32_t w, uint32_t h, SceGxmTextureFormat format, const void *data, texture *tex, uint8_t src_bpp, uint32_t (*read_cb)(void *), void (*write_cb)(void *, uint32_t), uint8_t fast_store) {
 	// If there's already a texture in passed texture object we first dealloc it
 	if (tex->valid)
 		gpu_free_texture(tex);
@@ -274,13 +274,22 @@ void gpu_alloc_texture(uint32_t w, uint32_t h, SceGxmTextureFormat format, const
 			int i, j;
 			uint8_t *src = (uint8_t *)data;
 			uint8_t *dst;
-			for (i = 0; i < h; i++) {
-				dst = ((uint8_t *)texture_data) + (ALIGN(w, 8) * bpp) * i;
-				for (j = 0; j < w; j++) {
-					uint32_t clr = read_cb(src);
-					write_cb(dst, clr);
-					src += src_bpp;
-					dst += bpp;
+			if (fast_store) { // Internal Format and Data Format are the same, we can just use memcpy for better performance
+				uint32_t line_size = w * bpp;
+				for (i = 0; i < h; i++) {
+					dst = ((uint8_t *)texture_data) + (ALIGN(w, 8) * bpp) * i;
+					memcpy(dst, src, line_size);
+					src += line_size;
+				}
+			} else { // Different internal and data formats, we need to go with slower callbacks system
+				for (i = 0; i < h; i++) {
+					dst = ((uint8_t *)texture_data) + (ALIGN(w, 8) * bpp) * i;
+					for (j = 0; j < w; j++) {
+						uint32_t clr = read_cb(src);
+						write_cb(dst, clr);
+						src += src_bpp;
+						dst += bpp;
+					}
 				}
 			}
 		} else
diff --git a/source/utils/gpu_utils.h b/source/utils/gpu_utils.h
index d5a93efc87..15deb02c5e 100644
--- a/source/utils/gpu_utils.h
+++ b/source/utils/gpu_utils.h
@@ -82,7 +82,7 @@ void gpu_pool_init(uint32_t temp_pool_size);
 int tex_format_to_bytespp(SceGxmTextureFormat format);
 
 // Alloc a texture
-void gpu_alloc_texture(uint32_t w, uint32_t h, SceGxmTextureFormat format, const void *data, texture *tex, uint8_t src_bpp, uint32_t (*read_cb)(void *), void (*write_cb)(void *, uint32_t));
+void gpu_alloc_texture(uint32_t w, uint32_t h, SceGxmTextureFormat format, const void *data, texture *tex, uint8_t src_bpp, uint32_t (*read_cb)(void *), void (*write_cb)(void *, uint32_t), uint8_t fast_store);
 
 // Alloc a compresseed texture
 void gpu_alloc_compressed_texture(uint32_t w, uint32_t h, SceGxmTextureFormat format, const void *data, texture *tex, uint8_t src_bpp, uint32_t (*read_cb)(void *));
diff --git a/source/vitaGL.h b/source/vitaGL.h
index 4cfe1d5721..6498d8dc5b 100644
--- a/source/vitaGL.h
+++ b/source/vitaGL.h
@@ -378,6 +378,7 @@ void vglVertexPointerMapped(const GLvoid *pointer);
 
 // VGL_EXT_gxp_shaders extension implementation
 void vglBindAttribLocation(GLuint prog, GLuint index, const GLchar *name, const GLuint num, const GLenum type);
+void vglBindPackedAttribLocation(GLuint prog, GLuint index, const GLchar *name, const GLuint num, const GLenum type, GLuint offset);
 void vglVertexAttribPointer(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, GLuint count, const GLvoid *pointer);
 void vglVertexAttribPointerMapped(GLuint index, const GLvoid *pointer);
 

From b2f61389ff5c99fe7d7b36b52294a5b284c9127a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20Jos=C3=A9=20Garc=C3=ADa=20Garc=C3=ADa?=
 <frangarcj@gmail.com>
Date: Sun, 31 May 2020 17:24:11 +0200
Subject: [PATCH 2/3] [VITA] Remove math-neon

---
 deps/math-neon/.gitattributes         |  17 -
 deps/math-neon/.gitignore             |  26 --
 deps/math-neon/Makefile               |  29 --
 deps/math-neon/README                 | 169 ----------
 deps/math-neon/source/math_acosf.c    |  67 ----
 deps/math-neon/source/math_asinf.c    | 183 -----------
 deps/math-neon/source/math_atan2f.c   | 170 ----------
 deps/math-neon/source/math_atanf.c    | 149 ---------
 deps/math-neon/source/math_ceilf.c    |  71 -----
 deps/math-neon/source/math_cosf.c     |  50 ---
 deps/math-neon/source/math_coshf.c    | 120 -------
 deps/math-neon/source/math_expf.c     | 135 --------
 deps/math-neon/source/math_fabsf.c    |  58 ----
 deps/math-neon/source/math_floorf.c   |  66 ----
 deps/math-neon/source/math_fmodf.c    | 100 ------
 deps/math-neon/source/math_invsqrtf.c |  79 -----
 deps/math-neon/source/math_ldexpf.c   |  67 ----
 deps/math-neon/source/math_log10f.c   | 135 --------
 deps/math-neon/source/math_logf.c     | 135 --------
 deps/math-neon/source/math_mat2.c     |  95 ------
 deps/math-neon/source/math_mat3.c     | 131 --------
 deps/math-neon/source/math_mat4.c     | 144 ---------
 deps/math-neon/source/math_modf.c     |  71 -----
 deps/math-neon/source/math_neon.h     | 435 --------------------------
 deps/math-neon/source/math_powf.c     | 182 -----------
 deps/math-neon/source/math_runfast.c  |  42 ---
 deps/math-neon/source/math_sincosf.c  | 163 ----------
 deps/math-neon/source/math_sinf.c     | 128 --------
 deps/math-neon/source/math_sinfv.c    | 110 -------
 deps/math-neon/source/math_sinhf.c    | 120 -------
 deps/math-neon/source/math_sqrtf.c    | 105 -------
 deps/math-neon/source/math_sqrtfv.c   | 147 ---------
 deps/math-neon/source/math_tanf.c     | 156 ---------
 deps/math-neon/source/math_tanhf.c    |  95 ------
 deps/math-neon/source/math_vec2.c     | 118 -------
 deps/math-neon/source/math_vec3.c     | 172 ----------
 deps/math-neon/source/math_vec4.c     | 126 --------
 37 files changed, 4366 deletions(-)
 delete mode 100644 deps/math-neon/.gitattributes
 delete mode 100644 deps/math-neon/.gitignore
 delete mode 100644 deps/math-neon/Makefile
 delete mode 100644 deps/math-neon/README
 delete mode 100644 deps/math-neon/source/math_acosf.c
 delete mode 100644 deps/math-neon/source/math_asinf.c
 delete mode 100644 deps/math-neon/source/math_atan2f.c
 delete mode 100644 deps/math-neon/source/math_atanf.c
 delete mode 100644 deps/math-neon/source/math_ceilf.c
 delete mode 100644 deps/math-neon/source/math_cosf.c
 delete mode 100644 deps/math-neon/source/math_coshf.c
 delete mode 100644 deps/math-neon/source/math_expf.c
 delete mode 100644 deps/math-neon/source/math_fabsf.c
 delete mode 100644 deps/math-neon/source/math_floorf.c
 delete mode 100644 deps/math-neon/source/math_fmodf.c
 delete mode 100644 deps/math-neon/source/math_invsqrtf.c
 delete mode 100644 deps/math-neon/source/math_ldexpf.c
 delete mode 100644 deps/math-neon/source/math_log10f.c
 delete mode 100644 deps/math-neon/source/math_logf.c
 delete mode 100644 deps/math-neon/source/math_mat2.c
 delete mode 100644 deps/math-neon/source/math_mat3.c
 delete mode 100644 deps/math-neon/source/math_mat4.c
 delete mode 100644 deps/math-neon/source/math_modf.c
 delete mode 100644 deps/math-neon/source/math_neon.h
 delete mode 100644 deps/math-neon/source/math_powf.c
 delete mode 100644 deps/math-neon/source/math_runfast.c
 delete mode 100644 deps/math-neon/source/math_sincosf.c
 delete mode 100644 deps/math-neon/source/math_sinf.c
 delete mode 100644 deps/math-neon/source/math_sinfv.c
 delete mode 100644 deps/math-neon/source/math_sinhf.c
 delete mode 100644 deps/math-neon/source/math_sqrtf.c
 delete mode 100644 deps/math-neon/source/math_sqrtfv.c
 delete mode 100644 deps/math-neon/source/math_tanf.c
 delete mode 100644 deps/math-neon/source/math_tanhf.c
 delete mode 100644 deps/math-neon/source/math_vec2.c
 delete mode 100644 deps/math-neon/source/math_vec3.c
 delete mode 100644 deps/math-neon/source/math_vec4.c

diff --git a/deps/math-neon/.gitattributes b/deps/math-neon/.gitattributes
deleted file mode 100644
index bdb0cabc87..0000000000
--- a/deps/math-neon/.gitattributes
+++ /dev/null
@@ -1,17 +0,0 @@
-# Auto detect text files and perform LF normalization
-* text=auto
-
-# Custom for Visual Studio
-*.cs     diff=csharp
-
-# Standard to msysgit
-*.doc	 diff=astextplain
-*.DOC	 diff=astextplain
-*.docx diff=astextplain
-*.DOCX diff=astextplain
-*.dot  diff=astextplain
-*.DOT  diff=astextplain
-*.pdf  diff=astextplain
-*.PDF	 diff=astextplain
-*.rtf	 diff=astextplain
-*.RTF	 diff=astextplain
diff --git a/deps/math-neon/.gitignore b/deps/math-neon/.gitignore
deleted file mode 100644
index 6b55e9b64e..0000000000
--- a/deps/math-neon/.gitignore
+++ /dev/null
@@ -1,26 +0,0 @@
-*.o
-*.a
-
-# Windows thumbnail cache files
-Thumbs.db
-ehthumbs.db
-ehthumbs_vista.db
-
-# Folder config file
-Desktop.ini
-
-# Recycle Bin used on file shares
-$RECYCLE.BIN/
-
-# Windows Installer files
-*.cab
-*.msi
-*.msm
-*.msp
-
-# Windows shortcuts
-*.lnk
-
-# =========================
-# Operating System Files
-# =========================
diff --git a/deps/math-neon/Makefile b/deps/math-neon/Makefile
deleted file mode 100644
index 269d8cdd57..0000000000
--- a/deps/math-neon/Makefile
+++ /dev/null
@@ -1,29 +0,0 @@
-TARGET          := libmathneon
-SOURCES         := source
-
-LIBS = -lc -lm -lSceGxm_stub -lSceDisplay_stub
-
-CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
-CGFILES  := $(foreach dir,$(SHADERS), $(wildcard $(dir)/*.cg))
-HEADERS  := $(CGFILES:.cg=.h)
-OBJS     := $(CFILES:.c=.o)
-
-PREFIX  = arm-vita-eabi
-CC      = $(PREFIX)-gcc
-AR      = $(PREFIX)-gcc-ar
-CFLAGS  = -g -Wl,-q -O2 -ffast-math -mtune=cortex-a9 -mfpu=neon -flto -ftree-vectorize
-ASFLAGS = $(CFLAGS)
-
-all: $(TARGET).a
-
-$(TARGET).a: $(OBJS)
-	$(AR) -rc $@ $^
-	
-clean:
-	@rm -rf $(TARGET).a $(TARGET).elf $(OBJS)
-	
-install: $(TARGET).a
-	@mkdir -p $(VITASDK)/$(PREFIX)/lib/
-	cp $(TARGET).a $(VITASDK)/$(PREFIX)/lib/
-	@mkdir -p $(VITASDK)/$(PREFIX)/include/
-	cp source/math_neon.h $(VITASDK)/$(PREFIX)/include/
diff --git a/deps/math-neon/README b/deps/math-neon/README
deleted file mode 100644
index 8740253a58..0000000000
--- a/deps/math-neon/README
+++ /dev/null
@@ -1,169 +0,0 @@
-
-Library: 	MATH-NEON
-By:			Lachlan Tychsen-Smith
-Licence:	MIT (expat)
-=======================================================================================
-This project implements the cmath functions and some optimised matrix functions 
-with the aim of increasing the floating point performance of ARM Cortex A-8
-based platforms. As well as implementing the functions in ARM NEON assembly, 
-they sacrifice error checking and some accuracy to achieve better performance.
-
-Function Errors:
-=======================================================================================
-The measurement and characterisations of the inaccuracies present within these 
-functions is really a field within itself. For the benchmark i provide the 
-maximum absolute, maximum relative and root mean squared error compared to the
-cmath implementations over the specified range. However these values can be 
-misleading, especially for functions which quickly go to infinity. So its always a 
-good idea to test it within your actual program. In general, this library will not 
-be as accurate as cmath, however for many functions it is close enough to be
-negilible. 
-	
-Notes:
-=======================================================================================
-- The *_c functions are c implementations of the *_neon code.
-- Like cmath, The errors present in the functions are very dependent on the 
-  range which your operating in. So you should test them first.
-- Look in the "math_neon.h" file for discriptions of the functions. In some 
-  function files there are also notes on the specific implementation.
-- The *_neon functions make certain assumptions about the location of arguments 
-  that is incompatible with inlining. 
-	  
-Contact:
-=======================================================================================
-Name: 	Lachlan Tychsen-Smith 
-Email: 	lachlan.ts@gmail.com
-
-PSVITA performances test results:
-
-RUNFAST: Enabled 
-------------------------------------------------------------------------------------------------------
-MATRIX FUNCTION TESTS 
-------------------------------------------------------------------------------------------------------
-matmul2_c = 
-			|-7.16, 9.42|
-			|17.86, -10.70|
-matmul2_neon = 
-			|-7.16, 9.42|
-			|17.86, -10.70|
-matmul2: c=183985 	 neon=87480 	 rate=2.10 
-matvec2_c = |-7.16, 17.86|
-matvec2_neon = |-7.16, 17.86|
-matvec2: c=98178 	 neon=66040 	 rate=1.49 
-matmul3_c =
-			|11.14, -0.78, -3.98|
-			|16.56, 17.96, 23.58|
-			|8.73, -0.18, 1.57|
-matmul3_neon =
-			|11.14, -0.78, -3.98|
-			|16.56, 17.96, 23.58|
-			|8.73, -0.18, 1.57|
-matmul3: c=551838 	 neon=340292 	 rate=1.62 
-matvec3_c = |11.14, 16.56, 8.73|
-matvec3_neon = |11.14, 16.56, 8.73|
-matvec3: c=98178 	 neon=66040 	 rate=1.49 
-matmul4_c =
-			|17.91, -23.96, 1.86, 16.53|
-			|4.10, -18.16, 4.17, 29.06|
-			|6.92, -1.60, 3.12, 27.81|
-			|-15.13, -7.46, -17.91, 22.49|
-matmul4_neon =
-			|17.91, -23.96, 1.86, 16.53|
-			|4.10, -18.16, 4.17, 29.06|
-			|6.92, -1.60, 3.12, 27.81|
-			|-15.13, -7.46, -17.91, 22.49|
-matmul4: c=1316131 	 neon=315444 	 rate=4.17 
-matvec4_c = |17.91, 4.10, 6.92, -15.126419|
-matvec4_neon = |17.91, 4.10, 6.92, -15.126419|
-matvec4: c=98178 	 neon=66040 	 rate=1.49 
-
-dot2_c = 5.804099
-dot2_neon = 5.804099
-dot2: c=291526 	 neon=307025 	 rate=0.95 
-normalize2_c = [0.97, 0.24]
-normalize2_neon = [0.97, 0.24]
-normalize2: c=1058588 	 neon=965696 	 rate=1.10 
-
-dot3_c = -0.817487
-dot3_neon = -0.817487
-dot3: c=322094 	 neon=444834 	 rate=0.72 
-normalize3_c = [0.50, 0.12, -0.86]
-normalize3_neon = [0.50, 0.12, -0.86]
-normalize3: c=1257201 	 neon=1134375 	 rate=1.11 
-cross3_c = [-13.16, -17.29, -10.19]
-cross3_neon = [-13.16, -17.29, -10.19]
-cross3: c=705298 	 neon=766477 	 rate=0.92 
-
-dot4_c = -7.880241
-dot4_neon = -7.880241
-dot4: c=414431 	 neon=506460 	 rate=0.82 
-normalize4_c = [0.45, 0.11, -0.77, -0.44]
-normalize4_neon = [0.45, 0.11, -0.77, -0.44]
-normalize4: c=1410727 	 neon=1102802 	 rate=1.28 
-
-------------------------------------------------------------------------------------------------------
-CMATH FUNCTION TESTS 
-------------------------------------------------------------------------------------------------------
-Function	Range		Number	ABS Max Error	REL Max Error	RMS Error	Time	Rate
-------------------------------------------------------------------------------------------------------
-sinf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	647042739	x1.00	
-sinf_c     	[-3.14, 3.14]	500000	7.75e-07	1.00e+02%	4.09e-07	646276691	x1.00	
-sinf_neon  	[-3.14, 3.14]	500000	1.00e+00	1.00e+02%	7.07e-01	645546381	x1.00	
-cosf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	644742077	x1.00	
-cosf_c     	[-3.14, 3.14]	500000	7.75e-07	6.74e-01%	4.15e-07	643957358	x1.00	
-cosf_neon  	[-3.14, 3.14]	500000	1.00e+00	1.00e+02%	7.06e-01	643211256	x1.00	
-tanf       	[-0.79, 0.79]	500000	0.00e+00	0.00e+00%	0.00e+00	642444112	x1.00	
-tanf_c     	[-0.79, 0.79]	500000	2.98e-06	7.94e-04%	1.31e-06	641628507	x1.00	
-tanf_neon  	[-0.79, 0.79]	500000	1.00e+00	1.00e+02%	nan	640740514	x1.00	
-asinf      	[-1.00, 1.00]	500000	0.00e+00	0.00e+00%	0.00e+00	639560380	x1.00	
-asinf_c    	[-1.00, 1.00]	500000	5.54e-05	1.06e-02%	nan	638453383	x1.00	
-asinf_neon 	[-1.00, 1.00]	500000	1.57e+00	1.00e+02%	6.84e-01	637349653	x1.00	
-acosf      	[-1.00, 1.00]	500000	0.00e+00	0.00e+00%	0.00e+00	636078992	x1.00	
-acosf_c    	[-1.00, 1.00]	500000	5.56e-05	6.46e-03%	nan	634934201	x1.00	
-acosf_neon 	[-1.00, 1.00]	500000	1.57e+00	1.02e+05%	6.84e-01	633793585	x1.00	
-atanf      	[-1.00, 1.00]	500000	0.00e+00	0.00e+00%	0.00e+00	632835241	x1.00	
-atanf_c    	[-1.00, 1.00]	500000	1.67e-04	2.12e-02%	7.40e-05	632142823	x1.00	
-atanf_neon 	[-1.00, 1.00]	500000	7.85e-01	0.00e+00%	nan	631387330	x1.00	
-sinhf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	630142014	x1.00	
-sinhf_c     	[-3.14, 3.14]	500000	1.91e-06	1.52e-01%	1.85e-07	628992714	x1.00	
-sinhf_neon  	[-3.14, 3.14]	500000	1.15e+01	1.00e+02%	4.55e+00	627998454	x1.00	
-coshf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	626869866	x1.00	
-coshf_c     	[-3.14, 3.14]	500000	9.54e-07	2.38e-05%	1.64e-07	625829657	x1.00	
-coshf_neon  	[-3.14, 3.14]	500000	1.06e+01	9.14e+01%	3.92e+00	624873969	x1.00	
-tanhf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	623689093	x1.00	
-tanhf_c     	[-3.14, 3.14]	500000	1.20e-05	2.48e-01%	5.48e-06	622547097	x1.00	
-tanhf_neon  	[-3.14, 3.14]	500000	9.96e-01	1.00e+02%	8.26e-01	621506812	x1.00	
-expf       	[0.00, 10.00]	500000	0.00e+00	0.00e+00%	0.00e+00	620497304	x1.00	
-expf_c     	[0.00, 10.00]	500000	9.77e-03	6.15e-05%	1.64e-03	619569554	x1.00	
-expf_neon  	[0.00, 10.00]	500000	2.20e+04	1.00e+02%	4.92e+03	618761400	x1.00	
-logf       	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	0.00e+00	617882765	x1.00	
-logf_c     	[1.00, 1000.00]	500000	6.20e-06	1.62e-02%	9.83e-07	617087810	x1.00	
-logf_neon  	[1.00, 1000.00]	500000	9.49e+01	inf%	9.39e+01	616388420	x1.00	
-log10f       	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	0.00e+00	615405364	x1.00	
-log10f_c     	[1.00, 1000.00]	500000	2.86e-06	6.68e-03%	4.79e-07	614442585	x1.00	
-log10f_neon  	[1.00, 1000.00]	500000	4.12e+01	inf%	4.07e+01	613671782	x1.00	
-floorf     	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	611113689	x1.00	
-floorf_c   	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	608159325	x1.00	
-floorf_neon	[1.00, 1000.00]	5000000	2.00e+00	2.00e+02%	1.42e-02	604769008	x1.01	
-ceilf     	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	601342443	x1.00	
-ceilf_c   	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	598387998	x1.00	
-ceilf_neon	[1.00, 1000.00]	5000000	2.00e+00	1.00e+02%	1.02e+00	594959710	x1.01	
-fabsf     	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	592068236	x1.00	
-fabsf_c   	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	589808748	x1.00	
-fabsf_neon	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	587712180	x1.01	
-sqrtf      	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	0.00e+00	586496654	x1.00	
-sqrtf_c    	[1.00, 1000.00]	500000	2.33e-04	1.06e-03%	8.69e-05	585470866	x1.00	
-sqrtf_neon 	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	nan	584594551	x1.00	
-invsqrtf      	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	0.00e+00	583492213	x1.00	
-invsqrtf_c    	[1.00, 1000.00]	500000	4.35e-06	4.78e-04%	2.00e-07	582448164	x1.00	
-invsqrtf_neon 	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	nan	581642365	x1.00	
-atan2f       	[0.10, 10.00]	10000	0.00e+00	0.00e+00%	0.00e+00	83594269	x1.00	
-atan2f_c     	[0.10, 10.00]	10000	1.73e-04	2.23e-02%	0.00e+00	85383651	x0.98	
-atan2f_neon  	[0.10, 10.00]	10000	0.00e+00	0.00e+00%	0.00e+00	87387055	x0.96	
-powf       	[1.00, 10.00]	10000	0.00e+00	0.00e+00%	0.00e+00	93430489	x1.00	
-powf_c     	[1.00, 10.00]	10000	1.08e+05	4.37e-03%	0.00e+00	96726976	x0.97	
-powf_neon  	[1.00, 10.00]	10000	9.97e+09	1.00e+02%	0.00e+00	100185753	x0.93	
-fmodf       	[1.00, 10.00]	10000	0.00e+00	0.00e+00%	0.00e+00	101653673	x1.00	
-fmodf_c     	[1.00, 10.00]	10000	9.90e+00	8.06e-02%	0.00e+00	103177551	x0.99	
-fmodf_neon  	[1.00, 10.00]	10000	9.99e+00	1.00e+02%	0.00e+00	104771240	x0.97	
-
diff --git a/deps/math-neon/source/math_acosf.c b/deps/math-neon/source/math_acosf.c
deleted file mode 100644
index 59a22b2985..0000000000
--- a/deps/math-neon/source/math_acosf.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-/*
-Test func : acosf(x)
-Test Range: -1.0 < x < 1.0
-Peak Error:	~0.005%
-RMS  Error: ~0.001%
-*/
-
-const float __acosf_pi_2 = M_PI_2;
-
-float acosf_c(float x)
-{
-	return __acosf_pi_2 - asinf_c(x);
-}
-
-
-float acosf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asinf_neon_hfp(x);
-	asm volatile (
-	"vdup.f32	 	d1, %0					\n\t"	//d1 = {pi/2, pi/2};
-	"vsub.f32	 	d0, d1, d0				\n\t"	//d0 = d1 - d0;
-	::"r"(__acosf_pi_2):
-	);
-#endif
-}
-
-float acosf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	acosf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return acosf_c(x);
-#endif
-}
-
-
-
diff --git a/deps/math-neon/source/math_asinf.c b/deps/math-neon/source/math_asinf.c
deleted file mode 100644
index 0ae8ef9b84..0000000000
--- a/deps/math-neon/source/math_asinf.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-/*
-Test func : asinf(x)
-Test Range: -1.0 < x < 1.0
-Peak Error:	~0.005%
-RMS  Error: ~0.001%
-*/
-
-
-const float __asinf_lut[4] = {
-	0.105312459675071, 	//p7
-	0.169303418571894,	//p3
-	0.051599985887214, 	//p5
-	0.999954835104825	//p1
-}; 
-
-const float __asinf_pi_2 = M_PI_2;
-
-float asinf_c(float x)
-{
-
-	float a, b, c, d, r, ax;
-	int m;
-	
-	union {
-		float f;
-		int i;
-	} xx;
-
-	ax = fabs(x);
-	d = 0.5;
-	d = d - ax*0.5;
-		
-	//fast invsqrt approx
-	xx.f = d;
-	xx.i = 0x5F3759DF - (xx.i >> 1);		//VRSQRTE
-	c = d * xx.f;
-	b = (3.0f - c * xx.f) * 0.5;		//VRSQRTS
-	xx.f = xx.f * b;		
-	c = d * xx.f;
-	b = (3.0f - c * xx.f) * 0.5;
-    xx.f = xx.f * b;	
-
-	//fast inverse approx
-	d = xx.f;
-	m = 0x3F800000 - (xx.i & 0x7F800000);
-	xx.i = xx.i + m;
-	xx.f = 1.41176471f - 0.47058824f * xx.f;
-	xx.i = xx.i + m;
-	b = 2.0 - xx.f * d;
-	xx.f = xx.f * b;	
-	b = 2.0 - xx.f * d;
-	xx.f = xx.f * b;
-	
-	//if |x|>0.5 -> x = sqrt((1-x)/2)
-	xx.f = xx.f - ax;	
-	a = (ax > 0.5f);
-	d = __asinf_pi_2 * a;
-	c = 1.0f - 3.0f * a;
-	ax = ax + xx.f * a;
-		
-	//polynomial evaluation
-	xx.f = ax * ax;	
-	a = (__asinf_lut[0] * ax) * xx.f + (__asinf_lut[2] * ax);
-	b = (__asinf_lut[1] * ax) * xx.f + (__asinf_lut[3] * ax);
-	xx.f = xx.f * xx.f;
-	r = b + a * xx.f; 
-	r = d + c * r;
-
-	a = r + r;
-	b = (x < 0.0f);
-	r = r - a * b;
-	return r;
-}
-
-
-float asinf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-
-	"vdup.f32	 	d0, d0[0]				\n\t"	//d0 = {x, x};
-	"vdup.f32	 	d4, %1					\n\t"	//d4 = {pi/2, pi/2};
-	"vmov.f32	 	d6, d0					\n\t"	//d6 = d0;
-	"vabs.f32	 	d0, d0					\n\t"	//d0 = fabs(d0) ;
-
-	"vmov.f32	 	d5, #0.5				\n\t"	//d5 = 0.5;
-	"vmls.f32	 	d5, d0, d5				\n\t"	//d5 = d5 - d0*d5;
-
-	//fast invsqrt approx
-	"vmov.f32 		d1, d5					\n\t"	//d1 = d5
-	"vrsqrte.f32 	d5, d5					\n\t"	//d5 = ~ 1.0 / sqrt(d5)
-	"vmul.f32 		d2, d5, d1				\n\t"	//d2 = d5 * d1
-	"vrsqrts.f32 	d3, d2, d5				\n\t"	//d3 = (3 - d5 * d2) / 2 	
-	"vmul.f32 		d5, d5, d3				\n\t"	//d5 = d5 * d3
-	"vmul.f32 		d2, d5, d1				\n\t"	//d2 = d5 * d1	
-	"vrsqrts.f32 	d3, d2, d5				\n\t"	//d3 = (3 - d5 * d3) / 2	
-	"vmul.f32 		d5, d5, d3				\n\t"	//d5 = d5 * d3	
-		
-	//fast reciporical approximation
-	"vrecpe.f32		d1, d5					\n\t"	//d1 = ~ 1 / d5; 
-	"vrecps.f32		d2, d1, d5				\n\t"	//d2 = 2.0 - d1 * d5; 
-	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
-	"vrecps.f32		d2, d1, d5				\n\t"	//d2 = 2.0 - d1 * d5; 
-	"vmul.f32		d5, d1, d2				\n\t"	//d5 = d1 * d2; 
-	
-	//if |x| > 0.5 -> ax = sqrt((1-ax)/2), r = pi/2
-	"vsub.f32		d5, d0, d5				\n\t"	//d5 = d0 - d5; 
-	"vmov.f32	 	d2, #0.5				\n\t"	//d2 = 0.5;
-	"vcgt.f32	 	d3, d0, d2				\n\t"	//d3 = (d0 > d2);
-	"vmov.f32		d1, #3.0 				\n\t"	//d5 = 3.0; 	
-	"vshr.u32	 	d3, #31					\n\t"	//d3 = d3 >> 31;
-	"vmov.f32		d16, #1.0 				\n\t"	//d16 = 1.0; 	
-	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3;	
-	"vmls.f32		d0, d5, d3[0]			\n\t"	//d0 = d0 - d5 * d3[0]; 	
-	"vmul.f32		d7, d4, d3[0] 			\n\t"	//d7 = d5 * d4; 		
-	"vmls.f32		d16, d1, d3[0] 			\n\t"	//d16 = d16 - d1 * d3; 	
-		
-	//polynomial:
-	"vmul.f32 		d2, d0, d0				\n\t"	//d2 = d0*d0 = {ax^2, ax^2}	
-	"vld1.32 		{d4, d5}, [%0]			\n\t"	//d4 = {p7, p3}, d5 = {p5, p1}
-	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
-	"vmul.f32 		q0, q2, d0[0]			\n\t"	//q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
-	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}		
-	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}		
-
-	"vmla.f32 		d7, d1, d16				\n\t"	//d7 = d7 + d1*d16		
-
-	"vadd.f32 		d2, d7, d7				\n\t"	//d2 = d7 + d7		
-	"vclt.f32	 	d3, d6, #0				\n\t"	//d3 = (d6 < 0)	
-	"vshr.u32	 	d3, #31					\n\t"	//d3 = d3 >> 31;
-	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3	
-	"vmls.f32 		d7, d2, d3[0]			\n\t"	//d7 = d7 - d2 * d3[0];
-
-	"vmov.f32 		s0, s15					\n\t"	//s0 = s3
-
-	:: "r"(__asinf_lut),  "r"(__asinf_pi_2) 
-    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
-	);
-#endif
-}
-
-
-float asinf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	asinf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return asinf_c(x);
-#endif
-}
-
-
-
-
diff --git a/deps/math-neon/source/math_atan2f.c b/deps/math-neon/source/math_atan2f.c
deleted file mode 100644
index d076a04c04..0000000000
--- a/deps/math-neon/source/math_atan2f.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __atan2f_lut[4] = {
-	-0.0443265554792128,	//p7
-	-0.3258083974640975,	//p3
-	+0.1555786518463281,	//p5
-	+0.9997878412794807  	//p1
-}; 
- 
-const float __atan2f_pi_2 = M_PI_2;
-
-float atan2f_c(float y, float x)
-{
-	float a, b, c, r, xx;
-	int m;
-	union {
-		float f;
-		int i;
-	} xinv;
-
-	//fast inverse approximation (2x newton)
-	xx = fabs(x);
-	xinv.f = xx;
-	m = 0x3F800000 - (xinv.i & 0x7F800000);
-	xinv.i = xinv.i + m;
-	xinv.f = 1.41176471f - 0.47058824f * xinv.f;
-	xinv.i = xinv.i + m;
-	b = 2.0 - xinv.f * xx;
-	xinv.f = xinv.f * b;	
-	b = 2.0 - xinv.f * xx;
-	xinv.f = xinv.f * b;
-	
-	c = fabs(y * xinv.f);
-
-	//fast inverse approximation (2x newton)
-	xinv.f = c;
-	m = 0x3F800000 - (xinv.i & 0x7F800000);
-	xinv.i = xinv.i + m;
-	xinv.f = 1.41176471f - 0.47058824f * xinv.f;
-	xinv.i = xinv.i + m;
-	b = 2.0 - xinv.f * c;
-	xinv.f = xinv.f * b;	
-	b = 2.0 - xinv.f * c;
-	xinv.f = xinv.f * b;
-	
-	//if |x| > 1.0 -> ax = -1/ax, r = pi/2
-	xinv.f = xinv.f + c;
-	a = (c > 1.0f);
-	c = c - a * xinv.f;
-	r = a * __atan2f_pi_2;
-	
-	//polynomial evaluation
-	xx = c * c;	
-	a = (__atan2f_lut[0] * c) * xx + (__atan2f_lut[2] * c);
-	b = (__atan2f_lut[1] * c) * xx + (__atan2f_lut[3] * c);
-	xx = xx * xx;
-	r = r + a * xx; 
-	r = r + b;
-
-	//determine quadrant and test for small x.
-	b = M_PI;
-	b = b - 2.0f * r;
-	r = r + (x < 0.0f) * b;
-	b = (fabs(x) < 0.000001f);
-	c = !b;
-	r = c * r;
-	r = r + __atan2f_pi_2 * b;
-	b = r + r;
-	r = r - (y < 0.0f) * b;
-	
-	return r;
-}
-
-float atan2f_neon_hfp(float y, float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-
-	"vdup.f32	 	d17, d0[1]				\n\t"	//d17 = {x, x};
-	"vdup.f32	 	d16, d0[0]				\n\t"	//d16 = {y, y};
-	
-	//1.0 / x
-	"vrecpe.f32		d18, d17				\n\t"	//d16 = ~ 1 / d1; 
-	"vrecps.f32		d19, d18, d17			\n\t"	//d17 = 2.0 - d16 * d1; 
-	"vmul.f32		d18, d18, d19			\n\t"	//d16 = d16 * d17; 
-	"vrecps.f32		d19, d18, d17			\n\t"	//d17 = 2.0 - d16 * d1; 
-	"vmul.f32		d18, d18, d19			\n\t"	//d16 = d16 * d17; 
-
-	//y * (1.0 /x)
-	"vmul.f32		d0, d16, d18			\n\t"	//d0 = d16 * d18; 
-
-
-	"vdup.f32	 	d4, %1					\n\t"	//d4 = {pi/2, pi/2};
-	"vmov.f32	 	d6, d0					\n\t"	//d6 = d0;
-	"vabs.f32	 	d0, d0					\n\t"	//d0 = fabs(d0) ;
-
-	//fast reciporical approximation
-	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
-
-	//if |x| > 1.0 -> ax = 1/ax, r = pi/2
-	"vadd.f32		d1, d1, d0				\n\t"	//d1 = d1 + d0; 
-	"vmov.f32	 	d2, #1.0				\n\t"	//d2 = 1.0;
-	"vcgt.f32	 	d3, d0, d2				\n\t"	//d3 = (d0 > d2);
-	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3;
-	"vmls.f32		d0, d1, d3				\n\t"	//d0 = d0 - d1 * d3; 	
-	"vmul.f32		d7, d3, d4				\n\t"	//d7 = d3 * d4; 	
-		
-	//polynomial:
-	"vmul.f32 		d2, d0, d0				\n\t"	//d2 = d0*d0 = {ax^2, ax^2}	
-	"vld1.32 		{d4, d5}, [%0]			\n\t"	//d4 = {p7, p3}, d5 = {p5, p1}
-	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
-	"vmul.f32 		q0, q2, d0[0]			\n\t"	//q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
-	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}		
-	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}		
-	"vadd.f32 		d1, d1, d7				\n\t"	//d1 = d1 + d7		
-	
-	"vadd.f32 		d2, d1, d1				\n\t"	//d2 = d1 + d1		
-	"vclt.f32	 	d3, d6, #0				\n\t"	//d3 = (d6 < 0)	
-	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3	
-	"vmls.f32 		d1, d3, d2				\n\t"	//d1 = d1 - d2 * d3;
-
-	"vmov.f32 		s0, s3					\n\t"	//s0 = s3
-
-	:: "r"(__atan2f_lut),  "r"(__atan2f_pi_2) 
-    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
-	);
-#endif
-}
-
-
-float atan2f_neon_sfp(float x, float y)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	asm volatile ("vmov.f32 s1, r1 		\n\t");
-	atan2f_neon_hfp(x, y);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return atan2f_c(y, x);
-#endif
-};
diff --git a/deps/math-neon/source/math_atanf.c b/deps/math-neon/source/math_atanf.c
deleted file mode 100644
index c983756dd2..0000000000
--- a/deps/math-neon/source/math_atanf.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __atanf_lut[4] = {
-	-0.0443265554792128,	//p7
-	-0.3258083974640975,	//p3
-	+0.1555786518463281,	//p5
-	+0.9997878412794807  	//p1
-}; 
- 
-const float __atanf_pi_2 = M_PI_2;
-    
-float atanf_c(float x)
-{
-
-	float a, b, r, xx;
-	int m;
-	
-	union {
-		float f;
-		int i;
-	} xinv, ax;
-
-	ax.f = fabs(x);
-	
-	//fast inverse approximation (2x newton)
-	xinv.f = ax.f;
-	m = 0x3F800000 - (xinv.i & 0x7F800000);
-	xinv.i = xinv.i + m;
-	xinv.f = 1.41176471f - 0.47058824f * xinv.f;
-	xinv.i = xinv.i + m;
-	b = 2.0 - xinv.f * ax.f;
-	xinv.f = xinv.f * b;	
-	b = 2.0 - xinv.f * ax.f;
-	xinv.f = xinv.f * b;
-	
-	//if |x| > 1.0 -> ax = -1/ax, r = pi/2
-	xinv.f = xinv.f + ax.f;
-	a = (ax.f > 1.0f);
-	ax.f = ax.f - a * xinv.f;
-	r = a * __atanf_pi_2;
-	
-	//polynomial evaluation
-	xx = ax.f * ax.f;	
-	a = (__atanf_lut[0] * ax.f) * xx + (__atanf_lut[2] * ax.f);
-	b = (__atanf_lut[1] * ax.f) * xx + (__atanf_lut[3] * ax.f);
-	xx = xx * xx;
-	b = b + a * xx; 
-	r = r + b;
-
-	//if x < 0 -> r = -r
-	a = 2 * r;
-	b = (x < 0.0f);
-	r = r - a * b;
-
-	return r;
-}
-
-
-float atanf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-
-	"vdup.f32	 	d0, d0[0]				\n\t"	//d0 = {x, x};
-
-	"vdup.f32	 	d4, %1					\n\t"	//d4 = {pi/2, pi/2};
-	"vmov.f32	 	d6, d0					\n\t"	//d6 = d0;
-	"vabs.f32	 	d0, d0					\n\t"	//d0 = fabs(d0) ;
-
-	//fast reciporical approximation
-	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
-
-		
-	//if |x| > 1.0 -> ax = -1/ax, r = pi/2
-	"vadd.f32		d1, d1, d0				\n\t"	//d1 = d1 + d0; 
-	"vmov.f32	 	d2, #1.0				\n\t"	//d2 = 1.0;
-	"vcgt.f32	 	d3, d0, d2				\n\t"	//d3 = (d0 > d2);
-	"vshr.u32	 	d3, #31					\n\t"	//d3 = (d0 > d2);
-	"vcvt.f32.u32	d3, d3					\n\t"	//d5 = (float) d3;	
-	"vmls.f32		d0, d1, d3[0]			\n\t"	//d0 = d0 - d1 * d3[0]; 	
-	"vmul.f32		d7, d4, d3[0] 			\n\t"	//d7 = d5 * d4; 	
-	
-	//polynomial:
-	"vmul.f32 		d2, d0, d0				\n\t"	//d2 = d0*d0 = {ax^2, ax^2}	
-	"vld1.32 		{d4, d5}, [%0]			\n\t"	//d4 = {p7, p3}, d5 = {p5, p1}
-	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
-	"vmul.f32 		q0, q2, d0[0]			\n\t"	//q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
-	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}		
-	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}		
-	"vadd.f32 		d1, d1, d7				\n\t"	//d1 = d1 + d7		
-
-	"vadd.f32 		d2, d1, d1				\n\t"	//d2 = d1 + d1		
-	"vclt.f32	 	d3, d6, #0				\n\t"	//d3 = (d6 < 0)	
-	"vshr.u32	 	d3, #31					\n\t"	//d3 = (d0 > d2);
-	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3	
-	"vmls.f32 		d1, d3, d2				\n\t"	//d1 = d1 - d2 * d3;
-
-	"vmov.f32 		s0, s3					\n\t"	//s0 = s3
-
-	:: "r"(__atanf_lut),  "r"(__atanf_pi_2) 
-    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
-	);
-
-#endif
-}
-
-
-float atanf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vdup.f32 d0, r0 		\n\t");
-	atanf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return atanf_c(x);
-#endif
-};
-
-
-
diff --git a/deps/math-neon/source/math_ceilf.c b/deps/math-neon/source/math_ceilf.c
deleted file mode 100644
index 1432efee73..0000000000
--- a/deps/math-neon/source/math_ceilf.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Assumes the floating point value |x| < 2147483648
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-float ceilf_c(float x)
-{
-	int n;
-	float r;	
-	n = (int) x;
-	r = (float) n;
-	r = r + (x > r);
-	return r;
-}
-
-float ceilf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-
-	"vcvt.s32.f32 	d1, d0					\n\t"	//d1 = (int) d0;
-	"vcvt.f32.s32 	d1, d1					\n\t"	//d1 = (float) d1;
-	"vcgt.f32 		d0, d0, d1				\n\t"	//d0 = (d0 > d1);
-	"vshr.u32 		d0, #31					\n\t"	//d0 = d0 >> 31;
-	"vcvt.f32.u32 	d0, d0					\n\t"	//d0 = (float) d0;
-	"vadd.f32 		d0, d1, d0				\n\t"	//d0 = d1 + d0;
-
-	::: "d0", "d1"
-	);
-		
-#endif
-}
-
-float ceilf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	ceilf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return ceilf_c(x);
-#endif
-};
-
-
diff --git a/deps/math-neon/source/math_cosf.c b/deps/math-neon/source/math_cosf.c
deleted file mode 100644
index cb14498069..0000000000
--- a/deps/math-neon/source/math_cosf.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math_neon.h"
-
-float cosf_c(float x)
-{
-	return sinf_c(x + M_PI_2);
-}
-
-float cosf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	float xx = x + M_PI_2;
-	return sinf_neon_hfp(xx);
-#endif
-}
-
-float cosf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vdup.f32 d0, r0 		\n\t");
-	cosf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return cosf_c(x);
-#endif
-};
-
diff --git a/deps/math-neon/source/math_coshf.c b/deps/math-neon/source/math_coshf.c
deleted file mode 100644
index a779b6a7be..0000000000
--- a/deps/math-neon/source/math_coshf.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __coshf_rng[2] = {
-	1.442695041f,
-	0.693147180f
-};
-
-const float __coshf_lut[16] = {
-	0.00019578093328483123,	//p7
-	0.00019578093328483123,	//p7
-	0.0014122663401803872, 	//p6
-	0.0014122663401803872, 	//p6
-	0.008336936973260111, 	//p5
-	0.008336936973260111, 	//p5
-	0.04165989275009526, 	//p4
-	0.04165989275009526, 	//p4
-	0.16666570253074878, 	//p3
-	0.16666570253074878, 	//p3
-	0.5000006143673624, 	//p2
-	0.5000006143673624, 	//p2
-	1.000000059694879, 		//p1
-	1.000000059694879, 		//p1
-	0.9999999916728642,		//p0
-	0.9999999916728642		//p0
-};
-
-  
-float coshf_c(float x)
-{
-	float a, b, xx;
-	xx = -x;
-	a = expf_c(x);
-	b = expf_c(xx);
-	a = a * 0.5f;
-	a = a + 0.5f * b;
-	return a;
-}
-
-
-float coshf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}	
-	"fnegs 			s1, s1					\n\t"	//s1 = -s1
-	
-	//Range Reduction:
-	"vld1.32 		d2, [%0]				\n\t"	//d2 = {invrange, range}
-	"vld1.32 		{d16, d17}, [%1]!		\n\t"	
-	"vmul.f32 		d6, d0, d2[0]			\n\t"	//d6 = d0 * d2[0] 
-	"vcvt.s32.f32 	d6, d6					\n\t"	//d6 = (int) d6
-	"vld1.32 		{d18}, [%1]!			\n\t"	
-	"vcvt.f32.s32 	d1, d6					\n\t"	//d1 = (float) d6
-	"vld1.32 		{d19}, [%1]!			\n\t"	
-	"vmls.f32 		d0, d1, d2[1]			\n\t"	//d0 = d0 - d1 * d2[1]
-	"vld1.32 		{d20}, [%1]!			\n\t"	
-		
-	//polynomial:
-	"vmla.f32 		d17, d16, d0			\n\t"	//d17 = d17 + d16 * d0;	
-	"vld1.32 		{d21}, [%1]!			\n\t"	
-	"vmla.f32 		d18, d17, d0			\n\t"	//d18 = d18 + d17 * d0;	
-	"vld1.32 		{d22}, [%1]!			\n\t"	
-	"vmla.f32 		d19, d18, d0			\n\t"	//d19 = d19 + d18 * d0;	
-	"vld1.32 		{d23}, [%1]!			\n\t"	
-	"vmla.f32 		d20, d19, d0			\n\t"	//d20 = d20 + d19 * d0;	
-	"vmla.f32 		d21, d20, d0			\n\t"	//d21 = d21 + d20 * d0;	
-	"vmla.f32 		d22, d21, d0			\n\t"	//d22 = d22 + d21 * d0;	
-	"vmla.f32 		d23, d22, d0			\n\t"	//d23 = d23 + d22 * d0;	
-	
-	//multiply by 2 ^ m 	
-	"vshl.i32 		d6, d6, #23				\n\t"	//d6 = d6 << 23		
-	"vadd.i32 		d0, d23, d6				\n\t"	//d0 = d22 + d6		
-
-	"vdup.f32 		d2, d0[1]				\n\t"	//d2 = s1		
-	"vmov.f32 		d1, #0.5				\n\t"	//d1 = 0.5		
-	"vadd.f32 		d0, d0, d2				\n\t"	//d0 = d0 + d2		
-	"vmul.f32 		d0, d1					\n\t"	//d0 = d0 * d1		
-
-	:: "r"(__coshf_rng), "r"(__coshf_lut) 
-    : "d0", "d1", "q1", "q2", "d6"
-	);
-		
-#endif
-}
-
-float coshf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	coshf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return coshf_c(x);
-#endif
-};
diff --git a/deps/math-neon/source/math_expf.c b/deps/math-neon/source/math_expf.c
deleted file mode 100644
index 011b9495bd..0000000000
--- a/deps/math-neon/source/math_expf.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Based on: 
-
-		e ^ x = (1+m) * (2^n)
-		x = log(1+m) + n * log(2)
-		n = (int) (x * 1.0 / log(2))
-		(1+m) = e ^ (x - n * log(2))
-		(1+m) = Poly(x - n * log(2))
-		
-		where Poly(x) is the Minimax approximation of e ^ x over the 
-		range [-Log(2), Log(2)]
-
-Test func : expf(x)
-Test Range: 0 < x < 50
-Peak Error:	~0.00024%
-RMS  Error: ~0.00007%
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __expf_rng[2] = {
-	1.442695041f,
-	0.693147180f
-};
-
-const float __expf_lut[8] = {
-	0.9999999916728642,		//p0
-	0.04165989275009526, 	//p4
-	0.5000006143673624, 	//p2
-	0.0014122663401803872, 	//p6
-	1.000000059694879, 		//p1
-	0.008336936973260111, 	//p5
-	0.16666570253074878, 	//p3
-	0.00019578093328483123	//p7
-};
-
-float expf_c(float x)
-{
-	float a, b, c, d, xx;
-	int m;
-	
-	union {
-		float   f;
-		int 	i;
-	} r;
-		
-	//Range Reduction:
-	m = (int) (x * __expf_rng[0]);
-	x = x - ((float) m) * __expf_rng[1];	
-	
-	//Taylor Polynomial (Estrins)
-	a = (__expf_lut[4] * x) + (__expf_lut[0]);
-	b = (__expf_lut[6] * x) + (__expf_lut[2]);
-	c = (__expf_lut[5] * x) + (__expf_lut[1]);
-	d = (__expf_lut[7] * x) + (__expf_lut[3]);
-	xx = x * x;
-	a = a + b * xx; 
-	c = c + d * xx;
-	xx = xx* xx;
-	r.f = a + c * xx; 
-	
-	//multiply by 2 ^ m 
-	m = m << 23;
-	r.i = r.i + m;
-
-	return r.f;
-}
-
-float expf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}
-	
-	//Range Reduction:
-	"vld1.32 		d2, [%0]				\n\t"	//d2 = {invrange, range}
-	"vmul.f32 		d6, d0, d2[0]			\n\t"	//d6 = d0 * d2[0] 
-	"vcvt.s32.f32 	d6, d6					\n\t"	//d6 = (int) d6
-	"vcvt.f32.s32 	d1, d6					\n\t"	//d1 = (float) d6
-	"vmls.f32 		d0, d1, d2[1]			\n\t"	//d0 = d0 - d1 * d2[1]
-		
-	//polynomial:
-	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
-	"vld1.32 		{d2, d3, d4, d5}, [%1]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
-	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
-	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
-	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
-	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
-
-	//multiply by 2 ^ m 	
-	"vshl.i32 		d6, d6, #23				\n\t"	//d6 = d6 << 23		
-	"vadd.i32 		d0, d2, d6				\n\t"	//d0 = d2 + d6		
-
-	:: "r"(__expf_rng), "r"(__expf_lut) 
-    : "d0", "d1", "q1", "q2", "d6"
-	);
-#endif
-}
-
-float expf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	expf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return expf_c(x);
-#endif
-};
-
diff --git a/deps/math-neon/source/math_fabsf.c b/deps/math-neon/source/math_fabsf.c
deleted file mode 100644
index c22244704f..0000000000
--- a/deps/math-neon/source/math_fabsf.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math_neon.h"
-
-	
-float fabsf_c(float x)
-{
-	union {
-		int i;
-		float f;
-	} xx;
-
-	xx.f = x;
-	xx.i = xx.i & 0x7FFFFFFF;
-	return xx.f;
-}
-
-float fabsf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"fabss	 		s0, s0					\n\t"	//s0 = fabs(s0)
-	);
-#endif
-}
-
-float fabsf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"bic	 		r0, r0, #0x80000000		\n\t"	//r0 = r0 & ~(1 << 31)
-	);
-#else
-	return fabsf_c(x);
-#endif
-}
diff --git a/deps/math-neon/source/math_floorf.c b/deps/math-neon/source/math_floorf.c
deleted file mode 100644
index 091709140e..0000000000
--- a/deps/math-neon/source/math_floorf.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Assumes the floating point value |x| < 2147483648
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-float floorf_c(float x)
-{
-	int n;
-	float r;	
-	n = (int) x;
-	r = (float) n;
-	r = r - (r > x);
-	return r;
-}
-
-float floorf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vcvt.s32.f32 	d1, d0					\n\t"	//d1 = (int) d0;
-	"vcvt.f32.s32 	d1, d1					\n\t"	//d1 = (float) d1;
-	"vcgt.f32 		d0, d1, d0				\n\t"	//d0 = (d1 > d0);
-	"vshr.u32 		d0, #31					\n\t"	//d0 = d0 >> 31;
-	"vcvt.f32.u32 	d0, d0					\n\t"	//d0 = (float) d0;
-	"vsub.f32 		d0, d1, d0				\n\t"	//d0 = d1 - d0;
-	::: "d0", "d1"
-	);
-#endif
-}
-
-float floorf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	floorf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return floorf_c(x);
-#endif
-};
diff --git a/deps/math-neon/source/math_fmodf.c b/deps/math-neon/source/math_fmodf.c
deleted file mode 100644
index 86af55da34..0000000000
--- a/deps/math-neon/source/math_fmodf.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Assumes the floating point value |x / y| < 2,147,483,648
-*/
-
-#include "math_neon.h"
-
-float fmodf_c(float x, float y)
-{
-	int n;
-	union {
-		float f;
-		int   i;
-	} yinv;
-	float a;
-	
-	//fast reciporical approximation (4x Newton)
-	yinv.f = y;
-	n = 0x3F800000 - (yinv.i & 0x7F800000);
-	yinv.i = yinv.i + n;
-	yinv.f = 1.41176471f - 0.47058824f * yinv.f;
-	yinv.i = yinv.i + n;
-	a = 2.0 - yinv.f * y;
-	yinv.f = yinv.f * a;	
-	a = 2.0 - yinv.f * y;
-	yinv.f = yinv.f * a;
-	a = 2.0 - yinv.f * y;
-	yinv.f = yinv.f * a;
-	a = 2.0 - yinv.f * y;
-	yinv.f = yinv.f * a;
-	
-	n = (int)(x * yinv.f);
-	x = x - ((float)n) * y;
-	return x;
-}
-
-
-float fmodf_neon_hfp(float x, float y)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vdup.f32 		d1, d0[1]					\n\t"	//d1[0] = y
-	"vdup.f32 		d0, d0[0]					\n\t"	//d1[0] = y
-	
-	//fast reciporical approximation
-	"vrecpe.f32 	d2, d1					\n\t"	//d2 = ~1.0 / d1
-	"vrecps.f32		d3, d2, d1				\n\t"	//d3 = 2.0 - d2 * d1; 
-	"vmul.f32		d2, d2, d3				\n\t"	//d2 = d2 * d3; 
-	"vrecps.f32		d3, d2, d1				\n\t"	//d3 = 2.0 - d2 * d1; 
-	"vmul.f32		d2, d2, d3				\n\t"	//d2 = d2 * d3; 
-	"vrecps.f32		d3, d2, d1				\n\t"	//d3 = 2.0 - d2 * d1; 
-	"vmul.f32		d2, d2, d3				\n\t"	//d2 = d2 * d3; 
-	"vrecps.f32		d3, d2, d1				\n\t"	//d3 = 2.0 - d2 * d1; 
-	"vmul.f32		d2, d2, d3				\n\t"	//d2 = d2 * d3; 
-
-	"vmul.f32		d2, d2, d0				\n\t"	//d2 = d2 * d0; 
-	"vcvt.s32.f32	d2, d2					\n\t"	//d2 = (int) d2; 
-	"vcvt.f32.s32	d2, d2					\n\t"	//d2 = (float) d2; 
-	"vmls.f32		d0, d1, d2				\n\t"	//d0 = d0 - d1 * d2; 
-
-	::: "d0", "d1", "d2", "d3"
-	);
-#endif
-}
-
-
-float fmodf_neon_sfp(float x, float y)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	asm volatile ("vmov.f32 s1, r1 		\n\t");
-	fmodf_neon_hfp(x, y);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return fmodf_c(x,y);
-#endif
-};
diff --git a/deps/math-neon/source/math_invsqrtf.c b/deps/math-neon/source/math_invsqrtf.c
deleted file mode 100644
index c4d2b1d52a..0000000000
--- a/deps/math-neon/source/math_invsqrtf.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-float invsqrtf_c(float x)
-{
-
-	float b, c;
-	union {
-		float 	f;
-		int 	i;
-	} a;
-	
-	//fast invsqrt approx
-	a.f = x;
-	a.i = 0x5F3759DF - (a.i >> 1);		//VRSQRTE
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;		//VRSQRTS
-	a.f = a.f * b;		
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;
-    a.f = a.f * b;	
-
-	return a.f;
-}
-
-float invsqrtf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-		
-	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
-	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d4 = (3 - d0 * d3) / 2	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d4	
-		
-	::: "d0", "d1", "d2", "d3"
-	);
-#endif
-}
-
-float invsqrtf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	invsqrtf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return invsqrtf_c(x);
-#endif
-};
-
diff --git a/deps/math-neon/source/math_ldexpf.c b/deps/math-neon/source/math_ldexpf.c
deleted file mode 100644
index 673158958f..0000000000
--- a/deps/math-neon/source/math_ldexpf.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-float ldexpf_c(float m, int e)
-{
-	union {
-		float 	f;
-		int 	i;
-	} r;
-	r.f = m;
-	r.i += (e << 23);
-	return r.f;
-}
-
-float ldexpf_neon_hfp(float m, int e)
-{
-#ifdef __MATH_NEON
-	float r;
-	asm volatile (
-	"lsl 			r0, r0, #23				\n\t"	//r0 = r0 << 23	
-	"vdup.i32 		d1, r0					\n\t"	//d1 = {r0, r0}
-	"vadd.i32 		d0, d0, d1				\n\t"	//d0 = d0 + d1
-	::: "d0", "d1"
-	);
-#endif
-}
-
-float ldexpf_neon_sfp(float m, int e)
-{
-#ifdef __MATH_NEON
-	float r;
-	asm volatile (
-	"lsl 			r1, r1, #23				\n\t"	//r1 = r1 << 23	
-	"vdup.f32 		d0, r0					\n\t"	//d0 = {r0, r0}	
-	"vdup.i32 		d1, r1					\n\t"	//d1 = {r1, r1}
-	"vadd.i32 		d0, d0, d1				\n\t"	//d0 = d0 + d1
-	"vmov.f32 		r0, s0					\n\t"	//r0 = s0
-	::: "d0", "d1"
-	);
-#else
-	return ldexpf_c(m,e);
-#endif
-}
diff --git a/deps/math-neon/source/math_log10f.c b/deps/math-neon/source/math_log10f.c
deleted file mode 100644
index f68912f0fe..0000000000
--- a/deps/math-neon/source/math_log10f.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Based on: 
-
-		log10(x) = log10((1+m) * (2^n))
-		log(x) = n * log10(2) + log10(1 + m)
-		log(1+m) = Poly(1+m)
-		
-		where Poly(x) is the Minimax approximation of log10(x) over the 
-		range [1, 2]
-
-Test func : log10f(x)
-Test Range: 1 < x < 10000
-Peak Error:	~0.000040%
-RMS  Error: ~0.000008%
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __log10f_rng =  0.3010299957f;
-
-const float __log10f_lut[8] = {
-	-0.99697286229624, 		//p0
-	-1.07301643912502, 		//p4
-	-2.46980061535534, 		//p2
-	-0.07176870463131, 		//p6
-	2.247870219989470, 		//p1
-	0.366547581117400, 		//p5
-	1.991005185100089, 		//p3
-	0.006135635201050,		//p7
-};
-
-float log10f_c(float x)
-{
-	float a, b, c, d, xx;
-	int m;
-	
-	union {
-		float   f;
-		int 	i;
-	} r;
-	
-	//extract exponent
-	r.f = x;
-	m = (r.i >> 23);
-	m = m - 127;
-	r.i = r.i - (m << 23);
-		
-	//Taylor Polynomial (Estrins)
-	xx = r.f * r.f;
-	a = (__log10f_lut[4] * r.f) + (__log10f_lut[0]);
-	b = (__log10f_lut[6] * r.f) + (__log10f_lut[2]);
-	c = (__log10f_lut[5] * r.f) + (__log10f_lut[1]);
-	d = (__log10f_lut[7] * r.f) + (__log10f_lut[3]);
-	a = a + b * xx;
-	c = c + d * xx;
-	xx = xx * xx;
-	r.f = a + c * xx;
-
-	//add exponent
-	r.f = r.f + ((float) m) * __log10f_rng;
-
-	return r.f;
-}
-
-float log10f_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	
-	"vdup.f32		d0, d0[0]				\n\t"	//d0 = {x,x};
-	
-	//extract exponent
-	"vmov.i32		d2, #127				\n\t"	//d2 = 127;
-	"vshr.u32		d6, d0, #23				\n\t"	//d6 = d0 >> 23;
-	"vsub.i32		d6, d6, d2				\n\t"	//d6 = d6 - d2;
-	"vshl.u32		d1, d6, #23				\n\t"	//d1 = d6 << 23;
-	"vsub.i32		d0, d0, d1				\n\t"	//d0 = d0 + d1;
-
-	//polynomial:
-	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
-	"vld1.32 		{d2, d3, d4, d5}, [%1]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
-	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
-	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
-	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
-	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
-
-	//add exponent 	
-	"vdup.32 		d7, %0					\n\t"	//d7 = {rng, rng}
-	"vcvt.f32.s32 	d6, d6					\n\t"	//d6 = (float) d6
-	"vmla.f32 		d2, d6, d7				\n\t"	//d2 = d2 + d6 * d7		
-
-	"vmov.f32 		s0, s4					\n\t"	//s0 = s4
-
-	:: "r"(__log10f_rng), "r"(__log10f_lut) 
-    : "d0", "d1", "q1", "q2", "d6", "d7"
-	);
-#endif
-}
-
-
-float log10f_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	log10f_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return log10f_c(x);
-#endif
-};
diff --git a/deps/math-neon/source/math_logf.c b/deps/math-neon/source/math_logf.c
deleted file mode 100644
index 61761363e5..0000000000
--- a/deps/math-neon/source/math_logf.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Based on: 
-
-		log(x) = log((1+m) * (2^n))
-		log(x) = n * log(2) + log(1 + m)
-		log(1+m) = Poly(1+m)
-		
-		where Poly(x) is the Minimax approximation of log(x) over the 
-		range [1, 2]
-
-Test func : logf(x)
-Test Range: 1 < x < 10000
-Peak Error:	~0.000601%
-RMS  Error: ~0.000005%
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __logf_rng =  0.693147180f;
-
-const float __logf_lut[8] = {
-	-2.295614848256274, 	//p0
-	-2.470711633419806, 	//p4
-	-5.686926051100417, 	//p2
-	-0.165253547131978, 	//p6
-	+5.175912446351073, 	//p1
-	+0.844006986174912, 	//p5
-	+4.584458825456749, 	//p3
-	+0.014127821926000		//p7
-};
-
-float logf_c(float x)
-{
-	float a, b, c, d, xx;
-	int m;
-	
-	union {
-		float   f;
-		int 	i;
-	} r;
-	
-	//extract exponent
-	r.f = x;
-	m = (r.i >> 23);
-	m = m - 127;
-	r.i = r.i - (m << 23);
-		
-	//Taylor Polynomial (Estrins)
-	xx = r.f * r.f;
-	a = (__logf_lut[4] * r.f) + (__logf_lut[0]);
-	b = (__logf_lut[6] * r.f) + (__logf_lut[2]);
-	c = (__logf_lut[5] * r.f) + (__logf_lut[1]);
-	d = (__logf_lut[7] * r.f) + (__logf_lut[3]);
-	a = a + b * xx;
-	c = c + d * xx;
-	xx = xx * xx;
-	r.f = a + c * xx;
-
-	//add exponent
-	r.f = r.f + ((float) m) * __logf_rng;
-
-	return r.f;
-}
-
-float logf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	
-	"vdup.f32		d0, d0[0]				\n\t"	//d0 = {x,x};
-	
-	//extract exponent
-	"vmov.i32		d2, #127				\n\t"	//d2 = 127;
-	"vshr.u32		d6, d0, #23				\n\t"	//d6 = d0 >> 23;
-	"vsub.i32		d6, d6, d2				\n\t"	//d6 = d6 - d2;
-	"vshl.u32		d1, d6, #23				\n\t"	//d1 = d6 << 23;
-	"vsub.i32		d0, d0, d1				\n\t"	//d0 = d0 + d1;
-
-	//polynomial:
-	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
-	"vld1.32 		{d2, d3, d4, d5}, [%1]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
-	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
-	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
-	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
-	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
-
-	//add exponent 	
-	"vdup.32 		d7, %0					\n\t"	//d7 = {rng, rng}
-	"vcvt.f32.s32 	d6, d6					\n\t"	//d6 = (float) d6
-	"vmla.f32 		d2, d6, d7				\n\t"	//d2 = d2 + d6 * d7		
-
-	"vmov.f32 		s0, s4					\n\t"	//s0 = s4
-
-	:: "r"(__logf_rng), "r"(__logf_lut) 
-    : "d0", "d1", "q1", "q2", "d6", "d7"
-	);
-#endif
-}
-
-float logf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	logf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return logf_c(x);
-#endif
-};
-
diff --git a/deps/math-neon/source/math_mat2.c b/deps/math-neon/source/math_mat2.c
deleted file mode 100644
index 0baad4b771..0000000000
--- a/deps/math-neon/source/math_mat2.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Matrices are specified in column major format:
-
-| a c |
-| b d |
-
-therefore m[2] = c
-*/
-
-#include "math_neon.h"
-
-//matrix matrix multipication. d = m0 * m1;
-void
-matmul2_c(float m0[4], float m1[4], float d[4])
-{	
-	d[0] = m0[0]*m1[0] + m0[2]*m1[1];	
-	d[1] = m0[1]*m1[0] + m0[3]*m1[1];
-	d[2] = m0[0]*m1[2] + m0[2]*m1[3];
-	d[3] = m0[1]*m1[2] + m0[3]*m1[3];
-}
-
-void
-matmul2_neon(float m0[4], float m1[4], float d[4])
-{	
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d0, d1}, [%0]			\n\t"	//Q1 = m0
-	"vld1.32 		{d2, d3}, [%1]			\n\t"	//Q2 = m1
-	
-	"vmul.f32 		d4, d0, d2[0]			\n\t"	//D4 = D0*D2[0]
-	"vmul.f32 		d5, d0, d3[0]			\n\t"	//D5 = D0*D3[0]
-	"vmla.f32 		d4, d1, d2[1]			\n\t"	//D4 += D1*D2[1]
-	"vmla.f32 		d5, d1, d3[1]			\n\t"	//D5 += D1*D3[1]
-	
-	"vst1.32 		{d4, d5}, [%2] 			\n\t"	//Q4 = m+12	
-	:: "r"(m0), "r"(m1), "r"(d) 
-    : "q0", "q1", "q2", "memory"
-	);	
-#else
-	matmul2_c(m0, m1, d);
-#endif
-}
-
-
-//matrix vector multiplication. d = m * v
-void
-matvec2_c(float m[4], float v[2], float d[2])
-{
-	d[0] = m[0]*v[0] + m[2]*v[1];
-	d[1] = m[1]*v[0] + m[3]*v[1];
-}
-
-void
-matvec2_neon(float m[4], float v[2], float d[2])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32        d0, [%1]				\n\t"	//d0 = v
-	"vld1.32 		{d1, d2}, [%0]			\n\t"	//Q1 = m
-	
-	"vmul.f32 		d3, d1, d0[0]			\n\t"	//Q5 = Q1*d0[0]
-	"vmla.f32 		d3, d2, d0[1]			\n\t"	//Q5 += Q1*d0[1] 
-	
-	"vst1.32 		d3, [%2] 				\n\t"	//Q4 = m+12	
-	:: "r"(m), "r"(v), "r"(d) 
-    : "d0", "d1", "d2","d3", "memory"
-	);	
-#else
-	matvec2_c(m, v, d);
-#endif
-}
diff --git a/deps/math-neon/source/math_mat3.c b/deps/math-neon/source/math_mat3.c
deleted file mode 100644
index aae178e179..0000000000
--- a/deps/math-neon/source/math_mat3.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Matrices are specified in row major format:
-
-| x0 x2 |
-| x1 x3 |
-
-therefore m[2] = x2
-
-*/
-
-#include "math_neon.h"
-
-//matrix matrix multipication. d = m0 * m1;
-void
-matmul3_c(float m0[9], float m1[9], float d[9])
-{
-	d[0] = m0[0]*m1[0] + m0[3]*m1[1] + m0[6]*m1[2];
-	d[1] = m0[1]*m1[0] + m0[4]*m1[1] + m0[7]*m1[2];
-	d[2] = m0[2]*m1[0] + m0[5]*m1[1] + m0[8]*m1[2];
-	d[3] = m0[0]*m1[3] + m0[3]*m1[4] + m0[6]*m1[5];
-	d[4] = m0[1]*m1[3] + m0[4]*m1[4] + m0[7]*m1[5];
-	d[5] = m0[2]*m1[3] + m0[5]*m1[4] + m0[8]*m1[5];
-	d[6] = m0[0]*m1[6] + m0[3]*m1[7] + m0[6]*m1[8];
-	d[7] = m0[1]*m1[6] + m0[4]*m1[7] + m0[7]*m1[8];
-	d[8] = m0[2]*m1[6] + m0[5]*m1[7] + m0[8]*m1[8];
-}
-
-void 
-matmul3_neon(float m0[9], float m1[9], float d[9])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d0, d1}, [%1]!			\n\t"	//q0 = m1
-	"vld1.32 		{d2, d3}, [%1]!			\n\t"	//q1 = m1+4
-	"flds 			s8, [%1]				\n\t"	//q2 = m1+8
-	
-	"vld1.32 		{d6, d7}, [%0]			\n\t"	//q3[0] = m0
-	"add 			%0, %0, #12				\n\t"	//q3[0] = m0
-	"vld1.32 		{d8, d9}, [%0]			\n\t"	//q4[0] = m0+12
-	"add 			%0, %0, #12				\n\t"	//q3[0] = m0
-	"vld1.32 		{d10}, [%0]				\n\t"	//q5[0] = m0+24
-	"add 			%0, %0, #8				\n\t"	//q3[0] = m0
-	"flds 			s22, [%0]				\n\t"	//q2 = m1+8
-	
-	"vmul.f32 		q6, q3, d0[0] 			\n\t"	//q12 = q3 * d0[0]
-	"vmul.f32 		q7, q3, d1[1] 			\n\t"	//q13 = q3 * d2[0]
-	"vmul.f32 		q8, q3, d3[0] 			\n\t"	//q14 = q3 * d4[0]
-	"vmla.f32 		q6, q4, d0[1] 			\n\t"	//q12 = q9 * d0[1]
-	"vmla.f32 		q7, q4, d2[0] 			\n\t"	//q13 = q9 * d2[1]
-	"vmla.f32 		q8, q4, d3[1] 			\n\t"	//q14 = q9 * d4[1]
-	"vmla.f32 		q6, q5, d1[0] 			\n\t"	//q12 = q10 * d0[0]
-	"vmla.f32 		q7, q5, d2[1] 			\n\t"	//q13 = q10 * d2[0]
-	"vmla.f32 		q8, q5, d4[0] 			\n\t"	//q14 = q10 * d4[0]
-
-	"vmov.f32 		q0, q8 					\n\t"	//q14 = q10 * d4[0]
-	"vst1.32 		{d12, d13}, [%2] 		\n\t"	//d = q12
-	"add 			%2, %2, #12				\n\t"	//q3[0] = m0
-	"vst1.32 		{d14, d15}, [%2] 		\n\t"	//d+4 = q13	
-	"add 			%2, %2, #12				\n\t"	//q3[0] = m0
-	"vst1.32 		{d0}, [%2] 				\n\t"	//d+8 = q14	
-	"add 			%2, %2, #8				\n\t"	//q3[0] = m0
-	"fsts 			s2, [%2] 				\n\t"	//d = q12	
-	
-	: "+r"(m0), "+r"(m1), "+r"(d): 
-    : "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "memory"
-	);	
-#else
-	matmul3_c(m0, m1, d);
-#endif
-};
-
-//matrix vector multiplication. d = m * v
-void
-matvec3_c(float m[9], float v[3], float d[3])
-{
-	d[0] = m[0]*v[0] + m[3]*v[1] + m[6]*v[2];
-	d[1] = m[1]*v[0] + m[4]*v[1] + m[7]*v[2];
-	d[2] = m[2]*v[0] + m[5]*v[1] + m[8]*v[2];
-}
-
-void
-matvec3_neon(float m[9], float v[3], float d[3])
-{
-#ifdef __MATH_NEON
-	int tmp;
-	asm volatile (
-	"mov 			%3, #12					\n\t"	//r3 = 12
-	"vld1.32 		{d0, d1}, [%1]			\n\t"	//Q0 = v
-	"vld1.32 		{d2, d3}, [%0], %3		\n\t"	//Q1 = m
-	"vld1.32 		{d4, d5}, [%0], %3		\n\t"	//Q2 = m+12
-	"vld1.32 		{d6, d7}, [%0], %3		\n\t"	//Q3 = m+24
-	
-	"vmul.f32 		q9, q1, d0[0]			\n\t"	//Q9 = Q1*Q0[0]
-	"vmla.f32 		q9, q2, d0[1]			\n\t"	//Q9 += Q2*Q0[1] 
-	"vmla.f32 		q9, q3, d1[0]			\n\t"	//Q9 += Q3*Q0[2] 
-	"vmov.f32 		q0, q9					\n\t"	//Q0 = q9
-	
-	"vst1.32 		d0, [%2]! 				\n\t"	//r2 = D24	
-	"fsts 			s2, [%2] 				\n\t"	//r2 = D25[0]	
-
-	: "+r"(m), "+r"(v), "+r"(d), "+r"(tmp):
-    : "q0", "q9", "q10","q11", "q12", "q13", "memory"
-	);	
-#else
-	matvec3_c(m, v, d);
-#endif
-}
diff --git a/deps/math-neon/source/math_mat4.c b/deps/math-neon/source/math_mat4.c
deleted file mode 100644
index 5bcf34b596..0000000000
--- a/deps/math-neon/source/math_mat4.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Matrices are specified in row major format:
-
-| x0 x2 |
-| x1 x3 |
-
-therefore m[2] = x2
-
-*/
-
-#include "math_neon.h"
-
-//matrix matrix multipication. d = m0 * m1;
-void
-matmul4_c(float m0[16], float m1[16], float d[16])
-{
-	d[0] = m0[0]*m1[0] + m0[4]*m1[1] + m0[8]*m1[2] + m0[12]*m1[3];
-	d[1] = m0[1]*m1[0] + m0[5]*m1[1] + m0[9]*m1[2] + m0[13]*m1[3];
-	d[2] = m0[2]*m1[0] + m0[6]*m1[1] + m0[10]*m1[2] + m0[14]*m1[3];
-	d[3] = m0[3]*m1[0] + m0[7]*m1[1] + m0[11]*m1[2] + m0[15]*m1[3];
-	d[4] = m0[0]*m1[4] + m0[4]*m1[5] + m0[8]*m1[6] + m0[12]*m1[7];
-	d[5] = m0[1]*m1[4] + m0[5]*m1[5] + m0[9]*m1[6] + m0[13]*m1[7];
-	d[6] = m0[2]*m1[4] + m0[6]*m1[5] + m0[10]*m1[6] + m0[14]*m1[7];
-	d[7] = m0[3]*m1[4] + m0[7]*m1[5] + m0[11]*m1[6] + m0[15]*m1[7];
-	d[8] = m0[0]*m1[8] + m0[4]*m1[9] + m0[8]*m1[10] + m0[12]*m1[11];
-	d[9] = m0[1]*m1[8] + m0[5]*m1[9] + m0[9]*m1[10] + m0[13]*m1[11];
-	d[10] = m0[2]*m1[8] + m0[6]*m1[9] + m0[10]*m1[10] + m0[14]*m1[11];
-	d[11] = m0[3]*m1[8] + m0[7]*m1[9] + m0[11]*m1[10] + m0[15]*m1[11];
-	d[12] = m0[0]*m1[12] + m0[4]*m1[13] + m0[8]*m1[14] + m0[12]*m1[15];
-	d[13] = m0[1]*m1[12] + m0[5]*m1[13] + m0[9]*m1[14] + m0[13]*m1[15];
-	d[14] = m0[2]*m1[12] + m0[6]*m1[13] + m0[10]*m1[14] + m0[14]*m1[15];
-	d[15] = m0[3]*m1[12] + m0[7]*m1[13] + m0[11]*m1[14] + m0[15]*m1[15];
-}
-
-void 
-matmul4_neon(float m0[16], float m1[16], float d[16])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d0, d1}, [%1]!			\n\t"	//q0 = m1
-	"vld1.32 		{d2, d3}, [%1]!			\n\t"	//q1 = m1+4
-	"vld1.32 		{d4, d5}, [%1]!			\n\t"	//q2 = m1+8
-	"vld1.32 		{d6, d7}, [%1]			\n\t"	//q3 = m1+12
-	"vld1.32 		{d16, d17}, [%0]!		\n\t"	//q8 = m0
-	"vld1.32 		{d18, d19}, [%0]!		\n\t"	//q9 = m0+4
-	"vld1.32 		{d20, d21}, [%0]!		\n\t"	//q10 = m0+8
-	"vld1.32 		{d22, d23}, [%0]		\n\t"	//q11 = m0+12
-
-	"vmul.f32 		q12, q8, d0[0] 			\n\t"	//q12 = q8 * d0[0]
-	"vmul.f32 		q13, q8, d2[0] 			\n\t"	//q13 = q8 * d2[0]
-	"vmul.f32 		q14, q8, d4[0] 			\n\t"	//q14 = q8 * d4[0]
-	"vmul.f32 		q15, q8, d6[0]	 		\n\t"	//q15 = q8 * d6[0]
-	"vmla.f32 		q12, q9, d0[1] 			\n\t"	//q12 = q9 * d0[1]
-	"vmla.f32 		q13, q9, d2[1] 			\n\t"	//q13 = q9 * d2[1]
-	"vmla.f32 		q14, q9, d4[1] 			\n\t"	//q14 = q9 * d4[1]
-	"vmla.f32 		q15, q9, d6[1] 			\n\t"	//q15 = q9 * d6[1]
-	"vmla.f32 		q12, q10, d1[0] 		\n\t"	//q12 = q10 * d0[0]
-	"vmla.f32 		q13, q10, d3[0] 		\n\t"	//q13 = q10 * d2[0]
-	"vmla.f32 		q14, q10, d5[0] 		\n\t"	//q14 = q10 * d4[0]
-	"vmla.f32 		q15, q10, d7[0] 		\n\t"	//q15 = q10 * d6[0]
-	"vmla.f32 		q12, q11, d1[1] 		\n\t"	//q12 = q11 * d0[1]
-	"vmla.f32 		q13, q11, d3[1] 		\n\t"	//q13 = q11 * d2[1]
-	"vmla.f32 		q14, q11, d5[1] 		\n\t"	//q14 = q11 * d4[1]
-	"vmla.f32 		q15, q11, d7[1]	 		\n\t"	//q15 = q11 * d6[1]
-
-	"vst1.32 		{d24, d25}, [%2]! 		\n\t"	//d = q12	
-	"vst1.32 		{d26, d27}, [%2]!		\n\t"	//d+4 = q13	
-	"vst1.32 		{d28, d29}, [%2]! 		\n\t"	//d+8 = q14	
-	"vst1.32 		{d30, d31}, [%2]	 	\n\t"	//d+12 = q15	
-
-	: "+r"(m0), "+r"(m1), "+r"(d) : 
-    : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
-	"memory"
-	);	
-#else
-	matmul4_c(m0, m1, d);
-#endif
-}
-
-
-//matrix vector multiplication. d = m * v
-void
-matvec4_c(float m[16], float v[4], float d[4])
-{
-	d[0] = m[0]*v[0] + m[4]*v[1] + m[8]*v[2] + m[12]*v[3];
-	d[1] = m[1]*v[0] + m[5]*v[1] + m[9]*v[2] + m[13]*v[3];
-	d[2] = m[2]*v[0] + m[6]*v[1] + m[10]*v[2] + m[14]*v[3];
-	d[3] = m[3]*v[0] + m[7]*v[1] + m[11]*v[2] + m[15]*v[3];
-}
-
-void
-matvec4_neon(float m[16], float v[4], float d[4])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d0, d1}, [%1]			\n\t"	//Q0 = v
-	"vld1.32 		{d18, d19}, [%0]!		\n\t"	//Q1 = m
-	"vld1.32 		{d20, d21}, [%0]!		\n\t"	//Q2 = m+4
-	"vld1.32 		{d22, d23}, [%0]!		\n\t"	//Q3 = m+8
-	"vld1.32 		{d24, d25}, [%0]!		\n\t"	//Q4 = m+12	
-	
-	"vmul.f32 		q13, q9, d0[0]			\n\t"	//Q5 = Q1*Q0[0]
-	"vmla.f32 		q13, q10, d0[1]			\n\t"	//Q5 += Q1*Q0[1] 
-	"vmla.f32 		q13, q11, d1[0]			\n\t"	//Q5 += Q2*Q0[2] 
-	"vmla.f32 		q13, q12, d1[1]			\n\t"	//Q5 += Q3*Q0[3]
-	
-	"vst1.32 		{d26, d27}, [%2] 		\n\t"	//Q4 = m+12	
-	: 
-	: "r"(m), "r"(v), "r"(d) 
-    : "q0", "q9", "q10","q11", "q12", "q13", "memory"
-	);	
-#else
-	matvec4_c(m, v, d);
-#endif
-}
-
-
-
-
-
diff --git a/deps/math-neon/source/math_modf.c b/deps/math-neon/source/math_modf.c
deleted file mode 100644
index f3259710af..0000000000
--- a/deps/math-neon/source/math_modf.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Assumes the floating point value |x| < 2,147,483,648
-*/
-
-#include "math_neon.h"
-
-float modf_c(float x, int *i)
-{
-	int n;
-	n = (int)x;
-	*i = n;
-	x = x - (float)n;
-	return x;
-}
-
-
-float modf_neon_hfp(float x, int *i)
-{
-#ifdef __MATH_NEON
-	asm volatile (	
-	"vcvt.s32.f32	d1, d0					\n\t"	//d1 = (int) d0; 
-	"vcvt.f32.s32	d2, d1					\n\t"	//d2 = (float) d1;
-	"vsub.f32		d0, d0, d2				\n\t"	//d0 = d0 - d2; 
-	"vstr.i32		s2, [r0]				\n\t"	//[r0] = d1[0] 
-	::: "d0", "d1", "d2"
-	);		
-#endif
-}
-
-
-float modf_neon_sfp(float x, int *i)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vdup.f32 		d0, r0					\n\t"	//d0 = {x, x}	
-	"vcvt.s32.f32	d1, d0					\n\t"	//d1 = (int) d0; 
-	"vcvt.f32.s32	d2, d1					\n\t"	//d2 = (float) d1;
-	"vsub.f32		d0, d0, d2				\n\t"	//d0 = d0 - d2; 
-	"vstr.i32		s2, [r1]				\n\t"	//[r0] = d1[0] 
-	"vmov.f32 		r0, s0					\n\t"	//r0 = d0[0];
-	::: "d0", "d1", "d2"
-	);
-		
-#else
-	return modf_c(x, i);
-#endif
-}
diff --git a/deps/math-neon/source/math_neon.h b/deps/math-neon/source/math_neon.h
deleted file mode 100644
index 66635808d0..0000000000
--- a/deps/math-neon/source/math_neon.h
+++ /dev/null
@@ -1,435 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef __MATH_NEON_H__ 
-#define __MATH_NEON_H__ 
-
-#if !defined(__i386__) && defined(__arm__)
-//if defined neon ASM routines are used, otherwise all calls to *_neon 
-//functions are rerouted to their equivalent *_c function.
-#define __MATH_NEON			
-
-//Default Floating Point value ABI: 0=softfp, 1=hardfp. Only effects *_neon routines.
-//You can access the hardfp versions directly via the *_hard suffix. 
-//You can access the softfp versions directly via the *_soft suffix. 
-#define __MATH_FPABI 	0	
-
-#endif
-
-#ifdef GCC
-#define ALIGN(A) __attribute__ ((aligned (A))
-#else
-#define ALIGN(A)
-#endif
-
-#ifndef _MATH_H
-#define M_PI		3.14159265358979323846	/* pi */
-#define M_PI_2		1.57079632679489661923	/* pi/2 */
-#define M_PI_4		0.78539816339744830962	/* pi/4 */
-#define M_E			2.7182818284590452354	/* e */
-#define M_LOG2E		1.4426950408889634074	/* log_2 e */
-#define M_LOG10E	0.43429448190325182765	/* log_10 e */
-#define M_LN2		0.69314718055994530942	/* log_e 2 */
-#define M_LN10		2.30258509299404568402	/* log_e 10 */
-#define M_1_PI		0.31830988618379067154	/* 1/pi */
-#define M_2_PI		0.63661977236758134308	/* 2/pi */
-#define M_2_SQRTPI	1.12837916709551257390	/* 2/sqrt(pi) */
-#define M_SQRT2		1.41421356237309504880	/* sqrt(2) */
-#define M_SQRT1_2	0.70710678118654752440	/* 1/sqrt(2) */
-#endif 
-
-#if __MATH_FPABI == 1
-#define sinf_neon		sinf_neon_hfp
-#define cosf_neon		cosf_neon_hfp
-#define	sincosf_neon	sincosf_neon_hfp
-#define tanf_neon		tanf_neon_hfp
-#define atanf_neon		atanf_neon_hfp
-#define atan2f_neon		atan2f_neon_hfp
-#define asinf_neon		asinf_neon_hfp
-#define acosf_neon		acosf_neon_hfp
-#define sinhf_neon		sinhf_neon_hfp
-#define coshf_neon		coshf_neon_hfp
-#define tanhf_neon		tanhf_neon_hfp
-#define expf_neon		expf_neon_hfp
-#define logf_neon		logf_neon_hfp
-#define log10f_neon		log10f_neon_hfp
-#define powf_neon		powf_neon_hfp
-#define floorf_neon		floorf_neon_hfp
-#define ceilf_neon		ceilf_neon_hfp
-#define fabsf_neon		fabsf_neon_hfp
-#define ldexpf_neon		ldexpf_neon_hfp
-#define frexpf_neon		frexpf_neon_hfp
-#define fmodf_neon		fmodf_neon_hfp
-#define modf_neon		modf_neon_hfp
-#define sqrtf_neon		sqrtf_neon_hfp
-#define invsqrtf_neon	invsqrtf_neon_hfp
-#else
-#define sinf_neon		sinf_neon_sfp
-#define cosf_neon		cosf_neon_sfp
-#define	sincosf_neon	sincosf_neon_sfp
-#define tanf_neon		tanf_neon_sfp
-#define atanf_neon		atanf_neon_sfp
-#define atan2f_neon		atan2f_neon_sfp
-#define asinf_neon		asinf_neon_sfp
-#define acosf_neon		acosf_neon_sfp
-#define sinhf_neon		sinhf_neon_sfp
-#define coshf_neon		coshf_neon_sfp
-#define tanhf_neon		tanhf_neon_sfp
-#define expf_neon		expf_neon_sfp
-#define logf_neon		logf_neon_sfp
-#define log10f_neon		log10f_neon_sfp
-#define powf_neon		powf_neon_sfp
-#define floorf_neon		floorf_neon_sfp
-#define ceilf_neon		ceilf_neon_sfp
-#define fabsf_neon		fabsf_neon_sfp
-#define ldexpf_neon		ldexpf_neon_sfp
-#define frexpf_neon		frexpf_neon_sfp
-#define fmodf_neon		fmodf_neon_sfp
-#define modf_neon		modf_neon_sfp
-#define sqrtf_neon		sqrtf_neon_sfp
-#define invsqrtf_neon	invsqrtf_neon_sfp
-
-#define dot2_neon		dot2_neon_sfp
-#define dot3_neon		dot3_neon_sfp
-#define dot4_neon		dot4_neon_sfp
-#endif
-
-/* 
-function:	enable_runfast
-			this function enables the floating point runfast mode on the 
-			ARM Cortex A8.  	
-*/
-void		enable_runfast();
-
-
-float dot2_c(float v0[2], float v1[2]);
-float dot2_neon(float v0[2], float v1[2]);
-float dot3_c(float v0[3], float v1[3]);
-float dot3_neon(float v0[3], float v1[3]);
-float dot4_c(float v0[4], float v1[4]);
-float dot4_neon(float v0[4], float v1[4]);
-
-void cross3_c(float v0[3], float v1[3], float d[3]);
-void cross3_neon(float v0[3], float v1[3], float d[3]);
-
-void normalize2_c(float v[2], float d[2]);
-void normalize2_neon(float v[2], float d[2]);
-void normalize3_c(float v[3], float d[3]);
-void normalize3_neon(float v[3], float d[3]);
-void normalize4_c(float v[4], float d[4]);
-void normalize4_neon(float v[4], float d[4]);
-
-/* 
-function:	matmul2
-arguments:  m0 2x2 matrix, m1 2x2 matrix
-return: 	d 2x2 matrix
-expression: d = m0 * m1
-*/
-void		matmul2_c(float m0[4], float m1[4], float d[4]);
-void		matmul2_neon(float m0[4], float m1[4], float d[4]);
-
-/* 
-function:	matmul3
-arguments:  m0 3x3 matrix, m1 3x3 matrix
-return: 	d 3x3 matrix
-expression: d = m0 * m1
-*/
-void		matmul3_c(float m0[9], float m1[9], float d[9]);
-void		matmul3_neon(float m0[9], float m1[9], float d[9]);
-
-/* 
-function:	matmul4
-arguments:  m0 4x4 matrix, m1 4x4 matrix
-return: 	d 4x4 matrix
-expression: d = m0 * m1
-*/
-void		matmul4_c(float m0[16], float m1[16], float d[16]);
-void		matmul4_neon(float m0[16], float m1[16], float d[16]);
-
-/* 
-function:	matvec2
-arguments:  m 2x2 matrix, v 2 element vector
-return: 	d 2x2 matrix
-expression: d = m * v
-*/
-void		matvec2_c(float m[4], float v[2], float d[2]);
-void		matvec2_neon(float m[4], float v[2], float d[2]);
-
-/* 
-function:	matvec3
-arguments:  m 3x3 matrix, v 3 element vector
-return: 	d 3x3 matrix
-expression: d = m * v
-*/
-void		matvec3_c(float m[9], float v[3], float d[3]);
-void		matvec3_neon(float m[9], float v[3], float d[3]);
-
-/* 
-function:	matvec4
-arguments:  m 4x4 matrix, v 4 element vector
-return: 	d 4x4 matrix
-expression: d = m * v
-*/
-void		matvec4_c(float m[16], float v[4], float d[4]);
-void		matvec4_neon(float m[16], float v[4], float d[4]);
-
-/* 
-function:	sinf
-arguments:  x radians
-return: 	the sine function evaluated at x radians.	
-expression: r = sin(x) 	
-*/
-float 		sinf_c(float x);
-float 		sinf_neon_hfp(float x);
-float 		sinf_neon_sfp(float x);
-
-/* 
-function:	cosf
-arguments:  x radians
-return: 	the cosine function evaluated at x radians.	
-expression: r = cos(x) 	
-notes:		computed using cos(x) = sin(x + pi/2)
-*/
-float 		cosf_c(float x);
-float 		cosf_neon_hfp(float x);
-float 		cosf_neon_sfp(float x);
-
-/* 
-function:	sincosf
-arguments:  x radians, r[2] result array.
-return: 	both the sine and the cosine evaluated at x radians.	
-expression: r = {sin(x), cos(x)} 	
-notes:		faster than evaluating seperately.
-*/
-void		sincosf_c(float x, float r[2]);
-void		sincosf_neon_hfp(float x, float r[2]);
-void		sincosf_neon_sfp(float x, float r[2]);
-
-/* 
-function:	sinfv
-return: 	the sine function evaluated at x[i] radians 	
-expression: r[i] = sin(x[i])	
-notes:		faster than evaluating individually.
-			r and x can be the same memory location.
-*/
-void		sinfv_c(float *x, int n, float *r);
-void  		sinfv_neon(float *x, int n, float *r);
-
-/* 
-function:	tanf
-return: 	the tangent evaluated at x radians.	
-expression: r = tan(x) 	
-notes:		computed using tan(x) = sin(x) / cos(x)
-*/
-float 		tanf_c(float x);
-float 		tanf_neon_hfp(float x);
-float 		tanf_neon_sfp(float x);
-
-/* 
-function:	atanf
-return: 	the arctangent evaluated at x.	
-expression: r = atan(x) 	
-*/
-float 		atanf_c(float x);
-float 		atanf_neon_hfp(float x);
-float 		atanf_neon_sfp(float x);
-
-/* 
-function:	atanf
-return: 	the arctangent evaluated at x.	
-expression: r = atan(x) 	
-*/
-float 		atan2f_c(float y, float x);
-float 		atan2f_neon_hfp(float y, float x);
-float 		atan2f_neon_sfp(float y, float x);
-
-/* 
-function:	asinf
-return: 	the arcsine evaluated at x.	
-expression: r = asin(x) 	
-*/
-float 		asinf_c(float x);
-float 		asinf_neon_hfp(float x);
-float 		asinf_neon_sfp(float x);
-
-/* 
-function:	acosf
-return: 	the arcsine evaluated at x.	
-expression: r = asin(x) 	
-*/
-float 		acosf_c(float x);
-float 		acosf_neon_hfp(float x);
-float 		acosf_neon_sfp(float x);
-
-/* 
-function:	sinhf
-return: 	the arcsine evaluated at x.	
-expression: r = asin(x) 	
-*/
-float 		sinhf_c(float x);
-float 		sinhf_neon_hfp(float x);
-float 		sinhf_neon_sfp(float x);
-
-/* 
-function:	coshf
-return: 	the arcsine evaluated at x.	
-expression: r = asin(x) 	
-*/
-float 		coshf_c(float x);
-float 		coshf_neon_hfp(float x);
-float 		coshf_neon_sfp(float x);
-
-/* 
-function:	tanhf
-return: 	the arcsine evaluated at x.	
-expression: r = asin(x) 	
-*/
-float 		tanhf_c(float x);
-float 		tanhf_neon_hfp(float x);
-float 		tanhf_neon_sfp(float x);
-
-/* 
-function:	expf
-return: 	the natural exponential evaluated at x.	
-expression: r = e ** x	
-*/
-float 		expf_c(float x);
-float 		expf_neon_hfp(float x);
-float 		expf_neon_sfp(float x);
-
-/* 
-function:	logf
-return: 	the value of the natural logarithm of x.	
-expression: r = ln(x)	
-notes:		assumes x > 0
-*/
-float 		logf_c(float x);
-float 		logf_neon_hfp(float x);
-float 		logf_neon_sfp(float x);
-
-/* 
-function:	log10f
-return: 	the value of the power 10 logarithm of x.	
-expression: r = log10(x)	
-notes:		assumes x > 0
-*/
-float 		log10f_c(float x);
-float 		log10f_neon_hfp(float x);
-float 		log10f_neon_sfp(float x);
-
-/* 
-function:	powf
-return: 	x raised to the power of n, x ** n.
-expression: r = x ** y	
-notes:		computed using e ** (y * ln(x))
-*/
-float 		powf_c(float x, float n);
-float 		powf_neon_sfp(float x, float n);
-float 		powf_neon_hfp(float x, float n);
-
-/* 
-function:	floorf
-return: 	x rounded down (towards negative infinity) to its nearest 
-			integer value.	
-notes:		assumes |x| < 2 ** 31
-*/
-float 		floorf_c(float x);
-float 		floorf_neon_sfp(float x);
-float 		floorf_neon_hfp(float x);
-
-/* 
-function:	ceilf
-return: 	x rounded up (towards positive infinity) to its nearest 
-			integer value.	
-notes:		assumes |x| < 2 ** 31
-*/
-float 		ceilf_c(float x);
-float 		ceilf_neon_hfp(float x);
-float 		ceilf_neon_sfp(float x);
-
-/* 
-function:	fabsf
-return: 	absolute vvalue of x	
-notes:		assumes |x| < 2 ** 31
-*/
-float 		fabsf_c(float x);
-float 		fabsf_neon_hfp(float x);
-float 		fabsf_neon_sfp(float x);
-
-/* 
-function:	ldexpf
-return: 	the value of m multiplied by 2 to the power of e. 
-expression: r = m * (2 ** e)
-*/
-float 		ldexpf_c(float m, int e);
-float 		ldexpf_neon_hfp(float m, int e);
-float 		ldexpf_neon_sfp(float m, int e);
-
-/* 
-function:	frexpf
-return: 	the exponent and mantissa of x 
-*/
-float 		frexpf_c(float x, int *e);
-float 		frexpf_neon_hfp(float x, int *e);
-float 		frexpf_neon_sfp(float x, int *e);
-
-/* 
-function:	fmodf
-return: 	the remainder of x divided by y, x % y	
-expression: r = x - floor(x / y) * y;
-notes:		assumes that |x / y| < 2 ** 31 
-*/
-float 		fmodf_c(float x, float y);
-float 		fmodf_neon_hfp(float x, float y);
-float 		fmodf_neon_sfp(float x, float y);
-
-/* 
-function:	modf
-return: 	breaks x into the integer (i) and fractional part (return)
-notes:		assumes that |x| < 2 ** 31 
-*/
-float 		modf_c(float x, int *i);
-float 		modf_neon_hfp(float x, int *i);
-float 		modf_neon_sfp(float x, int *i);
-
-/* 
-function:	sqrtf
-return: 	(x^0.5)
-notes:		 
-*/
-float 		sqrtf_c(float x);
-float 		sqrtf_neon_hfp(float x);
-float 		sqrtf_neon_sfp(float x);
-
-
-/* 
-function:	invsqrtf
-return: 	1.0f / (x^0.5)
-notes:		 
-*/
-float 		invsqrtf_c(float x);
-float 		invsqrtf_neon_hfp(float x);
-float 		invsqrtf_neon_sfp(float x);
-
-#endif
diff --git a/deps/math-neon/source/math_powf.c b/deps/math-neon/source/math_powf.c
deleted file mode 100644
index 6faed4eeac..0000000000
--- a/deps/math-neon/source/math_powf.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Based on x ^ n = exp(n * log(x))
-
-Test func : powf(x, n)
-Test Range: (1,1) < (x, n) < (10, 10)
-Peak Error:	~0.0010%
-RMS  Error: ~0.0002%
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __powf_rng[2] = {
-	1.442695041f,
-	0.693147180f
-};
-
-const float __powf_lut[16] = {
-	-2.295614848256274, 	//p0	log
-	-2.470711633419806, 	//p4
-	-5.686926051100417, 	//p2
-	-0.165253547131978, 	//p6
-	+5.175912446351073, 	//p1
-	+0.844006986174912, 	//p5
-	+4.584458825456749, 	//p3
-	+0.014127821926000,		//p7
-	0.9999999916728642,		//p0	exp
-	0.04165989275009526, 	//p4
-	0.5000006143673624, 	//p2
-	0.0014122663401803872, 	//p6
-	1.000000059694879, 		//p1
-	0.008336936973260111, 	//p5
-	0.16666570253074878, 	//p3
-	0.00019578093328483123	//p7
-};
-
-float powf_c(float x, float n)
-{
-	float a, b, c, d, xx;
-	int m;
-	
-	union {
-		float   f;
-		int 	i;
-	} r;
-	
-	//extract exponent
-	r.f = x;
-	m = (r.i >> 23);
-	m = m - 127;
-	r.i = r.i - (m << 23);
-	
-	//Taylor Polynomial (Estrins)
-	xx = r.f * r.f;
-	a = (__powf_lut[4] * r.f) + (__powf_lut[0]);
-	b = (__powf_lut[6] * r.f) + (__powf_lut[2]);
-	c = (__powf_lut[5] * r.f) + (__powf_lut[1]);
-	d = (__powf_lut[7] * r.f) + (__powf_lut[3]);
-	a = a + b * xx;
-	c = c + d * xx;
-	xx = xx * xx;
-	r.f = a + c * xx;
-
-	//add exponent
-	r.f = r.f + ((float) m) * __powf_rng[1];
-
-	r.f = r.f * n;
-
-
-	//Range Reduction:
-	m = (int) (r.f * __powf_rng[0]);
-	r.f = r.f - ((float) m) * __powf_rng[1];	
-	
-	//Taylor Polynomial (Estrins)
-	a = (__powf_lut[12] * r.f) + (__powf_lut[8]);
-	b = (__powf_lut[14] * r.f) + (__powf_lut[10]);
-	c = (__powf_lut[13] * r.f) + (__powf_lut[9]);
-	d = (__powf_lut[15] * r.f) + (__powf_lut[11]);
-	xx = r.f * r.f;
-	a = a + b * xx; 
-	c = c + d * xx;
-	xx = xx* xx;
-	r.f = a + c * xx; 
-	
-	//multiply by 2 ^ m 
-	m = m << 23;
-	r.i = r.i + m;
-
-	return r.f;
-}
-
-float powf_neon_hfp(float x, float n)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-		
-	"vdup.f32		d16, d0[1]				\n\t"	//d16 = {y,y};	
-	"vdup.f32		d0, d0[0]				\n\t"	//d0 = {x,x};
-	
-	//extract exponent
-	"vmov.i32		d2, #127				\n\t"	//d2 = 127;
-	"vshr.u32		d6, d0, #23				\n\t"	//d6 = d0 >> 23;
-	"vsub.i32		d6, d6, d2				\n\t"	//d6 = d6 - d2;
-	"vshl.u32		d1, d6, #23				\n\t"	//d1 = d6 << 23;
-	"vsub.i32		d0, d0, d1				\n\t"	//d0 = d0 + d1;
-
-	//polynomial:
-	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
-	"vld1.32 		{d2, d3, d4, d5}, [%1]!	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
-	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
-	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
-	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
-	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
-
-	//add exponent 	
-	"vld1.32 		d7, [%0]				\n\t"	//d7 = {invrange, range}
-	"vcvt.f32.s32 	d6, d6					\n\t"	//d6 = (float) d6
-	"vmla.f32 		d2, d6, d7[1]			\n\t"	//d2 = d2 + d6 * d7[1]		
-
-	"vdup.f32 		d0, d2[0]				\n\t"	//d0 = d2[0]		
-	"vmul.f32 		d0, d0, d16				\n\t"	//d0 = d0 * d16	
-
-	//Range Reduction:
-	"vmul.f32 		d6, d0, d7[0]			\n\t"	//d6 = d0 * d7[0] 
-	"vcvt.u32.f32 	d6, d6					\n\t"	//d6 = (int) d6
-	"vcvt.f32.u32 	d1, d6					\n\t"	//d1 = (float) d6
-	"vmls.f32 		d0, d1, d7[1]			\n\t"	//d0 = d0 - d1 * d7[1]
-		
-	//polynomial:
-	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
-	"vld1.32 		{d2, d3, d4, d5}, [%1]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
-	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
-	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
-	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
-	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
-
-	//multiply by 2 ^ m 	
-	"vshl.i32 		d6, d6, #23				\n\t"	//d6 = d6 << 23		
-	"vadd.i32 		d0, d2, d6				\n\t"	//d0 = d2 + d6		
-
-
-	:: "r"(__powf_rng), "r"(__powf_lut) 
-    : "d0", "d1", "d2","d3", "d4", "d5", "d6", "d7"
-	);
-#endif
-}
-
-float powf_neon_sfp(float x, float n)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	asm volatile ("vmov.f32 s1, r1 		\n\t");
-	powf_neon_hfp(x, n);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return powf_c(x, n);
-#endif
-};
diff --git a/deps/math-neon/source/math_runfast.c b/deps/math-neon/source/math_runfast.c
deleted file mode 100644
index 0d06c0bfc8..0000000000
--- a/deps/math-neon/source/math_runfast.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-
-void 
-enable_runfast()
-{
-#ifdef __arm__
-	static const unsigned int x = 0x04086060;
-	static const unsigned int y = 0x03000000;
-	int r;
-	asm volatile (
-		"fmrx	%0, fpscr			\n\t"	//r0 = FPSCR
-		"and	%0, %0, %1			\n\t"	//r0 = r0 & 0x04086060
-		"orr	%0, %0, %2			\n\t"	//r0 = r0 | 0x03000000
-		"fmxr	fpscr, %0			\n\t"	//FPSCR = r0
-		: "=r"(r)
-		: "r"(x), "r"(y)
-	);
-#endif
-}
diff --git a/deps/math-neon/source/math_sincosf.c b/deps/math-neon/source/math_sincosf.c
deleted file mode 100644
index 365826f8ff..0000000000
--- a/deps/math-neon/source/math_sincosf.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __sincosf_rng[2] = {
-	2.0 / M_PI,
-	M_PI / 2.0
-};
-
-const float __sincosf_lut[8] = {
-	-0.00018365f,	//p7
-	-0.00018365f,	//p7
-	+0.00830636f,	//p5
-	+0.00830636f,	//p5
-	-0.16664831f,	//p3
-	-0.16664831f,	//p3
-	+0.99999661f,	//p1
-	+0.99999661f,	//p1
-};
-
-void sincosf_c( float x, float r[2])
-{
-	union {
-		float 	f;
-		int 	i;
-	} ax, bx;
-	
-	float y;
-	float a, b, c, d, xx, yy;
-	int m, n, o, p;
-	
-	y = x + __sincosf_rng[1];
-	ax.f = fabsf(x);
-	bx.f = fabsf(y);
-	
-	//Range Reduction:
-	m = (int) (ax.f * __sincosf_rng[0]);	
-	o = (int) (bx.f * __sincosf_rng[0]);	
-	ax.f = ax.f - (((float)m) * __sincosf_rng[1]);
-	bx.f = bx.f - (((float)o) * __sincosf_rng[1]);
-	
-	//Test Quadrant
-	n = m & 1;
-	p = o & 1;
-	ax.f = ax.f - n * __sincosf_rng[1];	
-	bx.f = bx.f - p * __sincosf_rng[1];	
-	m = m >> 1;
-	o = o >> 1;
-	n = n ^ m;
-	p = p ^ o;
-	m = (x < 0.0);
-	o = (y < 0.0);
-	n = n ^ m;	
-	p = p ^ o;	
-	n = n << 31;
-	p = p << 31;
-	ax.i = ax.i ^ n; 
-	bx.i = bx.i ^ p; 
-
-	//Taylor Polynomial
-	xx = ax.f * ax.f;	
-	yy = bx.f * bx.f;
-	r[0] = __sincosf_lut[0];
-	r[1] = __sincosf_lut[1];
-	r[0] = r[0] * xx + __sincosf_lut[2];
-	r[1] = r[1] * yy + __sincosf_lut[3];
-	r[0] = r[0] * xx + __sincosf_lut[4];
-	r[1] = r[1] * yy + __sincosf_lut[5];
-	r[0] = r[0] * xx + __sincosf_lut[6];
-	r[1] = r[1] * yy + __sincosf_lut[7];
-	r[0] = r[0] * ax.f;
-	r[1] = r[1] * bx.f;
-
-}
-
-void sincosf_neon_hfp(float x, float r[2])
-{
-//HACK: Assumes for softfp that r1 = x, and for hardfp that s0 = x.
-#ifdef __MATH_NEON
-	asm volatile (
-	//{x, y} = {x, x + pi/2}
-	"vdup.f32 		d1, d0[0]				\n\t"	//d1 = {x, x}
-	"vld1.32 		d3, [%1]				\n\t"	//d3 = {invrange, range}
-	"vadd.f32 		d0, d1, d3				\n\t"	//d0 = d1 + d3
-	"vmov.f32 		s0, s2					\n\t"	//d0[0] = d1[0]	
-	"vabs.f32 		d1, d0					\n\t"	//d1 = {abs(x), abs(y)}
-	
-	//Range Reduction:
-	"vmul.f32 		d2, d1, d3[0]			\n\t"	//d2 = d1 * d3[0] 
-	"vcvt.u32.f32 	d2, d2					\n\t"	//d2 = (int) d2
-	"vcvt.f32.u32 	d4, d2					\n\t"	//d4 = (float) d2
-	"vmls.f32 		d1, d4, d3[1]			\n\t"	//d1 = d1 - d4 * d3[1]
-	
-	//Checking Quadrant:
-	//ax = ax - (k&1) * M_PI_2
-	"vmov.i32	 	d4, #1					\n\t"	//d4 = 1
-	"vand.i32	 	d4, d4, d2				\n\t"	//d4 = d4 & d2
-	"vcvt.f32.u32 	d5, d4					\n\t"	//d5 = (float) d4
-	"vmls.f32 		d1, d5, d3[1]			\n\t"	//d1 = d1 - d5 * d3[1]
-
-	//ax = ax ^ ((k & 1) ^ (k >> 1) ^ (x < 0) << 31)
-	"vshr.u32 		d3, d2, #1				\n\t"	//d3 = d2 >> 1
-	"veor.i32 		d4, d4, d3				\n\t"	//d4 = d4 ^ d3	
-	"vclt.f32 		d3, d0, #0				\n\t"	//d3 = (d0 < 0.0)
-	"veor.i32 		d4, d4, d3				\n\t"	//d4 = d4 ^ d3	
-	"vshl.i32 		d4, d4, #31				\n\t"	//d4 = d4 << 31
-	"veor.i32 		d0, d1, d4				\n\t"	//d0 = d1 ^ d4
-	
-	//polynomial:
-	"vldm 			%2!, {d2, d3}	 		\n\t"	//d2 = {p7, p7}, d3 = {p5, p5}, r3 += 4;
-	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0 * d0 = {x^2, y^2}
-	"vldm 			%2!, {d4}				\n\t"	//d4 = {p3, p3}, r3 += 2;
-	"vmla.f32 		d3, d2, d1				\n\t"	//d3 = d3 + d2 * d1;	
-	"vldm	 		%2!, {d5}				\n\t"	//d5 = {p1, p1}, r3 += 2;
-	"vmla.f32 		d4, d3, d1				\n\t"	//d4 = d4 + d3 * d1;	
-	"vmla.f32 		d5, d4, d1				\n\t"	//d5 = d5 + d4 * d1;	
-	"vmul.f32 		d5, d5, d0				\n\t"	//d5 = d5 * d0;	
-	
-	"vstm.f32 		%0, {d5}				\n\t"	//r[0] = d5[0], r[1]=d5[1];	
-	
-	: "+r"(r)
-	: "r"(__sincosf_rng), "r"(__sincosf_lut) 
-    : "d0", "d1", "d2", "d3", "d4", "d5"
-	);
-#else
-	sincosf_c(x, r);
-#endif
-}
-
-void sincosf_neon_sfp(float x, float r[2])
-{
-#ifdef __MATH_NEON
-	asm volatile ("vdup.f32 d0, r0 		\n\t");
-	sincosf_neon_hfp(x, r);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else 
-    sincosf_c(x, r);
-#endif
-};
-
diff --git a/deps/math-neon/source/math_sinf.c b/deps/math-neon/source/math_sinf.c
deleted file mode 100644
index 257f219672..0000000000
--- a/deps/math-neon/source/math_sinf.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <math.h>
-#include "math_neon.h"
-
-static const float __sinf_rng[2] = {
-	2.0 / M_PI,
-	M_PI / 2.0
-} ALIGN(16);
-
-static const float __sinf_lut[4] = {
-	-0.00018365f,	//p7
-	-0.16664831f,	//p3
-	+0.00830636f,	//p5
-	+0.99999661f,	//p1
-} ALIGN(16);
-
-float sinf_c(float x)
-{
-	union {
-		float 	f;
-		int 	i;
-	} ax;
-	
-	float r, a, b, xx;
-	int m, n;
-	
-	ax.f = fabsf(x);
-
-	//Range Reduction:
-	m = (int) (ax.f * __sinf_rng[0]);	
-	ax.f = ax.f - (((float)m) * __sinf_rng[1]);
-
-	//Test Quadrant
-	n = m & 1;
-	ax.f = ax.f - n * __sinf_rng[1];	
-	m = m >> 1;
-	n = n ^ m;
-	m = (x < 0.0);
-	n = n ^ m;	
-	n = n << 31;
-	ax.i = ax.i ^ n; 
-
-	//Taylor Polynomial (Estrins)
-	xx = ax.f * ax.f;	
-	a = (__sinf_lut[0] * ax.f) * xx + (__sinf_lut[2] * ax.f);
-	b = (__sinf_lut[1] * ax.f) * xx + (__sinf_lut[3] * ax.f);
-	xx = xx * xx;
-	r = b + a * xx;
-
-	return r;
-}
-
-float sinf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	
-	"vld1.32 		d3, [%0]				\n\t"	//d3 = {invrange, range}
-	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}
-	"vabs.f32 		d1, d0					\n\t"	//d1 = {ax, ax}
-	
-	"vmul.f32 		d2, d1, d3[0]			\n\t"	//d2 = d1 * d3[0] 
-	"vcvt.u32.f32 	d2, d2					\n\t"	//d2 = (int) d2
-	"vmov.i32	 	d5, #1					\n\t"	//d5 = 1	
-	"vcvt.f32.u32 	d4, d2					\n\t"	//d4 = (float) d2	
-	"vshr.u32 		d7, d2, #1				\n\t"	//d7 = d2 >> 1
-	"vmls.f32 		d1, d4, d3[1]			\n\t"	//d1 = d1 - d4 * d3[1]
-	
-	"vand.i32 		d5, d2, d5				\n\t"	//d5 = d2 & d5
-	"vclt.f32 		d18, d0, #0				\n\t"	//d18 = (d0 < 0.0)
-	"vcvt.f32.u32 	d6, d5					\n\t"	//d6 = (float) d5
-	"vmls.f32 		d1, d6, d3[1]			\n\t"	//d1 = d1 - d6 * d3[1]
-	"veor.i32 		d5, d5, d7				\n\t"	//d5 = d5 ^ d7	
-	"vmul.f32 		d2, d1, d1				\n\t"	//d2 = d1*d1 = {x^2, x^2}	
-	
-	"vld1.32 		{d16, d17}, [%1]		\n\t"	//q8 = {p7, p3, p5, p1}
-	"veor.i32 		d5, d5, d18				\n\t"	//d5 = d5 ^ d18	
-	"vshl.i32 		d5, d5, #31				\n\t"	//d5 = d5 << 31
-	"veor.i32 		d1, d1, d5				\n\t"	//d1 = d1 ^ d5
-	
-	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
-	"vmul.f32 		q0, q8, d1[0]			\n\t"	//q0 = q8 * d1[0] = {p7x, p3x, p5x, p1x}
-	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2 = {p5x + p7x^3, p1x + p3x^3}		
-	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d0 = {...., p1x + p3x^3 + p5x^5 + p7x^7}		
-
-	"vmov.f32 		s0, s3					\n\t"	//s0 = s3
-	: 
-	: "r"(__sinf_rng), "r"(__sinf_lut) 
-    : "q0", "q1", "q2", "q3", "q8", "q9"
-	);
-#endif
-}
-
-float sinf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vdup.f32 d0, r0 		\n\t");
-	sinf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return sinf_c(x);
-#endif
-
-};
-
diff --git a/deps/math-neon/source/math_sinfv.c b/deps/math-neon/source/math_sinfv.c
deleted file mode 100644
index 0dfc878170..0000000000
--- a/deps/math-neon/source/math_sinfv.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __sinfv_rng[2] = {
-	2.0 / M_PI,
-	M_PI / 2.0, 
-};
-
-const float __sinfv_lut[4] = {
-	-0.00018365f,	//p7
-	-0.16664831f,	//p3
-	+0.00830636f,	//p5
-	+0.99999661f,	//p1
-};
-
-void sinfv_c(float *x, int n, float *r)
-{
-	union {
-		float 	f;
-		int 	i;
-	} ax, bx;
-	
-	float aa, ab, ba, bb, axx, bxx;
-	int am, bm, an, bn;
-
-	if (n & 0x1) {
-		*r++ = sinf_c(*x++);
-		n--;
-	}
-
-	float rng0 = __sinfv_rng[0];
-	float rng1 = __sinfv_rng[1];
-
-	while(n > 0){
-		
-		float x0 = *x++;
-		float x1 = *x++;
-		
-		ax.f = fabsf(x0);
-		bx.f = fabsf(x1);
-
-		//Range Reduction:
-		am = (int) (ax.f * rng0);	
-		bm = (int) (bx.f * rng0);	
-		
-		ax.f = ax.f - (((float)am) * rng1);
-		bx.f = bx.f - (((float)bm) * rng1);
-
-		//Test Quadrant
-		an = am & 1;
-		bn = bm & 1;
-		ax.f = ax.f - an * rng1;
-		bx.f = bx.f - bn * rng1;
-		am = (am & 2) >> 1;
-		bm = (bm & 2) >> 1;
-		ax.i = ax.i ^ ((an ^ am ^ (x0 < 0)) << 31);
-		bx.i = bx.i ^ ((bn ^ bm ^ (x1 < 0)) << 31);
-			
-		//Taylor Polynomial (Estrins)
-		axx = ax.f * ax.f;	
-		bxx = bx.f * bx.f;	
-		aa = (__sinfv_lut[0] * ax.f) * axx + (__sinfv_lut[2] * ax.f);
-		ba = (__sinfv_lut[0] * bx.f) * bxx + (__sinfv_lut[2] * bx.f);
-		ab = (__sinfv_lut[1] * ax.f) * axx + (__sinfv_lut[3] * ax.f);
-		bb = (__sinfv_lut[1] * bx.f) * bxx + (__sinfv_lut[3] * bx.f);
-		axx = axx * axx;
-		bxx = bxx * bxx;
-		*r++ = ab + aa * axx;
-		*r++ = bb + ba * bxx;
-		n -= 2;
-	}
-	
-	
-}
-
-void sinfv_neon(float *x, int n, float *r)
-{
-#ifdef __MATH_NEON
-	asm volatile (""
-	:
-	:"r"(x), "r"(n)
-	);
-#else
-	sinfv_c(x, n, r);
-#endif
-}
diff --git a/deps/math-neon/source/math_sinhf.c b/deps/math-neon/source/math_sinhf.c
deleted file mode 100644
index 820a490dae..0000000000
--- a/deps/math-neon/source/math_sinhf.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __sinhf_rng[2] = {
-	1.442695041f,
-	0.693147180f
-};
-
-const float __sinhf_lut[16] = {
-	0.00019578093328483123,	//p7
-	0.00019578093328483123,	//p7
-	0.0014122663401803872, 	//p6
-	0.0014122663401803872, 	//p6
-	0.008336936973260111, 	//p5
-	0.008336936973260111, 	//p5
-	0.04165989275009526, 	//p4
-	0.04165989275009526, 	//p4
-	0.16666570253074878, 	//p3
-	0.16666570253074878, 	//p3
-	0.5000006143673624, 	//p2
-	0.5000006143673624, 	//p2
-	1.000000059694879, 		//p1
-	1.000000059694879, 		//p1
-	0.9999999916728642,		//p0
-	0.9999999916728642		//p0
-};
-
-
-float sinhf_c(float x)
-{
-	float a, b, xx;
-	xx = -x;
-	a = expf_c(x);
-	b = expf_c(xx);
-	a = a - b;
-	a = a * 0.5f;
-	return a;
-}
-
-
-float sinhf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}	
-	"fnegs 			s1, s1					\n\t"	//s1 = -s1
-	
-	//Range Reduction:
-	"vld1.32 		d2, [%0]				\n\t"	//d2 = {invrange, range}
-	"vld1.32 		{d16, d17}, [%1]!		\n\t"	
-	"vmul.f32 		d6, d0, d2[0]			\n\t"	//d6 = d0 * d2[0] 
-	"vcvt.s32.f32 	d6, d6					\n\t"	//d6 = (int) d6
-	"vld1.32 		{d18}, [%1]!			\n\t"	
-	"vcvt.f32.s32 	d1, d6					\n\t"	//d1 = (float) d6
-	"vld1.32 		{d19}, [%1]!			\n\t"	
-	"vmls.f32 		d0, d1, d2[1]			\n\t"	//d0 = d0 - d1 * d2[1]
-	"vld1.32 		{d20}, [%1]!			\n\t"	
-		
-	//polynomial:
-	"vmla.f32 		d17, d16, d0			\n\t"	//d17 = d17 + d16 * d0;	
-	"vld1.32 		{d21}, [%1]!			\n\t"	
-	"vmla.f32 		d18, d17, d0			\n\t"	//d18 = d18 + d17 * d0;	
-	"vld1.32 		{d22}, [%1]!			\n\t"	
-	"vmla.f32 		d19, d18, d0			\n\t"	//d19 = d19 + d18 * d0;	
-	"vld1.32 		{d23}, [%1]!			\n\t"	
-	"vmla.f32 		d20, d19, d0			\n\t"	//d20 = d20 + d19 * d0;	
-	"vmla.f32 		d21, d20, d0			\n\t"	//d21 = d21 + d20 * d0;	
-	"vmla.f32 		d22, d21, d0			\n\t"	//d22 = d22 + d21 * d0;	
-	"vmla.f32 		d23, d22, d0			\n\t"	//d23 = d23 + d22 * d0;	
-	
-	//multiply by 2 ^ m 	
-	"vshl.i32 		d6, d6, #23				\n\t"	//d6 = d6 << 23		
-	"vadd.i32 		d0, d23, d6				\n\t"	//d0 = d22 + d6		
-
-	"vdup.f32 		d2, d0[1]				\n\t"	//d2 = s1		
-	"vmov.f32 		d1, #0.5				\n\t"	//d1 = 0.5		
-	"vsub.f32 		d0, d0, d2				\n\t"	//d0 = d0 - d2		
-	"vmul.f32 		d0, d1					\n\t"	//d0 = d0 * d1		
-
-	:: "r"(__sinhf_rng), "r"(__sinhf_lut) 
-    : "d0", "d1", "q1", "q2", "d6"
-	);
-	
-#endif
-}
-
-float sinhf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	sinhf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return sinhf_c(x);
-#endif
-};
diff --git a/deps/math-neon/source/math_sqrtf.c b/deps/math-neon/source/math_sqrtf.c
deleted file mode 100644
index ee3f86bdbf..0000000000
--- a/deps/math-neon/source/math_sqrtf.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-Test func : sqrtf(x)
-Test Range: 0 < x < 1,000,000,000
-Peak Error:	~0.0010%
-RMS  Error: ~0.0005%
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-float sqrtf_c(float x)
-{
-
-	float b, c;
-	int m;
-	union {
-		float 	f;
-		int 	i;
-	} a;
-	
-	//fast invsqrt approx
-	a.f = x;
-	a.i = 0x5F3759DF - (a.i >> 1);		//VRSQRTE
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;		//VRSQRTS
-	a.f = a.f * b;		
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;
-    a.f = a.f * b;	
-
-	//fast inverse approx
-	x = a.f;
-	m = 0x3F800000 - (a.i & 0x7F800000);
-	a.i = a.i + m;
-	a.f = 1.41176471f - 0.47058824f * a.f;
-	a.i = a.i + m;
-	b = 2.0 - a.f * x;
-	a.f = a.f * b;	
-	b = 2.0 - a.f * x;
-	a.f = a.f * b;
-
-	return a.f;
-}
-
-float sqrtf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-		
-	//fast invsqrt approx
-	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
-	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d4 = (3 - d0 * d3) / 2	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3	
-		
-	//fast reciporical approximation
-	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d0, d1, d2				\n\t"	//d0 = d1 * d2; 
-
-	::: "d0", "d1", "d2", "d3"
-	);
-#endif
-}
-
-float sqrtf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	sqrtf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return sqrtf_c(x);
-#endif
-};
diff --git a/deps/math-neon/source/math_sqrtfv.c b/deps/math-neon/source/math_sqrtfv.c
deleted file mode 100644
index c647403a28..0000000000
--- a/deps/math-neon/source/math_sqrtfv.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-/*
-Test func : sqrtf(x)
-Test Range: 0 < x < 1,000,000,000
-Peak Error:	~0.0010%
-RMS  Error: ~0.0005%
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-void sqrtfv_c(float *x, int n, float *r)
-{
-
-	float x0, x1;
-	float b0, b1, c0, c1;
-	int m0, m1;
-	union {
-		float 	f;
-		int 	i;
-	} a0, a1;
-
-
-	if (n & 0x1){
-		*r++ = sqrtf_c(*x++);
-		n--;
-	}
-
-	while(n > 0){
-	
-		x0 = *x++;
-		x1 = *x++;
-	
-		//fast invsqrt approx
-		a0.f = x0;
-		a1.f = x1;
-		a0.i = 0x5F3759DF - (a0.i >> 1);		//VRSQRTE
-		a1.i = 0x5F3759DF - (a1.i >> 1);		//VRSQRTE
-		c0 = x0 * a0.f;
-		c1 = x1 * a1.f;
-		b0 = (3.0f - c0 * a0.f) * 0.5;		//VRSQRTS
-		b1 = (3.0f - c1 * a1.f) * 0.5;		//VRSQRTS
-		a0.f = a0.f * b0;		
-		a1.f = a1.f * b1;		
-		c0 = x0 * a0.f;
-		c1 = x1 * a1.f;
-		b0 = (3.0f - c0 * a0.f) * 0.5;		//VRSQRTS
-		b1 = (3.0f - c1 * a1.f) * 0.5;		//VRSQRTS
-		a0.f = a0.f * b0;		
-		a1.f = a1.f * b1;		
-
-		//fast inverse approx
-		c0 = a0.f;
-		c0 = a1.f;
-		m0 = 0x3F800000 - (a0.i & 0x7F800000);
-		m1 = 0x3F800000 - (a1.i & 0x7F800000);
-		a0.i = a0.i + m0;
-		a1.i = a1.i + m1;
-		a0.f = 1.41176471f - 0.47058824f * a0.f;
-		a1.f = 1.41176471f - 0.47058824f * a1.f;
-		a0.i = a0.i + m0;
-		a1.i = a1.i + m1;
-		b0 = 2.0 - a0.f * c0;
-		b1 = 2.0 - a1.f * c1;
-		a0.f = a0.f * b0;	
-		a1.f = a1.f * b1;	
-		b0 = 2.0 - a0.f * c0;
-		b1 = 2.0 - a1.f * c1;
-		a0.f = a0.f * b0;
-		a1.f = a1.f * b1;
-		
-		*r++ = a0.f;
-		*r++ = a1.f;
-		n -= 2;
-
-	}
-}
-
-void sqrtfv_neon(float *x, int n, float *r)
-{
-#if 0
-	asm volatile (
-
-	"tst 			r1, #1 					\n\t"	//r1 & 1
-	"beq 			1f 						\n\t"	//
-
-	"vld1.32		d0[0], [r0]! 			\n\t"	//s0 = *x++
-	"mov 			ip, lr 					\n\t"	//ip = lr
-	//"bl 			sqrtf_neon_hfp 			\n\t"	//sqrtf_neon
-	"mov 			lr, ip 					\n\t"	//lr = ip
-	"vst1.32		d0[0], [r2]! 			\n\t"	//*r++ = r0
-	"subs 			r1, r1, #1				\n\t"	//r1 = r1 - 1;		
-	"bxeq 			lr						\n\t"	//
-
-	"1:				 						\n\t"	//
-
-	"vld1.32 		d0, [r0]! 				\n\t"	//d0 = (*x[0], *x[1]), x+=2;
-	
-	//fast invsqrt approx
-	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
-	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
-	"vmul.f32 		d2, d0, d1				\n\t"	//d3 = d0 * d2
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d4 = (3 - d0 * d3) / 2 	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d4	
-	"vmul.f32 		d2, d0, d1				\n\t"	//d3 = d0 * d2	
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d4 = (3 - d0 * d3) / 2	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d4	
-		
-	//fast reciporical approximation
-	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d0, d1, d2				\n\t"	//d0 = d1 * d2; 
-
-	"vst1.64 		d0, [r2]!				\n\t"	//*r++ = d0;
-	"subs 			r1, r1, #2				\n\t"	//n = n - 2; update flags
-	"bgt 			1b 						\n\t"	//
-
-	::: "d0", "d1", "d2", "d3"
-);
-#else
-	sqrtfv_c(x, n, r);
-#endif
-}
diff --git a/deps/math-neon/source/math_tanf.c b/deps/math-neon/source/math_tanf.c
deleted file mode 100644
index e87c1ffd1c..0000000000
--- a/deps/math-neon/source/math_tanf.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
-
-const float __tanf_rng[2] = {
-	2.0 / M_PI,
-	M_PI / 2.0
-};
-
-const float __tanf_lut[4] = {
-	-0.00018365f,	//p7
-	-0.16664831f,	//p3
-	+0.00830636f,	//p5
-	+0.99999661f,	//p1
-};
- 
-float tanf_c(float x){
-
-	union {
-		float f;
-		int i;
-	} ax, c;
-
-	float r, a, b, xx, cc, cx;
-	int m;
-	
-	ax.f = fabsf(x);
-
-	//Range Reduction:
-	m = (int) (ax.f * __tanf_rng[0]);	
-	ax.f = ax.f - (((float)m) * __tanf_rng[1]);
-
-	//Test Quadrant
-	ax.f = ax.f - (m & 1) * __tanf_rng[1];
-	ax.i = ax.i ^ ((*(int*)&x) & 0x80000000);
-		
-	//Taylor Polynomial (Estrins)
-	xx = ax.f * ax.f;	
-	a = (__tanf_lut[0] * ax.f) * xx + (__tanf_lut[2] * ax.f);
-	b = (__tanf_lut[1] * ax.f) * xx + (__tanf_lut[3] * ax.f);
-	xx = xx * xx;
-	r = b + a * xx;
-
-	//cosine
-	c.f = 1.0 - r * r;
-	
-	//fast invsqrt approximation (2x newton iterations)
-    cc = c.f;
-	c.i = 0x5F3759DF - (c.i >> 1);		//VRSQRTE
-	cx = cc * c.f;
-	a = (3.0f - cx * c.f) / 2;			//VRSQRTS
-	c.f = c.f * a;		
-	cx = cc * c.f;
-	a = (3.0f - cx * c.f) / 2;
-    c.f = c.f * a;	
-
-	r = r * c.f;
-	
-	return r;
-}
-
-
-float tanf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile (
-
-	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}
-	"vabs.f32 		d1, d0					\n\t"	//d1 = {ax, ax}
-	
-	//Range Reduction:
-	"vld1.32 		d3, [%0]				\n\t"	//d3 = {invrange, range}
-	"vmul.f32 		d2, d1, d3[0]			\n\t"	//d2 = d1 * d3[0] 
-	"vcvt.u32.f32 	d2, d2					\n\t"	//d2 = (int) d2
-	"vcvt.f32.u32 	d4, d2					\n\t"	//d4 = (float) d2
-	"vmls.f32 		d1, d4, d3[1]			\n\t"	//d1 = d1 - d4 * d3[1]
-	
-	//Checking Quadrant:
-	//ax = ax - (k&1) * M_PI_2
-	"vmov.i32 		d4, #1					\n\t"	//d4 = 1
-	"vand.i32 		d2, d2, d4				\n\t"	//d2 = d2 & d4
-	"vcvt.f32.u32 	d2, d2					\n\t"	//d2 = (float) d2
-	"vmls.f32 		d1, d2, d3[1]			\n\t"	//d1 = d1 - d2 * d3[1]
-	
-	//ax = ax ^ ( x.i & 0x800000000)
-	"vmov.i32 		d4, #0x80000000			\n\t"	//d4 = 0x80000000
-	"vand.i32 		d0, d0, d4				\n\t"	//d0 = d0 & d4
-	"veor.i32 		d1, d1, d0				\n\t"	//d1 = d1 ^ d0
-	
-	//polynomial:
-	"vmul.f32 		d2, d1, d1				\n\t"	//d2 = d1*d1 = {x^2, x^2}	
-	"vld1.32 		{d4, d5}, [%1]			\n\t"	//d4 = {p7, p3}, d5 = {p5, p1}
-	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
-	"vmul.f32 		q0, q2, d1[0]			\n\t"	//q0 = q2 * d1[0] = {p7x, p3x, p5x, p1x}
-	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2 = {p5x + p7x^3, p1x + p3x^3}		
-	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d0 = {..., p1x + p3x^3 + p5x^5 + p7x^7}		
-	
-	//cosine
-	"vmov.f32 		s1, #1.0				\n\t"	//d0[1] = 1.0
-	"vmls.f32 		d0, d1, d1				\n\t"	//d0 = {..., 1.0 - sx*sx}
-	
-	//invsqrt approx
-	"vmov.f32 		d2, d0					\n\t"	//d2 = d0
-	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
-	"vmul.f32 		d3, d0, d2				\n\t"	//d3 = d0 * d2
-	"vrsqrts.f32 	d4, d3, d0				\n\t"	//d4 = (3 - d0 * d3) / 2 	
-	"vmul.f32 		d0, d0, d4				\n\t"	//d0 = d0 * d4	
-	"vmul.f32 		d3, d0, d2				\n\t"	//d3 = d0 * d2	
-	"vrsqrts.f32 	d4, d3, d0				\n\t"	//d4 = (3 - d0 * d3) / 2	
-	"vmul.f32 		d0, d0, d4				\n\t"	//d0 = d0 * d4	
-	
-	"vmul.f32 		d0, d0, d1				\n\t"	//d0 = d0 * d1
-	
-	"vmov.f32 		s0, s1					\n\t"	//s0 = s1
-	
-	:: "r"(__tanf_rng), "r"(__tanf_lut) 
-    : "d0", "d1", "d2", "d3", "d4", "d5"
-	);
-#endif
-}
-
-
-float tanf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vdup.f32 d0, r0 		\n\t");
-	tanf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return tanf_c(x);
-#endif
-};
-
diff --git a/deps/math-neon/source/math_tanhf.c b/deps/math-neon/source/math_tanhf.c
deleted file mode 100644
index 219655be4d..0000000000
--- a/deps/math-neon/source/math_tanhf.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math.h"
-#include "math_neon.h"
- 
-/* 
-TanH = (e^x - e^-x) / (e^x + e^-x)
-TanH = (e^x - e^-x)(e^x) / (e^x + e^-x)(e^x)
-TanH = (e^2x - 1) / (e^2x + 1)
-
-*/
- 
-float tanhf_c(float x)
-{
-	float a, b, c;
-	int m;
-	union{
-		float 	f;
-		int 	i;
-	} xx;
-	
-	x = 2.0f * x;
-	a = expf_c(x);
-	c = a + 1.0f;
-		
-	//reciporical approx.
-	xx.f = c;
-	m = 0x3F800000 - (xx.i & 0x7F800000);
-	xx.i = xx.i + m;
-	xx.f = 1.41176471f - 0.47058824f * xx.f;
-	xx.i = xx.i + m;
-	b = 2.0 - xx.f * c;
-	xx.f = xx.f * b;	
-	b = 2.0 - xx.f * c;
-	xx.f = xx.f * b;
-	c = a - 1.0;
-	xx.f *= c;
-	return xx.f;
-}
-
-
-float tanhf_neon_hfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vadd.f32 d0, d0, d0 		\n\t");
-	expf_neon_hfp(x);
-	asm volatile (
-	"vmov.f32 		d2, #1.0 				\n\t"
-	"vsub.f32 		d3, d0, d2 				\n\t"
-	"vadd.f32 		d0, d0, d2 				\n\t"
-
-	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
-	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
-	"vmul.f32		d0, d1, d2				\n\t"	//d0 = d1 * d2; 
-	"vmul.f32		d0, d0, d3				\n\t"	//d0 = d0 * d3; 	
-	::: "d0", "d1", "d2", "d3"
-	);	
-#endif
-}
-
-float tanhf_neon_sfp(float x)
-{
-#ifdef __MATH_NEON
-	asm volatile ("vmov.f32 s0, r0 		\n\t");
-	tanhf_neon_hfp(x);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return tanhf_c(x);
-#endif
-};
-
diff --git a/deps/math-neon/source/math_vec2.c b/deps/math-neon/source/math_vec2.c
deleted file mode 100644
index d970c37676..0000000000
--- a/deps/math-neon/source/math_vec2.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-
-#include "math_neon.h"
-
-//vec2 scalar product
-float 
-dot2_c(float v0[2], float v1[2])
-{
-	float r;
-	r = v0[0]*v1[0];
-	r += v0[1]*v1[1];
-	return r;
-}
-
-void 
-normalize2_c(float v[2], float d[2])
-{
-	float b, c, x;
-	union {
-		float 	f;
-		int 	i;
-	} a;
-	
-	x = v[0]*v[0];
-	x += v[1]*v[1];
-
-	//fast invsqrt approx
-	a.f = x;
-	a.i = 0x5F3759DF - (a.i >> 1);		//VRSQRTE
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;		//VRSQRTS
-	a.f = a.f * b;		
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;
-    a.f = a.f * b;	
-
-	d[0] = v[0]*a.f;
-	d[1] = v[1]*a.f;
-}
-
-float 
-dot2_neon_hfp(float v0[2], float v1[2])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d2}, [%0]			\n\t"	//d2={x0,y0}
-	"vld1.32 		{d4}, [%1]			\n\t"	//d4={x1,y1}
-	"vmul.f32 		d0, d2, d4			\n\t"	//d0 = d2*d4
-	"vpadd.f32 		d0, d0, d0			\n\t"	//d0 = d[0] + d[1]
-	:: "r"(v0), "r"(v1) 
-    : 
-	);	
-#endif
-}
-
-float 
-dot2_neon_sfp(float v0[2], float v1[2])
-{
-#ifdef __MATH_NEON
-	dot2_neon_hfp(v0, v1);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return dot2_c(v0, v1);
-#endif
-};
-
-void 
-normalize2_neon(float v[2], float d[2])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		d4, [%0]				\n\t"	//d4 = {x0,y0}
-	"vmul.f32 		d0, d4, d4				\n\t"	//d0 = d2*d2
-	"vpadd.f32 		d0, d0					\n\t"	//d0 = d[0] + d[1]
-	
-	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
-	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3	
-
-	"vmul.f32 		d4, d4, d0[0]			\n\t"	//d4 = d4*d0[0]
-	"vst1.32 		d4, [%1]				\n\t"	//
-	
-	:: "r"(v), "r"(d) 
-    : "d0", "d1", "d2", "d3", "d4", "memory"
-	);	
-#else
-	normalize2_c(v, d);
-#endif
-}
-
diff --git a/deps/math-neon/source/math_vec3.c b/deps/math-neon/source/math_vec3.c
deleted file mode 100644
index 998ff2e4d5..0000000000
--- a/deps/math-neon/source/math_vec3.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math_neon.h"
-
-//vec4 scalar product
-float 
-dot3_c(float v0[3], float v1[3])
-{
-	float r;
-	r = v0[0]*v1[0];
-	r += v0[1]*v1[1];
-	r += v0[2]*v1[2]; 
-	return r;
-}
-
-void
-cross3_c(float v0[3], float v1[3], float d[3])
-{
-	d[0] = v0[1]*v1[2] - v0[2]*v1[1];
-	d[1] = v0[2]*v1[0] - v0[0]*v1[2];
-	d[2] = v0[0]*v1[1] - v0[1]*v1[0];
-}
-
-void 
-normalize3_c(float v[3], float d[3])
-{
-	float b, c, x;
-	union {
-		float 	f;
-		int 	i;
-	} a;
-	
-	x = v[0]*v[0];
-	x += v[1]*v[1];
-	x += v[2]*v[2];
-
-	//fast invsqrt approx
-	a.f = x;
-	a.i = 0x5F3759DF - (a.i >> 1);		//VRSQRTE
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;		//VRSQRTS
-	a.f = a.f * b;		
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;
-    a.f = a.f * b;	
-
-	d[0] = v[0]*a.f;
-	d[1] = v[1]*a.f;
-	d[2] = v[2]*a.f;
-}
-
-
-float 
-dot3_neon_hfp(float v0[3], float v1[3])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d2}, [%0]			\n\t"	//d2={x0,y0}
-	"flds 			s6, [%0, #8]		\n\t"	//d3[0]={z0}
-	"vld1.32 		{d4}, [%1]			\n\t"	//d4={x1,y1}
-	"flds 			s10, [%1, #8]	\n\t"	//d5[0]={z1}
-
-	"vmul.f32 		d0, d2, d4			\n\t"	//d0= d2*d4
-	"vpadd.f32 		d0, d0, d0			\n\t"	//d0 = d[0] + d[1]
-	"vmla.f32 		d0, d3, d5			\n\t"	//d0 = d0 + d3*d5 
-	:: "r"(v0), "r"(v1) 
-    : "d0","d1","d2","d3","d4","d5"
-	);	
-#endif
-}
-
-float 
-dot3_neon_sfp(float v0[3], float v1[3])
-{
-#ifdef __MATH_NEON
-	dot3_neon_hfp(v0, v1);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return dot3_c(v0, v1);
-#endif
-};
-
-
-void cross3_neon(float v0[3], float v1[3], float d[3])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"flds 			s3, [%0]			\n\t"	//d1[1]={x0}
-	"add 			%0, %0, #4			\n\t"	//
-	"vld1.32 		{d0}, [%0]			\n\t"	//d0={y0,z0}
-	"vmov.f32 		s2, s1		 		\n\t"	//d1[0]={z0}
-
-	"flds 			s5, [%1]			\n\t"	//d2[1]={x1}
-	"add 			%1, %1, #4			\n\t"	//
-	"vld1.32 		{d3}, [%1]			\n\t"	//d3={y1,z1}
-	"vmov.f32 		s4, s7				\n\t"	//d2[0]=d3[1]
-	
-	"vmul.f32 		d4, d0, d2			\n\t"	//d4=d0*d2
-	"vmls.f32 		d4, d1, d3			\n\t"	//d4-=d1*d3
-	
-	"vmul.f32 		d5, d3, d1[1]		\n\t"	//d5=d3*d1[1]
-	"vmls.f32 		d5, d0, d2[1]		\n\t"	//d5-=d0*d2[1]
-	
-	"vst1.32 		d4, [%2]			\n\t"	//
-	"add 			%2, %2, #8			\n\t"	//
-	"fsts 			s10, [%2]			\n\t"	//
-	
-	: "+r"(v0), "+r"(v1), "+r"(d):
-    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
-	);	
-#else
-	cross3_c(v0,v1,d);
-#endif
-}
-
-void 
-normalize3_neon(float v[3], float d[3])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d4}, [%0]				\n\t"	//d4={x0,y0}
-	"flds 			s10, [%0, #8]			\n\t"	//d5[0]={z0}
-
-	"vmul.f32 		d0, d4, d4				\n\t"	//d0= d4*d4
-	"vpadd.f32 		d0, d0					\n\t"	//d0 = d[0] + d[1]
-	"vmla.f32 		d0, d5, d5				\n\t"	//d0 = d0 + d5*d5 
-	
-	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
-	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d4 = (3 - d0 * d3) / 2	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d4	
-
-	"vmul.f32 		q2, q2, d0[0]			\n\t"	//d0= d2*d4
-	"vst1.32 		{d4}, [%1]				\n\t"	//
-	"fsts 			s10, [%1, #8]			\n\t"	//
-	
-	:: "r"(v), "r"(d) 
-    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
-	);	
-#else
-	normalize3_c(v, d);
-#endif
-
-}
-
-
diff --git a/deps/math-neon/source/math_vec4.c b/deps/math-neon/source/math_vec4.c
deleted file mode 100644
index 483fc57190..0000000000
--- a/deps/math-neon/source/math_vec4.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
-The MIT License (MIT)
-
-Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "math_neon.h"
-
-
-#ifdef __MATH_NEON
-#include "arm_neon.h" 
-#endif
-
-//vec4 scalar product
-float dot4_c(float v0[4], float v1[4])
-{
-	float r;
-	r = v0[0]*v1[0];
-	r += v0[1]*v1[1];
-	r += v0[2]*v1[2]; 
-	r += v0[3]*v1[3];
-	return r;
-}
-
-void normalize4_c(float v[4], float d[4])
-{
-	float b, c, x;
-	union {
-		float 	f;
-		int 	i;
-	} a;
-	
-	x = v[0]*v[0];
-	x += v[1]*v[1];
-	x += v[2]*v[2];
-	x += v[3]*v[3];
-
-	//fast invsqrt approx
-	a.f = x;
-	a.i = 0x5F3759DF - (a.i >> 1);		//VRSQRTE
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;		//VRSQRTS
-	a.f = a.f * b;		
-	c = x * a.f;
-	b = (3.0f - c * a.f) * 0.5;
-    a.f = a.f * b;	
-
-	d[0] = v[0]*a.f;
-	d[1] = v[1]*a.f;
-	d[2] = v[2]*a.f;
-	d[3] = v[3]*a.f;
-}
-
-void normalize4_neon(float v[4], float d[4])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d4, d5}, [%0]			\n\t"	//d2={x0,y0}, d3={z0, w0}
-	"vmul.f32 		d0, d4, d4				\n\t"	//d0= d4*d4
-	"vmla.f32 		d0, d5, d5				\n\t"	//d0 = d0 + d5*d5 
-	"vpadd.f32 		d0, d0					\n\t"	//d0 = d[0] + d[1]
-	
-	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
-	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
-	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
-	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d4 = (3 - d0 * d3) / 2	
-	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d4	
-
-	"vmul.f32 		q2, q2, d0[0]			\n\t"	//d0= d2*d4
-	"vst1.32 		{d4, d5}, [%1]			\n\t"	//d2={x0,y0}, d3={z0, w0}
-	
-	:: "r"(v), "r"(d) 
-    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
-	);	
-#else
-	normalize4_c(v, d);
-#endif
-
-}
-
-
-float dot4_neon_hfp(float v0[4], float v1[4])
-{
-#ifdef __MATH_NEON
-	asm volatile (
-	"vld1.32 		{d2, d3}, [%0]			\n\t"	//d2={x0,y0}, d3={z0, w0}
-	"vld1.32 		{d4, d5}, [%1]			\n\t"	//d4={x1,y1}, d5={z1, w1}
-	"vmul.f32 		d0, d2, d4				\n\t"	//d0= d2*d4
-	"vmla.f32 		d0, d3, d5				\n\t"	//d0 = d0 + d3*d5 
-	"vpadd.f32 		d0, d0					\n\t"	//d0 = d[0] + d[1]
-	:: "r"(v0), "r"(v1) : 
-	);	
-#endif
-}
-
-float dot4_neon_sfp(float v0[4], float v1[4])
-{
-#ifdef __MATH_NEON
-	dot4_neon_hfp(v0, v1);
-	asm volatile ("vmov.f32 r0, s0 		\n\t");
-#else
-	return dot4_c(v0, v1);
-#endif
-};
-

From af97efdc17a1215a63a9ee639cf9361876ded50d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20Jos=C3=A9=20Garc=C3=ADa=20Garc=C3=ADa?=
 <frangarcj@gmail.com>
Date: Sun, 31 May 2020 17:24:53 +0200
Subject: [PATCH 3/3] Squashed 'deps/math-neon/' content from commit bf34c68a8e

git-subtree-dir: deps/math-neon
git-subtree-split: bf34c68a8e141f7e6f37040da9311b07f1bbe529
---
 .gitattributes         |  17 +
 .gitignore             |  26 ++
 Makefile               |  29 ++
 README                 | 168 ++++++++++
 math_debug.c           | 689 +++++++++++++++++++++++++++++++++++++++++
 source/math_acosf.c    |  67 ++++
 source/math_asinf.c    | 183 +++++++++++
 source/math_atan2f.c   | 170 ++++++++++
 source/math_atanf.c    | 149 +++++++++
 source/math_ceilf.c    |  71 +++++
 source/math_cosf.c     |  50 +++
 source/math_coshf.c    | 120 +++++++
 source/math_expf.c     | 135 ++++++++
 source/math_fabsf.c    |  58 ++++
 source/math_floorf.c   |  66 ++++
 source/math_fmodf.c    | 100 ++++++
 source/math_invsqrtf.c |  79 +++++
 source/math_ldexpf.c   |  67 ++++
 source/math_log10f.c   | 135 ++++++++
 source/math_logf.c     | 135 ++++++++
 source/math_mat2.c     |  95 ++++++
 source/math_mat3.c     | 131 ++++++++
 source/math_mat4.c     | 144 +++++++++
 source/math_modf.c     |  71 +++++
 source/math_neon.h     | 439 ++++++++++++++++++++++++++
 source/math_powf.c     | 182 +++++++++++
 source/math_runfast.c  |  42 +++
 source/math_sincosf.c  | 163 ++++++++++
 source/math_sinf.c     | 128 ++++++++
 source/math_sinfv.c    | 110 +++++++
 source/math_sinhf.c    | 120 +++++++
 source/math_sqrtf.c    | 105 +++++++
 source/math_sqrtfv.c   | 147 +++++++++
 source/math_tanf.c     | 156 ++++++++++
 source/math_tanhf.c    |  95 ++++++
 source/math_vec2.c     | 118 +++++++
 source/math_vec3.c     | 172 ++++++++++
 source/math_vec4.c     | 126 ++++++++
 38 files changed, 5058 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 README
 create mode 100644 math_debug.c
 create mode 100644 source/math_acosf.c
 create mode 100644 source/math_asinf.c
 create mode 100644 source/math_atan2f.c
 create mode 100644 source/math_atanf.c
 create mode 100644 source/math_ceilf.c
 create mode 100644 source/math_cosf.c
 create mode 100644 source/math_coshf.c
 create mode 100644 source/math_expf.c
 create mode 100644 source/math_fabsf.c
 create mode 100644 source/math_floorf.c
 create mode 100644 source/math_fmodf.c
 create mode 100644 source/math_invsqrtf.c
 create mode 100644 source/math_ldexpf.c
 create mode 100644 source/math_log10f.c
 create mode 100644 source/math_logf.c
 create mode 100644 source/math_mat2.c
 create mode 100644 source/math_mat3.c
 create mode 100644 source/math_mat4.c
 create mode 100644 source/math_modf.c
 create mode 100644 source/math_neon.h
 create mode 100644 source/math_powf.c
 create mode 100644 source/math_runfast.c
 create mode 100644 source/math_sincosf.c
 create mode 100644 source/math_sinf.c
 create mode 100644 source/math_sinfv.c
 create mode 100644 source/math_sinhf.c
 create mode 100644 source/math_sqrtf.c
 create mode 100644 source/math_sqrtfv.c
 create mode 100644 source/math_tanf.c
 create mode 100644 source/math_tanhf.c
 create mode 100644 source/math_vec2.c
 create mode 100644 source/math_vec3.c
 create mode 100644 source/math_vec4.c

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000..bdb0cabc87
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,17 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+
+# Custom for Visual Studio
+*.cs     diff=csharp
+
+# Standard to msysgit
+*.doc	 diff=astextplain
+*.DOC	 diff=astextplain
+*.docx diff=astextplain
+*.DOCX diff=astextplain
+*.dot  diff=astextplain
+*.DOT  diff=astextplain
+*.pdf  diff=astextplain
+*.PDF	 diff=astextplain
+*.rtf	 diff=astextplain
+*.RTF	 diff=astextplain
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..6b55e9b64e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,26 @@
+*.o
+*.a
+
+# Windows thumbnail cache files
+Thumbs.db
+ehthumbs.db
+ehthumbs_vista.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# =========================
+# Operating System Files
+# =========================
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000..269d8cdd57
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+TARGET          := libmathneon
+SOURCES         := source
+
+LIBS = -lc -lm -lSceGxm_stub -lSceDisplay_stub
+
+CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
+CGFILES  := $(foreach dir,$(SHADERS), $(wildcard $(dir)/*.cg))
+HEADERS  := $(CGFILES:.cg=.h)
+OBJS     := $(CFILES:.c=.o)
+
+PREFIX  = arm-vita-eabi
+CC      = $(PREFIX)-gcc
+AR      = $(PREFIX)-gcc-ar
+CFLAGS  = -g -Wl,-q -O2 -ffast-math -mtune=cortex-a9 -mfpu=neon -flto -ftree-vectorize
+ASFLAGS = $(CFLAGS)
+# all/clean/install name actions, not files: keep them runnable even if such a file exists.
+.PHONY: all clean install
+all: $(TARGET).a
+$(TARGET).a: $(OBJS)
+	$(AR) -rc $@ $^
+	
+clean:
+	@rm -rf $(TARGET).a $(TARGET).elf $(OBJS)
+	
+install: $(TARGET).a
+	@mkdir -p $(VITASDK)/$(PREFIX)/lib/
+	cp $(TARGET).a $(VITASDK)/$(PREFIX)/lib/
+	@mkdir -p $(VITASDK)/$(PREFIX)/include/
+	cp source/math_neon.h $(VITASDK)/$(PREFIX)/include/
diff --git a/README b/README
new file mode 100644
index 0000000000..4f388e9374
--- /dev/null
+++ b/README
@@ -0,0 +1,168 @@
+
+Library: 	MATH-NEON
+By:			Lachlan Tychsen-Smith
+Licence:	MIT (expat)
+=======================================================================================
+This project implements the cmath functions and some optimised matrix functions 
+with the aim of increasing the floating point performance of ARM Cortex A-8
+based platforms. As well as implementing the functions in ARM NEON assembly, 
+they sacrifice error checking and some accuracy to achieve better performance.
+
+Function Errors:
+=======================================================================================
+The measurement and characterisation of the inaccuracies present within these 
+functions is really a field in itself. For the benchmark I provide the 
+maximum absolute, maximum relative and root mean squared error compared to the
+cmath implementations over the specified range. However, these values can be 
+misleading, especially for functions which quickly go to infinity, so it's always a 
+good idea to test the library within your actual program. In general, this library will not 
+be as accurate as cmath; however, for many functions the error is small enough to be
+negligible. 
+	
+Notes:
+=======================================================================================
+- The *_c functions are c implementations of the *_neon code.
+- As with cmath, the errors present in the functions are very dependent on the 
+  range in which you're operating, so you should test them first.
+- Look in the "math_neon.h" file for descriptions of the functions. In some 
+  function files there are also notes on the specific implementation.
+- The *_neon functions make certain assumptions about the location of arguments 
+  that are incompatible with inlining. 
+	  
+Contact:
+=======================================================================================
+Name: 	Lachlan Tychsen-Smith 
+Email: 	lachlan.ts@gmail.com
+
+PSVITA performance test results:
+
+RUNFAST: Disabled 
+------------------------------------------------------------------------------------------------------
+MATRIX FUNCTION TESTS 
+------------------------------------------------------------------------------------------------------
+matmul2_c = 
+			|-14.56, 5.96|
+			|-15.35, 10.50|
+matmul2_neon = 
+			|-14.56, 5.96|
+			|-15.35, 10.50|
+matmul2: c=174924 	 neon=64490 	 rate=2.71 
+matvec2_c = |-14.56, -15.35|
+matvec2_neon = |-14.56, -15.35|
+matvec2: c=88957 	 neon=58337 	 rate=1.52 
+matmul3_c =
+			|-21.39, -4.68, -1.74|
+			|-8.66, -8.97, 1.83|
+			|15.88, 0.30, -2.23|
+matmul3_neon =
+			|-21.39, -4.68, -1.74|
+			|-8.66, -8.97, 1.83|
+			|15.88, 0.30, -2.23|
+matmul3: c=552486 	 neon=297268 	 rate=1.86 
+matvec3_c = |-21.39, -8.66, 15.88|
+matvec3_neon = |-21.39, -8.66, 15.88|
+matvec3: c=184104 	 neon=128780 	 rate=1.43 
+matmul4_c =
+			|-13.65, -1.80, -12.92, 6.56|
+			|-10.21, 9.47, 2.73, 14.79|
+			|0.97, 11.69, -0.64, -12.87|
+			|20.06, 6.77, 35.61, -0.02|
+matmul4_neon =
+			|-13.65, -1.80, -12.92, 6.56|
+			|-10.21, 9.47, 2.73, 14.79|
+			|0.97, 11.69, -0.64, -12.87|
+			|20.06, 6.77, 35.61, -0.02|
+matmul4: c=1315568 	 neon=254227 	 rate=5.17 
+matvec4_c = |-13.65, -10.21, 0.97, 20.058556|
+matvec4_neon = |-13.65, -10.21, 0.97, 20.058556|
+matvec4: c=331712 	 neon=147196 	 rate=2.25 
+
+dot2_c = -10.903330
+dot2_neon = -10.903330
+dot2: c=230295 	 neon=168799 	 rate=1.36 
+normalize2_c = [-0.74, 0.67]
+normalize2_neon = [-0.74, 0.67]
+normalize2: c=950716 	 neon=965780 	 rate=0.98 
+
+dot3_c = -4.226746
+dot3_neon = -4.226746
+dot3: c=306957 	 neon=337316 	 rate=0.91 
+normalize3_c = [-0.69, 0.62, -0.38]
+normalize3_neon = [-0.69, 0.62, -0.38]
+normalize3: c=1180950 	 neon=1134557 	 rate=1.04 
+cross3_c = [-9.67, -19.39, -14.24]
+cross3_neon = [-9.67, -19.39, -14.24]
+cross3: c=659558 	 neon=766896 	 rate=0.86 
+
+dot4_c = 2.782796
+dot4_neon = 2.782796
+dot4: c=414233 	 neon=276068 	 rate=1.50 
+normalize4_c = [-0.59, 0.53, -0.32, -0.52]
+normalize4_neon = [-0.59, 0.53, -0.32, -0.52]
+normalize4: c=1364294 	 neon=1103327 	 rate=1.24 
+
+------------------------------------------------------------------------------------------------------
+CMATH FUNCTION TESTS 
+------------------------------------------------------------------------------------------------------
+Function	Range		Number	ABS Max Error	REL Max Error	RMS Error	Time	Rate
+------------------------------------------------------------------------------------------------------
+sinf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	1394459996	x1.00	
+sinf_c     	[-3.14, 3.14]	500000	7.75e-07	1.00e+02%	4.09e-07	1395128226	x1.00	
+sinf_neon  	[-3.14, 3.14]	500000	8.34e-07	1.00e+02%	4.09e-07	1395853554	x1.00	
+cosf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	1396644271	x1.00	
+cosf_c     	[-3.14, 3.14]	500000	7.75e-07	6.74e-01%	4.15e-07	1397360321	x1.00	
+cosf_neon  	[-3.14, 3.14]	500000	8.34e-07	6.74e-01%	4.16e-07	1398126872	x1.00	
+tanf       	[-0.79, 0.79]	500000	0.00e+00	0.00e+00%	0.00e+00	1398889596	x1.00	
+tanf_c     	[-0.79, 0.79]	500000	2.98e-06	7.94e-04%	1.31e-06	1399704712	x1.00	
+tanf_neon  	[-0.79, 0.79]	500000	1.91e-06	3.62e-04%	6.66e-07	1400612899	x1.00	
+asinf      	[-1.00, 1.00]	500000	0.00e+00	0.00e+00%	0.00e+00	1401838993	x1.00	
+asinf_c    	[-1.00, 1.00]	500000	5.54e-05	1.06e-02%	nan	1402745512	x1.00	
+asinf_neon 	[-1.00, 1.00]	500000	4.66e-05	8.90e-03%	nan	1403967661	x1.00	
+acosf      	[-1.00, 1.00]	500000	0.00e+00	0.00e+00%	0.00e+00	1405317842	x1.00	
+acosf_c    	[-1.00, 1.00]	500000	5.56e-05	6.46e-03%	nan	1406294753	x1.00	
+acosf_neon 	[-1.00, 1.00]	500000	4.67e-05	6.35e-03%	nan	1407598039	x1.00	
+atanf      	[-1.00, 1.00]	500000	0.00e+00	0.00e+00%	0.00e+00	1408314869	x1.00	
+atanf_c    	[-1.00, 1.00]	500000	1.67e-04	2.12e-02%	7.40e-05	1408872421	x1.00	
+atanf_neon 	[-1.00, 1.00]	500000	1.67e-04	2.12e-02%	7.40e-05	1409736652	x1.00	
+sinhf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	1411101066	x1.00	
+sinhf_c     	[-3.14, 3.14]	500000	1.91e-06	1.52e-01%	1.85e-07	1412173492	x1.00	
+sinhf_neon  	[-3.14, 3.14]	500000	1.91e-06	1.52e-01%	1.90e-07	1413205410	x1.00	
+coshf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	1414417802	x1.00	
+coshf_c     	[-3.14, 3.14]	500000	9.54e-07	2.38e-05%	1.64e-07	1415426083	x1.00	
+coshf_neon  	[-3.14, 3.14]	500000	1.91e-06	2.22e-05%	1.68e-07	1416412636	x1.00	
+tanhf       	[-3.14, 3.14]	500000	0.00e+00	0.00e+00%	0.00e+00	1417684273	x1.00	
+tanhf_c     	[-3.14, 3.14]	500000	1.20e-05	2.48e-01%	5.48e-06	1418659628	x1.00	
+tanhf_neon  	[-3.14, 3.14]	500000	2.38e-07	2.47e-01%	5.40e-08	1419650721	x1.00	
+expf       	[0.00, 10.00]	500000	0.00e+00	0.00e+00%	0.00e+00	1420706074	x1.00	
+expf_c     	[0.00, 10.00]	500000	9.77e-03	6.15e-05%	1.64e-03	1421444150	x1.00	
+expf_neon  	[0.00, 10.00]	500000	9.77e-03	6.58e-05%	1.64e-03	1422203499	x1.00	
+logf       	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	0.00e+00	1423106698	x1.00	
+logf_c     	[1.00, 1000.00]	500000	6.20e-06	1.62e-02%	9.83e-07	1423735174	x1.00	
+logf_neon  	[1.00, 1000.00]	500000	7.63e-06	1.03e-02%	1.07e-06	1424434406	x1.00	
+log10f       	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	0.00e+00	1425516892	x1.00	
+log10f_c     	[1.00, 1000.00]	500000	2.86e-06	6.68e-03%	4.79e-07	1426200368	x1.00	
+log10f_neon  	[1.00, 1000.00]	500000	3.34e-06	6.68e-03%	4.84e-07	1426966844	x1.00	
+floorf     	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1429081993	x1.00	
+floorf_c   	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1430839273	x1.00	
+floorf_neon	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1433474766	x1.00	
+ceilf     	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1435602956	x1.00	
+ceilf_c   	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1437403711	x1.00	
+ceilf_neon	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1440044970	x1.00	
+fabsf     	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1441265630	x1.00	
+fabsf_c   	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1442491716	x1.00	
+fabsf_neon	[1.00, 1000.00]	5000000	0.00e+00	0.00e+00%	0.00e+00	1443680744	x1.00	
+sqrtf      	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	0.00e+00	1444844144	x1.00	
+sqrtf_c    	[1.00, 1000.00]	500000	2.33e-04	1.06e-03%	8.69e-05	1445710342	x1.00	
+sqrtf_neon 	[1.00, 1000.00]	500000	7.63e-06	2.91e-05%	1.60e-06	1446544637	x1.00	
+invsqrtf      	[1.00, 1000.00]	500000	0.00e+00	0.00e+00%	0.00e+00	1446995307	x1.00	
+invsqrtf_c    	[1.00, 1000.00]	500000	4.35e-06	4.78e-04%	2.00e-07	1447471977	x1.00	
+invsqrtf_neon 	[1.00, 1000.00]	500000	1.19e-07	2.12e-05%	4.81e-09	1447987675	x1.00	
+atan2f       	[0.10, 10.00]	10000	0.00e+00	0.00e+00%	0.00e+00	1449713108	x1.00	
+atan2f_c     	[0.10, 10.00]	10000	1.73e-04	2.23e-02%	0.00e+00	1451276575	x1.00	
+atan2f_neon  	[0.10, 10.00]	10000	1.67e-04	2.12e-02%	0.00e+00	1453093260	x1.00	
+powf       	[1.00, 10.00]	10000	0.00e+00	0.00e+00%	0.00e+00	1458606663	x1.00	
+powf_c     	[1.00, 10.00]	10000	1.08e+05	4.37e-03%	0.00e+00	1461584933	x1.00	
+powf_neon  	[1.00, 10.00]	10000	1.36e+05	5.88e-03%	0.00e+00	1464702743	x1.00	
+fmodf       	[1.00, 10.00]	10000	0.00e+00	0.00e+00%	0.00e+00	1466022029	x1.00	
+fmodf_c     	[1.00, 10.00]	10000	9.90e+00	8.06e-02%	0.00e+00	1467403015	x1.00	
+fmodf_neon  	[1.00, 10.00]	10000	9.97e+00	8.06e-02%	0.00e+00	1468767755	x1.00	
diff --git a/math_debug.c b/math_debug.c
new file mode 100644
index 0000000000..a5125a3a25
--- /dev/null
+++ b/math_debug.c
@@ -0,0 +1,689 @@
+/*
+Math-NEON:  Neon Optimised Math Library based on cmath
+Contact:    lachlan.ts@gmail.com
+Copyright (C) 2009  Lachlan Tychsen - Smith aka Adventus
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include <math_neon.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#ifdef WIN32
+#include <time.h>
+#else
+#include <sys/time.h>
+#include <sys/resource.h>
+#endif
+
+#define randf()	(rand() / (RAND_MAX + 1.0f))
+// Append a printf-style formatted message to the benchmark log file (best effort: silently skipped if the file cannot be opened).
+void LOG(const char *format, ...) {
+	va_list arg;
+	va_start(arg, format);
+	char msg[512];
+	vsnprintf(msg, sizeof(msg), format, arg);	// bounded: an over-long message is truncated instead of overflowing msg
+	va_end(arg);
+
+	FILE *log = fopen("ux0:/data/mathneon.log", "a+");
+	if (log != NULL) {
+		fwrite(msg, 1, strlen(msg), log);
+		fclose(log);
+	}
+}
+// One accuracy/timing test case for a single-argument float function; filled in by test_mathfunc1().
+struct	test1_s {
+	const char*	name;				//label printed in the result row
+	float 		(*func)(float);	//the function
+	float 		(*bench)(float);	//the function to benchmark against.
+	float 		rng0, rng1;			//sweep bounds: x runs from rng0 up to (not including) rng1
+	int			num;				//number of steps the range is divided into
+	float 		emaxabs;			//largest absolute difference between func and bench
+	float 		xmaxabs;			//x at which emaxabs was observed
+	float 		emaxrel;			//largest relative difference, in percent
+	float 		xmaxrel;			//x at which emaxrel was observed
+	float 		erms;				//root mean squared difference over the sweep
+	int			time;				//time to execute num functions;
+};
+// One accuracy/timing test case for a two-argument float function; filled in by test_mathfunc2().
+struct	test2_s {
+	const char*	name;				//label printed in the result row
+	float 		(*func)(float, float);	//the function
+	float 		(*bench)(float, float);	//the function to benchmark against.
+	float 		rng0, rng1;			//sweep bounds, applied to both arguments
+	int			num;				//sample count used to derive the sweep step
+	float 		emaxabs;			//largest absolute difference between func and bench
+	float 		xmaxabs;			//first argument (x) at which emaxabs was observed
+	float 		emaxrel;			//largest relative difference, in percent
+	float 		xmaxrel;			//first argument (x) at which emaxrel was observed
+	float 		erms;				//never written by test_mathfunc2(); keeps its initial value
+	int			time;				//time to execute num functions;
+};
+// Reference reciprocal square root built on sqrtf(); serves as the baseline row for the invsqrtf_* tests.
+float invsqrtf(float x){
+	const float root = sqrtf(x);
+	return 1.0f / root;
+}
+
+typedef struct test1_s test1_t;
+typedef struct test2_s test2_t;
+// Single-argument test cases in groups of three (reference, *_c, *_neon); main() treats the first row of each group as the timing baseline.
+test1_t test1[51] = 
+{
+	{"sinf       ", 	sinf, 		sinf, 	-M_PI, 		M_PI, 	500000},
+	{"sinf_c     ", 	sinf_c, 	sinf, 	-M_PI, 		M_PI, 	500000},
+	{"sinf_neon  ", 	sinf_neon, 	sinf, 	-M_PI, 		M_PI, 	500000},
+	
+	{"cosf       ", 	cosf, 		cosf, 	-M_PI, 		M_PI, 	500000},
+	{"cosf_c     ", 	cosf_c, 	cosf, 	-M_PI, 		M_PI, 	500000},
+	{"cosf_neon  ", 	cosf_neon, 	cosf, 	-M_PI, 		M_PI, 	500000},
+
+	{"tanf       ", 	tanf, 		tanf, 	-M_PI_4, 	M_PI_4, 500000, 0, 0, 0},
+	{"tanf_c     ", 	tanf_c, 	tanf, 	-M_PI_4, 	M_PI_4, 500000, 0, 0, 0},
+	{"tanf_neon  ", 	tanf_neon, 	tanf, 	-M_PI_4, 	M_PI_4, 500000, 0, 0, 0},
+
+	{"asinf      ", 	asinf, 		asinf, 	-1, 		1, 		500000, 0, 0, 0},
+	{"asinf_c    ", 	asinf_c, 	asinf, 	-1, 		1,	 	500000, 0, 0, 0},
+	{"asinf_neon ",		asinf_neon,	asinf, 	-1, 		1, 		500000, 0, 0, 0},
+	
+	{"acosf      ", 	acosf, 		acosf, 	-1, 		1, 		500000, 0, 0, 0},
+	{"acosf_c    ", 	acosf_c, 	acosf, 	-1, 		1,	 	500000, 0, 0, 0},
+	{"acosf_neon ",		acosf_neon,	acosf, 	-1, 		1, 		500000, 0, 0, 0},
+	
+	{"atanf      ", 	atanf, 		atanf, 	-1, 		1, 		500000, 0, 0, 0},
+	{"atanf_c    ", 	atanf_c, 	atanf, 	-1, 		1,	 	500000, 0, 0, 0},
+	{"atanf_neon ",		atanf_neon,	atanf, 	-1, 		1, 		500000, 0, 0, 0},
+
+	{"sinhf       ", 	sinhf, 		sinhf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+	{"sinhf_c     ", 	sinhf_c, 	sinhf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+	{"sinhf_neon  ", 	sinhf_neon, sinhf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+	
+	{"coshf       ", 	coshf, 		coshf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+	{"coshf_c     ", 	coshf_c, 	coshf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+	{"coshf_neon  ", 	coshf_neon, coshf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+
+	{"tanhf       ", 	tanhf, 		tanhf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+	{"tanhf_c     ", 	tanhf_c, 	tanhf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+	{"tanhf_neon  ", 	tanhf_neon, tanhf, 	-M_PI, 		M_PI, 	500000, 0, 0, 0},
+
+	{"expf       ", 	expf, 		expf, 	0, 			10, 	500000, 0, 0, 0},
+	{"expf_c     ", 	expf_c, 	expf, 	0, 			10, 	500000, 0, 0, 0},
+	{"expf_neon  ",		expf_neon, 	expf, 	0, 			10, 	500000, 0, 0, 0},
+	
+	{"logf       ", 	logf, 		logf, 	1, 			1000, 	500000, 0, 0, 0},
+	{"logf_c     ", 	logf_c, 	logf, 	1, 			1000, 	500000, 0, 0, 0},
+	{"logf_neon  ",		logf_neon, 	logf, 	1, 			1000, 	500000, 0, 0, 0},
+
+	{"log10f       ", 	log10f, 	log10f, 1, 			1000, 	500000, 0, 0, 0},
+	{"log10f_c     ", 	log10f_c, 	log10f, 1, 			1000, 	500000, 0, 0, 0},
+	{"log10f_neon  ",	log10f_neon,log10f, 1, 			1000, 	500000, 0, 0, 0},
+
+	{"floorf     ", 	floorf, 	floorf, 1, 			1000, 	5000000, 0, 0, 0},
+	{"floorf_c   ", 	floorf_c, 	floorf, 1, 			1000, 	5000000, 0, 0, 0},
+	{"floorf_neon",		floorf_neon,floorf, 1, 			1000, 	5000000, 0, 0, 0},
+
+	{"ceilf     ", 		ceilf, 		ceilf, 	1, 			1000, 	5000000, 0, 0, 0},
+	{"ceilf_c   ", 		ceilf_c, 	ceilf, 	1, 			1000, 	5000000, 0, 0, 0},
+	{"ceilf_neon",		ceilf_neon,	ceilf, 	1, 			1000, 	5000000, 0, 0, 0},
+
+	{"fabsf     ", 		fabsf, 		fabsf, 	1, 			1000, 	5000000, 0, 0, 0},
+	{"fabsf_c   ", 		fabsf_c, 	fabsf, 	1, 			1000, 	5000000, 0, 0, 0},
+	{"fabsf_neon",		fabsf_neon,	fabsf, 	1, 			1000, 	5000000, 0, 0, 0},
+
+	{"sqrtf      ", 	sqrtf, 		sqrtf, 	1, 			1000, 	500000, 0, 0, 0},
+	{"sqrtf_c    ", 	sqrtf_c, 	sqrtf, 	1, 			1000, 	500000, 0, 0, 0},
+	{"sqrtf_neon ",		sqrtf_neon,	sqrtf, 	1, 			1000, 	500000, 0, 0, 0},
+
+	{"invsqrtf      ", 	invsqrtf, 		invsqrtf, 	1, 	1000, 	500000, 0, 0, 0},
+	{"invsqrtf_c    ", 	invsqrtf_c, 	invsqrtf, 	1, 	1000, 	500000, 0, 0, 0},
+	{"invsqrtf_neon ",	invsqrtf_neon,	invsqrtf, 	1, 	1000, 	500000, 0, 0, 0},
+};
+// Two-argument test cases, in the same groups of three (reference, *_c, *_neon) as test1[].
+test2_t test2[9] = 
+{
+	{"atan2f       ", 	atan2f, 	atan2f, 0.1, 		10, 	10000, 0, 0, 0},
+	{"atan2f_c     ", 	atan2f_c, 	atan2f, 0.1, 		10, 	10000, 0, 0, 0},
+	{"atan2f_neon  ", 	atan2f_neon,atan2f, 0.1, 		10, 	10000, 0, 0, 0},
+	
+	{"powf       ", 	powf, 		powf, 	1, 			10, 	10000, 0, 0, 0},
+	{"powf_c     ", 	powf_c, 	powf, 	1, 			10, 	10000, 0, 0, 0},
+	{"powf_neon  ", 	powf_neon, 	powf, 	1, 			10, 	10000, 0, 0, 0},
+
+	{"fmodf       ", 	fmodf, 		fmodf, 	1, 			10, 	10000, 0, 0, 0},
+	{"fmodf_c     ", 	fmodf_c, 	fmodf, 	1, 			10, 	10000, 0, 0, 0},
+	{"fmodf_neon  ", 	fmodf_neon, fmodf, 	1, 			10, 	10000, 0, 0, 0},
+
+};
+
+// Sweep tst->func over [rng0, rng1) in num steps, recording its max absolute,
+// max relative (percent) and RMS error against tst->bench, then time a second
+// sweep of tst->func alone and store the elapsed time in tst->time.
+void 
+test_mathfunc1(test1_t *tst)
+{
+	float x;
+	float dx = (tst->rng1 - tst->rng0) / ((float)tst->num);
+
+	tst->emaxabs = tst->xmaxabs = 0;
+	tst->emaxrel = tst->xmaxrel = 0;
+	tst->erms = 0;
+	for(x = tst->rng0; x < tst->rng1 ; x += dx){	
+		float r = (tst->func)((float)x);
+		float rr = (tst->bench)((float)x);
+		float dr = fabs(r - rr);
+		float drr = dr * (100.0f / rr);
+		tst->erms += dr*dr;
+		if (dr > tst->emaxabs){
+			tst->emaxabs = dr;
+			tst->xmaxabs = x;
+		}
+		// Ignore near-zero reference values, where the ratio blows up (same cut-off as test_mathfunc2).
+		if (drr > tst->emaxrel && fabsf(rr) > 0.0001){
+			tst->emaxrel = drr;
+			tst->xmaxrel = x;
+		}
+	}
+	tst->erms = sqrt(tst->erms / ((float) tst->num));
+	
+	// Start stamp of the timed run.
+#ifdef WIN32
+	tst->time = (1000 * clock()) / (CLOCKS_PER_SEC / 1000);
+#else
+	tst->time = sceKernelGetSystemTimeWide();
+#endif
+
+	for(x = tst->rng0; x < tst->rng1 ; x += dx){	
+		(tst->func)((float)x);
+	}
+
+	// Elapsed time = end stamp - start stamp, on both platforms.
+#ifdef WIN32
+	tst->time = (1000 * clock()) / (CLOCKS_PER_SEC / 1000) - tst->time;
+#else
+	tst->time = sceKernelGetSystemTimeWide() - tst->time;
+#endif
+}
+// Sweep tst->func over [rng0, rng1) x [rng0, rng1) with step d on both axes,
+// recording max absolute and max relative (percent) error against tst->bench,
+// then time a second sweep of tst->func alone. tst->erms is not written here.
+void
+test_mathfunc2(test2_t *tst)
+{
+	float x, y;
+	float rng = tst->rng1 - tst->rng0;
+	float d = (rng * rng) / ((float) tst->num);
+
+	tst->emaxabs = tst->xmaxabs = 0;
+	tst->emaxrel = tst->xmaxrel = 0;
+	for(y = (tst->rng0); y < (tst->rng1) ; y += d){	
+		for(x = (tst->rng0); x < (tst->rng1); x += d){	
+			float r = (tst->func)((float)x, y);
+			float rr = (tst->bench)((float)x, y);
+			float dr = fabs(r - rr);
+			float drr = dr * (100.0f / rr);
+			if (dr > tst->emaxabs){
+				tst->emaxabs = dr;
+				tst->xmaxabs = x;
+			}
+			if (drr > tst->emaxrel && fabsf(rr) > 0.0001){
+				tst->emaxrel = drr;
+				tst->xmaxrel = x;
+			}
+		}
+	}
+	
+	// Start stamp of the timed run.
+#ifdef WIN32
+	tst->time = (1000 * clock()) / (CLOCKS_PER_SEC / 1000) ;
+#else
+	tst->time = sceKernelGetSystemTimeWide();
+#endif
+
+	for(y = tst->rng0; y < tst->rng1 ; y += d){	
+		for(x = tst->rng0; x < tst->rng1 ; x += d){	
+			(tst->func)((float)x, (float)y);
+		}
+	}
+
+	// Elapsed time = end stamp - start stamp, on both platforms.
+#ifdef WIN32
+	tst->time = (1000 * clock()) / (CLOCKS_PER_SEC / 1000) - tst->time;
+#else
+	tst->time = sceKernelGetSystemTimeWide() - tst->time;
+#endif
+}
+// Time each *_c vector helper (dot, normalize, cross) against its *_neon counterpart over testnum calls on random inputs, and log results and timings.
+void test_vectorfunc()
+{
+	float v0[4], v1[4], d[4];
+	
+	for(int i=0;i<4;i++)
+	{
+		v0[i] = 10*randf() - 5;
+		v1[i] = 10*randf() - 5;
+		d[i] = 10*randf() - 5;		
+	}
+	
+	int testnum = 5000000;
+	struct rusage ru;	// not referenced anywhere in this function
+	int v2t[3], v3t[3], v4t[3];	// per size: [0] start, [1] after the C loop, [2] after the NEON loop
+	float r;
+	
+	LOG("\n");
+	
+	//dot 2
+	v2t[0] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		r = dot2_c(v0, v1);
+	};
+	v2t[1] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		r = dot2_neon(v0, v1);
+	};
+	v2t[2] = sceKernelGetSystemTimeWide();
+
+	r = dot2_c(v0, v1);
+	LOG("dot2_c = %f\n", r);
+	r = dot2_neon(v0, v1);
+	LOG("dot2_neon = %f\n", r);
+	
+	LOG("dot2: c=%i \t neon=%i \t rate=%.2f \n", v2t[1] - v2t[0], v2t[2] - v2t[1], 
+	(float)(v2t[1] - v2t[0]) / (float)(v2t[2] - v2t[1]));
+
+	//normalize 2
+	v2t[0] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		normalize2_c(v0, d);
+	};
+	v2t[1] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		normalize2_neon(v0, d);
+	};
+	v2t[2] = sceKernelGetSystemTimeWide();
+
+
+	normalize2_c(v0, d);
+	LOG("normalize2_c = [%.2f, %.2f]\n", d[0], d[1]);
+	normalize2_neon(v0, d);
+	LOG("normalize2_neon = [%.2f, %.2f]\n", d[0], d[1]);
+	
+	LOG("normalize2: c=%i \t neon=%i \t rate=%.2f \n", v2t[1] - v2t[0], v2t[2] - v2t[1], 
+	(float)(v2t[1] - v2t[0]) / (float)(v2t[2] - v2t[1]));
+	LOG("\n");
+
+	
+	//dot 3
+	v3t[0] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		r = dot3_c(v0, v1);
+	};	
+	v3t[1] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		r = dot3_neon(v0, v1);
+	};
+	v3t[2] = sceKernelGetSystemTimeWide();
+
+	r = dot3_c(v0, v1);
+	LOG("dot3_c = %f\n", r);
+	r = dot3_neon(v0, v1);
+	LOG("dot3_neon = %f\n", r);
+	
+	LOG("dot3: c=%i \t neon=%i \t rate=%.2f \n", v3t[1] - v3t[0], v3t[2] - v3t[1], 
+	(float)(v3t[1] - v3t[0]) / (float)(v3t[2] - v3t[1]));
+
+	//normalize 3
+	v3t[0] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		normalize3_c(v0, d);
+	};	
+	v3t[1] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		normalize3_neon(v0, d);
+	};	
+	v3t[2] = sceKernelGetSystemTimeWide();
+
+
+	normalize3_c(v0, d);
+	LOG("normalize3_c = [%.2f, %.2f, %.2f]\n", d[0], d[1], d[2]);
+	normalize3_neon(v0, d);
+	LOG("normalize3_neon = [%.2f, %.2f, %.2f]\n", d[0], d[1], d[2]);
+	
+	LOG("normalize3: c=%i \t neon=%i \t rate=%.2f \n", v3t[1] - v3t[0], v3t[2] - v3t[1], 
+	(float)(v3t[1] - v3t[0]) / (float)(v3t[2] - v3t[1]));
+
+	//cross 3	
+	v3t[0] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		cross3_c(v0, v1, d);
+	};
+	v3t[1] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		cross3_neon(v0, v1, d);
+	};
+	v3t[2] = sceKernelGetSystemTimeWide();
+
+
+	cross3_c(v0, v1, d);
+	LOG("cross3_c = [%.2f, %.2f, %.2f]\n", d[0], d[1], d[2]);
+	cross3_neon(v0, v1, d);
+	LOG("cross3_neon = [%.2f, %.2f, %.2f]\n", d[0], d[1], d[2]);
+	
+	LOG("cross3: c=%i \t neon=%i \t rate=%.2f \n", v3t[1] - v3t[0], v3t[2] - v3t[1], 
+	(float)(v3t[1] - v3t[0]) / (float)(v3t[2] - v3t[1]));
+	LOG("\n");
+
+
+	//dot 4
+	v4t[0] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		r = dot4_c(v0, v1);
+	};
+	v4t[1] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		r = dot4_neon(v0, v1);
+	};
+	v4t[2] = sceKernelGetSystemTimeWide();
+
+	r = dot4_c(v0, v1);
+	LOG("dot4_c = %f\n", r);
+	r = dot4_neon(v0, v1);
+	LOG("dot4_neon = %f\n", r);
+	
+	LOG("dot4: c=%i \t neon=%i \t rate=%.2f \n", v4t[1] - v4t[0], v4t[2] - v4t[1], 
+	(float)(v4t[1] - v4t[0]) / (float)(v4t[2] - v4t[1]));
+	
+	//normalize 4
+	v4t[0] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		normalize4_c(v0, d);
+	};	
+	v4t[1] = sceKernelGetSystemTimeWide();
+	for(int i=0;i < testnum; i++)
+	{
+		normalize4_neon(v0, d);
+	};	
+	v4t[2] = sceKernelGetSystemTimeWide();
+
+
+	normalize4_c(v0, d);
+	LOG("normalize4_c = [%.2f, %.2f, %.2f, %.2f]\n", d[0], d[1], d[2], d[3]);
+	normalize4_neon(v0, d);
+	LOG("normalize4_neon = [%.2f, %.2f, %.2f, %.2f]\n", d[0], d[1], d[2], d[3]);
+	
+	LOG("normalize4: c=%i \t neon=%i \t rate=%.2f \n", v4t[1] - v4t[0], v4t[2] - v4t[1], 
+	(float)(v4t[1] - v4t[0]) / (float)(v4t[2] - v4t[1]));
+	LOG("\n");
+
+
+}
+
+
+// Time each *_c matrix helper (matmul2/3/4, matvec2/3/4) against its *_neon counterpart over testnum calls on random inputs, and log results and timings.
+void test_matrixfunc()
+{
+	float m0[16], m1[16], m2[16];	// m0, m1: random inputs; m2: output buffer reused by every test
+	int m2t[3], m3t[3], m4t[3];	// per size: [0] start, [1] after the C loop, [2] after the NEON loop
+	
+	int i;
+	int testnum = 1000000;
+	struct rusage ru;	// not referenced anywhere in this function
+	
+	for(int i=0;i<16;i++)
+	{
+		m0[i] = 10.0f * randf() - 5.0f; 
+		m1[i] = 10.0f * randf() - 5.0f; 
+		m2[i] = 10.0f * randf() - 5.0f; 
+	}
+
+
+	//matmul2 
+	m2t[0] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matmul2_c(m0, m1, m2);	
+	}
+	m2t[1] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matmul2_neon(m0, m1, m2);
+	}
+	m2t[2] = sceKernelGetSystemTimeWide();
+
+	matmul2_c(m0, m1, m2);	
+	LOG("matmul2_c = \n");
+	LOG("\t\t\t|%.2f, %.2f|\n", m2[0], m2[2]);
+	LOG("\t\t\t|%.2f, %.2f|\n", m2[1], m2[3]);
+
+	matmul2_neon(m0, m1, m2);	
+	LOG("matmul2_neon = \n");
+	LOG("\t\t\t|%.2f, %.2f|\n", m2[0], m2[2]);
+	LOG("\t\t\t|%.2f, %.2f|\n", m2[1], m2[3]);
+	
+	LOG("matmul2: c=%i \t neon=%i \t rate=%.2f \n", m2t[1] - m2t[0], m2t[2] - m2t[1], 
+		(float)(m2t[1] - m2t[0]) / (float)(m2t[2] - m2t[1]));
+
+
+	//matvec2 
+	m2t[0] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matvec2_c(m0, m1, m2);	
+	}
+	m2t[1] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matvec2_neon(m0, m1, m2);
+	}
+	m2t[2] = sceKernelGetSystemTimeWide();
+
+	memset(m2, 0, 4*sizeof(float));
+	matvec2_c(m0, m1, m2);	
+	LOG("matvec2_c = |%.2f, %.2f|\n", m2[0], m2[1]);
+	
+	memset(m2, 0, 4*sizeof(float));
+	matvec2_neon(m0, m1, m2);	
+	LOG("matvec2_neon = |%.2f, %.2f|\n", m2[0], m2[1]);
+
+	LOG("matvec2: c=%i \t neon=%i \t rate=%.2f \n", m2t[1] - m2t[0], m2t[2] - m2t[1], 
+		(float)(m2t[1] - m2t[0]) / (float)(m2t[2] - m2t[1]));
+
+	//MAT3
+	m3t[0] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matmul3_c(m0, m1, m2);	
+	}
+	m3t[1] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matmul3_neon(m0, m1, m2);
+	}
+	m3t[2] = sceKernelGetSystemTimeWide();
+
+	memset(m2, 0, 9*sizeof(float));
+	matmul3_c(m0, m1, m2);	
+	LOG("matmul3_c =\n");
+	LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[0], m2[3], m2[6]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[1], m2[4], m2[7]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[2], m2[5], m2[8]);
+	
+	memset(m2, 0, 9*sizeof(float));
+	matmul3_neon(m0, m1, m2);	
+	LOG("matmul3_neon =\n");
+	LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[0], m2[3], m2[6]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[1], m2[4], m2[7]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f|\n", m2[2], m2[5], m2[8]);
+	
+	LOG("matmul3: c=%i \t neon=%i \t rate=%.2f \n", m3t[1] - m3t[0], m3t[2] - m3t[1], 
+		(float)(m3t[1] - m3t[0]) / (float)(m3t[2] - m3t[1]));
+
+	//matvec3
+	m3t[0] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matvec3_c(m0, m1, m2);	
+	}
+	m3t[1] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matvec3_neon(m0, m1, m2);
+	}
+	m3t[2] = sceKernelGetSystemTimeWide();
+
+	memset(m2, 0, 4*sizeof(float));
+	matvec3_c(m0, m1, m2);	
+	LOG("matvec3_c = |%.2f, %.2f, %.2f|\n", m2[0], m2[1], m2[2]);
+
+	memset(m2, 0, 4*sizeof(float));
+	matvec3_neon(m0, m1, m2);	
+	LOG("matvec3_neon = |%.2f, %.2f, %.2f|\n", m2[0], m2[1], m2[2]);
+	
+	LOG("matvec3: c=%i \t neon=%i \t rate=%.2f \n", m3t[1] - m3t[0], m3t[2] - m3t[1], 
+		(float)(m3t[1] - m3t[0]) / (float)(m3t[2] - m3t[1]));
+
+	//MAT4
+	m4t[0] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matmul4_c(m0, m1, m2);	
+	}
+	m4t[1] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matmul4_neon(m0, m1, m2);
+	}
+	m4t[2] = sceKernelGetSystemTimeWide();
+
+	memset(m2, 0, 16*sizeof(float));
+	matmul4_c(m0, m1, m2);	
+	LOG("matmul4_c =\n");
+	LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[0], m2[4], m2[8], m2[12]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[1], m2[5], m2[9], m2[13]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[2], m2[6], m2[10], m2[14]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[3], m2[7], m2[11], m2[15]);
+	
+	memset(m2, 0, 16*sizeof(float));
+	matmul4_neon(m0, m1, m2);	
+	LOG("matmul4_neon =\n");
+	LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[0], m2[4], m2[8], m2[12]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[1], m2[5], m2[9], m2[13]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[2], m2[6], m2[10], m2[14]);
+	LOG("\t\t\t|%.2f, %.2f, %.2f, %.2f|\n", m2[3], m2[7], m2[11], m2[15]);
+	
+	LOG("matmul4: c=%i \t neon=%i \t rate=%.2f \n", m4t[1] - m4t[0], m4t[2] - m4t[1], 
+		(float)(m4t[1] - m4t[0]) / (float)(m4t[2] - m4t[1]));
+
+	//matvec4
+	m4t[0] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matvec4_c(m0, m1, m2);	
+	}
+	m4t[1] = sceKernelGetSystemTimeWide();
+	for(i = 0; i < testnum; i++){
+		matvec4_neon(m0, m1, m2);
+	}
+	m4t[2] = sceKernelGetSystemTimeWide();
+
+	memset(m2, 0, 4*sizeof(float));
+	matvec4_c(m0, m1, m2);	
+	LOG("matvec4_c = |%.2f, %.2f, %.2f, %f|\n", m2[0], m2[1], m2[2], m2[3]);
+
+	memset(m2, 0, 4*sizeof(float));
+	matvec4_neon(m0, m1, m2);	
+	LOG("matvec4_neon = |%.2f, %.2f, %.2f, %f|\n", m2[0], m2[1], m2[2], m2[3]);
+	
+	LOG("matvec4: c=%i \t neon=%i \t rate=%.2f \n", m4t[1] - m4t[0], m4t[2] - m4t[1], 
+		(float)(m4t[1] - m4t[0]) / (float)(m4t[2] - m4t[1]));
+
+
+}
+// Entry point: log the matrix/vector benchmarks, then one result row per test1[] and test2[] entry.
+int main(int argc, char** argv)
+{	
+
+	int i, ii;
+#if 1
+	LOG("RUNFAST: Disabled \n");
+#else
+	LOG("RUNFAST: Enabled \n");
+	enable_runfast();
+#endif
+	srand(time(NULL));
+
+#if 1
+	//matrix and vector benchmarks:
+	LOG("------------------------------------------------------------------------------------------------------\n");	
+	LOG("MATRIX FUNCTION TESTS \n");	
+	LOG("------------------------------------------------------------------------------------------------------\n");	
+	
+	test_matrixfunc();
+	test_vectorfunc();
+
+	LOG("------------------------------------------------------------------------------------------------------\n");	
+	LOG("CMATH FUNCTION TESTS \n");	
+	LOG("------------------------------------------------------------------------------------------------------\n");	
+	LOG("Function\tRange\t\tNumber\tABS Max Error\tREL Max Error\tRMS Error\tTime\tRate\n");	
+	LOG("------------------------------------------------------------------------------------------------------\n");	
+	for(i = 0; i < (int)(sizeof(test1) / sizeof(test1[0])); i++){	// bound follows the table size
+		test_mathfunc1(&test1[i]);	
+		
+		ii = i - (i % 3);	// first row of this group of three: the baseline for the rate column
+		LOG("%s\t", test1[i].name);
+		LOG("[%.2f, %.2f]\t", test1[i].rng0, test1[i].rng1);
+		LOG("%i\t", test1[i].num);
+		LOG("%.2e\t", test1[i].emaxabs);
+		LOG("%.2e%%\t", test1[i].emaxrel);
+		LOG("%.2e\t", test1[i].erms);
+		LOG("%i\t", test1[i].time);
+		LOG("x%.2f\t", (float)test1[ii].time / test1[i].time);
+		LOG("\n");
+	}
+	for(i = 0; i < (int)(sizeof(test2) / sizeof(test2[0])); i++){	// bound follows the table size
+		test_mathfunc2(&test2[i]);
+	
+		ii = i - (i % 3);	// first row of this group of three: the baseline for the rate column
+		
+		LOG("%s\t", test2[i].name);
+		LOG("[%.2f, %.2f]\t", test2[i].rng0, test2[i].rng1);
+		LOG("%i\t", test2[i].num);
+		LOG("%.2e\t", test2[i].emaxabs);
+		LOG("%.2e%%\t", test2[i].emaxrel);
+		LOG("%.2e\t", test2[i].erms);
+		LOG("%i\t", test2[i].time);
+		LOG("x%.2f\t", (float)test2[ii].time / test2[i].time);
+		LOG("\n");
+	}
+	
+#else
+
+
+	float x = 0;
+	for(x = -M_PI_2; x < M_PI_2; x+= 0.01)
+	{
+		LOG("x=%.2f\t in=%.2f\t c=%.2f\t neon=%.2f \n", x, sinhf(x), sinhf_c(x), sinhf_neon(x));
+	}
+
+#endif
+	
+	return 0;
+} 
diff --git a/source/math_acosf.c b/source/math_acosf.c
new file mode 100644
index 0000000000..59a22b2985
--- /dev/null
+++ b/source/math_acosf.c
@@ -0,0 +1,67 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+/*
+Test func : acosf(x)
+Test Range: -1.0 < x < 1.0
+Peak Error:	~0.005%
+RMS  Error: ~0.001%
+*/
+
+const float __acosf_pi_2 = M_PI_2;	//pi/2 as a float; the minuend in acos(x) = pi/2 - asin(x)
+
+float acosf_c(float x)
+{
+	const float asin_x = asinf_c(x);	//plain C arcsine approximation
+	return __acosf_pi_2 - asin_x;		//acos(x) = pi/2 - asin(x)
+}
+
+float acosf_neon_hfp(float x)
+{	//NOTE(review): there is no C return statement; the value is handed back in s0 by the asm, so this is only usable when __MATH_NEON is defined.
+#ifdef __MATH_NEON
+	asinf_neon_hfp(x);								//expected to leave asin(x) in d0
+	asm volatile (
+	"vdup.f32	 	d1, %0					\n\t"	//d1 = {pi/2, pi/2};
+	"vsub.f32	 	d0, d1, d0				\n\t"	//d0 = d1 - d0 = pi/2 - asin(x);
+	:: "r"(__acosf_pi_2) : "d0", "d1"				//declare the NEON registers written above
+	);
+#endif
+}
+
+float acosf_neon_sfp(float x)
+{
+#ifndef __MATH_NEON
+	return acosf_c(x);								//no NEON: plain C implementation
+#else
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (assumes x is passed in r0 - confirm against the build ABI)
+	acosf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0, the value left by the hfp routine
+#endif
+}
+
+
+
diff --git a/source/math_asinf.c b/source/math_asinf.c
new file mode 100644
index 0000000000..0ae8ef9b84
--- /dev/null
+++ b/source/math_asinf.c
@@ -0,0 +1,183 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+/*
+Test func : asinf(x)
+Test Range: -1.0 < x < 1.0
+Peak Error:	~0.005%
+RMS  Error: ~0.001%
+*/
+
+
+const float __asinf_lut[4] = {	//odd-power polynomial coefficients; the NEON routine fetches all four with one vld1 into {d4, d5}
+	0.105312459675071, 	//p7
+	0.169303418571894,	//p3
+	0.051599985887214, 	//p5
+	0.999954835104825	//p1
+}; 
+
+const float __asinf_pi_2 = M_PI_2;	//pi/2 as a float; offset used when |x| > 0.5
+
+float asinf_c(float x)
+{
+	//Branch-free asin(x) approximation: comparisons become 0/1 floats that are multiplied into the result.
+	float a, b, c, d, r, ax;
+	int m;
+	
+	union {
+		float f;
+		int i;
+	} xx;
+
+	ax = fabs(x);				//ax = |x|
+	d = 0.5;
+	d = d - ax*0.5;				//d = (1 - |x|) / 2
+		
+	//fast invsqrt approx: integer-trick seed, then two Newton steps -> xx.f ~ 1/sqrt(d)
+	xx.f = d;
+	xx.i = 0x5F3759DF - (xx.i >> 1);		//VRSQRTE
+	c = d * xx.f;
+	b = (3.0f - c * xx.f) * 0.5;		//VRSQRTS
+	xx.f = xx.f * b;		
+	c = d * xx.f;
+	b = (3.0f - c * xx.f) * 0.5;
+    xx.f = xx.f * b;	
+
+	//fast inverse approx: reciprocal of the value above -> xx.f ~ sqrt((1 - |x|) / 2)
+	d = xx.f;
+	m = 0x3F800000 - (xx.i & 0x7F800000);	//exponent offset that rebases xx to the exponent of 1.0
+	xx.i = xx.i + m;
+	xx.f = 1.41176471f - 0.47058824f * xx.f;	//linear seed on the rebased value
+	xx.i = xx.i + m;							//apply the exponent offset again to scale the seed back
+	b = 2.0 - xx.f * d;							//two Newton steps (VRECPS-style)
+	xx.f = xx.f * b;	
+	b = 2.0 - xx.f * d;
+	xx.f = xx.f * b;
+	
+	//if |x| > 0.5 -> ax = sqrt((1-|x|)/2), offset d = pi/2, scale c = -2; otherwise d = 0, c = 1
+	xx.f = xx.f - ax;	
+	a = (ax > 0.5f);
+	d = __asinf_pi_2 * a;
+	c = 1.0f - 3.0f * a;
+	ax = ax + xx.f * a;
+		
+	//polynomial evaluation: r = p1*ax + p3*ax^3 + p5*ax^5 + p7*ax^7
+	xx.f = ax * ax;	
+	a = (__asinf_lut[0] * ax) * xx.f + (__asinf_lut[2] * ax);
+	b = (__asinf_lut[1] * ax) * xx.f + (__asinf_lut[3] * ax);
+	xx.f = xx.f * xx.f;
+	r = b + a * xx.f; 
+	r = d + c * r;				//r = p(ax), or pi/2 - 2*p(ax) when |x| > 0.5
+
+	a = r + r;
+	b = (x < 0.0f);
+	r = r - a * b;				//if x < 0 -> r = -r
+	return r;
+}
+
+
+float asinf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	//In: x in s0. Out: asin(x) in s0 (no C return statement). Scratch: d0-d7 and d16.
+	"vdup.f32	 	d0, d0[0]				\n\t"	//d0 = {x, x};
+	"vdup.f32	 	d4, %1					\n\t"	//d4 = {pi/2, pi/2};
+	"vmov.f32	 	d6, d0					\n\t"	//d6 = d0;
+	"vabs.f32	 	d0, d0					\n\t"	//d0 = fabs(d0) ;
+
+	"vmov.f32	 	d5, #0.5				\n\t"	//d5 = 0.5;
+	"vmls.f32	 	d5, d0, d5				\n\t"	//d5 = d5 - d0*d5;
+
+	//fast invsqrt approx
+	"vmov.f32 		d1, d5					\n\t"	//d1 = d5
+	"vrsqrte.f32 	d5, d5					\n\t"	//d5 = ~ 1.0 / sqrt(d5)
+	"vmul.f32 		d2, d5, d1				\n\t"	//d2 = d5 * d1
+	"vrsqrts.f32 	d3, d2, d5				\n\t"	//d3 = (3 - d5 * d2) / 2 	
+	"vmul.f32 		d5, d5, d3				\n\t"	//d5 = d5 * d3
+	"vmul.f32 		d2, d5, d1				\n\t"	//d2 = d5 * d1	
+	"vrsqrts.f32 	d3, d2, d5				\n\t"	//d3 = (3 - d5 * d2) / 2	
+	"vmul.f32 		d5, d5, d3				\n\t"	//d5 = d5 * d3	
+		
+	//fast reciprocal approximation
+	"vrecpe.f32		d1, d5					\n\t"	//d1 = ~ 1 / d5; 
+	"vrecps.f32		d2, d1, d5				\n\t"	//d2 = 2.0 - d1 * d5; 
+	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
+	"vrecps.f32		d2, d1, d5				\n\t"	//d2 = 2.0 - d1 * d5; 
+	"vmul.f32		d5, d1, d2				\n\t"	//d5 = d1 * d2; 
+	
+	//if |x| > 0.5 -> ax = sqrt((1-ax)/2), r = pi/2
+	"vsub.f32		d5, d0, d5				\n\t"	//d5 = d0 - d5; 
+	"vmov.f32	 	d2, #0.5				\n\t"	//d2 = 0.5;
+	"vcgt.f32	 	d3, d0, d2				\n\t"	//d3 = (d0 > d2), all-ones mask when true;
+	"vmov.f32		d1, #3.0 				\n\t"	//d1 = 3.0; 	
+	"vshr.u32	 	d3, #31					\n\t"	//d3 = d3 >> 31, i.e. 0 or 1;
+	"vmov.f32		d16, #1.0 				\n\t"	//d16 = 1.0; 	
+	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3;	
+	"vmls.f32		d0, d5, d3[0]			\n\t"	//d0 = d0 - d5 * d3[0]; 	
+	"vmul.f32		d7, d4, d3[0] 			\n\t"	//d7 = d4 * d3[0]; 		
+	"vmls.f32		d16, d1, d3[0] 			\n\t"	//d16 = d16 - d1 * d3[0]; 	
+		
+	//polynomial:
+	"vmul.f32 		d2, d0, d0				\n\t"	//d2 = d0*d0 = {ax^2, ax^2}	
+	"vld1.32 		{d4, d5}, [%0]			\n\t"	//d4 = {p7, p3}, d5 = {p5, p1}
+	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
+	"vmul.f32 		q0, q2, d0[0]			\n\t"	//q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
+	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}		
+	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}		
+
+	"vmla.f32 		d7, d1, d16				\n\t"	//d7 = d7 + d1*d16		
+
+	"vadd.f32 		d2, d7, d7				\n\t"	//d2 = d7 + d7		
+	"vclt.f32	 	d3, d6, #0				\n\t"	//d3 = (d6 < 0)	
+	"vshr.u32	 	d3, #31					\n\t"	//d3 = d3 >> 31;
+	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3	
+	"vmls.f32 		d7, d2, d3[0]			\n\t"	//d7 = d7 - d2 * d3[0];
+
+	"vmov.f32 		s0, s15					\n\t"	//s0 = s15 (= d7[1])
+
+	:: "r"(__asinf_lut),  "r"(__asinf_pi_2) 
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"	//d16 is written above, so it must be listed too
+	);
+#endif
+}
+
+
+float asinf_neon_sfp(float x)
+{
+#ifndef __MATH_NEON
+	return asinf_c(x);								//no NEON: plain C implementation
+#else
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (assumes x is passed in r0 - confirm against the build ABI)
+	asinf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0, the value left by the hfp routine
+#endif
+}
+
+
+
+
diff --git a/source/math_atan2f.c b/source/math_atan2f.c
new file mode 100644
index 0000000000..d076a04c04
--- /dev/null
+++ b/source/math_atan2f.c
@@ -0,0 +1,170 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __atan2f_lut[4] = {	//odd-power polynomial coefficients; the NEON routine fetches all four with one vld1 into {d4, d5}
+	-0.0443265554792128,	//p7
+	-0.3258083974640975,	//p3
+	+0.1555786518463281,	//p5
+	+0.9997878412794807  	//p1
+}; 
+ 
+const float __atan2f_pi_2 = M_PI_2;	//pi/2 as a float; offset used when |y/x| > 1
+
+float atan2f_c(float y, float x)
+{
+	float a, b, c, r, xx;
+	int m;
+	union {
+		float f;
+		int i;
+	} xinv;
+
+	//fast inverse approximation (2x newton): xinv.f ~ 1 / |x|
+	xx = fabs(x);
+	xinv.f = xx;
+	m = 0x3F800000 - (xinv.i & 0x7F800000);	//exponent offset that rebases |x| to the exponent of 1.0
+	xinv.i = xinv.i + m;
+	xinv.f = 1.41176471f - 0.47058824f * xinv.f;	//linear seed on the rebased value
+	xinv.i = xinv.i + m;							//apply the exponent offset again to scale the seed back
+	b = 2.0 - xinv.f * xx;
+	xinv.f = xinv.f * b;	
+	b = 2.0 - xinv.f * xx;
+	xinv.f = xinv.f * b;
+	
+	c = fabs(y * xinv.f);					//c = |y / x|
+
+	//fast inverse approximation (2x newton): xinv.f ~ 1 / c
+	xinv.f = c;
+	m = 0x3F800000 - (xinv.i & 0x7F800000);
+	xinv.i = xinv.i + m;
+	xinv.f = 1.41176471f - 0.47058824f * xinv.f;
+	xinv.i = xinv.i + m;
+	b = 2.0 - xinv.f * c;
+	xinv.f = xinv.f * b;	
+	b = 2.0 - xinv.f * c;
+	xinv.f = xinv.f * b;
+	
+	//if c > 1.0 -> c = -1/c, r = pi/2 (selected by multiplying with the 0/1 flag a)
+	xinv.f = xinv.f + c;
+	a = (c > 1.0f);
+	c = c - a * xinv.f;
+	r = a * __atan2f_pi_2;
+	
+	//polynomial evaluation: r += p1*c + p3*c^3 + p5*c^5 + p7*c^7
+	xx = c * c;	
+	a = (__atan2f_lut[0] * c) * xx + (__atan2f_lut[2] * c);
+	b = (__atan2f_lut[1] * c) * xx + (__atan2f_lut[3] * c);
+	xx = xx * xx;
+	r = r + a * xx; 
+	r = r + b;
+
+	//determine quadrant and test for small x.
+	b = M_PI;
+	b = b - 2.0f * r;
+	r = r + (x < 0.0f) * b;				//x < 0 -> r = pi - r
+	b = (fabs(x) < 0.000001f);
+	c = !b;
+	r = c * r;
+	r = r + __atan2f_pi_2 * b;			//|x| < 1e-6 -> r = pi/2
+	b = r + r;
+	r = r - (y < 0.0f) * b;				//y < 0 -> r = -r
+	
+	return r;
+}
+
+float atan2f_neon_hfp(float y, float x)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	//In: s0 = y, s1 = x. Out: s0. NOTE(review): evaluates atan(y/x) only; the x < 0 and x ~ 0 fix-ups of atan2f_c are not replicated here.
+	"vdup.f32	 	d17, d0[1]				\n\t"	//d17 = {x, x};
+	"vdup.f32	 	d16, d0[0]				\n\t"	//d16 = {y, y};
+	
+	//1.0 / x
+	"vrecpe.f32		d18, d17				\n\t"	//d18 = ~ 1 / d17; 
+	"vrecps.f32		d19, d18, d17			\n\t"	//d19 = 2.0 - d18 * d17; 
+	"vmul.f32		d18, d18, d19			\n\t"	//d18 = d18 * d19; 
+	"vrecps.f32		d19, d18, d17			\n\t"	//d19 = 2.0 - d18 * d17; 
+	"vmul.f32		d18, d18, d19			\n\t"	//d18 = d18 * d19; 
+
+	//y * (1.0 /x)
+	"vmul.f32		d0, d16, d18			\n\t"	//d0 = d16 * d18; 
+
+	"vdup.f32	 	d4, %1					\n\t"	//d4 = {pi/2, pi/2};
+	"vmov.f32	 	d6, d0					\n\t"	//d6 = d0;
+	"vabs.f32	 	d0, d0					\n\t"	//d0 = fabs(d0) ;
+
+	//fast reciprocal approximation
+	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
+
+	//if |y/x| > 1.0 -> ax = -1/ax, r = pi/2
+	"vadd.f32		d1, d1, d0				\n\t"	//d1 = d1 + d0; 
+	"vmov.f32	 	d2, #1.0				\n\t"	//d2 = 1.0;
+	"vcgt.f32	 	d3, d0, d2				\n\t"	//d3 = (d0 > d2), all-ones mask when true;
+	"vshr.u32	 	d3, #31					\n\t"	//d3 = d3 >> 31, so the mask becomes 0 or 1 (without this the vcvt below yields 2^32);
+	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3;
+	"vmls.f32		d0, d1, d3				\n\t"	//d0 = d0 - d1 * d3; 	
+	"vmul.f32		d7, d3, d4				\n\t"	//d7 = d3 * d4; 	
+		
+	//polynomial:
+	"vmul.f32 		d2, d0, d0				\n\t"	//d2 = d0*d0 = {ax^2, ax^2}	
+	"vld1.32 		{d4, d5}, [%0]			\n\t"	//d4 = {p7, p3}, d5 = {p5, p1}
+	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
+	"vmul.f32 		q0, q2, d0[0]			\n\t"	//q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
+	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}		
+	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}		
+	"vadd.f32 		d1, d1, d7				\n\t"	//d1 = d1 + d7		
+	
+	"vadd.f32 		d2, d1, d1				\n\t"	//d2 = d1 + d1		
+	"vclt.f32	 	d3, d6, #0				\n\t"	//d3 = (d6 < 0)	
+	"vshr.u32	 	d3, #31					\n\t"	//d3 = d3 >> 31, same mask-to-0/1 step as above;
+	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3	
+	"vmls.f32 		d1, d3, d2				\n\t"	//d1 = d1 - d2 * d3;
+
+	"vmov.f32 		s0, s3					\n\t"	//s0 = s3
+	:: "r"(__atan2f_lut),  "r"(__atan2f_pi_2) 
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16", "d17", "d18", "d19"	//d16-d19 are written above as well
+	);
+#endif
+}
+
+
+float atan2f_neon_sfp(float y, float x)	//argument order (y, x), matching atan2f_c and atan2f_neon_hfp
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (assumes the first argument, y, is passed in r0)
+	asm volatile ("vmov.f32 s1, r1 		\n\t");	//s1 = r1 (assumes the second argument, x, is passed in r1)
+	atan2f_neon_hfp(y, x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0, the value left by the hfp routine
+#else
+	return atan2f_c(y, x);							//same (y, x) order as the NEON path; previously the two were swapped here
+#endif
+}
diff --git a/source/math_atanf.c b/source/math_atanf.c
new file mode 100644
index 0000000000..c983756dd2
--- /dev/null
+++ b/source/math_atanf.c
@@ -0,0 +1,149 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __atanf_lut[4] = {	//odd-power polynomial coefficients; the NEON routine fetches all four with one vld1 into {d4, d5}
+	-0.0443265554792128,	//p7
+	-0.3258083974640975,	//p3
+	+0.1555786518463281,	//p5
+	+0.9997878412794807  	//p1
+}; 
+ 
+const float __atanf_pi_2 = M_PI_2;	//pi/2 as a float; offset used when |x| > 1
+    
+float atanf_c(float x)
+{
+	//Branch-free atan(x) approximation: comparisons become 0/1 floats that are multiplied into the result.
+	float a, b, r, xx;
+	int m;
+	
+	union {
+		float f;
+		int i;
+	} xinv, ax;
+
+	ax.f = fabs(x);
+	
+	//fast inverse approximation (2x newton): xinv.f ~ 1 / |x|
+	xinv.f = ax.f;
+	m = 0x3F800000 - (xinv.i & 0x7F800000);	//exponent offset that rebases |x| to the exponent of 1.0
+	xinv.i = xinv.i + m;
+	xinv.f = 1.41176471f - 0.47058824f * xinv.f;	//linear seed on the rebased value
+	xinv.i = xinv.i + m;							//apply the exponent offset again to scale the seed back
+	b = 2.0 - xinv.f * ax.f;
+	xinv.f = xinv.f * b;	
+	b = 2.0 - xinv.f * ax.f;
+	xinv.f = xinv.f * b;
+	
+	//if |x| > 1.0 -> ax = -1/ax, r = pi/2
+	xinv.f = xinv.f + ax.f;
+	a = (ax.f > 1.0f);
+	ax.f = ax.f - a * xinv.f;
+	r = a * __atanf_pi_2;
+	
+	//polynomial evaluation: b = p1*ax + p3*ax^3 + p5*ax^5 + p7*ax^7
+	xx = ax.f * ax.f;	
+	a = (__atanf_lut[0] * ax.f) * xx + (__atanf_lut[2] * ax.f);
+	b = (__atanf_lut[1] * ax.f) * xx + (__atanf_lut[3] * ax.f);
+	xx = xx * xx;
+	b = b + a * xx; 
+	r = r + b;
+
+	//if x < 0 -> r = -r
+	a = 2 * r;
+	b = (x < 0.0f);
+	r = r - a * b;
+
+	return r;
+}
+
+
+float atanf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	//In: x in s0. Out: atan(x) in s0 (no C return statement). Scratch: d0-d7.
+	"vdup.f32	 	d0, d0[0]				\n\t"	//d0 = {x, x};
+
+	"vdup.f32	 	d4, %1					\n\t"	//d4 = {pi/2, pi/2};
+	"vmov.f32	 	d6, d0					\n\t"	//d6 = d0;
+	"vabs.f32	 	d0, d0					\n\t"	//d0 = fabs(d0) ;
+
+	//fast reciprocal approximation
+	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
+
+		
+	//if |x| > 1.0 -> ax = -1/ax, r = pi/2
+	"vadd.f32		d1, d1, d0				\n\t"	//d1 = d1 + d0; 
+	"vmov.f32	 	d2, #1.0				\n\t"	//d2 = 1.0;
+	"vcgt.f32	 	d3, d0, d2				\n\t"	//d3 = (d0 > d2), all-ones mask when true;
+	"vshr.u32	 	d3, #31					\n\t"	//d3 = d3 >> 31, i.e. 0 or 1;
+	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3;	
+	"vmls.f32		d0, d1, d3[0]			\n\t"	//d0 = d0 - d1 * d3[0]; 	
+	"vmul.f32		d7, d4, d3[0] 			\n\t"	//d7 = d4 * d3[0]; 	
+	
+	//polynomial:
+	"vmul.f32 		d2, d0, d0				\n\t"	//d2 = d0*d0 = {ax^2, ax^2}	
+	"vld1.32 		{d4, d5}, [%0]			\n\t"	//d4 = {p7, p3}, d5 = {p5, p1}
+	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
+	"vmul.f32 		q0, q2, d0[0]			\n\t"	//q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
+	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}		
+	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}		
+	"vadd.f32 		d1, d1, d7				\n\t"	//d1 = d1 + d7		
+
+	"vadd.f32 		d2, d1, d1				\n\t"	//d2 = d1 + d1		
+	"vclt.f32	 	d3, d6, #0				\n\t"	//d3 = (d6 < 0)	
+	"vshr.u32	 	d3, #31					\n\t"	//d3 = d3 >> 31, i.e. 0 or 1;
+	"vcvt.f32.u32	d3, d3					\n\t"	//d3 = (float) d3	
+	"vmls.f32 		d1, d3, d2				\n\t"	//d1 = d1 - d2 * d3;
+
+	"vmov.f32 		s0, s3					\n\t"	//s0 = s3 (= d1[1])
+
+	:: "r"(__atanf_lut),  "r"(__atanf_pi_2) 
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
+	);
+
+#endif
+}
+
+
+float atanf_neon_sfp(float x)
+{
+#ifndef __MATH_NEON
+	return atanf_c(x);								//no NEON: plain C implementation
+#else
+	asm volatile ("vdup.f32 d0, r0 		\n\t");	//d0 = {r0, r0} (assumes x is passed in r0 - confirm against the build ABI)
+	atanf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0, the value left by the hfp routine
+#endif
+}
+
+
+
diff --git a/source/math_ceilf.c b/source/math_ceilf.c
new file mode 100644
index 0000000000..1432efee73
--- /dev/null
+++ b/source/math_ceilf.c
@@ -0,0 +1,71 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Assumes the floating point value |x| < 2147483648
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+float ceilf_c(float x)
+{
+	//Truncate toward zero first; the int round-trip is why |x| must stay below 2147483648.
+	const int whole = (int) x;
+	const float truncated = (float) whole;
+	//Truncation already rounds negative values up; only values above the truncation need +1.
+	const int bump = (x > truncated);
+	return truncated + bump;
+}
+
+float ceilf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	//In: x in d0. Out: result in d0 (no C return statement). Same steps as ceilf_c, done on the NEON register.
+	"vcvt.s32.f32 	d1, d0					\n\t"	//d1 = (int) d0;
+	"vcvt.f32.s32 	d1, d1					\n\t"	//d1 = (float) d1;
+	"vcgt.f32 		d0, d0, d1				\n\t"	//d0 = (d0 > d1), all-ones mask when true;
+	"vshr.u32 		d0, #31					\n\t"	//d0 = d0 >> 31, i.e. 0 or 1;
+	"vcvt.f32.u32 	d0, d0					\n\t"	//d0 = (float) d0;
+	"vadd.f32 		d0, d1, d0				\n\t"	//d0 = d1 + d0;
+
+	::: "d0", "d1"
+	);
+		
+#endif
+}
+
+float ceilf_neon_sfp(float x)
+{
+#ifndef __MATH_NEON
+	return ceilf_c(x);								//no NEON: plain C implementation
+#else
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (assumes x is passed in r0 - confirm against the build ABI)
+	ceilf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0, the value left by the hfp routine
+#endif
+}
+
+
diff --git a/source/math_cosf.c b/source/math_cosf.c
new file mode 100644
index 0000000000..cb14498069
--- /dev/null
+++ b/source/math_cosf.c
@@ -0,0 +1,50 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math_neon.h"
+
+float cosf_c(float x)
+{
+	return sinf_c(M_PI_2 + x);	//cos(x) = sin(x + pi/2): reuse the C sine approximation
+}
+
+float cosf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	float xx = x + M_PI_2;
+	return sinf_neon_hfp(xx);
+#endif
+}
+
+float cosf_neon_sfp(float x)
+{
+#ifndef __MATH_NEON
+	return cosf_c(x);								//no NEON: plain C implementation
+#else
+	asm volatile ("vdup.f32 d0, r0 		\n\t");	//d0 = {r0, r0} (assumes x is passed in r0 - confirm against the build ABI)
+	cosf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0, the value left by the hfp routine
+#endif
+}
+
diff --git a/source/math_coshf.c b/source/math_coshf.c
new file mode 100644
index 0000000000..a779b6a7be
--- /dev/null
+++ b/source/math_coshf.c
@@ -0,0 +1,120 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __coshf_rng[2] = {	//range-reduction constants, loaded as {invrange, range} by the NEON routine
+	1.442695041f,
+	0.693147180f
+};
+
+const float __coshf_lut[16] = {	//each coefficient is stored twice so one 64-bit load fills both lanes of a d register
+	0.00019578093328483123,	//p7
+	0.00019578093328483123,	//p7
+	0.0014122663401803872, 	//p6
+	0.0014122663401803872, 	//p6
+	0.008336936973260111, 	//p5
+	0.008336936973260111, 	//p5
+	0.04165989275009526, 	//p4
+	0.04165989275009526, 	//p4
+	0.16666570253074878, 	//p3
+	0.16666570253074878, 	//p3
+	0.5000006143673624, 	//p2
+	0.5000006143673624, 	//p2
+	1.000000059694879, 		//p1
+	1.000000059694879, 		//p1
+	0.9999999916728642,		//p0
+	0.9999999916728642		//p0
+};
+
+  
+float coshf_c(float x)
+{
+	//cosh(x) = (e^x + e^-x) / 2, built from two calls to the C exponential.
+	const float e_pos = expf_c(x);
+	const float e_neg = expf_c(-x);
+
+	//Each term is halved before the two are summed.
+	const float half_pos = e_pos * 0.5f;
+	return half_pos + 0.5f * e_neg;
+}
+
+
+float coshf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	const float *lut = __coshf_lut;	//advanced by the post-incrementing loads below, so it is passed as a read-write operand
+	asm volatile (
+	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}	
+	"fnegs 			s1, s1					\n\t"	//s1 = -s1, so d0 = {x, -x}
+	
+	//Range Reduction:
+	"vld1.32 		d2, [%[rng]]			\n\t"	//d2 = {invrange, range}
+	"vld1.32 		{d16, d17}, [%[lut]]!	\n\t"	//d16 = {p7, p7}, d17 = {p6, p6}
+	"vmul.f32 		d6, d0, d2[0]			\n\t"	//d6 = d0 * d2[0] 
+	"vcvt.s32.f32 	d6, d6					\n\t"	//d6 = (int) d6
+	"vld1.32 		{d18}, [%[lut]]!		\n\t"	//d18 = {p5, p5}
+	"vcvt.f32.s32 	d1, d6					\n\t"	//d1 = (float) d6
+	"vld1.32 		{d19}, [%[lut]]!		\n\t"	//d19 = {p4, p4}
+	"vmls.f32 		d0, d1, d2[1]			\n\t"	//d0 = d0 - d1 * d2[1]
+	"vld1.32 		{d20}, [%[lut]]!		\n\t"	//d20 = {p3, p3}
+		
+	//polynomial:
+	"vmla.f32 		d17, d16, d0			\n\t"	//d17 = d17 + d16 * d0;	
+	"vld1.32 		{d21}, [%[lut]]!		\n\t"	//d21 = {p2, p2}
+	"vmla.f32 		d18, d17, d0			\n\t"	//d18 = d18 + d17 * d0;	
+	"vld1.32 		{d22}, [%[lut]]!		\n\t"	//d22 = {p1, p1}
+	"vmla.f32 		d19, d18, d0			\n\t"	//d19 = d19 + d18 * d0;	
+	"vld1.32 		{d23}, [%[lut]]!		\n\t"	//d23 = {p0, p0}
+	"vmla.f32 		d20, d19, d0			\n\t"	//d20 = d20 + d19 * d0;	
+	"vmla.f32 		d21, d20, d0			\n\t"	//d21 = d21 + d20 * d0;	
+	"vmla.f32 		d22, d21, d0			\n\t"	//d22 = d22 + d21 * d0;	
+	"vmla.f32 		d23, d22, d0			\n\t"	//d23 = d23 + d22 * d0;	
+	
+	//multiply by 2 ^ m 	
+	"vshl.i32 		d6, d6, #23				\n\t"	//d6 = d6 << 23		
+	"vadd.i32 		d0, d23, d6				\n\t"	//d0 = d23 + d6 (integer add into the exponent field)		
+
+	"vdup.f32 		d2, d0[1]				\n\t"	//d2 = s1		
+	"vmov.f32 		d1, #0.5				\n\t"	//d1 = 0.5		
+	"vadd.f32 		d0, d0, d2				\n\t"	//d0 = d0 + d2		
+	"vmul.f32 		d0, d1					\n\t"	//d0 = d0 * d1		
+
+	: [lut] "+r"(lut) : [rng] "r"(__coshf_rng)
+    : "d0", "d1", "q1", "q2", "d6", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23"	//d16-d23 hold the coefficients and are written above
+	);
+#endif
+}
+
+float coshf_neon_sfp(float x)
+{
+#ifndef __MATH_NEON
+	return coshf_c(x);								//no NEON: plain C implementation
+#else
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (assumes x is passed in r0 - confirm against the build ABI)
+	coshf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0, the value left by the hfp routine
+#endif
+}
diff --git a/source/math_expf.c b/source/math_expf.c
new file mode 100644
index 0000000000..011b9495bd
--- /dev/null
+++ b/source/math_expf.c
@@ -0,0 +1,135 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Based on: 
+
+		e ^ x = (1+m) * (2^n)
+		x = log(1+m) + n * log(2)
+		n = (int) (x * 1.0 / log(2))
+		(1+m) = e ^ (x - n * log(2))
+		(1+m) = Poly(x - n * log(2))
+		
+		where Poly(x) is the Minimax approximation of e ^ x over the 
+		range [-Log(2), Log(2)]
+
+Test func : expf(x)
+Test Range: 0 < x < 50
+Peak Error:	~0.00024%
+RMS  Error: ~0.00007%
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __expf_rng[2] = {	//range-reduction constants, loaded as {invrange, range} by the NEON routine
+	1.442695041f,
+	0.693147180f
+};
+
+const float __expf_lut[8] = {	//interleaved so a single vld1 yields q1 = {p0, p4, p2, p6} and q2 = {p1, p5, p3, p7}
+	0.9999999916728642,		//p0
+	0.04165989275009526, 	//p4
+	0.5000006143673624, 	//p2
+	0.0014122663401803872, 	//p6
+	1.000000059694879, 		//p1
+	0.008336936973260111, 	//p5
+	0.16666570253074878, 	//p3
+	0.00019578093328483123	//p7
+};
+
+float expf_c(float x)
+{
+	float a, b, c, d, xx;
+	int m;
+	
+	union {
+		float   f;
+		int 	i;
+	} r;
+		
+	//Range Reduction: m = (int)(x / ln 2), x = x - m * ln 2
+	m = (int) (x * __expf_rng[0]);
+	x = x - ((float) m) * __expf_rng[1];	
+	
+	//Minimax polynomial p0..p7, evaluated with Estrin's scheme
+	a = (__expf_lut[4] * x) + (__expf_lut[0]);
+	b = (__expf_lut[6] * x) + (__expf_lut[2]);
+	c = (__expf_lut[5] * x) + (__expf_lut[1]);
+	d = (__expf_lut[7] * x) + (__expf_lut[3]);
+	xx = x * x;
+	a = a + b * xx; 
+	c = c + d * xx;
+	xx = xx* xx;
+	r.f = a + c * xx; 
+	
+	//multiply by 2 ^ m: add m to the float exponent field (shift done unsigned because m is negative for x < 0)
+	m = (int) ((unsigned int) m << 23);
+	r.i = r.i + m;
+
+	return r.f;
+}
+
+float expf_neon_hfp(float x)
+{	//In: x in s0. Out: result in d0 (no C return statement). Same reduction and polynomial as expf_c.
+#ifdef __MATH_NEON
+	asm volatile (
+	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}
+	
+	//Range Reduction:
+	"vld1.32 		d2, [%0]				\n\t"	//d2 = {invrange, range}
+	"vmul.f32 		d6, d0, d2[0]			\n\t"	//d6 = d0 * d2[0] 
+	"vcvt.s32.f32 	d6, d6					\n\t"	//d6 = (int) d6
+	"vcvt.f32.s32 	d1, d6					\n\t"	//d1 = (float) d6
+	"vmls.f32 		d0, d1, d2[1]			\n\t"	//d0 = d0 - d1 * d2[1]
+		
+	//polynomial:
+	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
+	"vld1.32 		{d2, d3, d4, d5}, [%1]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
+	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
+	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
+	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
+	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
+
+	//multiply by 2 ^ m 	
+	"vshl.i32 		d6, d6, #23				\n\t"	//d6 = d6 << 23		
+	"vadd.i32 		d0, d2, d6				\n\t"	//d0 = d2 + d6 (integer add into the exponent field)		
+
+	:: "r"(__expf_rng), "r"(__expf_lut) 
+    : "d0", "d1", "q1", "q2", "d6"
+	);
+#endif
+}
+
+float expf_neon_sfp(float x)
+{
+#ifndef __MATH_NEON
+	return expf_c(x);								//no NEON: plain C implementation
+#else
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (assumes x is passed in r0 - confirm against the build ABI)
+	expf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0, the value left by the hfp routine
+#endif
+}
+
diff --git a/source/math_fabsf.c b/source/math_fabsf.c
new file mode 100644
index 0000000000..c22244704f
--- /dev/null
+++ b/source/math_fabsf.c
@@ -0,0 +1,58 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math_neon.h"
+
+	
+float fabsf_c(float x)
+{
+	union {
+		int i;
+		float f;
+	} bits;					//view of the float's raw bit pattern
+
+	bits.f = x;
+	bits.i &= 0x7FFFFFFF;	//clear the sign bit, keep exponent and mantissa
+	return bits.f;
+}
+
+float fabsf_neon_hfp(float x)
+{	//In: x in s0. Out: |x| in s0 (no C return statement; the body is empty when __MATH_NEON is undefined).
+#ifdef __MATH_NEON
+	asm volatile (
+	"fabss	 		s0, s0					\n\t"	//s0 = fabs(s0)
+	);
+#endif
+}
+
+float fabsf_neon_sfp(float x)
+{
+#ifndef __MATH_NEON
+	return fabsf_c(x);								//no NEON: plain C implementation
+#else
+	asm volatile (
+	"bic	 		r0, r0, #0x80000000		\n\t"	//r0 = r0 & ~(1 << 31)
+	);
+#endif
+}
diff --git a/source/math_floorf.c b/source/math_floorf.c
new file mode 100644
index 0000000000..091709140e
--- /dev/null
+++ b/source/math_floorf.c
@@ -0,0 +1,66 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Assumes the floating point value |x| < 2147483648
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+float floorf_c(float x)
+{
+	//truncate toward zero first; only valid while x fits in an int
+	float whole = (float) ((int) x);
+
+	if (whole > x)		//truncation moved a negative non-integer upwards
+		whole = whole - 1;
+	return whole;
+}
+
+float floorf_neon_hfp(float x)	//floor(x) for |x| < 2^31; the asm path reads x from d0/s0 (presumably hard-float ABI)
+{
+#ifdef __MATH_NEON
+	asm volatile ("vcvt.s32.f32 	d1, d0					\n\t"	//d1 = (int) d0;
+	"vcvt.f32.s32 	d1, d1					\n\t"	//d1 = (float) d1;
+	"vcgt.f32 		d0, d1, d0				\n\t"	//d0 = (d1 > d0) ? all ones : 0;
+	"vshr.u32 		d0, #31					\n\t"	//d0 = d0 >> 31; (1 where truncation rounded up)
+	"vcvt.f32.u32 	d0, d0					\n\t"	//d0 = (float) d0;
+	"vsub.f32 		d0, d1, d0				\n\t"	//d0 = d1 - d0;
+	::: "d0", "d1");	//the result is left in s0; there is no C-level return on this path
+#else
+	return floorf_c(x);	//portable fallback, same as floorf_neon_sfp uses
+#endif
+}
+
+float floorf_neon_sfp(float x)	//wrapper around floorf_neon_hfp that moves the value between r0 and s0
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (presumably x arrives in r0 under the soft-float ABI - TODO confirm)
+	floorf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0; no return statement, the result is expected to be read from r0
+#else
+	return floorf_c(x);	//portable fallback when __MATH_NEON is not defined
+#endif
+};
diff --git a/source/math_fmodf.c b/source/math_fmodf.c
new file mode 100644
index 0000000000..86af55da34
--- /dev/null
+++ b/source/math_fmodf.c
@@ -0,0 +1,100 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Assumes the floating point value |x / y| < 2,147,483,648
+*/
+
+#include "math_neon.h"
+
+float fmodf_c(float x, float y)
+{
+	union {
+		float f;
+		int   i;
+	} recip;			//approximation of 1/y, refined in place
+	int exp_adj, quot, step;
+	float corr;
+
+	//initial estimate: move the exponent of y to that of 1.0 by integer
+	//arithmetic, apply a linear fit, then apply the same exponent shift again
+	recip.f = y;
+	exp_adj = 0x3F800000 - (recip.i & 0x7F800000);
+	recip.i = recip.i + exp_adj;
+	recip.f = 1.41176471f - 0.47058824f * recip.f;
+	recip.i = recip.i + exp_adj;
+
+	//four Newton-Raphson refinements: r = r * (2 - r * y)
+	for (step = 0; step < 4; step++) {
+		corr = 2.0 - recip.f * y;
+		recip.f = recip.f * corr;
+	}
+
+	//subtract the truncated quotient; assumes |x / y| fits in an int
+	quot = (int)(x * recip.f);
+	x = x - ((float)quot) * y;
+	return x;
+}
+
+
+float fmodf_neon_hfp(float x, float y)	//x - trunc(x / y) * y; the asm path reads x and y from the two lanes of d0 (presumably s0/s1, hard-float ABI)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vdup.f32 		d1, d0[1]					\n\t"	//d1 = {y, y}
+	"vdup.f32 		d0, d0[0]					\n\t"	//d0 = {x, x}
+	//fast reciprocal approximation: estimate followed by four Newton steps
+	"vrecpe.f32 	d2, d1					\n\t"	//d2 = ~1.0 / d1
+	"vrecps.f32		d3, d2, d1				\n\t"	//d3 = 2.0 - d2 * d1; 
+	"vmul.f32		d2, d2, d3				\n\t"	//d2 = d2 * d3; 
+	"vrecps.f32		d3, d2, d1				\n\t"	//d3 = 2.0 - d2 * d1; 
+	"vmul.f32		d2, d2, d3				\n\t"	//d2 = d2 * d3; 
+	"vrecps.f32		d3, d2, d1				\n\t"	//d3 = 2.0 - d2 * d1; 
+	"vmul.f32		d2, d2, d3				\n\t"	//d2 = d2 * d3; 
+	"vrecps.f32		d3, d2, d1				\n\t"	//d3 = 2.0 - d2 * d1; 
+	"vmul.f32		d2, d2, d3				\n\t"	//d2 = d2 * d3; 
+
+	"vmul.f32		d2, d2, d0				\n\t"	//d2 = d2 * d0; 
+	"vcvt.s32.f32	d2, d2					\n\t"	//d2 = (int) d2; 
+	"vcvt.f32.s32	d2, d2					\n\t"	//d2 = (float) d2; 
+	"vmls.f32		d0, d1, d2				\n\t"	//d0 = d0 - d1 * d2; 
+	::: "d0", "d1", "d2", "d3"
+	);
+#else
+	return fmodf_c(x, y);	//portable fallback so the function never falls off the end without a value
+#endif
+}
+
+
+float fmodf_neon_sfp(float x, float y)	//wrapper around fmodf_neon_hfp that moves the values between r0/r1 and s0/s1
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (presumably x under the soft-float ABI - TODO confirm)
+	asm volatile ("vmov.f32 s1, r1 		\n\t");	//s1 = r1 (presumably y)
+	fmodf_neon_hfp(x, y);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0; no return statement, the result is expected to be read from r0
+#else
+	return fmodf_c(x,y);	//portable fallback when __MATH_NEON is not defined
+#endif
+};
diff --git a/source/math_invsqrtf.c b/source/math_invsqrtf.c
new file mode 100644
index 0000000000..c4d2b1d52a
--- /dev/null
+++ b/source/math_invsqrtf.c
@@ -0,0 +1,79 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+float invsqrtf_c(float x)
+{
+	union {
+		float 	f;
+		int 	i;
+	} est;				//running estimate of 1/sqrt(x)
+	float xe, half_corr;
+	int pass;
+
+	//bit-level first guess (the NEON variant uses VRSQRTE for this step)
+	est.f = x;
+	est.i = 0x5F3759DF - (est.i >> 1);
+
+	//two Newton-Raphson passes: est *= (3 - x*est*est) / 2 (VRSQRTS in NEON)
+	for (pass = 0; pass < 2; pass++) {
+		xe = x * est.f;
+		half_corr = (3.0f - xe * est.f) * 0.5;
+		est.f = est.f * half_corr;
+	}
+	return est.f;
+}
+
+float invsqrtf_neon_hfp(float x)	//fast 1/sqrt(x); the asm path reads x from d0/s0 and leaves the result there (presumably hard-float ABI)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
+	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3	
+	::: "d0", "d1", "d2", "d3"
+	);
+#else
+	return invsqrtf_c(x);	//portable fallback so the function never falls off the end without a value
+#endif
+}
+
+float invsqrtf_neon_sfp(float x)	//wrapper around invsqrtf_neon_hfp that moves the value between r0 and s0
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (presumably x arrives in r0 under the soft-float ABI - TODO confirm)
+	invsqrtf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0; no return statement, the result is expected to be read from r0
+#else
+	return invsqrtf_c(x);	//portable fallback when __MATH_NEON is not defined
+#endif
+};
+
diff --git a/source/math_ldexpf.c b/source/math_ldexpf.c
new file mode 100644
index 0000000000..673158958f
--- /dev/null
+++ b/source/math_ldexpf.c
@@ -0,0 +1,67 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+float ldexpf_c(float m, int e)	//returns m * 2^e by adding e to the exponent field; no handling of zero, overflow or underflow
+{
+	union {
+		float 		f;
+		unsigned int 	u;
+	} r;
+	r.f = m;
+	r.u += ((unsigned int) e << 23);	//unsigned arithmetic: shifting a negative int left is undefined, wrap-around here is not
+	return r.f;
+}
+
+float ldexpf_neon_hfp(float m, int e)	//m * 2^e; the asm path reads m from d0 and e from r0 (presumably hard-float ABI)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"lsl 			r0, r0, #23				\n\t"	//r0 = r0 << 23	
+	"vdup.i32 		d1, r0					\n\t"	//d1 = {r0, r0}
+	"vadd.i32 		d0, d0, d1				\n\t"	//d0 = d0 + d1
+	::: "d0", "d1", "r0");	//r0 is shifted in place, so it is declared clobbered as well
+#else
+	return ldexpf_c(m, e);	//portable fallback so the function never falls off the end without a value
+#endif
+}
+
+float ldexpf_neon_sfp(float m, int e)	//m * 2^e; the asm path reads m from r0 and e from r1 (presumably soft-float ABI)
+{
+#ifdef __MATH_NEON
+	//no return statement on this path: the result is moved into r0 by the asm
+	asm volatile (
+	"lsl 			r1, r1, #23				\n\t"	//r1 = r1 << 23	
+	"vdup.f32 		d0, r0					\n\t"	//d0 = {r0, r0}	
+	"vdup.i32 		d1, r1					\n\t"	//d1 = {r1, r1}
+	"vadd.i32 		d0, d0, d1				\n\t"	//d0 = d0 + d1
+	"vmov.f32 		r0, s0					\n\t"	//r0 = s0
+	::: "d0", "d1", "r1"
+	);
+#else
+	return ldexpf_c(m,e);	//portable fallback when __MATH_NEON is not defined
+#endif
+}
diff --git a/source/math_log10f.c b/source/math_log10f.c
new file mode 100644
index 0000000000..f68912f0fe
--- /dev/null
+++ b/source/math_log10f.c
@@ -0,0 +1,135 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Based on: 
+
+		log10(x) = log10((1+m) * (2^n))
+		log10(x) = n * log10(2) + log10(1 + m)
+		log10(1+m) = Poly(1+m)
+		
+		where Poly(x) is the Minimax approximation of log10(x) over the 
+		range [1, 2]
+
+Test func : log10f(x)
+Test Range: 1 < x < 10000
+Peak Error:	~0.000040%
+RMS  Error: ~0.000008%
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __log10f_rng =  0.3010299957f;	//log10(2); multiplied by the extracted exponent in log10f_c / log10f_neon_hfp
+
+const float __log10f_lut[8] = {	//polynomial coefficients; stored in the order the two quad-register loads in log10f_neon_hfp expect
+	-0.99697286229624, 		//p0
+	-1.07301643912502, 		//p4
+	-2.46980061535534, 		//p2
+	-0.07176870463131, 		//p6
+	2.247870219989470, 		//p1
+	0.366547581117400, 		//p5
+	1.991005185100089, 		//p3
+	0.006135635201050,		//p7
+};
+
+float log10f_c(float x)	//fast log10(x) approximation for positive x; see the file header for the tested range and error
+{
+	float a, b, c, d, xx;
+	int m;
+	
+	union {
+		float   f;
+		int 	i;
+	} r;
+	
+	//extract exponent: m = unbiased exponent, r.f = x with its exponent reset to that of 1.0
+	r.f = x;
+	m = (r.i >> 23);
+	m = m - 127;
+	r.i = (int) ((unsigned int) r.i - ((unsigned int) m << 23));	//unsigned: m is negative for x < 1 and << on a negative int is undefined
+		
+	//polynomial in r.f, evaluated with Estrin's scheme
+	xx = r.f * r.f;
+	a = (__log10f_lut[4] * r.f) + (__log10f_lut[0]);
+	b = (__log10f_lut[6] * r.f) + (__log10f_lut[2]);
+	c = (__log10f_lut[5] * r.f) + (__log10f_lut[1]);
+	d = (__log10f_lut[7] * r.f) + (__log10f_lut[3]);
+	a = a + b * xx;
+	c = c + d * xx;
+	xx = xx * xx;
+	r.f = a + c * xx;
+
+	//add exponent
+	r.f = r.f + ((float) m) * __log10f_rng;
+
+	return r.f;
+}
+
+float log10f_neon_hfp(float x)	//fast log10(x); the asm path reads x from d0[0] and writes the result to s0 (presumably hard-float ABI)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vdup.f32		d0, d0[0]				\n\t"	//d0 = {x,x};
+	
+	//extract exponent
+	"vmov.i32		d2, #127				\n\t"	//d2 = 127;
+	"vshr.u32		d6, d0, #23				\n\t"	//d6 = d0 >> 23;
+	"vsub.i32		d6, d6, d2				\n\t"	//d6 = d6 - d2;
+	"vshl.u32		d1, d6, #23				\n\t"	//d1 = d6 << 23;
+	"vsub.i32		d0, d0, d1				\n\t"	//d0 = d0 - d1;
+
+	//polynomial:
+	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
+	"vld1.32 		{d2, d3, d4, d5}, [%1]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
+	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
+	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
+	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
+	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
+
+	//add exponent 	
+	"vdup.32 		d7, %0					\n\t"	//d7 = {rng, rng}
+	"vcvt.f32.s32 	d6, d6					\n\t"	//d6 = (float) d6
+	"vmla.f32 		d2, d6, d7				\n\t"	//d2 = d2 + d6 * d7		
+
+	"vmov.f32 		s0, s4					\n\t"	//s0 = s4
+	:: "r"(__log10f_rng), "r"(__log10f_lut) 
+    : "d0", "d1", "q1", "q2", "d6", "d7"
+	);
+#else
+	return log10f_c(x);	//portable fallback so the function never falls off the end without a value
+#endif
+}
+
+
+float log10f_neon_sfp(float x)	//wrapper around log10f_neon_hfp that moves the value between r0 and s0
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (presumably x arrives in r0 under the soft-float ABI - TODO confirm)
+	log10f_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0; no return statement, the result is expected to be read from r0
+#else
+	return log10f_c(x);	//portable fallback when __MATH_NEON is not defined
+#endif
+};
diff --git a/source/math_logf.c b/source/math_logf.c
new file mode 100644
index 0000000000..61761363e5
--- /dev/null
+++ b/source/math_logf.c
@@ -0,0 +1,135 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Based on: 
+
+		log(x) = log((1+m) * (2^n))
+		log(x) = n * log(2) + log(1 + m)
+		log(1+m) = Poly(1+m)
+		
+		where Poly(x) is the Minimax approximation of log(x) over the 
+		range [1, 2]
+
+Test func : logf(x)
+Test Range: 1 < x < 10000
+Peak Error:	~0.000601%
+RMS  Error: ~0.000005%
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __logf_rng =  0.693147180f;	//log(2); multiplied by the extracted exponent in logf_c / logf_neon_hfp
+
+const float __logf_lut[8] = {	//polynomial coefficients; stored in the order the two quad-register loads in logf_neon_hfp expect
+	-2.295614848256274, 	//p0
+	-2.470711633419806, 	//p4
+	-5.686926051100417, 	//p2
+	-0.165253547131978, 	//p6
+	+5.175912446351073, 	//p1
+	+0.844006986174912, 	//p5
+	+4.584458825456749, 	//p3
+	+0.014127821926000		//p7
+};
+
+float logf_c(float x)	//fast natural-log approximation for positive x; see the file header for the tested range and error
+{
+	float a, b, c, d, xx;
+	int m;
+	
+	union {
+		float   f;
+		int 	i;
+	} r;
+	
+	//extract exponent: m = unbiased exponent, r.f = x with its exponent reset to that of 1.0
+	r.f = x;
+	m = (r.i >> 23);
+	m = m - 127;
+	r.i = (int) ((unsigned int) r.i - ((unsigned int) m << 23));	//unsigned: m is negative for x < 1 and << on a negative int is undefined
+		
+	//polynomial in r.f, evaluated with Estrin's scheme
+	xx = r.f * r.f;
+	a = (__logf_lut[4] * r.f) + (__logf_lut[0]);
+	b = (__logf_lut[6] * r.f) + (__logf_lut[2]);
+	c = (__logf_lut[5] * r.f) + (__logf_lut[1]);
+	d = (__logf_lut[7] * r.f) + (__logf_lut[3]);
+	a = a + b * xx;
+	c = c + d * xx;
+	xx = xx * xx;
+	r.f = a + c * xx;
+
+	//add exponent
+	r.f = r.f + ((float) m) * __logf_rng;
+
+	return r.f;
+}
+
+float logf_neon_hfp(float x)	//fast log(x); the asm path reads x from d0[0] and writes the result to s0 (presumably hard-float ABI)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vdup.f32		d0, d0[0]				\n\t"	//d0 = {x,x};
+	
+	//extract exponent
+	"vmov.i32		d2, #127				\n\t"	//d2 = 127;
+	"vshr.u32		d6, d0, #23				\n\t"	//d6 = d0 >> 23;
+	"vsub.i32		d6, d6, d2				\n\t"	//d6 = d6 - d2;
+	"vshl.u32		d1, d6, #23				\n\t"	//d1 = d6 << 23;
+	"vsub.i32		d0, d0, d1				\n\t"	//d0 = d0 - d1;
+
+	//polynomial:
+	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
+	"vld1.32 		{d2, d3, d4, d5}, [%1]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
+	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
+	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
+	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
+	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
+
+	//add exponent 	
+	"vdup.32 		d7, %0					\n\t"	//d7 = {rng, rng}
+	"vcvt.f32.s32 	d6, d6					\n\t"	//d6 = (float) d6
+	"vmla.f32 		d2, d6, d7				\n\t"	//d2 = d2 + d6 * d7		
+
+	"vmov.f32 		s0, s4					\n\t"	//s0 = s4
+	:: "r"(__logf_rng), "r"(__logf_lut) 
+    : "d0", "d1", "q1", "q2", "d6", "d7"
+	);
+#else
+	return logf_c(x);	//portable fallback so the function never falls off the end without a value
+#endif
+}
+
+float logf_neon_sfp(float x)	//wrapper around logf_neon_hfp that moves the value between r0 and s0
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//s0 = r0 (presumably x arrives in r0 under the soft-float ABI - TODO confirm)
+	logf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//r0 = s0; no return statement, the result is expected to be read from r0
+#else
+	return logf_c(x);	//portable fallback when __MATH_NEON is not defined
+#endif
+};
+
diff --git a/source/math_mat2.c b/source/math_mat2.c
new file mode 100644
index 0000000000..0baad4b771
--- /dev/null
+++ b/source/math_mat2.c
@@ -0,0 +1,95 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Matrices are specified in column major format:
+
+| a c |
+| b d |
+
+therefore m[2] = c
+*/
+
+#include "math_neon.h"
+
+//matrix matrix multiplication. d = m0 * m1;
+void
+matmul2_c(float m0[4], float m1[4], float d[4])
+{	
+	int col, row;	//column major: element (row, col) is at index 2*col + row
+	for (col = 0; col < 2; col++)
+		for (row = 0; row < 2; row++)
+			d[2*col + row] = m0[row]*m1[2*col] + m0[row + 2]*m1[2*col + 1];
+}
+
+void
+matmul2_neon(float m0[4], float m1[4], float d[4])	//d = m0 * m1 for column-major 2x2 matrices
+{	
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		{d0, d1}, [%0]			\n\t"	//q0 = m0: d0 = m0[0..1], d1 = m0[2..3]
+	"vld1.32 		{d2, d3}, [%1]			\n\t"	//q1 = m1: d2 = m1[0..1], d3 = m1[2..3]
+	
+	"vmul.f32 		d4, d0, d2[0]			\n\t"	//d4 = d0 * m1[0]
+	"vmul.f32 		d5, d0, d3[0]			\n\t"	//d5 = d0 * m1[2]
+	"vmla.f32 		d4, d1, d2[1]			\n\t"	//d4 += d1 * m1[1]
+	"vmla.f32 		d5, d1, d3[1]			\n\t"	//d5 += d1 * m1[3]
+	
+	"vst1.32 		{d4, d5}, [%2] 			\n\t"	//d[0..3] = {d4, d5}
+	:: "r"(m0), "r"(m1), "r"(d) 
+    : "q0", "q1", "q2", "memory"
+	);	
+#else
+	matmul2_c(m0, m1, d);
+#endif
+}
+
+
+//matrix vector multiplication. d = m * v (m is column major: column k starts at m[2*k])
+void
+matvec2_c(float m[4], float v[2], float d[2])
+{
+	for (int row = 0; row < 2; row++)
+		d[row] = m[row]*v[0] + m[row + 2]*v[1];
+}
+
+void
+matvec2_neon(float m[4], float v[2], float d[2])	//d = m * v for a column-major 2x2 matrix
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32        d0, [%1]				\n\t"	//d0 = v
+	"vld1.32 		{d1, d2}, [%0]			\n\t"	//d1 = m[0..1], d2 = m[2..3]
+	
+	"vmul.f32 		d3, d1, d0[0]			\n\t"	//d3 = d1 * v[0]
+	"vmla.f32 		d3, d2, d0[1]			\n\t"	//d3 += d2 * v[1]
+	
+	"vst1.32 		d3, [%2] 				\n\t"	//d[0..1] = d3
+	:: "r"(m), "r"(v), "r"(d) 
+    : "d0", "d1", "d2","d3", "memory"
+	);	
+#else
+	matvec2_c(m, v, d);
+#endif
+}
diff --git a/source/math_mat3.c b/source/math_mat3.c
new file mode 100644
index 0000000000..aae178e179
--- /dev/null
+++ b/source/math_mat3.c
@@ -0,0 +1,131 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Matrices are specified in column major format:
+
+| x0 x2 |
+| x1 x3 |
+
+therefore m[2] = x2
+
+*/
+
+#include "math_neon.h"
+
+//matrix matrix multiplication. d = m0 * m1;
+void
+matmul3_c(float m0[9], float m1[9], float d[9])
+{
+	int col, row;
+
+	//column major storage: element (row, col) is at index 3*col + row.
+	//outputs are written in index order d[0]..d[8], products summed left to right
+	for (col = 0; col < 3; col++)
+		for (row = 0; row < 3; row++)
+			d[3*col + row] = m0[row]*m1[3*col]
+			               + m0[row + 3]*m1[3*col + 1]
+			               + m0[row + 6]*m1[3*col + 2];
+}
+
+void 
+matmul3_neon(float m0[9], float m1[9], float d[9])	//d = m0 * m1 for column-major 3x3 matrices
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		{d0, d1}, [%1]!			\n\t"	//q0 = m1[0..3]
+	"vld1.32 		{d2, d3}, [%1]!			\n\t"	//q1 = m1[4..7]
+	"flds 			s8, [%1]				\n\t"	//s8 (d4[0]) = m1[8]
+	
+	"vld1.32 		{d6, d7}, [%0]			\n\t"	//q3 = m0[0..3] (column 0 in lanes 0..2)
+	"add 			%0, %0, #12				\n\t"	//m0 += 3 floats
+	"vld1.32 		{d8, d9}, [%0]			\n\t"	//q4 = m0[3..6] (column 1 in lanes 0..2)
+	"add 			%0, %0, #12				\n\t"	//m0 += 3 floats
+	"vld1.32 		{d10}, [%0]				\n\t"	//d10 = m0[6..7]
+	"add 			%0, %0, #8				\n\t"	//m0 += 2 floats
+	"flds 			s22, [%0]				\n\t"	//s22 (d11[0]) = m0[8]
+	
+	"vmul.f32 		q6, q3, d0[0] 			\n\t"	//q6 = q3 * m1[0]
+	"vmul.f32 		q7, q3, d1[1] 			\n\t"	//q7 = q3 * m1[3]
+	"vmul.f32 		q8, q3, d3[0] 			\n\t"	//q8 = q3 * m1[6]
+	"vmla.f32 		q6, q4, d0[1] 			\n\t"	//q6 += q4 * m1[1]
+	"vmla.f32 		q7, q4, d2[0] 			\n\t"	//q7 += q4 * m1[4]
+	"vmla.f32 		q8, q4, d3[1] 			\n\t"	//q8 += q4 * m1[7]
+	"vmla.f32 		q6, q5, d1[0] 			\n\t"	//q6 += q5 * m1[2]
+	"vmla.f32 		q7, q5, d2[1] 			\n\t"	//q7 += q5 * m1[5]
+	"vmla.f32 		q8, q5, d4[0] 			\n\t"	//q8 += q5 * m1[8]
+
+	"vmov.f32 		q0, q8 					\n\t"	//q0 = q8, so the last column can be stored via d0 and s2
+	"vst1.32 		{d12, d13}, [%2] 		\n\t"	//d[0..3] = q6 (d[3] is overwritten by the next store)
+	"add 			%2, %2, #12				\n\t"	//d += 3 floats
+	"vst1.32 		{d14, d15}, [%2] 		\n\t"	//d[3..6] = q7 (d[6] is overwritten by the next store)
+	"add 			%2, %2, #12				\n\t"	//d += 3 floats
+	"vst1.32 		{d0}, [%2] 				\n\t"	//d[6..7] = q0[0..1]
+	"add 			%2, %2, #8				\n\t"	//d += 2 floats
+	"fsts 			s2, [%2] 				\n\t"	//d[8] = q0[2]
+	
+	: "+r"(m0), "+r"(m1), "+r"(d): 
+    : "q0", "q1", "q2", "q3", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "memory"
+	);	
+#else
+	matmul3_c(m0, m1, d);
+#endif
+};
+
+//matrix vector multiplication. d = m * v
+void
+matvec3_c(float m[9], float v[3], float d[3])
+{
+	int row;	//m is column major: column k starts at m[3*k]
+	for (row = 0; row < 3; row++)
+		d[row] = m[row]*v[0] + m[row + 3]*v[1] + m[row + 6]*v[2];
+}
+
+void
+matvec3_neon(float m[9], float v[3], float d[3])	//d = m * v for a column-major 3x3 matrix
+{
+#ifdef __MATH_NEON
+	int tmp;	//scratch register holding the 12-byte column stride; written by the asm before it is read
+	asm volatile (
+	"mov 			%3, #12					\n\t"	//tmp = 12
+	"vld1.32 		{d0, d1}, [%1]			\n\t"	//q0 = v[0..3]; NOTE: this 16-byte load reads one float past v[2]
+	"vld1.32 		{d2, d3}, [%0], %3		\n\t"	//q1 = m[0..3], m += 12 bytes
+	"vld1.32 		{d4, d5}, [%0], %3		\n\t"	//q2 = m[3..6], m += 12 bytes
+	"vld1.32 		{d6, d7}, [%0], %3		\n\t"	//q3 = m[6..9]; NOTE: reads one float past m[8]
+	
+	"vmul.f32 		q9, q1, d0[0]			\n\t"	//q9 = q1 * v[0]
+	"vmla.f32 		q9, q2, d0[1]			\n\t"	//q9 += q2 * v[1]
+	"vmla.f32 		q9, q3, d1[0]			\n\t"	//q9 += q3 * v[2]
+	"vmov.f32 		q0, q9					\n\t"	//q0 = q9
+	
+	"vst1.32 		d0, [%2]! 				\n\t"	//d[0..1] = q0[0..1], d += 8 bytes
+	"fsts 			s2, [%2] 				\n\t"	//d[2] = q0[2]
+
+	: "+r"(m), "+r"(v), "+r"(d), "=&r"(tmp):
+    : "q0", "q1", "q2", "q3", "q9", "memory"
+	);	
+#else
+	matvec3_c(m, v, d);
+#endif
+}
diff --git a/source/math_mat4.c b/source/math_mat4.c
new file mode 100644
index 0000000000..5bcf34b596
--- /dev/null
+++ b/source/math_mat4.c
@@ -0,0 +1,144 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Matrices are specified in column major format:
+
+| x0 x2 |
+| x1 x3 |
+
+therefore m[2] = x2
+
+*/
+
+#include "math_neon.h"
+
+//matrix matrix multiplication. d = m0 * m1;
+void
+matmul4_c(float m0[16], float m1[16], float d[16])
+{
+	int col, row;
+
+	//All three matrices are column major, so element (row, col) lives at
+	//index 4*col + row. Each output element is the dot product of one row
+	//of m0 with one column of m1.
+	//Outputs are written in index order d[0]..d[15], and the four products
+	//of each element are summed left to right.
+	for (col = 0; col < 4; col++) {
+		for (row = 0; row < 4; row++) {
+			d[4*col + row] = m0[row]*m1[4*col]
+			               + m0[row + 4]*m1[4*col + 1]
+			               + m0[row + 8]*m1[4*col + 2]
+			               + m0[row + 12]*m1[4*col + 3];
+		}
+	}
+
+}
+
+void 
+matmul4_neon(float m0[16], float m1[16], float d[16])	//d = m0 * m1 for column-major 4x4 matrices
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		{d0, d1}, [%1]!			\n\t"	//q0 = m1
+	"vld1.32 		{d2, d3}, [%1]!			\n\t"	//q1 = m1+4
+	"vld1.32 		{d4, d5}, [%1]!			\n\t"	//q2 = m1+8
+	"vld1.32 		{d6, d7}, [%1]			\n\t"	//q3 = m1+12
+	"vld1.32 		{d16, d17}, [%0]!		\n\t"	//q8 = m0
+	"vld1.32 		{d18, d19}, [%0]!		\n\t"	//q9 = m0+4
+	"vld1.32 		{d20, d21}, [%0]!		\n\t"	//q10 = m0+8
+	"vld1.32 		{d22, d23}, [%0]		\n\t"	//q11 = m0+12
+
+	"vmul.f32 		q12, q8, d0[0] 			\n\t"	//q12 = q8 * d0[0]
+	"vmul.f32 		q13, q8, d2[0] 			\n\t"	//q13 = q8 * d2[0]
+	"vmul.f32 		q14, q8, d4[0] 			\n\t"	//q14 = q8 * d4[0]
+	"vmul.f32 		q15, q8, d6[0]	 		\n\t"	//q15 = q8 * d6[0]
+	"vmla.f32 		q12, q9, d0[1] 			\n\t"	//q12 += q9 * d0[1]
+	"vmla.f32 		q13, q9, d2[1] 			\n\t"	//q13 += q9 * d2[1]
+	"vmla.f32 		q14, q9, d4[1] 			\n\t"	//q14 += q9 * d4[1]
+	"vmla.f32 		q15, q9, d6[1] 			\n\t"	//q15 += q9 * d6[1]
+	"vmla.f32 		q12, q10, d1[0] 		\n\t"	//q12 += q10 * d1[0]
+	"vmla.f32 		q13, q10, d3[0] 		\n\t"	//q13 += q10 * d3[0]
+	"vmla.f32 		q14, q10, d5[0] 		\n\t"	//q14 += q10 * d5[0]
+	"vmla.f32 		q15, q10, d7[0] 		\n\t"	//q15 += q10 * d7[0]
+	"vmla.f32 		q12, q11, d1[1] 		\n\t"	//q12 += q11 * d1[1]
+	"vmla.f32 		q13, q11, d3[1] 		\n\t"	//q13 += q11 * d3[1]
+	"vmla.f32 		q14, q11, d5[1] 		\n\t"	//q14 += q11 * d5[1]
+	"vmla.f32 		q15, q11, d7[1]	 		\n\t"	//q15 += q11 * d7[1]
+
+	"vst1.32 		{d24, d25}, [%2]! 		\n\t"	//d = q12	
+	"vst1.32 		{d26, d27}, [%2]!		\n\t"	//d+4 = q13	
+	"vst1.32 		{d28, d29}, [%2]! 		\n\t"	//d+8 = q14	
+	"vst1.32 		{d30, d31}, [%2]	 	\n\t"	//d+12 = q15	
+
+	: "+r"(m0), "+r"(m1), "+r"(d) : 
+    : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
+	"memory"
+	);	
+#else
+	matmul4_c(m0, m1, d);
+#endif
+}
+
+
+//matrix vector multiplication. d = m * v
+void
+matvec4_c(float m[16], float v[4], float d[4])
+{
+	int row;
+	//m is column major: column k starts at m[4*k]
+	for (row = 0; row < 4; row++)
+		d[row] = m[row]*v[0] + m[row + 4]*v[1] + m[row + 8]*v[2] + m[row + 12]*v[3];
+}
+
+void
+matvec4_neon(float m[16], float v[4], float d[4])	//d = m * v for a column-major 4x4 matrix
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		{d0, d1}, [%1]			\n\t"	//q0 = v
+	"vld1.32 		{d18, d19}, [%0]!		\n\t"	//q9 = m
+	"vld1.32 		{d20, d21}, [%0]!		\n\t"	//q10 = m+4
+	"vld1.32 		{d22, d23}, [%0]!		\n\t"	//q11 = m+8
+	"vld1.32 		{d24, d25}, [%0]!		\n\t"	//q12 = m+12	
+	
+	"vmul.f32 		q13, q9, d0[0]			\n\t"	//q13 = q9 * v[0]
+	"vmla.f32 		q13, q10, d0[1]			\n\t"	//q13 += q10 * v[1]
+	"vmla.f32 		q13, q11, d1[0]			\n\t"	//q13 += q11 * v[2]
+	"vmla.f32 		q13, q12, d1[1]			\n\t"	//q13 += q12 * v[3]
+	
+	"vst1.32 		{d26, d27}, [%2] 		\n\t"	//d[0..3] = q13
+	: "+r"(m)	//read-write: the post-increment loads advance this register, so it must not be input-only
+	: "r"(v), "r"(d) 
+    : "q0", "q9", "q10","q11", "q12", "q13", "memory"
+	);	
+#else
+	matvec4_c(m, v, d);
+#endif
+}
+
+
+
+
+
diff --git a/source/math_modf.c b/source/math_modf.c
new file mode 100644
index 0000000000..f3259710af
--- /dev/null
+++ b/source/math_modf.c
@@ -0,0 +1,71 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Assumes the floating point value |x| < 2,147,483,648
+*/
+
+#include "math_neon.h"
+
+float modf_c(float x, int *i)
+{
+	//the cast truncates toward zero, which yields the integer part
+	const int whole = (int)x;
+	*i = whole;
+	//the fractional part is whatever the truncation dropped
+	return x - (float)whole;
+}
+
+
+float modf_neon_hfp(float x, int *i)
+{
+#ifdef __MATH_NEON	//hard-float hack: x is read from s0 and the fractional part is left in s0
+	asm volatile (	
+	"vcvt.s32.f32	d1, d0					\n\t"	//d1 = (int) d0; 
+	"vcvt.f32.s32	d2, d1					\n\t"	//d2 = (float) d1;
+	"vsub.f32		d0, d0, d2				\n\t"	//d0 = d0 - d2; 
+	"vstr.i32		s2, [%0]				\n\t"	//*i = d1[0] 
+	:: "r"(i) : "d0", "d1", "d2", "memory");	//"memory": the asm stores to *i
+#else
+	return modf_c(x, i);	//portable fallback, same as modf_neon_sfp
+#endif
+}
+
+float modf_neon_sfp(float x, int *i)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vdup.f32 		d0, r0					\n\t"	//d0 = {x, x}	
+	"vcvt.s32.f32	d1, d0					\n\t"	//d1 = (int) d0; 
+	"vcvt.f32.s32	d2, d1					\n\t"	//d2 = (float) d1;
+	"vsub.f32		d0, d0, d2				\n\t"	//d0 = d0 - d2; 
+	"vstr.i32		s2, [r1]				\n\t"	//[r1] = d1[0] 
+	"vmov.f32 		r0, s0					\n\t"	//r0 = d0[0];
+	::: "d0", "d1", "d2", "memory"			//"memory": the asm stores through r1
+	);
+	//soft-float hack: x is taken from r0, i from r1 and the result is left in r0
+#else
+	return modf_c(x, i);
+#endif
+}
diff --git a/source/math_neon.h b/source/math_neon.h
new file mode 100644
index 0000000000..2db33acd87
--- /dev/null
+++ b/source/math_neon.h
@@ -0,0 +1,439 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef __MATH_NEON_H__ 
+#define __MATH_NEON_H__ 
+
+#if !defined(__i386__) && defined(__arm__)
+//if defined neon ASM routines are used, otherwise all calls to *_neon 
+//functions are rerouted to their equivalent *_c function.
+#define __MATH_NEON			
+
+//Default floating point ABI: 0=softfp, 1=hardfp. Only affects the unsuffixed *_neon aliases.
+//You can access the hardfp versions directly via the *_hfp suffix.
+//You can access the softfp versions directly via the *_sfp suffix.
+#define __MATH_FPABI 	1	
+
+#endif
+
+#ifdef GCC	//NOTE(review): compilers predefine __GNUC__, not GCC, so this looks opt-in only - confirm
+#define ALIGN(A) __attribute__ ((aligned (A)))
+#else
+#define ALIGN(A)
+#endif
+
+#ifndef _MATH_H
+#define M_PI		3.14159265358979323846	/* pi */
+#define M_PI_2		1.57079632679489661923	/* pi/2 */
+#define M_PI_4		0.78539816339744830962	/* pi/4 */
+#define M_E			2.7182818284590452354	/* e */
+#define M_LOG2E		1.4426950408889634074	/* log_2 e */
+#define M_LOG10E	0.43429448190325182765	/* log_10 e */
+#define M_LN2		0.69314718055994530942	/* log_e 2 */
+#define M_LN10		2.30258509299404568402	/* log_e 10 */
+#define M_1_PI		0.31830988618379067154	/* 1/pi */
+#define M_2_PI		0.63661977236758134308	/* 2/pi */
+#define M_2_SQRTPI	1.12837916709551257390	/* 2/sqrt(pi) */
+#define M_SQRT2		1.41421356237309504880	/* sqrt(2) */
+#define M_SQRT1_2	0.70710678118654752440	/* 1/sqrt(2) */
+#endif 
+
+#if __MATH_FPABI == 1
+#define sinf_neon		sinf_neon_hfp
+#define cosf_neon		cosf_neon_hfp
+#define	sincosf_neon	sincosf_neon_hfp
+#define tanf_neon		tanf_neon_hfp
+#define atanf_neon		atanf_neon_hfp
+#define atan2f_neon		atan2f_neon_hfp
+#define asinf_neon		asinf_neon_hfp
+#define acosf_neon		acosf_neon_hfp
+#define sinhf_neon		sinhf_neon_hfp
+#define coshf_neon		coshf_neon_hfp
+#define tanhf_neon		tanhf_neon_hfp
+#define expf_neon		expf_neon_hfp
+#define logf_neon		logf_neon_hfp
+#define log10f_neon		log10f_neon_hfp
+#define powf_neon		powf_neon_hfp
+#define floorf_neon		floorf_neon_hfp
+#define ceilf_neon		ceilf_neon_hfp
+#define fabsf_neon		fabsf_neon_hfp
+#define ldexpf_neon		ldexpf_neon_hfp
+#define frexpf_neon		frexpf_neon_hfp
+#define fmodf_neon		fmodf_neon_hfp
+#define modf_neon		modf_neon_hfp
+#define sqrtf_neon		sqrtf_neon_hfp
+#define invsqrtf_neon	invsqrtf_neon_hfp
+
+#define dot2_neon		dot2_neon_hfp
+#define dot3_neon		dot3_neon_hfp
+#define dot4_neon		dot4_neon_hfp
+#else
+#define sinf_neon		sinf_neon_sfp
+#define cosf_neon		cosf_neon_sfp
+#define	sincosf_neon	sincosf_neon_sfp
+#define tanf_neon		tanf_neon_sfp
+#define atanf_neon		atanf_neon_sfp
+#define atan2f_neon		atan2f_neon_sfp
+#define asinf_neon		asinf_neon_sfp
+#define acosf_neon		acosf_neon_sfp
+#define sinhf_neon		sinhf_neon_sfp
+#define coshf_neon		coshf_neon_sfp
+#define tanhf_neon		tanhf_neon_sfp
+#define expf_neon		expf_neon_sfp
+#define logf_neon		logf_neon_sfp
+#define log10f_neon		log10f_neon_sfp
+#define powf_neon		powf_neon_sfp
+#define floorf_neon		floorf_neon_sfp
+#define ceilf_neon		ceilf_neon_sfp
+#define fabsf_neon		fabsf_neon_sfp
+#define ldexpf_neon		ldexpf_neon_sfp
+#define frexpf_neon		frexpf_neon_sfp
+#define fmodf_neon		fmodf_neon_sfp
+#define modf_neon		modf_neon_sfp
+#define sqrtf_neon		sqrtf_neon_sfp
+#define invsqrtf_neon	invsqrtf_neon_sfp
+
+#define dot2_neon		dot2_neon_sfp
+#define dot3_neon		dot3_neon_sfp
+#define dot4_neon		dot4_neon_sfp
+#endif
+
+/* 
+function:	enable_runfast
+			this function enables the floating point runfast mode on the 
+			ARM Cortex A8.  	
+*/
+void		enable_runfast();
+
+
+float dot2_c(float v0[2], float v1[2]);
+float dot2_neon(float v0[2], float v1[2]);
+float dot3_c(float v0[3], float v1[3]);
+float dot3_neon(float v0[3], float v1[3]);
+float dot4_c(float v0[4], float v1[4]);
+float dot4_neon(float v0[4], float v1[4]);
+
+void cross3_c(float v0[3], float v1[3], float d[3]);
+void cross3_neon(float v0[3], float v1[3], float d[3]);
+
+void normalize2_c(float v[2], float d[2]);
+void normalize2_neon(float v[2], float d[2]);
+void normalize3_c(float v[3], float d[3]);
+void normalize3_neon(float v[3], float d[3]);
+void normalize4_c(float v[4], float d[4]);
+void normalize4_neon(float v[4], float d[4]);
+
+/* 
+function:	matmul2
+arguments:  m0 2x2 matrix, m1 2x2 matrix
+return: 	d 2x2 matrix
+expression: d = m0 * m1
+*/
+void		matmul2_c(float m0[4], float m1[4], float d[4]);
+void		matmul2_neon(float m0[4], float m1[4], float d[4]);
+
+/* 
+function:	matmul3
+arguments:  m0 3x3 matrix, m1 3x3 matrix
+return: 	d 3x3 matrix
+expression: d = m0 * m1
+*/
+void		matmul3_c(float m0[9], float m1[9], float d[9]);
+void		matmul3_neon(float m0[9], float m1[9], float d[9]);
+
+/* 
+function:	matmul4
+arguments:  m0 4x4 matrix, m1 4x4 matrix
+return: 	d 4x4 matrix
+expression: d = m0 * m1
+*/
+void		matmul4_c(float m0[16], float m1[16], float d[16]);
+void		matmul4_neon(float m0[16], float m1[16], float d[16]);
+
+/* 
+function:	matvec2
+arguments:  m 2x2 matrix, v 2 element vector
+return: 	d 2 element vector
+expression: d = m * v
+*/
+void		matvec2_c(float m[4], float v[2], float d[2]);
+void		matvec2_neon(float m[4], float v[2], float d[2]);
+
+/* 
+function:	matvec3
+arguments:  m 3x3 matrix, v 3 element vector
+return: 	d 3 element vector
+expression: d = m * v
+*/
+void		matvec3_c(float m[9], float v[3], float d[3]);
+void		matvec3_neon(float m[9], float v[3], float d[3]);
+
+/* 
+function:	matvec4
+arguments:  m 4x4 matrix, v 4 element vector
+return: 	d 4 element vector
+expression: d = m * v
+*/
+void		matvec4_c(float m[16], float v[4], float d[4]);
+void		matvec4_neon(float m[16], float v[4], float d[4]);
+
+/* 
+function:	sinf
+arguments:  x radians
+return: 	the sine function evaluated at x radians.	
+expression: r = sin(x) 	
+*/
+float 		sinf_c(float x);
+float 		sinf_neon_hfp(float x);
+float 		sinf_neon_sfp(float x);
+
+/* 
+function:	cosf
+arguments:  x radians
+return: 	the cosine function evaluated at x radians.	
+expression: r = cos(x) 	
+notes:		computed using cos(x) = sin(x + pi/2)
+*/
+float 		cosf_c(float x);
+float 		cosf_neon_hfp(float x);
+float 		cosf_neon_sfp(float x);
+
+/* 
+function:	sincosf
+arguments:  x radians, r[2] result array.
+return: 	both the sine and the cosine evaluated at x radians.	
+expression: r = {sin(x), cos(x)} 	
+notes:		faster than evaluating separately.
+*/
+void		sincosf_c(float x, float r[2]);
+void		sincosf_neon_hfp(float x, float r[2]);
+void		sincosf_neon_sfp(float x, float r[2]);
+
+/* 
+function:	sinfv
+return: 	the sine function evaluated at x[i] radians 	
+expression: r[i] = sin(x[i])	
+notes:		faster than evaluating individually.
+			r and x can be the same memory location.
+*/
+void		sinfv_c(float *x, int n, float *r);
+void  		sinfv_neon(float *x, int n, float *r);
+
+/* 
+function:	tanf
+return: 	the tangent evaluated at x radians.	
+expression: r = tan(x) 	
+notes:		computed using tan(x) = sin(x) / cos(x)
+*/
+float 		tanf_c(float x);
+float 		tanf_neon_hfp(float x);
+float 		tanf_neon_sfp(float x);
+
+/* 
+function:	atanf
+return: 	the arctangent evaluated at x.	
+expression: r = atan(x) 	
+*/
+float 		atanf_c(float x);
+float 		atanf_neon_hfp(float x);
+float 		atanf_neon_sfp(float x);
+
+/* 
+function:	atan2f
+return: 	the two-argument arctangent evaluated at (y, x).	
+expression: r = atan2(y, x) 	
+*/
+float 		atan2f_c(float y, float x);
+float 		atan2f_neon_hfp(float y, float x);
+float 		atan2f_neon_sfp(float y, float x);
+
+/* 
+function:	asinf
+return: 	the arcsine evaluated at x.	
+expression: r = asin(x) 	
+*/
+float 		asinf_c(float x);
+float 		asinf_neon_hfp(float x);
+float 		asinf_neon_sfp(float x);
+
+/* 
+function:	acosf
+return: 	the arccosine evaluated at x.	
+expression: r = acos(x) 	
+*/
+float 		acosf_c(float x);
+float 		acosf_neon_hfp(float x);
+float 		acosf_neon_sfp(float x);
+
+/* 
+function:	sinhf
+return: 	the hyperbolic sine evaluated at x.	
+expression: r = sinh(x) 	
+*/
+float 		sinhf_c(float x);
+float 		sinhf_neon_hfp(float x);
+float 		sinhf_neon_sfp(float x);
+
+/* 
+function:	coshf
+return: 	the hyperbolic cosine evaluated at x.	
+expression: r = cosh(x) 	
+*/
+float 		coshf_c(float x);
+float 		coshf_neon_hfp(float x);
+float 		coshf_neon_sfp(float x);
+
+/* 
+function:	tanhf
+return: 	the hyperbolic tangent evaluated at x.	
+expression: r = tanh(x) 	
+*/
+float 		tanhf_c(float x);
+float 		tanhf_neon_hfp(float x);
+float 		tanhf_neon_sfp(float x);
+
+/* 
+function:	expf
+return: 	the natural exponential evaluated at x.	
+expression: r = e ** x	
+*/
+float 		expf_c(float x);
+float 		expf_neon_hfp(float x);
+float 		expf_neon_sfp(float x);
+
+/* 
+function:	logf
+return: 	the value of the natural logarithm of x.	
+expression: r = ln(x)	
+notes:		assumes x > 0
+*/
+float 		logf_c(float x);
+float 		logf_neon_hfp(float x);
+float 		logf_neon_sfp(float x);
+
+/* 
+function:	log10f
+return: 	the value of the power 10 logarithm of x.	
+expression: r = log10(x)	
+notes:		assumes x > 0
+*/
+float 		log10f_c(float x);
+float 		log10f_neon_hfp(float x);
+float 		log10f_neon_sfp(float x);
+
+/* 
+function:	powf
+return: 	x raised to the power of n, x ** n.
+expression: r = x ** n	
+notes:		computed using e ** (n * ln(x))
+*/
+float 		powf_c(float x, float n);
+float 		powf_neon_sfp(float x, float n);
+float 		powf_neon_hfp(float x, float n);
+
+/* 
+function:	floorf
+return: 	x rounded down (towards negative infinity) to its nearest 
+			integer value.	
+notes:		assumes |x| < 2 ** 31
+*/
+float 		floorf_c(float x);
+float 		floorf_neon_sfp(float x);
+float 		floorf_neon_hfp(float x);
+
+/* 
+function:	ceilf
+return: 	x rounded up (towards positive infinity) to its nearest 
+			integer value.	
+notes:		assumes |x| < 2 ** 31
+*/
+float 		ceilf_c(float x);
+float 		ceilf_neon_hfp(float x);
+float 		ceilf_neon_sfp(float x);
+
+/* 
+function:	fabsf
+return: 	absolute value of x	
+notes:		assumes |x| < 2 ** 31
+*/
+float 		fabsf_c(float x);
+float 		fabsf_neon_hfp(float x);
+float 		fabsf_neon_sfp(float x);
+
+/* 
+function:	ldexpf
+return: 	the value of m multiplied by 2 to the power of e. 
+expression: r = m * (2 ** e)
+*/
+float 		ldexpf_c(float m, int e);
+float 		ldexpf_neon_hfp(float m, int e);
+float 		ldexpf_neon_sfp(float m, int e);
+
+/* 
+function:	frexpf
+return: 	the exponent and mantissa of x 
+*/
+float 		frexpf_c(float x, int *e);
+float 		frexpf_neon_hfp(float x, int *e);
+float 		frexpf_neon_sfp(float x, int *e);
+
+/* 
+function:	fmodf
+return: 	the remainder of x divided by y, x % y	
+expression: r = x - floor(x / y) * y;
+notes:		assumes that |x / y| < 2 ** 31 
+*/
+float 		fmodf_c(float x, float y);
+float 		fmodf_neon_hfp(float x, float y);
+float 		fmodf_neon_sfp(float x, float y);
+
+/* 
+function:	modf
+return: 	breaks x into the integer (i) and fractional part (return)
+notes:		assumes that |x| < 2 ** 31 
+*/
+float 		modf_c(float x, int *i);
+float 		modf_neon_hfp(float x, int *i);
+float 		modf_neon_sfp(float x, int *i);
+
+/* 
+function:	sqrtf
+return: 	(x^0.5)
+notes:		 
+*/
+float 		sqrtf_c(float x);
+float 		sqrtf_neon_hfp(float x);
+float 		sqrtf_neon_sfp(float x);
+
+
+/* 
+function:	invsqrtf
+return: 	1.0f / (x^0.5)
+notes:		 
+*/
+float 		invsqrtf_c(float x);
+float 		invsqrtf_neon_hfp(float x);
+float 		invsqrtf_neon_sfp(float x);
+
+#endif
diff --git a/source/math_powf.c b/source/math_powf.c
new file mode 100644
index 0000000000..6faed4eeac
--- /dev/null
+++ b/source/math_powf.c
@@ -0,0 +1,182 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Based on x ^ n = exp(n * log(x))
+
+Test func : powf(x, n)
+Test Range: (1,1) < (x, n) < (10, 10)
+Peak Error:	~0.0010%
+RMS  Error: ~0.0002%
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __powf_rng[2] = {
+	1.442695041f,	//invrange: 1 / ln(2)
+	0.693147180f	//range: ln(2)
+};
+
+const float __powf_lut[16] = {
+	-2.295614848256274, 	//p0	log: entries 0-7 are the log polynomial, stored as {p0, p4, p2, p6, p1, p5, p3, p7}
+	-2.470711633419806, 	//p4
+	-5.686926051100417, 	//p2
+	-0.165253547131978, 	//p6
+	+5.175912446351073, 	//p1
+	+0.844006986174912, 	//p5
+	+4.584458825456749, 	//p3
+	+0.014127821926000,		//p7
+	0.9999999916728642,		//p0	exp: entries 8-15 are the exp polynomial, in the same interleaved order
+	0.04165989275009526, 	//p4
+	0.5000006143673624, 	//p2
+	0.0014122663401803872, 	//p6
+	1.000000059694879, 		//p1
+	0.008336936973260111, 	//p5
+	0.16666570253074878, 	//p3
+	0.00019578093328483123	//p7
+};
+
+//Approximates x ** n as exp(n * log(x)), using the two polynomials in __powf_lut.
+float powf_c(float x, float n)
+{
+	float a, b, c, d, xx;
+	int m;
+	
+	union {
+		float   f;
+		int 	i;
+	} r;
+	
+	//extract exponent
+	r.f = x;
+	m = (r.i >> 23);
+	m = m - 127;
+	r.i = (int)((unsigned int)r.i - ((unsigned int)m << 23));	//unsigned: m may be negative
+	
+	//Taylor Polynomial (Estrins)
+	xx = r.f * r.f;
+	a = (__powf_lut[4] * r.f) + (__powf_lut[0]);
+	b = (__powf_lut[6] * r.f) + (__powf_lut[2]);
+	c = (__powf_lut[5] * r.f) + (__powf_lut[1]);
+	d = (__powf_lut[7] * r.f) + (__powf_lut[3]);
+	a = a + b * xx;
+	c = c + d * xx;
+	xx = xx * xx;
+	r.f = a + c * xx;
+
+	//add exponent
+	r.f = r.f + ((float) m) * __powf_rng[1];
+
+	r.f = r.f * n;
+
+	//Range Reduction:
+	m = (int) (r.f * __powf_rng[0]);
+	r.f = r.f - ((float) m) * __powf_rng[1];	
+	
+	//Taylor Polynomial (Estrins)
+	a = (__powf_lut[12] * r.f) + (__powf_lut[8]);
+	b = (__powf_lut[14] * r.f) + (__powf_lut[10]);
+	c = (__powf_lut[13] * r.f) + (__powf_lut[9]);
+	d = (__powf_lut[15] * r.f) + (__powf_lut[11]);
+	xx = r.f * r.f;
+	a = a + b * xx; 
+	c = c + d * xx;
+	xx = xx* xx;
+	r.f = a + c * xx; 
+	
+	//multiply by 2 ^ m by adding m to the exponent field; done in unsigned
+	//arithmetic because left-shifting a negative int is undefined behaviour
+	r.i = (int)((unsigned int)r.i + ((unsigned int)m << 23));
+
+	return r.f;
+}
+
+//hard-float hack: reads x from s0 and n from s1 and leaves the result in s0.
+float powf_neon_hfp(float x, float n)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vdup.f32		d16, d0[1]				\n\t"	//d16 = {y,y};	
+	"vdup.f32		d0, d0[0]				\n\t"	//d0 = {x,x};
+	
+	//extract exponent
+	"vmov.i32		d2, #127				\n\t"	//d2 = 127;
+	"vshr.u32		d6, d0, #23				\n\t"	//d6 = d0 >> 23;
+	"vsub.i32		d6, d6, d2				\n\t"	//d6 = d6 - d2;
+	"vshl.u32		d1, d6, #23				\n\t"	//d1 = d6 << 23;
+	"vsub.i32		d0, d0, d1				\n\t"	//d0 = d0 - d1;
+
+	//polynomial:
+	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
+	"vld1.32 		{d2, d3, d4, d5}, [%1]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} (log);
+	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
+	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
+	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
+	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
+
+	//add exponent 	
+	"vld1.32 		d7, [%0]				\n\t"	//d7 = {invrange, range}
+	"vcvt.f32.s32 	d6, d6					\n\t"	//d6 = (float) d6
+	"vmla.f32 		d2, d6, d7[1]			\n\t"	//d2 = d2 + d6 * d7[1]		
+
+	"vdup.f32 		d0, d2[0]				\n\t"	//d0 = d2[0]		
+	"vmul.f32 		d0, d0, d16				\n\t"	//d0 = d0 * d16	
+
+	//Range Reduction:
+	"vmul.f32 		d6, d0, d7[0]			\n\t"	//d6 = d0 * d7[0] 
+	"vcvt.u32.f32 	d6, d6					\n\t"	//d6 = (unsigned) d6; NOTE(review): powf_c truncates as signed int - confirm
+	"vcvt.f32.u32 	d1, d6					\n\t"	//d1 = (float) d6
+	"vmls.f32 		d0, d1, d7[1]			\n\t"	//d0 = d0 - d1 * d7[1]
+		
+	//polynomial:
+	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0*d0 = {x^2, x^2}	
+	"vld1.32 		{d2, d3, d4, d5}, [%2]	\n\t"	//q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} (exp);
+	"vmla.f32 		q1, q2, d0[0]			\n\t"	//q1 = q1 + q2 * d0[0]		
+	"vmla.f32 		d2, d3, d1[0]			\n\t"	//d2 = d2 + d3 * d1[0]		
+	"vmul.f32 		d1, d1, d1				\n\t"	//d1 = d1 * d1 = {x^4, x^4}	
+	"vmla.f32 		d2, d1, d2[1]			\n\t"	//d2 = d2 + d1 * d2[1]		
+
+	//multiply by 2 ^ m 	
+	"vshl.i32 		d6, d6, #23				\n\t"	//d6 = d6 << 23		
+	"vadd.i32 		d0, d2, d6				\n\t"	//d0 = d2 + d6		
+	:: "r"(__powf_rng), "r"(__powf_lut), "r"(__powf_lut + 8)	//separate pointers: input operands must not be written back
+    : "d0", "d1", "d2","d3", "d4", "d5", "d6", "d7", "d16"
+	);
+#else
+	return powf_c(x, n);
+#endif
+}
+
+float powf_neon_sfp(float x, float n)
+{
+#ifdef __MATH_NEON	//soft-float hack: arguments are taken from r0/r1, the result is left in r0
+	asm volatile ("vmov.f32 s0, r0 		\n\t");
+	asm volatile ("vmov.f32 s1, r1 		\n\t");
+	powf_neon_hfp(x, n);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");
+#else
+	return powf_c(x, n);
+#endif
+}
diff --git a/source/math_runfast.c b/source/math_runfast.c
new file mode 100644
index 0000000000..0d06c0bfc8
--- /dev/null
+++ b/source/math_runfast.c
@@ -0,0 +1,42 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+
+void 
+enable_runfast(void)
+{
+#ifdef __arm__
+	static const unsigned int x = 0x04086060;	//AND mask applied to the current FPSCR value
+	static const unsigned int y = 0x03000000;	//bits 24 and 25, ORed in (flush-to-zero and default-NaN in the ARM FPSCR layout)
+	int r;
+	asm volatile (
+		"fmrx	%0, fpscr			\n\t"	//r = FPSCR
+		"and	%0, %0, %1			\n\t"	//r = r & 0x04086060
+		"orr	%0, %0, %2			\n\t"	//r = r | 0x03000000
+		"fmxr	fpscr, %0			\n\t"	//FPSCR = r
+		: "=&r"(r)	//earlyclobber: %0 is written before %1 and %2 are read
+		: "r"(x), "r"(y)
+	);
+#endif
+}
diff --git a/source/math_sincosf.c b/source/math_sincosf.c
new file mode 100644
index 0000000000..365826f8ff
--- /dev/null
+++ b/source/math_sincosf.c
@@ -0,0 +1,163 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __sincosf_rng[2] = {
+	2.0 / M_PI,	//invrange: 2 / pi
+	M_PI / 2.0	//range: pi / 2
+};
+
+const float __sincosf_lut[8] = {
+	-0.00018365f,	//p7 (every coefficient is stored twice: even index for the sin lane, odd index for the cos lane)
+	-0.00018365f,	//p7
+	+0.00830636f,	//p5
+	+0.00830636f,	//p5
+	-0.16664831f,	//p3
+	-0.16664831f,	//p3
+	+0.99999661f,	//p1
+	+0.99999661f,	//p1
+};
+
+//Computes r[0] = sin(x) and r[1] = cos(x); the cosine is evaluated as sin(x + pi/2).
+void sincosf_c( float x, float r[2])
+{
+	union {
+		float 	f;
+		unsigned int 	i;	//raw bits of f, only used to flip the sign bit
+	} ax, bx;
+	
+	float y;
+	float xx, yy;
+	int m, n, o, p;
+	
+	y = x + __sincosf_rng[1];
+	ax.f = fabsf(x);
+	bx.f = fabsf(y);
+	
+	//Range Reduction:
+	m = (int) (ax.f * __sincosf_rng[0]);	
+	o = (int) (bx.f * __sincosf_rng[0]);	
+	ax.f = ax.f - (((float)m) * __sincosf_rng[1]);
+	bx.f = bx.f - (((float)o) * __sincosf_rng[1]);
+	
+	//Test Quadrant
+	n = m & 1;
+	p = o & 1;
+	ax.f = ax.f - n * __sincosf_rng[1];	
+	bx.f = bx.f - p * __sincosf_rng[1];	
+	m = m >> 1;
+	o = o >> 1;
+	n = n ^ m;
+	p = p ^ o;
+	m = (x < 0.0);
+	o = (y < 0.0);
+	n = n ^ m;	
+	p = p ^ o;	
+	//move bit 0 of n / p into the sign bit; the shift is done on an unsigned
+	//value because shifting a set bit into the sign of an int is undefined
+	ax.i = ax.i ^ ((unsigned int)n << 31);
+	bx.i = bx.i ^ ((unsigned int)p << 31);
+
+	//Taylor Polynomial
+	xx = ax.f * ax.f;	
+	yy = bx.f * bx.f;
+	r[0] = __sincosf_lut[0];
+	r[1] = __sincosf_lut[1];
+	r[0] = r[0] * xx + __sincosf_lut[2];
+	r[1] = r[1] * yy + __sincosf_lut[3];
+	r[0] = r[0] * xx + __sincosf_lut[4];
+	r[1] = r[1] * yy + __sincosf_lut[5];
+	r[0] = r[0] * xx + __sincosf_lut[6];
+	r[1] = r[1] * yy + __sincosf_lut[7];
+	r[0] = r[0] * ax.f;
+	r[1] = r[1] * bx.f;
+}
+
+//Stores {sin(x), cos(x)} to r[0], r[1]; NEON path with a C fallback.
+void sincosf_neon_hfp(float x, float r[2])
+{
+//HACK: Assumes for softfp that r1 = x, and for hardfp that s0 = x.
+#ifdef __MATH_NEON
+	asm volatile (
+	//{x, y} = {x, x + pi/2}
+	"vdup.f32 		d1, d0[0]				\n\t"	//d1 = {x, x}
+	"vld1.32 		d3, [%1]				\n\t"	//d3 = {invrange, range}
+	"vadd.f32 		d0, d1, d3				\n\t"	//d0 = d1 + d3
+	"vmov.f32 		s0, s2					\n\t"	//d0[0] = d1[0]	
+	"vabs.f32 		d1, d0					\n\t"	//d1 = {abs(x), abs(y)}
+	
+	//Range Reduction:
+	"vmul.f32 		d2, d1, d3[0]			\n\t"	//d2 = d1 * d3[0] 
+	"vcvt.u32.f32 	d2, d2					\n\t"	//d2 = (int) d2
+	"vcvt.f32.u32 	d4, d2					\n\t"	//d4 = (float) d2
+	"vmls.f32 		d1, d4, d3[1]			\n\t"	//d1 = d1 - d4 * d3[1]
+	
+	//Checking Quadrant:
+	//ax = ax - (k&1) * M_PI_2
+	"vmov.i32	 	d4, #1					\n\t"	//d4 = 1
+	"vand.i32	 	d4, d4, d2				\n\t"	//d4 = d4 & d2
+	"vcvt.f32.u32 	d5, d4					\n\t"	//d5 = (float) d4
+	"vmls.f32 		d1, d5, d3[1]			\n\t"	//d1 = d1 - d5 * d3[1]
+
+	//ax = ax ^ ((k & 1) ^ (k >> 1) ^ (x < 0) << 31)
+	"vshr.u32 		d3, d2, #1				\n\t"	//d3 = d2 >> 1
+	"veor.i32 		d4, d4, d3				\n\t"	//d4 = d4 ^ d3	
+	"vclt.f32 		d3, d0, #0				\n\t"	//d3 = (d0 < 0.0)
+	"veor.i32 		d4, d4, d3				\n\t"	//d4 = d4 ^ d3	
+	"vshl.i32 		d4, d4, #31				\n\t"	//d4 = d4 << 31
+	"veor.i32 		d0, d1, d4				\n\t"	//d0 = d1 ^ d4
+	
+	//polynomial:
+	"vldm 			%2, {d2, d3}	 		\n\t"	//d2 = {p7, p7}, d3 = {p5, p5}
+	"vmul.f32 		d1, d0, d0				\n\t"	//d1 = d0 * d0 = {x^2, y^2}
+	"vldm 			%3, {d4}				\n\t"	//d4 = {p3, p3}
+	"vmla.f32 		d3, d2, d1				\n\t"	//d3 = d3 + d2 * d1;	
+	"vldm	 		%4, {d5}				\n\t"	//d5 = {p1, p1}
+	"vmla.f32 		d4, d3, d1				\n\t"	//d4 = d4 + d3 * d1;	
+	"vmla.f32 		d5, d4, d1				\n\t"	//d5 = d5 + d4 * d1;	
+	"vmul.f32 		d5, d5, d0				\n\t"	//d5 = d5 * d0;	
+	
+	"vstm.f32 		%0, {d5}				\n\t"	//r[0] = d5[0], r[1]=d5[1];	
+	: "+r"(r)
+	: "r"(__sincosf_rng), "r"(__sincosf_lut), "r"(__sincosf_lut + 4), "r"(__sincosf_lut + 6)	//no write-back on input operands
+    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
+	);
+#else
+	sincosf_c(x, r);
+#endif
+}
+
+void sincosf_neon_sfp(float x, float r[2])
+{
+#ifdef __MATH_NEON	//soft-float hack: x is copied from r0 into d0 before calling the hard-float routine
+	asm volatile ("vdup.f32 d0, r0 		\n\t");
+	sincosf_neon_hfp(x, r);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");
+#else 
+    sincosf_c(x, r);
+#endif
+}
+
diff --git a/source/math_sinf.c b/source/math_sinf.c
new file mode 100644
index 0000000000..257f219672
--- /dev/null
+++ b/source/math_sinf.c
@@ -0,0 +1,128 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <math.h>
+#include "math_neon.h"
+
+static const float __sinf_rng[2] ALIGN(16) = {	//attribute goes before '=' so a non-empty ALIGN still parses
+	2.0 / M_PI,	//invrange: 2 / pi
+	M_PI / 2.0	//range: pi / 2
+};
+
+static const float __sinf_lut[4] ALIGN(16) = {
+	-0.00018365f,	//p7
+	-0.16664831f,	//p3
+	+0.00830636f,	//p5
+	+0.99999661f,	//p1
+};
+
+//Approximates sin(x): reduce |x| by multiples of pi/2, fix up the sign, evaluate an odd polynomial.
+float sinf_c(float x)
+{
+	union {
+		float 	f;
+		unsigned int 	i;	//raw bits of f, only used to flip the sign bit
+	} ax;
+	
+	float r, a, b, xx;
+	int m, n;
+	
+	ax.f = fabsf(x);
+
+	//Range Reduction:
+	m = (int) (ax.f * __sinf_rng[0]);	
+	ax.f = ax.f - (((float)m) * __sinf_rng[1]);
+
+	//Test Quadrant
+	n = m & 1;
+	ax.f = ax.f - n * __sinf_rng[1];	
+	m = m >> 1;
+	n = n ^ m;
+	m = (x < 0.0);
+	n = n ^ m;	
+	//bit 0 of n goes to the sign bit; unsigned shift, since n << 31 on an int is undefined
+	ax.i = ax.i ^ ((unsigned int)n << 31);
+
+	//Taylor Polynomial (Estrins)
+	xx = ax.f * ax.f;	
+	a = (__sinf_lut[0] * ax.f) * xx + (__sinf_lut[2] * ax.f);
+	b = (__sinf_lut[1] * ax.f) * xx + (__sinf_lut[3] * ax.f);
+	xx = xx * xx;
+	r = b + a * xx;
+	return r;
+}
+
+//hard-float hack: reads x from s0 and leaves sin(x) in s0; falls back to sinf_c without NEON.
+float sinf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		d3, [%0]				\n\t"	//d3 = {invrange, range}
+	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}
+	"vabs.f32 		d1, d0					\n\t"	//d1 = {ax, ax}
+	
+	"vmul.f32 		d2, d1, d3[0]			\n\t"	//d2 = d1 * d3[0] 
+	"vcvt.u32.f32 	d2, d2					\n\t"	//d2 = (int) d2
+	"vmov.i32	 	d5, #1					\n\t"	//d5 = 1	
+	"vcvt.f32.u32 	d4, d2					\n\t"	//d4 = (float) d2	
+	"vshr.u32 		d7, d2, #1				\n\t"	//d7 = d2 >> 1
+	"vmls.f32 		d1, d4, d3[1]			\n\t"	//d1 = d1 - d4 * d3[1]
+	
+	"vand.i32 		d5, d2, d5				\n\t"	//d5 = d2 & d5
+	"vclt.f32 		d18, d0, #0				\n\t"	//d18 = (d0 < 0.0)
+	"vcvt.f32.u32 	d6, d5					\n\t"	//d6 = (float) d5
+	"vmls.f32 		d1, d6, d3[1]			\n\t"	//d1 = d1 - d6 * d3[1]
+	"veor.i32 		d5, d5, d7				\n\t"	//d5 = d5 ^ d7	
+	"vmul.f32 		d2, d1, d1				\n\t"	//d2 = d1*d1 = {x^2, x^2}	
+	
+	"vld1.32 		{d16, d17}, [%1]		\n\t"	//q8 = {p7, p3, p5, p1}
+	"veor.i32 		d5, d5, d18				\n\t"	//d5 = d5 ^ d18	
+	"vshl.i32 		d5, d5, #31				\n\t"	//d5 = d5 << 31
+	"veor.i32 		d1, d1, d5				\n\t"	//d1 = d1 ^ d5
+	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
+	"vmul.f32 		q0, q8, d1[0]			\n\t"	//q0 = q8 * d1[0] = {p7x, p3x, p5x, p1x}
+	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2 = {p5x + p7x^3, p1x + p3x^3}		
+	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d1[0] = {...., p1x + p3x^3 + p5x^5 + p7x^7}		
+	"vmov.f32 		s0, s3					\n\t"	//s0 = s3
+	: 
+	: "r"(__sinf_rng), "r"(__sinf_lut) 
+    : "q0", "q1", "q2", "q3", "q8", "q9"
+	);
+#else
+	return sinf_c(x);
+#endif
+}
+
+//soft-float hack: x is copied from r0 into d0, and s0 is copied back into r0 after the call.
+float sinf_neon_sfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile ("vdup.f32 d0, r0 		\n\t");
+	sinf_neon_hfp(x);
+	asm volatile ("vmov.f32 r0, s0 		\n\t");
+#else
+	return sinf_c(x);
+#endif
+}
+
diff --git a/source/math_sinfv.c b/source/math_sinfv.c
new file mode 100644
index 0000000000..0dfc878170
--- /dev/null
+++ b/source/math_sinfv.c
@@ -0,0 +1,110 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __sinfv_rng[2] = {
+	2.0 / M_PI,	//sinfv_c multiplies |x| by this and truncates to get the quarter-turn count
+	M_PI / 2.0, 	//sinfv_c subtracts count * this value during range reduction
+};
+//sinfv_c evaluates lut[3]*x + lut[1]*x^3 + lut[2]*x^5 + lut[0]*x^7 on the reduced argument
+const float __sinfv_lut[4] = {
+	-0.00018365f,	//p7
+	-0.16664831f,	//p3
+	+0.00830636f,	//p5
+	+0.99999661f,	//p1
+};
+
+void sinfv_c(float *x, int n, float *r)
+{
+	union {
+		float 	f;
+		int 	i;
+	} ax, bx;
+	//Portable vector sine: writes one result to r for each of the n inputs read from x
+	float aa, ab, ba, bb, axx, bxx;
+	int am, bm, an, bn;
+
+	if (n & 0x1) {
+		*r++ = sinf_c(*x++);	//odd count: the first element goes through the scalar routine
+		n--;
+	}
+	//The remaining even count is processed two elements (a and b) per iteration
+	float rng0 = __sinfv_rng[0];
+	float rng1 = __sinfv_rng[1];
+
+	while(n > 0){
+		
+		float x0 = *x++;
+		float x1 = *x++;
+		
+		ax.f = fabsf(x0);
+		bx.f = fabsf(x1);
+
+		//Range Reduction:
+		am = (int) (ax.f * rng0);	
+		bm = (int) (bx.f * rng0);	
+		
+		ax.f = ax.f - (((float)am) * rng1);
+		bx.f = bx.f - (((float)bm) * rng1);
+
+		//Test Quadrant
+		an = am & 1;
+		bn = bm & 1;
+		ax.f = ax.f - an * rng1;	//odd quarter-turn count: subtract one more rng1
+		bx.f = bx.f - bn * rng1;
+		am = (am & 2) >> 1;
+		bm = (bm & 2) >> 1;
+		ax.i = ax.i ^ ((an ^ am ^ (x0 < 0)) << 31);	//flip the float sign bit when the three flags XOR to 1
+		bx.i = bx.i ^ ((bn ^ bm ^ (x1 < 0)) << 31);
+			
+		//Degree-7 odd polynomial, evaluated in two halves (Estrin style)
+		axx = ax.f * ax.f;	
+		bxx = bx.f * bx.f;	
+		aa = (__sinfv_lut[0] * ax.f) * axx + (__sinfv_lut[2] * ax.f);	//p7*x^3 + p5*x
+		ba = (__sinfv_lut[0] * bx.f) * bxx + (__sinfv_lut[2] * bx.f);
+		ab = (__sinfv_lut[1] * ax.f) * axx + (__sinfv_lut[3] * ax.f);	//p3*x^3 + p1*x
+		bb = (__sinfv_lut[1] * bx.f) * bxx + (__sinfv_lut[3] * bx.f);
+		axx = axx * axx;	//x^4
+		bxx = bxx * bxx;
+		*r++ = ab + aa * axx;
+		*r++ = bb + ba * bxx;
+		n -= 2;
+	}
+	
+	
+}
+
+void sinfv_neon(float *x, int n, float *r)
+{
+	//Computes r[i] = sin(x[i]) for the n floats at x.
+	//
+	//No NEON kernel exists for this routine: the former __MATH_NEON branch
+	//was an empty asm statement that never wrote r, so callers read back
+	//whatever r already contained. Until a vectorised version is written,
+	//always delegate to the portable implementation so both build flavours
+	//produce results.
+	sinfv_c(x, n, r);
+}
diff --git a/source/math_sinhf.c b/source/math_sinhf.c
new file mode 100644
index 0000000000..820a490dae
--- /dev/null
+++ b/source/math_sinhf.c
@@ -0,0 +1,120 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __sinhf_rng[2] = {
+	1.442695041f,	//~ 1/ln(2): sinhf_neon_hfp multiplies x by this and converts to integer
+	0.693147180f	//~ ln(2): subtracted once per integer step during range reduction
+};
+//Each coefficient is stored twice so one 64-bit load fills both lanes of a d register
+const float __sinhf_lut[16] = {
+	0.00019578093328483123,	//p7
+	0.00019578093328483123,	//p7
+	0.0014122663401803872, 	//p6
+	0.0014122663401803872, 	//p6
+	0.008336936973260111, 	//p5
+	0.008336936973260111, 	//p5
+	0.04165989275009526, 	//p4
+	0.04165989275009526, 	//p4
+	0.16666570253074878, 	//p3
+	0.16666570253074878, 	//p3
+	0.5000006143673624, 	//p2
+	0.5000006143673624, 	//p2
+	1.000000059694879, 		//p1
+	1.000000059694879, 		//p1
+	0.9999999916728642,		//p0
+	0.9999999916728642		//p0
+};
+
+
+float sinhf_c(float x)	//portable hyperbolic sine: (e^x - e^-x) / 2 via expf_c
+{
+	float a, b, xx;
+	xx = -x;		//negated argument for the second exponential
+	a = expf_c(x);		//a = e^x
+	b = expf_c(xx);		//b = e^-x
+	a = a - b;
+	a = a * 0.5f;
+	return a;
+}
+
+
+float sinhf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	//x is taken from s0 and sinh(x) is left in s0; there is no C-level return.
+	//The loads post-increment the table pointer, so it must be a read-write operand.
+	const float *lut = __sinhf_lut;
+	asm volatile (
+	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}	
+	"fnegs 			s1, s1					\n\t"	//s1 = -s1, so d0 = {x, -x}
+	//Range Reduction:
+	"vld1.32 		d2, [%[rng]]			\n\t"	//d2 = {invrange, range}
+	"vld1.32 		{d16, d17}, [%[lut]]!	\n\t"	//d16 = {p7, p7}, d17 = {p6, p6}
+	"vmul.f32 		d6, d0, d2[0]			\n\t"	//d6 = d0 * d2[0] 
+	"vcvt.s32.f32 	d6, d6					\n\t"	//d6 = (int) d6
+	"vld1.32 		{d18}, [%[lut]]!		\n\t"	//d18 = {p5, p5}
+	"vcvt.f32.s32 	d1, d6					\n\t"	//d1 = (float) d6
+	"vld1.32 		{d19}, [%[lut]]!		\n\t"	//d19 = {p4, p4}
+	"vmls.f32 		d0, d1, d2[1]			\n\t"	//d0 = d0 - d1 * d2[1]
+	"vld1.32 		{d20}, [%[lut]]!		\n\t"	//d20 = {p3, p3}
+	//polynomial (Horner form, both lanes at once):
+	"vmla.f32 		d17, d16, d0			\n\t"	//d17 = d17 + d16 * d0;	
+	"vld1.32 		{d21}, [%[lut]]!		\n\t"	//d21 = {p2, p2}
+	"vmla.f32 		d18, d17, d0			\n\t"	//d18 = d18 + d17 * d0;	
+	"vld1.32 		{d22}, [%[lut]]!		\n\t"	//d22 = {p1, p1}
+	"vmla.f32 		d19, d18, d0			\n\t"	//d19 = d19 + d18 * d0;	
+	"vld1.32 		{d23}, [%[lut]]!		\n\t"	//d23 = {p0, p0}
+	"vmla.f32 		d20, d19, d0			\n\t"	//d20 = d20 + d19 * d0;	
+	"vmla.f32 		d21, d20, d0			\n\t"	//d21 = d21 + d20 * d0;	
+	"vmla.f32 		d22, d21, d0			\n\t"	//d22 = d22 + d21 * d0;	
+	"vmla.f32 		d23, d22, d0			\n\t"	//d23 = d23 + d22 * d0;	
+	//multiply by 2 ^ m by adding m to the float exponent field
+	"vshl.i32 		d6, d6, #23				\n\t"	//d6 = d6 << 23		
+	"vadd.i32 		d0, d23, d6				\n\t"	//d0 = d23 + d6 = {e^x, e^-x}
+	//sinh = (e^x - e^-x) * 0.5, taken from lane 0
+	"vdup.f32 		d2, d0[1]				\n\t"	//d2 = s1		
+	"vmov.f32 		d1, #0.5				\n\t"	//d1 = 0.5		
+	"vsub.f32 		d0, d0, d2				\n\t"	//d0 = d0 - d2		
+	"vmul.f32 		d0, d1					\n\t"	//d0 = d0 * d1		
+
+	: [lut] "+r"(lut)
+	: [rng] "r"(__sinhf_rng)
+    : "d0", "d1", "q1", "q2", "d6", "q8", "q9", "q10", "q11"
+	);
+#endif
+}
+
+float sinhf_neon_sfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//copy r0 into s0, where sinhf_neon_hfp's asm reads its argument
+	sinhf_neon_hfp(x);	//leaves its result in s0
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//copy s0 to r0; this path has no C return statement
+#else
+	return sinhf_c(x);	//portable C implementation
+#endif
+};
diff --git a/source/math_sqrtf.c b/source/math_sqrtf.c
new file mode 100644
index 0000000000..ee3f86bdbf
--- /dev/null
+++ b/source/math_sqrtf.c
@@ -0,0 +1,105 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+Test func : sqrtf(x)
+Test Range: 0 < x < 1,000,000,000
+Peak Error:	~0.0010%
+RMS  Error: ~0.0005%
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+float sqrtf_c(float x)
+{
+	//Portable sqrt: estimate 1/sqrt(x), then take the reciprocal of that estimate
+	float b, c;
+	int m;
+	union {
+		float 	f;
+		int 	i;
+	} a;
+	
+	//fast invsqrt approx
+	a.f = x;
+	a.i = 0x5F3759DF - (a.i >> 1);		//VRSQRTE
+	c = x * a.f;
+	b = (3.0f - c * a.f) * 0.5;		//VRSQRTS
+	a.f = a.f * b;		
+	c = x * a.f;
+	b = (3.0f - c * a.f) * 0.5;		//second refinement step
+    a.f = a.f * b;	
+
+	//fast inverse approx
+	x = a.f;				//x now holds the 1/sqrt estimate to be inverted
+	m = 0x3F800000 - (a.i & 0x7F800000);	//offset that moves a's exponent field to that of 1.0f
+	a.i = a.i + m;
+	a.f = 1.41176471f - 0.47058824f * a.f;	//linear seed for the reciprocal
+	a.i = a.i + m;				//apply the same exponent offset to the seed
+	b = 2.0 - a.f * x;			//refinement: a = a * (2 - a * x)
+	a.f = a.f * b;	
+	b = 2.0 - a.f * x;
+	a.f = a.f * b;
+
+	return a.f;
+}
+
+float sqrtf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	//The argument is read from d0 (the C parameter is never referenced); the result is left in d0
+	//fast invsqrt approx
+	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
+	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3	
+		
+	//fast reciprocal approximation
+	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d0, d1, d2				\n\t"	//d0 = d1 * d2; 
+
+	::: "d0", "d1", "d2", "d3"
+	);
+#endif
+}
+
+float sqrtf_neon_sfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//copy r0 into s0, where sqrtf_neon_hfp's asm reads its argument
+	sqrtf_neon_hfp(x);	//leaves its result in d0
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//copy s0 to r0; this path has no C return statement
+#else
+	return sqrtf_c(x);	//portable C implementation
+#endif
+};
diff --git a/source/math_sqrtfv.c b/source/math_sqrtfv.c
new file mode 100644
index 0000000000..c657db5d34
--- /dev/null
+++ b/source/math_sqrtfv.c
@@ -0,0 +1,147 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+/*
+Test func : sqrtf(x)
+Test Range: 0 < x < 1,000,000,000
+Peak Error:	~0.0010%
+RMS  Error: ~0.0005%
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+void sqrtfv_c(float *x, int n, float *r)
+{
+	//Portable vector sqrt: writes an approximate sqrt to r for each of the n inputs at x
+	float x0, x1;
+	float b0, b1, c0, c1;
+	int m0, m1;
+	union {
+		float 	f;
+		int 	i;
+	} a0, a1;
+
+	//Odd count: the first element goes through the scalar routine
+	if (n & 0x1){
+		*r++ = sqrtf_c(*x++);
+		n--;
+	}
+
+	while(n > 0){
+		//Two independent elements (suffix 0 and 1) per iteration
+		x0 = *x++;
+		x1 = *x++;
+	
+		//fast invsqrt approx
+		a0.f = x0;
+		a1.f = x1;
+		a0.i = 0x5F3759DF - (a0.i >> 1);		//VRSQRTE
+		a1.i = 0x5F3759DF - (a1.i >> 1);		//VRSQRTE
+		c0 = x0 * a0.f;
+		c1 = x1 * a1.f;
+		b0 = (3.0f - c0 * a0.f) * 0.5;		//VRSQRTS
+		b1 = (3.0f - c1 * a1.f) * 0.5;		//VRSQRTS
+		a0.f = a0.f * b0;		
+		a1.f = a1.f * b1;		
+		c0 = x0 * a0.f;
+		c1 = x1 * a1.f;
+		b0 = (3.0f - c0 * a0.f) * 0.5;		//VRSQRTS
+		b1 = (3.0f - c1 * a1.f) * 0.5;		//VRSQRTS
+		a0.f = a0.f * b0;		
+		a1.f = a1.f * b1;		
+
+		//fast inverse approx
+		c0 = a0.f;				//value to invert for element 0
+		c1 = a1.f;				//value to invert for element 1 (was assigned to c0 by mistake)
+		m0 = 0x3F800000 - (a0.i & 0x7F800000);
+		m1 = 0x3F800000 - (a1.i & 0x7F800000);
+		a0.i = a0.i + m0;
+		a1.i = a1.i + m1;
+		a0.f = 1.41176471f - 0.47058824f * a0.f;
+		a1.f = 1.41176471f - 0.47058824f * a1.f;
+		a0.i = a0.i + m0;
+		a1.i = a1.i + m1;
+		b0 = 2.0 - a0.f * c0;			//refinement: a = a * (2 - a * c)
+		b1 = 2.0 - a1.f * c1;
+		a0.f = a0.f * b0;	
+		a1.f = a1.f * b1;	
+		b0 = 2.0 - a0.f * c0;
+		b1 = 2.0 - a1.f * c1;
+		a0.f = a0.f * b0;
+		a1.f = a1.f * b1;
+		
+		*r++ = a0.f;
+		*r++ = a1.f;
+		n -= 2;
+
+	}
+}
+
+void sqrtfv_neon(float *x, int n, float *r)
+{
+#ifdef __MATH_NEON
+	//r[i] ~= sqrt(x[i]) for n floats. A leading odd element goes through sqrtf_c:
+	//the old asm copied it to r unchanged (its "bl sqrtf_neon_hfp" was commented
+	//out) and could leave the function with "bxeq lr" from inside the asm. The
+	//pair loop takes x, n and r as read-write operands instead of assuming r0-r2.
+	if (n & 0x1) {
+		*r++ = sqrtf_c(*x++);
+		n--;
+	}
+	if (n <= 0) {
+		return;		//the asm loop below always executes at least once
+	}
+	asm volatile (
+	"1:				 						\n\t"	//
+	"vld1.32 		d0, [%0]! 				\n\t"	//d0 = (*x[0], *x[1]), x+=2;
+	
+	//fast invsqrt approx
+	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
+	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3	
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3	
+		
+	//fast reciprocal approximation
+	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d0, d1, d2				\n\t"	//d0 = d1 * d2; 
+
+	"vst1.64 		d0, [%2]!				\n\t"	//*r++ = d0;
+	"subs 			%1, %1, #2				\n\t"	//n = n - 2; update flags
+	"bgt 			1b 						\n\t"	//
+
+	: "+r"(x), "+r"(n), "+r"(r)
+	:
+	: "d0", "d1", "d2", "d3", "cc", "memory"
+	);
+#else
+	sqrtfv_c(x, n, r);
+#endif
+}
diff --git a/source/math_tanf.c b/source/math_tanf.c
new file mode 100644
index 0000000000..e87c1ffd1c
--- /dev/null
+++ b/source/math_tanf.c
@@ -0,0 +1,156 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+
+const float __tanf_rng[2] = {
+	2.0 / M_PI,	//tanf_c multiplies |x| by this and truncates to get the quarter-turn count
+	M_PI / 2.0	//tanf_c subtracts count * this value during range reduction
+};
+//tanf_c evaluates lut[3]*x + lut[1]*x^3 + lut[2]*x^5 + lut[0]*x^7 on the reduced argument
+const float __tanf_lut[4] = {
+	-0.00018365f,	//p7
+	-0.16664831f,	//p3
+	+0.00830636f,	//p5
+	+0.99999661f,	//p1
+};
+ 
+float tanf_c(float x){
+	//Portable tangent: polynomial sine of the reduced argument times 1/sqrt(1 - sine^2)
+	union {
+		float f;
+		int i;
+	} ax, c, sx;
+
+	float r, a, b, xx, cc, cx;
+	int m;
+	
+	ax.f = fabsf(x);
+	sx.f = x;		//bit view of x through the union; replaces the aliasing-unsafe *(int*)&x
+	//Range Reduction:
+	m = (int) (ax.f * __tanf_rng[0]);	
+	ax.f = ax.f - (((float)m) * __tanf_rng[1]);
+
+	//Test Quadrant
+	ax.f = ax.f - (m & 1) * __tanf_rng[1];
+	ax.i = ax.i ^ (sx.i & 0x80000000);	//XOR the sign bit of x into the reduced argument
+		
+	//Degree-7 odd polynomial, evaluated in two halves (Estrin style)
+	xx = ax.f * ax.f;	
+	a = (__tanf_lut[0] * ax.f) * xx + (__tanf_lut[2] * ax.f);
+	b = (__tanf_lut[1] * ax.f) * xx + (__tanf_lut[3] * ax.f);
+	xx = xx * xx;
+	r = b + a * xx;
+
+	//cosine
+	c.f = 1.0 - r * r;
+	
+	//fast invsqrt approximation (2x newton iterations)
+    cc = c.f;
+	c.i = 0x5F3759DF - (c.i >> 1);		//VRSQRTE
+	cx = cc * c.f;
+	a = (3.0f - cx * c.f) / 2;			//VRSQRTS
+	c.f = c.f * a;		
+	cx = cc * c.f;
+	a = (3.0f - cx * c.f) / 2;
+    c.f = c.f * a;	
+
+	r = r * c.f;
+	
+	return r;
+}
+
+
+float tanf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	//x is read from d0[0] (the C parameter is never referenced); the result ends in s0
+	"vdup.f32 		d0, d0[0]				\n\t"	//d0 = {x, x}
+	"vabs.f32 		d1, d0					\n\t"	//d1 = {ax, ax}
+	
+	//Range Reduction:
+	"vld1.32 		d3, [%0]				\n\t"	//d3 = {invrange, range}
+	"vmul.f32 		d2, d1, d3[0]			\n\t"	//d2 = d1 * d3[0] 
+	"vcvt.u32.f32 	d2, d2					\n\t"	//d2 = (unsigned int) d2
+	"vcvt.f32.u32 	d4, d2					\n\t"	//d4 = (float) d2
+	"vmls.f32 		d1, d4, d3[1]			\n\t"	//d1 = d1 - d4 * d3[1]
+	
+	//Checking Quadrant:
+	//ax = ax - (k&1) * M_PI_2
+	"vmov.i32 		d4, #1					\n\t"	//d4 = 1
+	"vand.i32 		d2, d2, d4				\n\t"	//d2 = d2 & d4
+	"vcvt.f32.u32 	d2, d2					\n\t"	//d2 = (float) d2
+	"vmls.f32 		d1, d2, d3[1]			\n\t"	//d1 = d1 - d2 * d3[1]
+	
+	//ax = ax ^ ( x.i & 0x80000000)
+	"vmov.i32 		d4, #0x80000000			\n\t"	//d4 = 0x80000000
+	"vand.i32 		d0, d0, d4				\n\t"	//d0 = d0 & d4
+	"veor.i32 		d1, d1, d0				\n\t"	//d1 = d1 ^ d0
+	
+	//polynomial:
+	"vmul.f32 		d2, d1, d1				\n\t"	//d2 = d1*d1 = {x^2, x^2}	
+	"vld1.32 		{d4, d5}, [%1]			\n\t"	//d4 = {p7, p3}, d5 = {p5, p1}
+	"vmul.f32 		d3, d2, d2				\n\t"	//d3 = d2*d2 = {x^4, x^4}		
+	"vmul.f32 		q0, q2, d1[0]			\n\t"	//q0 = q2 * d1[0] = {p7x, p3x, p5x, p1x}
+	"vmla.f32 		d1, d0, d2[0]			\n\t"	//d1 = d1 + d0*d2 = {p5x + p7x^3, p1x + p3x^3}		
+	"vmla.f32 		d1, d3, d1[0]			\n\t"	//d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}		
+	
+	//cosine
+	"vmov.f32 		s1, #1.0				\n\t"	//d0[1] = 1.0
+	"vmls.f32 		d0, d1, d1				\n\t"	//d0 = {..., 1.0 - sx*sx}
+	
+	//invsqrt approx
+	"vmov.f32 		d2, d0					\n\t"	//d2 = d0
+	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
+	"vmul.f32 		d3, d0, d2				\n\t"	//d3 = d0 * d2
+	"vrsqrts.f32 	d4, d3, d0				\n\t"	//d4 = (3 - d0 * d3) / 2 	
+	"vmul.f32 		d0, d0, d4				\n\t"	//d0 = d0 * d4	
+	"vmul.f32 		d3, d0, d2				\n\t"	//d3 = d0 * d2	
+	"vrsqrts.f32 	d4, d3, d0				\n\t"	//d4 = (3 - d0 * d3) / 2	
+	"vmul.f32 		d0, d0, d4				\n\t"	//d0 = d0 * d4	
+	
+	"vmul.f32 		d0, d0, d1				\n\t"	//d0 = d0 * d1, upper lane = sx / sqrt(1 - sx*sx)
+	
+	"vmov.f32 		s0, s1					\n\t"	//s0 = s1
+	
+	:: "r"(__tanf_rng), "r"(__tanf_lut) 
+    : "d0", "d1", "d2", "d3", "d4", "d5"
+	);
+#endif
+}
+
+
+float tanf_neon_sfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile ("vdup.f32 d0, r0 		\n\t");	//copy r0 into both lanes of d0
+	tanf_neon_hfp(x);	//its asm reads d0 and leaves the result in s0
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//copy s0 to r0; this path has no C return statement
+#else
+	return tanf_c(x);	//portable C implementation
+#endif
+};
+
diff --git a/source/math_tanhf.c b/source/math_tanhf.c
new file mode 100644
index 0000000000..219655be4d
--- /dev/null
+++ b/source/math_tanhf.c
@@ -0,0 +1,95 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math.h"
+#include "math_neon.h"
+ 
+/* 
+TanH = (e^x - e^-x) / (e^x + e^-x)
+TanH = (e^x - e^-x)(e^x) / (e^x + e^-x)(e^x)
+TanH = (e^2x - 1) / (e^2x + 1)
+
+*/
+ 
+float tanhf_c(float x)
+{
+	float a, b, c;
+	int m;
+	union{
+		float 	f;
+		int 	i;
+	} xx;
+	//Portable tanh as (e^2x - 1) / (e^2x + 1); the division is an approximate reciprocal
+	x = 2.0f * x;
+	a = expf_c(x);		//a = e^2x
+	c = a + 1.0f;		//denominator
+		
+	//reciprocal approx.
+	xx.f = c;
+	m = 0x3F800000 - (xx.i & 0x7F800000);	//offset that moves c's exponent field to that of 1.0f
+	xx.i = xx.i + m;
+	xx.f = 1.41176471f - 0.47058824f * xx.f;	//linear seed for the reciprocal
+	xx.i = xx.i + m;			//apply the same exponent offset to the seed
+	b = 2.0 - xx.f * c;			//refinement: xx = xx * (2 - xx * c)
+	xx.f = xx.f * b;	
+	b = 2.0 - xx.f * c;
+	xx.f = xx.f * b;
+	c = a - 1.0;				//numerator
+	xx.f *= c;
+	return xx.f;
+}
+
+
+float tanhf_neon_hfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile ("vadd.f32 d0, d0, d0 		\n\t");	//d0 = 2 * d0; the argument is read from d0, not from x
+	expf_neon_hfp(x);	//NOTE(review): presumably consumes the doubled value still in d0; breaks if the compiler reloads s0 from x - confirm
+	asm volatile (
+	"vmov.f32 		d2, #1.0 				\n\t"	//d2 = 1.0
+	"vsub.f32 		d3, d0, d2 				\n\t"	//d3 = d0 - 1.0 (numerator)
+	"vadd.f32 		d0, d0, d2 				\n\t"	//d0 = d0 + 1.0 (denominator)
+	//reciprocal of the denominator with two refinement steps
+	"vrecpe.f32		d1, d0					\n\t"	//d1 = ~ 1 / d0; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d1, d1, d2				\n\t"	//d1 = d1 * d2; 
+	"vrecps.f32		d2, d1, d0				\n\t"	//d2 = 2.0 - d1 * d0; 
+	"vmul.f32		d0, d1, d2				\n\t"	//d0 = d1 * d2; 
+	"vmul.f32		d0, d0, d3				\n\t"	//d0 = d0 * d3; 	
+	::: "d0", "d1", "d2", "d3"
+	);	
+#endif
+}
+
+float tanhf_neon_sfp(float x)
+{
+#ifdef __MATH_NEON
+	asm volatile ("vmov.f32 s0, r0 		\n\t");	//copy r0 into s0, where tanhf_neon_hfp's asm reads its argument
+	tanhf_neon_hfp(x);	//leaves its result in d0
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//copy s0 to r0; this path has no C return statement
+#else
+	return tanhf_c(x);	//portable C implementation
+#endif
+};
+
diff --git a/source/math_vec2.c b/source/math_vec2.c
new file mode 100644
index 0000000000..d970c37676
--- /dev/null
+++ b/source/math_vec2.c
@@ -0,0 +1,118 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+
+#include "math_neon.h"
+
+//vec2 scalar (dot) product: returns v0[0]*v1[0] + v0[1]*v1[1]
+float 
+dot2_c(float v0[2], float v1[2])
+{
+	float r;
+	r = v0[0]*v1[0];
+	r += v0[1]*v1[1];
+	return r;
+}
+
+void 
+normalize2_c(float v[2], float d[2])
+{
+	float b, c, x;
+	union {
+		float 	f;
+		int 	i;
+	} a;
+	//Writes v scaled by an approximate 1/sqrt(v.v) into d; x is the squared length
+	x = v[0]*v[0];
+	x += v[1]*v[1];
+
+	//fast invsqrt approx
+	a.f = x;
+	a.i = 0x5F3759DF - (a.i >> 1);		//VRSQRTE
+	c = x * a.f;
+	b = (3.0f - c * a.f) * 0.5;		//VRSQRTS
+	a.f = a.f * b;		
+	c = x * a.f;
+	b = (3.0f - c * a.f) * 0.5;		//second refinement step
+    a.f = a.f * b;	
+
+	d[0] = v[0]*a.f;
+	d[1] = v[1]*a.f;
+}
+
+float 
+dot2_neon_hfp(float v0[2], float v1[2])	//vec2 dot product; the result is left in d0 with no C return
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		{d2}, [%0]			\n\t"	//d2={x0,y0}
+	"vld1.32 		{d4}, [%1]			\n\t"	//d4={x1,y1}
+	"vmul.f32 		d0, d2, d4			\n\t"	//d0 = d2*d4
+	"vpadd.f32 		d0, d0, d0			\n\t"	//d0 = d[0] + d[1]
+	:: "r"(v0), "r"(v1) 
+    : "d0", "d2", "d4", "memory"	//registers written above; "memory" because v0/v1 are read through pointers
+	);	
+#endif
+}
+
+float 
+dot2_neon_sfp(float v0[2], float v1[2])
+{
+#ifdef __MATH_NEON
+	dot2_neon_hfp(v0, v1);	//its asm leaves the dot product in d0
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//copy s0 to r0; this path has no C return statement
+#else
+	return dot2_c(v0, v1);	//portable C implementation
+#endif
+};
+
+void 
+normalize2_neon(float v[2], float d[2])
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		d4, [%0]				\n\t"	//d4 = {x0,y0}
+	"vmul.f32 		d0, d4, d4				\n\t"	//d0 = d4*d4
+	"vpadd.f32 		d0, d0					\n\t"	//d0 = d[0] + d[1]
+	//inverse square root of the squared length, two refinement steps
+	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
+	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3	
+
+	"vmul.f32 		d4, d4, d0[0]			\n\t"	//d4 = d4*d0[0]
+	"vst1.32 		d4, [%1]				\n\t"	//store both scaled components to d
+	
+	:: "r"(v), "r"(d) 
+    : "d0", "d1", "d2", "d3", "d4", "memory"
+	);	
+#else
+	normalize2_c(v, d);
+#endif
+}
+
diff --git a/source/math_vec3.c b/source/math_vec3.c
new file mode 100644
index 0000000000..998ff2e4d5
--- /dev/null
+++ b/source/math_vec3.c
@@ -0,0 +1,172 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math_neon.h"
+
+//vec3 scalar (dot) product: returns the sum of the three component products
+float 
+dot3_c(float v0[3], float v1[3])
+{
+	float r;
+	r = v0[0]*v1[0];
+	r += v0[1]*v1[1];
+	r += v0[2]*v1[2]; 
+	return r;
+}
+
+void
+cross3_c(float v0[3], float v1[3], float d[3])	//d = v0 x v1 (vec3 cross product)
+{
+	d[0] = v0[1]*v1[2] - v0[2]*v1[1];
+	d[1] = v0[2]*v1[0] - v0[0]*v1[2];
+	d[2] = v0[0]*v1[1] - v0[1]*v1[0];
+}
+
+void 
+normalize3_c(float v[3], float d[3])
+{
+	float b, c, x;
+	union {
+		float 	f;
+		int 	i;
+	} a;
+	//Writes v scaled by an approximate 1/sqrt(v.v) into d; x is the squared length
+	x = v[0]*v[0];
+	x += v[1]*v[1];
+	x += v[2]*v[2];
+
+	//fast invsqrt approx
+	a.f = x;
+	a.i = 0x5F3759DF - (a.i >> 1);		//VRSQRTE
+	c = x * a.f;
+	b = (3.0f - c * a.f) * 0.5;		//VRSQRTS
+	a.f = a.f * b;		
+	c = x * a.f;
+	b = (3.0f - c * a.f) * 0.5;		//second refinement step
+    a.f = a.f * b;	
+
+	d[0] = v[0]*a.f;
+	d[1] = v[1]*a.f;
+	d[2] = v[2]*a.f;
+}
+
+
+float 
+dot3_neon_hfp(float v0[3], float v1[3])	//vec3 dot product; the result is left in d0[0] with no C return
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		{d2}, [%0]			\n\t"	//d2={x0,y0}
+	"flds 			s6, [%0, #8]		\n\t"	//d3[0]={z0}
+	"vld1.32 		{d4}, [%1]			\n\t"	//d4={x1,y1}
+	"flds 			s10, [%1, #8]	\n\t"	//d5[0]={z1}
+	//only lane 0 of d0 is meaningful: the upper lanes of d3/d5 are never loaded
+	"vmul.f32 		d0, d2, d4			\n\t"	//d0= d2*d4
+	"vpadd.f32 		d0, d0, d0			\n\t"	//d0 = d[0] + d[1]
+	"vmla.f32 		d0, d3, d5			\n\t"	//d0 = d0 + d3*d5 
+	:: "r"(v0), "r"(v1) 
+    : "d0","d1","d2","d3","d4","d5","memory"	//"memory": v0/v1 are read through the pointer operands
+	);	
+#endif
+}
+
+float 
+dot3_neon_sfp(float v0[3], float v1[3])
+{
+#ifdef __MATH_NEON
+	dot3_neon_hfp(v0, v1);	//its asm leaves the dot product in d0[0]
+	asm volatile ("vmov.f32 r0, s0 		\n\t");	//copy s0 to r0; this path has no C return statement
+#else
+	return dot3_c(v0, v1);	//portable C implementation
+#endif
+};
+
+
+void cross3_neon(float v0[3], float v1[3], float d[3])
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"flds 			s3, [%0]			\n\t"	//d1[1]={x0}
+	"add 			%0, %0, #4			\n\t"	//advance v0 to its second element
+	"vld1.32 		{d0}, [%0]			\n\t"	//d0={y0,z0}
+	"vmov.f32 		s2, s1		 		\n\t"	//d1[0]={z0}, so d1={z0,x0}
+
+	"flds 			s5, [%1]			\n\t"	//d2[1]={x1}
+	"add 			%1, %1, #4			\n\t"	//advance v1 to its second element
+	"vld1.32 		{d3}, [%1]			\n\t"	//d3={y1,z1}
+	"vmov.f32 		s4, s7				\n\t"	//d2[0]=d3[1], so d2={z1,x1}
+	//d4 = {y0*z1 - z0*y1, z0*x1 - x0*z1} = first two output components
+	"vmul.f32 		d4, d0, d2			\n\t"	//d4=d0*d2
+	"vmls.f32 		d4, d1, d3			\n\t"	//d4-=d1*d3
+	//d5[0] = x0*y1 - y0*x1 = third output component
+	"vmul.f32 		d5, d3, d1[1]		\n\t"	//d5=d3*d1[1]
+	"vmls.f32 		d5, d0, d2[1]		\n\t"	//d5-=d0*d2[1]
+	
+	"vst1.32 		d4, [%2]			\n\t"	//d[0], d[1] = d4
+	"add 			%2, %2, #8			\n\t"	//advance d to its third element
+	"fsts 			s10, [%2]			\n\t"	//d[2] = d5[0]
+	//the pointers are modified above, hence the read-write operands
+	: "+r"(v0), "+r"(v1), "+r"(d):
+    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
+	);	
+#else
+	cross3_c(v0,v1,d);
+#endif
+}
+
+void 
+normalize3_neon(float v[3], float d[3])
+{
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		{d4}, [%0]				\n\t"	//d4={x0,y0}
+	"flds 			s10, [%0, #8]			\n\t"	//d5[0]={z0}
+
+	"vmul.f32 		d0, d4, d4				\n\t"	//d0= d4*d4
+	"vpadd.f32 		d0, d0					\n\t"	//d0 = d[0] + d[1]
+	"vmla.f32 		d0, d5, d5				\n\t"	//d0 = d0 + d5*d5 
+	//inverse square root of the squared length (lane 0), two refinement steps
+	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
+	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2 	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1	
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2	
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3	
+
+	"vmul.f32 		q2, q2, d0[0]			\n\t"	//q2 = q2 * d0[0], scales x, y and z
+	"vst1.32 		{d4}, [%1]				\n\t"	//d[0], d[1] = d4
+	"fsts 			s10, [%1, #8]			\n\t"	//d[2] = d5[0]
+	
+	:: "r"(v), "r"(d) 
+    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
+	);	
+#else
+	normalize3_c(v, d);
+#endif
+
+}
+
+
diff --git a/source/math_vec4.c b/source/math_vec4.c
new file mode 100644
index 0000000000..483fc57190
--- /dev/null
+++ b/source/math_vec4.c
@@ -0,0 +1,126 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "math_neon.h"
+
+
+#ifdef __MATH_NEON
+#include "arm_neon.h" 
+#endif
+
+// Portable C scalar (dot) product of two 4-component vectors.
+float dot4_c(float v0[4], float v1[4])
+{
+	int i;
+	float acc = v0[0]*v1[0];	// seed with the first term instead of adding onto 0.0f
+	for (i = 1; i < 4; i++) {
+		acc += v0[i]*v1[i];		// remaining terms, accumulated in index order
+	}
+	return acc;
+}
+
+// Scales v to approximately unit length and writes the result to d, using a fast inverse square root.
+void normalize4_c(float v[4], float d[4])
+{
+	float len_sq, step;
+	int k;
+	union {		// lets the float's bit pattern be edited as an integer (relies on a 32-bit int)
+		float 	f;
+		int 	i;
+	} inv;
+
+	// Squared length, accumulated in component order.
+	len_sq = v[0]*v[0];
+	len_sq += v[1]*v[1];
+	len_sq += v[2]*v[2];
+	len_sq += v[3]*v[3];
+
+	// Initial estimate of 1/sqrt(len_sq) from the bit-pattern trick (the role VRSQRTE plays in the NEON path).
+	inv.f = len_sq;
+	inv.i = 0x5F3759DF - (inv.i >> 1);
+	// Two Newton-Raphson refinements (the role of VRSQRTS); 0.5f keeps the arithmetic in single precision.
+	for (k = 0; k < 2; k++) {
+		step = (3.0f - (len_sq * inv.f) * inv.f) * 0.5f;
+		inv.f = inv.f * step;
+	}
+	for (k = 0; k < 4; k++) {
+		d[k] = v[k]*inv.f;	// each element is read before it is written, so d may be the same array as v
+	}
+}
+
+void normalize4_neon(float v[4], float d[4])
+{	// Writes v scaled by an approximate 1/|v| into d[0..3]; calls normalize4_c() when __MATH_NEON is not defined.
+#ifdef __MATH_NEON
+	asm volatile (
+	"vld1.32 		{d4, d5}, [%0]			\n\t"	//d4={x0,y0}, d5={z0, w0}
+	"vmul.f32 		d0, d4, d4				\n\t"	//d0= d4*d4
+	"vmla.f32 		d0, d5, d5				\n\t"	//d0 = d0 + d5*d5 
+	"vpadd.f32 		d0, d0					\n\t"	//d0 = d0[0] + d0[1] (squared length in both lanes)
+	// Reciprocal square root: hardware estimate followed by two refinement steps.
+	"vmov.f32 		d1, d0					\n\t"	//d1 = d0 (keep the squared length)
+	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
+	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
+	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2
+	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
+	// q2 is the d4:d5 pair, so a single multiply scales all four components.
+	"vmul.f32 		q2, q2, d0[0]			\n\t"	//q2 = q2 * d0[0]
+	"vst1.32 		{d4, d5}, [%1]			\n\t"	//d[0..3] = {d4, d5}
+	
+	:: "r"(v), "r"(d) 
+    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
+	);	
+#else
+	normalize4_c(v, d);
+#endif
+
+}
+
+
+// 4-component dot product of v0 and v1.
+// NEON path: written with the arm_neon.h intrinsics this file already includes, and the sum is
+// handed back through an ordinary C return, so the result does not depend on which register an
+// asm block happened to leave it in. Lanes are paired as (x,z) and (y,w) before the final add.
+// Without __MATH_NEON the function falls back to dot4_c() instead of returning nothing.
+float dot4_neon_hfp(float v0[4], float v1[4])
+{
+#ifdef __MATH_NEON
+	float32x4_t prod = vmulq_f32(vld1q_f32(v0), vld1q_f32(v1));			// {x0*x1, y0*y1, z0*z1, w0*w1}
+	float32x2_t half = vadd_f32(vget_low_f32(prod), vget_high_f32(prod));	// {xx + zz, yy + ww}
+	return vget_lane_f32(vpadd_f32(half, half), 0);						// (xx + zz) + (yy + ww)
+#else
+	return dot4_c(v0, v1);
+#endif
+}
+
+// Second entry point for the same dot product (kept so existing callers of either name still link).
+// It forwards to dot4_neon_hfp(); since that function returns its value in C, the compiler puts
+// the result wherever the active calling convention expects it.
+float dot4_neon_sfp(float v0[4], float v1[4])
+{
+	return dot4_neon_hfp(v0, v1);
+}
+