diff --git a/Makefile.common b/Makefile.common
index b8b2c27784..fe1b5eb13f 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -2261,6 +2261,7 @@ ifeq ($(HAVE_STATIC_VIDEO_FILTERS), 1)
           gfx/video_filters/normal2x.o \
           gfx/video_filters/normal2x_width.o \
           gfx/video_filters/normal2x_height.o \
+          gfx/video_filters/normal4x.o \
           gfx/video_filters/scanline2x.o \
           gfx/video_filters/grid2x.o \
           gfx/video_filters/grid3x.o \
diff --git a/gfx/video_filter.c b/gfx/video_filter.c
index 4c92a127f7..0e45185895 100644
--- a/gfx/video_filter.c
+++ b/gfx/video_filter.c
@@ -282,6 +282,7 @@ extern const struct softfilter_implementation *scale2x_get_implementation(softfi
 extern const struct softfilter_implementation *normal2x_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *normal2x_width_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *normal2x_height_get_implementation(softfilter_simd_mask_t simd);
+extern const struct softfilter_implementation *normal4x_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *scanline2x_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *grid2x_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *grid3x_get_implementation(softfilter_simd_mask_t simd);
@@ -304,6 +305,7 @@ static const softfilter_get_implementation_t soft_plugs_builtin[] = {
+   normal4x_get_implementation,
diff --git a/gfx/video_filters/Makefile b/gfx/video_filters/Makefile
index 64fda8c7e5..9b3f246b78 100644
--- a/gfx/video_filters/Makefile
+++ b/gfx/video_filters/Makefile
@@ -70,7 +70,7 @@ endif
 objects += blargg_ntsc_snes.$(DYLIB) phosphor2x.$(DYLIB) epx.$(DYLIB) lq2x.$(DYLIB) \
 	   2xsai.$(DYLIB) super2xsai.$(DYLIB) supereagle.$(DYLIB) 2xbr.$(DYLIB) \
 	   darken.$(DYLIB) scale2x.$(DYLIB) normal2x.$(DYLIB) \
-	   normal2x_width.$(DYLIB) normal2x_height.$(DYLIB) \
+	   normal2x_width.$(DYLIB) normal2x_height.$(DYLIB) normal4x.$(DYLIB) \
 	   scanline2x.$(DYLIB) grid2x.$(DYLIB) grid3x.$(DYLIB) \
 	   gameboy3x.$(DYLIB) gameboy4x.$(DYLIB) \
 	   dot_matrix_3x.$(DYLIB) dot_matrix_4x.$(DYLIB)
diff --git a/gfx/video_filters/Normal4x.filt b/gfx/video_filters/Normal4x.filt
new file mode 100644
index 0000000000..761508de55
--- /dev/null
+++ b/gfx/video_filters/Normal4x.filt
@@ -0,0 +1 @@
+filter = normal4x
diff --git a/gfx/video_filters/normal4x.c b/gfx/video_filters/normal4x.c
new file mode 100644
index 0000000000..63750e5ebc
--- /dev/null
+++ b/gfx/video_filters/normal4x.c
@@ -0,0 +1,255 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2018 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+/* Compile: gcc -o normal4x.so -shared normal4x.c -std=c99 -O3 -Wall -pedantic -fPIC */
+#include "softfilter.h"
+#include <stdlib.h>
+#include <string.h>
+#define softfilter_get_implementation normal4x_get_implementation
+#define softfilter_thread_data normal4x_softfilter_thread_data
+#define filter_data normal4x_filter_data
+struct softfilter_thread_data /* One worker's job description: filled in by normal4x_generic_packets() and read by the work callbacks. */
+   void *out_data; /* destination image, taken from the `output` argument of normal4x_generic_packets() */
+   const void *in_data; /* source image, taken from the `input` argument */
+   size_t out_pitch; /* destination row pitch in bytes (callbacks divide it by the pixel size) */
+   size_t in_pitch; /* source row pitch in bytes */
+   unsigned colfmt; /* not referenced by the code visible here */
+   unsigned width; /* source width in pixels */
+   unsigned height; /* source height in pixels */
+   int first; /* not referenced by the code visible here */
+   int last; /* not referenced by the code visible here */
+struct filter_data /* Filter instance state allocated by normal4x_generic_create() and released by normal4x_generic_destroy(). */
+   unsigned threads; /* worker count reported to the caller; create() always sets this to 1 */
+   struct softfilter_thread_data *workers; /* heap array of per-worker jobs; create() allocates exactly one entry */
+   unsigned in_fmt; /* input pixel format given to create(); selects the work callback in normal4x_generic_packets() */
+static unsigned normal4x_generic_input_fmts(void)
+static unsigned normal4x_generic_output_fmts(unsigned input_fmts) /* Reports the output format mask for a given input format mask. */
+   return input_fmts; /* the mask is passed through unchanged: the work callbacks write pixels of the same type they read */
+static unsigned normal4x_generic_threads(void *data) /* Returns the worker count stored in the filter state; `data` is the pointer returned by normal4x_generic_create(). */
+   struct filter_data *filt = (struct filter_data*)data; /* not NULL-checked: a valid filter instance is required */
+   return filt->threads; /* create() fixes this at 1 */
+static void *normal4x_generic_create(const struct softfilter_config *config, /* Allocates the filter state; returns it, or NULL if either allocation fails. */
+      unsigned in_fmt, unsigned out_fmt, /* only in_fmt is stored; out_fmt is not referenced */
+      unsigned max_width, unsigned max_height, /* not referenced in this function */
+      unsigned threads, softfilter_simd_mask_t simd, void *userdata) /* the requested thread count is not referenced; one worker is always used */
+   struct filter_data *filt = (struct filter_data*)calloc(1, sizeof(*filt)); /* zero-initialised instance state */
+   (void)simd;
+   (void)config;
+   (void)userdata;
+   if (!filt) {
+      return NULL;
+   }
+   /* Apparently the code is not thread-safe,
+    * so force single threaded operation... */
+   filt->workers = (struct softfilter_thread_data*)calloc(1, sizeof(struct softfilter_thread_data)); /* exactly one zero-initialised worker slot */
+   filt->threads = 1;
+   filt->in_fmt  = in_fmt; /* remembered so normal4x_generic_packets() can choose the matching callback */
+   if (!filt->workers) {
+      free(filt); /* do not leak the instance when the worker allocation fails */
+      return NULL;
+   }
+   return filt; /* ownership passes to the caller; release with normal4x_generic_destroy() */
+static void normal4x_generic_output(void *data, /* Computes the output dimensions for a given input size; `data` is not referenced. */
+      unsigned *out_width, unsigned *out_height, /* receive the scaled dimensions; both must be non-NULL */
+      unsigned width, unsigned height) /* input dimensions in pixels */
+   *out_width = width << 2; /* four times the input width */
+   *out_height = height << 2; /* four times the input height */
+static void normal4x_generic_destroy(void *data) /* Releases a filter instance created by normal4x_generic_create(); a NULL pointer is accepted and ignored. */
+   struct filter_data *filt = (struct filter_data*)data;
+   if (!filt) {
+      return;
+   }
+   free(filt->workers); /* worker array first, then the instance that owns it */
+   free(filt);
+static void normal4x_work_cb_xrgb8888(void *data, void *thread_data) /* Work callback for 32-bit pixels: copies each source pixel into a 4x4 block of the destination. `data` is not referenced. */
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data; /* job set up by normal4x_generic_packets() */
+   const uint32_t *input              = (const uint32_t*)thr->in_data; /* start of the current source row */
+   uint32_t *output                   = (uint32_t*)thr->out_data; /* start of the current group of four destination rows */
+   uint32_t in_stride                 = (uint32_t)(thr->in_pitch >> 2); /* byte pitch -> stride in 32-bit pixels */
+   uint32_t out_stride                = (uint32_t)(thr->out_pitch >> 2); /* byte pitch -> stride in 32-bit pixels */
+   uint32_t x, y;
+   for (y = 0; y < thr->height; ++y)
+   {
+      uint32_t *out_ptr = output; /* left edge of the 4x4 block for the current source pixel */
+      for (x = 0; x < thr->width; ++x)
+      {
+         uint32_t *out_line_ptr = out_ptr; /* walks down the four rows of the block */
+         uint32_t color         = *(input + x);
+         uint32_t row_color[4]; /* one destination row of the block: the same pixel four times */
+         row_color[0] = color;
+         row_color[1] = color;
+         row_color[2] = color;
+         row_color[3] = color;
+         /* Row 1 */
+         memcpy(out_line_ptr, row_color, sizeof(row_color));
+         out_line_ptr += out_stride;
+         /* Row 2 */
+         memcpy(out_line_ptr, row_color, sizeof(row_color));
+         out_line_ptr += out_stride;
+         /* Row 3 */
+         memcpy(out_line_ptr, row_color, sizeof(row_color));
+         out_line_ptr += out_stride;
+         /* Row 4 */
+         memcpy(out_line_ptr, row_color, sizeof(row_color));
+         out_ptr += 4; /* next block starts four destination pixels to the right */
+      }
+      input  += in_stride; /* next source row */
+      output += out_stride << 2; /* skip the four destination rows just written */
+   }
+/* Work callback for RGB565 input: replicates every 16-bit source pixel into
+ * a 4x4 block of the destination image.
+ *
+ * data        - not referenced.
+ * thread_data - struct softfilter_thread_data holding the source and
+ *               destination buffers, their row pitches in bytes, and the
+ *               source width and height in pixels.
+ *
+ * The strides are size_t and the loop counters unsigned so that they match
+ * the types of the pitch/width/height fields. 16-bit locals would silently
+ * truncate a pitch of 131072 bytes or more, and a 16-bit counter can never
+ * reach a width or height above 65535, so the loop would not terminate. */
+static void normal4x_work_cb_rgb565(void *data, void *thread_data)
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   const uint16_t *input              = (const uint16_t*)thr->in_data;
+   uint16_t *output                   = (uint16_t*)thr->out_data;
+   size_t in_stride                   = thr->in_pitch >> 1;  /* byte pitch -> stride in 16-bit pixels */
+   size_t out_stride                  = thr->out_pitch >> 1; /* byte pitch -> stride in 16-bit pixels */
+   unsigned x, y;
+   for (y = 0; y < thr->height; ++y)
+   {
+      uint16_t *out_ptr = output; /* left edge of the 4x4 block for the current source pixel */
+      for (x = 0; x < thr->width; ++x)
+      {
+         uint16_t *out_line_ptr = out_ptr; /* walks down the four rows of the block */
+         uint16_t color         = *(input + x);
+         uint16_t row_color[4]; /* one destination row of the block: the same pixel four times */
+         row_color[0] = color;
+         row_color[1] = color;
+         row_color[2] = color;
+         row_color[3] = color;
+         /* Row 1 */
+         memcpy(out_line_ptr, row_color, sizeof(row_color));
+         out_line_ptr += out_stride;
+         /* Row 2 */
+         memcpy(out_line_ptr, row_color, sizeof(row_color));
+         out_line_ptr += out_stride;
+         /* Row 3 */
+         memcpy(out_line_ptr, row_color, sizeof(row_color));
+         out_line_ptr += out_stride;
+         /* Row 4 */
+         memcpy(out_line_ptr, row_color, sizeof(row_color));
+         out_ptr += 4; /* next block starts four destination pixels to the right */
+      }
+      input  += in_stride; /* next source row */
+      output += out_stride << 2; /* skip the four destination rows just written */
+   }
+static void normal4x_generic_packets(void *data, /* Fills the single work packet for one frame; `data` is the instance from normal4x_generic_create(). */
+      struct softfilter_work_packet *packets, /* only packets[0] is written */
+      void *output, size_t output_stride, /* destination buffer and its row pitch in bytes */
+      const void *input, unsigned width, unsigned height, size_t input_stride) /* source buffer, its size in pixels and its row pitch in bytes */
+   /* We are guaranteed single threaded operation
+    * (filt->threads = 1) so we don't need to loop
+    * over threads and can cull some code. This only
+    * makes the tiniest performance difference, but
+    * every little helps when running on an o3DS... */
+   struct filter_data *filt = (struct filter_data*)data;
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)&filt->workers[0]; /* the only worker slot */
+   thr->out_data = (uint8_t*)output;
+   thr->in_data = (const uint8_t*)input;
+   thr->out_pitch = output_stride;
+   thr->in_pitch = input_stride;
+   thr->width = width;
+   thr->height = height;
+   if (filt->in_fmt == SOFTFILTER_FMT_XRGB8888) { /* choose the callback that matches the pixel size */
+      packets[0].work = normal4x_work_cb_xrgb8888;
+   } else if (filt->in_fmt == SOFTFILTER_FMT_RGB565) {
+      packets[0].work = normal4x_work_cb_rgb565;
+   } /* NOTE(review): for any other in_fmt, packets[0].work is not assigned here */
+   packets[0].thread_data = thr; /* handed back to the callback as its thread_data argument */
+static const struct softfilter_implementation normal4x_generic = { /* Callback table returned by normal4x_get_implementation(). NOTE(review): positional initialiser - the order must match struct softfilter_implementation in softfilter.h; confirm. */
+   normal4x_generic_input_fmts,
+   normal4x_generic_output_fmts,
+   normal4x_generic_create,
+   normal4x_generic_destroy,
+   normal4x_generic_threads,
+   normal4x_generic_output,
+   normal4x_generic_packets,
+   "Normal4x", /* presumably the display name - confirm against softfilter.h */
+   "normal4x", /* same identifier as the `filter = normal4x` line in Normal4x.filt */
+const struct softfilter_implementation *softfilter_get_implementation( /* Entry point; the #define above renames it to normal4x_get_implementation. */
+      softfilter_simd_mask_t simd) /* ignored: there is only the generic implementation */
+   (void)simd;
+   return &normal4x_generic; /* pointer to the static table; the caller must not free it */
+#undef softfilter_get_implementation
+#undef softfilter_thread_data
+#undef filter_data
diff --git a/griffin/griffin.c b/griffin/griffin.c
index bc1828d53b..c2369c79ac 100644
--- a/griffin/griffin.c
+++ b/griffin/griffin.c
@@ -1003,6 +1003,7 @@ FILTERS
 #include "../gfx/video_filters/normal2x.c"
 #include "../gfx/video_filters/normal2x_width.c"
 #include "../gfx/video_filters/normal2x_height.c"
+#include "../gfx/video_filters/normal4x.c"
 #include "../gfx/video_filters/scanline2x.c"
 #include "../gfx/video_filters/grid2x.c"
 #include "../gfx/video_filters/grid3x.c"