From 3d4ddebcaf8ca75e1bf11e6bb99d72616f0085ce Mon Sep 17 00:00:00 2001
From: jdgleaver <james@leaver.myzen.co.uk>
Date: Thu, 24 Sep 2020 16:09:21 +0100
Subject: [PATCH] Add several LCD-effect video filters

---
 Makefile.common                               |   7 +-
 gfx/video_filter.c                            |  11 +
 gfx/video_filters/Dot_Matrix_3x.filt          |   4 +
 gfx/video_filters/Dot_Matrix_4x.filt          |   4 +
 gfx/video_filters/Gameboy3x_DMG.filt          |  11 +
 gfx/video_filters/Gameboy3x_Greenscale.filt   |  11 +
 gfx/video_filters/Gameboy3x_Light.filt        |  11 +
 gfx/video_filters/Gameboy3x_Pocket.filt       |  11 +
 gfx/video_filters/Gameboy3x_TI-83.filt        |  11 +
 gfx/video_filters/Gameboy4x_DMG.filt          |  11 +
 gfx/video_filters/Gameboy4x_Greenscale.filt   |  11 +
 gfx/video_filters/Gameboy4x_Light.filt        |  11 +
 gfx/video_filters/Gameboy4x_Pocket.filt       |  11 +
 gfx/video_filters/Gameboy4x_TI-83.filt        |  11 +
 gfx/video_filters/Grid2x.filt                 |   1 +
 gfx/video_filters/Makefile                    |   6 +-
 gfx/video_filters/dot_matrix_3x.c             | 254 +++++++++++++
 gfx/video_filters/dot_matrix_4x.c             | 271 ++++++++++++++
 gfx/video_filters/gameboy3x.c                 | 310 ++++++++++++++++
 gfx/video_filters/gameboy4x.c                 | 339 ++++++++++++++++++
 gfx/video_filters/grid2x.c                    | 250 +++++++++++++
 gfx/video_filters/normal2x.c                  |  30 +-
 gfx/video_filters/scanline2x.c                |  50 ++-
 gfx/video_filters/softfilter.h                |   4 +
 griffin/griffin.c                             |   5 +
 libretro-common/file/config_file_userdata.c   |  18 +
 .../include/file/config_file_userdata.h       |   3 +
 27 files changed, 1637 insertions(+), 40 deletions(-)
 create mode 100644 gfx/video_filters/Dot_Matrix_3x.filt
 create mode 100644 gfx/video_filters/Dot_Matrix_4x.filt
 create mode 100644 gfx/video_filters/Gameboy3x_DMG.filt
 create mode 100644 gfx/video_filters/Gameboy3x_Greenscale.filt
 create mode 100644 gfx/video_filters/Gameboy3x_Light.filt
 create mode 100644 gfx/video_filters/Gameboy3x_Pocket.filt
 create mode 100644 gfx/video_filters/Gameboy3x_TI-83.filt
 create mode 100644 gfx/video_filters/Gameboy4x_DMG.filt
 create mode 100644 gfx/video_filters/Gameboy4x_Greenscale.filt
 create mode 100644 gfx/video_filters/Gameboy4x_Light.filt
 create mode 100644 gfx/video_filters/Gameboy4x_Pocket.filt
 create mode 100644 gfx/video_filters/Gameboy4x_TI-83.filt
 create mode 100644 gfx/video_filters/Grid2x.filt
 create mode 100644 gfx/video_filters/dot_matrix_3x.c
 create mode 100644 gfx/video_filters/dot_matrix_4x.c
 create mode 100644 gfx/video_filters/gameboy3x.c
 create mode 100644 gfx/video_filters/gameboy4x.c
 create mode 100644 gfx/video_filters/grid2x.c

diff --git a/Makefile.common b/Makefile.common
index 3bd4b4b246..6d8b98e526 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -2253,7 +2253,12 @@ ifeq ($(HAVE_STATIC_VIDEO_FILTERS), 1)
           gfx/video_filters/lq2x.o \
           gfx/video_filters/phosphor2x.o \
           gfx/video_filters/normal2x.o \
-          gfx/video_filters/scanline2x.o
+          gfx/video_filters/scanline2x.o \
+          gfx/video_filters/grid2x.o \
+          gfx/video_filters/gameboy3x.o \
+          gfx/video_filters/gameboy4x.o \
+          gfx/video_filters/dot_matrix_3x.o \
+          gfx/video_filters/dot_matrix_4x.o
 endif
 
 ifeq ($(WANT_IOSUHAX), 1)
diff --git a/gfx/video_filter.c b/gfx/video_filter.c
index 1c658f2a37..e775ba2e2f 100644
--- a/gfx/video_filter.c
+++ b/gfx/video_filter.c
@@ -122,6 +122,7 @@ softfilter_find_implementation(rarch_softfilter_t *filt, const char *ident)
 static const struct softfilter_config softfilter_config = {
    config_userdata_get_float,
    config_userdata_get_int,
+   config_userdata_get_hex,
    config_userdata_get_float_array,
    config_userdata_get_int_array,
    config_userdata_get_string,
@@ -339,6 +340,11 @@ extern const struct softfilter_implementation *darken_get_implementation(softfil
 extern const struct softfilter_implementation *scale2x_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *normal2x_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *scanline2x_get_implementation(softfilter_simd_mask_t simd);
+extern const struct softfilter_implementation *grid2x_get_implementation(softfilter_simd_mask_t simd);
+extern const struct softfilter_implementation *gameboy3x_get_implementation(softfilter_simd_mask_t simd);
+extern const struct softfilter_implementation *gameboy4x_get_implementation(softfilter_simd_mask_t simd);
+extern const struct softfilter_implementation *dot_matrix_3x_get_implementation(softfilter_simd_mask_t simd);
+extern const struct softfilter_implementation *dot_matrix_4x_get_implementation(softfilter_simd_mask_t simd);
 
 static const softfilter_get_implementation_t soft_plugs_builtin[] = {
    blargg_ntsc_snes_get_implementation,
@@ -353,6 +359,11 @@ static const softfilter_get_implementation_t soft_plugs_builtin[] = {
    scale2x_get_implementation,
    normal2x_get_implementation,
    scanline2x_get_implementation,
+   grid2x_get_implementation,
+   gameboy3x_get_implementation,
+   gameboy4x_get_implementation,
+   dot_matrix_3x_get_implementation,
+   dot_matrix_4x_get_implementation,
 };
 
 static bool append_softfilter_plugs(rarch_softfilter_t *filt,
diff --git a/gfx/video_filters/Dot_Matrix_3x.filt b/gfx/video_filters/Dot_Matrix_3x.filt
new file mode 100644
index 0000000000..ec00e08153
--- /dev/null
+++ b/gfx/video_filters/Dot_Matrix_3x.filt
@@ -0,0 +1,4 @@
+filter = dot_matrix_3x
+
+# Colour format: 0xRRGGBB
+dot_matrix_3x_grid_color = 0xECF0EC
diff --git a/gfx/video_filters/Dot_Matrix_4x.filt b/gfx/video_filters/Dot_Matrix_4x.filt
new file mode 100644
index 0000000000..74df77e8e5
--- /dev/null
+++ b/gfx/video_filters/Dot_Matrix_4x.filt
@@ -0,0 +1,4 @@
+filter = dot_matrix_4x
+
+# Colour format: 0xRRGGBB
+dot_matrix_4x_grid_color = 0xECF0EC
diff --git a/gfx/video_filters/Gameboy3x_DMG.filt b/gfx/video_filters/Gameboy3x_DMG.filt
new file mode 100644
index 0000000000..2b859a6c8b
--- /dev/null
+++ b/gfx/video_filters/Gameboy3x_DMG.filt
@@ -0,0 +1,11 @@
+filter = gameboy3x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy3x_palette_0    = 0x00420C
+gameboy3x_palette_1    = 0x005121
+gameboy3x_palette_2    = 0x317400
+gameboy3x_palette_3    = 0x578200
+gameboy3x_palette_grid = 0x5C8A00
diff --git a/gfx/video_filters/Gameboy3x_Greenscale.filt b/gfx/video_filters/Gameboy3x_Greenscale.filt
new file mode 100644
index 0000000000..6eeab1f02a
--- /dev/null
+++ b/gfx/video_filters/Gameboy3x_Greenscale.filt
@@ -0,0 +1,11 @@
+filter = gameboy3x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy3x_palette_0    = 0x0C360C
+gameboy3x_palette_1    = 0x2C6234
+gameboy3x_palette_2    = 0x6E870A
+gameboy3x_palette_3    = 0x9CBE0C
+gameboy3x_palette_grid = 0xA3C70D
diff --git a/gfx/video_filters/Gameboy3x_Light.filt b/gfx/video_filters/Gameboy3x_Light.filt
new file mode 100644
index 0000000000..d2a3d39856
--- /dev/null
+++ b/gfx/video_filters/Gameboy3x_Light.filt
@@ -0,0 +1,11 @@
+filter = gameboy3x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy3x_palette_0    = 0x00778D
+gameboy3x_palette_1    = 0x269BAD
+gameboy3x_palette_2    = 0x01B6D5
+gameboy3x_palette_3    = 0x01CBDF
+gameboy3x_palette_grid = 0x01D3E8
diff --git a/gfx/video_filters/Gameboy3x_Pocket.filt b/gfx/video_filters/Gameboy3x_Pocket.filt
new file mode 100644
index 0000000000..17e811fbd4
--- /dev/null
+++ b/gfx/video_filters/Gameboy3x_Pocket.filt
@@ -0,0 +1,11 @@
+filter = gameboy3x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy3x_palette_0    = 0x2A3325
+gameboy3x_palette_1    = 0x535f49
+gameboy3x_palette_2    = 0x86927C
+gameboy3x_palette_3    = 0xA7B19A
+gameboy3x_palette_grid = 0xADB8A0
diff --git a/gfx/video_filters/Gameboy3x_TI-83.filt b/gfx/video_filters/Gameboy3x_TI-83.filt
new file mode 100644
index 0000000000..430f2e4367
--- /dev/null
+++ b/gfx/video_filters/Gameboy3x_TI-83.filt
@@ -0,0 +1,11 @@
+filter = gameboy3x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy3x_palette_0    = 0x181810
+gameboy3x_palette_1    = 0x464A35
+gameboy3x_palette_2    = 0x727C5A
+gameboy3x_palette_3    = 0x9CA684
+gameboy3x_palette_grid = 0xA3AD8A
diff --git a/gfx/video_filters/Gameboy4x_DMG.filt b/gfx/video_filters/Gameboy4x_DMG.filt
new file mode 100644
index 0000000000..e8c813eb41
--- /dev/null
+++ b/gfx/video_filters/Gameboy4x_DMG.filt
@@ -0,0 +1,11 @@
+filter = gameboy4x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy4x_palette_0    = 0x00420C
+gameboy4x_palette_1    = 0x005121
+gameboy4x_palette_2    = 0x317400
+gameboy4x_palette_3    = 0x578200
+gameboy4x_palette_grid = 0x5C8A00
diff --git a/gfx/video_filters/Gameboy4x_Greenscale.filt b/gfx/video_filters/Gameboy4x_Greenscale.filt
new file mode 100644
index 0000000000..ded4859913
--- /dev/null
+++ b/gfx/video_filters/Gameboy4x_Greenscale.filt
@@ -0,0 +1,11 @@
+filter = gameboy4x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy4x_palette_0    = 0x0C360C
+gameboy4x_palette_1    = 0x2C6234
+gameboy4x_palette_2    = 0x6E870A
+gameboy4x_palette_3    = 0x9CBE0C
+gameboy4x_palette_grid = 0xA3C70D
diff --git a/gfx/video_filters/Gameboy4x_Light.filt b/gfx/video_filters/Gameboy4x_Light.filt
new file mode 100644
index 0000000000..15f112af88
--- /dev/null
+++ b/gfx/video_filters/Gameboy4x_Light.filt
@@ -0,0 +1,11 @@
+filter = gameboy4x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy4x_palette_0    = 0x00778D
+gameboy4x_palette_1    = 0x269BAD
+gameboy4x_palette_2    = 0x01B6D5
+gameboy4x_palette_3    = 0x01CBDF
+gameboy4x_palette_grid = 0x01D3E8
diff --git a/gfx/video_filters/Gameboy4x_Pocket.filt b/gfx/video_filters/Gameboy4x_Pocket.filt
new file mode 100644
index 0000000000..c754daecba
--- /dev/null
+++ b/gfx/video_filters/Gameboy4x_Pocket.filt
@@ -0,0 +1,11 @@
+filter = gameboy4x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy4x_palette_0    = 0x2A3325
+gameboy4x_palette_1    = 0x535f49
+gameboy4x_palette_2    = 0x86927C
+gameboy4x_palette_3    = 0xA7B19A
+gameboy4x_palette_grid = 0xADB8A0
diff --git a/gfx/video_filters/Gameboy4x_TI-83.filt b/gfx/video_filters/Gameboy4x_TI-83.filt
new file mode 100644
index 0000000000..a325b98ca5
--- /dev/null
+++ b/gfx/video_filters/Gameboy4x_TI-83.filt
@@ -0,0 +1,11 @@
+filter = gameboy4x
+
+# Colour format: 0xRRGGBB
+# 0 is darkest, 3 is lightest
+# Only works with 4 shade greyscale content
+
+gameboy4x_palette_0    = 0x181810
+gameboy4x_palette_1    = 0x464A35
+gameboy4x_palette_2    = 0x727C5A
+gameboy4x_palette_3    = 0x9CA684
+gameboy4x_palette_grid = 0xA3AD8A
diff --git a/gfx/video_filters/Grid2x.filt b/gfx/video_filters/Grid2x.filt
new file mode 100644
index 0000000000..6bd4a40b5a
--- /dev/null
+++ b/gfx/video_filters/Grid2x.filt
@@ -0,0 +1 @@
+filter = grid2x
diff --git a/gfx/video_filters/Makefile b/gfx/video_filters/Makefile
index 2ba8930c1b..239f180825 100644
--- a/gfx/video_filters/Makefile
+++ b/gfx/video_filters/Makefile
@@ -67,7 +67,11 @@ ASMFLAGS := -INEON/asm
 asflags += -mfpu=neon
 endif
 
-objects += blargg_ntsc_snes.$(DYLIB) phosphor2x.$(DYLIB) epx.$(DYLIB) lq2x.$(DYLIB) 2xsai.$(DYLIB) super2xsai.$(DYLIB) supereagle.$(DYLIB) 2xbr.$(DYLIB) darken.$(DYLIB) scale2x.$(DYLIB) normal2x.$(DYLIB) scanline2x.$(DYLIB)
+objects += blargg_ntsc_snes.$(DYLIB) phosphor2x.$(DYLIB) epx.$(DYLIB) lq2x.$(DYLIB) \
+	   2xsai.$(DYLIB) super2xsai.$(DYLIB) supereagle.$(DYLIB) 2xbr.$(DYLIB) \
+	   darken.$(DYLIB) scale2x.$(DYLIB) normal2x.$(DYLIB) scanline2x.$(DYLIB) \
+	   grid2x.$(DYLIB) gameboy3x.$(DYLIB) gameboy4x.$(DYLIB) dot_matrix_3x.$(DYLIB) \
+	   dot_matrix_4x.$(DYLIB)
 
 all: build;
 
diff --git a/gfx/video_filters/dot_matrix_3x.c b/gfx/video_filters/dot_matrix_3x.c
new file mode 100644
index 0000000000..86e1f483a6
--- /dev/null
+++ b/gfx/video_filters/dot_matrix_3x.c
@@ -0,0 +1,254 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2018 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Compile: gcc -o dot_matrix_3x.so -shared dot_matrix_3x.c -std=c99 -O3 -Wall -pedantic -fPIC */
+
+#include "softfilter.h"
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef RARCH_INTERNAL
+#define softfilter_get_implementation dot_matrix_3x_get_implementation
+#define softfilter_thread_data dot_matrix_3x_softfilter_thread_data
+#define filter_data dot_matrix_3x_filter_data
+#endif
+
+/* Default grid colour: pure white */
+#define DOT_MATRIX_3X_DEFAULT_GRID_COLOR 0xFFFFFF
+/* 10:6 weighted mix of two channel values: (10 * c10 + 6 * c6) >> 4 */
+#define DOT_MATRIX_3X_WEIGHT_10_6(c10, c6) ((((c10) << 3) + ((c10) << 1) + ((c6) << 2) + ((c6) << 1)) >> 4)
+
+typedef struct
+{
+   uint16_t r; /* red, masked to 5 bits by dot_matrix_3x_initialize() */
+   uint16_t g; /* green, masked to 5 bits by dot_matrix_3x_initialize() */
+   uint16_t b; /* blue, masked to 5 bits by dot_matrix_3x_initialize() */
+} dot_matrix_3x_grid_color_t;
+
+struct softfilter_thread_data
+{
+   void *out_data;      /* destination frame, set by dot_matrix_3x_generic_packets() */
+   const void *in_data; /* source frame, set by dot_matrix_3x_generic_packets() */
+   size_t out_pitch;    /* output stride; the worker halves it to step in uint16_t pixels */
+   size_t in_pitch;     /* input stride; the worker halves it to step in uint16_t pixels */
+   unsigned colfmt;     /* not referenced anywhere in this filter */
+   unsigned width;      /* number of source pixels per row */
+   unsigned height;     /* number of source rows */
+   int first;           /* not referenced anywhere in this filter */
+   int last;            /* not referenced anywhere in this filter */
+};
+
+struct filter_data
+{
+   unsigned threads;                       /* set to 1 by dot_matrix_3x_generic_create() */
+   struct softfilter_thread_data *workers; /* one element, allocated in dot_matrix_3x_generic_create() */
+   unsigned in_fmt;                        /* input format handed to dot_matrix_3x_generic_create() */
+   dot_matrix_3x_grid_color_t grid_color;  /* grid tint, 5 bits per channel */
+};
+
+static unsigned dot_matrix_3x_generic_input_fmts(void)
+{
+   return SOFTFILTER_FMT_RGB565; /* the only input format this filter advertises */
+}
+
+static unsigned dot_matrix_3x_generic_output_fmts(unsigned input_fmts)
+{
+   return input_fmts; /* output format mask is the input mask, unchanged */
+}
+
+static unsigned dot_matrix_3x_generic_threads(void *data)
+{
+   /* Report the worker count stored in the filter state */
+   return ((struct filter_data*)data)->threads;
+}
+
+static void dot_matrix_3x_initialize(struct filter_data *filt,
+      const struct softfilter_config *config,
+      void *userdata)
+{
+   /* Raw 0xRRGGBB grid colour (get_hex() is given the built-in default as its last argument) */
+   unsigned rgb24;
+   dot_matrix_3x_grid_color_t *tint = &filt->grid_color;
+
+   config->get_hex(userdata, "grid_color", &rgb24, DOT_MATRIX_3X_DEFAULT_GRID_COLOR);
+
+   /* Keep only the top 5 bits of each 8-bit channel */
+   tint->r = (rgb24 >> 19) & 0x1F;
+   tint->g = (rgb24 >> 11) & 0x1F;
+   tint->b = (rgb24 >>  3) & 0x1F;
+}
+
+static void *dot_matrix_3x_generic_create(const struct softfilter_config *config,
+      unsigned in_fmt, unsigned out_fmt,
+      unsigned max_width, unsigned max_height,
+      unsigned threads, softfilter_simd_mask_t simd, void *userdata)
+{
+   struct softfilter_thread_data *worker = NULL;
+   struct filter_data *state             = (struct filter_data*)calloc(1, sizeof(*state));
+   (void)simd;
+
+   if (!state)
+      return NULL;
+
+   /* One worker only: the filter always runs single threaded */
+   worker = (struct softfilter_thread_data*)calloc(1, sizeof(*worker));
+   if (!worker)
+   {
+      free(state);
+      return NULL;
+   }
+   state->workers = worker;
+   state->threads = 1;
+   state->in_fmt  = in_fmt;
+
+   /* Pull the grid colour out of the filter configuration */
+   dot_matrix_3x_initialize(state, config, userdata);
+   return state;
+}
+
+static void dot_matrix_3x_generic_output(void *data,
+      unsigned *out_width, unsigned *out_height,
+      unsigned width, unsigned height)
+{
+   *out_width  = 3 * width;  /* every source pixel becomes 3 output columns */
+   *out_height = 3 * height; /* every source row becomes 3 output rows */
+}
+
+static void dot_matrix_3x_generic_destroy(void *data)
+{
+   /* NULL-safe: releases the worker array, then the state itself */
+   struct filter_data *state = (struct filter_data*)data;
+
+   if (state)
+      free(state->workers);
+   free(state);
+}
+
+static void dot_matrix_3x_work_cb_rgb565(void *data, void *thread_data)
+{
+   /* Expands each RGB565 source pixel into a 3x3 output cell:
+    * a 2x2 copy of the pixel with grid-tinted left and bottom edges */
+   struct filter_data *filt           = (struct filter_data*)data;
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   const uint16_t *src_row            = (const uint16_t*)thr->in_data;
+   uint16_t *dst_row                  = (uint16_t*)thr->out_data;
+
+   /* Pitches are halved to step in uint16_t pixels */
+   unsigned src_stride                = (unsigned)(thr->in_pitch >> 1);
+   unsigned dst_stride                = (unsigned)(thr->out_pitch >> 1);
+   uint16_t tint_r                    = filt->grid_color.r;
+   uint16_t tint_g                    = filt->grid_color.g;
+   uint16_t tint_b                    = filt->grid_color.b;
+   unsigned col, row;
+
+   for (row = 0; row < thr->height; ++row)
+   {
+      for (col = 0; col < thr->width; ++col)
+      {
+         /* The three output lines of this pixel's 3x3 cell */
+         uint16_t *line0  = dst_row + (col * 3);
+         uint16_t *line1  = line0 + dst_stride;
+         uint16_t *line2  = line1 + dst_stride;
+         uint16_t pixel   = src_row[col];
+         /* 5-bit channels (green keeps the top
+          * 5 of its 6 bits) */
+         uint16_t pixel_r = (pixel >> 11 & 0x1F);
+         uint16_t pixel_g = (pixel >>  6 & 0x1F);
+         uint16_t pixel_b = (pixel       & 0x1F);
+         /* Grid shade: 10 parts pixel, 6 parts configured grid colour */
+         uint16_t grid_r  = DOT_MATRIX_3X_WEIGHT_10_6(pixel_r, tint_r);
+         uint16_t grid_g  = DOT_MATRIX_3X_WEIGHT_10_6(pixel_g, tint_g);
+         uint16_t grid_b  = DOT_MATRIX_3X_WEIGHT_10_6(pixel_b, tint_b);
+         uint16_t grid    = (grid_r << 11) | (grid_g << 6) | grid_b;
+
+         /* Cell layout (p = pixel, g = grid):
+          *
+          *   line0: (g)(p)(p)
+          *   line1: (g)(p)(p)
+          *   line2: (g)(g)(g)
+          */
+
+         line0[0] = grid;
+         line0[1] = pixel;
+         line0[2] = pixel;
+
+         line1[0] = grid;
+         line1[1] = pixel;
+         line1[2] = pixel;
+
+         line2[0] = grid;
+         line2[1] = grid;
+         line2[2] = grid;
+      }
+
+      /* Advance one source line and three output lines */
+      src_row += src_stride;
+      dst_row += dst_stride * 3;
+   }
+}
+
+static void dot_matrix_3x_generic_packets(void *data,
+      struct softfilter_work_packet *packets,
+      void *output, size_t output_stride,
+      const void *input, unsigned width, unsigned height, size_t input_stride)
+{
+   struct filter_data *filt              = (struct filter_data*)data;
+   struct softfilter_thread_data *worker = filt->workers;
+   struct softfilter_work_packet *packet = packets;
+
+   /* Only one packet is filled in: create() fixes the
+    * thread count at 1, so there is nothing to split */
+   worker->in_data   = (const uint8_t*)input;
+   worker->in_pitch  = input_stride;
+   worker->out_data  = (uint8_t*)output;
+   worker->out_pitch = output_stride;
+   worker->width     = width;
+   worker->height    = height;
+
+   packet->thread_data = worker;
+   /* The work callback is only assigned for RGB565 input */
+   if (filt->in_fmt == SOFTFILTER_FMT_RGB565)
+      packet->work = dot_matrix_3x_work_cb_rgb565;
+}
+
+static const struct softfilter_implementation dot_matrix_3x_generic = {
+   dot_matrix_3x_generic_input_fmts,
+   dot_matrix_3x_generic_output_fmts,
+
+   dot_matrix_3x_generic_create,
+   dot_matrix_3x_generic_destroy,
+
+   dot_matrix_3x_generic_threads,
+   dot_matrix_3x_generic_output,
+   dot_matrix_3x_generic_packets,
+
+   SOFTFILTER_API_VERSION,
+   "Dot Matrix 3x", /* presumably the human-readable name - confirm field order in softfilter.h */
+   "dot_matrix_3x", /* same string as 'filter = dot_matrix_3x' in Dot_Matrix_3x.filt */
+};
+
+const struct softfilter_implementation *softfilter_get_implementation(
+      softfilter_simd_mask_t simd)
+{
+   (void)simd; /* SIMD mask is ignored: the one generic table is always returned */
+   return &dot_matrix_3x_generic;
+}
+
+#ifdef RARCH_INTERNAL
+#undef softfilter_get_implementation
+#undef softfilter_thread_data
+#undef filter_data
+#endif
diff --git a/gfx/video_filters/dot_matrix_4x.c b/gfx/video_filters/dot_matrix_4x.c
new file mode 100644
index 0000000000..7805c03a2f
--- /dev/null
+++ b/gfx/video_filters/dot_matrix_4x.c
@@ -0,0 +1,271 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2018 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Compile: gcc -o dot_matrix_4x.so -shared dot_matrix_4x.c -std=c99 -O3 -Wall -pedantic -fPIC */
+
+#include "softfilter.h"
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef RARCH_INTERNAL
+#define softfilter_get_implementation dot_matrix_4x_get_implementation
+#define softfilter_thread_data dot_matrix_4x_softfilter_thread_data
+#define filter_data dot_matrix_4x_filter_data
+#endif
+
+/* Default grid colour: pure white */
+#define DOT_MATRIX_4X_DEFAULT_GRID_COLOR 0xFFFFFF
+/* 10:6 weighted mix of two channel values: (10 * c10 + 6 * c6) >> 4 (name keeps the 3X prefix the worker in this file expands) */
+#define DOT_MATRIX_3X_WEIGHT_10_6(c10, c6) ((((c10) << 3) + ((c10) << 1) + ((c6) << 2) + ((c6) << 1)) >> 4)
+
+typedef struct
+{
+   uint16_t r; /* red, masked to 5 bits by dot_matrix_4x_initialize() */
+   uint16_t g; /* green, masked to 5 bits by dot_matrix_4x_initialize() */
+   uint16_t b; /* blue, masked to 5 bits by dot_matrix_4x_initialize() */
+} dot_matrix_4x_grid_color_t;
+
+struct softfilter_thread_data
+{
+   void *out_data;      /* destination frame, set by dot_matrix_4x_generic_packets() */
+   const void *in_data; /* source frame, set by dot_matrix_4x_generic_packets() */
+   size_t out_pitch;    /* output stride; the worker halves it to step in uint16_t pixels */
+   size_t in_pitch;     /* input stride; the worker halves it to step in uint16_t pixels */
+   unsigned colfmt;     /* not referenced anywhere in this filter */
+   unsigned width;      /* number of source pixels per row */
+   unsigned height;     /* number of source rows */
+   int first;           /* not referenced anywhere in this filter */
+   int last;            /* not referenced anywhere in this filter */
+};
+
+struct filter_data
+{
+   unsigned threads;                       /* set to 1 by dot_matrix_4x_generic_create() */
+   struct softfilter_thread_data *workers; /* one element, allocated in dot_matrix_4x_generic_create() */
+   unsigned in_fmt;                        /* input format handed to dot_matrix_4x_generic_create() */
+   dot_matrix_4x_grid_color_t grid_color;  /* grid tint, 5 bits per channel */
+};
+
+static unsigned dot_matrix_4x_generic_input_fmts(void)
+{
+   return SOFTFILTER_FMT_RGB565; /* the only input format this filter advertises */
+}
+
+static unsigned dot_matrix_4x_generic_output_fmts(unsigned input_fmts)
+{
+   return input_fmts; /* output format mask is the input mask, unchanged */
+}
+
+static unsigned dot_matrix_4x_generic_threads(void *data)
+{
+   /* Report the worker count stored in the filter state */
+   return ((struct filter_data*)data)->threads;
+}
+
+static void dot_matrix_4x_initialize(struct filter_data *filt,
+      const struct softfilter_config *config,
+      void *userdata)
+{
+   /* Raw 0xRRGGBB grid colour (get_hex() is given the built-in default as its last argument) */
+   unsigned rgb24;
+   dot_matrix_4x_grid_color_t *tint = &filt->grid_color;
+
+   config->get_hex(userdata, "grid_color", &rgb24, DOT_MATRIX_4X_DEFAULT_GRID_COLOR);
+
+   /* Keep only the top 5 bits of each 8-bit channel */
+   tint->r = (rgb24 >> 19) & 0x1F;
+   tint->g = (rgb24 >> 11) & 0x1F;
+   tint->b = (rgb24 >>  3) & 0x1F;
+}
+
+static void *dot_matrix_4x_generic_create(const struct softfilter_config *config,
+      unsigned in_fmt, unsigned out_fmt,
+      unsigned max_width, unsigned max_height,
+      unsigned threads, softfilter_simd_mask_t simd, void *userdata)
+{
+   struct softfilter_thread_data *worker = NULL;
+   struct filter_data *state             = (struct filter_data*)calloc(1, sizeof(*state));
+   (void)simd;
+
+   if (!state)
+      return NULL;
+
+   /* One worker only: the filter always runs single threaded */
+   worker = (struct softfilter_thread_data*)calloc(1, sizeof(*worker));
+   if (!worker)
+   {
+      free(state);
+      return NULL;
+   }
+   state->workers = worker;
+   state->threads = 1;
+   state->in_fmt  = in_fmt;
+
+   /* Pull the grid colour out of the filter configuration */
+   dot_matrix_4x_initialize(state, config, userdata);
+   return state;
+}
+
+static void dot_matrix_4x_generic_output(void *data,
+      unsigned *out_width, unsigned *out_height,
+      unsigned width, unsigned height)
+{
+   *out_width  = width  * 4; /* every source pixel becomes 4 output columns */
+   *out_height = height * 4; /* every source row becomes 4 output rows */
+}
+
+static void dot_matrix_4x_generic_destroy(void *data)
+{
+   /* NULL-safe: releases the worker array, then the state itself */
+   struct filter_data *state = (struct filter_data*)data;
+
+   if (state)
+      free(state->workers);
+   free(state);
+}
+
+static void dot_matrix_4x_work_cb_rgb565(void *data, void *thread_data)
+{
+   /* Expands each RGB565 source pixel into a 4x4 output cell: a 3x3 copy
+    * of the pixel, shadow-tinted left and bottom edges, and grid-tinted
+    * top-left and bottom-right corners */
+   struct filter_data *filt           = (struct filter_data*)data;
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   const uint16_t *src_row            = (const uint16_t*)thr->in_data;
+   uint16_t *dst_row                  = (uint16_t*)thr->out_data;
+
+   /* Pitches are halved to step in uint16_t pixels */
+   unsigned src_stride                = (unsigned)(thr->in_pitch >> 1);
+   unsigned dst_stride                = (unsigned)(thr->out_pitch >> 1);
+   uint16_t tint_r                    = filt->grid_color.r;
+   uint16_t tint_g                    = filt->grid_color.g;
+   uint16_t tint_b                    = filt->grid_color.b;
+   unsigned col, row;
+
+   for (row = 0; row < thr->height; ++row)
+   {
+      for (col = 0; col < thr->width; ++col)
+      {
+         /* The four output lines of this pixel's 4x4 cell */
+         uint16_t *line0   = dst_row + (col << 2);
+         uint16_t *line1   = line0 + dst_stride;
+         uint16_t *line2   = line1 + dst_stride;
+         uint16_t *line3   = line2 + dst_stride;
+         uint16_t pixel    = src_row[col];
+         /* 5-bit channels (green keeps the top
+          * 5 of its 6 bits) */
+         uint16_t pixel_r  = (pixel >> 11 & 0x1F);
+         uint16_t pixel_g  = (pixel >>  6 & 0x1F);
+         uint16_t pixel_b  = (pixel       & 0x1F);
+         /* Shadow shade: 10 parts pixel,
+          * 6 parts configured grid colour */
+         uint16_t shadow_r = DOT_MATRIX_3X_WEIGHT_10_6(pixel_r, tint_r);
+         uint16_t shadow_g = DOT_MATRIX_3X_WEIGHT_10_6(pixel_g, tint_g);
+         uint16_t shadow_b = DOT_MATRIX_3X_WEIGHT_10_6(pixel_b, tint_b);
+         uint16_t shadow   = (shadow_r << 11) | (shadow_g << 6) | shadow_b;
+         /* Grid shade: even mix of pixel and configured grid colour */
+         uint16_t grid_r   = (pixel_r + tint_r) >> 1;
+         uint16_t grid_g   = (pixel_g + tint_g) >> 1;
+         uint16_t grid_b   = (pixel_b + tint_b) >> 1;
+         uint16_t grid     = (grid_r << 11) | (grid_g << 6) | grid_b;
+
+         /* Cell layout (p = pixel, s = shadow, g = grid):
+          *
+          *   line0: (g)(p)(p)(p)
+          *   line1: (s)(p)(p)(p)
+          *   line2: (s)(p)(p)(p)
+          *   line3: (s)(s)(s)(g)
+          */
+
+         line0[0] = grid;
+         line0[1] = pixel;
+         line0[2] = pixel;
+         line0[3] = pixel;
+
+         line1[0] = shadow;
+         line1[1] = pixel;
+         line1[2] = pixel;
+         line1[3] = pixel;
+
+         line2[0] = shadow;
+         line2[1] = pixel;
+         line2[2] = pixel;
+         line2[3] = pixel;
+
+         line3[0] = shadow;
+         line3[1] = shadow;
+         line3[2] = shadow;
+         line3[3] = grid;
+      }
+
+      /* Advance one source line and four output lines */
+      src_row += src_stride;
+      dst_row += dst_stride << 2;
+   }
+}
+
+static void dot_matrix_4x_generic_packets(void *data,
+      struct softfilter_work_packet *packets,
+      void *output, size_t output_stride,
+      const void *input, unsigned width, unsigned height, size_t input_stride)
+{
+   struct filter_data *filt              = (struct filter_data*)data;
+   struct softfilter_thread_data *worker = filt->workers;
+   struct softfilter_work_packet *packet = packets;
+
+   /* Only one packet is filled in: create() fixes the
+    * thread count at 1, so there is nothing to split */
+   worker->in_data   = (const uint8_t*)input;
+   worker->in_pitch  = input_stride;
+   worker->out_data  = (uint8_t*)output;
+   worker->out_pitch = output_stride;
+   worker->width     = width;
+   worker->height    = height;
+
+   packet->thread_data = worker;
+   /* The work callback is only assigned for RGB565 input */
+   if (filt->in_fmt == SOFTFILTER_FMT_RGB565)
+      packet->work = dot_matrix_4x_work_cb_rgb565;
+}
+
+static const struct softfilter_implementation dot_matrix_4x_generic = {
+   dot_matrix_4x_generic_input_fmts,
+   dot_matrix_4x_generic_output_fmts,
+
+   dot_matrix_4x_generic_create,
+   dot_matrix_4x_generic_destroy,
+
+   dot_matrix_4x_generic_threads,
+   dot_matrix_4x_generic_output,
+   dot_matrix_4x_generic_packets,
+
+   SOFTFILTER_API_VERSION,
+   "Dot Matrix 4x", /* presumably the human-readable name - confirm field order in softfilter.h */
+   "dot_matrix_4x", /* same string as 'filter = dot_matrix_4x' in Dot_Matrix_4x.filt */
+};
+
+const struct softfilter_implementation *softfilter_get_implementation(
+      softfilter_simd_mask_t simd)
+{
+   (void)simd; /* SIMD mask is ignored: the one generic table is always returned */
+   return &dot_matrix_4x_generic;
+}
+
+#ifdef RARCH_INTERNAL
+#undef softfilter_get_implementation
+#undef softfilter_thread_data
+#undef filter_data
+#endif
diff --git a/gfx/video_filters/gameboy3x.c b/gfx/video_filters/gameboy3x.c
new file mode 100644
index 0000000000..15e9325761
--- /dev/null
+++ b/gfx/video_filters/gameboy3x.c
@@ -0,0 +1,310 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2018 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Compile: gcc -o gameboy3x.so -shared gameboy3x.c -std=c99 -O3 -Wall -pedantic -fPIC */
+
+#include "softfilter.h"
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef RARCH_INTERNAL
+#define softfilter_get_implementation gameboy3x_get_implementation
+#define softfilter_thread_data gameboy3x_softfilter_thread_data
+#define filter_data gameboy3x_filter_data
+#endif
+
+/* Default colours match Gambatte's
+ * Gameboy Pocket palette */
+#define GAMEBOY_3X_DEFAULT_PALETTE_0    0x2A3325
+#define GAMEBOY_3X_DEFAULT_PALETTE_1    0x535f49
+#define GAMEBOY_3X_DEFAULT_PALETTE_2    0x86927C
+#define GAMEBOY_3X_DEFAULT_PALETTE_3    0xA7B19A
+#define GAMEBOY_3X_DEFAULT_PALETTE_GRID 0xADB8A0
+/* Packs 0xRRGGBB into RGB565; 'rgb24' is parenthesised so any expression is a safe argument */
+#define GAMEBOY_3X_RGB24_TO_RGB565(rgb24) ( (((rgb24) >> 8) & 0xF800) | (((rgb24) >> 5) & 0x7E0) | (((rgb24) >> 3) & 0x1F) )
+
+typedef struct
+{
+   uint16_t pixel_lut[4]; /* RGB565 pixel colour for each of the 4 palette entries */
+   uint16_t grid_lut[4];  /* RGB565 palette/grid blend for each palette entry */
+} gameboy3x_colors_t;
+
+struct softfilter_thread_data
+{
+   void *out_data;       /* destination frame buffer */
+   const void *in_data;  /* source frame buffer */
+   size_t out_pitch;     /* destination line pitch, in bytes */
+   size_t in_pitch;      /* source line pitch, in bytes */
+   unsigned colfmt;      /* not referenced anywhere in this file */
+   unsigned width;       /* source width, in pixels */
+   unsigned height;      /* source height, in lines */
+   int first;            /* not referenced anywhere in this file */
+   int last;             /* not referenced anywhere in this file */
+};
+
+struct filter_data
+{
+   unsigned threads;                        /* set to 1 by gameboy3x_generic_create() */
+   struct softfilter_thread_data *workers;  /* single worker record allocated at creation */
+   unsigned in_fmt;                         /* input format passed to the create function */
+   gameboy3x_colors_t colors;               /* lookup tables built by gameboy3x_initialize() */
+};
+
+static unsigned gameboy3x_generic_input_fmts(void)
+{
+   return SOFTFILTER_FMT_RGB565; /* the only format with a work callback in this file */
+}
+
+static unsigned gameboy3x_generic_output_fmts(unsigned input_fmts)
+{
+   return input_fmts; /* output format mask is the input mask, unchanged */
+}
+
+static unsigned gameboy3x_generic_threads(void *data)
+{
+   struct filter_data *filt = (struct filter_data*)data;
+   return filt->threads; /* set to 1 in gameboy3x_generic_create() */
+}
+
+static uint32_t gameboy3x_get_grid_colour(unsigned palette, unsigned grid)
+{
+   /* Blend each 8-bit channel of the two 0xRRGGBB values,
+    * weighting palette:grid as 3:2. Only four palette
+    * colours exist, so results are cached by the caller */
+   uint32_t blended = 0;
+   int shift;
+
+   for (shift = 16; shift >= 0; shift -= 8)
+   {
+      uint32_t palette_channel = (palette >> shift) & 0xFF;
+      uint32_t grid_channel    = (grid    >> shift) & 0xFF;
+      uint32_t mixed_channel   =
+            ((3 * palette_channel) + (2 * grid_channel)) / 5;
+
+      blended |= mixed_channel << shift;
+   }
+   return blended;
+}
+
+static void gameboy3x_initialize(struct filter_data *filt,
+      const struct softfilter_config *config,
+      void *userdata)
+{
+   unsigned shades[4];
+   unsigned grid_shade;
+   size_t idx;
+
+   /* Fetch the four 0xRRGGBB palette entries and the grid
+    * colour (built-in defaults are passed as the last argument) */
+   config->get_hex(userdata, "palette_0",
+         &shades[0], GAMEBOY_3X_DEFAULT_PALETTE_0);
+   config->get_hex(userdata, "palette_1",
+         &shades[1], GAMEBOY_3X_DEFAULT_PALETTE_1);
+   config->get_hex(userdata, "palette_2",
+         &shades[2], GAMEBOY_3X_DEFAULT_PALETTE_2);
+   config->get_hex(userdata, "palette_3",
+         &shades[3], GAMEBOY_3X_DEFAULT_PALETTE_3);
+   config->get_hex(userdata, "palette_grid",
+         &grid_shade, GAMEBOY_3X_DEFAULT_PALETTE_GRID);
+
+   /* Build both RGB565 lookup tables, one entry per shade */
+   for (idx = 0; idx < 4; idx++)
+   {
+      /* Grid entry: palette colour blended with the grid colour */
+      uint32_t blended = gameboy3x_get_grid_colour(
+            shades[idx], grid_shade);
+
+      /* Pixel entry: the palette colour itself */
+      filt->colors.pixel_lut[idx] = GAMEBOY_3X_RGB24_TO_RGB565(shades[idx]);
+      filt->colors.grid_lut[idx]  = GAMEBOY_3X_RGB24_TO_RGB565(blended);
+   }
+}
+
+static void *gameboy3x_generic_create(const struct softfilter_config *config,
+      unsigned in_fmt, unsigned out_fmt,
+      unsigned max_width, unsigned max_height,
+      unsigned threads, softfilter_simd_mask_t simd, void *userdata)
+{
+   struct filter_data *instance = (struct filter_data*)calloc(1, sizeof(*instance));
+   struct softfilter_thread_data *worker = NULL;
+   (void)simd;
+
+   if (!instance)
+      return NULL;
+
+   /* Exactly one worker is allocated: the filter always
+    * reports a thread count of 1, whatever was requested */
+   worker = (struct softfilter_thread_data*)calloc(1, sizeof(*worker));
+   if (!worker)
+   {
+      free(instance);
+      return NULL;
+   }
+   instance->workers = worker;
+   instance->threads = 1;
+   instance->in_fmt  = in_fmt;
+
+   gameboy3x_initialize(instance, config, userdata); /* build colour LUTs */
+   return instance;
+}
+
+static void gameboy3x_generic_output(void *data,
+      unsigned *out_width, unsigned *out_height,
+      unsigned width, unsigned height)
+{
+   *out_width  = 3 * width;  /* three output pixels per input pixel */
+   *out_height = 3 * height; /* three output lines per input line */
+}
+
+static void gameboy3x_generic_destroy(void *data)
+{
+   struct filter_data *instance = (struct filter_data*)data;
+   /* A NULL handle is accepted and ignored */
+   if (!instance)
+      return;
+   free(instance->workers);
+   free(instance);
+}
+
+static void gameboy3x_work_cb_rgb565(void *data, void *thread_data)
+{
+   struct filter_data *filt           = (struct filter_data*)data;
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   const uint16_t *input              = (const uint16_t*)thr->in_data;
+   uint16_t *output                   = (uint16_t*)thr->out_data;
+   unsigned in_stride                 = (unsigned)(thr->in_pitch >> 1);  /* bytes -> 16-bit pixels */
+   unsigned out_stride                = (unsigned)(thr->out_pitch >> 1); /* bytes -> 16-bit pixels */
+   uint16_t *pixel_lut                = filt->colors.pixel_lut;
+   uint16_t *grid_lut                 = filt->colors.grid_lut;
+   unsigned x, y;
+   /* Maps each input pixel to one of 4 palette shades and draws it as a 3x3 cell */
+   for (y = 0; y < thr->height; ++y)
+   {
+      uint16_t *out_ptr = output;
+
+      for (x = 0; x < thr->width; ++x)
+      {
+         uint16_t *out_line_ptr = out_ptr;
+         uint16_t in_color      = *(input + x);
+         uint16_t in_rgb_mean   =
+               (in_color >> 11 & 0x1F) +
+               (in_color >>  6 & 0x1F) + /* upper 5 of the 6 green bits */
+               (in_color       & 0x1F);
+         uint16_t out_pixel_color;
+         uint16_t out_grid_color;
+         uint8_t  lut_index;
+
+         /* Mean of the 3 components: scale the sum by ~4/3
+          * with shift-adds, then '>> 2' (approximates sum / 3) */
+         in_rgb_mean += (in_rgb_mean +   2) >> 2;
+         in_rgb_mean += (in_rgb_mean +   8) >> 4;
+         in_rgb_mean += (in_rgb_mean + 128) >> 8;
+         in_rgb_mean >>= 2;
+
+         /* Convert to lookup table index
+          * > The mean is a 5-bit value, so '>> 3' cannot
+          *   exceed 3, but clamp anyway... */
+         lut_index = in_rgb_mean >> 3;
+         lut_index = (lut_index > 3) ? 3 : lut_index;
+
+         /* Get output pixel and grid colours */
+         out_pixel_color = *(pixel_lut + lut_index);
+         out_grid_color  = *(grid_lut + lut_index);
+
+         /* - Pixel layout (p = pixel, g = grid) -
+          * Before:  After:
+          * (p)      (g)(p)(p)
+          *          (g)(p)(p)
+          *          (g)(g)(g)
+          */
+
+         /* Row 1: (g)(p)(p) */
+         *out_line_ptr       = out_grid_color;
+         *(out_line_ptr + 1) = out_pixel_color;
+         *(out_line_ptr + 2) = out_pixel_color;
+         out_line_ptr       += out_stride;
+
+         /* Row 2: (g)(p)(p) */
+         *out_line_ptr       = out_grid_color;
+         *(out_line_ptr + 1) = out_pixel_color;
+         *(out_line_ptr + 2) = out_pixel_color;
+         out_line_ptr       += out_stride;
+
+         /* Row 3: (g)(g)(g) */
+         *out_line_ptr       = out_grid_color;
+         *(out_line_ptr + 1) = out_grid_color;
+         *(out_line_ptr + 2) = out_grid_color;
+
+         out_ptr += 3;
+      }
+
+      input  += in_stride;
+      output += out_stride * 3;
+   }
+}
+
+static void gameboy3x_generic_packets(void *data,
+      struct softfilter_work_packet *packets,
+      void *output, size_t output_stride,
+      const void *input, unsigned width, unsigned height, size_t input_stride)
+{
+   /* Only one worker exists (threads is set to 1 at
+    * creation), so a single packet describing the whole
+    * frame is filled in - no loop over threads is needed */
+   struct filter_data *filter            = (struct filter_data*)data;
+   struct softfilter_thread_data *worker = (struct softfilter_thread_data*)filter->workers;
+
+   worker->width     = width;
+   worker->height    = height;
+   worker->in_data   = (const uint8_t*)input;
+   worker->in_pitch  = input_stride;
+   worker->out_data  = (uint8_t*)output;
+   worker->out_pitch = output_stride;
+
+   packets->thread_data = worker;
+
+   if (filter->in_fmt == SOFTFILTER_FMT_RGB565)
+      packets->work = gameboy3x_work_cb_rgb565;
+}
+
+static const struct softfilter_implementation gameboy3x_generic = {
+   gameboy3x_generic_input_fmts,  /* positional initialiser: order follows the struct */
+   gameboy3x_generic_output_fmts,
+   /* Instance creation / teardown */
+   gameboy3x_generic_create,
+   gameboy3x_generic_destroy,
+   /* Thread count, output size and work packet setup */
+   gameboy3x_generic_threads,
+   gameboy3x_generic_output,
+   gameboy3x_generic_packets,
+   /* API version, followed by the filter's two name strings */
+   SOFTFILTER_API_VERSION,
+   "Gameboy3x",
+   "gameboy3x",
+};
+
+const struct softfilter_implementation *softfilter_get_implementation(
+      softfilter_simd_mask_t simd)
+{
+   (void)simd; /* ignored: the same implementation is returned for every mask */
+   return &gameboy3x_generic;
+}
+
+#ifdef RARCH_INTERNAL
+#undef softfilter_get_implementation
+#undef softfilter_thread_data
+#undef filter_data
+#endif
diff --git a/gfx/video_filters/gameboy4x.c b/gfx/video_filters/gameboy4x.c
new file mode 100644
index 0000000000..8dcd6d580c
--- /dev/null
+++ b/gfx/video_filters/gameboy4x.c
@@ -0,0 +1,339 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2018 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Compile: gcc -o gameboy4x.so -shared gameboy4x.c -std=c99 -O3 -Wall -pedantic -fPIC */
+
+#include "softfilter.h"
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef RARCH_INTERNAL
+#define softfilter_get_implementation gameboy4x_get_implementation
+#define softfilter_thread_data gameboy4x_softfilter_thread_data
+#define filter_data gameboy4x_filter_data
+#endif
+
+/* Default colours match Gambatte's
+ * Gameboy Pocket palette */
+#define GAMEBOY_4X_DEFAULT_PALETTE_0    0x2A3325
+#define GAMEBOY_4X_DEFAULT_PALETTE_1    0x535f49
+#define GAMEBOY_4X_DEFAULT_PALETTE_2    0x86927C
+#define GAMEBOY_4X_DEFAULT_PALETTE_3    0xA7B19A
+#define GAMEBOY_4X_DEFAULT_PALETTE_GRID 0xADB8A0
+/* Packs 0xRRGGBB into RGB565; 'rgb24' is parenthesised so any expression is a safe argument */
+#define GAMEBOY_4X_RGB24_TO_RGB565(rgb24) ( (((rgb24) >> 8) & 0xF800) | (((rgb24) >> 5) & 0x7E0) | (((rgb24) >> 3) & 0x1F) )
+
+typedef struct
+{
+   uint16_t pixel_lut[4];  /* RGB565 pixel colour for each of the 4 palette entries */
+   uint16_t shadow_lut[4]; /* RGB565 4:3 palette:grid blend (pixel shadow) per entry */
+   uint16_t grid_lut[4];   /* RGB565 2:3 palette:grid blend (grid) per entry */
+} gameboy4x_colors_t;
+
+struct softfilter_thread_data
+{
+   void *out_data;       /* destination frame buffer */
+   const void *in_data;  /* source frame buffer */
+   size_t out_pitch;     /* destination line pitch, in bytes */
+   size_t in_pitch;      /* source line pitch, in bytes */
+   unsigned colfmt;      /* not referenced anywhere in this file */
+   unsigned width;       /* source width, in pixels */
+   unsigned height;      /* source height, in lines */
+   int first;            /* not referenced anywhere in this file */
+   int last;             /* not referenced anywhere in this file */
+};
+
+struct filter_data
+{
+   unsigned threads;                        /* set to 1 by gameboy4x_generic_create() */
+   struct softfilter_thread_data *workers;  /* single worker record allocated at creation */
+   unsigned in_fmt;                         /* input format passed to the create function */
+   gameboy4x_colors_t colors;               /* lookup tables built by gameboy4x_initialize() */
+};
+
+static unsigned gameboy4x_generic_input_fmts(void)
+{
+   return SOFTFILTER_FMT_RGB565; /* the only format with a work callback in this file */
+}
+
+static unsigned gameboy4x_generic_output_fmts(unsigned input_fmts)
+{
+   return input_fmts; /* output format mask is the input mask, unchanged */
+}
+
+static unsigned gameboy4x_generic_threads(void *data)
+{
+   struct filter_data *filt = (struct filter_data*)data;
+   return filt->threads; /* set to 1 in gameboy4x_generic_create() */
+}
+
+static uint32_t gameboy4x_get_weighted_colour(
+      unsigned palette, unsigned grid,
+      unsigned palette_weight, unsigned grid_weight)
+{
+   /* Weighted average of two 0xRRGGBB colours, computed
+    * one 8-bit channel at a time (red, green, then blue) */
+   unsigned weight_sum = palette_weight + grid_weight;
+   uint32_t blended    = 0;
+   int shift;
+
+   for (shift = 16; shift >= 0; shift -= 8)
+   {
+      uint32_t palette_channel = (palette >> shift) & 0xFF;
+      uint32_t grid_channel    = (grid    >> shift) & 0xFF;
+      uint32_t mixed_channel   = ((palette_weight * palette_channel) +
+            (grid_weight * grid_channel)) / weight_sum;
+
+      blended |= mixed_channel << shift;
+   }
+   return blended;
+}
+
+static void gameboy4x_initialize(struct filter_data *filt,
+      const struct softfilter_config *config,
+      void *userdata)
+{
+   unsigned shades[4];
+   unsigned grid_shade;
+   size_t idx;
+
+   /* Fetch the four 0xRRGGBB palette entries and the grid
+    * colour (built-in defaults are passed as the last argument) */
+   config->get_hex(userdata, "palette_0",
+         &shades[0], GAMEBOY_4X_DEFAULT_PALETTE_0);
+   config->get_hex(userdata, "palette_1",
+         &shades[1], GAMEBOY_4X_DEFAULT_PALETTE_1);
+   config->get_hex(userdata, "palette_2",
+         &shades[2], GAMEBOY_4X_DEFAULT_PALETTE_2);
+   config->get_hex(userdata, "palette_3",
+         &shades[3], GAMEBOY_4X_DEFAULT_PALETTE_3);
+   config->get_hex(userdata, "palette_grid",
+         &grid_shade, GAMEBOY_4X_DEFAULT_PALETTE_GRID);
+
+   /* Build the three RGB565 lookup tables (pixel, shadow
+    * and grid), one entry per palette shade */
+   for (idx = 0; idx < 4; idx++)
+   {
+      unsigned shade = shades[idx];
+
+      /* Pixel shadow: 4:3 mix of palette:grid */
+      uint32_t shadow = gameboy4x_get_weighted_colour(
+            shade, grid_shade, 4, 3);
+
+      /* Grid: 2:3 mix of palette:grid
+       * > The pure grid colour would highlight the pixel
+       *   shadow effect more, but it looks very peculiar.
+       *   2:3 is about as light as this can be made
+       *   without producing ugly optical illusions */
+      uint32_t grid = gameboy4x_get_weighted_colour(
+            shade, grid_shade, 2, 3);
+
+      /* Pixel entry is the palette colour itself */
+      filt->colors.pixel_lut[idx]  = GAMEBOY_4X_RGB24_TO_RGB565(shade);
+      filt->colors.shadow_lut[idx] = GAMEBOY_4X_RGB24_TO_RGB565(shadow);
+      filt->colors.grid_lut[idx]   = GAMEBOY_4X_RGB24_TO_RGB565(grid);
+   }
+}
+
+static void *gameboy4x_generic_create(const struct softfilter_config *config,
+      unsigned in_fmt, unsigned out_fmt,
+      unsigned max_width, unsigned max_height,
+      unsigned threads, softfilter_simd_mask_t simd, void *userdata)
+{
+   struct filter_data *instance = (struct filter_data*)calloc(1, sizeof(*instance));
+   struct softfilter_thread_data *worker = NULL;
+   (void)simd;
+
+   if (!instance)
+      return NULL;
+
+   /* Exactly one worker is allocated: the filter always
+    * reports a thread count of 1, whatever was requested */
+   worker = (struct softfilter_thread_data*)calloc(1, sizeof(*worker));
+   if (!worker)
+   {
+      free(instance);
+      return NULL;
+   }
+   instance->workers = worker;
+   instance->threads = 1;
+   instance->in_fmt  = in_fmt;
+
+   gameboy4x_initialize(instance, config, userdata); /* build colour LUTs */
+   return instance;
+}
+
+static void gameboy4x_generic_output(void *data,
+      unsigned *out_width, unsigned *out_height,
+      unsigned width, unsigned height)
+{
+   *out_width  = width  * 4; /* four output pixels per input pixel */
+   *out_height = height * 4; /* four output lines per input line */
+}
+
+static void gameboy4x_generic_destroy(void *data)
+{
+   struct filter_data *instance = (struct filter_data*)data;
+   /* A NULL handle is accepted and ignored */
+   if (!instance)
+      return;
+   free(instance->workers);
+   free(instance);
+}
+
+static void gameboy4x_work_cb_rgb565(void *data, void *thread_data)
+{
+   struct filter_data *filt           = (struct filter_data*)data;
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   const uint16_t *input              = (const uint16_t*)thr->in_data;
+   uint16_t *output                   = (uint16_t*)thr->out_data;
+   unsigned in_stride                 = (unsigned)(thr->in_pitch >> 1);  /* bytes -> 16-bit pixels */
+   unsigned out_stride                = (unsigned)(thr->out_pitch >> 1); /* bytes -> 16-bit pixels */
+   uint16_t *pixel_lut                = filt->colors.pixel_lut;
+   uint16_t *shadow_lut               = filt->colors.shadow_lut;
+   uint16_t *grid_lut                 = filt->colors.grid_lut;
+   unsigned x, y;
+   /* Maps each input pixel to one of 4 palette shades and draws it as a 4x4 cell */
+   for (y = 0; y < thr->height; ++y)
+   {
+      uint16_t *out_ptr = output;
+
+      for (x = 0; x < thr->width; ++x)
+      {
+         uint16_t *out_line_ptr = out_ptr;
+         uint16_t in_color      = *(input + x);
+         uint16_t in_rgb_mean   =
+               (in_color >> 11 & 0x1F) +
+               (in_color >>  6 & 0x1F) + /* upper 5 of the 6 green bits */
+               (in_color       & 0x1F);
+         uint16_t out_pixel_color;
+         uint16_t out_shadow_color;
+         uint16_t out_grid_color;
+         uint8_t  lut_index;
+
+         /* Mean of the 3 components: scale the sum by ~4/3
+          * with shift-adds, then '>> 2' (approximates sum / 3) */
+         in_rgb_mean += (in_rgb_mean +   2) >> 2;
+         in_rgb_mean += (in_rgb_mean +   8) >> 4;
+         in_rgb_mean += (in_rgb_mean + 128) >> 8;
+         in_rgb_mean >>= 2;
+
+         /* Convert to lookup table index
+          * > The mean is a 5-bit value, so '>> 3' cannot
+          *   exceed 3, but clamp anyway... */
+         lut_index = in_rgb_mean >> 3;
+         lut_index = (lut_index > 3) ? 3 : lut_index;
+
+         /* Get output pixel, pixel shadow and grid colours */
+         out_pixel_color  = *(pixel_lut + lut_index);
+         out_shadow_color = *(shadow_lut + lut_index);
+         out_grid_color   = *(grid_lut + lut_index);
+
+         /* - Pixel layout (p = pixel, s = shadow, g = grid) -
+          * Before:  After:
+          * (p)      (g)(p)(p)(p)
+          *          (s)(p)(p)(p)
+          *          (s)(p)(p)(p)
+          *          (s)(s)(s)(g)
+          */
+
+         /* Row 1: (g)(p)(p)(p) */
+         *out_line_ptr       = out_grid_color;
+         *(out_line_ptr + 1) = out_pixel_color;
+         *(out_line_ptr + 2) = out_pixel_color;
+         *(out_line_ptr + 3) = out_pixel_color;
+         out_line_ptr       += out_stride;
+
+         /* Row 2: (s)(p)(p)(p) */
+         *out_line_ptr       = out_shadow_color;
+         *(out_line_ptr + 1) = out_pixel_color;
+         *(out_line_ptr + 2) = out_pixel_color;
+         *(out_line_ptr + 3) = out_pixel_color;
+         out_line_ptr       += out_stride;
+
+         /* Row 3: (s)(p)(p)(p) */
+         *out_line_ptr       = out_shadow_color;
+         *(out_line_ptr + 1) = out_pixel_color;
+         *(out_line_ptr + 2) = out_pixel_color;
+         *(out_line_ptr + 3) = out_pixel_color;
+         out_line_ptr       += out_stride;
+
+         /* Row 4: (s)(s)(s)(g) */
+         *out_line_ptr       = out_shadow_color;
+         *(out_line_ptr + 1) = out_shadow_color;
+         *(out_line_ptr + 2) = out_shadow_color;
+         *(out_line_ptr + 3) = out_grid_color;
+
+         out_ptr += 4;
+      }
+
+      input  += in_stride;
+      output += out_stride << 2;
+   }
+}
+
+static void gameboy4x_generic_packets(void *data,
+      struct softfilter_work_packet *packets,
+      void *output, size_t output_stride,
+      const void *input, unsigned width, unsigned height, size_t input_stride)
+{
+   /* Only one worker exists (threads is set to 1 at
+    * creation), so a single packet describing the whole
+    * frame is filled in - no loop over threads is needed */
+   struct filter_data *filter            = (struct filter_data*)data;
+   struct softfilter_thread_data *worker = (struct softfilter_thread_data*)filter->workers;
+
+   worker->width     = width;
+   worker->height    = height;
+   worker->in_data   = (const uint8_t*)input;
+   worker->in_pitch  = input_stride;
+   worker->out_data  = (uint8_t*)output;
+   worker->out_pitch = output_stride;
+
+   packets->thread_data = worker;
+
+   if (filter->in_fmt == SOFTFILTER_FMT_RGB565)
+      packets->work = gameboy4x_work_cb_rgb565;
+}
+
+static const struct softfilter_implementation gameboy4x_generic = {
+   gameboy4x_generic_input_fmts,  /* positional initialiser: order follows the struct */
+   gameboy4x_generic_output_fmts,
+   /* Instance creation / teardown */
+   gameboy4x_generic_create,
+   gameboy4x_generic_destroy,
+   /* Thread count, output size and work packet setup */
+   gameboy4x_generic_threads,
+   gameboy4x_generic_output,
+   gameboy4x_generic_packets,
+   /* API version, followed by the filter's two name strings */
+   SOFTFILTER_API_VERSION,
+   "Gameboy4x",
+   "gameboy4x",
+};
+
+const struct softfilter_implementation *softfilter_get_implementation(
+      softfilter_simd_mask_t simd)
+{
+   (void)simd; /* ignored: the same implementation is returned for every mask */
+   return &gameboy4x_generic;
+}
+
+#ifdef RARCH_INTERNAL
+#undef softfilter_get_implementation
+#undef softfilter_thread_data
+#undef filter_data
+#endif
diff --git a/gfx/video_filters/grid2x.c b/gfx/video_filters/grid2x.c
new file mode 100644
index 0000000000..2978b9fe33
--- /dev/null
+++ b/gfx/video_filters/grid2x.c
@@ -0,0 +1,250 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2018 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Compile: gcc -o grid2x.so -shared grid2x.c -std=c99 -O3 -Wall -pedantic -fPIC */
+
+#include "softfilter.h"
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef RARCH_INTERNAL
+#define softfilter_get_implementation grid2x_get_implementation
+#define softfilter_thread_data grid2x_softfilter_thread_data
+#define filter_data grid2x_filter_data
+#endif
+
+struct softfilter_thread_data
+{
+   void *out_data;       /* destination frame buffer */
+   const void *in_data;  /* source frame buffer */
+   size_t out_pitch;     /* destination line pitch, in bytes */
+   size_t in_pitch;      /* source line pitch, in bytes */
+   unsigned colfmt;      /* not referenced anywhere in this file */
+   unsigned width;       /* source width, in pixels */
+   unsigned height;      /* source height, in lines */
+   int first;            /* not referenced anywhere in this file */
+   int last;             /* not referenced anywhere in this file */
+};
+
+struct filter_data
+{
+   unsigned threads;                        /* set to 1 by grid2x_generic_create() */
+   struct softfilter_thread_data *workers;  /* single worker record allocated at creation */
+   unsigned in_fmt;                         /* input format; selects the work callback */
+};
+
+static unsigned grid2x_generic_input_fmts(void)
+{
+   return SOFTFILTER_FMT_XRGB8888 | SOFTFILTER_FMT_RGB565; /* each has a work callback in this file */
+}
+
+static unsigned grid2x_generic_output_fmts(unsigned input_fmts)
+{
+   return input_fmts; /* output format mask is the input mask, unchanged */
+}
+
+static unsigned grid2x_generic_threads(void *data)
+{
+   struct filter_data *filt = (struct filter_data*)data;
+   return filt->threads; /* set to 1 in grid2x_generic_create() */
+}
+
+static void *grid2x_generic_create(const struct softfilter_config *config,
+      unsigned in_fmt, unsigned out_fmt,
+      unsigned max_width, unsigned max_height,
+      unsigned threads, softfilter_simd_mask_t simd, void *userdata)
+{
+   struct filter_data *instance = (struct filter_data*)calloc(1, sizeof(*instance));
+   struct softfilter_thread_data *worker = NULL;
+   (void)simd;
+   (void)config;
+   (void)userdata;
+   if (!instance)
+      return NULL;
+   /* Exactly one worker is allocated: the filter always
+    * reports a thread count of 1, whatever was requested */
+   worker = (struct softfilter_thread_data*)calloc(1, sizeof(*worker));
+   if (!worker) {
+      free(instance);
+      return NULL;
+   }
+   instance->workers = worker;
+   instance->threads = 1;
+   instance->in_fmt  = in_fmt;
+   return instance;
+}
+
+static void grid2x_generic_output(void *data,
+      unsigned *out_width, unsigned *out_height,
+      unsigned width, unsigned height)
+{
+   *out_width  = width  * 2; /* two output pixels per input pixel */
+   *out_height = height * 2; /* two output lines per input line */
+}
+
+static void grid2x_generic_destroy(void *data)
+{
+   struct filter_data *instance = (struct filter_data*)data;
+   /* A NULL handle is accepted and ignored */
+   if (!instance)
+      return;
+   free(instance->workers);
+   free(instance);
+}
+
+static void grid2x_work_cb_xrgb8888(void *data, void *thread_data)
+{
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   const uint32_t *input = (const uint32_t*)thr->in_data;
+   uint32_t *output = (uint32_t*)thr->out_data;
+   unsigned in_stride = (unsigned)(thr->in_pitch >> 2);   /* bytes -> 32-bit pixels */
+   unsigned out_stride = (unsigned)(thr->out_pitch >> 2); /* bytes -> 32-bit pixels */
+   unsigned x, y;
+   /* Each input pixel becomes a 2x2 cell: the original colour plus three darkened copies */
+   for (y = 0; y < thr->height; ++y)
+   {
+      uint32_t *out_ptr = output;
+      for (x = 0; x < thr->width; ++x)
+      {
+         /* Note: the 'padding' bits are processed as though they
+          * matter (they don't), which sidesteps byte swapping issues.
+          * Channels are uint32_t so that '<< 24' cannot overflow int */
+         uint32_t *out_line_ptr  = out_ptr;
+         uint32_t color          = *(input + x);
+         uint32_t p              = (color >> 24 & 0xFF); /* Padding bits */
+         uint32_t r              = (color >> 16 & 0xFF);
+         uint32_t g              = (color >>  8 & 0xFF);
+         uint32_t b              = (color       & 0xFF);
+         uint32_t scanline_color =          /* every channel at 3/4 (v - v/4) */
+               ((p - (p >> 2)) << 24) |
+               ((r - (r >> 2)) << 16) |
+               ((g - (g >> 2)) <<  8) |
+               ((b - (b >> 2))      );
+
+         /* Row 1: <colour><scanline> */
+         *out_line_ptr       = color;
+         *(out_line_ptr + 1) = scanline_color;
+         out_line_ptr       += out_stride;
+
+         /* Row 2: <scanline><scanline> */
+         *out_line_ptr       = scanline_color;
+         *(out_line_ptr + 1) = scanline_color;
+
+         out_ptr += 2;
+      }
+
+      input  += in_stride;
+      output += out_stride << 1;
+   }
+}
+
+static void grid2x_work_cb_rgb565(void *data, void *thread_data)
+{
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   const uint16_t *src_line = (const uint16_t*)thr->in_data;
+   uint16_t *dst_line       = (uint16_t*)thr->out_data;
+   unsigned src_stride      = (unsigned)(thr->in_pitch >> 1);  /* bytes -> 16-bit pixels */
+   unsigned dst_stride      = (unsigned)(thr->out_pitch >> 1); /* bytes -> 16-bit pixels */
+   unsigned col, row;
+
+   /* Every source pixel becomes a 2x2 cell: the original colour
+    * in the top-left corner, a darkened copy in the other three */
+   for (row = 0; row < thr->height; ++row)
+   {
+      for (col = 0; col < thr->width; ++col)
+      {
+         uint16_t *top     = dst_line + (col << 1);
+         uint16_t *bottom  = top + dst_stride;
+         uint16_t color    = src_line[col];
+         /* Upper five bits of each channel (green loses its LSB) */
+         uint8_t  r        = (color >> 11 & 0x1F);
+         uint8_t  g        = (color >>  6 & 0x1F);
+         uint8_t  b        = (color       & 0x1F);
+         /* Each channel reduced to 3/4 (v - v/4), then repacked */
+         uint16_t darkened =
+               ((r - (r >> 2)) << 11) |
+               ((g - (g >> 2)) <<  6) |
+               ((b - (b >> 2))      );
+
+         /* Row 1: <colour><darkened> */
+         top[0]    = color;
+         top[1]    = darkened;
+         /* Row 2: <darkened><darkened> */
+         bottom[0] = darkened;
+         bottom[1] = darkened;
+      }
+
+      src_line += src_stride;
+      dst_line += dst_stride << 1;
+   }
+}
+
+static void grid2x_generic_packets(void *data,
+      struct softfilter_work_packet *packets,
+      void *output, size_t output_stride,
+      const void *input, unsigned width, unsigned height, size_t input_stride)
+{
+   /* Only one worker exists (threads is set to 1 at
+    * creation), so a single packet describing the whole
+    * frame is filled in - no loop over threads is needed.
+    * The saving is tiny, but every little helps when
+    * running on an o3DS... */
+   struct filter_data *filter            = (struct filter_data*)data;
+   struct softfilter_thread_data *worker = (struct softfilter_thread_data*)filter->workers;
+
+   worker->width     = width;
+   worker->height    = height;
+   worker->in_data   = (const uint8_t*)input;
+   worker->in_pitch  = input_stride;
+   worker->out_data  = (uint8_t*)output;
+   worker->out_pitch = output_stride;
+
+   packets->thread_data = worker;
+
+   if (filter->in_fmt == SOFTFILTER_FMT_XRGB8888)
+      packets->work = grid2x_work_cb_xrgb8888;
+   else if (filter->in_fmt == SOFTFILTER_FMT_RGB565)
+      packets->work = grid2x_work_cb_rgb565;
+}
+
+static const struct softfilter_implementation grid2x_generic = {
+   grid2x_generic_input_fmts,  /* positional initialiser: order follows the struct */
+   grid2x_generic_output_fmts,
+   /* Instance creation / teardown */
+   grid2x_generic_create,
+   grid2x_generic_destroy,
+   /* Thread count, output size and work packet setup */
+   grid2x_generic_threads,
+   grid2x_generic_output,
+   grid2x_generic_packets,
+   /* API version, followed by the filter's two name strings */
+   SOFTFILTER_API_VERSION,
+   "Grid2x",
+   "grid2x",
+};
+
+const struct softfilter_implementation *softfilter_get_implementation(
+      softfilter_simd_mask_t simd)
+{
+   (void)simd; /* ignored: the same implementation is returned for every mask */
+   return &grid2x_generic;
+}
+
+#ifdef RARCH_INTERNAL
+#undef softfilter_get_implementation
+#undef softfilter_thread_data
+#undef filter_data
+#endif
diff --git a/gfx/video_filters/normal2x.c b/gfx/video_filters/normal2x.c
index 7ed675b696..3fdc5f4424 100644
--- a/gfx/video_filters/normal2x.c
+++ b/gfx/video_filters/normal2x.c
@@ -119,14 +119,17 @@ static void normal2x_work_cb_xrgb8888(void *data, void *thread_data)
       uint32_t *out_ptr = output;
       for (x = 0; x < thr->width; ++x)
       {
-         uint32_t color = *(input + x);
-         uint32_t color_buf[2];
+         uint32_t *out_line_ptr = out_ptr;
+         uint32_t color         = *(input + x);
 
-         color_buf[0] = color;
-         color_buf[1] = color;
+         /* Row 1 */
+         *out_line_ptr       = color;
+         *(out_line_ptr + 1) = color;
+         out_line_ptr       += out_stride;
 
-         memcpy(out_ptr,              color_buf, sizeof(color_buf));
-         memcpy(out_ptr + out_stride, color_buf, sizeof(color_buf));
+         /* Row 2 */
+         *out_line_ptr       = color;
+         *(out_line_ptr + 1) = color;
 
          out_ptr += 2;
       }
@@ -150,14 +153,17 @@ static void normal2x_work_cb_rgb565(void *data, void *thread_data)
       uint16_t *out_ptr = output;
       for (x = 0; x < thr->width; ++x)
       {
-         uint16_t color = *(input + x);
-         uint16_t color_buf[2];
+         uint16_t *out_line_ptr = out_ptr;
+         uint16_t color         = *(input + x);
 
-         color_buf[0] = color;
-         color_buf[1] = color;
+         /* Row 1 */
+         *out_line_ptr       = color;
+         *(out_line_ptr + 1) = color;
+         out_line_ptr       += out_stride;
 
-         memcpy(out_ptr,              color_buf, sizeof(color_buf));
-         memcpy(out_ptr + out_stride, color_buf, sizeof(color_buf));
+         /* Row 2 */
+         *out_line_ptr       = color;
+         *(out_line_ptr + 1) = color;
 
          out_ptr += 2;
       }
diff --git a/gfx/video_filters/scanline2x.c b/gfx/video_filters/scanline2x.c
index 6de127568a..4824a73a12 100644
--- a/gfx/video_filters/scanline2x.c
+++ b/gfx/video_filters/scanline2x.c
@@ -121,28 +121,27 @@ static void scanline2x_work_cb_xrgb8888(void *data, void *thread_data)
       {
          /* Note: We process the 'padding' bits as though they
           * matter (they don't), since this deals with any potential
-          * byte swapping issues */ 
+          * byte swapping issues */
+         uint32_t *out_line_ptr  = out_ptr;
          uint32_t color          = *(input + x);
          uint8_t  p              = (color >> 24 & 0xFF); /* Padding bits */
          uint8_t  r              = (color >> 16 & 0xFF);
          uint8_t  g              = (color >>  8 & 0xFF);
          uint8_t  b              = (color       & 0xFF);
          uint32_t scanline_color =
-               (((p >> 1) + (p >> 2)) << 24) |
-               (((r >> 1) + (r >> 2)) << 16) |
-               (((g >> 1) + (g >> 2)) <<  8) |
-               (((b >> 1) + (b >> 2))      );
-         uint32_t color_buf[2];
-         uint32_t scanline_color_buf[2];
+               ((p - (p >> 2)) << 24) |
+               ((r - (r >> 2)) << 16) |
+               ((g - (g >> 2)) <<  8) |
+               ((b - (b >> 2))      );
 
-         color_buf[0] = color;
-         color_buf[1] = color;
+         /* Row 1: Colour */
+         *out_line_ptr       = color;
+         *(out_line_ptr + 1) = color;
+         out_line_ptr       += out_stride;
 
-         scanline_color_buf[0] = scanline_color;
-         scanline_color_buf[1] = scanline_color;
-
-         memcpy(out_ptr,              color_buf,          sizeof(color_buf));
-         memcpy(out_ptr + out_stride, scanline_color_buf, sizeof(scanline_color_buf));
+         /* Row 2: Scanline */
+         *out_line_ptr       = scanline_color;
+         *(out_line_ptr + 1) = scanline_color;
 
          out_ptr += 2;
       }
@@ -166,25 +165,24 @@ static void scanline2x_work_cb_rgb565(void *data, void *thread_data)
       uint16_t *out_ptr = output;
       for (x = 0; x < thr->width; ++x)
       {
+         uint16_t *out_line_ptr  = out_ptr;
          uint16_t color          = *(input + x);
          uint8_t  r              = (color >> 11 & 0x1F);
          uint8_t  g              = (color >>  6 & 0x1F);
          uint8_t  b              = (color       & 0x1F);
          uint16_t scanline_color =
-               (((r >> 1) + (r >> 2)) << 11) |
-               (((g >> 1) + (g >> 2)) <<  6) |
-               (((b >> 1) + (b >> 2))      );
-         uint16_t color_buf[2];
-         uint16_t scanline_color_buf[2];
+               ((r - (r >> 2)) << 11) |
+               ((g - (g >> 2)) <<  6) |
+               ((b - (b >> 2))      );
 
-         color_buf[0] = color;
-         color_buf[1] = color;
+         /* Row 1: Colour */
+         *out_line_ptr       = color;
+         *(out_line_ptr + 1) = color;
+         out_line_ptr       += out_stride;
 
-         scanline_color_buf[0] = scanline_color;
-         scanline_color_buf[1] = scanline_color;
-
-         memcpy(out_ptr,              color_buf,          sizeof(color_buf));
-         memcpy(out_ptr + out_stride, scanline_color_buf, sizeof(scanline_color_buf));
+         /* Row 2: Scanline */
+         *out_line_ptr       = scanline_color;
+         *(out_line_ptr + 1) = scanline_color;
 
          out_ptr += 2;
       }
diff --git a/gfx/video_filters/softfilter.h b/gfx/video_filters/softfilter.h
index 7d39cb0c4f..e05e877184 100644
--- a/gfx/video_filters/softfilter.h
+++ b/gfx/video_filters/softfilter.h
@@ -53,6 +53,9 @@ typedef int (*softfilter_config_get_float_t)(void *userdata,
 typedef int (*softfilter_config_get_int_t)(void *userdata,
       const char *key, int *value, int default_value);
 
+typedef int (*softfilter_config_get_hex_t)(void *userdata,
+      const char *key, unsigned *value, unsigned default_value); /* same shape as softfilter_config_get_int_t, with unsigned value/default */
+
 /* Allocates an array with values. free() with softfilter_config_free_t. */
 typedef int (*softfilter_config_get_float_array_t)(void *userdata,
       const char *key,
@@ -74,6 +77,7 @@ struct softfilter_config
 {
    softfilter_config_get_float_t get_float;
    softfilter_config_get_int_t get_int;
+   softfilter_config_get_hex_t get_hex;
 
    softfilter_config_get_float_array_t get_float_array;
    softfilter_config_get_int_array_t get_int_array;
diff --git a/griffin/griffin.c b/griffin/griffin.c
index ddca147bfc..b811599de6 100644
--- a/griffin/griffin.c
+++ b/griffin/griffin.c
@@ -1002,6 +1002,11 @@ FILTERS
 #include "../gfx/video_filters/phosphor2x.c"
 #include "../gfx/video_filters/normal2x.c"
 #include "../gfx/video_filters/scanline2x.c"
+#include "../gfx/video_filters/grid2x.c"
+#include "../gfx/video_filters/gameboy3x.c"
+#include "../gfx/video_filters/gameboy4x.c"
+#include "../gfx/video_filters/dot_matrix_3x.c"
+#include "../gfx/video_filters/dot_matrix_4x.c"
 #endif
 
 #ifdef HAVE_DSP_FILTER
diff --git a/libretro-common/file/config_file_userdata.c b/libretro-common/file/config_file_userdata.c
index ae71eb6d18..799ce5f2a4 100644
--- a/libretro-common/file/config_file_userdata.c
+++ b/libretro-common/file/config_file_userdata.c
@@ -61,6 +61,24 @@ int config_userdata_get_int(void *userdata, const char *key_str,
    return got;
 }
 
+int config_userdata_get_hex(void *userdata, const char *key_str,
+      unsigned *value, unsigned default_value)
+{ /* Looks key_str up under both userdata prefixes; *value gets default_value if both lookups fail. */
+   bool got;
+   char key[2][256];
+   struct config_file_userdata *usr = (struct config_file_userdata*)userdata;
+   /* One candidate key per prefix; presumably "<prefix>_<key_str>" (joined with '_') - confirm in fill_pathname_join_delim. */
+   fill_pathname_join_delim(key[0], usr->prefix[0], key_str, '_', sizeof(key[0]));
+   fill_pathname_join_delim(key[1], usr->prefix[1], key_str, '_', sizeof(key[1]));
+   /* key[0] has priority: '||' short-circuits, so key[1] is only queried when the key[0] lookup returns false. */
+   got = config_get_hex(usr->conf, key[0], value);
+   got = got || config_get_hex(usr->conf, key[1], value);
+   /* Neither lookup reported a value: hand back the caller-supplied default. */
+   if (!got)
+      *value = default_value;
+   return got; /* nonzero iff one of the two lookups returned true */
+}
+
 int config_userdata_get_float_array(void *userdata, const char *key_str,
       float **values, unsigned *out_num_values,
       const float *default_values, unsigned num_default_values)
diff --git a/libretro-common/include/file/config_file_userdata.h b/libretro-common/include/file/config_file_userdata.h
index 09e0009f27..0490da2481 100644
--- a/libretro-common/include/file/config_file_userdata.h
+++ b/libretro-common/include/file/config_file_userdata.h
@@ -43,6 +43,9 @@ int config_userdata_get_float(void *userdata, const char *key_str,
 int config_userdata_get_int(void *userdata, const char *key_str,
       int *value, int default_value);
 
+int config_userdata_get_hex(void *userdata, const char *key_str,
+      unsigned *value, unsigned default_value); /* stores default_value in *value when neither prefixed key is found */
+
 int config_userdata_get_float_array(void *userdata, const char *key_str,
       float **values, unsigned *out_num_values,
       const float *default_values, unsigned num_default_values);