From 93078889ccbe3e82e8249a9467d1f2bb4193a2fc Mon Sep 17 00:00:00 2001 From: Themaister Date: Sat, 20 Oct 2012 01:12:02 +0200 Subject: [PATCH 1/7] Begin reworking RGB565 as default pixel format. --- Makefile | 14 +-- Makefile.win | 14 +-- driver.c | 55 +++++++++- driver.h | 4 + dynamic.c | 9 +- general.h | 2 +- gfx/gl.c | 69 ++++++------ gfx/gl_common.h | 25 +++-- gfx/scaler/pixconv.c | 243 +++++++++++++++++++++++++++++++++++++++++++ gfx/scaler/pixconv.h | 16 +++ gfx/scaler/scaler.c | 20 ++-- gfx/scaler/scaler.h | 1 + gfx/shader_cg.c | 4 +- gfx/shader_glsl.c | 4 +- libretro.h | 17 ++- record/ffemu.c | 4 +- record/ffemu.h | 2 +- retroarch.c | 21 +++- screenshot.c | 44 +++++--- 19 files changed, 467 insertions(+), 101 deletions(-) diff --git a/Makefile b/Makefile index a0c1956594..bc63cbc31e 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,11 @@ OBJ = retroarch.o \ audio/null.o \ gfx/null.o \ input/null.o \ - screenshot.o + screenshot.o \ + gfx/scaler/scaler.o \ + gfx/scaler/pixconv.o \ + gfx/scaler/scaler_int.o \ + gfx/scaler/filter.o JOYCONFIG_OBJ = tools/retroarch-joyconfig.o \ conf/config_file.o \ @@ -142,14 +146,6 @@ ifeq ($(PERF_TEST), 1) OBJ += benchmark.o endif -ifeq ($(HAVE_SDL), 1) - OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o -else ifeq ($(HAVE_OPENGL), 1) - OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o -else ifeq ($(HAVE_FFMPEG), 1) - OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o -endif - ifeq ($(HAVE_SDL), 1) OBJ += gfx/sdl_gfx.o input/sdl_input.o input/sdl_joypad.o audio/sdl_audio.o JOYCONFIG_OBJ += input/sdl_joypad.o diff --git a/Makefile.win b/Makefile.win index fd01396af5..f7b675cbad 100644 --- a/Makefile.win +++ b/Makefile.win @@ -22,7 +22,11 @@ OBJ = retroarch.o \ input/null.o \ fifo_buffer.o \ gfx/null.o \ - media/rarch.o + media/rarch.o \ + gfx/scaler/scaler.o \ + gfx/scaler/pixconv.o \ + 
gfx/scaler/scaler_int.o \ + gfx/scaler/filter.o JOBJ := conf/config_file.o \ tools/retroarch-joyconfig.o \ @@ -75,14 +79,6 @@ ifeq ($(PERF_TEST), 1) OBJ += benchmark.o endif -ifeq ($(HAVE_SDL), 1) - OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o -else ifeq ($(HAVE_OPENGL), 1) - OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o -else ifeq ($(HAVE_FFMPEG), 1) - OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o -endif - JLIBS = ifeq ($(HAVE_SDL), 1) diff --git a/driver.c b/driver.c index 000fa75350..a4fafb86c7 100644 --- a/driver.c +++ b/driver.c @@ -430,9 +430,9 @@ static void init_filter(void) if (*g_settings.video.filter_path == '\0') return; - if (g_extern.system.rgb32) + if (g_extern.system.pix_fmt != RETRO_PIXEL_FORMAT_0RGB1555) { - RARCH_WARN("libretro implementation uses XRGB8888 format. CPU filters only support 0RGB1555.\n"); + RARCH_WARN("CPU filters only support 0RGB1555.\n"); return; } @@ -536,6 +536,34 @@ static void init_shader_dir(void) } #endif +static bool init_video_pixel_converter(unsigned size) +{ + if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_0RGB1555) + { + RARCH_WARN("0RGB1555 pixel format is deprecated, and will be slower. For 15/16-bit, RGB565 format is preferred.\n"); + + // We'll tweak these values later, + // just set most of them to something sane to begin with. + driver.scaler.in_width = + driver.scaler.in_height = + driver.scaler.out_width = + driver.scaler.out_height = size; + + driver.scaler.scaler_type = SCALER_TYPE_POINT; + driver.scaler.in_fmt = SCALER_FMT_0RGB1555; + + // TODO: Pick either ARGB8888 or RGB565 depending on driver ... 
+ driver.scaler.out_fmt = SCALER_FMT_RGB565; + + if (!scaler_ctx_gen_filter(&driver.scaler)) + return false; + + driver.scaler_out = calloc(sizeof(uint16_t), size * size); + } + + return true; +} + void init_video_input(void) { #ifdef HAVE_DYLIB @@ -585,12 +613,21 @@ void init_video_input(void) } } - RARCH_LOG("Video @ %ux%u\n", width, height); + if (width && height) + RARCH_LOG("Video @ %ux%u\n", width, height); + else + RARCH_LOG("Video @ fullscreen\n"); driver.display_type = RARCH_DISPLAY_NONE; driver.video_display = 0; driver.video_window = 0; + if (!init_video_pixel_converter(RARCH_SCALE_BASE * scale)) + { + RARCH_ERR("Failed to init pixel converter.\n"); + rarch_fail(1, "init_video_input()"); + } + video_info_t video = {0}; video.width = width; video.height = height; @@ -599,7 +636,7 @@ void init_video_input(void) video.force_aspect = g_settings.video.force_aspect; video.smooth = g_settings.video.smooth; video.input_scale = scale; - video.rgb32 = g_extern.filter.active || g_extern.system.rgb32; + video.rgb32 = g_extern.filter.active || (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888); const input_driver_t *tmp = driver.input; driver.video_data = video_init_func(&video, &driver.input, &driver.input_data); @@ -643,6 +680,14 @@ void init_video_input(void) } } +static void deinit_pixel_converter(void) +{ + scaler_ctx_gen_reset(&driver.scaler); + memset(&driver.scaler, 0, sizeof(driver.scaler)); + free(driver.scaler_out); + driver.scaler_out = NULL; +} + void uninit_video_input(void) { if (driver.input_data != driver.video_data && driver.input) @@ -651,6 +696,8 @@ void uninit_video_input(void) if (driver.video_data && driver.video) video_free_func(); + deinit_pixel_converter(); + #ifdef HAVE_DYLIB deinit_filter(); #endif diff --git a/driver.h b/driver.h index 3061950af9..61242ed48d 100644 --- a/driver.h +++ b/driver.h @@ -23,6 +23,7 @@ #include #include #include "msvc/msvc_compat.h" +#include "gfx/scaler/scaler.h" #ifdef HAVE_CONFIG_H #include "config.h" 
@@ -240,6 +241,9 @@ typedef struct driver uintptr_t video_display; uintptr_t video_window; enum rarch_display_type display_type; + + struct scaler_ctx scaler; + void *scaler_out; } driver_t; void init_drivers(void); diff --git a/dynamic.c b/dynamic.c index c8f8016a7d..746e622924 100644 --- a/dynamic.c +++ b/dynamic.c @@ -420,17 +420,18 @@ static bool environment_cb(unsigned cmd, void *data) case RETRO_ENVIRONMENT_SET_PIXEL_FORMAT: { enum retro_pixel_format pix_fmt = *(const enum retro_pixel_format*)data; - bool rgb32 = false; switch (pix_fmt) { case RETRO_PIXEL_FORMAT_0RGB1555: - rgb32 = false; RARCH_LOG("Environ SET_PIXEL_FORMAT: 0RGB1555.\n"); break; + case RETRO_PIXEL_FORMAT_RGB565: + RARCH_LOG("Environ SET_PIXEL_FORMAT: RGB565.\n"); + break; + #ifndef RARCH_CONSOLE case RETRO_PIXEL_FORMAT_XRGB8888: - rgb32 = true; RARCH_LOG("Environ SET_PIXEL_FORMAT: XRGB8888.\n"); break; #endif @@ -438,7 +439,7 @@ static bool environment_cb(unsigned cmd, void *data) return false; } - g_extern.system.rgb32 = rgb32; + g_extern.system.pix_fmt = pix_fmt; break; } diff --git a/general.h b/general.h index 66ab6775d5..822e70acba 100644 --- a/general.h +++ b/general.h @@ -302,7 +302,7 @@ struct global unsigned rotation; bool shutdown; unsigned performance_level; - bool rgb32; + enum retro_pixel_format pix_fmt; bool force_nonblock; diff --git a/gfx/gl.c b/gfx/gl.c index 0e722620f9..f39443e9b6 100644 --- a/gfx/gl.c +++ b/gfx/gl.c @@ -460,8 +460,8 @@ static void gl_create_fbo_textures(gl_t *gl) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter_type); glTexImage2D(GL_TEXTURE_2D, - 0, RARCH_GL_INTERNAL_FORMAT, gl->fbo_rect[i].width, gl->fbo_rect[i].height, - 0, RARCH_GL_TEXTURE_TYPE, + 0, RARCH_GL_INTERNAL_FORMAT32, gl->fbo_rect[i].width, gl->fbo_rect[i].height, + 0, RARCH_GL_TEXTURE_TYPE32, RARCH_GL_FORMAT32, NULL); } @@ -710,8 +710,8 @@ static void gl_check_fbo_dimensions(gl_t *gl) glBindTexture(GL_TEXTURE_2D, gl->fbo_texture[i]); glTexImage2D(GL_TEXTURE_2D, - 0, 
RARCH_GL_INTERNAL_FORMAT, gl->fbo_rect[i].width, gl->fbo_rect[i].height, - 0, RARCH_GL_TEXTURE_TYPE, + 0, RARCH_GL_INTERNAL_FORMAT32, gl->fbo_rect[i].width, gl->fbo_rect[i].height, + 0, RARCH_GL_TEXTURE_TYPE32, RARCH_GL_FORMAT32, NULL); pglFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, gl->fbo_texture[i], 0); @@ -862,8 +862,8 @@ static void gl_update_input_size(gl_t *gl, unsigned width, unsigned height, unsi } // It is *much* faster (order of mangnitude on my setup) to use a custom SIMD-optimized conversion routine than letting GL do it :( -#if !defined(HAVE_PSGL) -static inline void gl_convert_frame_rgb15_32(gl_t *gl, void *output, const void *input, int width, int height, int in_pitch) +#if !defined(HAVE_PSGL) && !defined(HAVE_OPENGLES2) +static inline void gl_convert_frame_rgb16_32(gl_t *gl, void *output, const void *input, int width, int height, int in_pitch) { if (width != gl->scaler.in_width || height != gl->scaler.in_height) { @@ -871,7 +871,7 @@ static inline void gl_convert_frame_rgb15_32(gl_t *gl, void *output, const void gl->scaler.in_height = height; gl->scaler.out_width = width; gl->scaler.out_height = height; - gl->scaler.in_fmt = SCALER_FMT_0RGB1555; + gl->scaler.in_fmt = SCALER_FMT_RGB565; gl->scaler.out_fmt = SCALER_FMT_ARGB8888; gl->scaler.scaler_type = SCALER_TYPE_POINT; scaler_ctx_gen_filter(&gl->scaler); @@ -925,37 +925,37 @@ static void gl_init_textures(gl_t *gl) #else static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, unsigned height, unsigned pitch) { - if (gl->base_size == 2) // ARGB1555 => ARGB8888, SIMD-style :D +#ifdef HAVE_OPENGLES2 + // No GL_UNPACK_ROW_LENGTH ;( + unsigned pitch_width = pitch / gl->base_size; + if (width == pitch_width) // Happy path :D { - glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(width * sizeof(uint32_t))); // Always use 32-bit textures. 
- gl_convert_frame_rgb15_32(gl, gl->conv_buffer, frame, width, height, pitch); + glTexSubImage2D(GL_TEXTURE_2D, + 0, 0, 0, width, height, gl->texture_type, + gl->texture_fmt, frame); + } + else // Slower path. + { + const uint8_t *src = (const uint8_t*)frame; + for (unsigned h = 0; h < height; h++, src += pitch) + { + glTexSubImage2D(GL_TEXTURE_2D, + 0, 0, h, width, 1, gl->texture_type, + gl->texture_fmt, src); + } + } +#else + glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(pitch)); + if (gl->base_size == 2) + { + // Always use 32-bit textures on desktop GL. + gl_convert_frame_rgb16_32(gl, gl->conv_buffer, frame, width, height, pitch); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, gl->texture_type, gl->texture_fmt, gl->conv_buffer); } else { -#ifdef HAVE_OPENGLES2 - // No GL_UNPACK_ROW_LENGTH ;( - unsigned pitch_width = pitch / gl->base_size; - if (width == pitch_width) // Happy path :D - { - glTexSubImage2D(GL_TEXTURE_2D, - 0, 0, 0, width, height, gl->texture_type, - gl->texture_fmt, frame); - } - else // Probably slower path. - { - const uint32_t *src = (const uint32_t*)frame; - for (unsigned h = 0; h < height; h++, src += pitch_width) - { - glTexSubImage2D(GL_TEXTURE_2D, - 0, 0, h, width, 1, gl->texture_type, - gl->texture_fmt, src); - } - } -#else - glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(pitch)); glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / gl->base_size); glTexSubImage2D(GL_TEXTURE_2D, @@ -963,8 +963,8 @@ static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, un gl->texture_fmt, frame); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); -#endif } +#endif } static void gl_init_textures(gl_t *gl) @@ -980,7 +980,7 @@ static void gl_init_textures(gl_t *gl) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, gl->tex_filter); glTexImage2D(GL_TEXTURE_2D, - 0, RARCH_GL_INTERNAL_FORMAT, gl->tex_w, gl->tex_h, 0, gl->texture_type, + 0, gl->internal_fmt, gl->tex_w, gl->tex_h, 0, gl->texture_type, gl->texture_fmt, gl->empty_buf ? 
gl->empty_buf : NULL); } glBindTexture(GL_TEXTURE_2D, gl->texture[gl->tex_index]); @@ -1305,7 +1305,8 @@ static void *gl_init(const video_info_t *video, const input_driver_t **input, vo else gl->tex_filter = video->smooth ? GL_LINEAR : GL_NEAREST; - gl->texture_type = RARCH_GL_TEXTURE_TYPE; + gl->internal_fmt = video->rgb32 ? RARCH_GL_INTERNAL_FORMAT32 : RARCH_GL_INTERNAL_FORMAT16; + gl->texture_type = video->rgb32 ? RARCH_GL_TEXTURE_TYPE32 : RARCH_GL_TEXTURE_TYPE16; gl->texture_fmt = video->rgb32 ? RARCH_GL_FORMAT32 : RARCH_GL_FORMAT16; gl->base_size = video->rgb32 ? sizeof(uint32_t) : sizeof(uint16_t); diff --git a/gfx/gl_common.h b/gfx/gl_common.h index d809a72689..d782f0ccd1 100644 --- a/gfx/gl_common.h +++ b/gfx/gl_common.h @@ -210,7 +210,8 @@ typedef struct gl struct gl_coords coords; GLuint pbo; - GLenum texture_type; // XBGR1555 or ARGB + GLenum internal_fmt; + GLenum texture_type; // RGB565 or ARGB GLenum texture_fmt; GLenum border_type; unsigned base_size; // 2 or 4 @@ -245,19 +246,25 @@ extern PFNGLACTIVETEXTUREPROC pglActiveTexture; #endif #if defined(HAVE_PSGL) -#define RARCH_GL_INTERNAL_FORMAT GL_ARGB_SCE -#define RARCH_GL_TEXTURE_TYPE GL_BGRA +#define RARCH_GL_INTERNAL_FORMAT32 GL_ARGB_SCE +#define RARCH_GL_INTERNAL_FORMAT16 GL_ARGB_SCE +#define RARCH_GL_TEXTURE_TYPE32 GL_BGRA +#define RARCH_GL_TEXTURE_TYPE16 GL_BGRA #define RARCH_GL_FORMAT32 GL_UNSIGNED_INT_8_8_8_8_REV #define RARCH_GL_FORMAT16 GL_RGB5_A1 #elif defined(HAVE_OPENGLES) -#define RARCH_GL_INTERNAL_FORMAT GL_BGRA_EXT -#define RARCH_GL_TEXTURE_TYPE GL_BGRA_EXT +#define RARCH_GL_INTERNAL_FORMAT32 GL_BGRA_EXT +#define RARCH_GL_INTERNAL_FORMAT16 GL_RGB +#define RARCH_GL_TEXTURE_TYPE32 GL_BGRA_EXT +#define RARCH_GL_TEXTURE_TYPE16 GL_RGB #define RARCH_GL_FORMAT32 GL_UNSIGNED_BYTE -// 15-bit is converted to 32-bit directly as we have to convert anyways. 
-#define RARCH_GL_FORMAT16 GL_UNSIGNED_BYTE +#define RARCH_GL_FORMAT16 GL_UNSIGNED_SHORT_5_6_5 #else -#define RARCH_GL_INTERNAL_FORMAT GL_RGBA -#define RARCH_GL_TEXTURE_TYPE GL_BGRA +// On desktop, we always use 32-bit. +#define RARCH_GL_INTERNAL_FORMAT32 GL_RGBA +#define RARCH_GL_INTERNAL_FORMAT16 GL_RGBA +#define RARCH_GL_TEXTURE_TYPE32 GL_BGRA +#define RARCH_GL_TEXTURE_TYPE16 GL_BGRA #define RARCH_GL_FORMAT32 GL_UNSIGNED_INT_8_8_8_8_REV #define RARCH_GL_FORMAT16 GL_UNSIGNED_INT_8_8_8_8_REV #endif diff --git a/gfx/scaler/pixconv.c b/gfx/scaler/pixconv.c index 33cfc1f00b..5318b9ef80 100644 --- a/gfx/scaler/pixconv.c +++ b/gfx/scaler/pixconv.c @@ -27,6 +27,64 @@ #include #endif +#if defined(__SSE2__) +void conv_0rgb1555_rgb565(void *output_, const void *input_, + int width, int height, + int out_stride, int in_stride) +{ + const uint16_t *input = (const uint16_t*)input_; + uint16_t *output = (uint16_t*)output_; + + int max_width = width - 7; + + const __m128i hi_mask = _mm_set1_epi16((int16_t)((0x1f << 11) | (0x1f << 6))); + const __m128i lo_mask = _mm_set1_epi16(0x1f); + const __m128i glow_mask = _mm_set1_epi16(1 << 5); + + for (int h = 0; h < height; h++, output += out_stride >> 1, input += in_stride >> 1) + { + int w; + for (w = 0; w < max_width; w += 8) + { + const __m128i in = _mm_loadu_si128((const __m128i*)(input + w)); + __m128i rg = _mm_and_si128(_mm_slli_epi16(in, 1), hi_mask); + __m128i b = _mm_and_si128(in, lo_mask); + __m128i glow = _mm_and_si128(_mm_srli_epi16(in, 4), glow_mask); + _mm_storeu_si128((__m128i*)(output + w), _mm_or_si128(rg, _mm_or_si128(b, glow))); + } + + for (; w < width; w++) + { + uint16_t col = input[w]; + uint16_t rg = (col << 1) & ((0x1f << 11) | (0x1f << 6)); + uint16_t b = col & 0x1f; + uint16_t glow = (col >> 4) & (1 << 5); + output[w] = rg | b | glow; + } + } +} +#else +void conv_0rgb1555_rgb565(void *output_, const void *input_, + int width, int height, + int out_stride, int in_stride) +{ + const uint16_t *input = (const 
uint16_t*)input_; + uint16_t *output = (uint16_t*)output_; + + for (int h = 0; h < height; h++, output += out_stride >> 1, input += in_stride >> 1) + { + for (int w = 0; w < width; w++) + { + uint16_t col = input[w]; + uint16_t rg = (col << 1) & ((0x1f << 11) | (0x1f << 6)); + uint16_t b = col & 0x1f; + uint16_t glow = (col >> 4) & (1 << 5); + output[w] = rg | b | glow; + } + } +} +#endif + #if defined(__SSE2__) void conv_0rgb1555_argb8888(void *output_, const void *input_, int width, int height, @@ -109,6 +167,90 @@ void conv_0rgb1555_argb8888(void *output_, const void *input_, } #endif +#if defined(__SSE2__) +void conv_rgb565_argb8888(void *output_, const void *input_, + int width, int height, + int out_stride, int in_stride) +{ + const uint16_t *input = (const uint16_t*)input_; + uint32_t *output = (uint32_t*)output_; + + const __m128i pix_mask_r = _mm_set1_epi16(0x1f << 10); + const __m128i pix_mask_g = _mm_set1_epi16(0x3f << 5); + const __m128i pix_mask_b = _mm_set1_epi16(0x1f << 5); + const __m128i mul16_r = _mm_set1_epi16(0x0210); + const __m128i mul16_g = _mm_set1_epi16(0x2080); + const __m128i mul16_b = _mm_set1_epi16(0x4200); + const __m128i a = _mm_set1_epi16(0x00ff); + + int max_width = width - 7; + + for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride >> 1) + { + int w; + for (w = 0; w < max_width; w += 8) + { + const __m128i in = _mm_loadu_si128((const __m128i*)(input + w)); + __m128i r = _mm_and_si128(_mm_srli_epi16(in, 1), pix_mask_r); + __m128i g = _mm_and_si128(in, pix_mask_g); + __m128i b = _mm_and_si128(_mm_slli_epi16(in, 5), pix_mask_b); + + r = _mm_mulhi_epi16(r, mul16_r); + g = _mm_mulhi_epi16(g, mul16_g); + b = _mm_mulhi_epi16(b, mul16_b); + + __m128i res_lo_bg = _mm_unpacklo_epi8(b, g); + __m128i res_hi_bg = _mm_unpackhi_epi8(b, g); + __m128i res_lo_ra = _mm_unpacklo_epi8(r, a); + __m128i res_hi_ra = _mm_unpackhi_epi8(r, a); + + __m128i res_lo = _mm_or_si128(res_lo_bg, _mm_slli_si128(res_lo_ra, 2)); + __m128i 
res_hi = _mm_or_si128(res_hi_bg, _mm_slli_si128(res_hi_ra, 2)); + + _mm_storeu_si128((__m128i*)(output + w + 0), res_lo); + _mm_storeu_si128((__m128i*)(output + w + 4), res_hi); + } + + for (; w < width; w++) + { + uint32_t col = input[w]; + uint32_t r = (col >> 11) & 0x1f; + uint32_t g = (col >> 5) & 0x3f; + uint32_t b = (col >> 0) & 0x1f; + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + + output[w] = (0xff << 24) | (r << 16) | (g << 8) | (b << 0); + } + } +} +#else +void conv_rgb565_argb8888(void *output_, const void *input_, + int width, int height, + int out_stride, int in_stride) +{ + const uint16_t *input = (const uint16_t*)input_; + uint32_t *output = (uint32_t*)output_; + + for (int h = 0; h < height; h++, output += out_stride >> 2, input += in_stride >> 1) + { + for (int w = 0; w < width; w++) + { + uint32_t col = input[w]; + uint32_t r = (col >> 11) & 0x1f; + uint32_t g = (col >> 5) & 0x3f; + uint32_t b = (col >> 0) & 0x1f; + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + + output[w] = (0xff << 24) | (r << 16) | (g << 8) | (b << 0); + } + } +} +#endif + #if defined(__SSE2__) // :( TODO: Make this saner. 
static inline void store_bgr24_sse2(void *output, __m128i a, __m128i b, __m128i c, __m128i d) @@ -223,6 +365,80 @@ void conv_0rgb1555_bgr24(void *output_, const void *input_, } } } + +void conv_rgb565_bgr24(void *output_, const void *input_, + int width, int height, + int out_stride, int in_stride) +{ + const uint16_t *input = (const uint16_t*)input_; + uint8_t *output = (uint8_t*)output_; + + const __m128i pix_mask_r = _mm_set1_epi16(0x1f << 10); + const __m128i pix_mask_g = _mm_set1_epi16(0x3f << 5); + const __m128i pix_mask_b = _mm_set1_epi16(0x1f << 5); + const __m128i mul16_r = _mm_set1_epi16(0x0210); + const __m128i mul16_g = _mm_set1_epi16(0x2080); + const __m128i mul16_b = _mm_set1_epi16(0x4200); + const __m128i a = _mm_set1_epi16(0x00ff); + + int max_width = width - 15; + + for (int h = 0; h < height; h++, output += out_stride, input += in_stride >> 1) + { + uint8_t *out = output; + + int w; + for (w = 0; w < max_width; w += 16, out += 48) + { + const __m128i in0 = _mm_loadu_si128((const __m128i*)(input + w)); + const __m128i in1 = _mm_loadu_si128((const __m128i*)(input + w + 8)); + __m128i r0 = _mm_and_si128(_mm_srli_epi16(in0, 1), pix_mask_r); + __m128i g0 = _mm_and_si128(in0, pix_mask_g); + __m128i b0 = _mm_and_si128(_mm_slli_epi16(in0, 5), pix_mask_b); + __m128i r1 = _mm_and_si128(_mm_srli_epi16(in1, 1), pix_mask_r); + __m128i g1 = _mm_and_si128(in1, pix_mask_g); + __m128i b1 = _mm_and_si128(_mm_slli_epi16(in1, 5), pix_mask_b); + + r0 = _mm_mulhi_epi16(r0, mul16_r); + g0 = _mm_mulhi_epi16(g0, mul16_g); + b0 = _mm_mulhi_epi16(b0, mul16_b); + r1 = _mm_mulhi_epi16(r1, mul16_r); + g1 = _mm_mulhi_epi16(g1, mul16_g); + b1 = _mm_mulhi_epi16(b1, mul16_b); + + __m128i res_lo_bg0 = _mm_unpacklo_epi8(b0, g0); + __m128i res_hi_bg0 = _mm_unpackhi_epi8(b0, g0); + __m128i res_lo_ra0 = _mm_unpacklo_epi8(r0, a); + __m128i res_hi_ra0 = _mm_unpackhi_epi8(r0, a); + __m128i res_lo_bg1 = _mm_unpacklo_epi8(b1, g1); + __m128i res_hi_bg1 = _mm_unpackhi_epi8(b1, g1); + __m128i 
res_lo_ra1 = _mm_unpacklo_epi8(r1, a); + __m128i res_hi_ra1 = _mm_unpackhi_epi8(r1, a); + + __m128i res_lo0 = _mm_or_si128(res_lo_bg0, _mm_slli_si128(res_lo_ra0, 2)); + __m128i res_hi0 = _mm_or_si128(res_hi_bg0, _mm_slli_si128(res_hi_ra0, 2)); + __m128i res_lo1 = _mm_or_si128(res_lo_bg1, _mm_slli_si128(res_lo_ra1, 2)); + __m128i res_hi1 = _mm_or_si128(res_hi_bg1, _mm_slli_si128(res_hi_ra1, 2)); + + store_bgr24_sse2(out, res_lo0, res_hi0, res_lo1, res_hi1); + } + + for (; w < width; w++) + { + uint32_t col = input[w]; + uint32_t r = (col >> 11) & 0x1f; + uint32_t g = (col >> 5) & 0x3f; + uint32_t b = (col >> 0) & 0x1f; + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + + *out++ = b; + *out++ = g; + *out++ = r; + } + } +} #else void conv_0rgb1555_bgr24(void *output_, const void *input_, int width, int height, @@ -250,6 +466,33 @@ void conv_0rgb1555_bgr24(void *output_, const void *input_, } } } + +void conv_rgb565_bgr24(void *output_, const void *input_, + int width, int height, + int out_stride, int in_stride) +{ + const uint16_t *input = (const uint16_t*)input_; + uint8_t *output = (uint8_t*)output_; + + for (int h = 0; h < height; h++, output += out_stride, input += in_stride >> 1) + { + uint8_t *out = output; + for (int w = 0; w < width; w++) + { + uint32_t col = input[w]; + uint32_t b = (col >> 0) & 0x1f; + uint32_t g = (col >> 5) & 0x3f; + uint32_t r = (col >> 11) & 0x1f; + b = (b << 3) | (b >> 2); + g = (g << 2) | (g >> 4); + r = (r << 3) | (r >> 2); + + *out++ = b; + *out++ = g; + *out++ = r; + } + } +} #endif void conv_bgr24_argb8888(void *output_, const void *input_, diff --git a/gfx/scaler/pixconv.h b/gfx/scaler/pixconv.h index d27608704d..9cdb7d9182 100644 --- a/gfx/scaler/pixconv.h +++ b/gfx/scaler/pixconv.h @@ -20,6 +20,14 @@ void conv_0rgb1555_argb8888(void *output, const void *input, int width, int height, int out_stride, int in_stride); +void conv_0rgb1555_rgb565(void *output, const void *input, + int width, int 
height, + int out_stride, int in_stride); + +void conv_rgb565_argb8888(void *output, const void *input, + int width, int height, + int out_stride, int in_stride); + void conv_bgr24_argb8888(void *output, const void *input, int width, int height, int out_stride, int in_stride); @@ -28,6 +36,10 @@ void conv_argb8888_0rgb1555(void *output, const void *input, int width, int height, int out_stride, int in_stride); +void conv_argb8888_rgb565(void *output, const void *input, + int width, int height, + int out_stride, int in_stride); + void conv_argb8888_bgr24(void *output, const void *input, int width, int height, int out_stride, int in_stride); @@ -36,6 +48,10 @@ void conv_0rgb1555_bgr24(void *output, const void *input, int width, int height, int out_stride, int in_stride); +void conv_rgb565_bgr24(void *output, const void *input, + int width, int height, + int out_stride, int in_stride); + void conv_copy(void *output, const void *input, int width, int height, int out_stride, int in_stride); diff --git a/gfx/scaler/scaler.c b/gfx/scaler/scaler.c index cdc5148593..0ea0f7ffc2 100644 --- a/gfx/scaler/scaler.c +++ b/gfx/scaler/scaler.c @@ -68,6 +68,12 @@ static bool set_direct_pix_conv(struct scaler_ctx *ctx) ctx->direct_pixconv = conv_copy; else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_ARGB8888) ctx->direct_pixconv = conv_0rgb1555_argb8888; + else if (ctx->in_fmt == SCALER_FMT_RGB565 && ctx->out_fmt == SCALER_FMT_ARGB8888) + ctx->direct_pixconv = conv_rgb565_argb8888; + else if (ctx->in_fmt == SCALER_FMT_RGB565 && ctx->out_fmt == SCALER_FMT_BGR24) + ctx->direct_pixconv = conv_rgb565_bgr24; + else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_RGB565) + ctx->direct_pixconv = conv_0rgb1555_rgb565; else if (ctx->in_fmt == SCALER_FMT_BGR24 && ctx->out_fmt == SCALER_FMT_ARGB8888) ctx->direct_pixconv = conv_bgr24_argb8888; else if (ctx->in_fmt == SCALER_FMT_ARGB8888 && ctx->out_fmt == SCALER_FMT_0RGB1555) @@ -76,6 +82,8 @@ static 
bool set_direct_pix_conv(struct scaler_ctx *ctx) ctx->direct_pixconv = conv_argb8888_bgr24; else if (ctx->in_fmt == SCALER_FMT_0RGB1555 && ctx->out_fmt == SCALER_FMT_BGR24) ctx->direct_pixconv = conv_0rgb1555_bgr24; + else if (ctx->in_fmt == SCALER_FMT_RGB565 && ctx->out_fmt == SCALER_FMT_BGR24) + ctx->direct_pixconv = conv_rgb565_bgr24; else return false; @@ -94,6 +102,10 @@ static bool set_pix_conv(struct scaler_ctx *ctx) ctx->in_pixconv = conv_0rgb1555_argb8888; break; + case SCALER_FMT_RGB565: + ctx->in_pixconv = conv_rgb565_argb8888; + break; + case SCALER_FMT_BGR24: ctx->in_pixconv = conv_bgr24_argb8888; break; @@ -160,14 +172,6 @@ bool scaler_ctx_gen_filter(struct scaler_ctx *ctx) void scaler_ctx_gen_reset(struct scaler_ctx *ctx) { -#ifdef SCALER_PERF - if (ctx->elapsed_frames) - fprintf(stderr, "[Scaler]: ms / frame: %.3f\n", ctx->elapsed_time_ms / ctx->elapsed_frames); - - ctx->elapsed_time_ms = 0.0; - ctx->elapsed_frames = 0; -#endif - scaler_free(ctx->horiz.filter); scaler_free(ctx->horiz.filter_pos); scaler_free(ctx->vert.filter); diff --git a/gfx/scaler/scaler.h b/gfx/scaler/scaler.h index ffeca48a46..1b6f691e0f 100644 --- a/gfx/scaler/scaler.h +++ b/gfx/scaler/scaler.h @@ -26,6 +26,7 @@ enum scaler_pix_fmt { SCALER_FMT_ARGB8888 = 0, SCALER_FMT_0RGB1555, + SCALER_FMT_RGB565, SCALER_FMT_BGR24 }; diff --git a/gfx/shader_cg.c b/gfx/shader_cg.c index 5018cbcaac..1c0272050f 100644 --- a/gfx/shader_cg.c +++ b/gfx/shader_cg.c @@ -521,8 +521,8 @@ static void load_texture_data(GLuint *obj, const struct texture_image *img, bool glPixelStorei(GL_UNPACK_ALIGNMENT, 4); #endif glTexImage2D(GL_TEXTURE_2D, - 0, RARCH_GL_INTERNAL_FORMAT, img->width, img->height, - 0, RARCH_GL_TEXTURE_TYPE, RARCH_GL_FORMAT32, img->pixels); + 0, RARCH_GL_INTERNAL_FORMAT32, img->width, img->height, + 0, RARCH_GL_TEXTURE_TYPE32, RARCH_GL_FORMAT32, img->pixels); free(img->pixels); } diff --git a/gfx/shader_glsl.c b/gfx/shader_glsl.c index 0212419c58..f7bd4cf8ec 100644 --- 
a/gfx/shader_glsl.c +++ b/gfx/shader_glsl.c @@ -433,8 +433,8 @@ static bool get_texture_image(const char *shader_path, xmlNodePtr ptr) glPixelStorei(GL_UNPACK_ALIGNMENT, 4); glTexImage2D(GL_TEXTURE_2D, - 0, RARCH_GL_INTERNAL_FORMAT, - img.width, img.height, 0, RARCH_GL_TEXTURE_TYPE, RARCH_GL_FORMAT32, img.pixels); + 0, RARCH_GL_INTERNAL_FORMAT32, + img.width, img.height, 0, RARCH_GL_TEXTURE_TYPE32, RARCH_GL_FORMAT32, img.pixels); pglActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, 0); diff --git a/libretro.h b/libretro.h index c2b193c4a7..57d12a4606 100755 --- a/libretro.h +++ b/libretro.h @@ -355,6 +355,7 @@ enum retro_key // const enum retro_pixel_format * -- // Sets the internal pixel format used by the implementation. // The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555. + // This pixel format however, is deprecated (see enum retro_pixel_format). // If the call returns false, the frontend does not support this pixel format. // This function should be called inside retro_load_game() or retro_get_system_av_info(). // @@ -368,8 +369,18 @@ enum retro_key enum retro_pixel_format { - RETRO_PIXEL_FORMAT_0RGB1555 = 0, // 0RGB1555, native endian. 0 bit must be set to 0. - RETRO_PIXEL_FORMAT_XRGB8888 // XRGB8888, native endian. X bits are ignored. + // 0RGB1555, native endian. 0 bit must be set to 0. + // This pixel format is default for compatibility concerns only. + // If a 15/16-bit pixel format is desired, consider using RGB565. + RETRO_PIXEL_FORMAT_0RGB1555 = 0, + + // XRGB8888, native endian. X bits are ignored. + RETRO_PIXEL_FORMAT_XRGB8888 = 1, + + // RGB565, native endian. This pixel format is the recommended format to use if a 15/16-bit format is desired + // as it is the pixel format that is typically available on a wide range of low-power devices. + // It is also natively supported in APIs like OpenGL ES. 
+ RETRO_PIXEL_FORMAT_RGB565 = 2 }; struct retro_message @@ -465,6 +476,8 @@ typedef bool (*retro_environment_t)(unsigned cmd, void *data); // Render a frame. Pixel format is 15-bit 0RGB1555 native endian unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT). // Width and height specify dimensions of buffer. // Pitch specifices length in bytes between two lines in buffer. +// For performance reasons, it is highly recommended to have a frame that is packed in memory, i.e. pitch == width * byte_per_pixel. +// Certain graphic APIs, such as OpenGL ES, do not like textures that are not packed in memory. typedef void (*retro_video_refresh_t)(const void *data, unsigned width, unsigned height, size_t pitch); // Renders a single audio frame. Should only be used if implementation generates a single sample at a time. diff --git a/record/ffemu.c b/record/ffemu.c index 36f4b6968c..3e9477e3b7 100644 --- a/record/ffemu.c +++ b/record/ffemu.c @@ -175,8 +175,8 @@ static bool ffemu_init_video(struct ff_video_info *video, const struct ffemu_par switch (param->pix_fmt) { - case FFEMU_PIX_XRGB1555: - video->scaler.in_fmt = SCALER_FMT_0RGB1555; + case FFEMU_PIX_RGB565: + video->scaler.in_fmt = SCALER_FMT_RGB565; video->pix_size = 2; break; diff --git a/record/ffemu.h b/record/ffemu.h index f5fe0d9271..aff204c373 100644 --- a/record/ffemu.h +++ b/record/ffemu.h @@ -25,7 +25,7 @@ extern "C" { enum ffemu_pix_format { - FFEMU_PIX_XRGB1555 = 0, + FFEMU_PIX_RGB565 = 0, FFEMU_PIX_BGR24, FFEMU_PIX_ARGB8888 }; diff --git a/retroarch.c b/retroarch.c index 01bc2628b3..467e134bb0 100644 --- a/retroarch.c +++ b/retroarch.c @@ -24,6 +24,7 @@ #include "file.h" #include "general.h" #include "dynamic.h" +#include "benchmark.h" #include "audio/utils.h" #include "record/ffemu.h" #include "rewind.h" @@ -252,6 +253,24 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_ return; #endif + if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_0RGB1555) + { + 
RARCH_PERFORMANCE_INIT(video_frame_conv); + RARCH_PERFORMANCE_START(video_frame_conv); + driver.scaler.in_width = width; + driver.scaler.in_height = height; + driver.scaler.out_width = width; + driver.scaler.out_height = height; + driver.scaler.in_stride = pitch; + driver.scaler.out_stride = width * sizeof(uint16_t); + + scaler_ctx_scale(&driver.scaler, driver.scaler_out, data); + data = driver.scaler_out; + pitch = driver.scaler.out_stride; + RARCH_PERFORMANCE_STOP(video_frame_conv); + RARCH_PERFORMANCE_LOG("video_frame_conv()", video_frame_conv); + } + // Slightly messy code, // but we really need to do processing before blocking on VSync for best possible scheduling. #ifdef HAVE_FFMPEG @@ -1228,7 +1247,7 @@ static void init_recording(void) params.filename = g_extern.record_path; params.fps = fps; params.samplerate = samplerate; - params.pix_fmt = g_extern.system.rgb32 ? FFEMU_PIX_ARGB8888 : FFEMU_PIX_XRGB1555; + params.pix_fmt = g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888 ? 
FFEMU_PIX_ARGB8888 : FFEMU_PIX_RGB565; if (g_settings.video.gpu_record && driver.video->read_viewport) { diff --git a/screenshot.c b/screenshot.c index beb56e4811..be5cca4010 100644 --- a/screenshot.c +++ b/screenshot.c @@ -142,22 +142,35 @@ static void dump_line_16(uint8_t *line, const uint16_t *src, unsigned width) { uint16_t pixel = *src++; uint8_t b = (pixel >> 0) & 0x1f; - uint8_t g = (pixel >> 5) & 0x1f; - uint8_t r = (pixel >> 10) & 0x1f; + uint8_t g = (pixel >> 5) & 0x3f; + uint8_t r = (pixel >> 11) & 0x1f; *line++ = (b << 3) | (b >> 2); - *line++ = (g << 3) | (g >> 2); + *line++ = (g << 2) | (g >> 4); *line++ = (r << 3) | (r >> 2); } } +static void dump_line_32(uint8_t *line, const uint32_t *src, unsigned width) +{ + for (unsigned i = 0; i < width; i++) + { + uint32_t pixel = *src++; + *line++ = (pixel >> 0) & 0xff; + *line++ = (pixel >> 8) & 0xff; + *line++ = (pixel >> 16) & 0xff; + } +} + static void dump_content(FILE *file, const void *frame, int width, int height, int pitch, bool bgr24) { - const uint8_t *frame_bgr = (const uint8_t*)frame; - const uint16_t *frame16 = (const uint16_t*)frame; - - if (!bgr24) - pitch /= sizeof(uint16_t); + union + { + const uint8_t *u8; + const uint16_t *u16; + const uint32_t *u32; + } u; + u.u8 = (const uint8_t*)frame; uint8_t **lines = (uint8_t**)calloc(height, sizeof(uint8_t*)); if (!lines) @@ -174,13 +187,18 @@ static void dump_content(FILE *file, const void *frame, if (bgr24) // BGR24 byte order. Can directly copy. 
{ - for (int j = 0; j < height; j++, frame_bgr += pitch) - dump_line_bgr(lines[j], frame_bgr, width); + for (int j = 0; j < height; j++, u.u8 += pitch) + dump_line_bgr(lines[j], u.u8, width); } - else // ARGB1555 + else if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_XRGB8888) { - for (int j = 0; j < height; j++, frame16 += pitch) - dump_line_16(lines[j], frame16, width); + for (int j = 0; j < height; j++, u.u8 += pitch) + dump_line_32(lines[j], u.u32, width); + } + else // RGB565 + { + for (int j = 0; j < height; j++, u.u8 += pitch) + dump_line_16(lines[j], u.u16, width); } #ifdef HAVE_LIBPNG From be491bdbe9602d43351e4de75d438f25b2a83e4e Mon Sep 17 00:00:00 2001 From: Themaister Date: Sat, 20 Oct 2012 11:40:32 +0200 Subject: [PATCH 2/7] Use RGB565 in libretro-test. --- libretro-test/libretro-test.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libretro-test/libretro-test.c b/libretro-test/libretro-test.c index 9c035c8be3..f589aca059 100644 --- a/libretro-test/libretro-test.c +++ b/libretro-test/libretro-test.c @@ -146,8 +146,8 @@ static void update_input(void) static void render_checkered(void) { - uint16_t color_r = 31 << 10; - uint16_t color_g = 31 << 5; + uint16_t color_r = 31 << 11; + uint16_t color_g = 63 << 5; uint16_t *line = frame_buf; for (unsigned y = 0; y < 240; y++, line += 320) @@ -193,6 +193,13 @@ bool retro_load_game(const struct retro_game_info *info) environ_cb(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, desc); + enum retro_pixel_format fmt = RETRO_PIXEL_FORMAT_RGB565; + if (!environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &fmt)) + { + fprintf(stderr, "RGB565 is not supported.\n"); + return false; + } + (void)info; return true; } From 2145d1c4e22b20a608db232a93798315f5f5245e Mon Sep 17 00:00:00 2001 From: Themaister Date: Sat, 20 Oct 2012 11:22:10 +0100 Subject: [PATCH 3/7] Use RGB565 in VC context. 
--- gfx/context/vc_egl_ctx.c | 4 ++-- gfx/gl.c | 23 ++++++++++++++--------- gfx/gl_common.h | 4 +--- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/gfx/context/vc_egl_ctx.c b/gfx/context/vc_egl_ctx.c index 7c7ab00255..4f6bfa5b7d 100644 --- a/gfx/context/vc_egl_ctx.c +++ b/gfx/context/vc_egl_ctx.c @@ -445,12 +445,12 @@ static bool gfx_ctx_write_egl_image(const void *frame, unsigned width, unsigned if (!eglBuffer[index] || !g_egl_vgimage[index]) { - g_egl_vgimage[index] = vgCreateImage(VG_sXRGB_8888, g_egl_res, g_egl_res, g_smooth ? VG_IMAGE_QUALITY_BETTER : VG_IMAGE_QUALITY_NONANTIALIASED); + g_egl_vgimage[index] = vgCreateImage(rgb32 ? VG_sXRGB_8888 : VG_sRGB_565, g_egl_res, g_egl_res, VG_IMAGE_QUALITY_NONANTIALIASED); eglBuffer[index] = peglCreateImageKHR(g_egl_dpy, g_eglimage_ctx, EGL_VG_PARENT_IMAGE_KHR, (EGLClientBuffer)g_egl_vgimage[index], NULL); ret = true; } - vgImageSubData(g_egl_vgimage[index], frame, pitch, (rgb32 ? VG_sXRGB_8888 : VG_sARGB_1555), 0, 0, width, height); + vgImageSubData(g_egl_vgimage[index], frame, pitch, (rgb32 ? 
VG_sXRGB_8888 : VG_sRGB_565), 0, 0, width, height); *image_handle = eglBuffer[index]; gfx_ctx_bind_api(g_api); diff --git a/gfx/gl.c b/gfx/gl.c index 1c6e2364c6..b93b5e5b3b 100644 --- a/gfx/gl.c +++ b/gfx/gl.c @@ -995,8 +995,14 @@ static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, un #endif } -static void gl_init_textures(gl_t *gl) +static void gl_init_textures(gl_t *gl, const video_info_t *video) { +#if defined(HAVE_EGL) && defined(HAVE_OPENGLES2) + gl->egl_images = load_eglimage_proc(gl) && gl->ctx_driver->init_egl_image_buffer(video); +#else + (void)video; +#endif + glGenTextures(TEXTURES, gl->texture); for (unsigned i = 0; i < TEXTURES; i++) { @@ -1007,9 +1013,12 @@ static void gl_init_textures(gl_t *gl) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, gl->tex_filter); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, gl->tex_filter); - glTexImage2D(GL_TEXTURE_2D, - 0, gl->internal_fmt, gl->tex_w, gl->tex_h, 0, gl->texture_type, - gl->texture_fmt, gl->empty_buf ? gl->empty_buf : NULL); + if (!gl->egl_images) + { + glTexImage2D(GL_TEXTURE_2D, + 0, gl->internal_fmt, gl->tex_w, gl->tex_h, 0, gl->texture_type, + gl->texture_fmt, gl->empty_buf ? 
gl->empty_buf : NULL); + } } glBindTexture(GL_TEXTURE_2D, gl->texture[gl->tex_index]); } @@ -1381,7 +1390,7 @@ static void *gl_init(const video_info_t *video, const input_driver_t **input, vo } #endif - gl_init_textures(gl); + gl_init_textures(gl, video); for (unsigned i = 0; i < TEXTURES; i++) { @@ -1409,10 +1418,6 @@ static void *gl_init(const video_info_t *video, const input_driver_t **input, vo return NULL; } -#ifdef HAVE_EGL - gl->egl_images = load_eglimage_proc(gl) && gl->ctx_driver->init_egl_image_buffer(video); -#endif - return gl; } diff --git a/gfx/gl_common.h b/gfx/gl_common.h index 87f60f8742..220e722a72 100644 --- a/gfx/gl_common.h +++ b/gfx/gl_common.h @@ -158,7 +158,7 @@ struct gl_coords #define MAX_SHADERS 16 -#if defined(HAVE_GLSL) || defined(HAVE_CG) +#if (defined(HAVE_GLSL) || defined(HAVE_CG)) #define TEXTURES 8 #else #define TEXTURES 1 @@ -238,9 +238,7 @@ typedef struct gl GLuint menu_texture_id; #endif -#ifdef HAVE_EGL bool egl_images; -#endif } gl_t; // Windows ... <_< From 461cf0fef3c3435c6b800c71cbad81ce96737f3b Mon Sep 17 00:00:00 2001 From: Themaister Date: Sun, 21 Oct 2012 02:58:51 +0200 Subject: [PATCH 4/7] Pre-cache uniform locations in GLSL. 
--- gfx/shader_glsl.c | 335 ++++++++++++++++++++++++++++------------------ 1 file changed, 206 insertions(+), 129 deletions(-) diff --git a/gfx/shader_glsl.c b/gfx/shader_glsl.c index f7bd4cf8ec..619c182ceb 100644 --- a/gfx/shader_glsl.c +++ b/gfx/shader_glsl.c @@ -86,31 +86,31 @@ #define pglDisableVertexAttribArray glDisableVertexAttribArray #define pglVertexAttribPointer glVertexAttribPointer #else -static PFNGLCREATEPROGRAMPROC pglCreateProgram = NULL; -static PFNGLUSEPROGRAMPROC pglUseProgram = NULL; -static PFNGLCREATESHADERPROC pglCreateShader = NULL; -static PFNGLDELETESHADERPROC pglDeleteShader = NULL; -static PFNGLSHADERSOURCEPROC pglShaderSource = NULL; -static PFNGLCOMPILESHADERPROC pglCompileShader = NULL; -static PFNGLATTACHSHADERPROC pglAttachShader = NULL; -static PFNGLDETACHSHADERPROC pglDetachShader = NULL; -static PFNGLLINKPROGRAMPROC pglLinkProgram = NULL; -static PFNGLGETUNIFORMLOCATIONPROC pglGetUniformLocation = NULL; -static PFNGLUNIFORM1IPROC pglUniform1i = NULL; -static PFNGLUNIFORM1FPROC pglUniform1f = NULL; -static PFNGLUNIFORM2FVPROC pglUniform2fv = NULL; -static PFNGLUNIFORM4FVPROC pglUniform4fv = NULL; -static PFNGLUNIFORMMATRIX4FVPROC pglUniformMatrix4fv = NULL; -static PFNGLGETSHADERIVPROC pglGetShaderiv = NULL; -static PFNGLGETSHADERINFOLOGPROC pglGetShaderInfoLog = NULL; -static PFNGLGETPROGRAMIVPROC pglGetProgramiv = NULL; -static PFNGLGETPROGRAMINFOLOGPROC pglGetProgramInfoLog = NULL; -static PFNGLDELETEPROGRAMPROC pglDeleteProgram = NULL; -static PFNGLGETATTACHEDSHADERSPROC pglGetAttachedShaders = NULL; -static PFNGLGETATTRIBLOCATIONPROC pglGetAttribLocation = NULL; -static PFNGLENABLEVERTEXATTRIBARRAYPROC pglEnableVertexAttribArray = NULL; -static PFNGLDISABLEVERTEXATTRIBARRAYPROC pglDisableVertexAttribArray = NULL; -static PFNGLVERTEXATTRIBPOINTERPROC pglVertexAttribPointer = NULL; +static PFNGLCREATEPROGRAMPROC pglCreateProgram; +static PFNGLUSEPROGRAMPROC pglUseProgram; +static PFNGLCREATESHADERPROC pglCreateShader; +static 
PFNGLDELETESHADERPROC pglDeleteShader; +static PFNGLSHADERSOURCEPROC pglShaderSource; +static PFNGLCOMPILESHADERPROC pglCompileShader; +static PFNGLATTACHSHADERPROC pglAttachShader; +static PFNGLDETACHSHADERPROC pglDetachShader; +static PFNGLLINKPROGRAMPROC pglLinkProgram; +static PFNGLGETUNIFORMLOCATIONPROC pglGetUniformLocation; +static PFNGLUNIFORM1IPROC pglUniform1i; +static PFNGLUNIFORM1FPROC pglUniform1f; +static PFNGLUNIFORM2FVPROC pglUniform2fv; +static PFNGLUNIFORM4FVPROC pglUniform4fv; +static PFNGLUNIFORMMATRIX4FVPROC pglUniformMatrix4fv; +static PFNGLGETSHADERIVPROC pglGetShaderiv; +static PFNGLGETSHADERINFOLOGPROC pglGetShaderInfoLog; +static PFNGLGETPROGRAMIVPROC pglGetProgramiv; +static PFNGLGETPROGRAMINFOLOGPROC pglGetProgramInfoLog; +static PFNGLDELETEPROGRAMPROC pglDeleteProgram; +static PFNGLGETATTACHEDSHADERSPROC pglGetAttachedShaders; +static PFNGLGETATTRIBLOCATIONPROC pglGetAttribLocation; +static PFNGLENABLEVERTEXATTRIBARRAYPROC pglEnableVertexAttribArray; +static PFNGLDISABLEVERTEXATTRIBARRAYPROC pglDisableVertexAttribArray; +static PFNGLVERTEXATTRIBPOINTERPROC pglVertexAttribPointer; #endif #ifdef HAVE_OPENGLES2 @@ -131,28 +131,28 @@ enum filter_type RARCH_GL_NEAREST }; -static bool glsl_enable = false; -static bool glsl_modern = false; -static GLuint gl_program[MAX_PROGRAMS] = {0}; -static enum filter_type gl_filter_type[MAX_PROGRAMS] = {RARCH_GL_NOFORCE}; +static bool glsl_enable; +static bool glsl_modern; +static GLuint gl_program[MAX_PROGRAMS]; +static enum filter_type gl_filter_type[MAX_PROGRAMS]; static struct gl_fbo_scale gl_scale[MAX_PROGRAMS]; -static unsigned gl_num_programs = 0; -static unsigned active_index = 0; +static unsigned gl_num_programs; +static unsigned active_index; static GLuint gl_teximage[MAX_TEXTURES]; -static unsigned gl_teximage_cnt = 0; +static unsigned gl_teximage_cnt; static char gl_teximage_uniforms[MAX_TEXTURES][64]; -static state_tracker_t *gl_state_tracker = NULL; +static state_tracker_t *gl_state_tracker; 
static struct state_tracker_uniform_info gl_tracker_info[MAX_VARIABLES]; -static unsigned gl_tracker_info_cnt = 0; +static unsigned gl_tracker_info_cnt; static char gl_tracker_script[PATH_MAX]; static char gl_tracker_script_class[64]; -static char *gl_script_program = NULL; +static char *gl_script_program; static GLint gl_attribs[PREV_TEXTURES + 1 + 4 + MAX_PROGRAMS]; -static unsigned gl_attrib_index = 0; +static unsigned gl_attrib_index; static gfx_ctx_proc_t (*glsl_get_proc_address)(const char*); @@ -172,6 +172,38 @@ struct shader_program bool valid_scale; }; +struct shader_uniforms_frame +{ + int texture; + int input_size; + int texture_size; + int tex_coord; +}; + +struct shader_uniforms +{ + int mvp; + int tex_coord; + int vertex_coord; + int color; + int lut_tex_coord; + + int input_size; + int output_size; + int texture_size; + + int frame_count; + int frame_direction; + + int lut_texture[MAX_TEXTURES]; + + struct shader_uniforms_frame orig; + struct shader_uniforms_frame pass[MAX_PROGRAMS]; + struct shader_uniforms_frame prev[PREV_TEXTURES]; +}; + +static struct shader_uniforms gl_uniforms[MAX_PROGRAMS]; + static const char *stock_vertex_legacy = "varying vec4 color;\n" "void main() {\n" @@ -927,6 +959,63 @@ static void gl_glsl_reset_attrib(void) gl_attrib_index = 0; } +static void find_uniforms_frame(GLuint prog, struct shader_uniforms_frame *frame, const char *base) +{ + char texture[64]; + char texture_size[64]; + char input_size[64]; + char tex_coord[64]; + + snprintf(texture, sizeof(texture), "%s%s", base, "Texture"); + snprintf(texture_size, sizeof(texture_size), "%s%s", base, "TextureSize"); + snprintf(input_size, sizeof(input_size), "%s%s", base, "InputSize"); + snprintf(tex_coord, sizeof(tex_coord), "%s%s", base, "TexCoord"); + + frame->texture = pglGetUniformLocation(prog, texture); + frame->texture_size = pglGetUniformLocation(prog, texture_size); + frame->input_size = pglGetUniformLocation(prog, input_size); + frame->tex_coord = 
pglGetAttribLocation(prog, tex_coord); +} + +static void find_uniforms(GLuint prog, struct shader_uniforms *uni) +{ + pglUseProgram(prog); + + uni->mvp = pglGetUniformLocation(prog, "rubyMVPMatrix"); + uni->tex_coord = pglGetAttribLocation(prog, "rubyTexCoord"); + uni->vertex_coord = pglGetAttribLocation(prog, "rubyVertexCoord"); + uni->color = pglGetAttribLocation(prog, "rubyColor"); + uni->lut_tex_coord = pglGetAttribLocation(prog, "rubyLUTTexCoord"); + + uni->input_size = pglGetUniformLocation(prog, "rubyInputSize"); + uni->output_size = pglGetUniformLocation(prog, "rubyOutputSize"); + uni->texture_size = pglGetUniformLocation(prog, "rubyTextureSize"); + + uni->frame_count = pglGetUniformLocation(prog, "rubyFrameCount"); + uni->frame_direction = pglGetUniformLocation(prog, "rubyFrameDirection"); + + for (unsigned i = 0; i < gl_teximage_cnt; i++) + uni->lut_texture[i] = pglGetUniformLocation(prog, gl_teximage_uniforms[i]); + + find_uniforms_frame(prog, &uni->orig, "rubyOrig"); + + char frame_base[64]; + for (unsigned i = 0; i < MAX_PROGRAMS; i++) + { + snprintf(frame_base, sizeof(frame_base), "rubyPass%u", i + 1); + find_uniforms_frame(prog, &uni->pass[i], frame_base); + } + + find_uniforms_frame(prog, &uni->prev[0], "rubyPrev"); + for (unsigned i = 1; i < PREV_TEXTURES; i++) + { + snprintf(frame_base, sizeof(frame_base), "rubyPrev%u", i); + find_uniforms_frame(prog, &uni->prev[i], frame_base); + } + + pglUseProgram(0); +} + // Platforms with broken get_proc_address. // Assume functions are available without proc_address. 
#undef LOAD_GL_SYM @@ -1056,9 +1145,12 @@ bool gl_glsl_init(const char *path) } #endif + for (unsigned i = 0; i <= num_progs; i++) + find_uniforms(gl_program[i], &gl_uniforms[i]); + #ifdef GLSL_DEBUG if (!gl_check_error()) - RARCH_WARN("Detected GL error.\n"); + RARCH_WARN("Detected GL error in GLSL.\n"); #endif #ifdef HAVE_XML @@ -1087,9 +1179,10 @@ bool gl_glsl_init(const char *path) } #endif - glsl_enable = true; - gl_num_programs = num_progs; - gl_program[gl_num_programs + 1] = gl_program[0]; + glsl_enable = true; + gl_num_programs = num_progs; + gl_program[gl_num_programs + 1] = gl_program[0]; + gl_uniforms[gl_num_programs + 1] = gl_uniforms[0]; gl_glsl_reset_attrib(); @@ -1166,30 +1259,31 @@ void gl_glsl_set_params(unsigned width, unsigned height, if (!glsl_enable || (gl_program[active_index] == 0)) return; - GLint location; + const struct shader_uniforms *uni = &gl_uniforms[active_index]; - float inputSize[2] = {(float)width, (float)height}; - location = pglGetUniformLocation(gl_program[active_index], "rubyInputSize"); - pglUniform2fv(location, 1, inputSize); + float input_size[2] = {(float)width, (float)height}; + float output_size[2] = {(float)out_width, (float)out_height}; + float texture_size[2] = {(float)tex_width, (float)tex_height}; - float outputSize[2] = {(float)out_width, (float)out_height}; - location = pglGetUniformLocation(gl_program[active_index], "rubyOutputSize"); - pglUniform2fv(location, 1, outputSize); + if (uni->input_size >= 0) + pglUniform2fv(uni->input_size, 1, input_size); - float textureSize[2] = {(float)tex_width, (float)tex_height}; - location = pglGetUniformLocation(gl_program[active_index], "rubyTextureSize"); - pglUniform2fv(location, 1, textureSize); + if (uni->output_size >= 0) + pglUniform2fv(uni->output_size, 1, output_size); - location = pglGetUniformLocation(gl_program[active_index], "rubyFrameCount"); - pglUniform1i(location, frame_count); + if (uni->texture_size >= 0) + pglUniform2fv(uni->texture_size, 1, texture_size); 
- location = pglGetUniformLocation(gl_program[active_index], "rubyFrameDirection"); - pglUniform1i(location, g_extern.frame_is_reverse ? -1 : 1); + if (uni->frame_count >= 0) + pglUniform1i(uni->frame_count, frame_count); + + if (uni->frame_direction >= 0) + pglUniform1i(uni->frame_direction, g_extern.frame_is_reverse ? -1 : 1); for (unsigned i = 0; i < gl_teximage_cnt; i++) { - location = pglGetUniformLocation(gl_program[active_index], gl_teximage_uniforms[i]); - pglUniform1i(location, i + 1); + if (uni->lut_texture[i] >= 0) + pglUniform1i(uni->lut_texture[i], i + 1); } unsigned texunit = gl_teximage_cnt + 1; @@ -1197,25 +1291,29 @@ void gl_glsl_set_params(unsigned width, unsigned height, // Set original texture unless we're in first pass (pointless). if (active_index > 1) { - // Bind original texture. - pglActiveTexture(GL_TEXTURE0 + texunit); + if (uni->orig.texture >= 0) + { + // Bind original texture. + pglActiveTexture(GL_TEXTURE0 + texunit); + pglUniform1i(uni->orig.texture, texunit); + glBindTexture(GL_TEXTURE_2D, info->tex); + } - location = pglGetUniformLocation(gl_program[active_index], "rubyOrigTexture"); - pglUniform1i(location, texunit++); - glBindTexture(GL_TEXTURE_2D, info->tex); + texunit++; - location = pglGetUniformLocation(gl_program[active_index], "rubyOrigTextureSize"); - pglUniform2fv(location, 1, info->tex_size); - location = pglGetUniformLocation(gl_program[active_index], "rubyOrigInputSize"); - pglUniform2fv(location, 1, info->input_size); + if (uni->orig.texture_size >= 0) + pglUniform2fv(uni->orig.texture_size, 1, info->tex_size); + + if (uni->orig.input_size >= 0) + pglUniform2fv(uni->orig.input_size, 1, info->input_size); // Pass texture coordinates. 
- location = pglGetAttribLocation(gl_program[active_index], "rubyOrigTexCoord"); - if (location >= 0) + if (uni->orig.tex_coord >= 0) { - pglEnableVertexAttribArray(location); - pglVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 0, info->coord); - gl_attribs[gl_attrib_index++] = location; + int loc = uni->orig.tex_coord; + pglEnableVertexAttribArray(loc); + pglVertexAttribPointer(loc, 2, GL_FLOAT, GL_FALSE, 0, info->coord); + gl_attribs[gl_attrib_index++] = loc; } // Bind new texture in the chain. @@ -1228,27 +1326,23 @@ void gl_glsl_set_params(unsigned width, unsigned height, // Bind FBO textures. for (unsigned i = 0; i < fbo_info_cnt; i++) { - char attrib_buf[64]; + if (uni->pass[i].texture) + pglUniform1i(uni->pass[i].texture, texunit); - snprintf(attrib_buf, sizeof(attrib_buf), "rubyPass%uTexture", i + 1); - location = pglGetUniformLocation(gl_program[active_index], attrib_buf); - pglUniform1i(location, texunit++); + texunit++; - snprintf(attrib_buf, sizeof(attrib_buf), "rubyPass%uTextureSize", i + 1); - location = pglGetUniformLocation(gl_program[active_index], attrib_buf); - pglUniform2fv(location, 1, fbo_info[i].tex_size); + if (uni->pass[i].texture_size >= 0) + pglUniform2fv(uni->pass[i].texture_size, 1, fbo_info[i].tex_size); - snprintf(attrib_buf, sizeof(attrib_buf), "rubyPass%uInputSize", i + 1); - location = pglGetUniformLocation(gl_program[active_index], attrib_buf); - pglUniform2fv(location, 1, fbo_info[i].input_size); + if (uni->pass[i].input_size >= 0) + pglUniform2fv(uni->pass[i].input_size, 1, fbo_info[i].input_size); - snprintf(attrib_buf, sizeof(attrib_buf), "rubyPass%uTexCoord", i + 1); - location = pglGetAttribLocation(gl_program[active_index], attrib_buf); - if (location >= 0) + if (uni->pass[i].tex_coord >= 0) { - pglEnableVertexAttribArray(location); - pglVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 0, fbo_info[i].coord); - gl_attribs[gl_attrib_index++] = location; + int loc = uni->pass[i].tex_coord; + 
pglEnableVertexAttribArray(loc); + pglVertexAttribPointer(loc, 2, GL_FLOAT, GL_FALSE, 0, fbo_info[i].coord); + gl_attribs[gl_attrib_index++] = loc; } } } @@ -1273,45 +1367,28 @@ void gl_glsl_set_params(unsigned width, unsigned height, // Set previous textures. Only bind if they're actually used. for (unsigned i = 0; i < PREV_TEXTURES; i++) { - char attr_buf_tex[64]; - char attr_buf_tex_size[64]; - char attr_buf_input_size[64]; - char attr_buf_coord[64]; - static const char *prev_names[PREV_TEXTURES] = { - "Prev", - "Prev1", - "Prev2", - "Prev3", - "Prev4", - "Prev5", - "Prev6", - }; - - snprintf(attr_buf_tex, sizeof(attr_buf_tex), "ruby%sTexture", prev_names[i]); - snprintf(attr_buf_tex_size, sizeof(attr_buf_tex_size), "ruby%sTextureSize", prev_names[i]); - snprintf(attr_buf_input_size, sizeof(attr_buf_input_size), "ruby%sInputSize", prev_names[i]); - snprintf(attr_buf_coord, sizeof(attr_buf_coord), "ruby%sTexCoord", prev_names[i]); - - location = pglGetUniformLocation(gl_program[active_index], attr_buf_tex); - if (location >= 0) + if (uni->prev[i].texture >= 0) { pglActiveTexture(GL_TEXTURE0 + texunit); glBindTexture(GL_TEXTURE_2D, prev_info[i].tex); - pglUniform1i(location, texunit++); + pglUniform1i(uni->prev[i].texture, texunit++); } - location = pglGetUniformLocation(gl_program[active_index], attr_buf_tex_size); - pglUniform2fv(location, 1, prev_info[i].tex_size); - location = pglGetUniformLocation(gl_program[active_index], attr_buf_input_size); - pglUniform2fv(location, 1, prev_info[i].input_size); + texunit++; + + if (uni->prev[i].texture_size >= 0) + pglUniform2fv(uni->prev[i].texture_size, 1, prev_info[i].tex_size); + + if (uni->prev[i].input_size >= 0) + pglUniform2fv(uni->prev[i].input_size, 1, prev_info[i].input_size); // Pass texture coordinates. 
- location = pglGetAttribLocation(gl_program[active_index], attr_buf_coord); - if (location >= 0) + if (uni->prev[i].tex_coord >= 0) { - pglEnableVertexAttribArray(location); - pglVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 0, prev_info[i].coord); - gl_attribs[gl_attrib_index++] = location; + int loc = uni->prev[i].tex_coord; + pglEnableVertexAttribArray(loc); + pglVertexAttribPointer(loc, 2, GL_FLOAT, GL_FALSE, 0, prev_info[i].coord); + gl_attribs[gl_attrib_index++] = loc; } } @@ -1327,7 +1404,7 @@ void gl_glsl_set_params(unsigned width, unsigned height, for (unsigned i = 0; i < cnt; i++) { - location = pglGetUniformLocation(gl_program[active_index], info[i].id); + int location = pglGetUniformLocation(gl_program[active_index], info[i].id); pglUniform1f(location, info[i].value); } } @@ -1338,9 +1415,10 @@ bool gl_glsl_set_mvp(const math_matrix *mat) if (!glsl_enable || !glsl_modern) return false; - int loc = pglGetUniformLocation(gl_program[active_index], "rubyMVPMatrix"); + int loc = gl_uniforms[active_index].mvp; if (loc >= 0) pglUniformMatrix4fv(loc, 1, GL_FALSE, mat->data); + return true; } @@ -1349,35 +1427,34 @@ bool gl_glsl_set_coords(const struct gl_coords *coords) if (!glsl_enable || !glsl_modern) return false; - int loc; - - loc = pglGetAttribLocation(gl_program[active_index], "rubyTexCoord"); - if (loc >= 0) + const struct shader_uniforms *uni = &gl_uniforms[active_index]; + if (uni->tex_coord >= 0) { + int loc = uni->tex_coord; pglEnableVertexAttribArray(loc); pglVertexAttribPointer(loc, 2, GL_FLOAT, GL_FALSE, 0, coords->tex_coord); gl_attribs[gl_attrib_index++] = loc; } - loc = pglGetAttribLocation(gl_program[active_index], "rubyVertexCoord"); - if (loc >= 0) + if (uni->vertex_coord >= 0) { + int loc = uni->vertex_coord; pglEnableVertexAttribArray(loc); pglVertexAttribPointer(loc, 2, GL_FLOAT, GL_FALSE, 0, coords->vertex); gl_attribs[gl_attrib_index++] = loc; } - loc = pglGetAttribLocation(gl_program[active_index], "rubyColor"); - if (loc >= 
0) + if (uni->color >= 0) { + int loc = uni->color; pglEnableVertexAttribArray(loc); pglVertexAttribPointer(loc, 4, GL_FLOAT, GL_FALSE, 0, coords->color); gl_attribs[gl_attrib_index++] = loc; } - loc = pglGetAttribLocation(gl_program[active_index], "rubyLUTTexCoord"); - if (loc >= 0) + if (uni->lut_tex_coord >= 0) { + int loc = uni->lut_tex_coord; pglEnableVertexAttribArray(loc); pglVertexAttribPointer(loc, 2, GL_FLOAT, GL_FALSE, 0, coords->lut_tex_coord); gl_attribs[gl_attrib_index++] = loc; From a545964be80f7a76fcb8390b4e673af2abe51cea Mon Sep 17 00:00:00 2001 From: Themaister Date: Sun, 21 Oct 2012 11:17:21 +0100 Subject: [PATCH 5/7] Fix crash on frame dupe. --- retroarch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retroarch.c b/retroarch.c index 467e134bb0..0a804e3754 100644 --- a/retroarch.c +++ b/retroarch.c @@ -253,7 +253,7 @@ static void video_frame(const void *data, unsigned width, unsigned height, size_ return; #endif - if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_0RGB1555) + if (g_extern.system.pix_fmt == RETRO_PIXEL_FORMAT_0RGB1555 && data) { RARCH_PERFORMANCE_INIT(video_frame_conv); RARCH_PERFORMANCE_START(video_frame_conv); From a2839001f7bb513d0a2f008565de6aab615f577d Mon Sep 17 00:00:00 2001 From: Themaister Date: Sun, 21 Oct 2012 12:20:53 +0200 Subject: [PATCH 6/7] Fix warning. --- gfx/gl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gfx/gl.c b/gfx/gl.c index b93b5e5b3b..9c2853688c 100644 --- a/gfx/gl.c +++ b/gfx/gl.c @@ -94,7 +94,7 @@ const GLfloat *default_vertex_ptr = vertexes_flipped; memcpy(&(pgl##SYM), &sym, sizeof(sym)); \ } -#ifdef HAVE_EGL +#if defined(HAVE_EGL) && defined(HAVE_OPENGLES2) static PFNGLEGLIMAGETARGETTEXTURE2DOESPROC pglEGLImageTargetTexture2DOES; static bool load_eglimage_proc(gl_t *gl) From 411bf932b6d93a02a70416a8c5aa049822ff76f5 Mon Sep 17 00:00:00 2001 From: Themaister Date: Sun, 21 Oct 2012 21:59:25 +0200 Subject: [PATCH 7/7] Update SDL and XVideo for RGB565. 
--- gfx/ext/rarch_video.h | 6 ++-- gfx/ext_gfx.c | 2 +- gfx/sdl_gfx.c | 70 +++++++++++++++++++++---------------------- gfx/xvideo.c | 18 +++++------ 4 files changed, 48 insertions(+), 48 deletions(-) diff --git a/gfx/ext/rarch_video.h b/gfx/ext/rarch_video.h index 04b8e057c5..456110b5b8 100644 --- a/gfx/ext/rarch_video.h +++ b/gfx/ext/rarch_video.h @@ -24,7 +24,7 @@ extern "C" { #define RARCH_API_CALLTYPE #endif -#define RARCH_GRAPHICS_API_VERSION 4 +#define RARCH_GRAPHICS_API_VERSION 5 // Since we don't want to rely on C++ or C99 for a proper boolean type, // make sure return semantics are perfectly clear ... ;) @@ -45,8 +45,8 @@ extern "C" { #define RARCH_FALSE 0 #endif -#define RARCH_COLOR_FORMAT_XRGB1555 0 -#define RARCH_COLOR_FORMAT_ARGB8888 1 +#define RARCH_COLOR_FORMAT_RGB565 0 +#define RARCH_COLOR_FORMAT_XRGB8888 1 #define RARCH_INPUT_SCALE_BASE 256 diff --git a/gfx/ext_gfx.c b/gfx/ext_gfx.c index 1c4604bb1b..0d85052a6b 100644 --- a/gfx/ext_gfx.c +++ b/gfx/ext_gfx.c @@ -300,7 +300,7 @@ static bool setup_video(ext_t *ext, const video_info_t *video, const input_drive info.aspect_ratio = g_settings.video.aspect_ratio; info.smooth = video->smooth; info.input_scale = video->input_scale; - info.color_format = video->rgb32 ? RARCH_COLOR_FORMAT_ARGB8888 : RARCH_COLOR_FORMAT_XRGB1555; + info.color_format = video->rgb32 ? 
RARCH_COLOR_FORMAT_XRGB8888 : RARCH_COLOR_FORMAT_RGB565; info.xml_shader = xml_shader; info.cg_shader = cg_shader; info.ttf_font = font; diff --git a/gfx/sdl_gfx.c b/gfx/sdl_gfx.c index 37027751b4..88abe57cc5 100644 --- a/gfx/sdl_gfx.c +++ b/gfx/sdl_gfx.c @@ -38,9 +38,9 @@ #include "SDL/SDL_syswm.h" #endif -static void convert_15bit_15bit_direct(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt); +static void convert_16bit_16bit_direct(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt); static void convert_32bit_32bit_direct(uint32_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt); -static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt); +static void convert_16bit_16bit_shift(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt); static void convert_32bit_32bit_shift(uint32_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt); typedef struct sdl_video @@ -52,7 +52,7 @@ typedef struct sdl_video bool render32; - void (*convert_15_func)(uint16_t*, unsigned, const uint16_t*, unsigned, unsigned, unsigned, const SDL_PixelFormat*); + void (*convert_16_func)(uint16_t*, unsigned, const uint16_t*, unsigned, unsigned, unsigned, const SDL_PixelFormat*); void (*convert_32_func)(uint32_t*, unsigned, const uint32_t*, unsigned, unsigned, unsigned, const SDL_PixelFormat*); #ifdef HAVE_FREETYPE @@ -111,11 +111,11 @@ static void sdl_init_font(sdl_video_t *vid, const char *font_path, unsigned font g = g < 0 ? 0 : (g > 255 ? 255 : g); b = b < 0 ? 0 : (b > 255 ? 
255 : b); - // RGB888 -> RGB555 + // RGB888 -> RGB565 if (!vid->render32) { r >>= 3; - g >>= 3; + g >>= 2; b >>= 3; } @@ -136,7 +136,7 @@ static void sdl_init_font(sdl_video_t *vid, const char *font_path, unsigned font } // Not very optimized, but hey :D -static void sdl_render_msg_15(sdl_video_t *vid, SDL_Surface *buffer, const char *msg, unsigned width, unsigned height, const SDL_PixelFormat *fmt) +static void sdl_render_msg_16(sdl_video_t *vid, SDL_Surface *buffer, const char *msg, unsigned width, unsigned height, const SDL_PixelFormat *fmt) { #ifdef HAVE_FREETYPE if (!vid->font) @@ -197,7 +197,7 @@ static void sdl_render_msg_15(sdl_video_t *vid, SDL_Surface *buffer, const char unsigned blend = src[x]; unsigned out_pix = out[x]; unsigned r = (out_pix >> rshift) & 0x1f; - unsigned g = (out_pix >> gshift) & 0x1f; + unsigned g = (out_pix >> gshift) & 0x3f; unsigned b = (out_pix >> bshift) & 0x1f; unsigned out_r = (r * (256 - blend) + vid->font_r * blend) >> 8; @@ -353,7 +353,7 @@ static void *sdl_gfx_init(const video_info_t *video, const input_driver_t **inpu RARCH_LOG("Creating window @ %ux%u\n", video->width, video->height); vid->render32 = !g_settings.video.force_16bit; - vid->screen = SDL_SetVideoMode(video->width, video->height, vid->render32 ? 32 : 15, SDL_HWSURFACE | SDL_HWACCEL | SDL_DOUBLEBUF | (video->fullscreen ? SDL_FULLSCREEN : 0)); + vid->screen = SDL_SetVideoMode(video->width, video->height, vid->render32 ? 32 : 16, SDL_HWSURFACE | SDL_HWACCEL | SDL_DOUBLEBUF | (video->fullscreen ? 
SDL_FULLSCREEN : 0)); if (!vid->screen) { @@ -376,9 +376,9 @@ static void *sdl_gfx_init(const video_info_t *video, const input_driver_t **inpu } else { - RARCH_LOG("SDL: Creating 15-bit buffer.\n"); + RARCH_LOG("SDL: Creating 16-bit buffer.\n"); vid->buffer = SDL_CreateRGBSurface(SDL_SWSURFACE, RARCH_SCALE_BASE * video->input_scale, - RARCH_SCALE_BASE * video->input_scale, 15, + RARCH_SCALE_BASE * video->input_scale, 16, fmt->Rmask, fmt->Gmask, fmt->Bmask, fmt->Amask); } RARCH_LOG("[Debug]: SDL Pixel format: Rshift = %u, Gshift = %u, Bshift = %u\n", @@ -408,15 +408,15 @@ static void *sdl_gfx_init(const video_info_t *video, const input_driver_t **inpu sdl_init_font(vid, g_settings.video.font_path, g_settings.video.font_size); - if (fmt->Rshift == 10 && fmt->Gshift == 5 && fmt->Bshift == 0) // XRGB1555 + if (fmt->Rshift == 11 && fmt->Gshift == 5 && fmt->Bshift == 0) // RGB565 { - RARCH_LOG("SDL: 15-bit format matches. Fast blit.\n"); - vid->convert_15_func = convert_15bit_15bit_direct; + RARCH_LOG("SDL: 16-bit format matches. Fast blit.\n"); + vid->convert_16_func = convert_16bit_16bit_direct; } else { - RARCH_LOG("SDL: 15-bit format does not match. Needs conversion.\n"); - vid->convert_15_func = convert_15bit_15bit_shift; + RARCH_LOG("SDL: 16-bit format does not match. Needs conversion.\n"); + vid->convert_16_func = convert_16bit_16bit_shift; } if (fmt->Rshift == 16 && fmt->Gshift == 8 && fmt->Bshift == 0) // ARGB8888 @@ -431,7 +431,7 @@ static void *sdl_gfx_init(const video_info_t *video, const input_driver_t **inpu } vid->scaler.scaler_type = video->smooth ? SCALER_TYPE_BILINEAR : SCALER_TYPE_POINT; - vid->scaler.in_fmt = vid->render32 ? SCALER_FMT_ARGB8888 : SCALER_FMT_0RGB1555; + vid->scaler.in_fmt = vid->render32 ? 
SCALER_FMT_ARGB8888 : SCALER_FMT_RGB565; vid->scaler.out_fmt = vid->scaler.in_fmt; return vid; @@ -441,56 +441,56 @@ error: return NULL; } -static inline uint16_t conv_pixel_32_15(uint32_t pix, const SDL_PixelFormat *fmt) +static inline uint16_t conv_pixel_32_16(uint32_t pix, const SDL_PixelFormat *fmt) { - uint16_t r = ((pix & 0x00f80000) >> 19) << fmt->Rshift; - uint16_t g = ((pix & 0x0000f800) >> 11) << fmt->Gshift; + uint16_t r = ((pix & 0x00f80000) >> 18) << fmt->Rshift; + uint16_t g = ((pix & 0x0000fc00) >> 10) << fmt->Gshift; uint16_t b = ((pix & 0x000000f8) >> 3) << fmt->Bshift; return r | g | b; } -static inline uint32_t conv_pixel_15_32(uint16_t pix, const SDL_PixelFormat *fmt) +static inline uint32_t conv_pixel_16_32(uint16_t pix, const SDL_PixelFormat *fmt) { - uint32_t r = (pix >> 10) & 0x1f; - uint32_t g = (pix >> 5) & 0x1f; + uint32_t r = (pix >> 11) & 0x1f; + uint32_t g = (pix >> 5) & 0x3f; uint32_t b = (pix >> 0) & 0x1f; r = (r << 3) | (r >> 2); - g = (g << 3) | (g >> 2); + g = (g << 2) | (g >> 4); b = (b << 3) | (b >> 2); return (r << fmt->Rshift) | (g << fmt->Gshift) | (b << fmt->Bshift); } -static void convert_32bit_15bit(uint16_t *out, unsigned outpitch, +static void convert_32bit_16bit(uint16_t *out, unsigned outpitch, const uint32_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) { for (unsigned y = 0; y < height; y++) { for (unsigned x = 0; x < width; x++) - out[x] = conv_pixel_32_15(input[x], fmt); + out[x] = conv_pixel_32_16(input[x], fmt); out += outpitch >> 1; input += pitch >> 2; } } -static void convert_15bit_32bit(uint32_t *out, unsigned outpitch, +static void convert_16bit_32bit(uint32_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) { for (unsigned y = 0; y < height; y++) { for (unsigned x = 0; x < width; x++) - out[x] = conv_pixel_15_32(input[x], fmt); + out[x] = conv_pixel_16_32(input[x], fmt); out += outpitch >> 2; 
input += pitch >> 1; } } -static void convert_15bit_15bit_direct(uint16_t *out, unsigned outpitch, +static void convert_16bit_16bit_direct(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) { @@ -516,7 +516,7 @@ static void convert_32bit_32bit_direct(uint32_t *out, unsigned outpitch, (void)fmt; } -static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch, +static void convert_16bit_16bit_shift(uint16_t *out, unsigned outpitch, const uint16_t *input, unsigned width, unsigned height, unsigned pitch, const SDL_PixelFormat *fmt) { @@ -528,8 +528,8 @@ static void convert_15bit_15bit_shift(uint16_t *out, unsigned outpitch, for (unsigned x = 0; x < width; x++) { uint16_t color = src[x]; - uint16_t r = ((color >> 10) & 0x1f) << fmt->Rshift; - uint16_t g = ((color >> 5) & 0x1f) << fmt->Gshift; + uint16_t r = ((color >> 11) & 0x1f) << fmt->Rshift; + uint16_t g = ((color >> 5) & 0x3f) << fmt->Gshift; uint16_t b = ((color >> 0) & 0x1f) << fmt->Bshift; dest[x] = r | g | b; } @@ -585,13 +585,13 @@ static bool sdl_gfx_frame(void *data, const void *frame, unsigned width, unsigne - // 15-bit -> 32-bit. + // 16-bit -> 32-bit. 
if (vid->upsample) - convert_15bit_32bit((uint32_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint16_t*)frame, width, height, pitch, vid->screen->format); + convert_16bit_32bit((uint32_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint16_t*)frame, width, height, pitch, vid->screen->format); - // 15-bit -> 15-bit + // 16-bit -> 16-bit else if (!vid->rgb32) - vid->convert_15_func((uint16_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint16_t*)frame, width, height, pitch, vid->screen->format); + vid->convert_16_func((uint16_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint16_t*)frame, width, height, pitch, vid->screen->format); - // 32-bit -> 15-bit + // 32-bit -> 16-bit else if (vid->rgb32 && !vid->render32) - convert_32bit_15bit((uint16_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint32_t*)frame, width, height, pitch, vid->screen->format); + convert_32bit_16bit((uint16_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint32_t*)frame, width, height, pitch, vid->screen->format); // 32-bit -> 32-bit else vid->convert_32_func((uint32_t*)vid->buffer->pixels, vid->buffer->pitch, (const uint32_t*)frame, width, height, pitch, vid->screen->format); @@ -627,7 +627,7 @@ static bool sdl_gfx_frame(void *data, const void *frame, unsigned width, unsigne if (vid->render32) sdl_render_msg_32(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format); else - sdl_render_msg_15(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format); + sdl_render_msg_16(vid, vid->screen, msg, vid->screen->w, vid->screen->h, vid->screen->format); } char buf[128]; diff --git a/gfx/xvideo.c b/gfx/xvideo.c index 724bc738e1..252c716671 100644 --- a/gfx/xvideo.c +++ b/gfx/xvideo.c @@ -108,16 +108,16 @@ static inline void calculate_yuv(uint8_t *y, uint8_t *u, uint8_t *v, unsigned r, static void init_yuv_tables(xv_t *xv) { - xv->ytable = (uint8_t*)malloc(0x8000); - xv->utable = (uint8_t*)malloc(0x8000); - xv->vtable = (uint8_t*)malloc(0x8000); + xv->ytable = (uint8_t*)malloc(0x10000); + 
xv->utable = (uint8_t*)malloc(0x10000); + xv->vtable = (uint8_t*)malloc(0x10000); - for (unsigned i = 0; i < 0x8000; i++) + for (unsigned i = 0; i < 0x10000; i++) { - // Extract RGB555 color data from i - unsigned r = (i >> 10) & 0x1F, g = (i >> 5) & 0x1F, b = (i) & 0x1F; + // Extract RGB565 color data from i + unsigned r = (i >> 11) & 0x1f, g = (i >> 5) & 0x3f, b = (i >> 0) & 0x1f; r = (r << 3) | (r >> 2); // R5->R8 - g = (g << 3) | (g >> 2); // G5->G8 + g = (g << 2) | (g >> 4); // G6->G8 b = (b << 3) | (b >> 2); // B5->B8 calculate_yuv(&xv->ytable[i], &xv->utable[i], &xv->vtable[i], r, g, b); @@ -224,7 +224,7 @@ static void render32_yuy2(xv_t *xv, const void *input_, unsigned width, unsigned for (unsigned x = 0; x < width; x++) { uint32_t p = *input++; - p = ((p >> 9) & 0x7c00) | ((p >> 6) & 0x03e0) | ((p >> 3) & 0x1f); // ARGB -> RGB15 + p = ((p >> 8) & 0xf800) | ((p >> 5) & 0x07e0) | ((p >> 3) & 0x1f); // ARGB -> RGB16 uint8_t y0 = xv->ytable[p]; uint8_t u = xv->utable[p]; @@ -253,7 +253,7 @@ static void render32_uyvy(xv_t *xv, const void *input_, unsigned width, unsigned for (unsigned x = 0; x < width; x++) { uint32_t p = *input++; - p = ((p >> 9) & 0x7c00) | ((p >> 6) & 0x03e0) | ((p >> 3) & 0x1f); // ARGB -> RGB15 + p = ((p >> 8) & 0xf800) | ((p >> 5) & 0x07e0) | ((p >> 3) & 0x1f); // ARGB -> RGB16 uint8_t y0 = xv->ytable[p]; uint8_t u = xv->utable[p];