diff --git a/Makefile b/Makefile index ce08637c98..6ea9aab229 100644 --- a/Makefile +++ b/Makefile @@ -144,6 +144,8 @@ endif ifeq ($(HAVE_SDL), 1) OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o +else ifeq ($(HAVE_OPENGL), 1) + OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o else ifeq ($(HAVE_FFMPEG), 1) OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o endif diff --git a/Makefile.win b/Makefile.win index 19d8724300..000242afea 100644 --- a/Makefile.win +++ b/Makefile.win @@ -75,6 +75,8 @@ endif ifeq ($(HAVE_SDL), 1) OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o +else ifeq ($(HAVE_OPENGL), 1) + OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o else ifeq ($(HAVE_FFMPEG), 1) OBJ += gfx/scaler/scaler.o gfx/scaler/pixconv.o gfx/scaler/scaler_int.o gfx/scaler/filter.o endif diff --git a/gfx/gl.c b/gfx/gl.c index baf39eb0a0..5d1b8dcfcf 100644 --- a/gfx/gl.c +++ b/gfx/gl.c @@ -15,6 +15,7 @@ #include "../driver.h" #include "../benchmark.h" +#include "scaler/scaler.h" #include #include "../libretro.h" @@ -835,20 +836,8 @@ static void gl_update_input_size(gl_t *gl, unsigned width, unsigned height, unsi gl->tex_w * gl->tex_h * gl->tex_index * gl->base_size, gl->tex_w * gl->tex_h * gl->base_size, gl->empty_buf); -#elif defined(HAVE_PBO) - pglBindBuffer(GL_PIXEL_UNPACK_BUFFER, gl->pbo); - - glBufferSubData(GL_PIXEL_UNPACK_BUFFER, - 0, gl->tex_w * gl->tex_h * gl->base_size, gl->empty_buf); - - glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(gl->tex_w * gl->base_size)); - glTexSubImage2D(GL_TEXTURE_2D, - 0, 0, 0, gl->tex_w, gl->tex_h, gl->texture_type, - gl->texture_fmt, NULL); - - pglBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); #else - glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(width * gl->base_size)); + glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(width * 
sizeof(uint32_t))); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, gl->tex_w, gl->tex_h, gl->texture_type, @@ -870,6 +859,28 @@ static void gl_update_input_size(gl_t *gl, unsigned width, unsigned height, unsi } } +// It is *much* faster (order of magnitude on my setup) to use a custom SIMD-optimized conversion routine than letting GL do it :( +#if !defined(HAVE_PSGL) +static inline void gl_convert_frame_rgb15_32(gl_t *gl, void *output, const void *input, unsigned width, unsigned height, unsigned in_pitch) +{ + if (width != gl->scaler.in_width || height != gl->scaler.in_height) + { + gl->scaler.in_width = width; + gl->scaler.in_height = height; + gl->scaler.out_width = width; + gl->scaler.out_height = height; + gl->scaler.in_fmt = SCALER_FMT_0RGB1555; + gl->scaler.out_fmt = SCALER_FMT_ARGB8888; + gl->scaler.scaler_type = SCALER_TYPE_POINT; + scaler_ctx_gen_filter(&gl->scaler); + } + + gl->scaler.in_stride = in_pitch; + gl->scaler.out_stride = width * sizeof(uint32_t); + scaler_ctx_scale(&gl->scaler, output, input); +} +#endif + #if defined(HAVE_PSGL) static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, unsigned height, unsigned pitch) { @@ -914,98 +925,29 @@ static void gl_init_textures(gl_t *gl) } glBindTexture(GL_TEXTURE_2D, gl->texture[gl->tex_index]); } -#elif defined(HAVE_PBO) -static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, unsigned height, unsigned pitch) -{ - const uint8_t *frame_copy = (const uint8_t*)frame; - size_t frame_copy_size = width * gl->base_size; - - pglBindBuffer(GL_PIXEL_UNPACK_BUFFER, gl->pbo); - uint8_t *data = (uint8_t*)pglMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); - if (!data) - return; - - for (unsigned h = 0; h < height; h++, data += frame_copy_size, frame_copy += pitch) - memcpy(data, frame_copy, frame_copy_size); - pglUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - - glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(width * gl->base_size)); - glTexSubImage2D(GL_TEXTURE_2D, - 0, 0, 0, width, 
height, gl->texture_type, - gl->texture_fmt, NULL); - pglBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); -} - -static void gl_init_textures(gl_t *gl) -{ - void *buf = pglMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); - if (buf) - { - memset(buf, 0, gl->tex_w * gl->tex_h * gl->base_size); - pglUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - } - - glGenTextures(TEXTURES, gl->texture); - for (unsigned i = 0; i < TEXTURES; i++) - { - glBindTexture(GL_TEXTURE_2D, gl->texture[i]); - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, gl->border_type); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, gl->border_type); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, gl->tex_filter); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, gl->tex_filter); - - glTexImage2D(GL_TEXTURE_2D, - 0, RARCH_GL_INTERNAL_FORMAT, gl->tex_w, gl->tex_h, 0, gl->texture_type, - gl->texture_fmt, NULL); - } - glBindTexture(GL_TEXTURE_2D, gl->texture[gl->tex_index]); -} #else static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, unsigned height, unsigned pitch) { - -#ifdef HAVE_OPENGLES2 // Have to perform pixel format conversions as well. - glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(width * sizeof(uint32_t))); // Always use 32-bit textures. - - if (gl->base_size == 2) // ARGB1555 => ARGB8888 + if (gl->base_size == 2) // ARGB1555 => ARGB8888, SIMD-style :D { - const uint16_t *src = (const uint16_t*)frame; - uint32_t *dst = (uint32_t*)gl->conv_buffer; - unsigned pitch_width = pitch >> 1; - - // GL_UNSIGNED_BYTE apparently means in byte order, so go with little endian for now (ARGB). - // We have to convert anyways, prefer something that is more likely to be a native format for the GPU. 
- for (unsigned h = 0; h < height; h++, dst += width, src += pitch_width) - { - for (unsigned w = 0; w < width; w++) - { - uint32_t col = src[w]; - uint32_t r = (col >> 10) & 0x1f; - uint32_t g = (col >> 5) & 0x1f; - uint32_t b = (col >> 0) & 0x1f; - r = (r << 3) | (r >> 2); - g = (g << 3) | (g >> 2); - b = (b << 3) | (b >> 2); - - dst[w] = (0xff << 24) | (r << 16) | (g << 8) | (b << 0); - } - } - + glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(width * sizeof(uint32_t))); // Always use 32-bit textures. + gl_convert_frame_rgb15_32(gl, gl->conv_buffer, frame, width, height, pitch); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, gl->texture_type, gl->texture_fmt, gl->conv_buffer); } else { +#ifdef HAVE_OPENGLES2 + // No GL_UNPACK_ROW_LENGTH ;( unsigned pitch_width = pitch / gl->base_size; - if (width == pitch_width) // Fast path :D + if (width == pitch_width) // Happy path :D { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, gl->texture_type, - gl->texture_fmt, gl->conv_buffer); + gl->texture_fmt, frame); } - else + else // Probably slower path. 
{ const uint32_t *src = (const uint32_t*)frame; for (unsigned h = 0; h < height; h++, src += pitch_width) @@ -1015,15 +957,17 @@ static inline void gl_copy_frame(gl_t *gl, const void *frame, unsigned width, un gl->texture_fmt, src); } } - } #else - glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(pitch)); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / gl->base_size); - glTexSubImage2D(GL_TEXTURE_2D, - 0, 0, 0, width, height, gl->texture_type, - gl->texture_fmt, frame); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(pitch)); + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / gl->base_size); + + glTexSubImage2D(GL_TEXTURE_2D, + 0, 0, 0, width, height, gl->texture_type, + gl->texture_fmt, frame); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); #endif + } } static void gl_init_textures(gl_t *gl) @@ -1197,9 +1141,6 @@ static void gl_free(void *data) #if defined(HAVE_PSGL) glBindBuffer(GL_TEXTURE_REFERENCE_BUFFER_SCE, 0); glDeleteBuffers(1, &gl->pbo); -#elif defined(HAVE_PBO) - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glDeleteBuffers(1, &gl->pbo); #endif #ifdef HAVE_FBO @@ -1253,15 +1194,6 @@ static bool resolve_extensions(gl_t *gl) RARCH_LOG("[GL] Supported extensions: %s\n", ext); #endif -#if defined(HAVE_PBO) - RARCH_LOG("[GL]: Using PBOs.\n"); - if (!gl_query_extension("GL_ARB_pixel_buffer_object")) - { - RARCH_ERR("[GL]: PBOs are enabled, but extension does not exist ...\n"); - return false; - } -#endif - return true; } @@ -1403,17 +1335,12 @@ static void *gl_init(const video_info_t *video, const input_driver_t **input, vo glBindBuffer(GL_TEXTURE_REFERENCE_BUFFER_SCE, gl->pbo); glBufferData(GL_TEXTURE_REFERENCE_BUFFER_SCE, gl->tex_w * gl->tex_h * gl->base_size * TEXTURES, NULL, GL_STREAM_DRAW); -#elif defined(HAVE_PBO) - glGenBuffers(1, &gl->pbo); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gl->pbo); - glBufferData(GL_PIXEL_UNPACK_BUFFER, - gl->tex_w * gl->tex_h * gl->base_size, NULL, GL_STREAM_DRAW); #endif // Empty buffer that we 
use to clear out the texture with on res change. gl->empty_buf = calloc(sizeof(uint32_t), gl->tex_w * gl->tex_h); -#ifdef HAVE_OPENGLES2 +#if !defined(HAVE_PSGL) gl->conv_buffer = calloc(sizeof(uint32_t), gl->tex_w * gl->tex_h); if (!gl->conv_buffer) { diff --git a/gfx/gl_common.h b/gfx/gl_common.h index 3d0c77b58a..22dbe36d00 100644 --- a/gfx/gl_common.h +++ b/gfx/gl_common.h @@ -20,6 +20,7 @@ #include "fonts/fonts.h" #include "math/matrix.h" #include "gfx_context.h" +#include "scaler/scaler.h" #ifdef HAVE_CONFIG_H #include "../config.h" @@ -171,7 +172,9 @@ typedef struct gl GLuint tex_filter; void *empty_buf; + void *conv_buffer; + struct scaler_ctx scaler; unsigned frame_count; @@ -256,7 +259,7 @@ extern PFNGLACTIVETEXTUREPROC pglActiveTexture; #define RARCH_GL_INTERNAL_FORMAT GL_RGBA #define RARCH_GL_TEXTURE_TYPE GL_BGRA #define RARCH_GL_FORMAT32 GL_UNSIGNED_INT_8_8_8_8_REV -#define RARCH_GL_FORMAT16 GL_UNSIGNED_SHORT_1_5_5_5_REV +#define RARCH_GL_FORMAT16 GL_UNSIGNED_INT_8_8_8_8_REV #endif // Platform specific workarounds/hacks. diff --git a/gfx/scaler/scaler.c b/gfx/scaler/scaler.c index fe5a04e787..cdc5148593 100644 --- a/gfx/scaler/scaler.c +++ b/gfx/scaler/scaler.c @@ -23,10 +23,6 @@ #include #include "../../benchmark.h" -#ifdef SCALER_PERF -#include -#endif - // In case aligned allocs are needed later ... 
void *scaler_alloc(size_t elem_size, size_t size) { diff --git a/gfx/scaler/scaler.h b/gfx/scaler/scaler.h index 3a43b61f64..ffeca48a46 100644 --- a/gfx/scaler/scaler.h +++ b/gfx/scaler/scaler.h @@ -92,11 +92,6 @@ struct scaler_ctx uint32_t *frame; int stride; } output; - -#ifdef SCALER_PERF - double elapsed_time_ms; - unsigned elapsed_frames; -#endif }; bool scaler_ctx_gen_filter(struct scaler_ctx *ctx); diff --git a/qb/config.libs.sh b/qb/config.libs.sh index 4ff3a577a4..f0dc7bc30e 100644 --- a/qb/config.libs.sh +++ b/qb/config.libs.sh @@ -80,10 +80,8 @@ fi if [ "$OS" = Darwin ]; then check_lib FBO "-framework OpenGL" glFramebufferTexture2D - check_lib PBO "-framework OpenGL" glMapBuffer else check_lib FBO -lGL glFramebufferTexture2D - check_lib PBO -lGL glMapBuffer fi check_pkgconf RSOUND rsound 1.1 @@ -186,6 +184,6 @@ check_pkgconf PYTHON python3 add_define_make OS "$OS" # Creates config.mk and config.h. -VARS="ALSA OSS OSS_BSD OSS_LIB AL RSOUND ROAR JACK COREAUDIO PULSE SDL OPENGL GLES VG EGL KMS GBM DRM DYLIB GETOPT_LONG THREADS CG XML SDL_IMAGE LIBPNG DYNAMIC FFMPEG AVCODEC AVFORMAT AVUTIL CONFIGFILE FREETYPE XVIDEO X11 XEXT XF86VM NETPLAY NETWORK_CMD STDIN_CMD COMMAND SOCKET_LEGACY FBO PBO STRL PYTHON FFMPEG_ALLOC_CONTEXT3 FFMPEG_AVCODEC_OPEN2 FFMPEG_AVIO_OPEN FFMPEG_AVFORMAT_WRITE_HEADER FFMPEG_AVFORMAT_NEW_STREAM FFMPEG_AVCODEC_ENCODE_AUDIO2 FFMPEG_AVCODEC_ENCODE_VIDEO2 SINC FIXED_POINT BSV_MOVIE VIDEOCORE" +VARS="ALSA OSS OSS_BSD OSS_LIB AL RSOUND ROAR JACK COREAUDIO PULSE SDL OPENGL GLES VG EGL KMS GBM DRM DYLIB GETOPT_LONG THREADS CG XML SDL_IMAGE LIBPNG DYNAMIC FFMPEG AVCODEC AVFORMAT AVUTIL CONFIGFILE FREETYPE XVIDEO X11 XEXT XF86VM NETPLAY NETWORK_CMD STDIN_CMD COMMAND SOCKET_LEGACY FBO STRL PYTHON FFMPEG_ALLOC_CONTEXT3 FFMPEG_AVCODEC_OPEN2 FFMPEG_AVIO_OPEN FFMPEG_AVFORMAT_WRITE_HEADER FFMPEG_AVFORMAT_NEW_STREAM FFMPEG_AVCODEC_ENCODE_AUDIO2 FFMPEG_AVCODEC_ENCODE_VIDEO2 SINC FIXED_POINT BSV_MOVIE VIDEOCORE" create_config_make config.mk $VARS 
create_config_header config.h $VARS diff --git a/qb/config.params.sh b/qb/config.params.sh index e41943b08d..e6a4508a83 100644 --- a/qb/config.params.sh +++ b/qb/config.params.sh @@ -15,7 +15,6 @@ HAVE_VG=auto # Enable OpenVG support HAVE_CG=auto # Enable Cg shader support HAVE_XML=auto # Enable bSNES-style XML shader support HAVE_FBO=auto # Enable render-to-texture (FBO) support -HAVE_PBO=no # Enable pixel buffer object (PBO) support HAVE_ALSA=auto # Enable ALSA support HAVE_OSS=auto # Enable OSS support HAVE_RSOUND=auto # Enable RSound support