Merge pull request #6364 from reswitched/master

[NSW] Graphics driver speed improvements/fixes and audio driver improvement
This commit is contained in:
Twinaphex 2018-03-07 06:35:39 +01:00 committed by GitHub
commit d532f108fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 61 deletions

View File

@ -18,6 +18,7 @@
#include <stdint.h>
#include<libtransistor/nx.h>
#include<libtransistor/alloc_pages.h>
#include "../audio_driver.h"
#include "../../verbosity.h"
@ -27,12 +28,6 @@ static const int max_num_samples = sample_rate;
static const int num_channels = 2;
static const size_t sample_buffer_size = ((max_num_samples * num_channels * sizeof(uint16_t)) + 0xfff) & ~0xfff;
/* don't think this can be in mapped memory, since samples get DMA'd out of it */
static uint16_t __attribute__((aligned(0x1000))) sample_buffer_1[sample_buffer_size/sizeof(uint16_t)];
static uint16_t __attribute__((aligned(0x1000))) sample_buffer_2[sample_buffer_size/sizeof(uint16_t)];
static uint16_t __attribute__((aligned(0x1000))) sample_buffer_3[sample_buffer_size/sizeof(uint16_t)];
static uint16_t *sample_buffers[3] = {sample_buffer_1, sample_buffer_2, sample_buffer_3};
typedef struct
{
audio_output_t output;
@ -254,11 +249,14 @@ static void *switch_audio_init(const char *device,
for(i = 0; i < 3; i++)
{
swa->buffers[i].ptr = &swa->buffers[i].sample_data;
swa->buffers[i].sample_data = sample_buffers[i];
swa->buffers[i].sample_data = alloc_pages(sample_buffer_size, sample_buffer_size, NULL);
swa->buffers[i].buffer_size = sample_buffer_size;
swa->buffers[i].data_size = sample_buffer_size;
swa->buffers[i].unknown = 0;
if(swa->buffers[i].sample_data == NULL)
goto fail_audio_output;
if (audio_ipc_output_append_buffer(&swa->output, &swa->buffers[i]) != RESULT_OK)
goto fail_audio_output;
}

View File

@ -131,8 +131,6 @@ static bool switch_frame(void *data, const void *frame,
unsigned x, y;
result_t r;
uint64_t begin, done_copying, post_vsync, pre_swizzle, post_swizzle,
copy_ms, swizzle_ms, vsync_ms;
int tgtw, tgth, centerx, centery;
uint32_t *out_buffer = NULL;
switch_video_t *sw = data;
@ -148,12 +146,10 @@ static bool switch_frame(void *data, const void *frame,
centerx = (1280-tgtw)/2;
centery = (720-tgth)/2;
begin = svcGetSystemTick();
// clear image to black
for(x = 0; x < 1280; x++)
for(y = 0; y < 720; y++)
{
for(y = 0; y < 720; y++)
for(x = 0; x < 1280; x++)
{
sw->image[y*1280+x] = 0xFF000000;
}
@ -173,7 +169,7 @@ static bool switch_frame(void *data, const void *frame,
sw->scaler.out_width = tgtw;
sw->scaler.out_height = tgth;
sw->scaler.out_stride = 1280 * sizeof(uint32_t);
sw->scaler.out_fmt = SCALER_FMT_ARGB8888;
sw->scaler.out_fmt = SCALER_FMT_ABGR8888;
sw->scaler.scaler_type = SCALER_TYPE_POINT;
@ -185,7 +181,7 @@ static bool switch_frame(void *data, const void *frame,
sw->last_width = width;
sw->last_height = height;
}
scaler_ctx_scale(&sw->scaler, sw->image + (centery * 1280) + centerx, frame);
}
@ -213,23 +209,6 @@ static bool switch_frame(void *data, const void *frame,
}
#endif
for(x = 0; x < 1280; x++)
{
for(y = 0; y < 720; y++)
{
// swizzle components
uint32_t *pixel = &sw->image[(y*1280) + x];
uint32_t src = *pixel;
uint8_t a = (src & 0xFF000000) >> 24;
uint8_t r = (src & 0x00FF0000) >> 16;
uint8_t g = (src & 0x0000FF00) >> 8;
uint8_t b = (src & 0x000000FF) >> 0;
*pixel = (a << 24) | (b << 16) | (g << 8) | (r << 0);
}
}
done_copying = svcGetSystemTick();
#if 0
if (frame_count > 6000)
{
@ -241,30 +220,21 @@ static bool switch_frame(void *data, const void *frame,
if (msg && strlen(msg) > 0)
RARCH_LOG("message: %s\n", msg);
do {
if (sw->vsync) /* vsync seems to sometimes return before the buffer has actually been dequeued? */
switch_wait_vsync(sw);
post_vsync = svcGetSystemTick();
r = surface_dequeue_buffer(&sw->surface, &out_buffer);
} while(r != RESULT_OK);
pre_swizzle = svcGetSystemTick();
r = surface_dequeue_buffer(&sw->surface, &out_buffer);
if (sw->vsync)
switch_wait_vsync(sw);
svcSleepThread(10000);
if(r != RESULT_OK) {
return true; // just skip the frame
}
gfx_slow_swizzling_blit(out_buffer, sw->image, 1280, 720, 0, 0);
post_swizzle = svcGetSystemTick();
r = surface_queue_buffer(&sw->surface);
if (r != RESULT_OK)
return false;
copy_ms = (done_copying - begin) / 19200;
swizzle_ms = (post_swizzle - pre_swizzle) / 19200;
vsync_ms = (post_vsync - done_copying) / 19200;
RARCH_LOG("frame %d benchmark: copy %ld ms, swizzle %ld ms, vsync %ld ms\n", frame_count, copy_ms, swizzle_ms, vsync_ms);
last_frame = svcGetSystemTick();
return true;
}
@ -354,7 +324,7 @@ static void switch_set_texture_frame(
if (sw->menu_texture.pixels)
free(sw->menu_texture.pixels);
sw->menu_texture.pixels = malloc(width * height * 4);
sw->menu_texture.pixels = malloc(width * height * (rgb32 ? 4 : 2));
if (!sw->menu_texture.pixels)
{
RARCH_ERR("failed to allocate buffer for menu texture\n");
@ -378,13 +348,13 @@ static void switch_set_texture_frame(
sctx->in_width = width;
sctx->in_height = height;
sctx->in_stride = width * 4;
sctx->in_fmt = SCALER_FMT_ARGB8888;
sctx->in_stride = width * (rgb32 ? 4 : 2);
sctx->in_fmt = rgb32 ? SCALER_FMT_ARGB8888 : SCALER_FMT_RGB565;
sctx->out_width = sw->menu_texture.tgtw;
sctx->out_height = sw->menu_texture.tgth;
sctx->out_stride = 1280 * 4;
sctx->out_fmt = SCALER_FMT_ARGB8888;
sctx->out_fmt = SCALER_FMT_ABGR8888;
sctx->scaler_type = SCALER_TYPE_POINT;
@ -395,12 +365,7 @@ static void switch_set_texture_frame(
}
}
if (rgb32)
memcpy(sw->menu_texture.pixels, frame, width * height * 4);
else
conv_rgb565_argb8888(sw->menu_texture.pixels, frame,
width, height,
width * sizeof(uint32_t), width * sizeof(uint16_t));
memcpy(sw->menu_texture.pixels, frame, width * height * (rgb32 ? 4 : 2));
}
static void switch_set_texture_enable(void *data, bool enable, bool full_screen)

View File

@ -239,6 +239,10 @@ bool scaler_ctx_gen_filter(struct scaler_ctx *ctx)
ctx->out_pixconv = conv_argb8888_bgr24;
break;
case SCALER_FMT_ABGR8888:
ctx->out_pixconv = conv_argb8888_abgr8888;
break;
default:
return false;
}