Prevent unnecessary copies of entire frames on Windows

This commit is contained in:
loki 2020-03-27 21:57:29 +01:00
parent 55705af922
commit 94181fd047
9 changed files with 333 additions and 212 deletions

View File

@ -148,6 +148,7 @@ set(SUNSHINE_TARGET_FILES
sunshine/thread_pool.h
sunshine/thread_safe.h
sunshine/sync.h
sunshine/round_robin.h
${PLATFORM_TARGET_FILES})
include_directories(

View File

@ -91,7 +91,7 @@ std::string from_sockaddr(const sockaddr *const);
std::pair<std::uint16_t, std::string> from_sockaddr_ex(const sockaddr *const);
std::unique_ptr<mic_t> microphone(std::uint32_t sample_rate);
std::unique_ptr<display_t> display();
std::shared_ptr<display_t> display();
input_t input();
void move_mouse(input_t &input, int deltaX, int deltaY);

View File

@ -315,8 +315,8 @@ struct mic_attr_t : public mic_t {
}
};
std::unique_ptr<display_t> shm_display() {
auto shm = std::make_unique<shm_attr_t>();
std::shared_ptr<display_t> shm_display() {
auto shm = std::make_shared<shm_attr_t>();
if(shm->init()) {
return nullptr;
@ -325,11 +325,11 @@ std::unique_ptr<display_t> shm_display() {
return shm;
}
std::unique_ptr<display_t> display() {
std::shared_ptr<display_t> display() {
auto shm_disp = shm_display();
if(!shm_disp) {
return std::unique_ptr<display_t> { new x11_attr_t {} };
return std::make_shared<x11_attr_t>();
}
return shm_disp;

View File

@ -249,9 +249,7 @@ void keyboard(input_t &input, uint16_t modcode, bool release) {
auto key_state = GetAsyncKeyState(modcode);
bool key_state_down = (key_state & KEY_STATE_DOWN) != 0;
if(key_state_down != release) {
BOOST_LOG(warning) << "Key state of vkey ["sv << util::hex(modcode).to_string_view() << "] does not match the desired state ["sv << (release ? "on]"sv : "off]"sv);
return;
BOOST_LOG(debug) << "Key state of vkey ["sv << util::hex(modcode).to_string_view() << "] does not match the desired state ["sv << (release ? "on]"sv : "off]"sv);
}
INPUT i {};

View File

@ -104,9 +104,57 @@ public:
class display_t;
struct img_t : public ::platf::img_t {
~img_t() override {
delete[] data;
data = nullptr;
unmap();
}
void unmap() {
if(info.pData) {
device_ctx_p->Unmap(texture.get(), 0);
info.pData = nullptr;
}
}
int reset(int width, int height, DXGI_FORMAT format, device_t::pointer device, device_ctx_t::pointer device_ctx_p, const std::shared_ptr<display_t> &display) {
unmap();
D3D11_TEXTURE2D_DESC t {};
t.Width = width;
t.Height = height;
t.MipLevels = 1;
t.ArraySize = 1;
t.SampleDesc.Count = 1;
t.Usage = D3D11_USAGE_STAGING;
t.Format = format;
t.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
dxgi::texture2d_t::pointer tex_p {};
auto status = device->CreateTexture2D(&t, nullptr, &tex_p);
texture.reset(tex_p);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
this->display = display;
this->device_ctx_p = device_ctx_p;
this->data = nullptr;
this->row_pitch = 0;
this->pixel_pitch = 4;
this->width = width;
this->height = height;
return 0;
}
std::shared_ptr<display_t> display;
texture2d_t texture;
D3D11_MAPPED_SUBRESOURCE info {};
device_ctx_t::pointer device_ctx_p;
};
struct cursor_t {
@ -247,10 +295,16 @@ void blend_cursor(const cursor_t &cursor, img_t &img) {
}
}
class display_t : public ::platf::display_t {
class display_t : public ::platf::display_t, public std::enable_shared_from_this<display_t> {
public:
capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override {
auto img = (img_t *) img_base;
auto img = (img_t*)img_base;
if(img->display.get() != this) {
if(img->reset(width, height, format, device.get(), device_ctx.get(), shared_from_this())) {
return capture_e::error;
}
}
HRESULT status;
DXGI_OUTDUPL_FRAME_INFO frame_info;
@ -296,15 +350,15 @@ public:
}
//Copy from GPU to CPU
device_ctx->CopyResource(texture.get(), src.get());
device_ctx->CopyResource(img->texture.get(), src.get());
}
if(current_img.pData) {
device_ctx->Unmap(texture.get(), 0);
current_img.pData = nullptr;
if(img->info.pData) {
device_ctx->Unmap(img->texture.get(), 0);
img->info.pData = nullptr;
}
status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &current_img);
status = device_ctx->Map(img->texture.get(), 0, D3D11_MAP_READ, 0, &img->info);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to map texture [0x"sv << util::hex(status).to_string_view() << ']';
@ -312,28 +366,23 @@ public:
}
}
/*
const bool update_flag =
frame_info.LastMouseUpdateTime.QuadPart ||
frame_info.LastPresentTime.QuadPart != 0 ||
frame_info.PointerShapeBufferSize > 0;
*/
const bool update_flag = frame_info.LastPresentTime.QuadPart != 0;
if(!update_flag) {
return capture_e::timeout;
}
if(img->width != width || img->height != height) {
delete[] img->data;
img->data = new std::uint8_t[height * current_img.RowPitch];
img->row_pitch = img->info.RowPitch;
img->data = (std::uint8_t*)img->info.pData;
img->width = width;
img->height = height;
img->row_pitch = current_img.RowPitch;
}
std::copy_n((std::uint8_t*)current_img.pData, height * current_img.RowPitch, (std::uint8_t*)img->data);
if(cursor_visible && cursor.visible) {
blend_cursor(cursor, *img);
if(cursor_visible) { // && cursor.visible) {
//blend_cursor(cursor, *img);
}
return capture_e::ok;
@ -342,11 +391,9 @@ public:
std::shared_ptr<::platf::img_t> alloc_img() override {
auto img = std::make_shared<img_t>();
img->data = nullptr;
img->height = 0;
img->width = 0;
img->row_pitch = 0;
img->pixel_pitch = 4;
if(img->reset(width, height, format, device.get(), device_ctx.get(), shared_from_this())) {
return nullptr;
}
return img;
}
@ -368,8 +415,6 @@ public:
FreeLibrary(user32);
});
*/
current_img.pData = nullptr; // current_img is not yet mapped
dxgi::factory1_t::pointer factory_p {};
dxgi::adapter_t::pointer adapter_p {};
dxgi::output_t::pointer output_p {};
@ -548,43 +593,9 @@ public:
BOOST_LOG(debug) << "Source format ["sv << format_str[dup_desc.ModeDesc.Format] << ']';
D3D11_TEXTURE2D_DESC t {};
t.Width = width;
t.Height = height;
t.MipLevels = 1;
t.ArraySize = 1;
t.SampleDesc.Count = 1;
t.Usage = D3D11_USAGE_STAGING;
t.Format = format;
t.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
dxgi::texture2d_t::pointer tex_p {};
status = device->CreateTexture2D(&t, nullptr, &tex_p);
texture.reset(tex_p);
if(FAILED(status)) {
BOOST_LOG(error) << "Failed to create texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
// map the texture simply to get the pitch and stride
status = device_ctx->Map(texture.get(), 0, D3D11_MAP_READ, 0, &current_img);
if(FAILED(status)) {
BOOST_LOG(error) << "Error: Failed to map the texture [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
return 0;
}
~display_t() override {
if(current_img.pData) {
device_ctx->Unmap(texture.get(), 0);
current_img.pData = nullptr;
}
}
factory1_t factory;
adapter_t adapter;
output_t output;
@ -592,13 +603,11 @@ public:
device_ctx_t device_ctx;
duplication_t dup;
cursor_t cursor;
texture2d_t texture;
int width, height;
DXGI_FORMAT format;
D3D_FEATURE_LEVEL feature_level;
D3D11_MAPPED_SUBRESOURCE current_img;
};
const char *format_str[] = {
@ -729,8 +738,8 @@ const char *format_str[] = {
}
namespace platf {
std::unique_ptr<display_t> display() {
auto disp = std::make_unique<dxgi::display_t>();
std::shared_ptr<display_t> display() {
auto disp = std::make_shared<dxgi::display_t>();
if (disp->init()) {
return nullptr;

141
sunshine/round_robin.h Executable file
View File

@ -0,0 +1,141 @@
#ifndef KITTY_UTIL_ITERATOR_H
#define KITTY_UTIL_ITERATOR_H
#include <iterator>
namespace util {
/**
 * CRTP helper that builds the usual iterator operators out of a handful of
 * primitives supplied by the derived iterator type T:
 *
 *   void inc()                   advance by one      (++, +=, + step, it - it)
 *   void dec()                   step back by one    (--, -=, - step)
 *   V   *get() const             current element     (*, ->)
 *   bool eq(const T &) const     same position?      (==, !=, <=, >=, it - it)
 *   bool gt(const T &) const     strictly after?     (>, <, >=, <=)
 *
 * Only the primitives behind the operators that are actually used have to
 * exist, because member functions of a class template are instantiated lazily.
 *
 * @tparam V element type the iterator refers to
 * @tparam T the derived iterator type (CRTP)
 */
template<class V, class T>
class it_wrap_t {
public:
  // Member types that used to be inherited from std::iterator, which is
  // deprecated since C++17. std::iterator_traits picks them up from here.
  using iterator_category = std::random_access_iterator_tag;
  using value_type        = V;
  using difference_type   = std::ptrdiff_t;

  typedef T iterator;
  typedef value_type class_t;
  typedef class_t &reference;
  typedef class_t *pointer;
  typedef difference_type diff_t;

  /// Advance `step` times through inc(); a non-positive `step` is a no-op.
  iterator &operator+=(diff_t step) {
    while(step-- > 0) {
      ++_this();
    }

    return _this();
  }

  /// Step back `step` times through dec(); a non-positive `step` is a no-op.
  iterator &operator-=(diff_t step) {
    while(step-- > 0) {
      --_this();
    }

    return _this();
  }

  /// Copy of this iterator advanced by `step`.
  iterator operator+(diff_t step) const {
    iterator new_ = _this();
    new_ += step;

    return new_;
  }

  /// Copy of this iterator moved back by `step`.
  iterator operator-(diff_t step) const {
    iterator new_ = _this();
    new_ -= step;

    return new_;
  }

  /**
   * Number of increments needed to get from `first` to this iterator.
   * Does not terminate when this iterator cannot be reached from `first`.
   */
  diff_t operator-(iterator first) const {
    diff_t step = 0;
    while(first != _this()) {
      ++step;
      ++first;
    }

    return step;
  }

  iterator &operator++() {
    _this().inc();
    return _this();
  }

  iterator &operator--() {
    _this().dec();
    return _this();
  }

  iterator operator++(int) {
    iterator new_ = _this();
    ++_this();

    return new_;
  }

  iterator operator--(int) {
    iterator new_ = _this();
    --_this();

    return new_;
  }

  reference operator*() { return *_this().get(); }
  reference operator*() const { return *_this().get(); }

  pointer operator->() { return &*_this(); }
  pointer operator->() const { return &*_this(); }

  bool operator!=(const iterator &other) const {
    return !(_this() == other);
  }

  bool operator<(const iterator &other) const {
    return !(_this() >= other);
  }

  bool operator>=(const iterator &other) const {
    return _this() == other || _this() > other;
  }

  bool operator<=(const iterator &other) const {
    return _this() == other || _this() < other;
  }

  bool operator==(const iterator &other) const { return _this().eq(other); }
  bool operator>(const iterator &other) const { return _this().gt(other); }

private:
  iterator &_this() { return *static_cast<iterator *>(this); }
  const iterator &_this() const { return *static_cast<const iterator *>(this); }
};

/**
 * Iterator that cycles endlessly over [begin, end): stepping past the last
 * element continues at the first one, and stepping back from the first
 * element continues at the last one.
 *
 * @tparam V  element type yielded when dereferencing
 * @tparam It underlying iterator type (dec() needs it to be bidirectional)
 */
template<class V, class It>
class round_robin_t : public it_wrap_t<V, round_robin_t<V, It>> {
public:
  using iterator = It;
  using pointer  = V *;

  round_robin_t(iterator begin, iterator end) : _begin(begin), _end(end), _pos(begin) {}

  /// Move to the next element, wrapping from the last one to the first one.
  void inc() {
    // Nothing to cycle through; never step outside an empty range
    if(_begin == _end) {
      return;
    }

    ++_pos;
    if(_pos == _end) {
      _pos = _begin;
    }
  }

  /// Move to the previous element, wrapping from the first one to the last one.
  void dec() {
    if(_begin == _end) {
      return;
    }

    if(_pos == _begin) {
      _pos = _end;
    }
    --_pos;
  }

  /// Two round-robin iterators are equal when they sit on the same position.
  /// The elements themselves are not compared: distinct slots may hold equal
  /// values, and an empty range has no element to dereference.
  bool eq(const round_robin_t &other) const {
    return _pos == other._pos;
  }

  /// Address of the element at the current position.
  pointer get() const {
    return &*_pos;
  }

private:
  It _begin;
  It _end;

  It _pos;
};

/**
 * Create a round_robin_t over [begin, end). V has to be given explicitly,
 * It is deduced from the arguments.
 */
template<class V, class It>
round_robin_t<V, It> make_round_robin(It begin, It end) {
  return round_robin_t<V, It>(begin, end);
}
}
#endif

View File

@ -614,7 +614,7 @@ void videoBroadcastThread(safe::signal_t *shutdown_event, udp::socket &sock, vid
frame_new = "\000\000\000\001("sv;
}
assert(std::search(std::begin(payload), std::end(payload), std::begin(hevc_i_frame), std::end(hevc_i_frame)) ==
assert(std::search(std::begin(payload), std::end(payload), std::begin(frame_new), std::end(frame_new)) ==
std::end(payload));
payload_new = replace(payload, frame_old, frame_new);
payload = {(char *) payload_new.data(), payload_new.size()};

View File

@ -388,69 +388,6 @@ void c_free(T *p) {
template<class T>
using c_ptr = safe_ptr<T, c_free<T>>;
/**
 * Minimal non-owning view over the range [begin, end) described by two
 * iterators/pointers of type T. It only stores the two values it was given
 * and exposes them through a container-like interface.
 */
template<class T>
class FakeContainer {
  using pointer = T;

public:
  FakeContainer(pointer first, pointer last) : _begin(first), _end(last) {}

  // Mutable access
  pointer begin() { return _begin; }
  pointer end() { return _end; }
  pointer data() { return _begin; }

  // Read-only access
  const pointer begin() const { return _begin; }
  const pointer end() const { return _end; }
  const pointer cbegin() const { return _begin; }
  const pointer cend() const { return _end; }
  const pointer data() const { return _begin; }

  /// Number of elements between the stored begin and end.
  std::size_t size() const {
    return static_cast<std::size_t>(std::distance(_begin, _end));
  }

private:
  pointer _begin;
  pointer _end;
};
/// Wrap the iterator pair [begin, end) in a FakeContainer.
template<class T>
FakeContainer<T> toContainer(T begin, T end) {
  return FakeContainer<T>(begin, end);
}
/// Wrap a start iterator and an element count in a FakeContainer.
/// Despite its name, `end` is the number of elements following `begin`.
template<class T>
FakeContainer<T> toContainer(T begin, std::size_t end) {
  T last = begin + end;
  return FakeContainer<T>(begin, last);
}
/// Wrap a sequence terminated by a value-initialized element (T()) in a
/// FakeContainer. The terminator itself is not part of the resulting range.
template<class T>
FakeContainer<T*> toContainer(T * const begin) {
  auto terminator = T();

  // Walk forward until the terminating element is found
  T *cursor = begin;
  for(; *cursor != terminator; ++cursor) {}

  return FakeContainer<T*>(begin, cursor);
}
// Primary template: only the partial specialization below is defined.
template<class T, class H>
struct _init_helper;

/**
 * Builds a T<Args...> out of one argument per element of Args, followed by
 * one extra trailing argument of type H that is accepted but not used.
 */
template<template<class...> class T, class H, class... Args>
struct _init_helper<T<Args...>, H> {
  using type = T<Args...>;

  /// Copy every argument into the result; the trailing H is ignored.
  static auto copy(const Args&... args, const H&) -> type {
    return std::make_tuple(args...);
  }

  /// Move every argument into the result; the trailing H is ignored.
  static auto move(Args&&... args, H&&) -> type {
    return std::make_tuple(std::move(args)...);
  }
};
inline std::int64_t from_chars(const char *begin, const char *end) {
std::int64_t res {};
std::int64_t mul = 1;

View File

@ -11,6 +11,7 @@ extern "C" {
#include "platform/common.h"
#include "thread_pool.h"
#include "round_robin.h"
#include "config.h"
#include "video.h"
#include "main.h"
@ -138,40 +139,6 @@ struct capture_thread_ctx_t {
return codec_t { ctx.get() };
}
/**
 * Take one snapshot of `disp` into `img` and translate the capture status
 * into an integer code.
 *
 * @param img  image handed to disp->snapshot()
 * @param disp display to capture from; replaced by a fresh platf::display()
 *             when the snapshot reports capture_e::reinit
 * @return  1 when a frame was captured,
 *          0 when the snapshot timed out or the display was re-created,
 *         -1 on error, on an unrecognized status, or when the display could
 *            not be re-created
 */
int capture_display(platf::img_t *img, std::unique_ptr<platf::display_t> &disp) {
  const auto status = disp->snapshot(img, display_cursor);

  if(status == platf::capture_e::ok) {
    return 1;
  }
  if(status == platf::capture_e::timeout) {
    return 0;
  }
  if(status == platf::capture_e::error) {
    return -1;
  }
  if(status != platf::capture_e::reinit) {
    BOOST_LOG(error) << "Unrecognized capture status ["sv << (int)status << ']';
    return -1;
  }

  // Re-create the display. This is attempted twice, in case the first
  // re-initialization still fails.
  for(int attempt = 0; attempt < 2; ++attempt) {
    disp.reset();
    disp = platf::display();
    if(disp) {
      return 0;
    }

    std::this_thread::sleep_for(200ms);
  }

  return -1;
}
void captureThread(std::shared_ptr<safe::queue_t<capture_ctx_t>> capture_ctx_queue) {
std::vector<capture_ctx_t> capture_ctxs;
@ -190,6 +157,22 @@ void captureThread(std::shared_ptr<safe::queue_t<capture_ctx_t>> capture_ctx_que
std::chrono::nanoseconds delay = 1s;
auto disp = platf::display();
if(!disp) {
return;
}
std::vector<std::shared_ptr<platf::img_t>> imgs(12);
auto round_robin = util::make_round_robin<std::shared_ptr<platf::img_t>>(std::begin(imgs), std::end(imgs));
for(auto &img : imgs) {
img = disp->alloc_img();
if(!img) {
BOOST_LOG(error) << "Couldn't initialize an image"sv;
return;
}
}
auto next_frame = std::chrono::steady_clock::now();
while(capture_ctx_queue->running()) {
while(capture_ctx_queue->peek()) {
capture_ctxs.emplace_back(std::move(*capture_ctx_queue->pop()));
@ -197,13 +180,62 @@ void captureThread(std::shared_ptr<safe::queue_t<capture_ctx_t>> capture_ctx_que
delay = std::min(delay, capture_ctxs.back().delay);
}
std::shared_ptr<platf::img_t> img = disp->alloc_img();
auto result = capture_display(img.get(), disp);
if(result < 0) {
return;
auto now = std::chrono::steady_clock::now();
if(next_frame > now) {
std::this_thread::sleep_until(next_frame);
}
if(!result) {
continue;
next_frame += delay;
auto &img = *round_robin++;
auto status = disp->snapshot(img.get(), display_cursor);
switch (status) {
case platf::capture_e::reinit: {
// Some classes of images contain references to the display --> display won't delete unless img is deleted
for(auto &img : imgs) {
img.reset();
}
while(disp.use_count() > 1) {
std::this_thread::sleep_for(100ms);
}
// We try this twice, in case we still get an error on reinitialization
for(int x = 0; x < 2; ++x) {
// Some classes of display cannot have multiple instances at once
disp.reset();
disp = platf::display();
if(disp) {
break;
}
std::this_thread::sleep_for(200ms);
}
if(!disp) {
return;
}
// Re-allocate images
for(auto &img : imgs) {
img = disp->alloc_img();
if(!img) {
BOOST_LOG(error) << "Couldn't initialize an image"sv;
return;
}
}
continue;
}
case platf::capture_e::error:
return;
case platf::capture_e::timeout:
continue;
case platf::capture_e::ok:
break;
default:
BOOST_LOG(error) << "Unrecognized capture status ["sv << (int)status << ']';
return;
}
KITTY_WHILE_LOOP(auto capture_ctx = std::begin(capture_ctxs), capture_ctx != std::end(capture_ctxs), {
@ -508,6 +540,20 @@ void capture(
int framerate = config.framerate;
auto images = std::make_shared<img_event_t::element_type>();
// Temporary image to ensure something is sent to Moonlight even if no frame has been captured yet.
int dummy_data = 0;
{
auto img = std::make_shared<platf::img_t>();
img->row_pitch = 4;
img->height = 1;
img->width = 1;
img->pixel_pitch = 4;
img->data = (std::uint8_t*)&dummy_data;
images->raise(std::move(img));
}
// Keep a reference counter to ensure the capture thread only runs when other threads have a reference to the capture thread
static auto capture_thread = safe::make_shared<capture_thread_ctx_t>(start_capture, end_capture);
auto ref = capture_thread.ref();
@ -533,15 +579,6 @@ void capture(
// Initiate scaling context with correct height and width
sws_t sws;
// Temporary image to ensure something is sent to Moonlight even if no frame has been captured yet.
int dummy_data = 0;
auto img = std::make_shared<platf::img_t>();
img->row_pitch = 4;
img->height = 1;
img->width = 1;
img->pixel_pitch = 4;
img->data = (std::uint8_t*)&dummy_data;
auto next_frame = std::chrono::steady_clock::now();
while(true) {
if(shutdown_event->peek() || !images->running()) {
@ -566,8 +603,29 @@ void capture(
// When Moonlight requests an IDR frame, send frames even if there is no new captured frame
if(frame_nr > (key_frame_nr + config.framerate) || images->peek()) {
if(auto tmp_img = images->pop(delay)) {
img = std::move(tmp_img);
if(auto img = images->pop(delay)) {
if(software.system_memory) {
auto new_width = img->width;
auto new_height = img->height;
if(img_width != new_width || img_height != new_height) {
img_width = new_width;
img_height = new_height;
sws.reset(
sws_getContext(
img_width, img_height, AV_PIX_FMT_BGR0,
session->ctx->width, session->ctx->height, session->ctx->pix_fmt,
SWS_LANCZOS | SWS_ACCURATE_RND,
nullptr, nullptr, nullptr));
sws_setColorspaceDetails(sws.get(), sws_getCoefficients(SWS_CS_DEFAULT), 0,
sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1,
0, 1 << 16, 1 << 16);
}
}
software.img_to_frame(sws, *img, session->frame);
}
else if(images->running()) {
continue;
@ -577,29 +635,6 @@ void capture(
}
}
if(software.system_memory) {
auto new_width = img->width;
auto new_height = img->height;
if(img_width != new_width || img_height != new_height) {
img_width = new_width;
img_height = new_height;
sws.reset(
sws_getContext(
img_width, img_height, AV_PIX_FMT_BGR0,
session->ctx->width, session->ctx->height, session->ctx->pix_fmt,
SWS_LANCZOS | SWS_ACCURATE_RND,
nullptr, nullptr, nullptr));
sws_setColorspaceDetails(sws.get(), sws_getCoefficients(SWS_CS_DEFAULT), 0,
sws_getCoefficients(session->sws_color_format), config.encoderCscMode & 0x1,
0, 1 << 16, 1 << 16);
}
}
software.img_to_frame(sws, *img, session->frame);
encode(frame_nr++, session->ctx, session->frame, packets, channel_data);
session->frame->pict_type = AV_PICTURE_TYPE_NONE;