Added NvFBC on Linux

This commit is contained in:
loki-47-6F-64 2021-09-22 11:36:59 +02:00
parent 196f1f7471
commit bb912786bd
7 changed files with 546 additions and 327 deletions

View File

@ -141,6 +141,13 @@ public:
struct img_t {
public:
img_t() = default;
img_t(img_t &&) = delete;
img_t(const img_t &) = delete;
img_t &operator=(img_t &&) = delete;
img_t &operator=(const img_t &) = delete;
std::uint8_t *data {};
std::int32_t width {};
std::int32_t height {};

View File

@ -1,3 +1,5 @@
#include <bitset>
#include <NvFBC.h>
#include <ffnvcodec/dynlink_loader.h>
@ -55,96 +57,9 @@ inline static int check(CUresult result, const std::string_view &sv) {
return 0;
}
class ctx_t {
class img_t : public platf::img_t {
public:
ctx_t(CUcontext ctx) {
CU_CHECK_IGNORE(cdf->cuCtxPushCurrent(ctx), "Couldn't push cuda context");
}
~ctx_t() {
CUcontext dummy;
CU_CHECK_IGNORE(cdf->cuCtxPopCurrent(&dummy), "Couldn't pop cuda context");
}
};
void free_res(CUgraphicsResource res) {
cdf->cuGraphicsUnregisterResource(res);
}
using res_internal_t = util::safe_ptr<CUgraphicsResource_st, free_res>;
template<std::size_t N>
class res_t {
public:
res_t() : resources {}, mapped { false } {}
res_t(res_t &&other) noexcept : resources { other.resources }, array_p { other.array_p }, ctx { other.ctx }, stream { other.stream } {
other.resources = std::array<res_internal_t::pointer, N> {};
}
res_t &operator=(res_t &&other) {
for(auto x = 0; x < N; ++x) {
std::swap(resources[x], other.resources[x]);
std::swap(array_p[x], other.array_p[x]);
}
std::swap(ctx, other.ctx);
std::swap(stream, other.stream);
std::swap(mapped, other.mapped);
return *this;
}
res_t(CUcontext ctx, CUstream stream) : resources {}, ctx { ctx }, stream { stream }, mapped { false } {}
int bind(gl::tex_t &tex) {
ctx_t ctx { this->ctx };
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&resources[0], tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y image");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&resources[1], tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register uv image");
return 0;
}
int map() {
ctx_t ctx { this->ctx };
CU_CHECK(cdf->cuGraphicsMapResources(resources.size(), resources.data(), stream), "Coudn't map cuda resources");
mapped = true;
CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&array_p[0], resources[0], 0, 0), "Couldn't get mapped subresource [0]");
CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&array_p[1], resources[1], 0, 0), "Couldn't get mapped subresource [1]");
return 0;
}
void unmap() {
// Either all or none are mapped
if(mapped) {
ctx_t ctx { this->ctx };
CU_CHECK_IGNORE(cdf->cuGraphicsUnmapResources(resources.size(), resources.data(), stream), "Couldn't unmap cuda resources");
mapped = false;
}
}
inline CUarray &operator[](std::size_t index) {
return array_p[index];
}
~res_t() {
unmap();
}
std::array<res_internal_t::pointer, N> resources;
std::array<CUarray, N> array_p;
CUcontext ctx;
CUstream stream;
bool mapped;
tex_t tex;
};
int init() {
@ -160,139 +75,8 @@ int init() {
return 0;
}
class opengl_t : public platf::hwdevice_t {
public:
int init(int in_width, int in_height, platf::x11::xdisplay_t::pointer xdisplay) {
if(!cdf) {
BOOST_LOG(warning) << "cuda not initialized"sv;
return -1;
}
this->data = (void *)0x1;
display = egl::make_display(xdisplay);
if(!display) {
return -1;
}
auto ctx_opt = egl::make_ctx(display.get());
if(!ctx_opt) {
return -1;
}
ctx = std::move(*ctx_opt);
width = in_width;
height = in_height;
return 0;
}
int set_frame(AVFrame *frame) override {
auto cuda_ctx = (AVCUDADeviceContext *)((AVHWFramesContext *)frame->hw_frames_ctx->data)->device_ctx->hwctx;
tex = gl::tex_t::make(2);
fb = gl::frame_buf_t::make(2);
gl::ctx.BindTexture(GL_TEXTURE_2D, tex[0]);
gl::ctx.TexImage2D(GL_TEXTURE_2D, 0, GL_RED, frame->width, frame->height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
gl::ctx.BindTexture(GL_TEXTURE_2D, tex[1]);
gl::ctx.TexImage2D(GL_TEXTURE_2D, 0, GL_RG, frame->width / 2, frame->height / 2, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr);
gl::ctx.BindTexture(GL_TEXTURE_2D, 0);
fb.bind(std::begin(tex), std::end(tex));
res = res_t<2> { cuda_ctx->cuda_ctx, cuda_ctx->stream };
if(res.bind(tex)) {
return -1;
}
this->hwframe.reset(frame);
this->frame = frame;
if(av_hwframe_get_buffer(frame->hw_frames_ctx, frame, 0)) {
BOOST_LOG(error) << "Couldn't get hwframe for NVENC"sv;
return -1;
}
auto sws_opt = egl::sws_t::make(width, height, frame->width, frame->height);
if(!sws_opt) {
return -1;
}
sws = std::move(*sws_opt);
return sws.blank(fb, 0, 0, frame->width, frame->height);
}
int convert(platf::img_t &img) override {
sws.load_ram(img);
if(sws.convert(fb)) {
return -1;
}
if(res.map()) {
return -1;
}
// Push and pop cuda context
ctx_t ctx { res.ctx };
for(auto x = 0; x < 2; ++x) {
CUDA_MEMCPY2D desc {};
auto shift = x;
desc.srcPitch = frame->width;
desc.dstPitch = frame->linesize[x];
desc.Height = frame->height >> shift;
desc.WidthInBytes = std::min(desc.srcPitch, desc.dstPitch);
desc.srcMemoryType = CU_MEMORYTYPE_ARRAY;
desc.dstMemoryType = CU_MEMORYTYPE_DEVICE;
desc.srcArray = res[x];
desc.dstDevice = (CUdeviceptr)frame->data[x];
CU_CHECK(cdf->cuMemcpy2DAsync(&desc, res.stream), "Couldn't copy from OpenGL to cuda");
}
res.unmap();
return 0;
}
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
sws.set_colorspace(colorspace, color_range);
}
frame_t hwframe;
egl::display_t display;
egl::ctx_t ctx;
egl::sws_t sws;
gl::tex_t tex;
gl::frame_buf_t fb;
res_t<2> res;
int width, height;
};
class cuda_t : public platf::hwdevice_t {
public:
~cuda_t() override {
// sws_t needs to be destroyed while the context is active
if(sws) {
ctx_t ctx { cuda_ctx };
sws.reset();
}
}
int init(int in_width, int in_height) {
if(!cdf) {
BOOST_LOG(warning) << "cuda not initialized"sv;
@ -322,78 +106,111 @@ public:
return -1;
}
cuda_ctx = ((AVCUDADeviceContext *)((AVHWFramesContext *)frame->hw_frames_ctx->data)->device_ctx->hwctx)->cuda_ctx;
ctx_t ctx { cuda_ctx };
sws = sws_t::make(width, height, frame->width, frame->height, width * 4);
if(!sws) {
auto sws_opt = sws_t::make(width, height, frame->width, frame->height, width * 4);
if(!sws_opt) {
return -1;
}
sws = std::move(*sws_opt);
return 0;
}
int convert(platf::img_t &img) override {
ctx_t ctx { cuda_ctx };
return sws->load_ram(img) || sws->convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1]);
}
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
ctx_t ctx { cuda_ctx };
sws->set_colorspace(colorspace, color_range);
sws.set_colorspace(colorspace, color_range);
auto tex = tex_t::make(height, width * 4);
if(!tex) {
return;
}
// The default green color is ugly.
// Update the background color
platf::img_t img;
img.width = frame->width;
img.height = frame->height;
img.width = width;
img.height = height;
img.pixel_pitch = 4;
img.row_pitch = img.width * img.pixel_pitch;
img.row_pitch = img.width * img.pixel_pitch;
std::vector<std::uint8_t> image_data;
image_data.resize(img.row_pitch * img.height);
img.data = image_data.data();
if(sws->load_ram(img)) {
if(sws.load_ram(img, tex->array)) {
return;
}
sws->convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], {
frame->width, frame->height, 0, 0
});
sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture, { frame->width, frame->height, 0, 0 });
}
frame_t hwframe;
std::unique_ptr<sws_t> sws;
int width, height;
CUcontext cuda_ctx;
sws_t sws;
};
std::shared_ptr<platf::hwdevice_t> make_hwdevice(int width, int height, platf::x11::xdisplay_t::pointer xdisplay) {
class cuda_ram_t : public cuda_t {
public:
int convert(platf::img_t &img) override {
return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex.texture);
}
int set_frame(AVFrame *frame) {
if(cuda_t::set_frame(frame)) {
return -1;
}
auto tex_opt = tex_t::make(height, width * 4);
if(!tex_opt) {
return -1;
}
tex = std::move(*tex_opt);
return 0;
}
tex_t tex;
};
class cuda_vram_t : public cuda_t {
public:
int convert(platf::img_t &img) override {
return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], (cudaTextureObject_t)img.data);
}
};
std::shared_ptr<platf::hwdevice_t> make_hwdevice(int width, int height, bool vram) {
if(init()) {
return nullptr;
}
auto cuda = std::make_shared<cuda_t>();
std::shared_ptr<cuda_t> cuda;
if(vram) {
cuda = std::make_shared<cuda_vram_t>();
}
else {
cuda = std::make_shared<cuda_ram_t>();
}
if(cuda->init(width, height)) {
return nullptr;
}
return cuda;
}
} // namespace cuda
namespace platf::nvfbc {
namespace nvfbc {
static PNVFBCCREATEINSTANCE createInstance {};
static NVFBC_API_FUNCTION_LIST func { NVFBC_VERSION };
static constexpr inline NVFBC_BOOL nv_bool(bool b) {
return b ? NVFBC_TRUE : NVFBC_FALSE;
}
static void *handle { nullptr };
int init() {
static bool funcs_loaded = false;
@ -418,49 +235,65 @@ int init() {
return -1;
}
auto status = cuda::nvfbc::createInstance(&cuda::nvfbc::func);
if(status) {
BOOST_LOG(error) << "Unable to create NvFBC instance"sv;
dlclose(handle);
handle = nullptr;
return -1;
}
funcs_loaded = true;
return 0;
}
class handle_t {
KITTY_USING_MOVE_T(session_t, NVFBC_SESSION_HANDLE, std::numeric_limits<std::uint64_t>::max(), {
if(el == std::numeric_limits<std::uint64_t>::max()) {
return;
}
NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER };
auto status = func.nvFBCDestroyHandle(el, &params);
if(status) {
BOOST_LOG(error) << "Failed to destroy nvfbc handle: "sv << func.nvFBCGetLastErrorStr(el);
}
});
enum flag_e {
SESSION_HANDLE,
SESSION_CAPTURE,
MAX_FLAGS,
};
public:
handle_t() = default;
handle_t(handle_t &&other) : handle_flags { other.handle_flags }, handle { other.handle } {
other.handle_flags.reset();
}
handle_t &operator=(handle_t &&other) {
std::swap(handle_flags, other.handle_flags);
std::swap(handle, other.handle);
return *this;
}
static std::optional<handle_t> make() {
NVFBC_CREATE_HANDLE_PARAMS params { NVFBC_CREATE_HANDLE_PARAMS_VER };
session_t session;
auto status = func.nvFBCCreateHandle(&session.el, &params);
handle_t handle;
auto status = func.nvFBCCreateHandle(&handle.handle, &params);
if(status) {
BOOST_LOG(error) << "Failed to create session: "sv << func.nvFBCGetLastErrorStr(session.el);
session.release();
BOOST_LOG(error) << "Failed to create session: "sv << handle.last_error();
return std::nullopt;
}
return handle_t { std::move(session) };
handle.handle_flags[SESSION_HANDLE] = true;
return std::move(handle);
}
const char *last_error() {
return func.nvFBCGetLastErrorStr(session.el);
return func.nvFBCGetLastErrorStr(handle);
}
std::optional<NVFBC_GET_STATUS_PARAMS> status() {
NVFBC_GET_STATUS_PARAMS params { NVFBC_GET_STATUS_PARAMS_VER };
auto status = func.nvFBCGetStatus(session.el, &params);
auto status = func.nvFBCGetStatus(handle, &params);
if(status) {
BOOST_LOG(error) << "Failed to create session: "sv << last_error();
BOOST_LOG(error) << "Failed to get NvFBC status: "sv << last_error();
return std::nullopt;
}
@ -468,23 +301,328 @@ public:
return params;
}
session_t session;
int capture(NVFBC_CREATE_CAPTURE_SESSION_PARAMS &capture_params) {
if(func.nvFBCCreateCaptureSession(handle, &capture_params)) {
BOOST_LOG(error) << "Failed to start capture session: "sv << last_error();
return -1;
}
handle_flags[SESSION_CAPTURE] = true;
NVFBC_TOCUDA_SETUP_PARAMS setup_params {
NVFBC_TOCUDA_SETUP_PARAMS_VER,
NVFBC_BUFFER_FORMAT_BGRA,
};
if(func.nvFBCToCudaSetUp(handle, &setup_params)) {
BOOST_LOG(error) << "Failed to setup cuda interop with nvFBC: "sv << last_error();
return -1;
}
return 0;
}
int stop() {
if(!handle_flags[SESSION_CAPTURE]) {
return 0;
}
NVFBC_DESTROY_CAPTURE_SESSION_PARAMS params { NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER };
if(func.nvFBCDestroyCaptureSession(handle, &params)) {
BOOST_LOG(error) << "Couldn't destroy capture session: "sv << last_error();
return -1;
}
handle_flags[SESSION_CAPTURE] = false;
return 0;
}
~handle_t() {
if(!handle_flags[SESSION_HANDLE]) {
return;
}
if(handle_flags[SESSION_CAPTURE]) {
NVFBC_DESTROY_CAPTURE_SESSION_PARAMS params { NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER };
if(func.nvFBCDestroyCaptureSession(handle, &params)) {
BOOST_LOG(error) << "Couldn't destroy capture session: "sv << func.nvFBCGetLastErrorStr(handle);
}
}
NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER };
if(func.nvFBCDestroyHandle(handle, &params)) {
BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle);
}
}
std::bitset<MAX_FLAGS> handle_flags;
NVFBC_SESSION_HANDLE handle;
};
class display_t : public platf::display_t {
public:
int init(const std::string_view &display_name, int framerate) {
auto handle = handle_t::make();
if(!handle) {
return -1;
}
auto status_params = handle->status();
if(!status_params) {
return -1;
}
int streamedMonitor = -1;
if(!display_name.empty()) {
if(status_params->bXRandRAvailable) {
auto monitor_nr = util::from_view(display_name);
if(monitor_nr < 0 || monitor_nr >= status_params->dwOutputNum) {
BOOST_LOG(warning) << "Can't stream monitor ["sv << monitor_nr << "], it needs to be between [0] and ["sv << status_params->dwOutputNum - 1 << "], defaulting to virtual desktop"sv;
}
else {
streamedMonitor = monitor_nr;
}
}
else {
BOOST_LOG(warning) << "XrandR not available, streaming entire virtual desktop"sv;
}
}
capture_params = NVFBC_CREATE_CAPTURE_SESSION_PARAMS { NVFBC_CREATE_CAPTURE_SESSION_PARAMS_VER };
capture_params.eCaptureType = NVFBC_CAPTURE_SHARED_CUDA;
capture_params.bDisableAutoModesetRecovery = nv_bool(true);
capture_params.dwSamplingRateMs = 1000 /* ms */ / framerate;
if(streamedMonitor != -1) {
auto &output = status_params->outputs[streamedMonitor];
width = output.trackedBox.w;
height = output.trackedBox.h;
offset_x = output.trackedBox.x;
offset_y = output.trackedBox.y;
capture_params.eTrackingType = NVFBC_TRACKING_OUTPUT;
capture_params.dwOutputId = output.dwId;
}
else {
capture_params.eTrackingType = NVFBC_TRACKING_SCREEN;
width = status_params->screenSize.w;
height = status_params->screenSize.h;
}
env_width = status_params->screenSize.w;
env_height = status_params->screenSize.h;
this->handle = std::move(*handle);
return 0;
}
platf::capture_e capture(snapshot_cb_t &&snapshot_cb, std::shared_ptr<platf::img_t> img, bool *cursor) override {
// Force display_t::capture to initialize handle_t::capture
cursor_visible = !*cursor;
auto fg = util::fail_guard([&]() {
handle.stop();
});
while(img) {
auto status = snapshot(img.get(), 500ms, *cursor);
switch(status) {
case platf::capture_e::reinit:
case platf::capture_e::error:
return status;
case platf::capture_e::timeout:
std::this_thread::sleep_for(1ms);
continue;
case platf::capture_e::ok:
img = snapshot_cb(img);
break;
default:
BOOST_LOG(error) << "Unrecognized capture status ["sv << (int)status << ']';
return status;
}
}
return platf::capture_e::ok;
}
// Reinitialize the capture session.
platf::capture_e reinit(bool cursor) {
if(handle.stop()) {
return platf::capture_e::error;
}
cursor_visible = cursor;
if(false && cursor) {
capture_params.bPushModel = nv_bool(false);
capture_params.bWithCursor = nv_bool(true);
capture_params.bAllowDirectCapture = nv_bool(false);
}
else {
capture_params.bPushModel = nv_bool(true);
capture_params.bWithCursor = nv_bool(false);
capture_params.bAllowDirectCapture = nv_bool(true);
}
if(handle.capture(capture_params)) {
return platf::capture_e::error;
}
// If trying to capture directly, test if it actually does.
if(capture_params.bAllowDirectCapture) {
CUdeviceptr device_ptr;
NVFBC_FRAME_GRAB_INFO info;
NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab {
NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER,
NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT_IF_NEW_FRAME_READY,
&device_ptr,
&info,
0,
};
// Direct Capture may fail the first few times, even if it's possible
for(int x = 0; x < 3; ++x) {
if(auto status = func.nvFBCToCudaGrabFrame(handle.handle, &grab)) {
if(status == NVFBC_ERR_MUST_RECREATE) {
return platf::capture_e::reinit;
}
BOOST_LOG(error) << "Couldn't capture nvFramebuffer: "sv << handle.last_error();
return platf::capture_e::error;
}
if(info.bDirectCapture) {
break;
}
BOOST_LOG(debug) << "Direct capture failed attempt ["sv << x << ']';
}
if(!info.bDirectCapture) {
BOOST_LOG(debug) << "Direct capture failed, trying the extra copy method"sv;
// Direct capture failed
capture_params.bPushModel = nv_bool(false);
capture_params.bWithCursor = nv_bool(false);
capture_params.bAllowDirectCapture = nv_bool(false);
if(handle.stop() || handle.capture(capture_params)) {
return platf::capture_e::error;
}
}
}
return platf::capture_e::ok;
}
platf::capture_e snapshot(platf::img_t *img, std::chrono::milliseconds timeout, bool cursor) {
if(cursor != cursor_visible) {
auto status = reinit(cursor);
if(status != platf::capture_e::ok) {
return status;
}
}
CUdeviceptr device_ptr;
NVFBC_FRAME_GRAB_INFO info;
NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab {
NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER,
NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT_IF_NEW_FRAME_READY,
&device_ptr,
&info,
(std::uint32_t)timeout.count(),
};
if(auto status = func.nvFBCToCudaGrabFrame(handle.handle, &grab)) {
if(status == NVFBC_ERR_MUST_RECREATE) {
return platf::capture_e::reinit;
}
BOOST_LOG(error) << "Couldn't capture nvFramebuffer: "sv << handle.last_error();
return platf::capture_e::error;
}
if(!info.bIsNewFrame) {
return platf::capture_e::timeout;
}
if(((img_t *)img)->tex.copy((std::uint8_t *)device_ptr, img->height, img->row_pitch)) {
return platf::capture_e::error;
}
return platf::capture_e::ok;
}
std::shared_ptr<platf::hwdevice_t> make_hwdevice(platf::pix_fmt_e pix_fmt) override {
return ::cuda::make_hwdevice(width, height, true);
}
std::shared_ptr<platf::img_t> alloc_img() override {
auto img = std::make_shared<cuda::img_t>();
img->width = width;
img->height = height;
img->pixel_pitch = 4;
img->row_pitch = img->width * img->pixel_pitch;
auto tex_opt = tex_t::make(height, width * img->pixel_pitch);
if(!tex_opt) {
return nullptr;
}
img->tex = std::move(*tex_opt);
img->data = (std::uint8_t *)img->tex.texture;
return img;
};
int dummy_img(platf::img_t *) override {
return 0;
}
bool cursor_visible;
handle_t handle;
NVFBC_CREATE_CAPTURE_SESSION_PARAMS capture_params;
};
} // namespace nvfbc
} // namespace cuda
namespace platf {
std::shared_ptr<display_t> nvfbc_display(mem_type_e hwdevice_type, const std::string &display_name, int framerate) {
if(hwdevice_type != mem_type_e::cuda) {
BOOST_LOG(error) << "Could not initialize nvfbc display with the given hw device type"sv;
return nullptr;
}
auto display = std::make_shared<cuda::nvfbc::display_t>();
if(display->init(display_name, framerate)) {
return nullptr;
}
return display;
}
std::vector<std::string> nvfbc_display_names() {
if(init()) {
if(cuda::init() || cuda::nvfbc::init()) {
return {};
}
std::vector<std::string> display_names;
auto status = createInstance(&func);
if(status) {
BOOST_LOG(error) << "Unable to create NvFBC instance"sv;
return {};
}
auto handle = handle_t::make();
auto handle = cuda::nvfbc::handle_t::make();
if(!handle) {
return {};
}
@ -500,7 +638,18 @@ std::vector<std::string> nvfbc_display_names() {
BOOST_LOG(info) << "Found ["sv << status_params->dwOutputNum << "] outputs"sv;
BOOST_LOG(info) << "Virtual Desktop: "sv << status_params->screenSize.w << 'x' << status_params->screenSize.h;
BOOST_LOG(info) << "XrandR: "sv << (status_params->bXRandRAvailable ? "available"sv : "unavailable"sv);
for(auto x = 0; x < status_params->dwOutputNum; ++x) {
auto &output = status_params->outputs[x];
BOOST_LOG(info) << "-- Output --"sv;
BOOST_LOG(debug) << " ID: "sv << output.dwId;
BOOST_LOG(debug) << " Name: "sv << output.name;
BOOST_LOG(info) << " Resolution: "sv << output.trackedBox.w << 'x' << output.trackedBox.h;
BOOST_LOG(info) << " Offset: "sv << output.trackedBox.x << 'x' << output.trackedBox.y;
display_names.emplace_back(std::to_string(x));
}
return display_names;
}
} // namespace platf::nvfbc
} // namespace platf

View File

@ -21,6 +21,9 @@ using namespace std::literals;
#define CU_CHECK_PTR(x, y) \
if(check((x), SUNSHINE_STRINGVIEW(y ": "))) return nullptr;
#define CU_CHECK_OPT(x, y) \
if(check((x), SUNSHINE_STRINGVIEW(y ": "))) return std::nullopt;
#define CU_CHECK_IGNORE(x, y) \
check((x), SUNSHINE_STRINGVIEW(y ": "))
@ -165,15 +168,21 @@ __global__ void RGBA_to_NV12(
dstY[1] = calcY(rgb_r, color_matrix) * 245.0f; // 245.0f is a magic number to ensure slight changes in luminosity are more visisble
}
sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pitch, int threadsPerBlock, ptr_t &&color_matrix)
: array {}, texture { INVALID_TEXTURE }, threadsPerBlock { threadsPerBlock }, color_matrix { std::move(color_matrix) } {
auto format = cudaCreateChannelDesc<uchar4>();
int tex_t::copy(std::uint8_t *src, int height, int pitch) {
CU_CHECK(cudaMemcpy2DToArray(array, 0, 0, src, pitch, pitch, height, cudaMemcpyDeviceToDevice), "Couldn't copy to cuda array from deviceptr");
CU_CHECK_VOID(cudaMallocArray(&array, &format, pitch, in_height, cudaArrayDefault), "Couldn't allocate cuda array");
return 0;
}
std::optional<tex_t> tex_t::make(int height, int pitch) {
tex_t tex;
auto format = cudaCreateChannelDesc<uchar4>();
CU_CHECK_OPT(cudaMallocArray(&tex.array, &format, pitch, height, cudaArrayDefault), "Couldn't allocate cuda array");
cudaResourceDesc res {};
res.resType = cudaResourceTypeArray;
res.res.array.array = array;
res.res.array.array = tex.array;
cudaTextureDesc desc {};
@ -183,9 +192,40 @@ sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pit
std::fill_n(std::begin(desc.addressMode), 2, cudaAddressModeClamp);
CU_CHECK_VOID(cudaCreateTextureObject(&texture, &res, &desc, nullptr), "Couldn't create cuda texture");
CU_CHECK_OPT(cudaCreateTextureObject(&tex.texture, &res, &desc, nullptr), "Couldn't create cuda texture");
return std::move(tex);
}
tex_t::tex_t() : array { }, texture { INVALID_TEXTURE } {}
tex_t::tex_t(tex_t &&other) : array { other.array }, texture { other.texture } {
other.array = 0;
other.texture = INVALID_TEXTURE;
}
tex_t &tex_t::operator=(tex_t &&other) {
std::swap(array, other.array);
std::swap(texture, other.texture);
return *this;
}
tex_t::~tex_t() {
if(texture != INVALID_TEXTURE) {
CU_CHECK_IGNORE(cudaDestroyTextureObject(texture), "Couldn't deallocate cuda texture");
texture = INVALID_TEXTURE;
}
if(array) {
CU_CHECK_IGNORE(cudaFreeArray(array), "Couldn't deallocate cuda array");
array = cudaArray_t {};
}
}
sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pitch, int threadsPerBlock, ptr_t &&color_matrix)
: threadsPerBlock { threadsPerBlock }, color_matrix { std::move(color_matrix) } {
// Ensure aspect ratio is maintained
auto scalar = std::fminf(out_width / (float)in_width, out_height / (float)in_height);
auto out_width_f = in_width * scalar;
@ -202,52 +242,32 @@ sws_t::sws_t(int in_width, int in_height, int out_width, int out_height, int pit
viewport.offsetY = offsetY_f;
}
sws_t::~sws_t() {
if(texture != INVALID_TEXTURE) {
CU_CHECK_IGNORE(cudaDestroyTextureObject(texture), "Couldn't deallocate cuda texture");
texture = INVALID_TEXTURE;
}
if(array) {
CU_CHECK_IGNORE(cudaFreeArray(array), "Couldn't deallocate cuda array");
array = cudaArray_t {};
}
}
std::unique_ptr<sws_t> sws_t::make(int in_width, int in_height, int out_width, int out_height, int pitch) {
std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int out_height, int pitch) {
cudaDeviceProp props;
int device;
CU_CHECK_PTR(cudaGetDevice(&device), "Couldn't get cuda device");
CU_CHECK_PTR(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties");
CU_CHECK_OPT(cudaGetDevice(&device), "Couldn't get cuda device");
CU_CHECK_OPT(cudaGetDeviceProperties(&props, device), "Couldn't get cuda device properties");
auto ptr = make_ptr<video::color_t>();
if(!ptr) {
return nullptr;
return std::nullopt;
}
auto sws = std::make_unique<sws_t>(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor / 2, std::move(ptr));
if(sws->texture == INVALID_TEXTURE) {
return nullptr;
}
return sws;
return std::make_optional<sws_t>(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor / 2, std::move(ptr));
}
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV) {
return convert(Y, UV, pitchY, pitchUV, viewport);
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture) {
return convert(Y, UV, pitchY, pitchUV, texture, viewport);
}
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, const viewport_t &viewport) {
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport) {
int threadsX = viewport.width / 2;
int threadsY = viewport.height;
dim3 block(threadsPerBlock, threadsPerBlock);
dim3 grid(div_align(threadsX, threadsPerBlock), div_align(threadsY, threadsPerBlock));
RGBA_to_NV12<<<block, grid>>>(texture, Y, UV, pitchY, pitchUV, viewport, (video::color_t*)color_matrix.get());
RGBA_to_NV12<<<block, grid>>>(texture, Y, UV, pitchY, pitchUV, viewport, (video::color_t *)color_matrix.get());
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
}
@ -274,7 +294,7 @@ void sws_t::set_colorspace(std::uint32_t colorspace, std::uint32_t color_range)
CU_CHECK_IGNORE(cudaMemcpy(color_matrix.get(), color_p, sizeof(video::color_t), cudaMemcpyHostToDevice), "Couldn't copy color matrix to cuda");
}
int sws_t::load_ram(platf::img_t &img) {
int sws_t::load_ram(platf::img_t &img, cudaArray_t array) {
return CU_CHECK_IGNORE(cudaMemcpy2DToArray(array, 0, 0, img.data, img.row_pitch, img.width * img.pixel_pitch, img.height, cudaMemcpyHostToDevice), "Couldn't copy to cuda array");
}

View File

@ -1,15 +1,16 @@
#ifndef SUNSHINE_PLATFORM_CUDA_H
#define SUNSHINE_PLATFORM_CUDA_H
#include <memory>
#ifndef __NVCC__
#include "sunshine/platform/common.h"
#include "x11grab.h"
namespace cuda {
std::shared_ptr<platf::hwdevice_t> make_hwdevice(int width, int height, platf::x11::xdisplay_t::pointer xdisplay);
namespace nvfbc {
std::vector<std::string> display_names();
}
std::shared_ptr<platf::hwdevice_t> make_hwdevice(int width, int height, bool vram);
int init();
} // namespace cuda
@ -41,9 +42,26 @@ struct viewport_t {
int offsetX, offsetY;
};
class tex_t {
public:
static std::optional<tex_t> make(int height, int pitch);
tex_t();
tex_t(tex_t &&);
tex_t &operator=(tex_t &&other);
~tex_t();
int copy(std::uint8_t *src, int height, int pitch);
cudaArray_t array;
cudaTextureObject_t texture;
};
class sws_t {
public:
~sws_t();
sws_t() = default;
sws_t(int in_width, int in_height, int out_width, int out_height, int pitch, int threadsPerBlock, ptr_t &&color_matrix);
/**
@ -52,19 +70,17 @@ public:
*
* pitch -- The size of a single row of pixels in bytes
*/
static std::unique_ptr<sws_t> make(int in_width, int in_height, int out_width, int out_height, int pitch);
static std::optional<sws_t> make(int in_width, int in_height, int out_width, int out_height, int pitch);
// Converts loaded image into a CUDevicePtr
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV);
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, const viewport_t &viewport);
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture);
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport);
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);
int load_ram(platf::img_t &img);
int load_ram(platf::img_t &img, cudaArray_t array);
ptr_t color_matrix;
cudaArray_t array;
cudaTextureObject_t texture;
int threadsPerBlock;

View File

@ -141,6 +141,9 @@ std::string get_mac_address(const std::string_view &address) {
}
enum class source_e {
// #ifdef SUNSHINE_BUILD_CUDA
NVFBC,
// #endif
#ifdef SUNSHINE_BUILD_WAYLAND
WAYLAND,
#endif
@ -153,6 +156,15 @@ enum class source_e {
};
static source_e source;
// #ifdef SUNSHINE_BUILD_CUDA
std::vector<std::string> nvfbc_display_names();
std::shared_ptr<display_t> nvfbc_display(mem_type_e hwdevice_type, const std::string &display_name, int framerate);
bool verify_nvfbc() {
return !nvfbc_display_names().empty();
}
// #endif
#ifdef SUNSHINE_BUILD_WAYLAND
std::vector<std::string> wl_display_names();
std::shared_ptr<display_t> wl_display(mem_type_e hwdevice_type, const std::string &display_name, int framerate);
@ -182,6 +194,10 @@ bool verify_x11() {
std::vector<std::string> display_names() {
switch(source) {
// #ifdef SUNSHINE_BUILD_CUDA
case source_e::NVFBC:
return nvfbc_display_names();
// #endif
#ifdef SUNSHINE_BUILD_WAYLAND
case source_e::WAYLAND:
return wl_display_names();
@ -201,6 +217,10 @@ std::vector<std::string> display_names() {
std::shared_ptr<display_t> display(mem_type_e hwdevice_type, const std::string &display_name, int framerate) {
switch(source) {
// #ifdef SUNSHINE_BUILD_CUDA
case source_e::NVFBC:
return nvfbc_display(hwdevice_type, display_name, framerate);
// #endif
#ifdef SUNSHINE_BUILD_WAYLAND
case source_e::WAYLAND:
return wl_display(hwdevice_type, display_name, framerate);
@ -229,7 +249,7 @@ std::unique_ptr<deinit_t> init() {
window_system = window_system_e::WAYLAND;
}
#endif
#ifdef SUNSHINE_BUILD_X11
#if defined(SUNSHINE_BUILD_X11) // || defined(SUNSHINE_BUILD_CUDA)
if(std::getenv("DISPLAY") && window_system != window_system_e::WAYLAND) {
if(std::getenv("WAYLAND_DISPLAY")) {
BOOST_LOG(warning) << "Wayland detected, yet sunshine will use X11 for screencasting, screencasting will only work on XWayland applications"sv;
@ -238,6 +258,13 @@ std::unique_ptr<deinit_t> init() {
window_system = window_system_e::X11;
}
#endif
// #ifdef SUNSHINE_BUILD_CUDA
if(verify_nvfbc()) {
BOOST_LOG(info) << "Using nvFBC for screencasting"sv;
source = source_e::NVFBC;
goto found_source;
}
// #endif
#ifdef SUNSHINE_BUILD_WAYLAND
if(verify_wl()) {
BOOST_LOG(info) << "Using Wayland for screencasting"sv;

View File

@ -518,7 +518,7 @@ struct x11_attr_t : public display_t {
}
if(mem_type == mem_type_e::cuda) {
return cuda::make_hwdevice(width, height, xdisplay.get());
return cuda::make_hwdevice(width, height, false);
}
return std::make_shared<hwdevice_t>();
@ -678,7 +678,7 @@ struct shm_attr_t : public x11_attr_t {
std::shared_ptr<display_t> x11_display(platf::mem_type_e hwdevice_type, const std::string &display_name, int framerate) {
if(hwdevice_type != platf::mem_type_e::system && hwdevice_type != platf::mem_type_e::vaapi && hwdevice_type != platf::mem_type_e::cuda) {
BOOST_LOG(error) << "Could not initialize display with the given hw device type."sv;
BOOST_LOG(error) << "Could not initialize x11 display with the given hw device type"sv;
return nullptr;
}

View File

@ -1699,7 +1699,7 @@ util::Either<buffer_t, int> vaapi_make_hwdevice_ctx(platf::hwdevice_t *base) {
util::Either<buffer_t, int> cuda_make_hwdevice_ctx(platf::hwdevice_t *base) {
buffer_t hw_device_buf;
auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 0);
auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 1 /* AV_CUDA_USE_PRIMARY_CONTEXT */);
if(status < 0) {
char string[AV_ERROR_MAX_STRING_SIZE];
BOOST_LOG(error) << "Failed to create a CUDA device: "sv << av_make_error_string(string, AV_ERROR_MAX_STRING_SIZE, status);