diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp index 0a048936..0f1d6413 100644 --- a/src/platform/linux/cuda.cpp +++ b/src/platform/linux/cuda.cpp @@ -4,6 +4,10 @@ */ #include +#include + +#include + #include #include @@ -29,6 +33,8 @@ extern "C" { #define CU_CHECK_IGNORE(x, y) \ check((x), SUNSHINE_STRINGVIEW(y ": ")) +namespace fs = std::filesystem; + using namespace std::literals; namespace cuda { constexpr auto cudaDevAttrMaxThreadsPerBlock = (CUdevice_attribute) 1; @@ -69,6 +75,13 @@ namespace cuda { CU_CHECK_IGNORE(cdf->cuStreamDestroy(stream), "Couldn't destroy cuda stream"); } + void + unregisterResource(CUgraphicsResource resource) { + CU_CHECK_IGNORE(cdf->cuGraphicsUnregisterResource(resource), "Couldn't unregister resource"); + } + + using registered_resource_t = util::safe_ptr; + class img_t: public platf::img_t { public: tex_t tex; @@ -223,6 +236,236 @@ namespace cuda { } }; + /** + * @brief Opens the DRM device associated with the CUDA device index. + * @param index CUDA device index to open. + * @return File descriptor or -1 on failure. + */ + file_t + open_drm_fd_for_cuda_device(int index) { + CUdevice device; + CU_CHECK(cdf->cuDeviceGet(&device, index), "Couldn't get CUDA device"); + + // There's no way to directly go from CUDA to a DRM device, so we'll + // use sysfs to look up the DRM device name from the PCI ID. 
+ char pci_bus_id[13]; + CU_CHECK(cdf->cuDeviceGetPCIBusId(pci_bus_id, sizeof(pci_bus_id), device), "Couldn't get CUDA device PCI bus ID"); + BOOST_LOG(debug) << "Found CUDA device with PCI bus ID: "sv << pci_bus_id; + + // Look for the name of the primary node in sysfs + char sysfs_path[PATH_MAX]; + std::snprintf(sysfs_path, sizeof(sysfs_path), "/sys/bus/pci/devices/%s/drm", pci_bus_id); + fs::path sysfs_dir { sysfs_path }; + for (auto &entry : fs::directory_iterator { sysfs_dir }) { + auto file = entry.path().filename(); + auto filestring = file.generic_u8string(); + if (std::string_view { filestring }.substr(0, 4) != "card"sv) { + continue; + } + + BOOST_LOG(debug) << "Found DRM primary node: "sv << filestring; + + fs::path dri_path { "/dev/dri"sv }; + auto device_path = dri_path / file; + return open(device_path.c_str(), O_RDWR); + } + + BOOST_LOG(error) << "Unable to find DRM device with PCI bus ID: "sv << pci_bus_id; + return -1; + } + + class gl_cuda_vram_t: public platf::avcodec_encode_device_t { + public: + /** + * @brief Initialize the GL->CUDA encoding device. + * @param in_width Width of captured frames. + * @param in_height Height of captured frames. + * @param offset_x Offset of content in captured frame. + * @param offset_y Offset of content in captured frame. + * @return 0 on success or -1 on failure. + */ + int + init(int in_width, int in_height, int offset_x, int offset_y) { + // This must be non-zero to tell the video core that it's a hardware encoding device. 
+ data = (void *) 0x1; + + // TODO: Support more than one CUDA device + file = std::move(open_drm_fd_for_cuda_device(0)); + if (file.el < 0) { + char string[1024]; + BOOST_LOG(error) << "Couldn't open DRM FD for CUDA device: "sv << strerror_r(errno, string, sizeof(string)); + return -1; + } + + gbm.reset(gbm::create_device(file.el)); + if (!gbm) { + BOOST_LOG(error) << "Couldn't create GBM device: ["sv << util::hex(eglGetError()).to_string_view() << ']'; + return -1; + } + + display = egl::make_display(gbm.get()); + if (!display) { + return -1; + } + + auto ctx_opt = egl::make_ctx(display.get()); + if (!ctx_opt) { + return -1; + } + + ctx = std::move(*ctx_opt); + + width = in_width; + height = in_height; + + sequence = 0; + + this->offset_x = offset_x; + this->offset_y = offset_y; + + return 0; + } + + /** + * @brief Initialize color conversion into target CUDA frame. + * @param frame Destination CUDA frame to write into. + * @param hw_frames_ctx_buf FFmpeg hardware frame context. + * @return 0 on success or -1 on failure. 
+ */ + int + set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx_buf) override { + this->hwframe.reset(frame); + this->frame = frame; + + if (!frame->buf[0]) { + if (av_hwframe_get_buffer(hw_frames_ctx_buf, frame, 0)) { + BOOST_LOG(error) << "Couldn't get hwframe for NVENC"sv; + return -1; + } + } + + auto hw_frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data; + sw_format = hw_frames_ctx->sw_format; + + auto nv12_opt = egl::create_target(frame->width, frame->height, sw_format); + if (!nv12_opt) { + return -1; + } + + auto sws_opt = egl::sws_t::make(width, height, frame->width, frame->height, sw_format); + if (!sws_opt) { + return -1; + } + + this->sws = std::move(*sws_opt); + this->nv12 = std::move(*nv12_opt); + + auto cuda_ctx = (AVCUDADeviceContext *) hw_frames_ctx->device_ctx->hwctx; + + stream = make_stream(); + if (!stream) { + return -1; + } + + cuda_ctx->stream = stream.get(); + + CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res, nv12->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), + "Couldn't register Y plane texture"); + CU_CHECK(cdf->cuGraphicsGLRegisterImage(&uv_res, nv12->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), + "Couldn't register UV plane texture"); + + return 0; + } + + /** + * @brief Convert the captured image into the target CUDA frame. + * @param img Captured screen image. + * @return 0 on success or -1 on failure. 
+ */ + int + convert(platf::img_t &img) override { + auto &descriptor = (egl::img_descriptor_t &) img; + + if (descriptor.sequence == 0) { + // For dummy images, use a blank RGB texture instead of importing a DMA-BUF + rgb = egl::create_blank(img); + } + else if (descriptor.sequence > sequence) { + sequence = descriptor.sequence; + + rgb = egl::rgb_t {}; + + auto rgb_opt = egl::import_source(display.get(), descriptor.sd); + + if (!rgb_opt) { + return -1; + } + + rgb = std::move(*rgb_opt); + } + + // Perform the color conversion and scaling in GL + sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0]); + sws.convert(nv12->buf); + + auto fmt_desc = av_pix_fmt_desc_get(sw_format); + + // Map the GL textures to read for CUDA + CUgraphicsResource resources[2] = { y_res.get(), uv_res.get() }; + CU_CHECK(cdf->cuGraphicsMapResources(2, resources, stream.get()), "Couldn't map GL textures in CUDA"); + + // Copy from the GL textures to the target CUDA frame (NOTE(review): presumably CU_CHECK returns early on failure, which would leave both resources mapped; confirm) + for (int i = 0; i < 2; i++) { + CUDA_MEMCPY2D cpy = {}; + cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY; + CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array"); + + cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE; + cpy.dstDevice = (CUdeviceptr) frame->data[i]; + cpy.dstPitch = frame->linesize[i]; + cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step) >> (i ? fmt_desc->log2_chroma_w : 0); + cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0); + + CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame"); + } + + // Unmap the textures to allow modification from GL again + CU_CHECK(cdf->cuGraphicsUnmapResources(2, resources, stream.get()), "Couldn't unmap GL textures from CUDA"); + return 0; + } + + /** + * @brief Configures shader parameters for the specified colorspace. 
+ */ + void + apply_colorspace() override { + sws.apply_colorspace(colorspace); + } + + file_t file; + gbm::gbm_t gbm; + egl::display_t display; + egl::ctx_t ctx; + + // This must be destroyed before display_t + stream_t stream; + frame_t hwframe; + + egl::sws_t sws; + egl::nv12_t nv12; + AVPixelFormat sw_format; + + int width, height; + + std::uint64_t sequence; + egl::rgb_t rgb; + + registered_resource_t y_res; + registered_resource_t uv_res; + + int offset_x, offset_y; + }; + std::unique_ptr make_avcodec_encode_device(int width, int height, bool vram) { if (init()) { @@ -245,6 +488,29 @@ namespace cuda { return cuda; } + /** + * @brief Create a GL->CUDA encoding device for consuming captured dmabufs. + * @param width Width of captured frames. + * @param height Height of captured frames. + * @param offset_x Offset of content in captured frame. + * @param offset_y Offset of content in captured frame. + * @return FFmpeg encoding device context. + */ + std::unique_ptr + make_avcodec_gl_encode_device(int width, int height, int offset_x, int offset_y) { + if (init()) { + return nullptr; + } + + auto cuda = std::make_unique(); + + if (cuda->init(width, height, offset_x, offset_y)) { + return nullptr; + } + + return cuda; + } + namespace nvfbc { static PNVFBCCREATEINSTANCE createInstance {}; static NVFBC_API_FUNCTION_LIST func { NVFBC_VERSION }; diff --git a/src/platform/linux/cuda.h b/src/platform/linux/cuda.h index d5b97d65..91564174 100644 --- a/src/platform/linux/cuda.h +++ b/src/platform/linux/cuda.h @@ -27,6 +27,18 @@ namespace cuda { } std::unique_ptr make_avcodec_encode_device(int width, int height, bool vram); + + /** + * @brief Create a GL->CUDA encoding device for consuming captured dmabufs. + * @param width Width of captured frames. + * @param height Height of captured frames. + * @param offset_x Offset of content in captured frame. + * @param offset_y Offset of content in captured frame. + * @return FFmpeg encoding device context. 
+ */ + std::unique_ptr + make_avcodec_gl_encode_device(int width, int height, int offset_x, int offset_y); + int init(); } // namespace cuda diff --git a/src/platform/linux/graphics.cpp b/src/platform/linux/graphics.cpp index a91ee8e4..e53483a9 100644 --- a/src/platform/linux/graphics.cpp +++ b/src/platform/linux/graphics.cpp @@ -647,6 +647,71 @@ namespace egl { return nv12; } + /** + * @brief Creates biplanar YUV textures to render into. + * @param width Width of the target frame. + * @param height Height of the target frame. + * @param format Format of the target frame. + * @return The new biplanar YUV target, or std::nullopt if the format is unsupported. + */ + std::optional + create_target(int width, int height, AVPixelFormat format) { + nv12_t nv12 { + EGL_NO_DISPLAY, + EGL_NO_IMAGE, + EGL_NO_IMAGE, + gl::tex_t::make(2), + gl::frame_buf_t::make(2), + }; + + GLint y_format; + GLint uv_format; + + // Determine the size of each plane element + auto fmt_desc = av_pix_fmt_desc_get(format); + if (fmt_desc->comp[0].depth <= 8) { + y_format = GL_R8; + uv_format = GL_RG8; + } + else if (fmt_desc->comp[0].depth <= 16) { + y_format = GL_R16; + uv_format = GL_RG16; + } + else { + BOOST_LOG(error) << "Unsupported target pixel format: "sv << format; + return std::nullopt; + } + + gl::ctx.BindTexture(GL_TEXTURE_2D, nv12->tex[0]); + gl::ctx.TexStorage2D(GL_TEXTURE_2D, 1, y_format, width, height); + + gl::ctx.BindTexture(GL_TEXTURE_2D, nv12->tex[1]); + gl::ctx.TexStorage2D(GL_TEXTURE_2D, 1, uv_format, + width >> fmt_desc->log2_chroma_w, height >> fmt_desc->log2_chroma_h); + + nv12->buf.bind(std::begin(nv12->tex), std::end(nv12->tex)); + + GLenum attachments[] { + GL_COLOR_ATTACHMENT0, + GL_COLOR_ATTACHMENT1 + }; + + for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]); + gl::ctx.DrawBuffers(1, &attachments[x]); + + const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f }; + 
gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); + } + + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); + + gl_drain_errors; + + return nv12; + } + void sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) { auto color_p = video::color_vectors_from_colorspace(colorspace); diff --git a/src/platform/linux/graphics.h b/src/platform/linux/graphics.h index 9c0c3fb0..d2759f7d 100644 --- a/src/platform/linux/graphics.h +++ b/src/platform/linux/graphics.h @@ -277,6 +277,16 @@ namespace egl { std::array &&fds, const surface_descriptor_t &y, const surface_descriptor_t &uv); + /** + * @brief Creates biplanar YUV textures to render into. + * @param width Width of the target frame. + * @param height Height of the target frame. + * @param format Format of the target frame. + * @return The new biplanar YUV target, or std::nullopt if the format is unsupported. + */ + std::optional + create_target(int width, int height, AVPixelFormat format); + class cursor_t: public platf::img_t { public: int x, y; diff --git a/src/platform/linux/kmsgrab.cpp b/src/platform/linux/kmsgrab.cpp index eb85a257..069900f1 100644 --- a/src/platform/linux/kmsgrab.cpp +++ b/src/platform/linux/kmsgrab.cpp @@ -20,6 +20,7 @@ #include "src/utility.h" #include "src/video.h" +#include "cuda.h" #include "graphics.h" #include "vaapi.h" #include "wayland.h" @@ -1192,6 +1193,12 @@ namespace platf { } #endif +#ifdef SUNSHINE_BUILD_CUDA + if (mem_type == mem_type_e::cuda) { + return cuda::make_avcodec_encode_device(width, height, false); + } +#endif + return std::make_unique(); } @@ -1315,6 +1322,12 @@ namespace platf { } #endif +#ifdef SUNSHINE_BUILD_CUDA + if (mem_type == mem_type_e::cuda) { + return cuda::make_avcodec_gl_encode_device(width, height, img_offset_x, img_offset_y); + } +#endif + BOOST_LOG(error) << "Unsupported pixel format for egl::display_vram_t: "sv << platf::from_pix_fmt(pix_fmt); return nullptr; } @@ -1434,13 +1447,18 @@ namespace platf { } #ifdef SUNSHINE_BUILD_VAAPI - if (!va::validate(card.render_fd.el)) { -#else - 
if (true) { -#endif + if (mem_type == mem_type_e::vaapi && !va::validate(card.render_fd.el)) { BOOST_LOG(warning) << "Monitor "sv << display_name << " doesn't support hardware encoding. Reverting back to GPU -> RAM -> GPU"sv; return -1; } +#endif + +#ifndef SUNSHINE_BUILD_CUDA + if (mem_type == mem_type_e::cuda) { + BOOST_LOG(warning) << "Attempting to use NVENC without CUDA support. Reverting back to GPU -> RAM -> GPU"sv; + return -1; + } +#endif return 0; } @@ -1452,7 +1470,7 @@ namespace platf { std::shared_ptr kms_display(mem_type_e hwdevice_type, const std::string &display_name, const ::video::config_t &config) { - if (hwdevice_type == mem_type_e::vaapi) { + if (hwdevice_type == mem_type_e::vaapi || hwdevice_type == mem_type_e::cuda) { auto disp = std::make_shared(hwdevice_type); if (!disp->init(display_name, config)) { diff --git a/src/platform/linux/wlgrab.cpp b/src/platform/linux/wlgrab.cpp index 24791d0d..23066773 100644 --- a/src/platform/linux/wlgrab.cpp +++ b/src/platform/linux/wlgrab.cpp @@ -6,6 +6,8 @@ #include "src/main.h" #include "src/video.h" + +#include "cuda.h" #include "vaapi.h" #include "wayland.h" @@ -224,6 +226,12 @@ namespace wl { } #endif +#ifdef SUNSHINE_BUILD_CUDA + if (mem_type == platf::mem_type_e::cuda) { + return cuda::make_avcodec_encode_device(width, height, false); + } +#endif + return std::make_unique(); } @@ -336,6 +344,12 @@ namespace wl { } #endif +#ifdef SUNSHINE_BUILD_CUDA + if (mem_type == platf::mem_type_e::cuda) { + return cuda::make_avcodec_gl_encode_device(width, height, 0, 0); + } +#endif + return std::make_unique(); } @@ -358,7 +372,7 @@ namespace platf { return nullptr; } - if (hwdevice_type == platf::mem_type_e::vaapi) { + if (hwdevice_type == platf::mem_type_e::vaapi || hwdevice_type == platf::mem_type_e::cuda) { auto wlr = std::make_shared(); if (wlr->init(hwdevice_type, display_name, config)) { return nullptr;