From a7171d77db28c8b6f79eecae5f21f955afc05c7c Mon Sep 17 00:00:00 2001
From: Cameron Gutman <aicommander@gmail.com>
Date: Sat, 10 Jul 2021 11:24:13 -0500
Subject: [PATCH] Reduce CPU usage with Linux NVENC

The internal assembly routines inside libswscale perform the
RGB->YUV conversion into a fully planar output format. When we
request NV12 output, they then have to perform an additional
YUV420P->NV12 conversion step.

NVENC can directly consume planar YUV420 (YUV420P) input frames,
so we can completely avoid this NV12 conversion and save quite a
bit of CPU time in the process.
---
 sunshine/video.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sunshine/video.cpp b/sunshine/video.cpp
index 59f3a1ec..5253723e 100644
--- a/sunshine/video.cpp
+++ b/sunshine/video.cpp
@@ -409,11 +409,13 @@ static encoder_t nvenc {
 #ifdef _WIN32
   AV_HWDEVICE_TYPE_D3D11VA,
   AV_PIX_FMT_D3D11,
+  AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
 #else
   AV_HWDEVICE_TYPE_CUDA,
   AV_PIX_FMT_CUDA,
+  // Fully planar YUV formats are more efficient for sws_scale()
+  AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
 #endif
-  AV_PIX_FMT_NV12, AV_PIX_FMT_P010,
   {
     {
       { "forced-idr"s, 1 },