Use AVVideoScalingModeResizeAspect instead of abusing extended pixels

This not only eliminates the hand-rolled aspect ratio correction (which didn't actually handle scaling), but also avoids having to write to the GPU frames to add padding, which improves performance.
2025-02-16 21:39:52 +00:00 · 2023-10-03 18:44:17 -05:00 · 2023-10-03 18:44:17 -05:00 · ca041f2934
commit ca041f2934
parent a29d2e11ea
4 changed files with 12 additions and 91 deletions
--- a/src/platform/macos/av_video.h
+++ b/src/platform/macos/av_video.h
@ -20,11 +20,6 @@ struct CaptureSession {
@property (nonatomic, assign) OSType pixelFormat;
@property (nonatomic, assign) int frameWidth;
@property (nonatomic, assign) int frameHeight;
-@property (nonatomic, assign) float scaling;
-@property (nonatomic, assign) int paddingLeft;
-@property (nonatomic, assign) int paddingRight;
-@property (nonatomic, assign) int paddingTop;
-@property (nonatomic, assign) int paddingBottom;

 typedef bool (^FrameCallbackBlock)(CMSampleBufferRef);

--- a/src/platform/macos/av_video.m
+++ b/src/platform/macos/av_video.m
@ -39,11 +39,6 @@
  self.pixelFormat = kCVPixelFormatType_32BGRA;
  self.frameWidth = CGDisplayModeGetPixelWidth(mode);
  self.frameHeight = CGDisplayModeGetPixelHeight(mode);
-  self.scaling = CGDisplayPixelsWide(displayID) / CGDisplayModeGetPixelWidth(mode);
-  self.paddingLeft = 0;
-  self.paddingRight = 0;
-  self.paddingTop = 0;
-  self.paddingBottom = 0;
  self.minFrameDuration = CMTimeMake(1, frameRate);
  self.session = [[AVCaptureSession alloc] init];
  self.videoOutputs = [[NSMapTable alloc] init];
@ -77,48 +72,8 @@
 }

 - (void)setFrameWidth:(int)frameWidth frameHeight:(int)frameHeight {
-  CGImageRef screenshot = CGDisplayCreateImage(self.displayID);
-
  self.frameWidth = frameWidth;
  self.frameHeight = frameHeight;
-
-  double screenRatio = (double) CGImageGetWidth(screenshot) / (double) CGImageGetHeight(screenshot);
-  double streamRatio = (double) frameWidth / (double) frameHeight;
-
-  if (screenRatio < streamRatio) {
-    int padding = frameWidth - (frameHeight * screenRatio);
-    self.paddingLeft = padding / 2;
-    self.paddingRight = padding - self.paddingLeft;
-    self.paddingTop = 0;
-    self.paddingBottom = 0;
-  }
-  else {
-    int padding = frameHeight - (frameWidth / screenRatio);
-    self.paddingLeft = 0;
-    self.paddingRight = 0;
-    self.paddingTop = padding / 2;
-    self.paddingBottom = padding - self.paddingTop;
-  }
-
-  // XXX: if the streamed image is larger than the native resolution, we add a black box around
-  // the frame. Instead the frame should be resized entirely.
-  int delta_width = frameWidth - (CGImageGetWidth(screenshot) + self.paddingLeft + self.paddingRight);
-  if (delta_width > 0) {
-    int adjust_left = delta_width / 2;
-    int adjust_right = delta_width - adjust_left;
-    self.paddingLeft += adjust_left;
-    self.paddingRight += adjust_right;
-  }
-
-  int delta_height = frameHeight - (CGImageGetHeight(screenshot) + self.paddingTop + self.paddingBottom);
-  if (delta_height > 0) {
-    int adjust_top = delta_height / 2;
-    int adjust_bottom = delta_height - adjust_top;
-    self.paddingTop += adjust_top;
-    self.paddingBottom += adjust_bottom;
-  }
-
-  CFRelease(screenshot);
 }

 - (dispatch_semaphore_t)capture:(FrameCallbackBlock)frameCallback {
@ -128,11 +83,8 @@
    [videoOutput setVideoSettings:@{
      (NSString *) kCVPixelBufferPixelFormatTypeKey: [NSNumber numberWithUnsignedInt:self.pixelFormat],
      (NSString *) kCVPixelBufferWidthKey: [NSNumber numberWithInt:self.frameWidth],
-      (NSString *) kCVPixelBufferExtendedPixelsRightKey: [NSNumber numberWithInt:self.paddingRight],
-      (NSString *) kCVPixelBufferExtendedPixelsLeftKey: [NSNumber numberWithInt:self.paddingLeft],
-      (NSString *) kCVPixelBufferExtendedPixelsTopKey: [NSNumber numberWithInt:self.paddingTop],
-      (NSString *) kCVPixelBufferExtendedPixelsBottomKey: [NSNumber numberWithInt:self.paddingBottom],
-      (NSString *) kCVPixelBufferHeightKey: [NSNumber numberWithInt:self.frameHeight]
+      (NSString *) kCVPixelBufferHeightKey: [NSNumber numberWithInt:self.frameHeight],
+      (NSString *) AVVideoScalingModeKey: AVVideoScalingModeResizeAspect,
    }];

    dispatch_queue_attr_t qos = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL,
--- a/src/platform/macos/display.mm
+++ b/src/platform/macos/display.mm
@ -66,11 +66,9 @@ namespace platf {
        av_img->pixel_buffer = pixelBuffer;
        img_out->data = (uint8_t *) CVPixelBufferGetBaseAddress(pixelBuffer);

-        size_t extraPixels[4];
-        CVPixelBufferGetExtendedPixels(pixelBuffer, &extraPixels[0], &extraPixels[1], &extraPixels[2], &extraPixels[3]);

-        img_out->width = CVPixelBufferGetWidth(pixelBuffer) + extraPixels[0] + extraPixels[1];
-        img_out->height = CVPixelBufferGetHeight(pixelBuffer) + extraPixels[2] + extraPixels[3];
+        img_out->width = CVPixelBufferGetWidth(pixelBuffer);
+        img_out->height = CVPixelBufferGetHeight(pixelBuffer);
        img_out->row_pitch = CVPixelBufferGetBytesPerRow(pixelBuffer);
        img_out->pixel_pitch = img_out->row_pitch / img_out->width;

@ -136,11 +134,9 @@ namespace platf {
        av_img->pixel_buffer = pixelBuffer;
        img->data = (uint8_t *) CVPixelBufferGetBaseAddress(pixelBuffer);

-        size_t extraPixels[4];
-        CVPixelBufferGetExtendedPixels(pixelBuffer, &extraPixels[0], &extraPixels[1], &extraPixels[2], &extraPixels[3]);

-        img->width = CVPixelBufferGetWidth(pixelBuffer) + extraPixels[0] + extraPixels[1];
-        img->height = CVPixelBufferGetHeight(pixelBuffer) + extraPixels[2] + extraPixels[3];
+        img->width = CVPixelBufferGetWidth(pixelBuffer);
+        img->height = CVPixelBufferGetHeight(pixelBuffer);
        img->row_pitch = CVPixelBufferGetBytesPerRow(pixelBuffer);
        img->pixel_pitch = img->row_pitch / img->width;

--- a/src/platform/macos/nv12_zero_device.cpp
+++ b/src/platform/macos/nv12_zero_device.cpp
@ -26,37 +26,15 @@ namespace platf {

    av_img_t *av_img = (av_img_t *) &img;

-    size_t left_pad, right_pad, top_pad, bottom_pad;
-    CVPixelBufferGetExtendedPixels(av_img->pixel_buffer, &left_pad, &right_pad, &top_pad, &bottom_pad);
-
-    const uint8_t *data = (const uint8_t *) CVPixelBufferGetBaseAddressOfPlane(av_img->pixel_buffer, 0) - left_pad - (top_pad * img.width);
-
-    int result = av_image_fill_arrays(av_frame->data, av_frame->linesize, data, (AVPixelFormat) av_frame->format, img.width, img.height, 32);
-
-    // We will create the black bars for the padding top/bottom or left/right here in very cheap way.
-    // The luminance is 0, therefore, we simply need to set the chroma values to 128 for each pixel
-    // for black bars (instead of green with chroma 0). However, this only works 100% correct, when
-    // the resolution is devisable by 32. This could be improved by calculating the chroma values for
-    // the outer content pixels, which should introduce only a minor performance hit.
-    //
-    // XXX: Improve the algorithm to take into account the outer pixels
-
-    size_t uv_plane_height = CVPixelBufferGetHeightOfPlane(av_img->pixel_buffer, 1);
-
-    if (left_pad || right_pad) {
-      for (int l = 0; l < uv_plane_height + (top_pad / 2); l++) {
-        int line = l * av_frame->linesize[1];
-        memset((void *) &av_frame->data[1][line], 128, (size_t) left_pad);
-        memset((void *) &av_frame->data[1][line + img.width - right_pad], 128, right_pad);
-      }
+    // Set up the data fields in the AVFrame to point into the mapped CVPixelBuffer
+    int planes = CVPixelBufferGetPlaneCount(av_img->pixel_buffer->buf);
+    for (int i = 0; i < planes; i++) {
+      av_frame->linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(av_img->pixel_buffer->buf, i);
+      av_frame->data[i] = (uint8_t *) CVPixelBufferGetBaseAddressOfPlane(av_img->pixel_buffer->buf, i);
    }

-    if (top_pad || bottom_pad) {
-      memset((void *) &av_frame->data[1][0], 128, (top_pad / 2) * av_frame->linesize[1]);
-      memset((void *) &av_frame->data[1][((top_pad / 2) + uv_plane_height) * av_frame->linesize[1]], 128, bottom_pad / 2 * av_frame->linesize[1]);
-    }

-    return result > 0 ? 0 : -1;
+    return 0;
  }

  int