Use AVVideoScalingModeResizeAspect instead of abusing extended pixels

This not only eliminates the hand-rolled aspect ratio correction (which didn't actually handle scaling),
but it also avoids us having to write to the GPU frames to add padding which improves performance.
This commit is contained in:
Cameron Gutman 2023-10-03 18:44:17 -05:00
parent a29d2e11ea
commit ca041f2934
4 changed files with 12 additions and 91 deletions

@ -20,11 +20,6 @@ struct CaptureSession {
@property (nonatomic, assign) OSType pixelFormat;
@property (nonatomic, assign) int frameWidth;
@property (nonatomic, assign) int frameHeight;
@property (nonatomic, assign) float scaling;
@property (nonatomic, assign) int paddingLeft;
@property (nonatomic, assign) int paddingRight;
@property (nonatomic, assign) int paddingTop;
@property (nonatomic, assign) int paddingBottom;
typedef bool (^FrameCallbackBlock)(CMSampleBufferRef);

@ -39,11 +39,6 @@
self.pixelFormat = kCVPixelFormatType_32BGRA;
self.frameWidth = CGDisplayModeGetPixelWidth(mode);
self.frameHeight = CGDisplayModeGetPixelHeight(mode);
self.scaling = CGDisplayPixelsWide(displayID) / CGDisplayModeGetPixelWidth(mode);
self.paddingLeft = 0;
self.paddingRight = 0;
self.paddingTop = 0;
self.paddingBottom = 0;
self.minFrameDuration = CMTimeMake(1, frameRate);
self.session = [[AVCaptureSession alloc] init];
self.videoOutputs = [[NSMapTable alloc] init];
@ -77,48 +72,8 @@
}
- (void)setFrameWidth:(int)frameWidth frameHeight:(int)frameHeight {
CGImageRef screenshot = CGDisplayCreateImage(self.displayID);
self.frameWidth = frameWidth;
self.frameHeight = frameHeight;
double screenRatio = (double) CGImageGetWidth(screenshot) / (double) CGImageGetHeight(screenshot);
double streamRatio = (double) frameWidth / (double) frameHeight;
if (screenRatio < streamRatio) {
int padding = frameWidth - (frameHeight * screenRatio);
self.paddingLeft = padding / 2;
self.paddingRight = padding - self.paddingLeft;
self.paddingTop = 0;
self.paddingBottom = 0;
}
else {
int padding = frameHeight - (frameWidth / screenRatio);
self.paddingLeft = 0;
self.paddingRight = 0;
self.paddingTop = padding / 2;
self.paddingBottom = padding - self.paddingTop;
}
// XXX: if the streamed image is larger than the native resolution, we add a black box around
// the frame. Instead the frame should be resized entirely.
int delta_width = frameWidth - (CGImageGetWidth(screenshot) + self.paddingLeft + self.paddingRight);
if (delta_width > 0) {
int adjust_left = delta_width / 2;
int adjust_right = delta_width - adjust_left;
self.paddingLeft += adjust_left;
self.paddingRight += adjust_right;
}
int delta_height = frameHeight - (CGImageGetHeight(screenshot) + self.paddingTop + self.paddingBottom);
if (delta_height > 0) {
int adjust_top = delta_height / 2;
int adjust_bottom = delta_height - adjust_top;
self.paddingTop += adjust_top;
self.paddingBottom += adjust_bottom;
}
CFRelease(screenshot);
}
- (dispatch_semaphore_t)capture:(FrameCallbackBlock)frameCallback {
@ -128,11 +83,8 @@
[videoOutput setVideoSettings:@{
(NSString *) kCVPixelBufferPixelFormatTypeKey: [NSNumber numberWithUnsignedInt:self.pixelFormat],
(NSString *) kCVPixelBufferWidthKey: [NSNumber numberWithInt:self.frameWidth],
(NSString *) kCVPixelBufferExtendedPixelsRightKey: [NSNumber numberWithInt:self.paddingRight],
(NSString *) kCVPixelBufferExtendedPixelsLeftKey: [NSNumber numberWithInt:self.paddingLeft],
(NSString *) kCVPixelBufferExtendedPixelsTopKey: [NSNumber numberWithInt:self.paddingTop],
(NSString *) kCVPixelBufferExtendedPixelsBottomKey: [NSNumber numberWithInt:self.paddingBottom],
(NSString *) kCVPixelBufferHeightKey: [NSNumber numberWithInt:self.frameHeight]
(NSString *) kCVPixelBufferHeightKey: [NSNumber numberWithInt:self.frameHeight],
(NSString *) AVVideoScalingModeKey: AVVideoScalingModeResizeAspect,
}];
dispatch_queue_attr_t qos = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL,

@ -66,11 +66,9 @@ namespace platf {
av_img->pixel_buffer = pixelBuffer;
img_out->data = (uint8_t *) CVPixelBufferGetBaseAddress(pixelBuffer);
size_t extraPixels[4];
CVPixelBufferGetExtendedPixels(pixelBuffer, &extraPixels[0], &extraPixels[1], &extraPixels[2], &extraPixels[3]);
img_out->width = CVPixelBufferGetWidth(pixelBuffer) + extraPixels[0] + extraPixels[1];
img_out->height = CVPixelBufferGetHeight(pixelBuffer) + extraPixels[2] + extraPixels[3];
img_out->width = CVPixelBufferGetWidth(pixelBuffer);
img_out->height = CVPixelBufferGetHeight(pixelBuffer);
img_out->row_pitch = CVPixelBufferGetBytesPerRow(pixelBuffer);
img_out->pixel_pitch = img_out->row_pitch / img_out->width;
@ -136,11 +134,9 @@ namespace platf {
av_img->pixel_buffer = pixelBuffer;
img->data = (uint8_t *) CVPixelBufferGetBaseAddress(pixelBuffer);
size_t extraPixels[4];
CVPixelBufferGetExtendedPixels(pixelBuffer, &extraPixels[0], &extraPixels[1], &extraPixels[2], &extraPixels[3]);
img->width = CVPixelBufferGetWidth(pixelBuffer) + extraPixels[0] + extraPixels[1];
img->height = CVPixelBufferGetHeight(pixelBuffer) + extraPixels[2] + extraPixels[3];
img->width = CVPixelBufferGetWidth(pixelBuffer);
img->height = CVPixelBufferGetHeight(pixelBuffer);
img->row_pitch = CVPixelBufferGetBytesPerRow(pixelBuffer);
img->pixel_pitch = img->row_pitch / img->width;

@ -26,37 +26,15 @@ namespace platf {
av_img_t *av_img = (av_img_t *) &img;
size_t left_pad, right_pad, top_pad, bottom_pad;
CVPixelBufferGetExtendedPixels(av_img->pixel_buffer, &left_pad, &right_pad, &top_pad, &bottom_pad);
const uint8_t *data = (const uint8_t *) CVPixelBufferGetBaseAddressOfPlane(av_img->pixel_buffer, 0) - left_pad - (top_pad * img.width);
int result = av_image_fill_arrays(av_frame->data, av_frame->linesize, data, (AVPixelFormat) av_frame->format, img.width, img.height, 32);
// We will create the black bars for the padding top/bottom or left/right here in very cheap way.
// The luminance is 0, therefore, we simply need to set the chroma values to 128 for each pixel
// for black bars (instead of green with chroma 0). However, this only works 100% correct, when
// the resolution is devisable by 32. This could be improved by calculating the chroma values for
// the outer content pixels, which should introduce only a minor performance hit.
//
// XXX: Improve the algorithm to take into account the outer pixels
size_t uv_plane_height = CVPixelBufferGetHeightOfPlane(av_img->pixel_buffer, 1);
if (left_pad || right_pad) {
for (int l = 0; l < uv_plane_height + (top_pad / 2); l++) {
int line = l * av_frame->linesize[1];
memset((void *) &av_frame->data[1][line], 128, (size_t) left_pad);
memset((void *) &av_frame->data[1][line + img.width - right_pad], 128, right_pad);
}
// Set up the data fields in the AVFrame to point into the mapped CVPixelBuffer
int planes = CVPixelBufferGetPlaneCount(av_img->pixel_buffer->buf);
for (int i = 0; i < planes; i++) {
av_frame->linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(av_img->pixel_buffer->buf, i);
av_frame->data[i] = (uint8_t *) CVPixelBufferGetBaseAddressOfPlane(av_img->pixel_buffer->buf, i);
}
if (top_pad || bottom_pad) {
memset((void *) &av_frame->data[1][0], 128, (top_pad / 2) * av_frame->linesize[1]);
memset((void *) &av_frame->data[1][((top_pad / 2) + uv_plane_height) * av_frame->linesize[1]], 128, bottom_pad / 2 * av_frame->linesize[1]);
}
return result > 0 ? 0 : -1;
return 0;
}
int