WebRTC IOS视频硬编码流程及其中传递的CVPixelBufferRef

IOS视频硬编码流程


WebRTC中默认摄像头采集:
RTCCameraVideoCapturer:

src/sdk/objc/components/capturer/RTCCameraVideoCapturer.m

// Capture-output callback (excerpt; code elided from the article is marked "...").
// Wraps the captured pixel buffer in an RTCVideoFrame and hands it to the delegate.
- (void)captureOutput:(AVCaptureOutput *)captureOutput
    didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
           fromConnection:(AVCaptureConnection *)connection {
 ...

  // `pixelBuffer` is defined in the elided code above; it is wrapped, not copied, here.
  RTCCVPixelBuffer *rtcPixelBuffer = [[RTCCVPixelBuffer alloc] initWithPixelBuffer:pixelBuffer];
  // Presentation timestamp of the sample buffer, converted from seconds to nanoseconds.
  int64_t timeStampNs = CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(sampleBuffer)) *
      kNanosecondsPerSecond;
  // Bundle the buffer, the current _rotation value and the timestamp into one frame object.
  RTCVideoFrame *videoFrame = [[RTCVideoFrame alloc] initWithBuffer:rtcPixelBuffer
                                                           rotation:_rotation
                                                        timeStampNs:timeStampNs];
  // Deliver the frame to the capturer's delegate.
  [self.delegate capturer:self didCaptureVideoFrame:videoFrame];
}

先看看RTCCVPixelBuffer和RTCVideoFrame:
RTCCVPixelBuffer只是简单存储了CVPixelBufferRef pixelBuffer

src/sdk/objc/components/video_frame_buffer/RTCCVPixelBuffer.mm

// Convenience initializer: wraps the entire pixel buffer. The adapted size and the crop
// size both equal the buffer's own dimensions and the crop origin is (0, 0).
- (instancetype)initWithPixelBuffer:(CVPixelBufferRef)pixelBuffer {
  const size_t fullWidth = CVPixelBufferGetWidth(pixelBuffer);
  const size_t fullHeight = CVPixelBufferGetHeight(pixelBuffer);
  return [self initWithPixelBuffer:pixelBuffer
                      adaptedWidth:fullWidth
                     adaptedHeight:fullHeight
                         cropWidth:fullWidth
                        cropHeight:fullHeight
                             cropX:0
                             cropY:0];
}

// Initializer taking explicit scaling and cropping parameters.
// Stores the adapted (output) size, the crop rectangle and the pixel buffer's own
// dimensions, and retains the pixel buffer.
- (instancetype)initWithPixelBuffer:(CVPixelBufferRef)pixelBuffer
                       adaptedWidth:(int)adaptedWidth
                      adaptedHeight:(int)adaptedHeight
                          cropWidth:(int)cropWidth
                         cropHeight:(int)cropHeight
                              cropX:(int)cropX
                              cropY:(int)cropY {
  self = [super init];
  if (!self) {
    return self;
  }

  // Keep the underlying buffer and record its native dimensions.
  _pixelBuffer = pixelBuffer;
  CVBufferRetain(_pixelBuffer);
  _bufferWidth = CVPixelBufferGetWidth(_pixelBuffer);
  _bufferHeight = CVPixelBufferGetHeight(_pixelBuffer);

  // Size reported for this buffer after adaptation.
  _width = adaptedWidth;
  _height = adaptedHeight;

  // Crop rectangle; the origin is rounded down to even pixels by clearing the low bit.
  _cropWidth = cropWidth;
  _cropHeight = cropHeight;
  _cropX = cropX & ~1;
  _cropY = cropY & ~1;

  return self;
}

RTCVideoFrame又简单存储了RTCVideoFrameBuffer

src/sdk/objc/base/RTCVideoFrame.mm

/**
 * Initializes a video frame that stores the given buffer, rotation and timestamp.
 *
 * The buffer is only stored in the _buffer ivar here; no pixel data is read.
 *
 * @param buffer      Frame buffer holding the pixel data.
 * @param rotation    Rotation associated with the frame.
 * @param timeStampNs Frame timestamp in nanoseconds.
 */
- (instancetype)initWithBuffer:(id<RTCVideoFrameBuffer>)buffer
                      rotation:(RTCVideoRotation)rotation
                   timeStampNs:(int64_t)timeStampNs {
  if (self = [super init]) {
    _buffer = buffer;
    _rotation = rotation;
    _timeStampNs = timeStampNs;
  }

  return self;
}

回到didCaptureVideoFrame:
RTCVideoSource:

src/sdk/objc/api/peerconnection/RTCVideoSource.mm

// Capturer callback: forwards each captured frame to the native source object obtained
// from _nativeVideoSource via getObjCVideoSource(), by calling its OnCapturedFrame().
- (void)capturer:(RTCVideoCapturer *)capturer didCaptureVideoFrame:(RTCVideoFrame *)frame {
  getObjCVideoSource(_nativeVideoSource)->OnCapturedFrame(frame);
}

ObjCVideoTrackSource:

src/sdk/objc/native/src/objc_video_track_source.mm

// Receives a frame from the Objective-C capturer, adapts it (crop/scale) if required,
// wraps it in a native VideoFrameBuffer and passes it on through OnFrame().
//
// The frame is dropped (early return) when AdaptFrame() returns false.
void ObjCVideoTrackSource::OnCapturedFrame(RTCVideoFrame *frame) {
  // Capture timestamp in microseconds, plus its translation relative to the current
  // rtc::TimeMicros() reading.
  const int64_t timestamp_us = frame.timeStampNs / rtc::kNumNanosecsPerMicrosec;
  const int64_t translated_timestamp_us =
      timestamp_aligner_.TranslateTimestamp(timestamp_us, rtc::TimeMicros());

  int adapted_width;
  int adapted_height;
  int crop_width;
  int crop_height;
  int crop_x;
  int crop_y;
  if (!AdaptFrame(frame.width,
                  frame.height,
                  timestamp_us,
                  &adapted_width,
                  &adapted_height,
                  &crop_width,
                  &crop_height,
                  &crop_x,
                  &crop_y)) {
    return;
  }

  rtc::scoped_refptr<VideoFrameBuffer> buffer;
  if (adapted_width == frame.width && adapted_height == frame.height) {
    // No adaption - optimized path.
    buffer = new rtc::RefCountedObject<ObjCFrameBuffer>(frame.buffer);
  } else if ([frame.buffer isKindOfClass:[RTCCVPixelBuffer class]]) {
    // Adapted CVPixelBuffer frame.
    // Only the crop/scale parameters change: a new RTCCVPixelBuffer is created around the
    // same CVPixelBufferRef, with the new crop offset added to the existing one.
    RTCCVPixelBuffer *rtcPixelBuffer = (RTCCVPixelBuffer *)frame.buffer;
    buffer = new rtc::RefCountedObject<ObjCFrameBuffer>([[RTCCVPixelBuffer alloc]
        initWithPixelBuffer:rtcPixelBuffer.pixelBuffer
               adaptedWidth:adapted_width
              adaptedHeight:adapted_height
                  cropWidth:crop_width
                 cropHeight:crop_height
                      cropX:crop_x + rtcPixelBuffer.cropX
                      cropY:crop_y + rtcPixelBuffer.cropY]);
  } else {
    // Adapted I420 frame.
    // TODO(magjed): Optimize this I420 path.
    rtc::scoped_refptr<I420Buffer> i420_buffer = I420Buffer::Create(adapted_width, adapted_height);
    buffer = new rtc::RefCountedObject<ObjCFrameBuffer>(frame.buffer);
    i420_buffer->CropAndScaleFrom(*buffer->ToI420(), crop_x, crop_y, crop_width, crop_height);
    buffer = i420_buffer;
  }

  // Applying rotation is only supported for legacy reasons and performance is
  // not critical here.
  VideoRotation rotation = static_cast<VideoRotation>(frame.rotation);
  if (apply_rotation() && rotation != kVideoRotation_0) {
    buffer = I420Buffer::Rotate(*buffer->ToI420(), rotation);
    rotation = kVideoRotation_0;
  }

  OnFrame(VideoFrame(buffer, rotation, translated_timestamp_us));
}

默认情况下既不裁剪(crop)也不缩放,直接将RTCVideoFrameBuffer包装为ObjCFrameBuffer:

src/sdk/objc/native/src/objc_frame_buffer.mm

// Wraps an Objective-C frame buffer. The object itself is stored in frame_buffer_ and its
// width and height are read once and cached in width_ / height_.
ObjCFrameBuffer::ObjCFrameBuffer(id<RTCVideoFrameBuffer> frame_buffer)
    : frame_buffer_(frame_buffer), width_(frame_buffer.width), height_(frame_buffer.height) {}

继续调用OnFrame:
ObjCVideoTrackSource派生于AdaptedVideoTrackSource,OnFrame是父类AdaptedVideoTrackSource实现的:

src/media/base/adaptedvideotracksource.cc

// Forwards a frame to the broadcaster. If rotation must be applied, the frame carries a
// pending rotation and its buffer is of type kI420, a rotated I420 copy is broadcast with
// rotation reset to kVideoRotation_0; otherwise the frame is broadcast unchanged.
void AdaptedVideoTrackSource::OnFrame(const webrtc::VideoFrame& frame) {
  rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer(
      frame.video_frame_buffer());
  /* Note that this is a "best effort" approach to
     wants.rotation_applied; apply_rotation_ can change from false to
     true between the check of apply_rotation() and the call to
     broadcaster_.OnFrame(), in which case we generate a frame with
     pending rotation despite some sink with wants.rotation_applied ==
     true was just added. The VideoBroadcaster enforces
     synchronization for us in this case, by not passing the frame on
     to sinks which don't want it. */
  if (apply_rotation() && frame.rotation() != webrtc::kVideoRotation_0 &&
      buffer->type() == webrtc::VideoFrameBuffer::Type::kI420) {
    /* Apply pending rotation. */
    broadcaster_.OnFrame(webrtc::VideoFrame(
        webrtc::I420Buffer::Rotate(*buffer->GetI420(), frame.rotation()),
        webrtc::kVideoRotation_0, frame.timestamp_us()));
  } else {
    broadcaster_.OnFrame(frame);
  }
}

broadcaster_是VideoBroadcaster:

src/media/base/videobroadcaster.cc

// Delivers `frame` to every registered sink while holding sinks_and_wants_lock_.
// - A sink that wants rotation already applied is skipped when the frame still carries a
//   pending rotation.
// - A sink that wants black frames receives a black frame with the same size, rotation
//   and timestamp instead of the real one.
void VideoBroadcaster::OnFrame(const webrtc::VideoFrame& frame) {
  rtc::CritScope lock(&sinks_and_wants_lock_);
  const bool has_pending_rotation =
      frame.rotation() != webrtc::kVideoRotation_0;
  for (auto& entry : sink_pairs()) {
    if (has_pending_rotation && entry.wants.rotation_applied) {
      // OnFrame calls are not synchronized with updates to the sink wants, so
      // right after rotation_applied becomes true a few frames can still
      // arrive with rotation pending. Such sinks must not see them.
      RTC_LOG(LS_VERBOSE) << "Discarding frame with unexpected rotation.";
      continue;
    }
    if (!entry.wants.black_frames) {
      entry.sink->OnFrame(frame);
      continue;
    }
    // Substitute a black frame that keeps the original geometry and timing.
    const webrtc::VideoFrame black_frame(
        GetBlackFrameBuffer(frame.width(), frame.height()), frame.rotation(),
        frame.timestamp_us());
    entry.sink->OnFrame(black_frame);
  }
}

当videotrack.isEnabled=false时(在上层api可以调用此设置),创建blackframe,也就是黑屏帧,此时不发送摄像头帧而只是发黑屏帧。
正常情况下发送视频frame。


sink->OnFrame到VideoStreamEncoder::OnFrame

sink的定义:VideoSinkInterface<webrtc::VideoFrame>* sink;
VideoStreamEncoder派生于VideoStreamEncoderInterface:
sink->OnFrame实际上调用的是VideoStreamEncoder::OnFrame
VideoStreamEncoder创建及和sink绑定的流程:
绕了很大一圈,代码都看花了眼:

 src/pc/peerconnection.cc
PeerConnection::SetLocalDescription-->
ApplyLocalDescription-->
CreateChannels

PeerConnection::SetRemoteDescription-->
ApplyRemoteDescription->
CreateChannels

音频通道:
CreateChannels->
CreateVoiceChannel->
channel_manager()->CreateVoiceChannel->
media_engine_->voice().CreateMediaChannel->
...音频下面不分析了,主要分析视频

视频通道:
CreateChannels->
CreateVideoChannel->
channel_manager()->CreateVideoChannel->
media_engine_->video().CreateMediaChannel->
VideoMediaChannel* WebRtcVideoEngine::CreateMediaChannel->
new WebRtcVideoChannel

至此创建了WebRtcVideoChannel

ApplyLocalDescription->
UpdateSessionState

ApplyRemoteDescription->
UpdateSessionState

UpdateSessionState->
PushdownMediaDescription->
channel->SetLocalContent
channel->SetRemoteContent

channel->SetLocalContent->BaseChannel::SetLocalContent_w->
VideoChannel::SetLocalContent_w->
UpdateLocalStreams_w->
media_channel()->AddSendStream->
WebRtcVideoChannel::AddSendStream->
new WebRtcVideoSendStream->
WebRtcVideoChannel::WebRtcVideoSendStream::RecreateWebRtcStream()->
stream_ = call_->CreateVideoSendStream->,stream_为VideoSendStream
stream_->SetSource(this, GetDegradationPreference());

channel->SetRemoteContent->BaseChannel::SetRemoteContent_w
->VideoChannel::SetRemoteContent_w->
UpdateRemoteStreams_w->
BaseChannel::AddRecvStream_w->
media_channel()->AddRecvStream->
WebRtcVideoChannel::AddRecvStream->
new WebRtcVideoReceiveStream->
WebRtcVideoChannel::WebRtcVideoReceiveStream::RecreateWebRtcVideoStream() ->
stream_ = call_->CreateVideoReceiveStream->,stream_为VideoReceiveStream
stream_->Start();

至此创建了VideoSendStream和VideoReceiveStream,下面只分析VideoSendStream

VideoSendStream构造函数->
video_stream_encoder_ = CreateVideoStreamEncoder,返回的是VideoStreamEncoder

上面的:
stream_->SetSource(this, GetDegradationPreference());时的this是WebRtcVideoSendStream,
VideoStreamEncoder::SetSource->
source->AddOrUpdateSink->
WebRtcVideoSendStream::AddOrUpdateSink->
source_->AddOrUpdateSink,这里的source_在WebRtcVideoChannel::WebRtcVideoSendStream::SetVideoSend中被设置

下面来看看WebRtcVideoChannel::WebRtcVideoSendStream::SetVideoSend是怎么被调用的:

PeerConnection::CreateSender->
sender = RtpSenderProxyWithInternal<RtpSenderInternal>::Create(
        signaling_thread(), new VideoRtpSender(worker_thread(), id)); ->
sender->SetTrack(track)->track_->

VideoRtpSender::SetVideoSend() ->
media_channel_->SetVideoSend(ssrc_, &options, track_);->
WebRtcVideoChannel::WebRtcVideoSendStream::SetVideoSend

晕死,绕了好大一圈。


class VideoStreamEncoder : public VideoStreamEncoderInterface,
                           private EncodedImageCallback,
                           // Protected only to provide access to tests.
                           protected AdaptationObserverInterface {

VideoStreamEncoderInterface又派生于rtc::VideoSinkInterface<VideoFrame>

VideoStreamEncoder::OnFrame:

src/video/video_stream_encoder.cc

 // Sink entry point for captured frames (excerpt; elided code is marked "...").
 // Posts a task to encoder_queue_ that either encodes the frame or drops it when a newer
 // frame has already been posted.
 void VideoStreamEncoder::OnFrame(const VideoFrame& video_frame) {
  RTC_DCHECK_RUNS_SERIALIZED(&incoming_frame_race_checker_);
  ...
  // incoming_frame, post_time_us and log_stats are defined in the elided code above.
  encoder_queue_.PostTask(
      [this, incoming_frame, post_time_us, log_stats]() {
        RTC_DCHECK_RUN_ON(&encoder_queue_);
        encoder_stats_observer_->OnIncomingFrame(incoming_frame.width(),
                                                 incoming_frame.height());
        ++captured_frame_count_;
        // NOTE(review): presumably an atomic counter whose fetch_sub returns the value
        // before the decrement; a result of 1 would then mean this is the only frame
        // still waiting - confirm against the member declaration.
        const int posted_frames_waiting_for_encode =
            posted_frames_waiting_for_encode_.fetch_sub(1);
        RTC_DCHECK_GT(posted_frames_waiting_for_encode, 0);
        if (posted_frames_waiting_for_encode == 1) {
          MaybeEncodeVideoFrame(incoming_frame, post_time_us);
        } else {
          // There is a newer frame in flight. Do not encode this frame.
          RTC_LOG(LS_VERBOSE)
              << "Incoming frame dropped due to that the encoder is blocked.";
          ++dropped_frame_count_;
          encoder_stats_observer_->OnFrameDropped(
              VideoStreamEncoderObserver::DropReason::kEncoderQueue);
        }
        ...
      });
}

PostTask中的MaybeEncodeVideoFrame:

// Excerpt: the checks that run before encoding are elided ("..."); the part shown simply
// forwards the frame and its post time to EncodeVideoFrame().
void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
                                               int64_t time_when_posted_us) {
  ...
  EncodeVideoFrame(video_frame, time_when_posted_us);
}

继续EncodeVideoFrame:

// Excerpt: frame preparation is elided ("..."); the part shown passes the prepared frame
// to video_sender_ with no codec-specific info (nullptr) and the current encoder_info_.
void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame,
                                          int64_t time_when_posted_us) {
 ...
  // out_frame is produced by the elided code above.
  video_sender_.AddVideoFrame(out_frame, nullptr, encoder_info_);
}
src/modules/video_coding/video_sender.cc

int32_t VideoSender::AddVideoFrame(
    const VideoFrame& videoFrame,
    const CodecSpecificInfo* codecSpecificInfo,
    absl::optional encoder_info) {
...
  int32_t ret =
      _encoder->Encode(converted_frame, codecSpecificInfo, next_frame_types);
  if (ret < 0) {
    RTC_LOG(LS_ERROR) << "Failed to encode frame. Error code: " << ret;
    return ret;
  }
...
}

以上如果是软编码而且不是I420格式,会转换成I420。

int32_t ret = _encoder->Encode(converted_frame, codecSpecificInfo, next_frame_types);
这儿的_encoder是VCMGenericEncoder

VCMGenericEncoder

src/modules/video_coding/generic_encoder.cc

int32_t VCMGenericEncoder::Encode(const VideoFrame& frame,
                                  const CodecSpecificInfo* codec_specific,
                                  const std::vector& frame_types) {
...
  return encoder_->Encode(frame, codec_specific, &frame_types);
}

这儿的encoder_是ObjCVideoEncoder
ObjCVideoEncoder:

  // Excerpt (elided code is marked "..."): converts the native frame with
  // ToObjCVideoFrame() and calls the Objective-C encoder_. codecSpecificInfo is always
  // passed as nil here; rtcFrameTypes is built in the elided code.
  int32_t Encode(const VideoFrame &frame,
                 const CodecSpecificInfo *codec_specific_info,
                 const std::vector<FrameType> *frame_types) {
...
    return [encoder_ encode:ToObjCVideoFrame(frame)
          codecSpecificInfo:nil
                 frameTypes:rtcFrameTypes];
  }

这儿的encoder_是RTCVideoEncoder,也就是RTCVideoEncoderH264
RTCVideoEncoderH264:

- (NSInteger)encode:(RTCVideoFrame *)frame
    codecSpecificInfo:(nullable id)codecSpecificInfo
           frameTypes:(NSArray *)frameTypes {
...
  OSStatus status = VTCompressionSessionEncodeFrame(_compressionSession,
                                                    pixelBuffer,
                                                    presentationTimeStamp,
                                                    kCMTimeInvalid,
                                                    frameProperties,
                                                    encodeParams.release(),
                                                    nullptr);
 ...

正常摄像头采集情况下CVPixelBufferRef pixelBuffer经过层层传递最终到达VTCompressionSessionEncodeFrame。

格式转换


正常从摄像头采集的pixelBuffer不需要格式转换;如果确实需要转换,WebRTC会自动进行,但只支持以下四种格式转换为I420:

kCVPixelFormatType_420YpCbCr8BiPlanarFullRange
kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange
kCVPixelFormatType_32BGRA
kCVPixelFormatType_32ARGB

如果中途不需要转换的话,传进来的格式可以是其他格式。比如RTCCameraVideoCapturer采集时的格式是系统默认的第一种格式,在mac平台通常不是以上四种格式,比如我的iMac默认是kCVPixelFormatType_422YpCbCr8,然而传递到编码的过程中都不需要读取数据和转换格式,而直接将采集到的数据送进系统的硬编码器中编码。

kCVPixelFormatType_420YpCbCr8BiPlanarFullRange和kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange都是NV12格式,区别在于取值范围:FullRange的亮度范围是0–255,VideoRange是16–235,因此FullRange的颜色层次更好一些。

RTCCVPixelBuffer::toI420的转换代码:

/**
 * Converts the wrapped CVPixelBuffer into a newly allocated I420 buffer of size
 * [self width] x [self height], applying the stored crop rectangle and scaling.
 *
 * Handled pixel formats:
 *  - 420YpCbCr8BiPlanar Full/Video range (NV12): cropped by pointer offset, then
 *    converted and scaled in one step by NV12ToI420Scaler.
 *  - 32BGRA / 32ARGB: cropped and scaled into a temporary pixel buffer when needed, then
 *    converted with libyuv.
 * Any other format hits RTC_NOTREACHED().
 *
 * The pixel buffer's base address is locked read-only for the duration of the conversion.
 */
- (id<RTCI420Buffer>)toI420 {
  const OSType pixelFormat = CVPixelBufferGetPixelFormatType(_pixelBuffer);

  CVPixelBufferLockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);

  RTCMutableI420Buffer* i420Buffer =
      [[RTCMutableI420Buffer alloc] initWithWidth:[self width] height:[self height]];

  switch (pixelFormat) {
    case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
    case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: {
      // Plane 0 is Y, plane 1 is interleaved UV.
      const uint8_t* srcY =
          static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 0));
      const int srcYStride = CVPixelBufferGetBytesPerRowOfPlane(_pixelBuffer, 0);
      const uint8_t* srcUV =
          static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 1));
      const int srcUVStride = CVPixelBufferGetBytesPerRowOfPlane(_pixelBuffer, 1);

      // Crop just by modifying pointers.
      srcY += srcYStride * _cropY + _cropX;
      srcUV += srcUVStride * (_cropY / 2) + _cropX;

      // TODO(magjed): Use a frame buffer pool.
      webrtc::NV12ToI420Scaler nv12ToI420Scaler;
      nv12ToI420Scaler.NV12ToI420Scale(srcY,
                                       srcYStride,
                                       srcUV,
                                       srcUVStride,
                                       _cropWidth,
                                       _cropHeight,
                                       i420Buffer.mutableDataY,
                                       i420Buffer.strideY,
                                       i420Buffer.mutableDataU,
                                       i420Buffer.strideU,
                                       i420Buffer.mutableDataV,
                                       i420Buffer.strideV,
                                       i420Buffer.width,
                                       i420Buffer.height);
      break;
    }
    case kCVPixelFormatType_32BGRA:
    case kCVPixelFormatType_32ARGB: {
      CVPixelBufferRef scaledPixelBuffer = NULL;
      CVPixelBufferRef sourcePixelBuffer = NULL;
      if ([self requiresCropping] ||
          [self requiresScalingToWidth:i420Buffer.width height:i420Buffer.height]) {
        // Crop/scale into a temporary buffer of the target size first, and convert
        // from that buffer instead of the original.
        CVPixelBufferCreate(
            NULL, i420Buffer.width, i420Buffer.height, pixelFormat, NULL, &scaledPixelBuffer);
        [self cropAndScaleTo:scaledPixelBuffer withTempBuffer:NULL];

        CVPixelBufferLockBaseAddress(scaledPixelBuffer, kCVPixelBufferLock_ReadOnly);
        sourcePixelBuffer = scaledPixelBuffer;
      } else {
        sourcePixelBuffer = _pixelBuffer;
      }
      const uint8_t* src = static_cast<uint8_t*>(CVPixelBufferGetBaseAddress(sourcePixelBuffer));
      const size_t bytesPerRow = CVPixelBufferGetBytesPerRow(sourcePixelBuffer);

      if (pixelFormat == kCVPixelFormatType_32BGRA) {
        // Corresponds to libyuv::FOURCC_ARGB
        libyuv::ARGBToI420(src,
                           bytesPerRow,
                           i420Buffer.mutableDataY,
                           i420Buffer.strideY,
                           i420Buffer.mutableDataU,
                           i420Buffer.strideU,
                           i420Buffer.mutableDataV,
                           i420Buffer.strideV,
                           i420Buffer.width,
                           i420Buffer.height);
      } else if (pixelFormat == kCVPixelFormatType_32ARGB) {
        // Corresponds to libyuv::FOURCC_BGRA
        libyuv::BGRAToI420(src,
                           bytesPerRow,
                           i420Buffer.mutableDataY,
                           i420Buffer.strideY,
                           i420Buffer.mutableDataU,
                           i420Buffer.strideU,
                           i420Buffer.mutableDataV,
                           i420Buffer.strideV,
                           i420Buffer.width,
                           i420Buffer.height);
      }

      // Release the temporary buffer, if one was created above.
      if (scaledPixelBuffer) {
        CVPixelBufferUnlockBaseAddress(scaledPixelBuffer, kCVPixelBufferLock_ReadOnly);
        CVBufferRelease(scaledPixelBuffer);
      }
      break;
    }
    default: { RTC_NOTREACHED() << "Unsupported pixel format."; }
  }

  CVPixelBufferUnlockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);

  return i420Buffer;
}

CopyVideoFrameToNV12PixelBuffer将RTCI420Buffer转换为NV12的CVPixelBufferRef,内部使用的是libyuv::I420ToNV12进行转换。

// Copies an I420 frame buffer into an NV12 CVPixelBuffer using libyuv::I420ToNV12.
//
// The destination must be non-null, have the kNV12PixelFormat pixel format and a plane-0
// size equal to the frame size (checked with RTC_DCHECK).
//
// Returns true on success; returns false (after logging) if the pixel buffer cannot be
// locked or the conversion reports an error.
bool CopyVideoFrameToNV12PixelBuffer(id<RTCI420Buffer> frameBuffer, CVPixelBufferRef pixelBuffer) {
  RTC_DCHECK(pixelBuffer);
  RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), kNV12PixelFormat);
  RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), frameBuffer.height);
  RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), frameBuffer.width);

  // Lock for writing (no flags) before touching the plane base addresses.
  CVReturn cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0);
  if (cvRet != kCVReturnSuccess) {
    RTC_LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
    return false;
  }
  // Plane 0 receives Y, plane 1 receives interleaved UV.
  uint8_t *dstY = reinterpret_cast<uint8_t *>(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0));
  int dstStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
  uint8_t *dstUV = reinterpret_cast<uint8_t *>(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1));
  int dstStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1);
  // Convert I420 to NV12.
  int ret = libyuv::I420ToNV12(frameBuffer.dataY,
                               frameBuffer.strideY,
                               frameBuffer.dataU,
                               frameBuffer.strideU,
                               frameBuffer.dataV,
                               frameBuffer.strideV,
                               dstY,
                               dstStrideY,
                               dstUV,
                               dstStrideUV,
                               frameBuffer.width,
                               frameBuffer.height);
  // Unlock before checking the result so the buffer is released on both paths.
  CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
  if (ret) {
    RTC_LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12 :" << ret;
    return false;
  }
  return true;
}

CVPixelBufferRef


CVPixelBufferRef支持多种格式,使用CVPixelBufferGetPixelFormatType可以取得格式类型。
常用的格式有:

kCVPixelFormatType_32ARGB         = 0x00000020,
kCVPixelFormatType_32BGRA         = 'BGRA',
kCVPixelFormatType_420YpCbCr8PlanarFullRange    = 'f420',
kCVPixelFormatType_420YpCbCr8BiPlanarFullRange  = '420f',

还有更多格式都在CVPixelBuffer.h中定义。
f420就是420p,也就是I420;420f即系统默认的NV12。

你可能感兴趣的:(WebRTC IOS视频硬编码流程及其中传递的CVPixelBufferRef)