iOS Video Hardware Encoding Flow
The default camera capture in WebRTC is RTCCameraVideoCapturer:
src/sdk/objc/components/capturer/RTCCameraVideoCapturer.m
- (void)captureOutput:(AVCaptureOutput *)captureOutput
didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
fromConnection:(AVCaptureConnection *)connection {
...
RTCCVPixelBuffer *rtcPixelBuffer = [[RTCCVPixelBuffer alloc] initWithPixelBuffer:pixelBuffer];
int64_t timeStampNs = CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(sampleBuffer)) *
kNanosecondsPerSecond;
RTCVideoFrame *videoFrame = [[RTCVideoFrame alloc] initWithBuffer:rtcPixelBuffer
rotation:_rotation
timeStampNs:timeStampNs];
[self.delegate capturer:self didCaptureVideoFrame:videoFrame];
}
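For reference, wiring this capturer into an application typically looks like the following sketch (the factory, device, and format choices are illustrative and not part of the source above):
// Sketch: wiring RTCCameraVideoCapturer to an RTCVideoSource so that the
// capturer:didCaptureVideoFrame: callback above delivers frames to the source.
RTCPeerConnectionFactory *factory = [[RTCPeerConnectionFactory alloc] init];
RTCVideoSource *videoSource = [factory videoSource];
// The source acts as the capturer delegate (it implements RTCVideoCapturerDelegate).
RTCCameraVideoCapturer *capturer =
    [[RTCCameraVideoCapturer alloc] initWithDelegate:videoSource];
// Pick the first available camera and one of its supported formats.
AVCaptureDevice *device = [RTCCameraVideoCapturer captureDevices].firstObject;
AVCaptureDeviceFormat *format =
    [RTCCameraVideoCapturer supportedFormatsForDevice:device].firstObject;
[capturer startCaptureWithDevice:device format:format fps:30];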
Let's first look at RTCCVPixelBuffer and RTCVideoFrame.
RTCCVPixelBuffer simply stores the CVPixelBufferRef pixelBuffer:
src/sdk/objc/components/video_frame_buffer/RTCCVPixelBuffer.mm
- (instancetype)initWithPixelBuffer:(CVPixelBufferRef)pixelBuffer {
return [self initWithPixelBuffer:pixelBuffer
adaptedWidth:CVPixelBufferGetWidth(pixelBuffer)
adaptedHeight:CVPixelBufferGetHeight(pixelBuffer)
cropWidth:CVPixelBufferGetWidth(pixelBuffer)
cropHeight:CVPixelBufferGetHeight(pixelBuffer)
cropX:0
cropY:0];
}
- (instancetype)initWithPixelBuffer:(CVPixelBufferRef)pixelBuffer
adaptedWidth:(int)adaptedWidth
adaptedHeight:(int)adaptedHeight
cropWidth:(int)cropWidth
cropHeight:(int)cropHeight
cropX:(int)cropX
cropY:(int)cropY {
if (self = [super init]) {
_width = adaptedWidth;
_height = adaptedHeight;
_pixelBuffer = pixelBuffer;
_bufferWidth = CVPixelBufferGetWidth(_pixelBuffer);
_bufferHeight = CVPixelBufferGetHeight(_pixelBuffer);
_cropWidth = cropWidth;
_cropHeight = cropHeight;
// Can only crop at even pixels.
_cropX = cropX & ~1;
_cropY = cropY & ~1;
CVBufferRetain(_pixelBuffer);
}
return self;
}
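To illustrate how these fields are used (the numbers here are hypothetical), a wrapper that crops a centered 1280x720 region from a 1920x1080 buffer and later scales it to 640x360 would be built like this; no pixel data is touched yet, the initializer only records the crop rectangle and the target size:
// Sketch: wrap a 1920x1080 camera buffer with a centered 1280x720 crop that
// will later be scaled down to 640x360 (e.g. in toI420 or cropAndScaleTo:).
RTCCVPixelBuffer *buffer =
    [[RTCCVPixelBuffer alloc] initWithPixelBuffer:pixelBuffer  // 1920x1080 source
                                     adaptedWidth:640
                                    adaptedHeight:360
                                        cropWidth:1280
                                       cropHeight:720
                                            cropX:320   // (1920 - 1280) / 2, already even
                                            cropY:180]; // (1080 - 720) / 2, already even
// cropX/cropY are forced to even values (cropX & ~1) because the chroma planes
// in NV12/I420 are subsampled 2x2, so an odd crop would misalign the UV data.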
RTCVideoFrame in turn simply stores an RTCVideoFrameBuffer:
src/sdk/objc/base/RTCVideoFrame.mm
- (instancetype)initWithBuffer:(id<RTCVideoFrameBuffer>)buffer
rotation:(RTCVideoRotation)rotation
timeStampNs:(int64_t)timeStampNs {
if (self = [super init]) {
_buffer = buffer;
_rotation = rotation;
_timeStampNs = timeStampNs;
}
return self;
}
Back to didCaptureVideoFrame.
RTCVideoSource:
src/sdk/objc/api/peerconnection/RTCVideoSource.mm
- (void)capturer:(RTCVideoCapturer *)capturer didCaptureVideoFrame:(RTCVideoFrame *)frame {
getObjCVideoSource(_nativeVideoSource)->OnCapturedFrame(frame);
}
ObjCVideoTrackSource:
src/sdk/objc/native/src/objc_video_track_source.mm
void ObjCVideoTrackSource::OnCapturedFrame(RTCVideoFrame *frame) {
const int64_t timestamp_us = frame.timeStampNs / rtc::kNumNanosecsPerMicrosec;
const int64_t translated_timestamp_us =
timestamp_aligner_.TranslateTimestamp(timestamp_us, rtc::TimeMicros());
int adapted_width;
int adapted_height;
int crop_width;
int crop_height;
int crop_x;
int crop_y;
if (!AdaptFrame(frame.width,
frame.height,
timestamp_us,
&adapted_width,
&adapted_height,
&crop_width,
&crop_height,
&crop_x,
&crop_y)) {
return;
}
rtc::scoped_refptr<VideoFrameBuffer> buffer;
if (adapted_width == frame.width && adapted_height == frame.height) {
// No adaption - optimized path.
buffer = new rtc::RefCountedObject<ObjCFrameBuffer>(frame.buffer);
} else if ([frame.buffer isKindOfClass:[RTCCVPixelBuffer class]]) {
// Adapted CVPixelBuffer frame.
RTCCVPixelBuffer *rtcPixelBuffer = (RTCCVPixelBuffer *)frame.buffer;
buffer = new rtc::RefCountedObject<ObjCFrameBuffer>([[RTCCVPixelBuffer alloc]
initWithPixelBuffer:rtcPixelBuffer.pixelBuffer
adaptedWidth:adapted_width
adaptedHeight:adapted_height
cropWidth:crop_width
cropHeight:crop_height
cropX:crop_x + rtcPixelBuffer.cropX
cropY:crop_y + rtcPixelBuffer.cropY]);
} else {
// Adapted I420 frame.
// TODO(magjed): Optimize this I420 path.
rtc::scoped_refptr<I420Buffer> i420_buffer = I420Buffer::Create(adapted_width, adapted_height);
buffer = new rtc::RefCountedObject<ObjCFrameBuffer>(frame.buffer);
i420_buffer->CropAndScaleFrom(*buffer->ToI420(), crop_x, crop_y, crop_width, crop_height);
buffer = i420_buffer;
}
// Applying rotation is only supported for legacy reasons and performance is
// not critical here.
VideoRotation rotation = static_cast<VideoRotation>(frame.rotation);
if (apply_rotation() && rotation != kVideoRotation_0) {
buffer = I420Buffer::Rotate(*buffer->ToI420(), rotation);
rotation = kVideoRotation_0;
}
OnFrame(VideoFrame(buffer, rotation, translated_timestamp_us));
}
By default there is no cropping, so the RTCVideoFrameBuffer is wrapped directly in an ObjCFrameBuffer.
src/sdk/objc/native/src/objc_frame_buffer.mm
ObjCFrameBuffer::ObjCFrameBuffer(id<RTCVideoFrameBuffer> frame_buffer)
: frame_buffer_(frame_buffer), width_(frame_buffer.width), height_(frame_buffer.height) {}
Next comes the call to OnFrame. ObjCVideoTrackSource derives from AdaptedVideoTrackSource, and OnFrame is implemented by the parent class AdaptedVideoTrackSource:
src/media/base/adaptedvideotracksource.cc
void AdaptedVideoTrackSource::OnFrame(const webrtc::VideoFrame& frame) {
rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer(
frame.video_frame_buffer());
/* Note that this is a "best effort" approach to
wants.rotation_applied; apply_rotation_ can change from false to
true between the check of apply_rotation() and the call to
broadcaster_.OnFrame(), in which case we generate a frame with
pending rotation despite some sink with wants.rotation_applied ==
true was just added. The VideoBroadcaster enforces
synchronization for us in this case, by not passing the frame on
to sinks which don't want it. */
if (apply_rotation() && frame.rotation() != webrtc::kVideoRotation_0 &&
buffer->type() == webrtc::VideoFrameBuffer::Type::kI420) {
/* Apply pending rotation. */
broadcaster_.OnFrame(webrtc::VideoFrame(
webrtc::I420Buffer::Rotate(*buffer->GetI420(), frame.rotation()),
webrtc::kVideoRotation_0, frame.timestamp_us()));
} else {
broadcaster_.OnFrame(frame);
}
}
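The rotation_applied handling above is driven by what each sink requested when it registered with the source. A minimal sketch of such a registration (MySink is a made-up sink class, not from the WebRTC tree):
// Sketch: ask the source to deliver frames with rotation already applied.
rtc::VideoSinkWants wants;
wants.rotation_applied = true;  // rotate to kVideoRotation_0 before delivery
MySink sink;  // implements rtc::VideoSinkInterface<webrtc::VideoFrame>
source->AddOrUpdateSink(&sink, wants);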
broadcaster_ is a VideoBroadcaster:
src/media/base/videobroadcaster.cc
void VideoBroadcaster::OnFrame(const webrtc::VideoFrame& frame) {
rtc::CritScope cs(&sinks_and_wants_lock_);
for (auto& sink_pair : sink_pairs()) {
if (sink_pair.wants.rotation_applied &&
frame.rotation() != webrtc::kVideoRotation_0) {
// Calls to OnFrame are not synchronized with changes to the sink wants.
// When rotation_applied is set to true, one or a few frames may get here
// with rotation still pending. Protect sinks that don't expect any
// pending rotation.
RTC_LOG(LS_VERBOSE) << "Discarding frame with unexpected rotation.";
continue;
}
if (sink_pair.wants.black_frames) {
sink_pair.sink->OnFrame(
webrtc::VideoFrame(GetBlackFrameBuffer(frame.width(), frame.height()),
frame.rotation(), frame.timestamp_us()));
} else {
sink_pair.sink->OnFrame(frame);
}
}
}
When videoTrack.isEnabled = false (this can be set through the upper-layer API), a black frame is generated and sent in place of the camera frame; the camera frame itself is not forwarded. In the normal case the captured video frame is sent as-is.
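At the application level this corresponds to simply toggling the track; a minimal sketch, assuming localVideoTrack is the RTCVideoTrack created from the camera source:
// Sketch: disabling the local video track. The pipeline keeps running, but
// black frames are delivered in place of camera frames.
localVideoTrack.isEnabled = NO;   // send black frames
// ...
localVideoTrack.isEnabled = YES;  // resume sending camera frames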
From sink->OnFrame to VideoStreamEncoder::OnFrame
The sink is declared as rtc::VideoSinkInterface<webrtc::VideoFrame>*. VideoStreamEncoder derives from VideoStreamEncoderInterface, so sink->OnFrame actually ends up calling VideoStreamEncoder::OnFrame.
The flow that creates VideoStreamEncoder and binds it to the sink takes quite a detour; the code is easy to get lost in:
src/pc/peerconnection.cc
PeerConnection::SetLocalDescription-->
ApplyLocalDescription-->
CreateChannels
PeerConnection::SetRemoteDescription-->
ApplyRemoteDescription->
CreateChannels
Audio channel:
CreateChannels->
CreateVoiceChannel->
channel_manager()->CreateVoiceChannel->
media_engine_->voice().CreateMediaChannel->
... (audio is not analyzed further here; the focus below is video)
Video channel:
CreateChannels->
CreateVideoChannel->
channel_manager()->CreateVideoChannel->
media_engine_->video().CreateMediaChannel->
VideoMediaChannel* WebRtcVideoEngine::CreateMediaChannel->
new WebRtcVideoChannel
At this point the WebRtcVideoChannel has been created.
ApplyLocalDescription->
UpdateSessionState
ApplyRemoteDescription->
UpdateSessionState
UpdateSessionState->
PushdownMediaDescription->
channel->SetLocalContent
channel->SetRemoteContent
channel->SetLocalContent->BaseChannel::SetLocalContent_w->
VideoChannel::SetLocalContent_w->
UpdateLocalStreams_w->
media_channel()->AddSendStream->
WebRtcVideoChannel::AddSendStream->
new WebRtcVideoSendStream->
WebRtcVideoChannel::WebRtcVideoSendStream::RecreateWebRtcStream()->
stream_ = call_->CreateVideoSendStream (stream_ is a VideoSendStream)
stream_->SetSource(this, GetDegradationPreference());
channel->SetRemoteContent->BaseChannel::SetRemoteContent_w
->VideoChannel::SetRemoteContent_w->
UpdateRemoteStreams_w->
BaseChannel::AddRecvStream_w->
media_channel()->AddRecvStream->
WebRtcVideoChannel::AddRecvStream->
new WebRtcVideoReceiveStream->
WebRtcVideoChannel::WebRtcVideoReceiveStream::RecreateWebRtcVideoStream() ->
stream_ = call_->CreateVideoReceiveStream (stream_ is a VideoReceiveStream)
stream_->Start();
At this point the VideoSendStream and VideoReceiveStream have been created; below only the VideoSendStream is analyzed:
VideoSendStream constructor ->
video_stream_encoder_ = CreateVideoStreamEncoder, which returns a VideoStreamEncoder
In the earlier call stream_->SetSource(this, GetDegradationPreference()), this is the WebRtcVideoSendStream, so:
VideoStreamEncoder::SetSource->
source->AddOrUpdateSink->
WebRtcVideoSendStream::AddOrUpdateSink->
source_->AddOrUpdateSink (here source_ is set in WebRtcVideoChannel::WebRtcVideoSendStream::SetVideoSend)
Let's now see how WebRtcVideoChannel::WebRtcVideoSendStream::SetVideoSend gets called:
PeerConnection::CreateSender->
sender = RtpSenderProxyWithInternal<RtpSenderInternal>::Create(
signaling_thread(), new VideoRtpSender(worker_thread(), id)); ->
sender->SetTrack(track)->track_->
VideoRtpSender::SetVideoSend() ->
media_channel_->SetVideoSend(ssrc_, &options, track_);->
WebRtcVideoChannel::WebRtcVideoSendStream::SetVideoSend
Phew, that was quite a detour.
class VideoStreamEncoder : public VideoStreamEncoderInterface,
private EncodedImageCallback,
// Protected only to provide access to tests.
protected AdaptationObserverInterface {
VideoStreamEncoderInterface in turn derives from rtc::VideoSinkInterface<VideoFrame>, so a VideoStreamEncoder can act as a frame sink.
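Anything that wants frames from the broadcaster only has to implement this interface; a minimal hypothetical sink looks like the sketch below, and VideoStreamEncoder is just a much larger implementation of the same idea:
// Sketch: a minimal frame sink. VideoStreamEncoder implements the same
// interface, which is why it can be handed to SetSource()/AddOrUpdateSink().
class FrameCountingSink : public rtc::VideoSinkInterface<webrtc::VideoFrame> {
 public:
  void OnFrame(const webrtc::VideoFrame& frame) override {
    ++frame_count_;  // VideoStreamEncoder kicks off encoding here instead
  }
  int frame_count() const { return frame_count_; }

 private:
  int frame_count_ = 0;
};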
VideoStreamEncoder::OnFrame:
src/video/video_stream_encoder.cc
void VideoStreamEncoder::OnFrame(const VideoFrame& video_frame) {
RTC_DCHECK_RUNS_SERIALIZED(&incoming_frame_race_checker_);
...
encoder_queue_.PostTask(
[this, incoming_frame, post_time_us, log_stats]() {
RTC_DCHECK_RUN_ON(&encoder_queue_);
encoder_stats_observer_->OnIncomingFrame(incoming_frame.width(),
incoming_frame.height());
++captured_frame_count_;
const int posted_frames_waiting_for_encode =
posted_frames_waiting_for_encode_.fetch_sub(1);
RTC_DCHECK_GT(posted_frames_waiting_for_encode, 0);
if (posted_frames_waiting_for_encode == 1) {
MaybeEncodeVideoFrame(incoming_frame, post_time_us);
} else {
// There is a newer frame in flight. Do not encode this frame.
RTC_LOG(LS_VERBOSE)
<< "Incoming frame dropped due to that the encoder is blocked.";
++dropped_frame_count_;
encoder_stats_observer_->OnFrameDropped(
VideoStreamEncoderObserver::DropReason::kEncoderQueue);
}
...
});
}
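The posted_frames_waiting_for_encode_ counter implements a drop-oldest policy: a posted frame is only encoded if no newer frame has been queued behind it. A stripped-down sketch of the same idea (class and member names here are illustrative; only the counter logic mirrors the real code):
#include <atomic>
#include <functional>
// Sketch: drop a pending frame when a newer one has already been posted.
class DropOldestPolicy {
 public:
  // |post| stands in for encoder_queue_.PostTask.
  explicit DropOldestPolicy(std::function<void(std::function<void()>)> post)
      : post_to_encoder_queue_(std::move(post)) {}
  void OnFrame(int frame_id) {
    pending_.fetch_add(1);
    post_to_encoder_queue_([this, frame_id] {
      // fetch_sub returns the value before the decrement: 1 means this frame
      // is the newest pending one; anything larger means a newer frame waits.
      if (pending_.fetch_sub(1) == 1) {
        Encode(frame_id);
      } else {
        ++dropped_;  // a newer frame is already queued, drop this one
      }
    });
  }
 private:
  void Encode(int frame_id) { /* hand the frame to the encoder */ }
  std::function<void(std::function<void()>)> post_to_encoder_queue_;
  std::atomic<int> pending_{0};
  int dropped_ = 0;
};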
MaybeEncodeVideoFrame, invoked from the PostTask above:
void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
int64_t time_when_posted_us) {
...
EncodeVideoFrame(video_frame, time_when_posted_us);
}
Continuing into EncodeVideoFrame:
void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame,
int64_t time_when_posted_us) {
...
video_sender_.AddVideoFrame(out_frame, nullptr, encoder_info_);
}
src/modules/video_coding/video_sender.cc
int32_t VideoSender::AddVideoFrame(
const VideoFrame& videoFrame,
const CodecSpecificInfo* codecSpecificInfo,
absl::optional<VideoEncoder::EncoderInfo> encoder_info) {
...
int32_t ret =
_encoder->Encode(converted_frame, codecSpecificInfo, next_frame_types);
if (ret < 0) {
RTC_LOG(LS_ERROR) << "Failed to encode frame. Error code: " << ret;
return ret;
}
...
}
In the code above, if a software encoder is used and the frame is not already I420, it is converted to I420 first.
int32_t ret = _encoder->Encode(converted_frame, codecSpecificInfo, next_frame_types);
Here _encoder is a VCMGenericEncoder.
VCMGenericEncoder:
src/modules/video_coding/generic_encoder.cc
int32_t VCMGenericEncoder::Encode(const VideoFrame& frame,
const CodecSpecificInfo* codec_specific,
const std::vector<FrameType>& frame_types) {
...
return encoder_->Encode(frame, codec_specific, &frame_types);
}
Here encoder_ is an ObjCVideoEncoder.
ObjCVideoEncoder:
int32_t Encode(const VideoFrame &frame,
const CodecSpecificInfo *codec_specific_info,
const std::vector<FrameType> *frame_types) {
...
return [encoder_ encode:ToObjCVideoFrame(frame)
codecSpecificInfo:nil
frameTypes:rtcFrameTypes];
}
Here encoder_ is an RTCVideoEncoder, in this case RTCVideoEncoderH264.
RTCVideoEncoderH264:
- (NSInteger)encode:(RTCVideoFrame *)frame
codecSpecificInfo:(nullable id<RTCCodecSpecificInfo>)codecSpecificInfo
frameTypes:(NSArray<NSNumber *> *)frameTypes {
...
OSStatus status = VTCompressionSessionEncodeFrame(_compressionSession,
pixelBuffer,
presentationTimeStamp,
kCMTimeInvalid,
frameProperties,
encodeParams.release(),
nullptr);
...
In the normal camera-capture case, the CVPixelBufferRef pixelBuffer is passed along layer by layer and finally reaches VTCompressionSessionEncodeFrame.
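RTCVideoEncoderH264 owns the VTCompressionSession that this call feeds. Creating and configuring such a session looks roughly like the sketch below (the resolution and property values are illustrative; the real encoder additionally handles bitrate updates, resets, and an output callback that repackages the encoded NAL units for RTP):
#import <VideoToolbox/VideoToolbox.h>
// Sketch: set up an H.264 compression session similar to what
// RTCVideoEncoderH264 does internally. outputCallback receives the encoded
// CMSampleBuffers produced by VTCompressionSessionEncodeFrame.
static void outputCallback(void *outputCallbackRefCon,
                           void *sourceFrameRefCon,
                           OSStatus status,
                           VTEncodeInfoFlags infoFlags,
                           CMSampleBufferRef sampleBuffer) {
  // Encoded frame arrives here (Annex-B conversion, packetization, ...).
}
VTCompressionSessionRef session = NULL;
OSStatus status = VTCompressionSessionCreate(kCFAllocatorDefault,
                                             1280, 720,              // width, height
                                             kCMVideoCodecType_H264,
                                             NULL,                   // encoder specification
                                             NULL,                   // source attributes
                                             NULL,                   // compressed data allocator
                                             outputCallback,
                                             NULL,                   // callback ref con
                                             &session);
if (status == noErr) {
  VTSessionSetProperty(session, kVTCompressionPropertyKey_RealTime, kCFBooleanTrue);
  VTSessionSetProperty(session, kVTCompressionPropertyKey_AllowFrameReordering, kCFBooleanFalse);
  VTSessionSetProperty(session, kVTCompressionPropertyKey_ProfileLevel,
                       kVTProfileLevel_H264_Baseline_AutoLevel);
  VTCompressionSessionPrepareToEncodeFrames(session);
}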
Format Conversion
A pixel buffer captured from the camera normally needs no format conversion. When conversion is required, WebRTC performs it automatically, but only four pixel formats can be converted to I420:
kCVPixelFormatType_420YpCbCr8BiPlanarFullRange
kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange
kCVPixelFormatType_32BGRA
kCVPixelFormatType_32ARGB
If no conversion is needed along the way, the incoming buffer may use other formats. For example, RTCCameraVideoCapturer captures in the system's default first format, which on macOS is usually none of the four above; on my iMac the default is kCVPixelFormatType_422YpCbCr8. Yet nothing on the path to encoding needs to read the pixel data or convert the format, so the captured data goes straight into the system hardware encoder.
kCVPixelFormatType_420YpCbCr8BiPlanarFullRange and kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange are both NV12 layouts; FullRange simply uses the full 0-255 luma range, so it preserves slightly more color detail.
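A quick way to check whether a given buffer could take that conversion path (a small sketch; the helper name is made up):
// Sketch: does this pixel buffer use one of the four formats that
// RTCCVPixelBuffer's toI420 can convert? Other formats still work as long as
// nothing on the way to the hardware encoder needs an I420 copy.
static BOOL CanConvertToI420(CVPixelBufferRef pixelBuffer) {
  switch (CVPixelBufferGetPixelFormatType(pixelBuffer)) {
    case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
    case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange:
    case kCVPixelFormatType_32BGRA:
    case kCVPixelFormatType_32ARGB:
      return YES;
    default:
      return NO;  // e.g. kCVPixelFormatType_422YpCbCr8 on some Macs
  }
}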
The conversion code in RTCCVPixelBuffer's toI420:
- (id<RTCI420Buffer>)toI420 {
const OSType pixelFormat = CVPixelBufferGetPixelFormatType(_pixelBuffer);
CVPixelBufferLockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);
RTCMutableI420Buffer* i420Buffer =
[[RTCMutableI420Buffer alloc] initWithWidth:[self width] height:[self height]];
switch (pixelFormat) {
case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: {
const uint8_t* srcY =
static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 0));
const int srcYStride = CVPixelBufferGetBytesPerRowOfPlane(_pixelBuffer, 0);
const uint8_t* srcUV =
static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 1));
const int srcUVStride = CVPixelBufferGetBytesPerRowOfPlane(_pixelBuffer, 1);
// Crop just by modifying pointers.
srcY += srcYStride * _cropY + _cropX;
srcUV += srcUVStride * (_cropY / 2) + _cropX;
// TODO(magjed): Use a frame buffer pool.
webrtc::NV12ToI420Scaler nv12ToI420Scaler;
nv12ToI420Scaler.NV12ToI420Scale(srcY,
srcYStride,
srcUV,
srcUVStride,
_cropWidth,
_cropHeight,
i420Buffer.mutableDataY,
i420Buffer.strideY,
i420Buffer.mutableDataU,
i420Buffer.strideU,
i420Buffer.mutableDataV,
i420Buffer.strideV,
i420Buffer.width,
i420Buffer.height);
break;
}
case kCVPixelFormatType_32BGRA:
case kCVPixelFormatType_32ARGB: {
CVPixelBufferRef scaledPixelBuffer = NULL;
CVPixelBufferRef sourcePixelBuffer = NULL;
if ([self requiresCropping] ||
[self requiresScalingToWidth:i420Buffer.width height:i420Buffer.height]) {
CVPixelBufferCreate(
NULL, i420Buffer.width, i420Buffer.height, pixelFormat, NULL, &scaledPixelBuffer);
[self cropAndScaleTo:scaledPixelBuffer withTempBuffer:NULL];
CVPixelBufferLockBaseAddress(scaledPixelBuffer, kCVPixelBufferLock_ReadOnly);
sourcePixelBuffer = scaledPixelBuffer;
} else {
sourcePixelBuffer = _pixelBuffer;
}
const uint8_t* src = static_cast<uint8_t*>(CVPixelBufferGetBaseAddress(sourcePixelBuffer));
const size_t bytesPerRow = CVPixelBufferGetBytesPerRow(sourcePixelBuffer);
if (pixelFormat == kCVPixelFormatType_32BGRA) {
// Corresponds to libyuv::FOURCC_ARGB
libyuv::ARGBToI420(src,
bytesPerRow,
i420Buffer.mutableDataY,
i420Buffer.strideY,
i420Buffer.mutableDataU,
i420Buffer.strideU,
i420Buffer.mutableDataV,
i420Buffer.strideV,
i420Buffer.width,
i420Buffer.height);
} else if (pixelFormat == kCVPixelFormatType_32ARGB) {
// Corresponds to libyuv::FOURCC_BGRA
libyuv::BGRAToI420(src,
bytesPerRow,
i420Buffer.mutableDataY,
i420Buffer.strideY,
i420Buffer.mutableDataU,
i420Buffer.strideU,
i420Buffer.mutableDataV,
i420Buffer.strideV,
i420Buffer.width,
i420Buffer.height);
}
if (scaledPixelBuffer) {
CVPixelBufferUnlockBaseAddress(scaledPixelBuffer, kCVPixelBufferLock_ReadOnly);
CVBufferRelease(scaledPixelBuffer);
}
break;
}
default: { RTC_NOTREACHED() << "Unsupported pixel format."; }
}
CVPixelBufferUnlockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);
return i420Buffer;
}
CopyVideoFrameToNV12PixelBuffer converts an RTCI420Buffer into an NV12 CVPixelBufferRef; internally it uses libyuv::I420ToNV12 for the conversion.
bool CopyVideoFrameToNV12PixelBuffer(id<RTCI420Buffer> frameBuffer, CVPixelBufferRef pixelBuffer) {
RTC_DCHECK(pixelBuffer);
RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), kNV12PixelFormat);
RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), frameBuffer.height);
RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), frameBuffer.width);
CVReturn cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0);
if (cvRet != kCVReturnSuccess) {
RTC_LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
return false;
}
uint8_t *dstY = reinterpret_cast<uint8_t *>(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0));
int dstStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
uint8_t *dstUV = reinterpret_cast<uint8_t *>(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1));
int dstStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1);
// Convert I420 to NV12.
int ret = libyuv::I420ToNV12(frameBuffer.dataY,
frameBuffer.strideY,
frameBuffer.dataU,
frameBuffer.strideU,
frameBuffer.dataV,
frameBuffer.strideV,
dstY,
dstStrideY,
dstUV,
dstStrideUV,
frameBuffer.width,
frameBuffer.height);
CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
if (ret) {
RTC_LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12 :" << ret;
return false;
}
return true;
}
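The destination buffer handed to this function must already be an NV12 CVPixelBuffer of matching size. Allocating one could look like this sketch (not code from the WebRTC tree; the IOSurface attribute is optional but lets VideoToolbox use the buffer without an extra copy):
// Sketch: allocate an NV12 destination buffer and fill it from an I420 frame.
NSDictionary *attributes = @{(NSString *)kCVPixelBufferIOSurfacePropertiesKey : @{}};
CVPixelBufferRef nv12Buffer = NULL;
CVReturn result = CVPixelBufferCreate(kCFAllocatorDefault,
                                      frameBuffer.width,
                                      frameBuffer.height,
                                      kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
                                      (__bridge CFDictionaryRef)attributes,
                                      &nv12Buffer);
if (result == kCVReturnSuccess) {
  CopyVideoFrameToNV12PixelBuffer(frameBuffer, nv12Buffer);
  // ... encode or display nv12Buffer, then CVBufferRelease(nv12Buffer);
}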
CVPixelBufferRef
A CVPixelBufferRef can hold many different pixel formats; CVPixelBufferGetPixelFormatType returns the format type.
Commonly used formats include:
kCVPixelFormatType_32ARGB = 0x00000020,
kCVPixelFormatType_32BGRA = 'BGRA',
kCVPixelFormatType_420YpCbCr8PlanarFullRange = 'f420',
kCVPixelFormatType_420YpCbCr8BiPlanarFullRange = '420f',
Many more formats are defined in CVPixelBuffer.h.
'f420' is planar 4:2:0, i.e. 420p / I420, while '420f' is the biplanar NV12 layout that the system uses by default.
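To see which format a particular buffer actually carries, the OSType can be printed as its FourCC (a small sketch):
// Sketch: print a pixel buffer's format as a FourCC string, e.g. "420f" or "BGRA".
OSType format = CVPixelBufferGetPixelFormatType(pixelBuffer);
char fourCC[5] = {(char)(format >> 24), (char)(format >> 16),
                  (char)(format >> 8), (char)format, '\0'};
NSLog(@"pixel format = 0x%08x (%s)", (unsigned int)format, fourCC);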