VideoToolbox H264 硬编码

一. 主要函数说明

创建会话
使用VTCompressionSessionCreate()来创建会话。

 VTCompressionSessionCreate(
     //  指定的分配器，若设置为NULL，表示使用默认的分配器
     CFAllocatorRef  _Nullable allocator, 
     //  视频图像的宽                              
     int32_t width, 
     //  视频图像的高
     int32_t height,
     //  编码类型
     CMVideoCodecType codecType,
     //  编码规范，若设置为NULL,表示由系统自行选择编码规范
     CFDictionaryRef  _Nullable encoderSpecification,
     //  源像素缓冲区属性，用于为源帧创建像素缓冲池；若设置为NULL，表示不需要VideoToolbox创建缓冲池，由调用方自行提供像素缓冲区
     CFDictionaryRef  _Nullable sourceImageBufferAttributes,
     //  压缩数据的分配器，若设置为NULL，表示使用默认的分配器
     CFAllocatorRef  _Nullable compressedDataAllocator,
     //  回调函数的函数指针
     VTCompressionOutputCallback  _Nullable outputCallback,
     //  回调函数的引用数据，将会传递到回调函数中
     void * _Nullable outputCallbackRefCon,
     //  接受生成的会话的地址
     VTCompressionSessionRef  _Nullable * _Nonnull compressionSessionOut 
 )

这个函数有一个OSStatus类型的返回值，若返回noErr，则表示创建成功。

设置编码会话的属性
使用VTSessionSetProperty()来完成编码属性的设置。

 VTSessionSetProperty(
     VTSessionRef  _Nonnull session,    //  要设置的编码会话
     CFStringRef  _Nonnull propertyKey, //  属性的key
     CFTypeRef  _Nullable propertyValue //  属性的value
 )

这个函数也有一个OSStatus类型的返回值，若返回noErr，则表示属性设置成功。

准备编码
使用VTCompressionSessionPrepareToEncodeFrames()

 VTCompressionSessionPrepareToEncodeFrames(
     VTCompressionSessionRef  _Nonnull session  //  准备编码的会话
 )

同上，若此函数返回noErr，则表示执行成功。

编码

使用VTCompressionSessionEncodeFrame()函数来进行编码操作。

 VTCompressionSessionEncodeFrame(
     VTCompressionSessionRef  _Nonnull session, //  执行编码的会话
     CVImageBufferRef  _Nonnull imageBuffer,    //  要进行编码的图像数据帧
     CMTime presentationTimeStamp,              //  该帧的展示时间戳，每一个时间戳必须大于前一个时间戳
     CMTime duration,                           //  该帧的持续时间，若没有持续时间，则传递kCMTimeInvalid
     CFDictionaryRef  _Nullable frameProperties,//  该帧的属性
     void * _Nullable sourceFrameRefcon,        //  该帧的引用数据，将会传递到回调函数中
     VTEncodeInfoFlags * _Nullable infoFlagsOut //  用于接受此次编码操作信息的地址
 )

返回结果同上，若此函数返回noErr，则表示编码成功。

结束编码

使用VTCompressionSessionCompleteFrames()函数来强制完成所有未处理的帧。

VTCompressionSessionCompleteFrames(
    VTCompressionSessionRef  _Nonnull session, //  执行此操作的会话
    CMTime completeUntilPresentationTimeStamp  //  完成帧编码的时间戳，若传递kCMTimeInvalid，则会处理完所有待处理的帧再返回
)

返回结果同上。

使用VTCompressionSessionInvalidate()函数来设置编码会话失效。

VTCompressionSessionInvalidate(

    VTCompressionSessionRef  _Nonnull session  //  将要失效的编码会话

)

这个函数没有返回值（返回类型为void）。会话失效后不能再用于编码，之后仍需调用CFRelease()释放会话对象。

编码回调函数

VideoToolbox定义了一个VTCompressionOutputCallback类型的函数指针，我们需要按照它的签名实现一个回调函数来获取编码结果。

函数指针类型的定义如下：

//  Signature of the function VideoToolbox calls once for every encoded frame.
typedef void (*VTCompressionOutputCallback)(
    void * CM_NULLABLE outputCallbackRefCon,    //  Reference value supplied when the session was created
    void * CM_NULLABLE sourceFrameRefCon,       //  Reference value supplied when the frame was submitted
    OSStatus status,                            //  Status of the encode operation
    VTEncodeInfoFlags infoFlags,                //  Information flags about the encode operation
    CM_NULLABLE CMSampleBufferRef sampleBuffer  //  The encoded result
);

二. 编码流程

编码流程

三. 具体实现

1. 创建编码会话

int32_t width = 480;   //  Width of the video frames
int32_t height = 640;  //  Height of the video frames
//  Start from NULL so a failed create never leaves an indeterminate pointer behind.
VTCompressionSessionRef encodeSesion = NULL;

OSStatus status = VTCompressionSessionCreate(
                        kCFAllocatorDefault,               //  Default allocator for the session
                        width,
                        height,
                        kCMVideoCodecType_H264,            //  Encode as H.264
                        NULL,                              //  Let the system choose the encoder
                        NULL,                              //  No source pixel-buffer attributes
                        NULL,                              //  Default allocator for the compressed data
                        VideoEncodeCallback,               //  Our own output callback function
                        (__bridge void * _Nullable)(self), //  Handed back to the callback as outputCallbackRefCon
                        &encodeSesion
                        );

if (status != noErr) {
    NSLog(@"Session create failed. status=%d", (int)status);
} else {
    //  Keep the session in the ivar that the property-setting and teardown code reads.
    _encoderSession = encodeSesion;
}

2. 设置编码属性

常用的属性

kVTCompressionPropertyKey_RealTime

这个属性表示是否实时编码，值为一个CFBoolean类型。
kVTCompressionPropertyKey_ProfileLevel

这个属性表示编码码流使用的档次(profile)和级别(level)，一般传kVTProfileLevel_H264_Baseline_AutoLevel即可
kVTCompressionPropertyKey_AllowFrameReordering

这个属性表示是否允许帧重新排序。要编码B帧，编码器必须对帧进行重新排序，也就是说帧的解码顺序会与显示顺序不同。所以这个属性可以间接理解为是否产生B帧。该属性的值为一个CFBoolean类型。
kVTCompressionPropertyKey_MaxKeyFrameInterval

这个属性表示I帧的间隔，也就是GOP，这个值设置太大的话，图像会模糊。该属性的值为一个CFNumberRef类型
kVTCompressionPropertyKey_ExpectedFrameRate

这个属性表示期望编码后的帧率，也就是FPS。这个设置并不能控制帧率，实际的帧率还依赖于帧的持续时间，并且有可能变化。该属性的值为一个CFNumberRef类型。
kVTCompressionPropertyKey_AverageBitRate

这个属性表示平均码率，单位是bps。码率大的话，画面会非常清晰，但同时文件也会比较大。码率小的话，图像有时会模糊。该属性的值为一个CFNumberRef类型。
kVTCompressionPropertyKey_DataRateLimits

这个属性表示硬性码率限制。该属性的值是一个CFArrayRef，数组中的CFNumber成对出现：[字节数, 秒数, 字节数, 秒数, ...]，每一对表示在给定秒数的时间窗口内最多输出多少字节。

//  Encode in real time.
VTSessionSetProperty(_encoderSession,
                     kVTCompressionPropertyKey_RealTime,
                     kCFBooleanTrue);
//  Profile and level of the encoded stream.
VTSessionSetProperty(_encoderSession,
                     kVTCompressionPropertyKey_ProfileLevel,
                     kVTProfileLevel_H264_Baseline_AutoLevel);
//  Disallow frame reordering, i.e. do not produce B frames.
VTSessionSetProperty(_encoderSession,
                     kVTCompressionPropertyKey_AllowFrameReordering,
                     kCFBooleanFalse);

//  Maximum key-frame interval.
//  Every CFNumberCreate below follows the Create Rule, so each object is released
//  as soon as it has been handed to the session.
int frameInterval = 10;
CFNumberRef frameIntervalRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &frameInterval);
VTSessionSetProperty(_encoderSession,
                     kVTCompressionPropertyKey_MaxKeyFrameInterval,
                     frameIntervalRef);
CFRelease(frameIntervalRef);

//  Expected frame rate (FPS).
int fps = 10;
CFNumberRef fpsRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &fps);
VTSessionSetProperty(_encoderSession,
                     kVTCompressionPropertyKey_ExpectedFrameRate,
                     fpsRef);
CFRelease(fpsRef);

//  Average bit rate, in bits per second.
int bitRate = self.width * self.height * 3 * 4 * 8;
CFNumberRef bitRateRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &bitRate);
VTSessionSetProperty(_encoderSession,
                     kVTCompressionPropertyKey_AverageBitRate,
                     bitRateRef);
CFRelease(bitRateRef);

//  Hard data-rate limit. The property takes an array of [bytes, seconds] pairs,
//  not a bare number: at most bitRateLimit bytes in any limitWindowSeconds window.
int bitRateLimit = self.width * self.height * 3 * 4;
int limitWindowSeconds = 1;
CFNumberRef bitRateLimitRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &bitRateLimit);
CFNumberRef limitWindowRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &limitWindowSeconds);
const void *limitValues[] = { bitRateLimitRef, limitWindowRef };
CFArrayRef dataRateLimits = CFArrayCreate(kCFAllocatorDefault, limitValues, 2, &kCFTypeArrayCallBacks);
VTSessionSetProperty(_encoderSession,
                     kVTCompressionPropertyKey_DataRateLimits,
                     dataRateLimits);
CFRelease(dataRateLimits);
CFRelease(limitWindowRef);
CFRelease(bitRateLimitRef);

3. 准备编码

//  Ask the session to get ready for encoding; any status other than noErr is logged.
OSStatus status = VTCompressionSessionPrepareToEncodeFrames(encodeSesion);
if(status != noErr) {
    NSLog(@"prepare to encode error! [status : %d]", (int)status);
}

5. 编码

我们拿到CMSampleBuffer的数据就可以使用VTCompressionSessionEncodeFrame()来进行编码了。

CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
if (pixelBuffer == NULL) {
    //  The encoder's imageBuffer parameter is non-null, so a sample without an image is skipped.
    NSLog(@"encode sample buffer error [no image buffer]");
} else {
    //  Every presentation timestamp must be greater than the previous one;
    //  the ever-increasing frame counter guarantees that.
    self.frameID++;
    CMTime timeStamp = CMTimeMake(self.frameID, 1000);
    //  No duration information is available for the frame.
    CMTime duration = kCMTimeInvalid;
    VTEncodeInfoFlags flag;
    OSStatus status = VTCompressionSessionEncodeFrame(self.encoderSession, pixelBuffer, timeStamp, duration, NULL, NULL, &flag);
    if (status != noErr) {
        //  Cast for the %d specifier, matching the other status logs.
        NSLog(@"encode sample buffer error [status : %d]", (int)status);
    }
}

6. 编码后处理

这部分在回调函数中执行。

(1). 判断编码状态

//  Nothing to process when the callback was handed an error status for this frame.
if (status != noErr) {
    return;
}

(2). 判断数据有没有准备好

//  Leave the callback early while the sample buffer's data is not ready yet.
if (!CMSampleBufferDataIsReady(sampleBuffer)) {
   return;
}

(3). 获得当前对象

这个步骤依赖于我们在创建会话的时候传入的参考值

//  Recover the encoder object from the reference value passed when the session was created.
//  __bridge performs a plain cast with no ownership change.
KKKVideoCoder *coder = (__bridge KKKVideoCoder *)(outputCallbackRefCon);

(4). 判断关键帧(I帧)

CFArrayRef attachmentsArray = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, true);
//  Reading index 0 of an empty array is out of bounds, so check the count as well.
if (!attachmentsArray || CFArrayGetCount(attachmentsArray) == 0) {
   return;
}
CFDictionaryRef dict = (CFDictionaryRef)CFArrayGetValueAtIndex(attachmentsArray, 0);
if (!dict) {
   return;
}
//  Samples are sync samples (key frames) by default; only a NotSync attachment whose
//  value is true marks a non-key frame, so test the value and not just the key's presence.
Boolean isIFrame = true;
CFBooleanRef notSync = (CFBooleanRef)CFDictionaryGetValue(dict, kCMSampleAttachmentKey_NotSync);
if (notSync && CFBooleanGetValue(notSync)) {
   isIFrame = false;
}

关于kCMSampleAttachmentKey_NotSync，官方文档有这样一段discussion:

A sync sample, also known as a key frame or IDR (Instantaneous Decoding Refresh), can be decoded without requiring any previous samples to have been decoded. Samples following a sync sample also do not require samples prior to the sync sample to have been decoded. Samples are assumed to be sync samples by default — set the value for this key to kCFBooleanTrue for samples which should not be treated as sync samples.
This attachment is read from and written to media files.

简单翻译如下：

一个同步样本，即关键帧或者IDR，可以在不解码任何之前的样本的情况下进行解码。一个同步样本之后的样本也不需要这个同步样本之前的样本完成解码。默认情况下，样本被假定为同步样本。— 若样本不被视为同步样本，则会将这个key设置为kCFBooleanTrue。
该附件是从媒体文件读取和写入的。

(5). 从关键帧(I帧)获取SPS和PPS

如果我们拿到的是关键帧(I帧)，我们就需要在数据前拼接相应的SPS和PPS。

获取描述信息

//  Format description of the encoded sample; the SPS and PPS are read from it.
CMFormatDescriptionRef formatDesc = CMSampleBufferGetFormatDescription(sampleBuffer);

获取SPS

//  The SPS is read as parameter set 0 of the H.264 format description.
size_t spsSize, spsCount;
const uint8_t *spsData;
OSStatus spsStatus = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(formatDesc, 0, &spsData, &spsSize, &spsCount, NULL);
if (spsStatus != noErr) {
    //  Cast for the %d specifier, matching the other status logs.
    NSLog(@"get SPS error! [status : %d]", (int)spsStatus);
} else {
    coder.hasSPS = YES;

    //  Put the 4-byte start code in front of the raw SPS bytes.
    NSMutableData *sps = [NSMutableData dataWithCapacity:4 + spsSize];
    [sps appendBytes:startCode length:4];
    [sps appendBytes:spsData length:spsSize];

    //  Hand the result to the delegate on the coder's callback queue.
    dispatch_async(coder.callBackQueue, ^{
        [coder.delegate encoderGetSPSData:sps];
    });
}

获取PPS

//  The PPS is read as parameter set 1 of the H.264 format description.
size_t ppsSize, ppsCount;
const uint8_t *ppsData;
OSStatus ppsStatus = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(formatDesc, 1, &ppsData, &ppsSize, &ppsCount, NULL);
if (ppsStatus != noErr) {
    //  Cast for the %d specifier, matching the other status logs.
    NSLog(@"get PPS error! [status : %d]", (int)ppsStatus);
} else {
    coder.hasPPS = YES;

    //  Put the 4-byte start code in front of the raw PPS bytes.
    NSMutableData *pps = [NSMutableData dataWithCapacity:4 + ppsSize];
    [pps appendBytes:startCode length:4];  //  startCode is "\x00\x00\x00\x01"
    [pps appendBytes:ppsData length:ppsSize];

    //  Hand the result to the delegate on the coder's callback queue.
    dispatch_async(coder.callBackQueue, ^{
        [coder.delegate encoderGetPPSData:pps];
    });
}

(6). 处理编码后的数据

获取编码后的数据

CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);

//  Initialised so that no indeterminate value can be used after a failed call.
size_t lengthAtOffsetOut = 0, totalLengthOut = 0;
char * dataPointOut = NULL;
OSStatus error = CMBlockBufferGetDataPointer(blockBuffer, 0, &lengthAtOffsetOut, &totalLengthOut, &dataPointOut);
if (error != kCMBlockBufferNoErr) {
    NSLog(@"get block buffer data pointer failed! [status : %d]", (int)error);
    //  Without a valid pointer and length there is nothing that can be parsed.
    return;
}
//  The pointer is only valid for lengthAtOffsetOut bytes; if that is less than the
//  total length the data is not contiguous and must not be walked as a single block.
if (lengthAtOffsetOut < totalLengthOut) {
    NSLog(@"get block buffer data pointer failed! [data is not contiguous]");
    return;
}

循环从`dataPointOut`指向的数据中取出每一个NALU

注意：编码器返回的数据中，每个NALU前面的4个字节不是起始码(start code)，而是以大端模式存储的该NALU的长度，需要把它替换成起始码

size_t offset = 0;
//  Each NAL unit is preceded by a 4-byte big-endian length; that prefix has the
//  same size as the start code that replaces it in the output.
const int startCodeLength = 4;
//  Written as an addition: with the unsigned subtraction "totalLengthOut - startCodeLength"
//  a buffer shorter than 4 bytes would wrap around and the loop would read out of bounds.
while (offset + startCodeLength < totalLengthOut) {
   //  Start of the NAL unit currently being processed
   char *src = dataPointOut + offset;

   //  Read the length prefix (still big-endian here)
   uint32_t naluBigLength = 0;
   memcpy(&naluBigLength, src, startCodeLength);

   //  Convert the length to host byte order
   uint32_t naluHostLength = CFSwapInt32BigToHost(naluBigLength);

   //  A length that runs past the end of the buffer means the data is damaged; stop
   //  instead of copying bytes that are not part of the buffer.
   if (naluHostLength > totalLengthOut - offset - startCodeLength) {
       NSLog(@"invalid NALU length [length : %u]", (unsigned int)naluHostLength);
       break;
   }

   //  Size of the whole unit: prefix plus payload
   size_t naluLength = (size_t)startCodeLength + naluHostLength;

   //  Build the output: start code followed by the payload
   NSMutableData *data = [NSMutableData dataWithCapacity:naluLength];
   [data appendBytes:startCode length:4];
   [data appendBytes:src + startCodeLength length:naluHostLength];

   //  Hand the unit to the delegate on the coder's callback queue
   dispatch_async(coder.callBackQueue, ^{
       [coder.delegate encoderGetData:data];
   });

   offset += naluLength;
}

7. 编码结束

我们可以在dealloc方法中结束编码操作

/// Finishes any pending frames, then invalidates and releases the compression session.
- (void)dealloc {
    //  The ivar is used directly: property accessors should not be called on an
    //  object that is being deallocated.
    if (_encoderSession) {
        //  NOTE(review): completing pending frames here may invoke the output callback
        //  with this object while it is being deallocated; consider an explicit stop
        //  method that runs before the last reference goes away — confirm.
        VTCompressionSessionCompleteFrames(_encoderSession, kCMTimeInvalid);
        VTCompressionSessionInvalidate(_encoderSession);
        //  The session is a Core Foundation object, so it is released manually.
        CFRelease(_encoderSession);
        _encoderSession = NULL;
    }
}