VideoToolbox H.264 Hardware Encoding: Approach and Detailed Steps

1. Overall Approach

  1. Take the video stream captured by AVFoundation -> CMSampleBufferRef -> hardware-encode it to H.264
  2. Initialize the encoder -> VTCompressionSessionCreate
  3. After the encoder is initialized, set its encoding properties
  4. Start encoding: VTCompressionSessionEncodeFrame -> CVImageBufferRef
  5. Convert the successfully encoded CMSampleBuffer into an H.264 stream
    When a frame finishes encoding, the data arrives in the VTCompressionOutputCallback -> parse out SPS, PPS, and the CMBlockBufferRef
  6. Package into an H.264 stream: prepend a start code to SPS, PPS, and the CMBlockBufferRef -> NSMutableData

2. Detailed Steps

1. Take the video stream captured by AVFoundation -> CMSampleBufferRef -> hardware-encode it to H.264

1. Convert CMSampleBufferRef -> CVImageBufferRef -> pass to VTCompressionSessionEncodeFrame

2. Initialize the encoder
  1. Key function: VTCompressionSessionCreate
  2. VTCompressionSessionCreate parameters: capture width, capture height, codec type kCMVideoCodecType_H264, a pointer to the output callback (e.g. didCompressionH264), an Objective-C object bridged into C, and the session object VTCompressionSessionRef encodeingSession
3. After the encoder is initialized, set its encoding properties
  1. Enable real-time encoding output
    kVTCompressionPropertyKey_RealTime -> kCFBooleanTrue

  2. Drop B-frames
    kVTCompressionPropertyKey_AllowFrameReordering -> kCFBooleanFalse

  3. Set the keyframe (GOP) interval
    kVTCompressionPropertyKey_MaxKeyFrameInterval -> 60

  4. Set the expected FPS
    kVTCompressionPropertyKey_ExpectedFrameRate -> 30

  5. Set the average bit rate
    kVTCompressionPropertyKey_AverageBitRate -> width * height * 3 * 4

  6. Set the bit-rate hard limit
    kVTCompressionPropertyKey_DataRateLimits -> width * height * 3 * 4

  7. Prepare to encode
    VTCompressionSessionPrepareToEncodeFrames

4. Start encoding: VTCompressionSessionEncodeFrame -> CVImageBufferRef
  1. Convert the data type: CMSampleBufferRef -> CVImageBufferRef
  2. Build the CMTime presentation timestamp
  3. Encoding function: VTCompressionSessionEncodeFrame
  4. Main parameters of VTCompressionSessionEncodeFrame: the encoder session encodeingSession, the video data CVImageBufferRef, the CMTime timestamp, the bridged Objective-C object, and the async info flags
5. Convert the successfully encoded CMSampleBuffer into an H.264 stream

When a frame finishes encoding, the data arrives in the VTCompressionOutputCallback -> parse out SPS, PPS, and the CMBlockBufferRef

  1. Read the keyframe flag
  2. If it is a keyframe, extract the SPS and PPS data
  3. Walk the encoded data: CMBlockBufferGetDataPointer -> CMBlockBufferRef -> extract the stream units (NALUs)
6. Prepend a start code to SPS, PPS, and the CMBlockBufferRef -> NSMutableData

1. After H.264 hardware encoding completes, VTCompressionOutputCallback is invoked
2. Convert the successfully encoded CMSampleBuffer into an H.264 stream that can be sent over the network
3. Parse out the SPS & PPS parameter sets and prepend a start code to assemble NALUs. Extract the video data, replace each length prefix with a start code to form a NALU, and send the NALUs out.

  1. The start code \x00\x00\x00\x01 (four bytes) is prepended to the very front of each piece of data
    \x00\x00\x00\x01 -> SPS -> NSMutableData
    \x00\x00\x00\x01 -> PPS -> NSMutableData
    \x00\x00\x00\x01 -> CMBlockBufferRef -> NSMutableData

3. Code Implementation

1. Headers, properties, and instance variables
#import <AVFoundation/AVFoundation.h>
#import <VideoToolbox/VideoToolbox.h>
@interface VideoH264Encoder ()
{
    int32_t width;
    int32_t height;
}

// Counter; frameID++ is used to build the CMTime for each frame
@property (nonatomic, assign) int64_t frameID;

// Block callback
@property (nonatomic, copy) void (^H264DataBlock)(NSData *data);

// Serial encoding queue
@property (nonatomic, strong) dispatch_queue_t encodeQueue;

// Compression session
@property (nonatomic, assign) VTCompressionSessionRef compressionSession;
2. Class initializer
- (id)initWithVedioWidth:(int32_t)videoWidth vedioHeight:(int32_t)videoHeight {
    self = [super init];
    if (self) {
        // resolution configured for video capture
        width = videoWidth;
        height = videoHeight;
        self.encodeQueue = dispatch_queue_create("encode queue", DISPATCH_QUEUE_SERIAL);
        [self setupVideoToolBox];
    }
    return self;
}
3. Initialize the encoder and set its properties
// 1. Initialize the encoder
- (void)setupVideoToolBox {
    
    self.frameID = 0;
    // width/height -> video dimensions; codec type -> kCMVideoCodecType_H264
    // bridged OC object -> (__bridge void *)(self); session object -> compressionSession
    
    // 1. Create the compression session
    OSStatus status = VTCompressionSessionCreate(NULL, width, height, kCMVideoCodecType_H264, NULL, NULL, NULL, didCompressionH264, (__bridge void *)(self), &_compressionSession);
    NSLog(@"VTCompressionSessionCreate status == %d", status);
    if (status != noErr) {
        NSLog(@"unable to create H264 session -> VTCompressionSessionCreate");
        return;
    }
    
    // 1. Real-time encoding output (avoids latency)
    VTSessionSetProperty(self.compressionSession, kVTCompressionPropertyKey_RealTime, kCFBooleanTrue);
    VTSessionSetProperty(self.compressionSession, kVTCompressionPropertyKey_ProfileLevel, kVTProfileLevel_H264_Baseline_AutoLevel);
    
    // 2. Whether to produce B-frames -> kCFBooleanFalse -> drop B-frames
    // The usual practice is to disable B-frames to ensure real-time delivery
    // (B-frames are not required for decoding and can be dropped)
    VTSessionSetProperty(self.compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, kCFBooleanFalse);
    
    // 3. Set the keyframe (GOP size) interval
    int frameInterval = 60;
    CFNumberRef frameIntervalRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &frameInterval);
    VTSessionSetProperty(self.compressionSession, kVTCompressionPropertyKey_MaxKeyFrameInterval, frameIntervalRef);
    CFRelease(frameIntervalRef);
    
    // 4. Set the expected frame rate
    int fps = 30;
    CFNumberRef fpsRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &fps);
    VTSessionSetProperty(self.compressionSession, kVTCompressionPropertyKey_ExpectedFrameRate, fpsRef);
    CFRelease(fpsRef);
    
    // 5. Set the average bit rate (bits per second)
    int bitRate = width * height * 3 * 4 * 8;
    CFNumberRef bitRateRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &bitRate);
    VTSessionSetProperty(self.compressionSession, kVTCompressionPropertyKey_AverageBitRate, bitRateRef);
    CFRelease(bitRateRef);
    
    // 6. Set the bit-rate hard limit
    // kVTCompressionPropertyKey_DataRateLimits expects a CFArray of
    // [bytes, seconds] pairs, not a bare CFNumber
    int bitRateLimit = width * height * 3 * 4;
    int oneSecond = 1;
    CFNumberRef bitRateLimitRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &bitRateLimit);
    CFNumberRef oneSecondRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &oneSecond);
    const void *limitValues[] = { bitRateLimitRef, oneSecondRef };
    CFArrayRef limitsRef = CFArrayCreate(kCFAllocatorDefault, limitValues, 2, &kCFTypeArrayCallBacks);
    VTSessionSetProperty(self.compressionSession, kVTCompressionPropertyKey_DataRateLimits, limitsRef);
    CFRelease(bitRateLimitRef);
    CFRelease(oneSecondRef);
    CFRelease(limitsRef);
    
    // 7. Prepare to start encoding
    VTCompressionSessionPrepareToEncodeFrames(self.compressionSession);
}
}
4. Start encoding
// 2. Start encoding
- (void)encodeCMSampleBuffer:(CMSampleBufferRef)sampleBuffer h264DataBlock:(void(^)(NSData *))h264DataBlock {
    CFRetain(sampleBuffer);

    dispatch_async(self.encodeQueue, ^{
        if (!self.compressionSession) {
            CFRelease(sampleBuffer);  // balance the CFRetain on early return
            return;
        }
        // 1. Save the block callback
        self.H264DataBlock = h264DataBlock;
        
        // 2. CMSampleBufferRef -> CVImageBufferRef
        CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
        
        // 3. Build a CMTime timestamp from the current frame count
        CMTime presentationTimeStamp = CMTimeMake(self.frameID++, 1000);
        VTEncodeInfoFlags flags;
        
        // 4. Encode this frame
        // Parameters: 1. compressionSession  2. imageBuffer video data
        // 3. presentationTimeStamp  4. (__bridge void *)(self) bridged OC object
        OSStatus status = VTCompressionSessionEncodeFrame(self.compressionSession, imageBuffer, presentationTimeStamp, kCMTimeInvalid, NULL, (__bridge void *)(self), &flags);
        if (status != noErr) {
            NSLog(@"VTCompressionSessionEncodeFrame failed");
        }
        }

        CFRelease(sampleBuffer);
    });
}
5. Parse the data in the encoding callback
// 2. Callback function -> parse out SPS, PPS, and NAL units
// Completion callback registered in VTCompressionSessionCreate -> didCompressionH264
void didCompressionH264(void * CM_NULLABLE outputCallbackRefCon, void * CM_NULLABLE sourceFrameRefCon, OSStatus status, VTEncodeInfoFlags infoFlags, CM_NULLABLE CMSampleBufferRef sampleBuffer) {
    
    // 1. Check the status
    if (status != noErr) {
        NSLog(@"didCompressionH264 error");
        return;
    }
    
    // 2. Check that the CMSampleBufferRef is ready
    if (!CMSampleBufferDataIsReady(sampleBuffer)) {
        NSLog(@"CMSampleBufferRef is not ready");
        return;
    }
    
    // 3. Convert the bridged OC object back
    VideoH264Encoder * encoder = (__bridge VideoH264Encoder*)outputCallbackRefCon;
    
    // 4. Check whether this is a keyframe -> if so, extract the SPS and PPS data
    bool isKeyFrame = !CFDictionaryContainsKey(CFArrayGetValueAtIndex(CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, true), 0), kCMSampleAttachmentKey_NotSync);
    
    if(isKeyFrame) {
        // 5. The encoded stream's format information lives in a CMFormatDescriptionRef
        CMFormatDescriptionRef formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer);
        
        // index: sps = 0, pps = 1
        const uint8_t * spsParameterSet;
        size_t spsParameterSetSize, spsParameterCount;
        // 6. Get the SPS data: format description, parameter-set pointer, size, count
        OSStatus spsStatus = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(formatDescription, 0, &spsParameterSet, &spsParameterSetSize, &spsParameterCount, 0);
        if (spsStatus != noErr) {
            NSLog(@"CMVideoFormatDescriptionGetH264ParameterSetAtIndex get SPS failed");
        }
        
        // index: sps = 0, pps = 1
        // 7. Get the PPS data: format description, parameter-set pointer, size, count
        const uint8_t * ppsParameterSet;
        size_t ppsParameterSetSize, ppsParameterCount;
        OSStatus ppsStatus = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(formatDescription, 1, &ppsParameterSet, &ppsParameterSetSize, &ppsParameterCount, 0);
        if (ppsStatus != noErr) {
            NSLog(@"CMVideoFormatDescriptionGetH264ParameterSetAtIndex get PPS failed");
        }
        
        // 8. Wrap SPS and PPS in NSData
        NSData * spsData = [NSData dataWithBytes:spsParameterSet length:spsParameterSetSize];
        NSData * ppsData = [NSData dataWithBytes:ppsParameterSet length:ppsParameterSetSize];
        
        // 9. Prepend the 0x00 00 00 01 start code
        [encoder gotSpsData:spsData ppsData:ppsData];
    }
    
    // 10. Get the encoded video data as a CMBlockBuffer
    CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
    size_t length, totalLength;
    char * dataPointer;
    
    OSStatus statusCodeRet = CMBlockBufferGetDataPointer(blockBuffer, 0, &length, &totalLength, &dataPointer);
    if (statusCodeRet == noErr) {
        size_t bufferOffset = 0;
        // The first four bytes of the returned NALU data are not the 0001
        // start code but the frame length in big-endian byte order
        static const int AVCCHeaderLength = 4;
        while (bufferOffset < totalLength - AVCCHeaderLength) {
            uint32_t NALUUnitLength = 0;
            
            // 1. Read the first 4 bytes into NALUUnitLength: the NAL unit length
            memcpy(&NALUUnitLength, dataPointer + bufferOffset, AVCCHeaderLength);
            
            // 2. Big-endian to host byte order -> iOS/macOS are little-endian
            NALUUnitLength = CFSwapInt32BigToHost(NALUUnitLength);
            
            // 3. Read the CMBlockBufferRef data: NALUUnitLength bytes
            // starting at dataPointer + bufferOffset + AVCCHeaderLength
            NSData * blockData = [[NSData alloc] initWithBytes:(dataPointer + bufferOffset + AVCCHeaderLength) length:NALUUnitLength];
            
            [encoder gotBlockData:blockData];
            
            // 4. Advance the offset past this unit and read the next NALU
            bufferOffset += NALUUnitLength + AVCCHeaderLength;
        }
    }
}
6. Prepend the start code to SPS and PPS to form a NALU stream, wrap it in NSData, and hand it to the caller
// 3. Process the SPS and PPS data -> NALU stream
- (void)gotSpsData:(NSData *)spsData ppsData:(NSData *)ppsData {
    // 1. Build the NALU start code; the convention is 00 00 00 01, four bytes
    const char bytes[] = "\x00\x00\x00\x01";
    
    // sizeof(bytes) - 1 drops the trailing C-string terminator \0
    size_t length = (sizeof(bytes)) - 1;
    NSData * headerData = [NSData dataWithBytes:bytes length:length];
    
    // 2. SPS data -> start code prepended at the very front
    NSMutableData * spsH264Data = [[NSMutableData alloc] init];
    [spsH264Data appendData:headerData];
    [spsH264Data appendData:spsData];
    NSLog(@"spsH264Data length == %lu", (unsigned long)[spsH264Data length]);

    if (self.H264DataBlock) {
        self.H264DataBlock(spsH264Data);
    }
    
    // 3. PPS data -> start code prepended at the very front
    NSMutableData * ppsH264Data = [[NSMutableData alloc] init];
    [ppsH264Data appendData:headerData];
    [ppsH264Data appendData:ppsData];
    
    NSLog(@"ppsH264Data length == %lu", (unsigned long)[ppsH264Data length]);
    if (self.H264DataBlock) {
        self.H264DataBlock(ppsH264Data);
    }
}
7. Prepend the start code to the BlockBuffer video data to form a NALU stream, wrap it in NSData, and hand it to the caller

// 4. Process the BlockBuffer video data -> NALU stream
- (void)gotBlockData:(NSData *)blockData {
    // 1. Build the NALU start code; the convention is 00 00 00 01, four bytes
    const char bytes[] = "\x00\x00\x00\x01";
    // sizeof(bytes) - 1 drops the trailing C-string terminator \0
    size_t length = (sizeof(bytes)) - 1;
    NSData * headerData = [NSData dataWithBytes:bytes length:length];
    NSMutableData * data = [[NSMutableData alloc] init];
    [data appendData:headerData];
    [data appendData:blockData];
    NSLog(@"blockData length == %lu", (unsigned long)[data length]);

    if (self.H264DataBlock) {
        self.H264DataBlock(data);
    }
}
8. Finish encoding and release the C-level resources
// 5. Finish encoding
- (void)endEncode {
    if (self.compressionSession) {
        // 1. Flush any pending frames
        VTCompressionSessionCompleteFrames(self.compressionSession, kCMTimeInvalid);
        // 2. Invalidate the session
        VTCompressionSessionInvalidate(self.compressionSession);
        // 3. Release the session
        CFRelease(self.compressionSession);
        self.compressionSession = NULL;
    }
}
