Mainstream Video Live-Streaming Technology on iOS

Flow chart

[Figure 1: 流程图.jpg — overall pipeline]

1. Video Encoding

[Figure 2: 11.png]

1.1 Initializing the video encoder

The initialization call:
VTCompressionSessionCreate(
kCFAllocatorDefault,
width,
height,
kCMVideoCodecType_H264,
nil,
attributes as CFDictionary?,
nil,
callback,
Unmanaged.passUnretained(self).toOpaque(),
&_session)
You need to configure the usual parameters here: frame size, bitrate, frame rate, the callback function, and so on.
width and height are the dimensions of the encoded frames.
kCMVideoCodecType_H264 selects the codec.
attributes holds the stream settings; the keys involved are:
[kVTCompressionPropertyKey_RealTime: kCFBooleanTrue, // real-time encoding
kVTCompressionPropertyKey_ProfileLevel: kVTProfileLevel_H264_Baseline_3_1 as NSObject, // picture quality: Baseline Level 1.3 (low), Baseline Level 3 (SD), Baseline Level 3.1 (semi-HD), Baseline Level 4.1 (full HD); Baseline is meant for live streaming, Main for stored media, High for high-quality storage (3.1 & 4.1 only)
kVTCompressionPropertyKey_AverageBitRate: Int(bitrate) as NSObject, // target bitrate
kVTCompressionPropertyKey_ExpectedFrameRate: NSNumber(value: expectedFPS), // frame rate
kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration: NSNumber(value: 2.0), // keyframe interval, in seconds
kVTCompressionPropertyKey_AllowFrameReordering: !isBaseline as NSObject, // whether to produce B-frames; set to false for live streaming (a B-frame is a bi-directional difference frame that records the difference from both the previous and the following frame, which saves a lot of space but costs more computation)
kVTCompressionPropertyKey_PixelTransferProperties: [
    "ScalingMode": "Trim"
] as NSObject, // pixel transfer rules
kVTCompressionPropertyKey_H264EntropyMode: kVTH264EntropyMode_CABAC] // entropy-coding algorithm when encoding H.264
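Note that the kVTCompressionPropertyKey_* entries above are compression-session properties rather than pixel-buffer attributes, so a common pattern is to apply them to the session right after VTCompressionSessionCreate with VTSessionSetProperty. A minimal sketch, assuming session refers to the VTCompressionSession created above (passed as &_session there), that bitrate and expectedFPS are your target values, and that the older unlabeled Swift VideoToolbox signatures used throughout this article are available:

import VideoToolbox

// Illustrative only: apply the stream settings to an already-created session.
VTSessionSetProperty(session, kVTCompressionPropertyKey_RealTime, kCFBooleanTrue)
VTSessionSetProperty(session, kVTCompressionPropertyKey_ProfileLevel, kVTProfileLevel_H264_Baseline_3_1)
VTSessionSetProperty(session, kVTCompressionPropertyKey_AverageBitRate, NSNumber(value: bitrate))
VTSessionSetProperty(session, kVTCompressionPropertyKey_ExpectedFrameRate, NSNumber(value: expectedFPS))
VTSessionSetProperty(session, kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, NSNumber(value: 2.0))
VTSessionSetProperty(session, kVTCompressionPropertyKey_AllowFrameReordering, kCFBooleanFalse) // no B-frames for live streaming
VTCompressionSessionPrepareToEncodeFrames(session) // optional, but avoids a delay on the first frame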

1.2 Setting the callback function

private var callback: VTCompressionOutputCallback = {(
outputCallbackRef: UnsafeMutableRawPointer?,
sourceFrameRef: UnsafeMutableRawPointer?,
status: OSStatus,
infoFlags: VTEncodeInfoFlags,
sampleBuffer: CMSampleBuffer?) in
guard let ref: UnsafeMutableRawPointer = outputCallbackRef,
let sampleBuffer: CMSampleBuffer = sampleBuffer, status == noErr else {
return
}
let encoder: H264Encoder = Unmanaged.fromOpaque(ref).takeUnretainedValue() // we passed self in at initialization time; here we take it back out
encoder.formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) // the stream's format description, used for the encoded output
encoder.delegate?.sampleOutput(video: sampleBuffer) // hand the buffer to the outside: parse the CMSampleBufferRef into SPS, PPS, I-frames and non-I-frames, then push them out over RTMP
}
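The last comment mentions splitting the output into SPS, PPS, I-frames and non-I-frames before pushing over RTMP. The SPS and PPS live in the format description saved above; a rough sketch of pulling them out with the public CoreMedia call (the helper name and surrounding plumbing are illustrative, not the article's own code):

import CoreMedia
import Foundation

// Illustrative helper: extract the SPS and PPS from an H.264 format description.
func extractParameterSets(from formatDescription: CMFormatDescription) -> (sps: Data, pps: Data)? {
    var spsPointer: UnsafePointer<UInt8>?
    var spsSize: Int = 0
    var ppsPointer: UnsafePointer<UInt8>?
    var ppsSize: Int = 0
    // Index 0 is the SPS and index 1 is the PPS for a typical H.264 stream.
    guard CMVideoFormatDescriptionGetH264ParameterSetAtIndex(formatDescription, 0, &spsPointer, &spsSize, nil, nil) == noErr,
          CMVideoFormatDescriptionGetH264ParameterSetAtIndex(formatDescription, 1, &ppsPointer, &ppsSize, nil, nil) == noErr,
          let sps = spsPointer, let pps = ppsPointer else {
        return nil
    }
    return (Data(bytes: sps, count: spsSize), Data(bytes: pps, count: ppsSize))
}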

1.3 Encoding

After a frame is encoded, the callback from 1.2 is invoked automatically.
BTW: the call below is made from the video-capture delegate:
func captureOutput(_ captureOutput: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    guard let imageBuffer: CVImageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return } // the encoder consumes the raw CVPixelBuffer inside the captured sample buffer
    var flags: VTEncodeInfoFlags = []
    VTCompressionSessionEncodeFrame(
        session,
        imageBuffer,
        CMSampleBufferGetPresentationTimeStamp(sampleBuffer),
        CMSampleBufferGetDuration(sampleBuffer),
        nil,
        nil,
        &flags
    )
}
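The two nil arguments are the per-frame properties and the source-frame refcon. If a keyframe ever needs to be forced (for example when a new viewer joins mid-stream), the per-frame properties dictionary is where that request goes. A hedged sketch, reusing the session, imageBuffer, sampleBuffer and flags from the capture callback above:

import VideoToolbox

// Illustrative only: ask the encoder to emit a keyframe for this particular frame.
let frameProperties = [kVTEncodeFrameOptionKey_ForceKeyFrame as String: true] as CFDictionary
VTCompressionSessionEncodeFrame(
    session,
    imageBuffer,
    CMSampleBufferGetPresentationTimeStamp(sampleBuffer),
    CMSampleBufferGetDuration(sampleBuffer),
    frameProperties, // per-frame options instead of nil
    nil,
    &flags
)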

[Figure 3: 22.png — CMSampleBuffer internals]

The diagram shows the internal structure of a CMSampleBuffer before and after encoding/decoding.
Encoding goes from CVPixelBuffer to CMSampleBufferRef; decoding is the reverse.
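The encoded CMSampleBuffer carries its payload in a CMBlockBuffer of length-prefixed (AVCC-format) NAL units, which is what eventually gets repackaged into FLV/RTMP tags in section 3. A sketch of walking those NAL units; this is illustrative only, and enumerateNALUnits is a hypothetical helper, not part of the article's code:

import CoreMedia
import Foundation

// Illustrative: iterate the AVCC NAL units inside an encoded CMSampleBuffer.
func enumerateNALUnits(in sampleBuffer: CMSampleBuffer, handler: (Data) -> Void) {
    guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else { return }
    var totalLength: Int = 0
    var pointer: UnsafeMutablePointer<Int8>?
    guard CMBlockBufferGetDataPointer(blockBuffer, 0, nil, &totalLength, &pointer) == noErr,
          let base = pointer else { return }
    var offset = 0
    while offset + 4 <= totalLength {
        // Each NAL unit is preceded by a 4-byte big-endian length field.
        var nalLength: UInt32 = 0
        memcpy(&nalLength, base + offset, 4)
        nalLength = CFSwapInt32BigToHost(nalLength)
        let start = offset + 4
        guard start + Int(nalLength) <= totalLength else { break }
        handler(Data(bytes: base + start, count: Int(nalLength)))
        offset = start + Int(nalLength)
    }
}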


2. Audio Encoding

2.1 Creating the encoder

AudioConverterNewSpecific(
&inSourceFormat!, // input (source) format
&inDestinationFormat, // output (destination) format
UInt32(inClassDescriptions.count), // number of codec class descriptions
&inClassDescriptions, // codec class description array
&converter // the converter (encoder)
)
After creating the converter, you still need to set its bitrate:
var outputBitrate: UInt32 = 64000 * channelsCount // multiply by the channel count; note that AAC does not accept arbitrary bitrates: with 44.1 kHz PCM input, 64000 bps per channel works, while 16 kHz input can use 32000 bps
let propSize = UInt32(MemoryLayout<UInt32>.size)
AudioConverterSetProperty(converter,
    kAudioConverterEncodeBitRate,
    propSize,
    &outputBitrate)
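The inClassDescriptions array decides which AAC codec implementation the converter is built on. A minimal sketch of what it might contain, preferring the software encoder and falling back to the hardware one; treat this as an illustration rather than the article's exact setup:

import AudioToolbox

// Illustrative: codec class descriptions passed to AudioConverterNewSpecific above.
// Listing the software codec first makes it the preferred implementation.
var inClassDescriptions: [AudioClassDescription] = [
    AudioClassDescription(mType: kAudioEncoderComponentType,
                          mSubType: kAudioFormatMPEG4AAC,
                          mManufacturer: kAppleSoftwareAudioCodecManufacturer),
    AudioClassDescription(mType: kAudioEncoderComponentType,
                          mSubType: kAudioFormatMPEG4AAC,
                          mManufacturer: kAppleHardwareAudioCodecManufacturer)
]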

2.2 The audio stream description

inDestinationFormat = AudioStreamBasicDescription()
inDestinationFormat!.mSampleRate = sampleRate == 0 ? inSourceFormat!.mSampleRate : sampleRate // sample rate: 32 kHz, 44.1 kHz or 48 kHz
inDestinationFormat!.mFormatID = kAudioFormatMPEG4AAC // encode with AAC
inDestinationFormat!.mFormatFlags = profile // format-specific detail flags; 0 means no sub-format
inDestinationFormat!.mBytesPerPacket = 0 // bytes per packet; 0 means the packet size varies
inDestinationFormat!.mFramesPerPacket = 1024 // frames per packet: 1 for uncompressed data, a fixed larger number for constant-frames-per-packet formats (1024 for AAC), 0 for formats with a variable frame count per packet (e.g. Ogg)
inDestinationFormat!.mBytesPerFrame = 0 // bytes per frame; 0 for compressed data
inDestinationFormat!.mChannelsPerFrame = 1 // number of audio channels
inDestinationFormat!.mBitsPerChannel = 0 // 0 for compressed data
inDestinationFormat!.mReserved = 0 // padding for byte alignment; must be 0
CMAudioFormatDescriptionCreate(
kCFAllocatorDefault, &inDestinationFormat!, 0, nil, 0, nil, nil, &formatDescription
)
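The destination description above needs a matching source (PCM) description. A common approach is to read it off the first captured audio CMSampleBuffer; a minimal sketch, assuming sampleBuffer is the captured buffer from the capture callback and inSourceFormat is the same optional used in 2.1:

import CoreMedia
import AudioToolbox

// Illustrative: derive the source PCM AudioStreamBasicDescription from a captured buffer.
if inSourceFormat == nil,
   let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer),
   let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription)?.pointee {
    inSourceFormat = asbd // e.g. Linear PCM from the microphone at 44.1 kHz
}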

2.3 Transcoding

The audio stream comes in through the audio-capture delegate:
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// The encoding flow:
// 1. Create an AudioBufferList and copy the input data into it.
// 2. Set up the output buffer.
// 3. Call AudioConverterFillComplexBuffer; it in turn calls the inInputDataProc callback, which copies the input data into the converter.
// 4. Transcode: the converted data is written into the given output buffer.
// Set up the input:
var blockBuffer: CMBlockBuffer?
currentBufferList = AudioBufferList.allocate(maximumBuffers: 1)
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
sampleBuffer,
nil,
currentBufferList!.unsafeMutablePointer,
AudioBufferList.sizeInBytes(maximumBuffers: 1),
kCFAllocatorDefault,
kCFAllocatorDefault,
0,
&blockBuffer
)
// Set up the output:
var finished: Bool = false
while !finished {
var ioOutputDataPacketSize: UInt32 = 1
let dataLength: Int = blockBuffer!.dataLength
let outOutputData: UnsafeMutableAudioBufferListPointer = AudioBufferList.allocate(maximumBuffers: 1)
outOutputData[0].mNumberChannels = inDestinationFormat.mChannelsPerFrame
outOutputData[0].mDataByteSize = UInt32(dataLength)
outOutputData[0].mData = UnsafeMutableRawPointer.allocate(byteCount: dataLength, alignment: 0)
let status: OSStatus = AudioConverterFillComplexBuffer(
converter,
inputDataProc,
Unmanaged.passUnretained(self).toOpaque(),
&ioOutputDataPacketSize,
outOutputData.unsafeMutablePointer,
nil
)
if 0 <= status && ioOutputDataPacketSize == 1 {
var result: CMSampleBuffer?
var timing: CMSampleTimingInfo = CMSampleTimingInfo(sampleBuffer: sampleBuffer)
let numSamples: CMItemCount = sampleBuffer.numSamples
CMSampleBufferCreate(kCFAllocatorDefault, nil, false, nil, nil, formatDescription, numSamples, 1, &timing, 0, nil, &result)
CMSampleBufferSetDataBufferFromAudioBufferList(result!, kCFAllocatorDefault, kCFAllocatorDefault, 0, outOutputData.unsafePointer) // AudioConverterFillComplexBuffer writes its encoded packets into outOutputData; it pulls its input via the inputDataProc callback, which forwards to our own onInputDataForAudioConverter, where memcpy hands over the captured PCM. The line below is what finally takes the encoded buffer away.
delegate?.sampleOutput(audio: result!)
} else {
finished = true
}
for i in 0..<outOutputData.count {
    free(outOutputData[i].mData)
}
free(outOutputData.unsafeMutablePointer)
}
}
A closer look at AudioConverterFillComplexBuffer:
func AudioConverterFillComplexBuffer(
    _ inAudioConverter: AudioConverterRef,
    _ inInputDataProc: AudioConverterComplexInputDataProc,
    _ inInputDataProcUserData: UnsafeMutableRawPointer?,
    _ ioOutputDataPacketSize: UnsafeMutablePointer<UInt32>,
    _ outOutputData: UnsafeMutablePointer<AudioBufferList>,
    _ outPacketDescription: UnsafeMutablePointer<AudioStreamPacketDescription>?
) -> OSStatus
inAudioConverter: the converter.
inInputDataProc: the callback used to feed PCM data to the encoder.
inInputDataProcUserData: a pointer to user-defined data.
ioOutputDataPacketSize: the output data packet size.
outOutputData: pointer to the output AudioBufferList.
outPacketDescription: the output packet descriptions.
The callback:
private var inputDataProc: AudioConverterComplexInputDataProc = {(
    converter: AudioConverterRef,
    ioNumberDataPackets: UnsafeMutablePointer<UInt32>,
    ioData: UnsafeMutablePointer<AudioBufferList>,
    outDataPacketDescription: UnsafeMutablePointer<UnsafeMutablePointer<AudioStreamPacketDescription>?>?,
    inUserData: UnsafeMutableRawPointer?) in
    return Unmanaged<AACEncoder>.fromOpaque(inUserData!).takeUnretainedValue().onInputDataForAudioConverter(
        ioNumberDataPackets,
        ioData: ioData,
        outDataPacketDescription: outDataPacketDescription
    )
}
And the handler it forwards to:
func onInputDataForAudioConverter(
    _ ioNumberDataPackets: UnsafeMutablePointer<UInt32>,
    ioData: UnsafeMutablePointer<AudioBufferList>,
    outDataPacketDescription: UnsafeMutablePointer<UnsafeMutablePointer<AudioStreamPacketDescription>?>?) -> OSStatus {
guard let bufferList: UnsafeMutableAudioBufferListPointer = currentBufferList else {
ioNumberDataPackets.pointee = 0
return -1
}
memcpy(ioData, bufferList.unsafePointer, bufferListSize) // the callback above lands here; memcpy copies the captured PCM (currentBufferList) into ioData, which the converter then encodes into outOutputData
ioNumberDataPackets.pointee = 1
free(bufferList.unsafeMutablePointer)
currentBufferList = nil
return noErr
}


3. Stream Muxing

With the encoded video and audio from sections 1 and 2 in place, we can now mux them into the data that the socket will send.

3.1 Muxing the video stream
func sampleOutput(video sampleBuffer: CMSampleBuffer) {
let keyframe: Bool = !sampleBuffer.dependsOnOthers
var compositionTime: Int32 = 0
let presentationTimeStamp: CMTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
var decodeTimeStamp: CMTime = CMSampleBufferGetDecodeTimeStamp(sampleBuffer)
if decodeTimeStamp == kCMTimeInvalid {
decodeTimeStamp = presentationTimeStamp
} else {
compositionTime = Int32((presentationTimeStamp.seconds - decodeTimeStamp.seconds) * 1000)
}
let delta: Double = (videoTimestamp == kCMTimeZero ? 0 : decodeTimeStamp.seconds - videoTimestamp.seconds) * 1000
guard let data: Data = sampleBuffer.dataBuffer?.data, 0 <= delta else {
return
}
var buffer: Data = Data([((keyframe ? FLVFrameType.key.rawValue : FLVFrameType.inter.rawValue) << 4) | FLVVideoCodec.avc.rawValue, FLVAVCPacketType.nal.rawValue]) // build the FLV tag header
buffer.append(contentsOf: compositionTime.bigEndian.data[1..<4]) // composition time as a 24-bit big-endian value
buffer.append(data) // append the stream data
delegate?.sampleOutput(video: buffer, withTimestamp: delta, muxer: self) // hand it back out via the delegate
videoTimestamp = decodeTimeStamp
}
public enum FLVFrameType: UInt8 {
case key = 1
case inter = 2
case disposable = 3
case generated = 4
case command = 5
}
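Before a player can decode the FLVAVCPacketType.nal tags built above, it expects a single FLVAVCPacketType.seq tag carrying the AVCDecoderConfigurationRecord built from the SPS/PPS (the parameter sets pulled from the encoder's format description, as sketched in 1.2). A rough, illustrative sketch of that sequence-header tag body; the byte layout follows the FLV / ISO 14496-15 conventions, the helper name is hypothetical, and it assumes FLVAVCPacketType also has a .seq case (raw value 0):

import Foundation

// Illustrative: build the FLV "AVC sequence header" tag body from SPS/PPS.
func makeAVCSequenceHeader(sps: Data, pps: Data) -> Data {
    var buffer = Data([
        FLVFrameType.key.rawValue << 4 | FLVVideoCodec.avc.rawValue, // keyframe + AVC
        FLVAVCPacketType.seq.rawValue,                               // sequence header
        0x00, 0x00, 0x00                                             // composition time = 0
    ])
    // AVCDecoderConfigurationRecord
    buffer.append(contentsOf: [
        0x01,   // configurationVersion
        sps[1], // AVCProfileIndication
        sps[2], // profile_compatibility
        sps[3], // AVCLevelIndication
        0xFF,   // 6 reserved bits + lengthSizeMinusOne (4-byte NAL length prefixes)
        0xE1    // 3 reserved bits + numOfSequenceParameterSets = 1
    ])
    buffer.append(contentsOf: [UInt8(sps.count >> 8), UInt8(sps.count & 0xFF)])
    buffer.append(sps)
    buffer.append(0x01) // numOfPictureParameterSets = 1
    buffer.append(contentsOf: [UInt8(pps.count >> 8), UInt8(pps.count & 0xFF)])
    buffer.append(pps)
    return buffer
}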

3.2 Muxing the audio stream

func sampleOutput(audio sampleBuffer: CMSampleBuffer) {
let presentationTimeStamp: CMTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
let delta: Double = (audioTimestamp == kCMTimeZero ? 0 : presentationTimeStamp.seconds - audioTimestamp.seconds) * 1000
guard let data: Data = sampleBuffer.dataBuffer?.data, 0 <= delta else {
return
}
var buffer: Data = Data([RTMPMuxer.aac, FLVAACPacketType.raw.rawValue]) // build the FLV tag header
buffer.append(data) // append the stream data
delegate?.sampleOutput(audio: buffer, withTimestamp: delta, muxer: self) // hand it back out via the delegate
audioTimestamp = presentationTimeStamp
}
public enum FLVAACPacketType: UInt8 {
case seq = 0
case raw = 1
}
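Likewise, the FLVAACPacketType.raw tags need to be preceded by one FLVAACPacketType.seq tag carrying the two-byte AudioSpecificConfig, so the player knows the AAC profile, sample rate and channel count. A minimal, illustrative sketch for AAC-LC (the helper name is hypothetical; RTMPMuxer.aac is the same header byte used above):

import Foundation

// Illustrative: build the FLV "AAC sequence header" tag body (AudioSpecificConfig).
func makeAACSequenceHeader(sampleRate: Double, channels: UInt8) -> Data {
    // Index into the standard MPEG-4 sampling-frequency table (subset shown).
    let frequencyIndex: UInt8
    switch sampleRate {
    case 48000: frequencyIndex = 3
    case 44100: frequencyIndex = 4
    case 32000: frequencyIndex = 5
    case 16000: frequencyIndex = 8
    default:    frequencyIndex = 4
    }
    let audioObjectType: UInt8 = 2 // AAC-LC
    var buffer = Data([RTMPMuxer.aac, FLVAACPacketType.seq.rawValue])
    buffer.append(audioObjectType << 3 | frequencyIndex >> 1)  // 5-bit object type + high 3 bits of the frequency index
    buffer.append((frequencyIndex & 0x1) << 7 | channels << 3) // low bit of the frequency index + 4-bit channel configuration
    return buffer
}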

3.3 Assembling the RTMP protocol data (for reference only)
func sampleOutput(audio buffer: Data, withTimestamp: Double, muxer: RTMPMuxer) {
guard readyState == .publishing else {
return
}
let type: FLVTagType = .audio
let length: Int = rtmpConnection.socket.doOutput(chunk: // hand the chunk to the socket, which writes it to its stream
    RTMPChunk( // assemble the chunk
        type: audioWasSent ? .one : .zero, // .zero (full header) for the first chunk of this stream, .one (delta header) afterwards
streamId: type.streamId,
message: RTMPAudioMessage(streamId: id, timestamp: UInt32(audioTimestamp), payload: buffer)), locked: nil)
audioWasSent = true
OSAtomicAdd64(Int64(length), &info.byteCount) // atomic add to avoid lost updates; tracks the total number of bytes sent
audioTimestamp = withTimestamp + (audioTimestamp - floor(audioTimestamp))
}
The video path is almost identical; it just adds a lock:
func sampleOutput(video buffer: Data, withTimestamp: Double, muxer: RTMPMuxer) {
guard readyState == .publishing else {
return
}
let type: FLVTagType = .video
OSAtomicOr32Barrier(1, &mixer.videoIO.encoder.locked)
let length: Int = rtmpConnection.socket.doOutput(chunk: RTMPChunk(
type: videoWasSent ? .one : .zero,
streamId: type.streamId,
message: RTMPVideoMessage(streamId: id, timestamp: UInt32(videoTimestamp), payload: buffer)
), locked: &mixer.videoIO.encoder.locked)
videoWasSent = true
OSAtomicAdd64(Int64(length), &info.byteCount)
videoTimestamp = withTimestamp + (videoTimestamp - floor(videoTimestamp))
frameCount += 1
}
