void H264FUAFragmenter::doGetNextFrame() { if (fNumValidDataBytes == 1) { // We have no NAL unit data currently in the buffer. Read a new one: fInputSource->getNextFrame(&fInputBuffer[1], fInputBufferSize - 1, afterGettingFrame, this, FramedSource::handleClosure, this); } else { // We have NAL unit data in the buffer. There are three cases to consider: // 1. There is a new NAL unit in the buffer, and it's small enough to deliver // to the RTP sink (as is). // 2. There is a new NAL unit in the buffer, but it's too large to deliver to // the RTP sink in its entirety. Deliver the first fragment of this data, // as a FU-A packet, with one extra preceding header byte. // 3. There is a NAL unit in the buffer, and we've already delivered some // fragment(s) of this. Deliver the next fragment of this data, // as a FU-A packet, with two extra preceding header bytes. if (fMaxSize < fMaxOutputPacketSize) { // shouldn't happen envir() << "H264FUAFragmenter::doGetNextFrame(): fMaxSize (" << fMaxSize << ") is smaller than expected\n"; } else { fMaxSize = fMaxOutputPacketSize; } fLastFragmentCompletedNALUnit = True; // by default if (fCurDataOffset == 1) { // case 1 or 2 if (fNumValidDataBytes - 1 <= fMaxSize) { // case 1 memmove(fTo, &fInputBuffer[1], fNumValidDataBytes - 1); fFrameSize = fNumValidDataBytes - 1; fCurDataOffset = fNumValidDataBytes; } else { // case 2 // We need to send the NAL unit data as FU-A packets. Deliver the first // packet now. Note that we add FU indicator and FU header bytes to the front // of the packet (reusing the existing NAL header byte for the FU header). fInputBuffer[0] = (fInputBuffer[1] & 0xE0) | 28; // FU indicator fInputBuffer[1] = 0x80 | (fInputBuffer[1] & 0x1F); // FU header (with S bit) memmove(fTo, fInputBuffer, fMaxSize); fFrameSize = fMaxSize; fCurDataOffset += fMaxSize - 1; fLastFragmentCompletedNALUnit = False; } } else { // case 3 // We are sending this NAL unit data as FU-A packets. 
We've already sent the // first packet (fragment). Now, send the next fragment. Note that we add // FU indicator and FU header bytes to the front. (We reuse these bytes that // we already sent for the first fragment, but clear the S bit, and add the E // bit if this is the last fragment.) fInputBuffer[fCurDataOffset - 2] = fInputBuffer[0]; // FU indicator fInputBuffer[fCurDataOffset - 1] = fInputBuffer[1] & ~0x80; // FU header (no S bit) unsigned numBytesToSend = 2 + fNumValidDataBytes - fCurDataOffset; if (numBytesToSend > fMaxSize) { // We can't send all of the remaining data this time: numBytesToSend = fMaxSize; fLastFragmentCompletedNALUnit = False; } else { // This is the last fragment: fInputBuffer[fCurDataOffset - 1] |= 0x40; // set the E bit in the FU header fNumTruncatedBytes = fSaveNumTruncatedBytes; } memmove(fTo, &fInputBuffer[fCurDataOffset - 2], numBytesToSend); fFrameSize = numBytesToSend; fCurDataOffset += numBytesToSend - 2; } if (fCurDataOffset >= fNumValidDataBytes) { // We're done with this data. Reset the pointers for receiving new data: fNumValidDataBytes = fCurDataOffset = 1; } // Complete delivery to the client: FramedSource::afterGetting(this); } }当fNumValidDataBytes等于1时,表明buffer(fInputBuffer)中没有Nal Unit数据,那么就读入一个新的.从哪里读呢?还记得前面讲过的吗?H264FUAFragmenter在第一次读数据时代替了H264VideoStreamFramer,同时也与H264VideoStreamFramer还有ByteStreamFileSource手牵着脚,脚牵着手形成了链结构.文件数据从ByteStreamFileSource读入,经H264VideoStreamFramer处理传给H264FUAFragmenter.ByteStreamFileSource返回给H264VideoStreamFramer一段数据,H264VideoStreamFramer返回一个H264FUAFragmenter一个Nal unit .
H.264 视频 RTP 负载格式
1. 网络抽象层单元类型 (NALU)
NALU 头由一个字节组成, 它的语法如下:
|F|NRI| Type |
F: 1 个比特.
forbidden_zero_bit. 在 H.264 规范中规定了这一位必须为 0.
NRI: 2 个比特.
nal_ref_idc. 取 00 ~ 11, 似乎指示这个 NALU 的重要性, 如 00 的 NALU 解码器可以丢弃它而不影响图像的回放. 不过一般情况下不太关心
Type: 5 个比特.
nal_unit_type. 这个 NALU 单元的类型. 简述如下:
0 没有定义
1-23 NAL单元 单个 NAL 单元包.
24 STAP-A 单一时间的组合包
25 STAP-B 单一时间的组合包
26 MTAP16 多个时间的组合包
27 MTAP24 多个时间的组合包
28 FU-A 分片的单元
29 FU-B 分片的单元
30-31 没有定义
2. 打包模式
下面是 RFC 3550 中规定的 RTP 头的结构.
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|V=2|P|X| CC |M| PT | sequence number |
| timestamp |
| synchronization source (SSRC) identifier |
| contributing source (CSRC) identifiers |
| .... |
负载类型 Payload type (PT): 7 bits
序列号 Sequence number (SN): 16 bits
时间戳 Timestamp: 32 bits
H.264 Payload 格式定义了三种不同的基本的负载(Payload)结构. 接收端可能通过 RTP Payload
的第一个字节来识别它们. 这一个字节类似 NALU 头的格式, 而这个头结构的 NAL 单元类型字段则指出了它代表的是哪一种结构.
这个字节的结构如下, 可以看出它和 H.264 的 NALU 头结构是一样的.
|F|NRI| Type |
字段 Type: 这个 RTP payload 中 NAL 单元的类型. 这个字段和 H.264 中类型字段的区别是, 当 type
的值为 24 ~ 31 表示这是一个特别格式的 NAL 单元, 而 H.264 中, 只取 1~23 是有效的值.
24 STAP-A 单一时间的组合包
25 STAP-B 单一时间的组合包
26 MTAP16 多个时间的组合包
27 MTAP24 多个时间的组合包
28 FU-A 分片的单元
29 FU-B 分片的单元
30-31 没有定义
1. 单一 NAL 单元模式
即一个 RTP 包仅由一个完整的 NALU 组成. 这种情况下 RTP NAL 头类型字段和原始的 H.264的
NALU 头类型字段是一样的.
2. 组合封包模式
即可能是由多个 NAL 单元组成一个 RTP 包. 分别有4种组合方式: STAP-A, STAP-B, MTAP16, MTAP24.
那么这里的类型值分别是 24, 25, 26 以及 27.
3. 分片封包模式
用于把一个 NALU 单元封装成多个 RTP 包. 存在两种类型 FU-A 和 FU-B. 类型值分别是 28 和 29.
2.1 单一 NAL 单元模式
对于 NALU 的长度小于 MTU 大小的包, 一般采用单一 NAL 单元模式.
对于一个原始的 H.264 NALU 单元常由 [Start Code] [NALU Header] [NALU Payload] 三部分组成, 其中 Start Code 用于标示这是一个
NALU 单元的开始, 必须是 "00 00 00 01" 或 "00 00 01", NALU 头仅一个字节, 其后都是 NALU 单元内容.
打包时去除 "00 00 01" 或 "00 00 00 01" 的开始码, 把其他数据封包的 RTP 包即可.
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|F|NRI| type | |
+-+-+-+-+-+-+-+-+ |
| |
| Bytes 2..n of a Single NAL unit |
| |
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| :...OPTIONAL RTP padding |
如有一个 H.264 的 NALU 是这样的:
[00 00 00 01 67 42 A0 1E 23 56 0E 2F ... ]
这是一个序列参数集 NAL 单元. [00 00 00 01] 是四个字节的开始码, 67 是 NALU 头, 42 开始的数据是 NALU 内容.
封装成 RTP 包将如下:
[ RTP Header ] [ 67 42 A0 1E 23 56 0E 2F ]
即只要去掉 4 个字节的开始码就可以了.
2.2 组合封包模式
其次, 当 NALU 的长度特别小时, 可以把几个 NALU 单元封在一个 RTP 包中.
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
| RTP Header |
| NALU 1 Data |
: :
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| | NALU 2 Size | NALU 2 HDR |
| NALU 2 Data |
: :
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| :...OPTIONAL RTP padding |
2.3 Fragmentation Units (FUs).
而当 NALU 的长度超过 MTU 时, 就必须对 NALU 单元进行分片封包. 也称为 Fragmentation Units (FUs).
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
| FU indicator | FU header | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| |
| FU payload |
| |
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| :...OPTIONAL RTP padding |
Figure 14. RTP payload format for FU-A
The FU indicator octet has the following format:
|F|NRI| Type |
The FU header has the following format:
|S|E|R| Type |
十二 h264 rtp包的时间戳
void H264VideoRTPSink::doSpecialFrameHandling(unsigned /*fragmentationOffset*/, unsigned char* /*frameStart*/, unsigned /*numBytesInFrame*/, struct timeval framePresentationTime, unsigned /*numRemainingBytes*/) { // Set the RTP 'M' (marker) bit iff // 1/ The most recently delivered fragment was the end of (or the only fragment of) an NAL unit, and // 2/ This NAL unit was the last NAL unit of an 'access unit' (i.e. video frame). if (fOurFragmenter != NULL) { H264VideoStreamFramer* framerSource = (H264VideoStreamFramer*) (fOurFragmenter->inputSource()); // This relies on our fragmenter's source being a "H264VideoStreamFramer". if (fOurFragmenter->lastFragmentCompletedNALUnit() && framerSource != NULL && framerSource->pictureEndMarker()) { setMarkerBit(); framerSource->pictureEndMarker() = False; } } setTimestamp(framePresentationTime); }
函数中先检测是否是一个帧的最后一个包,如果是,打上'M'标记.然后就设置时间戳.这个时间戳是哪来的呢?需看函数doSpecialFrameHandling()是被谁调用的,经查找,是被MultiFramedRTPSink::afterGettingFrame1()调用的.MultiFramedRTPSink::afterGettingFrame1()的参数presentationTime传给了doSpecialFrameHandling().MultiFramedRTPSink::afterGettingFrame1()是在调用source的getNextFrame()时传给了source.传给哪个source呢?传给了H264FUAFragmenter,还记得暗渡陈仓那件事吗?所以H264FUAFragmenter在获取一个nal unit后调用了MultiFramedRTPSink::afterGettingFrame1().也就是H264FUAFragmenter::afterGettingFrame1()调用了MultiFramedRTPSink::afterGettingFrame1().
H264VideoStreamFramer的afterGettingFrame1()是没有的,代替之的是MPEGVideoStreamFramer::continueReadProcessing().它被MPEGVideoStreamParser暗中传给了StreamParser的构造函数.所以StreamParser在分析完一帧(或nal unit)之后,调用的就是MPEGVideoStreamFramer::continueReadProcessing().以下即是证明:(补充:以下函数并不是在parser分析完一帧(或nal unit)之后调用,而是parser利用ByteStreamFileSource获取到原始数据后调用,然后MPEGVideoStreamFramer再调用Parser的parse()函数分析原始数据)
void StreamParser::afterGettingBytes(void* clientData, unsigned numBytesRead, unsigned /*numTruncatedBytes*/, struct timeval presentationTime, unsigned /*durationInMicroseconds*/) { StreamParser* parser = (StreamParser*) clientData; if (parser != NULL) parser->afterGettingBytes1(numBytesRead, presentationTime); } void StreamParser::afterGettingBytes1(unsigned numBytesRead, struct timeval presentationTime) { // Sanity check: Make sure we didn't get too many bytes for our bank: if (fTotNumValidBytes + numBytesRead > BANK_SIZE) { fInputSource->envir() << "StreamParser::afterGettingBytes() warning: read " << numBytesRead << " bytes; expected no more than " << BANK_SIZE - fTotNumValidBytes << "\n"; } fLastSeenPresentationTime = presentationTime; unsigned char* ptr = &curBank()[fTotNumValidBytes]; fTotNumValidBytes += numBytesRead; // Continue our original calling source where it left off: restoreSavedParserState(); // Sigh... this is a crock; things would have been a lot simpler // here if we were using threads, with synchronous I/O... fClientContinueFunc(fClientContinueClientData, ptr, numBytesRead, presentationTime); }
void MPEGVideoStreamFramer::continueReadProcessing(void* clientData, unsigned char* /*ptr*/, unsigned /*size*/, struct timeval /*presentationTime*/) { MPEGVideoStreamFramer* framer = (MPEGVideoStreamFramer*) clientData; framer->continueReadProcessing(); }
// Parses one NAL unit out of the input byte stream and saves it to the output.
//
// Steps:
//   1. On the first call, skip everything up to and including the first
//      0x00000001 start code.
//   2. Optionally emit a start code, then copy bytes until the next start code
//      (0x00000001 or 0x000001) or until EOF.
//   3. For SEI / SPS / PPS NAL units, run the corresponding analysis and hand a
//      copy of SPS / PPS to the framer ("usingSource()").
//   4. Decide whether this NAL unit ends the current 'access unit'; if so, set
//      the picture-end marker and compute the next presentation time.
//
// Returns curFrameSize() on success, or 0 if parsing was interrupted (an "int"
// exception was caught) or if there was no data left after EOF.
unsigned H264VideoStreamParser::parse() {
  try {
    // The stream must start with a 0x00000001:
    if (!fHaveSeenFirstStartCode) {
      // Skip over any input bytes that precede the first 0x00000001:
      u_int32_t first4Bytes;
      while ((first4Bytes = test4Bytes()) != 0x00000001) {
        get1Byte(); setParseState(); // ensures that we progress over bad data
      }
      skipBytes(4); // skip this initial code

      setParseState();
      fHaveSeenFirstStartCode = True; // from now on
    }

    if (fOutputStartCodeSize > 0) {
      // Include a start code in the output:
      save4Bytes(0x00000001);
    }

    // Then save everything up until the next 0x00000001 (4 bytes) or 0x000001 (3 bytes), or we hit EOF.
    // Also make note of the first byte, because it contains the "nal_unit_type":
    u_int8_t firstByte;
    if (haveSeenEOF()) {
      // We hit EOF the last time that we tried to parse this data,
      // so we know that the remaining unparsed data forms a complete NAL unit:
      unsigned remainingDataSize = totNumValidBytes() - curOffset();
      if (remainingDataSize == 0) (void) get1Byte(); // forces another read, which will cause EOF to get handled for real this time
      if (remainingDataSize == 0) return 0;
      firstByte = get1Byte();
      saveByte(firstByte);

      while (--remainingDataSize > 0) {
        saveByte(get1Byte());
      }
    } else {
      u_int32_t next4Bytes = test4Bytes();
      firstByte = next4Bytes >> 24;
      while (next4Bytes != 0x00000001 && (next4Bytes & 0xFFFFFF00) != 0x00000100) {
        // We save at least some of "next4Bytes".
        if ((unsigned) (next4Bytes & 0xFF) > 1) {
          // Common case: 0x00000001 or 0x000001 definitely doesn't begin anywhere in "next4Bytes", so we save all of it:
          save4Bytes(next4Bytes);
          skipBytes(4);
        } else {
          // Save the first byte, and continue testing the rest:
          saveByte(next4Bytes >> 24);
          skipBytes(1);
        }
        next4Bytes = test4Bytes();
      }
      // Assert: next4Bytes starts with 0x00000001 or 0x000001, and we've saved all previous bytes (forming a complete NAL unit).
      // Skip over these remaining bytes, up until the start of the next NAL unit:
      if (next4Bytes == 0x00000001) {
        skipBytes(4);
      } else {
        skipBytes(3);
      }
    }

    // Split the NAL header byte into its fields (F | NRI | Type):
    u_int8_t nal_ref_idc = (firstByte & 0x60) >> 5;
    u_int8_t nal_unit_type = firstByte & 0x1F;

    switch (nal_unit_type) {
      case 6: { // Supplemental enhancement information (SEI)
        analyze_sei_data();
        // Later, perhaps adjust "fPresentationTime" if we saw a "pic_timing" SEI payload??? #####
        break;
      }
      case 7: { // Sequence parameter set
        // First, save a copy of this NAL unit, in case the downstream object wants to see it:
        usingSource()->saveCopyOfSPS(fStartOfFrame + fOutputStartCodeSize, fTo - fStartOfFrame - fOutputStartCodeSize);

        // Parse this NAL unit to check whether frame rate information is present:
        unsigned num_units_in_tick, time_scale, fixed_frame_rate_flag;
        analyze_seq_parameter_set_data(num_units_in_tick, time_scale, fixed_frame_rate_flag);
        if (time_scale > 0 && num_units_in_tick > 0) {
          usingSource()->fFrameRate = time_scale / (2.0 * num_units_in_tick);
        } else {
          // No usable timing information in this SPS; "fFrameRate" is left unchanged.
        }
        break;
      }
      case 8: { // Picture parameter set
        // Save a copy of this NAL unit, in case the downstream object wants to see it:
        usingSource()->saveCopyOfPPS(fStartOfFrame + fOutputStartCodeSize, fTo - fStartOfFrame - fOutputStartCodeSize);
      }
    }

    // Update the presentation-time (timestamp) variable:
    usingSource()->setPresentationTime();

    // If this NAL unit is a VCL NAL unit, we also scan the start of the next NAL unit, to determine whether this NAL unit
    // ends the current 'access unit'.  We need this information to figure out when to increment "fPresentationTime".
    // (RTP streamers also need to know this in order to figure out whether or not to set the "M" bit.)
    Boolean thisNALUnitEndsAccessUnit = False; // until we learn otherwise
    if (haveSeenEOF()) {
      // There is no next NAL unit, so we assume that this one ends the current 'access unit':
      thisNALUnitEndsAccessUnit = True;
    } else {
      Boolean const isVCL = nal_unit_type <= 5 && nal_unit_type > 0; // Would need to include type 20 for SVC and MVC #####
      if (isVCL) {
        u_int32_t first4BytesOfNextNALUnit = test4Bytes();
        u_int8_t firstByteOfNextNALUnit = first4BytesOfNextNALUnit >> 24;
        u_int8_t next_nal_ref_idc = (firstByteOfNextNALUnit & 0x60) >> 5;
        u_int8_t next_nal_unit_type = firstByteOfNextNALUnit & 0x1F;
        if (next_nal_unit_type >= 6) {
          // The next NAL unit is not a VCL; therefore, we assume that this NAL unit ends the current 'access unit':
          thisNALUnitEndsAccessUnit = True;
        } else {
          // The next NAL unit is also a VCL.  We need to examine it a little to figure out if it's a different 'access unit'.
          // (We use many of the criteria described in section 7.4.1.2.4 of the H.264 specification.)
          Boolean IdrPicFlag = nal_unit_type == 5;
          Boolean next_IdrPicFlag = next_nal_unit_type == 5;
          if (next_IdrPicFlag != IdrPicFlag) {
            // IdrPicFlag differs in value
            thisNALUnitEndsAccessUnit = True;
          } else if (next_nal_ref_idc != nal_ref_idc && next_nal_ref_idc * nal_ref_idc == 0) {
            // nal_ref_idc differs in value with one of the nal_ref_idc values being equal to 0
            thisNALUnitEndsAccessUnit = True;
          } else if ((nal_unit_type == 1 || nal_unit_type == 2 || nal_unit_type == 5)
                     && (next_nal_unit_type == 1 || next_nal_unit_type == 2 || next_nal_unit_type == 5)) {
            // Both this and the next NAL units begin with a "slice_header".
            // Parse this (for each), to get parameters that we can compare:

            // Current NAL unit's "slice_header":
            unsigned frame_num, pic_parameter_set_id, idr_pic_id;
            Boolean field_pic_flag, bottom_field_flag;
            analyze_slice_header( fStartOfFrame + fOutputStartCodeSize, fTo, nal_unit_type, frame_num, pic_parameter_set_id, idr_pic_id, field_pic_flag, bottom_field_flag);

            // Next NAL unit's "slice_header":
            u_int8_t next_slice_header[NUM_NEXT_SLICE_HEADER_BYTES_TO_ANALYZE];
            testBytes(next_slice_header, sizeof next_slice_header);
            unsigned next_frame_num, next_pic_parameter_set_id, next_idr_pic_id;
            Boolean next_field_pic_flag, next_bottom_field_flag;
            analyze_slice_header(next_slice_header, &next_slice_header[sizeof next_slice_header], next_nal_unit_type, next_frame_num, next_pic_parameter_set_id, next_idr_pic_id, next_field_pic_flag, next_bottom_field_flag);

            if (next_frame_num != frame_num) {
              // frame_num differs in value
              thisNALUnitEndsAccessUnit = True;
            } else if (next_pic_parameter_set_id != pic_parameter_set_id) {
              // pic_parameter_set_id differs in value
              thisNALUnitEndsAccessUnit = True;
            } else if (next_field_pic_flag != field_pic_flag) {
              // field_pic_flag differs in value
              thisNALUnitEndsAccessUnit = True;
            } else if (next_bottom_field_flag != bottom_field_flag) {
              // bottom_field_flag differs in value
              thisNALUnitEndsAccessUnit = True;
            } else if (next_IdrPicFlag == 1 && next_idr_pic_id != idr_pic_id) {
              // IdrPicFlag is equal to 1 for both and idr_pic_id differs in value
              // Note: We already know that IdrPicFlag is the same for both.
              thisNALUnitEndsAccessUnit = True;
            }
          }
        }
      }
    }

    // Important: this is where the presentation time of the next access unit is computed.
    if (thisNALUnitEndsAccessUnit) {
      usingSource()->fPictureEndMarker = True;
      ++usingSource()->fPictureCount;

      // Note that the presentation time for the next NAL unit will be different:
      struct timeval& nextPT = usingSource()->fNextPresentationTime; // alias
      nextPT = usingSource()->fPresentationTime;
      // Advance by one frame period (1/fFrameRate seconds), carrying whole seconds out of the microseconds field:
      double nextFraction = nextPT.tv_usec / 1000000.0 + 1 / usingSource()->fFrameRate;
      unsigned nextSecsIncrement = (long) nextFraction;
      nextPT.tv_sec += (long) nextSecsIncrement;
      nextPT.tv_usec = (long) ((nextFraction - nextSecsIncrement) * 1000000);
    }
    setParseState();

    return curFrameSize();
  } catch (int /*e*/) {
    return 0; // the parsing got interrupted
  }
}
// Converts a wall-clock "struct timeval" into an RTP timestamp, expressed in
// units of "fTimestampFrequency" and offset by "fTimestampBase".
//
// If a starting timestamp was preset, "fTimestampBase" is first adjusted so
// that this call returns exactly the preset value; the preset flag is then
// cleared.
u_int32_t RTPSink::convertToRTPTimestamp(struct timeval tv) {
  // Whole seconds, in RTP timestamp units:
  u_int32_t const secondsPart = (fTimestampFrequency * tv.tv_sec);

  // Microseconds, in RTP timestamp units, rounded to nearest:
  u_int32_t const microsPart
    = (u_int32_t)((2.0 * fTimestampFrequency * tv.tv_usec + 1000000.0) / 2000000);

  u_int32_t const increment = secondsPart + microsPart;

  if (fNextTimestampHasBeenPreset) {
    // Make the returned timestamp the same as the current "fTimestampBase",
    // so that timestamps begin with the value that was previously preset:
    fTimestampBase -= increment;
    fNextTimestampHasBeenPreset = False;
  }

  return fTimestampBase + increment;
}
大家应该已理解了GroupSocket这个类。理论上讲那些需要操作udp socket 的类应保存GroupSocket的实例。但事实并不是这样,可以看一下RTPSink,RTPSource,RTCPInstance等,它们都没有保存GroupSocket型的变量。那它们通过哪个类进行socket操作呢?是RTPInterface!!
这些类接收的GroupSocket指针最后都传给了 RTPInterface 。为什么用RTPInterface而不直接用GroupSocket呢?这里面有个故事...扯远了。
首先请问,Live555既支持rtp over udp,又支持rtp over tcp。那么在rtp over tcp情况下,用 GroupSocket 怎么实现呢?GroupSocket可是仅仅代表UDP啊!
Groupsock* fGS;
tcpStreamRecord* fTCPStreams; // optional, for RTP-over-TCP streaming/receiving
嘿嘿,这两个紧靠着,说明它们关系不一般啊(难道他们有一腿?)。fGS--代表了一个udp socket和它对应的多个目的端,fTCPStreams--代表了多个TCP socket,当然这些socket都是从一个socket accept()出来的客户端socket(tcpStreamRecord是一个链表哦)。
// Sends one packet over UDP (via the groupsock) and additionally over every
// TCP stream recorded in "fTCPStreams".
// Returns True only if every one of those sends succeeded.
Boolean RTPInterface::sendPacket(unsigned char* packet, unsigned packetSize) {
  Boolean allSendsSucceeded = True; // becomes False as soon as any send fails

  // Normal case: Send as a UDP packet:
  if (!fGS->output(envir(), fGS->ttl(), packet, packetSize)) allSendsSucceeded = False;

  // Also, send over each of our TCP sockets:
  tcpStreamRecord* record = fTCPStreams;
  while (record != NULL) {
    if (!sendRTPOverTCP(packet, packetSize,
                        record->fStreamSocketNum, record->fStreamChannelId)) {
      allSendsSucceeded = False;
    }
    record = record->fNext;
  }

  return allSendsSucceeded;
}
void RTPInterface::setStreamSocket(int sockNum, unsigned char streamChannelId) { fGS->removeAllDestinations(); addStreamSocket(sockNum, streamChannelId); } void RTPInterface::addStreamSocket(int sockNum, unsigned char streamChannelId) { if (sockNum < 0) return; for (tcpStreamRecord* streams = fTCPStreams; streams != NULL; streams = streams->fNext) { if (streams->fStreamSocketNum == sockNum && streams->fStreamChannelId == streamChannelId) { return; // we already have it } } fTCPStreams = new tcpStreamRecord(sockNum, streamChannelId, fTCPStreams); }
数据呗,streamChannelId是什么呢?我们不妨再猜测一下(很奇怪,我每次都能猜对,嘿嘿...):rtp over tcp时,这个tcp连接是直接利用了RTSP所用的那个tcp连接,如果同时有很多rtp
session,再加上rtsp session,大家都用这一个socket通信,怎么区分你的还是我的?我想这个channel
id就是用于解决这个问题。给每个session分配一个唯一的id,在发送自己的包时为包再加上个头部,头部中需要有session的标记--也就是这个channel id,包的长度等等字段。这样大家就可以穿一条裤子了,术语叫多路复用,但要注意只有tcp才进行多路复用,udp是不用的,因为udp是一个session对应一个socket(加上RTCP是两个)。
想像一下,服务端要从这个tcp socket读写数据,必须把一个handler加入TaskScheduler中,这个handler在可读数据时进行读,在可写数据时进行写。在读数据时,对读出的数据进行分析,取得数据包的长度,以及其channel id,根据channel id找到相应的处理handler和对象,交给它们去处理自己的数据。
试想两个建立在tcp上的rtp session,这个两个tcp socket既担负着rtsp通讯,又担负着rtp通讯。如果这两个rtp session共用一个stream,那么最终负责这两个session通信的就只有一个RTPInterface,那么这个RTPInterface中的fTCPStreams这个链表中就会有两项,分别对应这两个session。tcpStreamRecord主要用于socket number与channel id的对应。这些tcpStreamRecord是通过addStreamSocket()添加的。处理数据的handler是通过startNetworkReading()添加的,看一下下:
// Arranges for "handlerProc" to be called when incoming data is available,
// both on the UDP socket and on every TCP stream recorded in "fTCPStreams".
void RTPInterface::startNetworkReading(TaskScheduler::BackgroundHandlerProc* handlerProc) {
  // Normal case: Arrange to read UDP packets:
  envir().taskScheduler().turnOnBackgroundReadHandling(fGS->socketNum(), handlerProc, fOwner);

  // Also, receive RTP over TCP, on each of our TCP connections.
  // The handler is remembered so that it can be invoked on our behalf:
  fReadHandlerProc = handlerProc;

  tcpStreamRecord* record = fTCPStreams;
  while (record != NULL) {
    // Get a socket descriptor for "record->fStreamSocketNum",
    // and tell it about our subchannel:
    SocketDescriptor* descriptor = lookupSocketDescriptor(envir(), record->fStreamSocketNum);
    descriptor->registerRTPInterface(record->fStreamChannelId, this);

    record = record->fNext;
  }
}
用UDP时很简单,直接把处理函数作为handler加入taskScheduler即可。而TCP时,需向所有的session的socket都注册自己。可以想像,socketDescriptor代表一个tcp socket,并且它有一个链表之类的东西,其中保存了所有的对这个socket感兴趣的RTPInterface,同时也记录了RTPInterface对应的channel id。只有向socketDescriptor注册了自己,socketDescriptor在读取数据时,才能根据分析出的channel id找到对应的RTPInterface,才能调用RTPInterface中的数据处理handler,当然,这个函数也不是RTPInterface自己的,而是从startNetworkReading()这个函数接收到的调用者的。
上述主要讲的是一个RTPInterface对应多个客户端tcp socket的情形。现在又发现一个问题:SocketDescriptor为什么需要对应多个RTPInterface呢?上面已经讲了,是为了多路复用,因为这个socket既负担rtsp通信又负担rtp通信还负担RTCP通信。SocketDescriptor记录多路复用数据(也就是RTPInterface与channel id)用了一个Hash table:HashTable* fSubChannelHashTable。SocketDescriptor读数据使用函数:static void tcpReadHandler(SocketDescriptor*, int mask)。证据如下:
void SocketDescriptor::registerRTPInterface( unsigned char streamChannelId, RTPInterface* rtpInterface) { Boolean isFirstRegistration = fSubChannelHashTable->IsEmpty(); fSubChannelHashTable->Add((char const*) (long) streamChannelId, rtpInterface); if (isFirstRegistration) { // Arrange to handle reads on this TCP socket: TaskScheduler::BackgroundHandlerProc* handler = (TaskScheduler::BackgroundHandlerProc*) &tcpReadHandler; fEnv.taskScheduler().turnOnBackgroundReadHandling(fOurSocketNum, handler, this); } }
可见在注册第一个多路复用对象时启动read handler。看一下函数主体:
void SocketDescriptor::tcpReadHandler1(int mask) { // We expect the following data over the TCP channel: // optional RTSP command or response bytes (before the first '$' character) // a '$' character // a 1-byte channel id // a 2-byte packet size (in network byte order) // the packet data. // However, because the socket is being read asynchronously, this data might arrive in pieces. u_int8_t c; struct sockaddr_in fromAddress; if (fTCPReadingState != AWAITING_PACKET_DATA) { int result = readSocket(fEnv, fOurSocketNum, &c, 1, fromAddress); if (result != 1) { // error reading TCP socket, or no more data available if (result < 0) { // error fEnv.taskScheduler().turnOffBackgroundReadHandling( fOurSocketNum); // stops further calls to us } return; } } switch (fTCPReadingState) { case AWAITING_DOLLAR: { if (c == '$') { fTCPReadingState = AWAITING_STREAM_CHANNEL_ID; } else { // This character is part of a RTSP request or command, which is handled separately: if (fServerRequestAlternativeByteHandler != NULL) { (*fServerRequestAlternativeByteHandler)( fServerRequestAlternativeByteHandlerClientData, c); } } break; } case AWAITING_STREAM_CHANNEL_ID: { // The byte that we read is the stream channel id. if (lookupRTPInterface(c) != NULL) { // sanity check fStreamChannelId = c; fTCPReadingState = AWAITING_SIZE1; } else { // This wasn't a stream channel id that we expected. We're (somehow) in a strange state. Try to recover: fTCPReadingState = AWAITING_DOLLAR; } break; } case AWAITING_SIZE1: { // The byte that we read is the first (high) byte of the 16-bit RTP or RTCP packet 'size'. fSizeByte1 = c; fTCPReadingState = AWAITING_SIZE2; break; } case AWAITING_SIZE2: { // The byte that we read is the second (low) byte of the 16-bit RTP or RTCP packet 'size'. 
unsigned short size = (fSizeByte1 << 8) | c; // Record the information about the packet data that will be read next: RTPInterface* rtpInterface = lookupRTPInterface(fStreamChannelId); if (rtpInterface != NULL) { rtpInterface->fNextTCPReadSize = size; rtpInterface->fNextTCPReadStreamSocketNum = fOurSocketNum; rtpInterface->fNextTCPReadStreamChannelId = fStreamChannelId; } fTCPReadingState = AWAITING_PACKET_DATA; break; } case AWAITING_PACKET_DATA: { // Call the appropriate read handler to get the packet data from the TCP stream: RTPInterface* rtpInterface = lookupRTPInterface(fStreamChannelId); if (rtpInterface != NULL) { if (rtpInterface->fNextTCPReadSize == 0) { // We've already read all the data for this packet. fTCPReadingState = AWAITING_DOLLAR; break; } if (rtpInterface->fReadHandlerProc != NULL) { rtpInterface->fReadHandlerProc(rtpInterface->fOwner, mask); } } return; } } }
case AWAITING_DOLLAR: { if (c == '$') { fTCPReadingState = AWAITING_STREAM_CHANNEL_ID; } else { // This character is part of a RTSP request or command, which is handled separately: if (fServerRequestAlternativeByteHandler != NULL) { (*fServerRequestAlternativeByteHandler)( fServerRequestAlternativeByteHandlerClientData, c); } } break; }
啊!原来ServerRequestAlternativeByteHandler是用于处理RTSP数据的。也就是从这个socket收到RTSP数据时,调用ServerRequestAlternativeByteHandler。如果收到RTP/RTCP数据时,先查看其channel id,根据id找到RTPInterface(RTCP也是用了RTPInterface进行通信),设置RTPInterface中与读缓冲有关的变量,然后当读到包数据的开始位置时,调用rtpInterface中保存的数据处理handler。还记得吧,rtpInterface中的这个数据处理handler在UDP时也被使用,在这个函数中要做的是读取一个包的数据,然后处理这个包。而SocketDescriptor把读取位置置于包数据开始的位置再交给数据处理handler,正好可以使用与UDP相同的数据处理handler!
还有,socketDescriptor们并不属于任何RTPInterface,而是单独保存在一个Hash table中,这样多个RTPInterface都可以注册到一个socketDescriptor中,以实现多路复用。
总结一下:通过RTPInterface,live555不仅实现了rtp over udp,还实现了rtp over tcp,而且还实现了同时既有rtp over tcp,又有rtp over udp!
最后,channel id是从哪里来的呢?是在RTSP请求中指定的。在哪个请求中呢?自己找去吧。
大家应该已理解了GroupSocket这个类。理论上讲那些需要操作udp socket 的类应保存GroupSocket的实例。但事实并不是这样,可以看一下RTPSink,RTPSource,RTCPInstance等,它们都没有保存GroupSocket型的变量。那它们通过哪个类进行socket操作呢?是RTPInterface!!
这些类接收的GroupSocket指针最后都传给了 RTPInterface 。为什么用RTPInterface而不直接用GroupSocket呢?这里面有个故事...扯远了。
首先请问,Live555既支持rtp over udp,又支持rtp over tcp。那么在rtp over tcp情况下,用 GroupSocket 怎么实现呢?GroupSocket可是仅仅代表UDP啊!
Groupsock* fGS;
tcpStreamRecord* fTCPStreams; // optional, for RTP-over-TCP streaming/receiving
嘿嘿,这两个紧靠着,说明它们关系不一般啊(难道他们有一腿?)。fGS--代表了一个udp socket和它对应的多个目的端,fTCPStreams--代表了多个TCP socket,当然这些socket都是从一个socket accept()出来的客户端socket(tcpStreamRecord是一个链表哦)。
// Sends one packet over UDP (via the groupsock) and additionally over every
// TCP stream recorded in "fTCPStreams".
// Returns True only if every one of those sends succeeded.
Boolean RTPInterface::sendPacket(unsigned char* packet, unsigned packetSize) {
  Boolean allSendsSucceeded = True; // becomes False as soon as any send fails

  // Normal case: Send as a UDP packet:
  if (!fGS->output(envir(), fGS->ttl(), packet, packetSize)) allSendsSucceeded = False;

  // Also, send over each of our TCP sockets:
  tcpStreamRecord* record = fTCPStreams;
  while (record != NULL) {
    if (!sendRTPOverTCP(packet, packetSize,
                        record->fStreamSocketNum, record->fStreamChannelId)) {
      allSendsSucceeded = False;
    }
    record = record->fNext;
  }

  return allSendsSucceeded;
}
void RTPInterface::setStreamSocket(int sockNum, unsigned char streamChannelId) { fGS->removeAllDestinations(); addStreamSocket(sockNum, streamChannelId); } void RTPInterface::addStreamSocket(int sockNum, unsigned char streamChannelId) { if (sockNum < 0) return; for (tcpStreamRecord* streams = fTCPStreams; streams != NULL; streams = streams->fNext) { if (streams->fStreamSocketNum == sockNum && streams->fStreamChannelId == streamChannelId) { return; // we already have it } } fTCPStreams = new tcpStreamRecord(sockNum, streamChannelId, fTCPStreams); }
数据呗,streamChannelId是什么呢?我们不妨再猜测一下(很奇怪,我每次都能猜对,嘿嘿...):rtp over tcp时,这个tcp连接是直接利用了RTSP所用的那个tcp连接,如果同时有很多rtp
session,再加上rtsp session,大家都用这一个socket通信,怎么区分你的还是我的?我想这个channel
id就是用于解决这个问题。给每个session分配一个唯一的id,在发送自己的包时为包再加上个头部,头部中需要有session的标记--也就是这个channel id,包的长度等等字段。这样大家就可以穿一条裤子了,术语叫多路复用,但要注意只有tcp才进行多路复用,udp是不用的,因为udp是一个session对应一个socket(加上RTCP是两个)。
想像一下,服务端要从这个tcp socket读写数据,必须把一个handler加入TaskScheduler中,这个handler在可读数据时进行读,在可写数据时进行写。在读数据时,对读出的数据进行分析,取得数据包的长度,以及其channel id,根据channel id找到相应的处理handler和对象,交给它们去处理自己的数据。
试想两个建立在tcp上的rtp session,这个两个tcp socket既担负着rtsp通讯,又担负着rtp通讯。如果这两个rtp session共用一个stream,那么最终负责这两个session通信的就只有一个RTPInterface,那么这个RTPInterface中的fTCPStreams这个链表中就会有两项,分别对应这两个session。tcpStreamRecord主要用于socket number与channel id的对应。这些tcpStreamRecord是通过addStreamSocket()添加的。处理数据的handler是通过startNetworkReading()添加的,看一下下:
// Arranges for "handlerProc" to be called when incoming data is available,
// both on the UDP socket and on every TCP stream recorded in "fTCPStreams".
void RTPInterface::startNetworkReading(TaskScheduler::BackgroundHandlerProc* handlerProc) {
  // Normal case: Arrange to read UDP packets:
  envir().taskScheduler().turnOnBackgroundReadHandling(fGS->socketNum(), handlerProc, fOwner);

  // Also, receive RTP over TCP, on each of our TCP connections.
  // The handler is remembered so that it can be invoked on our behalf:
  fReadHandlerProc = handlerProc;

  tcpStreamRecord* record = fTCPStreams;
  while (record != NULL) {
    // Get a socket descriptor for "record->fStreamSocketNum",
    // and tell it about our subchannel:
    SocketDescriptor* descriptor = lookupSocketDescriptor(envir(), record->fStreamSocketNum);
    descriptor->registerRTPInterface(record->fStreamChannelId, this);

    record = record->fNext;
  }
}
用UDP时很简单,直接把处理函数作为handler加入taskScheduler即可。而TCP时,需向所有的session的socket都注册自己。可以想像,socketDescriptor代表一个tcp socket,并且它有一个链表之类的东西,其中保存了所有的对这个socket感兴趣的RTPInterface,同时也记录了RTPInterface对应的channel id。只有向socketDescriptor注册了自己,socketDescriptor在读取数据时,才能根据分析出的channel id找到对应的RTPInterface,才能调用RTPInterface中的数据处理handler,当然,这个函数也不是RTPInterface自己的,而是从startNetworkReading()这个函数接收到的调用者的。
上述主要讲的是一个RTPInterface对应多个客户端tcp socket的情形。现在又发现一个问题:SocketDescriptor为什么需要对应多个RTPInterface呢?上面已经讲了,是为了多路复用,因为这个socket既负担rtsp通信又负担rtp通信还负担RTCP通信。SocketDescriptor记录多路复用数据(也就是RTPInterface与channel id)用了一个Hash table:HashTable* fSubChannelHashTable。SocketDescriptor读数据使用函数:static void tcpReadHandler(SocketDescriptor*, int mask)。证据如下:
void SocketDescriptor::registerRTPInterface( unsigned char streamChannelId, RTPInterface* rtpInterface) { Boolean isFirstRegistration = fSubChannelHashTable->IsEmpty(); fSubChannelHashTable->Add((char const*) (long) streamChannelId, rtpInterface); if (isFirstRegistration) { // Arrange to handle reads on this TCP socket: TaskScheduler::BackgroundHandlerProc* handler = (TaskScheduler::BackgroundHandlerProc*) &tcpReadHandler; fEnv.taskScheduler().turnOnBackgroundReadHandling(fOurSocketNum, handler, this); } }
可见在注册第一个多路复用对象时启动read handler。看一下函数主体:
void SocketDescriptor::tcpReadHandler1(int mask) { // We expect the following data over the TCP channel: // optional RTSP command or response bytes (before the first '$' character) // a '$' character // a 1-byte channel id // a 2-byte packet size (in network byte order) // the packet data. // However, because the socket is being read asynchronously, this data might arrive in pieces. u_int8_t c; struct sockaddr_in fromAddress; if (fTCPReadingState != AWAITING_PACKET_DATA) { int result = readSocket(fEnv, fOurSocketNum, &c, 1, fromAddress); if (result != 1) { // error reading TCP socket, or no more data available if (result < 0) { // error fEnv.taskScheduler().turnOffBackgroundReadHandling( fOurSocketNum); // stops further calls to us } return; } } switch (fTCPReadingState) { case AWAITING_DOLLAR: { if (c == '$') { fTCPReadingState = AWAITING_STREAM_CHANNEL_ID; } else { // This character is part of a RTSP request or command, which is handled separately: if (fServerRequestAlternativeByteHandler != NULL) { (*fServerRequestAlternativeByteHandler)( fServerRequestAlternativeByteHandlerClientData, c); } } break; } case AWAITING_STREAM_CHANNEL_ID: { // The byte that we read is the stream channel id. if (lookupRTPInterface(c) != NULL) { // sanity check fStreamChannelId = c; fTCPReadingState = AWAITING_SIZE1; } else { // This wasn't a stream channel id that we expected. We're (somehow) in a strange state. Try to recover: fTCPReadingState = AWAITING_DOLLAR; } break; } case AWAITING_SIZE1: { // The byte that we read is the first (high) byte of the 16-bit RTP or RTCP packet 'size'. fSizeByte1 = c; fTCPReadingState = AWAITING_SIZE2; break; } case AWAITING_SIZE2: { // The byte that we read is the second (low) byte of the 16-bit RTP or RTCP packet 'size'. 
unsigned short size = (fSizeByte1 << 8) | c; // Record the information about the packet data that will be read next: RTPInterface* rtpInterface = lookupRTPInterface(fStreamChannelId); if (rtpInterface != NULL) { rtpInterface->fNextTCPReadSize = size; rtpInterface->fNextTCPReadStreamSocketNum = fOurSocketNum; rtpInterface->fNextTCPReadStreamChannelId = fStreamChannelId; } fTCPReadingState = AWAITING_PACKET_DATA; break; } case AWAITING_PACKET_DATA: { // Call the appropriate read handler to get the packet data from the TCP stream: RTPInterface* rtpInterface = lookupRTPInterface(fStreamChannelId); if (rtpInterface != NULL) { if (rtpInterface->fNextTCPReadSize == 0) { // We've already read all the data for this packet. fTCPReadingState = AWAITING_DOLLAR; break; } if (rtpInterface->fReadHandlerProc != NULL) { rtpInterface->fReadHandlerProc(rtpInterface->fOwner, mask); } } return; } } }
case AWAITING_DOLLAR: { if (c == '$') { fTCPReadingState = AWAITING_STREAM_CHANNEL_ID; } else { // This character is part of a RTSP request or command, which is handled separately: if (fServerRequestAlternativeByteHandler != NULL) { (*fServerRequestAlternativeByteHandler)( fServerRequestAlternativeByteHandlerClientData, c); } } break; }
啊!原来ServerRequestAlternativeByteHandler是用于处理RTSP数据的。也就是从这个socket收到RTSP数据时,调用ServerRequestAlternativeByteHandler。如果收到RTP/RTCP数据时,先查看其channel id,根据id找到RTPInterface(RTCP也是用了RTPInterface进行通信),设置RTPInterface中与读缓冲有关的变量,然后当读到包数据的开始位置时,调用rtpInterface中保存的数据处理handler。还记得吧,rtpInterface中的这个数据处理handler在UDP时也被使用,在这个函数中要做的是读取一个包的数据,然后处理这个包。而SocketDescriptor把读取位置置于包数据开始的位置再交给数据处理handler,正好可以使用与UDP相同的数据处理handler!
还有,socketDescriptor们并不属于任何RTPInterface,而是单独保存在一个Hash table中,这样多个RTPInterface都可以注册到一个socketDescriptor中,以实现多路复用。
总结一下:通过RTPInterface,live555不仅实现了rtp over udp,还实现了rtp over tcp,而且还实现了同时既有rtp over tcp,又有rtp over udp!
最后,channel id是从哪里来的呢?是在RTSP请求中指定的。在哪个请求中呢?自己找去吧。
RTCPInstance靠RTPInterface提供网络通讯支持,所以它既支持rtcp over udp,又支持rtcp over tcp.
RTCPInstance接收到的包在函数static void incomingReportHandler(RTCPInstance* instance, int /*mask*/)中处理.
最值得关注的是这个成员函数:void setSpecificRRHandler(netAddressBits fromAddress, Port fromPort,TaskFunc* handlerTask, void* clientData).它的作用是让调用者可以设置回调函数,调用者就可以在收到RR包时做出一定的动作.参数fromAddress和fromPort指明要对哪个客户端的RR包做出响应.
利用这个机制的例子是RTSPServer::RTSPClientSession.它会把自己的RRHandler函数经过层层传递,最终传给RTCPInstance.于是RTSPServer::RTSPClientSession就可以在每次收到对应的客户端的RR包时调用它传入的函数,这个函数是void RTSPServer::RTSPClientSession::noteClientLiveness(RTSPClientSession* clientSession).此函数只是以下函数的过渡:
void RTSPServer::RTSPClientSession::noteLiveness() { #ifdef DEBUG fprintf(stderr, "Liveness indication from client at %s\n", our_inet_ntoa(fClientAddr.sin_addr)); #endif if (fOurServer.fReclamationTestSeconds > 0) { envir().taskScheduler().rescheduleDelayedTask(fLivenessCheckTask, fOurServer.fReclamationTestSeconds * 1000000, (TaskFunc*) livenessTimeoutTask, this); } }
// Timeout callback for a client session's liveness check.
//
// Being called at all means the session is considered to have timed out due
// to inactivity, so the session object passed in is deleted.
void RTSPServer::RTSPClientSession::livenessTimeoutTask(
    RTSPClientSession* clientSession) {
#ifdef DEBUG
  fprintf(stderr, "RTSP client session from %s has timed out (due to inactivity)\n", our_inet_ntoa(clientSession->fClientAddr.sin_addr));
#endif

  // The session is presumed dead; destroy it.
  delete clientSession;
}
那么RTSPServer::RTSPClientSession就会自杀(真是想不开啊).也就是说fOurServer.fReclamationTestSeconds * 1000000是超时时间(默认好像是60秒).
十六 几个重要对象的生命期
// Returns the current value of the reference counter.
unsigned referenceCount() const {
  return fReferenceCount;
}

// Adds one reference.
void incrementReferenceCount() {
  fReferenceCount += 1;
}

// Drops one reference; a counter that is not above zero is left untouched.
void decrementReferenceCount() {
  if (fReferenceCount > 0) {
    fReferenceCount -= 1;
  }
}
经查找,是在建立新的StreamState时.在函数void RTSPServer::RTSPClientSession::handleCmd_SETUP(char const* cseq,char const* urlPreSuffix, char const* urlSuffix,char const* fullRequestStr)中可以看到.再找一下减少引用的代码:
// Tears down a client session, in this order:
//   1. closes the session's sockets;
//   2. if a session cookie exists, removes it from the server's
//      HTTP-tunneling session table and frees it;
//   3. reclaims the stream states;
//   4. drops this session's reference on its ServerMediaSession and, if that
//      leaves the count at zero and the media session asks to be deleted when
//      unreferenced, removes it from the server.
RTSPServer::RTSPClientSession::~RTSPClientSession() {
  closeSockets();

  if (fSessionCookie != NULL) {
    // We were being used for RTSP-over-HTTP tunneling. Take ourselves out of
    // the 'session cookie' hash table before we go away.
    fOurServer.fClientSessionsForHTTPTunneling->Remove(fSessionCookie);
    delete[] fSessionCookie;
  }

  reclaimStreamStates();

  if (fOurServerMediaSession == NULL) return; // nothing to release

  fOurServerMediaSession->decrementReferenceCount();

  bool const noLongerNeeded =
      fOurServerMediaSession->referenceCount() == 0 &&
      fOurServerMediaSession->deleteWhenUnreferenced();
  if (noLongerNeeded) {
    fOurServer.removeServerMediaSession(fOurServerMediaSession);
    fOurServerMediaSession = NULL;
  }
}
if (fOurServerMediaSession->referenceCount() == 0 && fOurServerMediaSession->deleteWhenUnreferenced()) { fOurServer.removeServerMediaSession(fOurServerMediaSession); fOurServerMediaSession = NULL; }