// MPEG-4 Visual start codes: the 0x000001 start-code prefix followed by a
// one-byte code value. Each directive must sit on its own line; otherwise the
// preprocessor treats everything after the first macro name as that macro's
// replacement text and the remaining names are never defined.
#define VISUAL_OBJECT_SEQUENCE_START_CODE 0x000001B0
#define VISUAL_OBJECT_SEQUENCE_END_CODE   0x000001B1
#define GROUP_VOP_START_CODE              0x000001B3
#define VISUAL_OBJECT_START_CODE          0x000001B5
#define VOP_START_CODE                    0x000001B6
void MPEG4VideoStreamDiscreteFramer ::afterGettingFrame1(unsigned frameSize, unsigned numTruncatedBytes, struct timeval presentationTime, unsigned durationInMicroseconds) { // Check that the first 4 bytes are a system code: if (frameSize >= 4 && fTo[0] == 0 && fTo[1] == 0 && fTo[2] == 1) { fPictureEndMarker = True; // Assume that we have a complete 'picture' here unsigned i = 3; // //视觉对象序列,按照完整的MPEG4 Elemental Stream进行解析 // if (fTo[i] == 0xB0) { // VISUAL_OBJECT_SEQUENCE_START_CODE // The next byte is the "profile_and_level_indication": if (frameSize >= 5) fProfileAndLevelIndication = fTo[4]; // The start of this frame - up to the first GROUP_VOP_START_CODE // or VOP_START_CODE - is stream configuration information. Save this: for (i = 7; i < frameSize; ++i) { if ((fTo[i] == 0xB3 /*GROUP_VOP_START_CODE*/ || fTo[i] == 0xB6 /*VOP_START_CODE*/) && fTo[i-1] == 1 && fTo[i-2] == 0 && fTo[i-3] == 0) { break; // The configuration information ends here } } fNumConfigBytes = i < frameSize ? i-3 : frameSize; delete[] fConfigBytes; fConfigBytes = new unsigned char[fNumConfigBytes]; for (unsigned j = 0; j < fNumConfigBytes; ++j) fConfigBytes[j] = fTo[j]; // This information (should) also contain a VOL header, which we need // to analyze, to get "vop_time_increment_resolution" (which we need // - along with "vop_time_increment" - in order to generate accurate // presentation times for "B" frames). 
analyzeVOLHeader(); } if (i < frameSize) { u_int8_t nextCode = fTo[i]; // //VOP组 // if (nextCode == 0xB3 /*GROUP_VOP_START_CODE*/) { // Skip to the following VOP_START_CODE (if any): for (i += 4; i < frameSize; ++i) { if (fTo[i] == 0xB6 /*VOP_START_CODE*/ && fTo[i-1] == 1 && fTo[i-2] == 0 && fTo[i-3] == 0) { nextCode = fTo[i]; break; } } } // //视觉对象平面 // if (nextCode == 0xB6 /*VOP_START_CODE*/ && i+5 < frameSize) { ++i; // Get the "vop_coding_type" from the next byte: u_int8_t nextByte = fTo[i++]; u_int8_t vop_coding_type = nextByte>>6; //VOP开始符后的2bit,表示帧类型I/P/B/S // Next, get the "modulo_time_base" by counting the '1' bits that // follow. We look at the next 32-bits only. // This should be enough in most cases. u_int32_t next4Bytes = (fTo[i]<<24)|(fTo[i+1]<<16)|(fTo[i+2]<<8)|fTo[i+3]; i += 4; u_int32_t timeInfo = (nextByte<<(32-6))|(next4Bytes>>6); unsigned modulo_time_base = 0; u_int32_t mask = 0x80000000; while ((timeInfo&mask) != 0) { ++modulo_time_base; mask >>= 1; } mask >>= 2; // Then, get the "vop_time_increment". unsigned vop_time_increment = 0; // First, make sure we have enough bits left for this: if ((mask>>(fNumVTIRBits-1)) != 0) { for (unsigned i = 0; i < fNumVTIRBits; ++i) { vop_time_increment |= timeInfo&mask; mask >>= 1; } while (mask != 0) { vop_time_increment >>= 1; mask >>= 1; } } // //若是"B"frame, 需要修正时间时间戳 // // If this is a "B" frame, then we have to tweak "presentationTime": if (vop_coding_type == 2/*B*/ && (fLastNonBFramePresentationTime.tv_usec > 0 || fLastNonBFramePresentationTime.tv_sec > 0)) { int timeIncrement = fLastNonBFrameVop_time_increment - vop_time_increment; if (timeIncrement<0) timeIncrement += vop_time_increment_resolution; unsigned const MILLION = 1000000; double usIncrement = vop_time_increment_resolution == 0 ? 
0.0 : ((double)timeIncrement*MILLION)/vop_time_increment_resolution; unsigned secondsToSubtract = (unsigned)(usIncrement/MILLION); unsigned uSecondsToSubtract = ((unsigned)usIncrement)%MILLION; presentationTime = fLastNonBFramePresentationTime; if ((unsigned)presentationTime.tv_usec < uSecondsToSubtract) { presentationTime.tv_usec += MILLION; if (presentationTime.tv_sec > 0) --presentationTime.tv_sec; } presentationTime.tv_usec -= uSecondsToSubtract; if ((unsigned)presentationTime.tv_sec > secondsToSubtract) { presentationTime.tv_sec -= secondsToSubtract; } else { presentationTime.tv_sec = presentationTime.tv_usec = 0; } } else { fLastNonBFramePresentationTime = presentationTime; fLastNonBFrameVop_time_increment = vop_time_increment; } } } } // Complete delivery to the client: fFrameSize = frameSize; fNumTruncatedBytes = numTruncatedBytes; fPresentationTime = presentationTime; fDurationInMicroseconds = durationInMicroseconds; afterGetting(this); }
最后关注一下 MPEG4 ES 流时间戳的处理。在处理 MPEG4 的 ES 流时,使用 MPEG4VideoStreamFramer 作为 source,并使用分析器 MPEG4VideoStreamParser 对完整的 MPEG4 Elementary Stream 进行分析,主要是解析出其中的时间信息。
void MPEGVideoStreamFramer::continueReadProcessing() { unsigned acquiredFrameSize = fParser->parse(); if (acquiredFrameSize > 0) { // We were able to acquire a frame from the input. // It has already been copied to the reader's space. fFrameSize = acquiredFrameSize; fNumTruncatedBytes = fParser->numTruncatedBytes(); // "fPresentationTime" should have already been computed. // //根据帧计数及帧率计算帧的持续时间 // // Compute "fDurationInMicroseconds" now: fDurationInMicroseconds = (fFrameRate == 0.0 || ((int)fPictureCount) < 0) ? 0 : (unsigned)((fPictureCount*1000000)/fFrameRate); fPictureCount = 0; // Call our own 'after getting' function. Because we're not a 'leaf' // source, we can call this directly, without risking infinite recursion. afterGetting(this); } else { // We were unable to parse a complete frame from the input, because: // - we had to read more data from the source stream, or // - the source stream has ended. } }
// NOTE: this is an abridged excerpt; each "..." marks code that was elided.
void MPEG4VideoStreamParser::analyzeVOLHeader() {
  // Parse the timing information out of the VOL.
  // Extract timing information (in particular,
  // "vop_time_increment_resolution") from the VOL Header:
  ...
  do {
    ...
    // Use "vop_time_increment_resolution" as the 'frame rate'
    // (really, 'tick rate'):
    usingSource()->fFrameRate = (double)vop_time_increment_resolution; // frame rate
    return;
  } while (0);
  ...
}