#############################################
但是具体到一个网页的时候,应该怎么去解析呢?
从网络上收到的数据流,到最终交给HTMLTokenizer之间,都经过了哪些处理呢?
准备接着进行下面的分析。
还是先来一个堆栈说明问题:
#0 HTMLDocumentParser (this=0x2a2e8798, document=0x2a283a18, reportErrors=false) at external/webkit/Source/WebCore/html/parser/HTMLDocumentParser.cpp:77
#1 0x48f22a3a in WebCore::HTMLDocumentParser::create (this=0x2a283a18) at external/webkit/Source/WebCore/html/parser/HTMLDocumentParser.h:61
#2 WebCore::HTMLDocument::createParser (this=0x2a283a18) at external/webkit/Source/WebCore/html/HTMLDocument.cpp:281
#3 0x48eeb7ec in WebCore::Document::implicitOpen (this=0x2a283a18) at external/webkit/Source/WebCore/dom/Document.cpp:1987
#4 0x48f389e8 in WebCore::DocumentWriter::begin (this=0x2a3b16fc, url=<value optimized out>, dispatch=<value optimized out>, origin=0x0) at external/webkit/Source/WebCore/loader/DocumentWriter.cpp:149
#5 0x48d47f82 in WebCore::FrameLoader::receivedFirstData (this=0x2a146f20) at external/webkit/Source/WebCore/loader/FrameLoader.cpp:609
#6 0x48f3869c in WebCore::DocumentWriter::setEncoding (this=0x2a3b16fc, name=..., userChosen=<value optimized out>) at external/webkit/Source/WebCore/loader/DocumentWriter.cpp:243
#7 0x48f36c30 in WebCore::DocumentLoader::commitData (this=0x2a3b16a8,
#0 HTMLTokenizer (this=0x2a4527b8, usePreHTML5ParserQuirks=false) at external/webkit/Source/WebCore/html/parser/HTMLTokenizer.cpp:106
#1 0x48f27f28 in WebCore::HTMLTokenizer::create (this=0x2a463b08, document=<value optimized out>) at external/webkit/Source/WebCore/html/parser/HTMLTokenizer.h:123
#2 HTMLPreloadScanner (this=0x2a463b08, document=<value optimized out>) at external/webkit/Source/WebCore/html/parser/HTMLPreloadScanner.cpp:155
#3 0x48f26b0e in WebCore::HTMLDocumentParser::pumpTokenizer (this=0x2a2e8798, mode=WebCore::HTMLDocumentParser::AllowYield) at external/webkit/Source/WebCore/html/parser/HTMLDocumentParser.cpp:293
#4 0x48f26d52 in WebCore::HTMLDocumentParser::append (this=0x2a2e8798, source=...) at external/webkit/Source/WebCore/html/parser/HTMLDocumentParser.cpp:367
#5 WebCore::HTMLDocumentParser::append (this=0x2a2e8798, source=...) at external/webkit/Source/WebCore/html/parser/HTMLDocumentParser.cpp:337
#6 0x48fbe2c4 in WebCore::DecodedDataDocumentParser::appendBytes (this=0x2a2e8798, writer=<value optimized out>, data=0x2a413ef0 "b/gp/?tab=wm\"><div class=\"gbzi gbsi\" style=\"background-position:-32px -50px\"></div><span class=gbzn>Gmail</span></a><a onclick=gbar.logger.il(1,{t:25}); id=gb_25 class=\"gbza\" href=\"https://drive.googl"..., length=12398, shouldFlush=false) at external/webkit/Source/WebCore/dom/DecodedDataDocumentParser.cpp:54
#7 0x48f384ea in WebCore::DocumentWriter::addData (this=0x2a3b16fc, str=0x2a413ef0 "b/gp/?tab=wm\"><div class=\"gbzi gbsi\" style=\"background-position:-32px -50px\"></div><span class=gbzn>Gmail</span></a><a onclick=gbar.logger.il(1,{t:25}); id=gb_25 class=\"gbza\" href=\"https://drive.googl"..., len=12398, flush=<value optimized out>) at external/webkit/Source/WebCore/loader/DocumentWriter.cpp:207
void DocumentWriter::addData(const char* str, int len, bool flush) { if (len == -1) len = strlen(str); DocumentParser* parser = m_frame->document()->parser(); if (parser) parser->appendBytes(this, str, len, flush); }
// Decodes a chunk of raw bytes into text and appends the text to this parser.
//
// writer      - supplies the TextResourceDecoder (created on demand) and is
//               notified via reportDataReceived() when decoded text exists.
// data/length - the raw bytes to decode.
// shouldFlush - when true, the decoder's flush() output is appended to the
//               decoded text as well.
//
// Returns early without side effects on the parser when there is nothing to
// decode and no flush was requested, or when decoding yields an empty string.
void DecodedDataDocumentParser::appendBytes(DocumentWriter* writer, const char* data, int length, bool shouldFlush)
{
    const bool hasWork = length || shouldFlush;
    if (!hasWork)
        return;

    TextResourceDecoder* textDecoder = writer->createDecoderIfNeeded();
    String text = textDecoder->decode(data, length);
    if (shouldFlush)
        text += textDecoder->flush();

    if (!text.isEmpty()) {
        writer->reportDataReceived();
        append(text);
    }
}
#0 WebCore::TextResourceDecoder::checkForMetaCharset (this=0x2a9c26b0, data=0x2aa36d68 "<!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.0//EN\" \"http://www.wapforum.org/DTD/xhtml-mobile10.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n<meta http-equiv=\"Content-Type\" co"..., length=2293) at external/webkit/Source/WebCore/loader/TextResourceDecoder.cpp:579
#1 0x48d532ac in WebCore::TextResourceDecoder::checkForHeadCharset (this=0x2a9c26b0, data=0x2aa36d68 "<!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.0//EN\" \"http://www.wapforum.org/DTD/xhtml-mobile10.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n<meta http-equiv=\"Content-Type\" co"..., len=2293, movedDataToBuffer=<value optimized out>) at external/webkit/Source/WebCore/loader/TextResourceDecoder.cpp:575
#2 0x48d535b2 in WebCore::TextResourceDecoder::decode (this=0x2a9c26b0, data=0x2aa36d68 "<!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.0//EN\" \"http://www.wapforum.org/DTD/xhtml-mobile10.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n<meta http-equiv=\"Content-Type\" co"..., len=2293) at external/webkit/Source/WebCore/loader/TextResourceDecoder.cpp:638
#3 0x48fbe25c in WebCore::DecodedDataDocumentParser::appendBytes (this=0x2a41dc78, writer=0x2a46dc54, data=0x2aa36d68 "<!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.0//EN\" \"http://www.wapforum.org/DTD/xhtml-mobile10.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n<meta http-equiv=\"Content-Type\" co"..., length=2293, shouldFlush=false) at external/webkit/Source/WebCore/dom/DecodedDataDocumentParser.cpp:46
#4 0x48f384ea in WebCore::DocumentWriter::addData (this=0x2a46dc54, str=0x2aa36d68 "<!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.0//EN\" \"http://www.wapforum.org/DTD/xhtml-mobile10.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n<meta http-equiv=\"Content-Type\" co"..., len=2293, flush=<value optimized out>) at external/webkit/Source/WebCore/loader/DocumentWriter.cpp:207
// Forced style recalculation: recompute the document-level style and apply it
// to the document's renderer if it differs from the renderer's current style.
if (change == Force) {
    // style selector may set this again during recalc
    m_hasNodesWithPlaceholderStyle = false;

    RefPtr<RenderStyle> documentStyle = CSSStyleSelector::styleForDocument(this);

    // Check for a renderer before reading its style; the previous form
    // dereferenced renderer() first and only tested it for null afterwards.
    if (renderer()) {
        StyleChange ch = diff(documentStyle.get(), renderer()->style());
        if (ch != NoChange)
            renderer()->setStyle(documentStyle.release());
    }
}
// Runs pumpTokenizer(mode) unless something currently prevents pumping.
//
// Pumping is skipped when the parser is stopped, when the tree builder is
// paused, or when a resume has already been scheduled. In the last case the
// caller is expected to have passed AllowYield (asserted).
void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode)
{
    if (isStopped() || m_treeBuilder->isPaused())
        return;

    // Once a resume is scheduled, HTMLParserScheduler controls when we next pump.
    if (isScheduledForResume()) {
        ASSERT(mode == AllowYield);
        return;
    }

    pumpTokenizer(mode);
}
// Counts one token against the session and decides whether the parser should
// yield.
//
// Once more than m_parserChunkSize tokens have been processed since the last
// check, the token counter is reset and the time elapsed since
// session.startTime is compared with m_parserTimeLimit; if the limit is
// exceeded, session.needsYield is set. The counter is incremented on every
// call, including the call that reset it.
void checkForYieldBeforeToken(PumpSession& session)
{
    if (session.processedTokens > m_parserChunkSize) {
        // currentTime() can be expensive. By delaying, we avoided calling
        // currentTime() when constructing non-yielding PumpSessions.
        if (!session.startTime)
            session.startTime = currentTime();

        session.processedTokens = 0;
        double elapsedTime = currentTime() - session.startTime;
        if (elapsedTime > m_parserTimeLimit)
            session.needsYield = true;
    }
    ++session.processedTokens;
}