#include "nsCOMPtr.h"

#include "nsString.h"

#include "nsIParser.h"

#include "prtypes.h"

#include "nsIUnicodeDecoder.h"

#include "nsScannerString.h"


classnsReadEndCondition {


const PRUnichar *mChars;

PRUnichar mFilter;

//构造参数的显式转换,nsReadEndCondition = (PRUnichar)

explicit nsReadEndCondition(constPRUnichar* aTerminateChars);



nsReadEndCondition(constnsReadEndCondition& aOther); // No copying

void operator=(const nsReadEndCondition& aOther); // No assigning


classnsScanner {



*Use this constructor if you want i/o to be based on

*a single string you hand in during construction.

*This short cut was added for Javascript.


*@update ftang 3/02/99

*@param aCharset charset

*@param aCharsetSource - wherethe charset info came from

*@param aMode represents theparser mode (nav, other)



nsScanner(const nsAString&anHTMLString, const nsACString& aCharset,PRInt32 aSource); //构造方法nsScanner,这个构造方法可以让你的I/O基于一个固定的字符串


*Use this constructor if you want i/o to be based on

*a file (therefore a stream) or just data you provide via Append().


*@update ftang 3/02/99

*@param aCharset charset

*@param aCharsetSource - wherethe charset info came from

*@param aMode represents theparser mode (nav, other)




nsScanner(nsString& aFilename,PRBool aCreateStream, const nsACString& aCharset, PRInt32 aSource);

~nsScanner(); //析构方法


*retrieve next char from internal input stream


*@update gess 3/25/98

*@param ch is the char to acceptnew value

*@return error code reflectingread status



nsresult GetChar(PRUnichar& ch);


*peek ahead to consume next char from scanner's internal

*input buffer


*@update gess 3/25/98

*@param ch is the char to acceptnew value

*@return error code reflectingread status



nsresult Peek(PRUnichar& ch, PRUint32 aOffset=0);


nsresult Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset =0);


*Skip over chars as long as they equal given char


*@update gess 3/25/98

*@param char to be skipped

*@return error code



nsresult SkipOver(PRUnichar aSkipChar);


*Consume characters until you run into space, a '<', a '>', or a'/'.


*@param aString - receives newdata from stream

*@return error code



nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString);


*Consume characters until you run into a char that's not valid in an

*entity name


*@param aString - receives newdata from stream

*@return error code



nsresult ReadEntityIdentifier(nsString& aString);


nsresult ReadNumber(nsString& aString,PRInt32 aBase);

nsresult ReadWhitespace(nsScannerSharedSubstring& aString,



nsresult ReadWhitespace(nsScannerIterator& aStart,




*Consume characters until you find the terminal char


*@update gess 3/25/98

*@param aString receives new datafrom stream

*@param aTerminal containsterminating char

*@param addTerminal tells uswhether to append terminal to aString

*@return error code



nsresult ReadUntil(nsAString& aString,

PRUnichar aTerminal,

PRBool addTerminal);


*Consume characters until you find one contained in given

*terminal set.


*@update gess 3/25/98

*@param aString receives new datafrom stream

*@param aTermSet contains set ofterminating chars

*@param addTerminal tells uswhether to append terminal to aString

*@return error code



nsresult ReadUntil(nsAString& aString,

const nsReadEndCondition& aEndCondition,

PRBool addTerminal);

nsresult ReadUntil(nsScannerSharedSubstring& aString,

const nsReadEndCondition& aEndCondition,

PRBool addTerminal);

nsresult ReadUntil(nsScannerIterator& aStart,


const nsReadEndCondition& aEndCondition,

PRBool addTerminal);


*Records current offset position in input stream. This allows us

*to back up to this point if the need should arise, such as when

*tokenization gets interrupted.


*@update gess 5/12/98





PRInt32 Mark(void);


*Resets current offset position of input stream to marked position.

*This allows us to back up to this point if the need should arise,

*such as when tokenization gets interrupted.



*@update gess 5/12/98





void RewindToMark(void);




*@update harishd 01/12/99





PRBool UngetReadable(constnsAString& aBuffer);




*@update gess 5/13/98





nsresult Append(const nsAString&aBuffer);




*@update gess 5/21/98





nsresult Append(const char* aBuffer, PRUint32 aLen,

nsIRequest *aRequest);


*Call this to copy bytes out of the scanner that have not yet beenconsumed

*by the tokenization process.


*@update gess 5/12/98

*@param aCopyBuffer is where thescanner buffer will be copied to

*@return nada



void CopyUnusedData(nsString&aCopyBuffer);


*Retrieve the name of the file that the scanner is reading from.

*In some cases, it's just a given name, because the scanner isn't

*really reading from a file.


*@update gess 5/12/98




nsString& GetFilename(void);


static voidSelfTest();


*Use this setter to change the scanner's unicode decoder


*@update ftang 3/02/99

*@param aCharset a normalized(alias resolved) charset name

*@param aCharsetSource- where thecharset info came from




nsresult SetDocumentCharset(constnsACString& aCharset, PRInt32 aSource);


voidBindSubstring(nsScannerSubstring& aSubstring, constnsScannerIterator& aStart, constnsScannerIterator& aEnd);


voidCurrentPosition(nsScannerIterator& aPosition);


void EndReading(nsScannerIterator&aPosition);


void SetPosition(nsScannerIterator&aPosition,

PRBool aTruncate =PR_FALSE,

PRBool aReverse =PR_FALSE);


void ReplaceCharacter(nsScannerIterator&aPosition,

PRUnichar aChar);


* Internal method used to cause theinternal buffer to

* be filled with data.


* @update gess4/3/98



PRBool IsIncremental(void) {returnmIncremental;}


voidSetIncremental(PRBool anIncrValue) {mIncremental=anIncrValue;}


* Return the position of the firstnon-whitespace

* character. This is only reliablebefore consumers start

* reading from this scanner.



PRInt32 FirstNonWhitespacePosition()


return mFirstNonWhitespacePosition;



void SetParser(nsParser*aParser)


mParser = aParser;



* Override replacement character used bynsIUnicodeDecoder.

* Default behavior is that it usesnsIUnicodeDecoder's mapping.


* @param aReplacementCharacter thereplacement character

*XML (expat) parser uses 0xffff



voidOverrideReplacementCharacter(PRUnichar aReplacementCharacter);




PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest,PRInt32 aErrorPos = -1);

PRBool AppendToBuffer(constnsAString& aStr)


nsScannerString::Buffer* buf =nsScannerString::AllocBufferFromString(aStr);

if (!buf)

return PR_FALSE;

AppendToBuffer(buf, nsnull);

return PR_TRUE;





nsScannerIteratormCurrentPosition; // The position we will nextread from in the scanner buffer


nsScannerIteratormMarkPosition; // The position last marked (we may rewind to here)


nsScannerIteratormEndPosition; // The current end of the scanner buffer


nsScannerIteratormFirstInvalidPosition; // The position of thefirst invalid character that was detected


nsString mFilename;


PRUint32 mCountRemaining; // The number of bytes still to be read

// from the scanner buffer


PRPackedBool mIncremental;

PRPackedBoolmHasInvalidCharacter; //是否有非法的字符

PRUnicharmReplacementCharacter; //替换用字符

PRInt32mFirstNonWhitespacePosition; //第一个非空白字符的位置

PRInt32 mCharsetSource; //字符集编号

nsCString mCharset; //字符集名称

nsCOMPtr mUnicodeDecoder; //Unicode编码器

nsParser *mParser; //当前扫描器所对应的解析器


nsScanner &operator =(const nsScanner &); //Not implemented.



// We replace NUL characterswith this character.


staticPRUnichar sInvalid = UCS2_REPLACEMENT_CHAR;

/**
 * Stores the terminator list and derives the quick-reject filter from it.
 *
 * @param aTerminateChars NUL-terminated list of terminating characters;
 *                        the pointer is kept in mChars, not copied.
 */
nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) :
  mChars(aTerminateChars), mFilter(PRUnichar(~0)) // All bits set
{
  // Build filter that will be used to filter out characters with
  // bits that none of the terminal chars have. This works very well
  // because terminal chars often have only the last 4-6 bits set and
  // normal ascii letters have bit 7 set. Other letters have even higher
  // bits set.

  // Calculate filter: clear every bit that some terminator has set.
  for (const PRUnichar *current = aTerminateChars; *current; ++current) {
    mFilter &= ~(*current);
  }
}




// Buffer size: 1 when __INCREMENTAL is defined, 64 otherwise.
// NOTE(review): the preprocessor lines were lost in the damaged source, which
// left two conflicting definitions; the macro name is reconstructed, confirm.
#ifdef __INCREMENTAL
const int kBufsize = 1;
#else
const int kBufsize = 64;
#endif



* Usethis constructor if you want i/o to be based on

* asingle string you hand in during construction.

* Thisshort cut was added for Javascript.


*@update gess 5/12/98

*@param aMode represents theparser mode (nav, other)




nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,

PRInt32 aSource)

: mParser(nsnull)




mSlidingBuffer = nsnull;

mCountRemaining = 0;

mFirstNonWhitespacePosition = -1;

if (AppendToBuffer(anHTMLString)) { //将给定的字符串拷贝到当前要解析的buffer中

mSlidingBuffer->BeginReading(mCurrentPosition); //开始解析,需要注意的是,实际上并不是从mCurrentPosition位置开始解析,而是开始解析,并把位置赋值给mCurrentPosition

} else { //其他情况下,说明将字符串拷贝给buffer失败了

/* XXX see hack below, re: bug 182067 */

memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); //貌似通过内存设置方式直接将其指针置空

mEndPosition = mCurrentPosition; //此时这两个值都应当为0


mMarkPosition = mCurrentPosition; //记录一下当前的位置

mIncremental = PR_FALSE; //设置增量式解析位为FALSE

mUnicodeDecoder = 0; //某个变量

mCharsetSource = kCharsetUninitialized; //字符集变量

mHasInvalidCharacter = PR_FALSE; //设置是否有非法字符

mReplacementCharacter = PRUnichar(0x0); //设置如果遇到非法字符的替换字符



/**
 * Switch the scanner to a new document charset and fetch its decoder.
 *
 * @param aCharset requested charset name
 * @param aSource  where the charset came from; a value lower than the
 *                 current mCharsetSource is ignored
 * @return NS_OK when nothing needed to change, otherwise the result of
 *         obtaining the unicode decoder
 */
nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSource)
{
  if (aSource < mCharsetSource) // priority is lower than the current one, just
    return NS_OK;               // keep what we have

  nsICharsetAlias* calias = nsParser::GetCharsetAliasService();
  NS_ASSERTION(calias, "Must have the charset alias service!");

  nsresult res = NS_OK;
  if (!mCharset.IsEmpty())
  {
    PRBool same;
    res = calias->Equals(aCharset, mCharset, &same);
    if (NS_SUCCEEDED(res) && same)
    {
      return NS_OK; // no difference, don't change it
    }
  }

  // different, need to change it
  nsCString charsetName;
  res = calias->GetPreferred(aCharset, charsetName);

  if (NS_FAILED(res) && (mCharsetSource == kCharsetUninitialized))
  {
    // failed - unknown alias, fallback to ISO-8859-1
    mCharset.AssignLiteral("ISO-8859-1");
  }
  else
  {
    mCharset.Assign(charsetName);
  }

  mCharsetSource = aSource;

  NS_ASSERTION(nsParser::GetCharsetConverterManager(),
               "Must have the charset converter manager!");

  res = nsParser::GetCharsetConverterManager()->
    GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
  if (NS_FAILED(res))
  {
    // GetUnicodeDecoderRaw can fail if the charset has the .isXSSVulnerable
    // flag. Try to fallback to ISO-8859-1
    mCharset.AssignLiteral("ISO-8859-1");
    mCharsetSource = kCharsetFromWeakDocTypeDefault;
    res = nsParser::GetCharsetConverterManager()->
      GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
  }

  if (NS_SUCCEEDED(res) && mUnicodeDecoder)
  {
    // We need to detect conversion error of character to support XML
    // encoding error.
    // NOTE(review): the statement under this comment was lost in the damaged
    // source; restored as the decoder's signal-on-error setting -- confirm.
    mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
  }

  return res;
}




*default destructor


*@update gess 3/25/98




nsScanner::~nsScanner() {

if (mSlidingBuffer) {

delete mSlidingBuffer; //删除当前未扫描的字符串


MOZ_COUNT_DTOR(nsScanner); //用来进行日志记录的一个构件方法,如果没有特殊#Define NS_BUILD_REFCNT_LOGGING的话,这个方法一般为空,什么都不做



*Resets current offset position of input stream to marked position.

* Thisallows us to back up to this point if the need should arise,

* suchas when tokenization gets interrupted.



*@update gess 5/12/98





if (mSlidingBuffer) { //如果当前存在一个mSlidingBuffer

mCountRemaining += (Distance(mMarkPosition, mCurrentPosition)); //修改剩余的字节数,需要加上Mark位置到当前位置的距离

mCurrentPosition = mMarkPosition; //设置当前位置为Mark的位置





*Records current offset position in input stream. This allows us

* toback up to this point if the need should arise, such as when

*tokenization gets interrupted.


*@update gess 7/29/98




PRInt32 nsScanner::Mark() {

PRInt32 distance = 0; //设置距离为0

if (mSlidingBuffer) { //如果当前字符串存在

nsScannerIteratoroldStart; //设置一个游标

mSlidingBuffer->BeginReading(oldStart); //用该游标记录下原始的起始位置

distance = Distance(oldStart, mCurrentPosition);

mSlidingBuffer->DiscardPrefix(mCurrentPosition); //去掉当前位置mCurrentPosition之前的字符串

mSlidingBuffer->BeginReading(mCurrentPosition); //设置起始位置为mCurrentPosition

mMarkPosition = mCurrentPosition; //设置mMarkPosition为mCurrentPosition


return distance;




* Insert data to our underlying input bufferas

* if it were read from an input stream.


* @updateharishd 01/12/99

* @returnerror code


PRBool nsScanner::UngetReadable(const nsAString& aBuffer) {

if (!mSlidingBuffer) { //如果当前的解析字符串不存在

return PR_FALSE; //则返回


mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition); //调用mSlidingBuffer的UngetReadable,将aBuffer插入到mCurrentPosition中去


mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators



PRUint32 length = aBuffer.Length();


mCountRemaining += length; // Ref. bug 117441

return PR_TRUE;




* Append data to our underlying input bufferas

* if it were read from an input stream.


* @updategess4/3/98

* @returnerror code


nsresult nsScanner::Append(const nsAString& aBuffer) {

if (!AppendToBuffer(aBuffer)) //直接调用AppendToBuffer方法


return NS_OK;





*@update gess 5/21/98





nsresult nsScanner::Append(const char* aBuffer,PRUint32 aLen,



nsresult res=NS_OK;

PRUnichar *unichars, *start;

if (mUnicodeDecoder) {

PRInt32 unicharBufLen = 0;

mUnicodeDecoder->GetMaxLength(aBuffer,aLen, &unicharBufLen); //就是unicharBufLen = aLen + 1

//申请一个新数组,长度为unicharBufLen + 1,因为C++中字符数组最后一位要放’\0’

nsScannerString::Buffer* buffer =nsScannerString::AllocBuffer(unicharBufLen + 1);



start = unichars = buffer->DataStart();


PRInt32 totalChars = 0;

PRInt32 unicharLength = unicharBufLen;

PRInt32errorPos = -1;


do {

PRInt32 srcLength = aLen; //设置一个变量,记录附加字符串的原始长度


res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars,&unicharLength);


totalChars += unicharLength;

// Continuationof failure case

if(NS_FAILED(res)) {

// if we failed, we consume one byte, replaceit with the replacement

// character and try the conversion again.

// This is only needed because some decodersdon't follow the

// nsIUnicodeDecoder contract: they return afailure when *aDestLength

// is 0rather than the correct NS_OK_UDEC_MOREOUTPUT.See bug 244177




if ((unichars + unicharLength) >=buffer->DataEnd()) { //如果超出了最大长度

NS_ERROR("Unexpected end of destinationbuffer"); //指针越界出错了



if (mReplacementCharacter == 0x0 && errorPos== -1) { //如果替换字符为0,且出错位置为-1

errorPos = totalChars; //出错位置直接记录为整体字符串



unichars[unicharLength++] = mReplacementCharacter == 0x0 ?

mUnicodeDecoder->GetCharacterForUnMapped() :



unichars = unichars + unicharLength;

unicharLength = unicharBufLen - (++totalChars);



if(((PRUint32) (srcLength + 1)) >aLen) { //此处应当是出现了错误字符才会导致的情况

srcLength = aLen;


else { //一般情况下只需要进行加一操作,即按序解析下一个字符



aBuffer += srcLength; //将aBuffer向后移动srcLength个位置

aLen -= srcLength; //减少srcLength长度,即已经解析的字节数


}while (NS_FAILED(res) && (aLen >0));

buffer->SetDataLength(totalChars); //设置buffer的数据长度为新的数据长度

// Don't propagate return code of unicodedecoder

// since it doesn't reflect on our success orfailure

// - Ref. bug 87110

res= NS_OK;

if (!AppendToBuffer(buffer, aRequest,errorPos)) //使用转换好的字符串进行AppendToBuffer操作,这个操作在代码的最后会进行介绍



else { //其他情况下,说明Append操作失败

NS_WARNING("No decoder found.");

res = NS_ERROR_FAILURE; //设置结果为错误值


return res; //返回结果




*retrieve next char from scanners internal input stream


*@update gess 3/25/98


*@return error code reflectingread status



nsresultnsScanner::GetChar(PRUnichar& aChar) {

if (!mSlidingBuffer || mCurrentPosition ==mEndPosition) { //先判断一下当前解析是不是已经到结尾了,或者带解析的字符串本身就不存在

aChar = 0;

return kEOF; //返回文件末尾,即空值


aChar = *mCurrentPosition++; //设置aChar为当前的位置加一

--mCountRemaining; //减少剩余未解析的字节数

return NS_OK;




* peekahead to consume next char from scanner's internal

* inputbuffer


*@update gess 3/25/98





nsresult nsScanner::Peek(PRUnichar&aChar, PRUint32 aOffset) {

aChar = 0;

if (!mSlidingBuffer || mCurrentPosition ==mEndPosition) { //如果当前待解析的字符串不存在,或当前位置等于结束位置了

return kEOF; //返回文件末尾kEOF,即空


if (aOffset > 0) { //如果aOffset大于零

if (mCountRemaining <= aOffset) //如果偏移位置超过了剩余字节的数量

return kEOF; //直接返回空(为啥不返回最后一个字节呢,合情合理)

nsScannerIterator pos = mCurrentPosition; //获取当前位置

pos.advance(aOffset); //前进aOffset个位置

aChar=*pos; //用aChar指向该位置的字符


else {

aChar=*mCurrentPosition; //其他情况下,即aOffset为0或者为负值的情况下,直接指向当前位置


return NS_OK; //返回正确结果




/**
 *  Copy a run of upcoming chars into aStr without consuming them.
 *
 *  @param   aStr      receives the copied chars
 *  @param   aNumChars how many chars are wanted
 *  @param   aOffset   how far ahead of the current position to start
 *  @return  NS_OK, or kEOF when the start position is past the data
 */
nsresult nsScanner::Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset)
{
  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
    return kEOF;
  }

  nsScannerIterator start, end;

  start = mCurrentPosition;

  if ((PRInt32)mCountRemaining <= aOffset) {
    return kEOF;
  }

  if (aOffset > 0) {
    start.advance(aOffset);
  }

  // Unlike the single-char Peek, a request running past the end is
  // shortened to the end of the data instead of failing.
  if (mCountRemaining < PRUint32(aNumChars + aOffset)) {
    end = mEndPosition;
  }
  else {
    end = start;
    end.advance(aNumChars);
  }

  CopyUnicodeTo(start, end, aStr);

  return NS_OK;
}




* Skipwhitespace on scanner input stream


*@update gess 3/25/98


*@return error status


nsresult nsScanner::SkipWhitespace(PRInt32&aNewlinesSkipped) {


if (!mSlidingBuffer) { //如果当前解析字符串为空

return kEOF; //返回文件末尾值,即空值


PRUnichar theChar = 0; //设置一个字符变量

nsresult result = Peek(theChar); //获取当前位置的字符

if (NS_FAILED(result)) { //如果获取失败

return result; //则返回该结果


nsScannerIterator current = mCurrentPosition; //获取当前位置的游标


PRBool done = PR_FALSE;

PRBool skipped = PR_FALSE;


while (!done && current != mEndPosition) {

switch(theChar) {

case '\n':

case '\r':++aNewlinesSkipped; //遇到\r或者\n,则对参数中的变量+1

case ' ':

case '\t':


skipped = PR_TRUE; //设置遇到了空字符并进行了跳过

PRUnichar thePrevChar = theChar; //用thePrevChar记录当前字节

theChar = (++current != mEndPosition) ? *current : '\0'; //如果到文件末尾了,那么直接将theChar写成’\0’

if ((thePrevChar == '\r' && theChar == '\n') ||

(thePrevChar == '\n' && theChar == '\r')) {

theChar = (++current !=mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF //如果遇到了’\r’和’\n’结合使用的情况,再多跳过一个字节





done = PR_TRUE; //其他情况下即遇到非空字符,则设置DONE为PR_TRUE以跳出循环




if (skipped) { //如果发生了跳过空字符

SetPosition(current); //设置当前位置为新位置

if (current == mEndPosition) { //如果当前位置为mEndPosition

result = kEOF; //返回文件末尾值



return result; //返回结果




* Skipover chars as long as they equal given char


*@update gess 3/25/98


*@return error code


nsresult nsScanner::SkipOver(PRUnicharaSkipChar){

if (!mSlidingBuffer) { //如果当前待解析字符串为空

return kEOF; //返回空值



PRUnichar ch=0;

nsresult result=NS_OK;

while(NS_OK==result) { //循环进行字符跳过

result=Peek(ch); //获取当前位置的字符

if(NS_OK == result) { //如果获取成功

if(ch!=aSkipChar) { //如果当前位置字符和所给定字符不等

break; //跳出循环


GetChar(ch); //调用前面的GetChar获取下一个字符


else break; //其他情况下,即Peek()返回失败值,则退出循环

} //while

return result; //返回结果




*Consume characters until you run into space, a '<', a '>', or a'/'.


*@param aString - receives newdata from stream

*@return error code


nsresultnsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString) {

if (!mSlidingBuffer) { //如果当前待解析的字符串为空

return kEOF; //返回文件末尾值,即空值



PRUnichar theChar=0;


nsScannerIterator current, end;


current = mCurrentPosition;

end = mEndPosition;

// Loop until we find an illegal character. Everything isthen appended

// later.


while(current != end && !found) {


switch(theChar) {

case '\n':

case '\r':

case ' ':

case '\t':

case '\v':

case '\f':

case '<':

case '>':

case '/':

found = PR_TRUE; //找到非法字符,设置相应标示位


case '\0':

ReplaceCharacter(current, sInvalid); //如果是空字符则使用特殊字符对其进行替换





if (!found) { //如果没找到

++current; //则将当前位置前进一个字符



SetPosition(current); //设置当前位置为新位置

if (current == end) { //如果当前已经到了文件末尾

result = kEOF; //返回文件末尾值



return result; //返回处理结果




*Consume characters until you run into a char that's not valid in an

*entity name


*@param aString - receives newdata from stream

*@return error code



nsresultnsScanner::ReadEntityIdentifier(nsString& aString) {

if (!mSlidingBuffer) { //对待解析字符串进行判断

return kEOF;



PRUnichar theChar=0;


nsScannerIterator origin, current, end;



origin = mCurrentPosition;

current = mCurrentPosition;

end = mEndPosition;

while(current != end) { //循环遍历字符串,直到末尾或主动退出

theChar=*current; //获取当前位置的字符

if(theChar) { //如果字符存在

found=PR_FALSE; //设置是否找到位found默认值为PR_FALSE

switch(theChar) {

case '_':

case '-':

case '.':

// Don't allow ':' in entity names. See bug 23791

found = PR_TRUE;



found = ('a'<=theChar &&theChar<='z') ||

('A'<=theChar&& theChar<='Z') ||

('0'<=theChar&& theChar<='9');



if(!found) { //这里不应当对是否前进了进行一下判断么?如果mCurrentPosition和current相等怎么办?他似乎默认当前一定能找到Entity字符


AppendUnicodeTo(mCurrentPosition, current,aString);




++current; //将current前进一位


SetPosition(current); //将当前位置设置为新的位置

if (current == end) { //如果发现是文件末尾了

AppendUnicodeTo(origin, current, aString); //将原始的position和当前位置之间的这段字符串粘贴到aString中去

return kEOF;



return result; //返回结果




*Consume digits


*@param aString - should containdigits

*@return error code


nsresultnsScanner::ReadNumber(nsString& aString,PRInt32 aBase) {

if (!mSlidingBuffer) { //对待解析字符串的存在进行判定

return kEOF;



NS_ASSERTION(aBase == 10 || aBase == 16,"basevalue not supported");


PRUnichar theChar=0;


nsScannerIterator origin, current, end;


origin = mCurrentPosition;

current = origin;

end = mEndPosition;

PRBool done = PR_FALSE;

while(current != end) { //循环遍历字符串,直到字符串结尾或主动退出循环


if(theChar) { //如果字符串存在

done = (theChar < '0' || theChar> '9') &&

((aBase == 16)? (theChar < 'A' || theChar > 'F')&&

(theChar < 'a' || theChar > 'f')

:PR_TRUE); //判断是否是0~9,或16进制情况下的A~F

if(done) { //如果找到

AppendUnicodeTo(origin, current, aString); //则将其粘附到aString末尾

break; //退出循环





SetPosition(current); //同上一个方法一样,主要为进行一些特殊情况的收尾工作

if (current == end) {

AppendUnicodeTo(origin, current, aString);

return kEOF;



return result;




*Consume characters until you find the terminal char


*@update gess 3/25/98

*@param aString receives new datafrom stream

*@param addTerminal tells uswhether to append terminal to aString

*@return error code


nsresultnsScanner::ReadWhitespace(nsScannerSharedSubstring& aString,


PRBool&aHaveCR) {



if (!mSlidingBuffer) { //对待解析字符串的存在进行判定

return kEOF;



PRUnichar theChar = 0;

nsresult result = Peek(theChar); //查看当前位置的字符

if (NS_FAILED(result)) { //如果查看失败

return result; //则返回失败结果



nsScannerIterator origin, current, end;

PRBool done = PR_FALSE; //设置后面循环用到的变量


origin = mCurrentPosition;

current = origin;

end = mEndPosition;

PRBool haveCR = PR_FALSE; //申请一个新的内部使用的haveCR变量,默认同样为FALSE

while(!done && current != end) {

switch(theChar) {

case '\n':

case '\r':

{ //如果是’\n’或’\r’的情况下

++aNewlinesSkipped; //首先将aNewlinesSkipped加一,因为新的一行开始了

PRUnichar thePrevChar = theChar; //记录当前字符

theChar = (++current != end) ? *current : '\0'; //获取下一个字符,如果是文件末尾了则设置下一个字符为’\0’

if ((thePrevChar == '\r' && theChar == '\n') || //判断是否是\r\n同时出现

(thePrevChar == '\n' && theChar == '\r')) {


theChar = (++current != end) ? *current: '\0'; // CRLF ==LFCR => LF


haveCR = PR_TRUE;

} else if(thePrevChar == '\r') { //如果上一个字符为’\r’

// LoneCR becomes CRLF; callers should know to remove extra CRs

AppendUnicodeTo(origin, current,aString); //拷贝字符串

aString.writable().Append(PRUnichar('\n')); //并且需要手动在其之后加上一个’\n’字符

origin = current;

haveCR = PR_TRUE;




case ' ':

case '\t':

theChar = (++current != end) ? *current : '\0'; //遇到其他类型的空白字符,则都需要对其是否为文件末尾做判断



done = PR_TRUE; //默认情况下即不为空白字符,那么设置done为TRUE

AppendUnicodeTo(origin, current, aString); //并将目前已解析的这些字符串,注意是从orgin





//XXXbz callers of this haveto manage their lone '\r' themselves if they want

//it to work. Good thing they're all in view-source and itdeals.

nsresultnsScanner::ReadWhitespace(nsScannerIterator& aStart,

nsScannerIterator& aEnd,

PRInt32&aNewlinesSkipped) {

if (!mSlidingBuffer) { //首先对代解析字符串的存在进行判断

return kEOF;


PRUnichartheChar = 0; //申请一个新的变量

nsresult result = Peek(theChar); //获取当前位置的字符

if (NS_FAILED(result)) { //如果获取字符失败

return result; //返回失败结果


nsScannerIterator origin, current, end; //三个用来记录位置的游标

PRBool done = PR_FALSE; //设置循环条件


origin = mCurrentPosition;

current = origin;

end = mEndPosition;


while(!done && current != end) {

switch(theChar) {

case '\n':

case '\r':++aNewlinesSkipped; //遇到\n或者\r就将新行数加一

case ' ':

case '\t':


PRUnichar thePrevChar = theChar;

theChar = (++current != end) ? *current : '\0';

if ((thePrevChar == '\r' && theChar == '\n') || //同时需要注意处理\n\r紧邻着同时出现的情况

(thePrevChar == '\n' && theChar == '\r')) {

theChar = (++current != end) ?*current : '\0'; //CRLF == LFCR => LF





done = PR_TRUE; //默认情况下就说明找到了非空格字符

aStart = origin;

aEnd = current;




SetPosition(current); //设置当前位置为新位置

if (current == end) { //判断是否已到达字符串末尾

aStart = origin;

aEnd = current;

result = kEOF;


return result;




*Consume characters until you encounter one contained in given

* inputset.


*@update gess 3/25/98

*@param aString will contain theresult of this method

*@param aTerminalSet is anordered string that contains

*the set of INVALID characters

*@return error code



nsresultnsScanner::ReadUntil(nsAString& aString,

const nsReadEndCondition& aEndCondition,



if (!mSlidingBuffer) { //判断待解析的字符串是否为空

return kEOF;



nsScannerIterator origin, current;


const PRUnichar* setstart = aEndCondition.mChars;

const PRUnichar* setcurrent;


origin = mCurrentPosition;

current = origin;

PRUnichar theChar=0;

nsresultresult=Peek(theChar); //获取当前位置的字符

if (NS_FAILED(result)) { //如果获取字符失败

return result;


while (current != mEndPosition) { //循环,直到字符串末尾

theChar = *current; //获取当亲字符

if (theChar == '\0'){ //如果当前位置是空字符’\0’

ReplaceCharacter(current, sInvalid); //用替换字符对其进行替换

theChar = sInvalid; //并且获取替换后的字符


// Filter out completely wrong characters

// Check if all bits are in the required area

if(!(theChar & aEndCondition.mFilter)){


// They were. Do a thorough check.


setcurrent = setstart;

while (*setcurrent) {

if (*setcurrent == theChar) { //判断是否是特殊字符

if(addTerminal) //参数传递过来的标示位,是否需要将该特殊字符也添加到读取结果字符串中


AppendUnicodeTo(origin, current, aString); //粘贴字符串

SetPosition(current); //设置当前位置为新位置


return NS_OK;


++setcurrent; //获取下一个aEndCondition中的字符



++current; //比较源字符串中的下一个字符


// If we are here, we didn't find any terminator in thestring and

// current = mEndPosition



AppendUnicodeTo(origin, current, aString);

return kEOF;



nsresultnsScanner::ReadUntil(nsScannerSharedSubstring& aString,

const nsReadEndCondition& aEndCondition,



if (!mSlidingBuffer) {

return kEOF;


nsScannerIterator origin, current;

const PRUnichar* setstart = aEndCondition.mChars;

const PRUnichar* setcurrent;

origin = mCurrentPosition;

current = origin;

PRUnichar theChar=0;

nsresult result=Peek(theChar);

if (NS_FAILED(result)) {

return result;


while (current != mEndPosition) {

theChar = *current;

if (theChar == '\0'){

ReplaceCharacter(current, sInvalid);

theChar = sInvalid;


// Filter out completely wrong characters

// Check if all bits are in the required area

if(!(theChar &aEndCondition.mFilter)) {

// They were. Do a thorough check.

setcurrent = setstart;

while (*setcurrent) {

if (*setcurrent == theChar) {



AppendUnicodeTo(origin, current, aString);



return NS_OK;







// If we are here, we didn't find any terminatorin the string and

// current = mEndPosition


AppendUnicodeTo(origin, current, aString);

return kEOF;




*Consumes chars until you see the given terminalChar


*@update gess 3/25/98


*@return error code


nsresult nsScanner::ReadUntil(nsAString&aString,




if (!mSlidingBuffer) {

return kEOF;


nsScannerIterator origin, current;

origin = mCurrentPosition;

current = origin;

PRUnichar theChar;

nsresult result = Peek(theChar);

if (NS_FAILED(result)) {

return result;


while (current != mEndPosition) {

theChar = *current;

if (theChar == '\0'){

ReplaceCharacter(current, sInvalid);

theChar = sInvalid;


if (aTerminalChar == theChar) {



AppendUnicodeTo(origin, current, aString);


return NS_OK;




// If we are here, we didn't find any terminator in thestring and

// current = mEndPosition

AppendUnicodeTo(origin, current, aString);


return kEOF;



voidnsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)



aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);


voidnsScanner::CurrentPosition(nsScannerIterator& aPosition)



aPosition = mCurrentPosition;


voidnsScanner::EndReading(nsScannerIterator& aPosition)



aPosition = mEndPosition;



voidnsScanner::SetPosition(nsScannerIterator& aPosition, PRBool aTerminate,PRBool aReverse)


if (mSlidingBuffer) { //首先对未解析字符串的存在进行判定


PRUint32 origRemaining = mCountRemaining;


if (aReverse) { //需要通过参数来判断新位置是在当前位置之前还是之后


mCountRemaining += (Distance(aPosition, mCurrentPosition));


else { //反之就是在之后,需要减去偏移距离

mCountRemaining -= (Distance(mCurrentPosition, aPosition));



NS_ASSERTION((mCountRemaining >= origRemaining && aReverse)||

(mCountRemaining <=origRemaining && !aReverse),

"Improperuse of nsScanner::SetPosition. Make sure to set the"

"aReverse parameter correctly");


mCurrentPosition = aPosition;

if (aTerminate &&(mCurrentPosition == mEndPosition)) { //如果当前已经到了字符串结尾,并且相应的aTerminate标示位被设置为TRUE

mMarkPosition = mCurrentPosition; //记录一下当前位置

mSlidingBuffer->DiscardPrefix(mCurrentPosition); //删除当前位置之前的所有字符





voidnsScanner::ReplaceCharacter(nsScannerIterator& aPosition,



if (mSlidingBuffer) { //如果当前待解析字符串存在

mSlidingBuffer->ReplaceCharacter(aPosition, aChar); //直接对aPosition位置的字符进行替换




PRBoolnsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,

nsIRequest *aRequest,




if (nsParser::sParserDataListeners && mParser&&


aBuf->DataEnd()), aRequest))) {

// Don't actually append on failure.


return mSlidingBuffer != nsnull;


if (!mSlidingBuffer) { //如果字符串为空的情况下

mSlidingBuffer = newnsScannerString(aBuf); //使用aBuf初始化其为一个新的字符串

if (!mSlidingBuffer) //如果初始化失败

return PR_FALSE; //返回错误值,这里怎么不报那个NS_OUTOFMEMORY错误了?

mSlidingBuffer->BeginReading(mCurrentPosition); //获取字符串的读取当前位置,即起始位置

mMarkPosition = mCurrentPosition; //标记一下当前位置

mSlidingBuffer->EndReading(mEndPosition); //获取结束位置

mCountRemaining = aBuf->DataLength(); //获取aBuf的长度


else {

mSlidingBuffer->AppendBuffer(aBuf); //其他情况下,说明当前待解析的字符串不为空,我们需要将aBuf放到该字符串之后

if (mCurrentPosition == mEndPosition) { //判断,如果当前位置是原始字符串的末尾

mSlidingBuffer->BeginReading(mCurrentPosition); //设置新的当前位置,因为我们新增了内容


mSlidingBuffer->EndReading(mEndPosition); //设置新的结束位置,同样因为有新增内容

mCountRemaining += aBuf->DataLength(); //增加剩余字节的数量


if (aErrorPos != -1 && !mHasInvalidCharacter){ //同时,要对非法字符进行处理,做判断,如果原始字符串中没有非法字符,而新增加的字符串中有非法字符

mHasInvalidCharacter = PR_TRUE; //那么我们需要设置新的非法字符标志位

mFirstInvalidPosition = mCurrentPosition; //改变新的非法字符位置

mFirstInvalidPosition.advance(aErrorPos); //设置该位置为当前位置前进aErrorPos偏移距离后的位置


if (mFirstNonWhitespacePosition == -1) { //同时需要对第一个非空白字符标示位进行修改,判断,如果原始的字符串中全部都是空白字符

nsScannerIterator iter(mCurrentPosition); //那么设置两个游标iter和end

nsScannerIterator end(mEndPosition);

while (iter != end) { //循环遍历新增加的那段字符串

if (!nsCRT::IsAsciiSpace(*iter)) { //判断当前字符如果是非空字符

mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter); //那么设置第一个非空白字符标示位为起始地址+当前偏移地址(此处我怎么觉得这么别扭?不过貌似没错)



++iter; //将游标前进至下一个字符



return PR_TRUE;




* callthis to copy bytes out of the scanner that have not yet been consumed

* bythe tokenization process.


* @update gess 5/12/98

*@param aCopyBuffer is where thescanner buffer will be copied to

*@return nada


voidnsScanner::CopyUnusedData(nsString& aCopyBuffer) {

if (!mSlidingBuffer) { //首先得判断解析字符串是否存在

aCopyBuffer.Truncate(); //如果不存在或者为0值,直接清空并返回空值




nsScannerIterator start, end;

start = mCurrentPosition;

end = mEndPosition;


CopyUnicodeTo(start, end, aCopyBuffer);




*Retrieve the name of the file that the scanner is reading from.

* Insome cases, it's just a given name, because the scanner isn't

*really reading from a file.


*@update gess 5/12/98



nsString& nsScanner::GetFilename(void) { //这个filename基本没用

return mFilename;



*Conduct self test. Actually, selftesting for this class

*occurs in the parser selftest.


*@update gess 3/25/98




voidnsScanner::SelfTest(void) { //空方法,期待后来人编写




voidnsScanner::OverrideReplacementCharacter(PRUnichar aReplacementCharacter)

{ //重设新的非法字符替换字符

mReplacementCharacter = aReplacementCharacter;

if (mHasInvalidCharacter) {

ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);



