1.TiXmlDocument::LoadFile() 打开xml文件
/**
* @brief TiXmlDocument::LoadFile
* @param _filename xml文件名
* @param encoding 文件编码类型
* @return
*
* 这个方法只是打开xml文件,然后调用另一个LoadFile()方法
*
*/
bool TiXmlDocument::LoadFile( const char* _filename, TiXmlEncoding encoding )
{
TIXML_STRING filename( _filename );
value = filename;
// reading in binary mode so that tinyxml can normalize the EOL
FILE* file = TiXmlFOpen( value.c_str (), "rb" );
if ( file )
{
bool result = LoadFile( file, encoding );
fclose( file );
return result;
}
else
{
SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN );
return false;
}
}
/**
* @brief TiXmlDocument::LoadFile
* @param file
* @param encoding
* @return
*
* 1.将文件内容读到一个字符数组中
* 2.换行符统一替换成\n,文件换行符在不同的系统实现不同,有\n,\r\n,\r三种形式。
* 3.调用Parse()方法
*/
bool TiXmlDocument::LoadFile( FILE* file, TiXmlEncoding encoding )
{
if ( !file )
{
SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN );
return false;
}
// Delete the existing data:
Clear();
location.Clear();
// Get the file size, so we can pre-allocate the string. HUGE speed impact.
long length = 0;
fseek( file, 0, SEEK_END );
length = ftell( file );
fseek( file, 0, SEEK_SET );
// Strange case, but good to handle up front.
if ( length <= 0 )
{
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
return false;
}
// Subtle bug here. TinyXml did use fgets. But from the XML spec:
// 2.11 End-of-Line Handling
//
//
// ...the XML processor MUST behave as if it normalized all line breaks in external
// parsed entities (including the document entity) on input, before parsing, by translating
// both the two-character sequence #xD #xA and any #xD that is not followed by #xA to
// a single #xA character.
//
//
// It is not clear fgets does that, and certainly isn't clear it works cross platform.
// Generally, you expect fgets to translate from the convention of the OS to the c/unix
// convention, and not work generally.
/*
while( fgets( buf, sizeof(buf), file ) )
{
data += buf;
}
*/
char* buf = new char[ length+1 ];
buf[0] = 0;
if ( fread( buf, length, 1, file ) != 1 ) {
delete [] buf;
SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN );
return false;
}
// Process the buffer in place to normalize new lines. (See comment above.)
// Copies from the 'p' to 'q' pointer, where p can advance faster if
// a newline-carriage return is hit.
//
// Wikipedia:
// Systems based on ASCII or a compatible character set use either LF (Line feed, '\n', 0x0A, 10 in decimal) or
// CR (Carriage return, '\r', 0x0D, 13 in decimal) individually, or CR followed by LF (CR+LF, 0x0D 0x0A)...
// * LF: Multics, Unix and Unix-like systems (GNU/Linux, AIX, Xenix, Mac OS X, FreeBSD, etc.), BeOS, Amiga, RISC OS, and others
// * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS
// * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9
const char* p = buf; // the read head
char* q = buf; // the write head
const char CR = 0x0d;
const char LF = 0x0a;
buf[length] = 0;
while( *p ) {
assert( p < (buf+length) );
assert( q <= (buf+length) );
assert( q <= p );
if ( *p == CR ) {
*q++ = LF;
p++;
if ( *p == LF ) { // check for CR+LF (and skip LF)
p++;
}
}
else {
*q++ = *p++;
}
}
assert( q <= (buf+length) );
*q = 0;
Parse( buf, 0, encoding ); //解析xml
delete [] buf;
return !Error();
}
/**
* @brief TiXmlDocument::Parse
* @param p
* @param prevData
* @param encoding
* @return
*
* 完成DOM的建立
*
*/
const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
{
ClearError();
// Parse away, at the document level. Since a document
// contains nothing but other tags, most of what happens
// here is skipping white space.
if ( !p || !*p )
{
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
return 0;
}
// Note that, for a document, this needs to come
// before the while space skip, so that parsing
// starts from the pointer we are given.
location.Clear();
if ( prevData )
{
location.row = prevData->cursor.row;
location.col = prevData->cursor.col;
}
else
{
location.row = 0;
location.col = 0;
}
TiXmlParsingData data( p, TabSize(), location.row, location.col );
location = data.Cursor();
if ( encoding == TIXML_ENCODING_UNKNOWN )
{
// Check for the Microsoft UTF-8 lead bytes.
const unsigned char* pU = (const unsigned char*)p;
if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
&& *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
&& *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
{
encoding = TIXML_ENCODING_UTF8;
useMicrosoftBOM = true;
}
}
/*这个方法的功能是判断的当前的指针p指向的字符
*是不是空白字符(即空格或换行符),如果是,则指针
*前移,找到一个不是空白字符的字符,返回当前的指针位置
*如果不是,还返回这个指针
*
*由此可见这个方法也非常重要,不断地跳过空白字符,不停地解析数据
*/
p = SkipWhiteSpace( p, encoding );
if ( !p )
{
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
return 0;
}
/*
*重点在这里
*解析xml字符串,直到结束'\0'
*
*/
while ( p && *p )
{
/*根据头部判断当前的指针指向哪种节点,然后new一个
*相应的节点,并返回该节点指针,并且设置该节点的父节点为this
*TiXmlNode是一个基类
*是对xml的元素、注释、文本、文档声明的抽象
*/
TiXmlNode* node = Identify( p, encoding );
if ( node )
{
/*下面是多态执行的,不同的节点类型,实现是不同的
*假设node是一个元素节点,那么这个元素就会有属性
*就会有子元素等信息,所以要继续解析,因此这个node也有子节点,
*直到这个节点,这就是多叉树形成的原因
*直到这个节点内容结束,返回当前位置指针。
*/
p = node->Parse( p, &data, encoding );
/*
*将这个节点,连接到父节点树上
*/
LinkEndChild( node );
}
else
{
break;
}
// Did we get encoding info?
if ( encoding == TIXML_ENCODING_UNKNOWN
&& node->ToDeclaration() )
{
TiXmlDeclaration* dec = node->ToDeclaration();
const char* enc = dec->Encoding();
assert( enc );
if ( *enc == 0 )
encoding = TIXML_ENCODING_UTF8;
else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
encoding = TIXML_ENCODING_UTF8;
else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
else
encoding = TIXML_ENCODING_LEGACY;
}
p = SkipWhiteSpace( p, encoding );
}
// Was this empty?
if ( !firstChild ) {
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
return 0;
}
// All is well.
return p;
}
/**
* @brief TiXmlElement::Parse
* @param p
* @param data
* @param encoding
* @return
*
*是基类TiXmlNode::Parse()的一种实现,用来解析元素类型的多叉树
*
*/
const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
{
p = SkipWhiteSpace( p, encoding );
TiXmlDocument* document = GetDocument();
if ( !p || !*p )
{
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
return 0;
}
if ( data )
{
data->Stamp( p, encoding );
location = data->Cursor();
}
if ( *p != '<' )
{
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
return 0;
}
p = SkipWhiteSpace( p+1, encoding );
// Read the name.
const char* pErr = p;
//获取元素名 (value是类成员)
p = ReadName( p, &value, encoding );
if ( !p || !*p )
{
if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
return 0;
}
TIXML_STRING endTag ("");
//获取这个元素的结束标记
endTag += value;
// Check for and read attributes. Also look for an empty
// tag or an end tag.
while ( p && *p )
{
pErr = p;
p = SkipWhiteSpace( p, encoding );
if ( !p || !*p )
{
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
return 0;
}
if ( *p == '/' )
{
++p;
// Empty tag.
if ( *p != '>' )
{
if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
return 0;
}
return (p+1);
}
// 读取元素的值
else if ( *p == '>' )
{
// Done with attributes (if there were any.)
// Read the value -- which can include other
// elements -- read the end tag, and return.
++p;
//有可能这个元素没有值,接着又是子元素,如Jim
p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
if ( !p || !*p ) {
// We were looking for the end tag, but found nothing.
// Fix for [ 1663758 ] Failure to report error on bad XML
if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
return 0;
}
// We should find the end tag now
// note that:
// and
//
// are both valid end tags.
if ( StringEqual( p, endTag.c_str(), false, encoding ) )
{
p += endTag.length();
p = SkipWhiteSpace( p, encoding );
if ( p && *p && *p == '>' ) {
++p;
return p;
}
if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
return 0;
}
else
{
if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
return 0;
}
}
//读取元素的属性
else
{
// Try to read an attribute:
TiXmlAttribute* attrib = new TiXmlAttribute();
if ( !attrib )
{
return 0;
}
attrib->SetDocument( document );
pErr = p;
p = attrib->Parse( p, data, encoding );
if ( !p || !*p )
{
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
delete attrib;
return 0;
}
// Handle the strange case of double attributes:
#ifdef TIXML_USE_STL
TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
#else
TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
#endif
if ( node )
{
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
delete attrib;
return 0;
}
attributeSet.Add( attrib );
}
}
return p;
}
/**
* @brief TiXmlElement::ReadValue
* @param p
* @param data
* @param encoding
* @return
* 读取元素的值和解析子元素
*
*/
const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
{
TiXmlDocument* document = GetDocument();
// Read in text and elements in any order.
const char* pWithWhiteSpace = p;
p = SkipWhiteSpace( p, encoding );
while ( p && *p )
{
if ( *p != '<' )
{
// Take what we have, make a text element.
TiXmlText* textNode = new TiXmlText( "" );
if ( !textNode )
{
return 0;
}
if ( TiXmlBase::IsWhiteSpaceCondensed() )
{
p = textNode->Parse( p, data, encoding );
}
else
{
// Special case: we want to keep the white space
// so that leading spaces aren't removed.
p = textNode->Parse( pWithWhiteSpace, data, encoding );
}
if ( !textNode->Blank() )
LinkEndChild( textNode );
else
delete textNode;
}
//一个子元素标签的开始,解析子元素
else
{
// We hit a '<'
// Have we hit a new element or an end tag? This could also be
// a TiXmlText in the "CDATA" style.
if ( StringEqual( p, "", false, encoding ) )
{
return p;
}
else
{
TiXmlNode* node = Identify( p, encoding );
if ( node )
{
p = node->Parse( p, data, encoding );
LinkEndChild( node );
}
else
{
return 0;
}
}
}
pWithWhiteSpace = p;
p = SkipWhiteSpace( p, encoding );
}
if ( !p )
{
if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
}
return p;
}