要实现对XML文件的解析,首先需要使用GitHub上的开源组件tinyxml2:将tinyxml2中的tinyxml2.cpp与tinyxml2.h同时复制到项目目录下即可。
tinyxml2 github链接
整个的使用方法与log4cpp的使用方法比较相似,通过程序创建好的XML文件如下:
data.xml:
<?xml version="1.0" encoding="UTF-8"?>
<School SchoolName="SWJTU">
<Student StudentName="Worthy">
<Age>23</Age>
<Grade>99.5</Grade>
</Student>
<Student StudentName="John">
<Age>25</Age>
<Grade>100</Grade>
</Student>
</School>
终端的输出如下:
wwx@linux:~/week3/parseXML$ g++ main.cpp tinyxml2.cpp -o main
wwx@linux:~/week3/parseXML$ ./main
School SchoolName=SWJTU
Student StudentName=Worthy
Age:23
Grade:99.5
Student StudentName=John
Age:25
Grade:100
接下来就是完整的程序,可以仔细研究:
#include "tinyxml2.h"
#include <iostream>
using namespace tinyxml2;
using namespace std;
bool createXml(const char* path) {
XMLDocument *doc = new XMLDocument();
XMLDeclaration *declaration = doc->NewDeclaration("xml version=\"1.0\" encoding=\"UTF-8\"");
doc->LinkEndChild(declaration);
//School
XMLElement *School = doc->NewElement("School");
School->SetAttribute("SchoolName", "SWJTU");
doc->LinkEndChild(School);
//School->Student1
XMLElement *Student1 = doc->NewElement("Student");
Student1->SetAttribute("StudentName", "Worthy");
School->LinkEndChild(Student1);
//Age, Grade
XMLElement *Age1 = doc->NewElement("Age");
XMLText *ageText1 = doc->NewText("23");
Age1->LinkEndChild(ageText1);
XMLElement *Grade1 = doc->NewElement("Grade");
XMLText *gradeText1 = doc->NewText("99.5");
Grade1->LinkEndChild(gradeText1);
Student1->LinkEndChild(Age1);
Student1->LinkEndChild(Grade1);
//School->Student2
XMLElement *Student2 = doc->NewElement("Student");
Student2->SetAttribute("StudentName", "John");
School->LinkEndChild(Student2);
//Age, Grade
XMLElement *Age2 = doc->NewElement("Age");
XMLText *ageText2 = doc->NewText("25");
Age2->LinkEndChild(ageText2);
XMLElement *Grade2 = doc->NewElement("Grade");
XMLText *gradeText2 = doc->NewText("100");
Grade2->LinkEndChild(gradeText2);
Student2->LinkEndChild(Age2);
Student2->LinkEndChild(Grade2);
if (XML_SUCCESS != doc->SaveFile(path)) {
cout << "SaveFile ERROR!" << endl;
delete doc;
return false;
}
delete doc;
return true;
}
bool readXml(const char* path) {
XMLDocument doc;
if (doc.LoadFile(path) != XML_SUCCESS) {
cout << "LoadFile ERROR!" << endl;
return false;
}
//get Root Element
XMLElement *Root = doc.RootElement();
cout << Root->Name() << " SchoolName=" << Root->Attribute("SchoolName") << endl;
//Tree Structure , from First Layer to Second Layer
XMLElement *First = Root->FirstChildElement();
while (First) {
cout << First->Name() << "\tStudentName=" << First->Attribute("StudentName") << endl;
XMLElement *Second = First->FirstChildElement();
while (Second) {
cout << "\t\t" << Second->Name() << ":"<< Second->GetText() << endl;
Second = Second->NextSiblingElement();
}
First = First->NextSiblingElement();
}
return true;
}
/// Writes data.xml with createXml and reads it back with readXml.
/// @return 0 when both steps succeed, 1 when either one reports failure.
int main()
{
    const char * path = "data.xml";
    // Do not read the file back if writing it failed, and report the failure
    // through the exit status instead of always returning 0.
    if (!createXml(path) || !readXml(path)) {
        return 1;
    }
    return 0;
}
XML文件的内部组成和树的结构相似,从根节点开始不断往下延伸,要想遍历XML文件中的所有信息,就需要用到遍历树节点的DFS方法。
#include "tinyxml2.h"
#include <iostream>
#include <iomanip>
using namespace tinyxml2;
using namespace std;
/// Prints `tmpnode`, all of its following siblings and, recursively, all of
/// their descendants to stdout in a tag-like layout.
///
/// For every element this prints an opening line "<Name attr=value ... >",
/// the element's text if it has any, its child elements (two columns
/// deeper), and finally a closing line "</Name>".
///
/// @param tmpnode  First element to print; null prints nothing.
/// @param presize  Field width used for the indentation of this level.
void show_all_node(XMLElement *tmpnode, int presize)
{
    // Walk this element and each following sibling.
    for (XMLElement *node = tmpnode; node; node = node->NextSiblingElement())
    {
        // Opening line: element name followed by every attribute.
        cout << setw(presize) << "<" << node->Name() << " ";
        for (const XMLAttribute *attr = node->FirstAttribute(); attr; attr = attr->Next())
        {
            cout << attr->Name() << "=" << attr->Value() << " ";
        }
        cout << ">" << endl;

        // GetText() is null when the element has no leading text; streaming
        // a null pointer would break the output, so it must be checked.
        if (const char *text = node->GetText())
        {
            cout << setw(presize + 3) << text << " " << endl;
        }

        // Recurse into the children to print their attributes and text.
        show_all_node(node->FirstChildElement(), presize + 2);

        // Closing line. The previous version printed "Name>" with no "</".
        // The width is presize + 1 because "</" is one character longer than
        // "<", which keeps both brackets in the same column.
        cout << setw(presize + 1) << "</" << node->Name() << ">" << endl;
    }
}
bool readXml(const char* path) {
XMLDocument doc;
if (doc.LoadFile(path) != XML_SUCCESS) {
cout << "LoadFile ERROR!" << endl;
return false;
}
//get Root Element
XMLElement *Root = doc.RootElement();
//Tree Structure , from First Layer to Second Layer
show_all_node(Root, 0);
return true;
}
/// Prints the element tree of coolshell.xml.
/// @return 0 on success, 1 when readXml reports that the file failed to load.
int main()
{
    const char * path = "coolshell.xml";
    // Surface a load failure in the exit status instead of always returning 0.
    return readXml(path) ? 0 : 1;
}
RssReader.h:
#ifndef _RSSREADER_H_
#define _RSSREADER_H_
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

#include "tinyxml2.h"
using std::cout;
using std::endl;
using std::ofstream;
using std::regex;
using std::regex_replace;
using std::string;
using std::stringstream;
using std::vector;
using namespace tinyxml2;
/// One parsed RSS <item>: plain aggregate of the four text fields that are
/// collected per item.
struct RssItem
{
    std::string title;        ///< text of the item's title
    std::string link;         ///< text of the item's link
    std::string description;  ///< text of the item's description
    std::string content;      ///< text of the item's content
};
/// Collects RssItem records from an RSS XML file and writes them to a text
/// file.
class RssReader
{
public:
    /// Walks the element tree starting at `Root` and records the items found.
    void DFS(XMLElement *Root);

    /// Parses the file `filename`; returns false when it cannot be loaded.
    bool parseRss(const char *filename);

    /// Writes every collected item to the file `filename`.
    void dump(const std::string &filename);

private:
    std::vector<RssItem> _rss;  ///< items collected so far
};
void RssReader::DFS(XMLElement *Root)
{
while (Root)
{
if (!strcmp(Root->Name(), "item"))
{
RssItem it;
XMLElement *Title = Root->FirstChildElement("title");
XMLElement *Link = Root->FirstChildElement("link");
XMLElement *Description = Root->FirstChildElement("description");
XMLElement *Content = Root->FirstChildElement("content:encoded");
if (Title->GetText())
{
stringstream ss;
string s = Title->GetText();
regex e("(<)[^>]*(>)"); // 这样的形式全部去掉
ss << regex_replace(s, e, " "); //换为 " "
it.title = ss.str();
}
if (Link->GetText())
{
stringstream ss;
string s = Link->GetText();
regex e("(<)[^>]*(>)"); // 这样的形式全部去掉
ss << regex_replace(s, e, " "); //换为 " "
it.link = ss.str();
}
if (Description->GetText())
{
stringstream ss;
string s = Description->GetText();
regex e("(<)[^>]*(>)"); // 这样的形式全部去掉
ss << regex_replace(s, e, " "); //换为 " "
it.description = ss.str();
}
if (Content->GetText())
{
stringstream ss;
string s = Content->GetText();
regex e("(<)[^>]*(>)"); // 这样的形式全部去掉
ss << regex_replace(s, e, " "); //换为 " "
it.content = ss.str();
}
_rss.push_back(it);
}
DFS(Root->FirstChildElement());
Root = Root->NextSiblingElement();
}
}
/// Loads the XML file `filename` and collects its items by running DFS from
/// the document's root element.
///
/// Declared inline because this definition lives in a header; without it,
/// including the header from two translation units produces duplicate
/// definitions at link time.
///
/// @param filename  File handed to XMLDocument::LoadFile.
/// @return false after printing "LoadFile ERROR!" to stdout when the file
///         cannot be loaded; true otherwise.
inline bool RssReader::parseRss(const char *filename)
{
    XMLDocument doc;
    if (doc.LoadFile(filename) != XML_SUCCESS)
    {
        cout << "LoadFile ERROR!" << endl;
        return false;
    }
    // DFS accepts a null start element, so a missing root needs no check here.
    DFS(doc.RootElement());
    return true;
}
void RssReader::dump(const string &filename)
{
ofstream ofs(filename, ofstream::out);
if (!ofs)
{
cout << "open fileERROR!" << endl;
exit(-1);
}
for (int i = 0; i < _rss.size(); i++)
{
ofs << "" << endl;
ofs << "\t " << i << "" << endl;
ofs << "\t " << _rss[i].title << "" << endl;
ofs << "\t " << _rss[i].link << "" << endl;
ofs << "\t " << _rss[i].description << "" << endl;
ofs << "\t " << _rss[i].content << "" << endl;
ofs << "" << endl;
ofs << endl
<< endl
<< endl;
}
ofs.close();
}
#endif
main.cpp
:
#include "RssReader.h"
/// Parses coolshell.xml and writes the collected items to pagelib.txt.
/// @return 0 on success, 1 when the input file cannot be parsed.
int main(){
    RssReader reader;
    // The XML file to parse. On failure, skip the dump so an existing
    // pagelib.txt is not overwritten with an empty result.
    if (!reader.parseRss("coolshell.xml")) {
        return 1;
    }
    reader.dump("pagelib.txt"); // write the result to the output file
    return 0;
}