嵌入式 Linux环境下C使用的XML解析库:libxml2

1、libxml2库常识

如果对方通过XML接口给我们传递数据,我们就必须有相应的解析程序,这也可能是今后数据接口最通用的办法。经过研究发现,正如用C语言生成页面一样,用C语言解析XML显然要比用PHP或ASP麻烦得多。
同其它语言一样,解析的方法一般都是调用现有的解析器,因为这样省时省力。PHP4是内置的EXPAT,PHP5是内置的LIBXML2,WIN平台可以调用MSXML。FREEBSD上使用C语言,最流行的就是调用EXPAT和LIBXML2,由于PHP基于某些原因放弃了EXPAT,所以我主要试用了LIBXML2。
  
LIBXML2主页是http://xmlsoft.org

安装过程:(需要ROOT权限)
gunzip -c libxml2-2.6.22.tar.gz | tar xvf -
cd libxml2-2.6.22
./configure
make

su
make install
exit

安装完成后就可以用简单的代码解析XML文件(本地文件和远程文件都可以),但是在编码上有一些问题需要注意。LIBXML2默认只支持UTF-8编码,无论输入输出都是UTF-8,所以解析XML得到的结果都是UTF-8编码的;如果需要输出GB2312或者其它编码,需要用ICONV来做转码(生成UTF-8编码的文件也可以用它做)。

ICONV的安装过程和LIBXML2一样。

//test.c

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#include<iconv.h>

//
//


/* Shared output buffer of d_ConvertCharset(); every call overwrites it. */
static char s_strBufOut[1024];

/*
 * Convert a NUL-terminated string from one character set to another via iconv.
 *
 * cpEncodeFrom  iconv name of the encoding cpInput is in
 * cpEncodeTo    iconv name of the encoding to produce
 * cpInput       NUL-terminated text to convert
 *
 * Returns s_strBufOut (a static buffer, so the function is not reentrant and
 * the result is only valid until the next call), or NULL when an argument is
 * NULL, the conversion cannot be opened, or iconv() reports an error (for
 * example when the result does not fit into the buffer).
 */
char *d_ConvertCharset(char *cpEncodeFrom, char *cpEncodeTo, const char *cpInput)
{
  char *cpIn;
  char *cpOut;
  size_t iInputLen, iOutLen, iReturn;
  iconv_t c_pt;

  if (cpEncodeFrom == NULL || cpEncodeTo == NULL || cpInput == NULL)
  {
    return NULL;
  }

  /* iconv_open() signals failure with (iconv_t)-1. */
  if ((c_pt = iconv_open(cpEncodeTo, cpEncodeFrom)) == (iconv_t)-1)
  {
    printf("iconv_open failed!\n");
    return NULL;
  }

  /* iconv() wants a char ** for its input even though it only reads it. */
  cpIn = (char *)cpInput;
  iInputLen = strlen(cpInput) + 1;      /* +1: convert the terminator too */
  iOutLen = sizeof s_strBufOut;
  cpOut = s_strBufOut;
  iReturn = iconv(c_pt, &cpIn, &iInputLen, &cpOut, &iOutLen);

  /* Release the descriptor on every path, not only on success. */
  iconv_close(c_pt);

  if (iReturn == (size_t)-1)
  {
    return NULL;
  }
  return s_strBufOut;
}

/*
 * Copy the text content of `node`, converted from UTF-8 to GB2312, into dst.
 * dst is always left NUL-terminated; it is empty when the node has no text or
 * the conversion fails, and the text is truncated when it does not fit.
 */
static void copyItemField(xmlDocPtr doc, xmlNodePtr node, char *dst, size_t dstSize)
{
  xmlChar *key = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);

  dst[0] = '\0';
  if (key != NULL)
  {
    char *converted = d_ConvertCharset("utf-8", "gb2312", (char *)key);

    if (converted != NULL)
    {
      snprintf(dst, dstSize, "%s", converted);
    }
    xmlFree(key);
  }
}

/*
 * Print the contents of one item, converted to GB2312.
 *
 * doc  document that owns `cur`
 * cur  the element whose <userid>, <username> and <password> children are read
 *
 * The element describes a single user, so a single record is filled and
 * printed once; children that are missing leave their field empty.
 */
void parseItem (xmlDocPtr doc, xmlNodePtr cur)
{
  struct user
  {
    char id[20];
    char name[64];
    char password[64];
  };
  struct user user = { "", "", "" };

  if (cur == NULL)
  {
    return;
  }

  for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next)
  {
    if (!xmlStrcmp(cur->name, (const xmlChar *)"userid"))
    {
      copyItemField(doc, cur, user.id, sizeof user.id);
    }
    else if (!xmlStrcmp(cur->name, (const xmlChar *)"username"))
    {
      copyItemField(doc, cur, user.name, sizeof user.name);
    }
    else if (!xmlStrcmp(cur->name, (const xmlChar *)"password"))
    {
      copyItemField(doc, cur, user.password, sizeof user.password);
    }
  }

  printf("结构体信息:\n");
  printf("id:%s name:%s password:%s\n", user.id, user.name, user.password);
}

/*
 * Copy the text content of `node`, converted from UTF-8 to GB2312, into dst.
 * dst is always left NUL-terminated; it is empty when the node has no text or
 * the conversion fails, and the text is truncated when it does not fit.
 */
static void copyRightField(xmlDocPtr doc, xmlNodePtr node, char *dst, size_t dstSize)
{
  xmlChar *key = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);

  dst[0] = '\0';
  if (key != NULL)
  {
    char *converted = d_ConvertCharset("utf-8", "gb2312", (char *)key);

    if (converted != NULL)
    {
      snprintf(dst, dstSize, "%s", converted);
    }
    xmlFree(key);
  }
}

/*
 * Parse the XML document `docname` and print its contents.
 *
 * The root element must be <Authentication>. Every <user> child is handed to
 * parseItem(); for every <right> child the <username> and <password> values
 * are converted to GB2312 and printed. Errors are reported on stderr. The
 * document tree is released on every path before returning.
 */
void parseDoc(char *docname)
{
  struct role
  {
    char name[64];
    char password[64];
  };

  xmlDocPtr doc;   /* parsed document tree */
  xmlNodePtr cur;  /* node currently being visited */

  doc = xmlParseFile(docname);
  if (doc == NULL)
  {
    fprintf(stderr,"Document not parsed successfully. \n");
    return;
  }

  /* Fetch the root element. */
  cur = xmlDocGetRootElement(doc);
  if (cur == NULL)
  {
    fprintf(stderr,"empty document\n");
    xmlFreeDoc(doc);
    return;
  }

  /* The root element has to be <Authentication>. */
  if (xmlStrcmp(cur->name, (const xmlChar *) "Authentication"))
  {
    fprintf(stderr,"document of the wrong type, root node != Authentication");
    xmlFreeDoc(doc);
    return;
  }

  /* Walk the children of the root element. */
  for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next)
  {
    if (!xmlStrcmp(cur->name, (const xmlChar *)"user"))
    {
      printf("用户信息\n");
      parseItem (doc, cur);
      printf("heeell\n");
    }
    else if (!xmlStrcmp(cur->name, (const xmlChar *)"right"))
    {
      struct role role1 = { "", "" };
      xmlNodePtr child;  /* separate iterator so the outer walk is not lost */

      printf("权限信息\n");
      for (child = cur->xmlChildrenNode; child != NULL; child = child->next)
      {
        if (!xmlStrcmp(child->name, (const xmlChar *)"username"))
        {
          copyRightField(doc, child, role1.name, sizeof role1.name);
          printf("username: %s\n", role1.name);
        }
        else if (!xmlStrcmp(child->name, (const xmlChar *)"password"))
        {
          copyRightField(doc, child, role1.password, sizeof role1.password);
          printf("password: %s\n", role1.password);
        }
      }
    }
  }

  xmlFreeDoc(doc);
}

/*
 * Program entry point. The single argument names the document to parse; it
 * may be a file or a URL and is required to be UTF-8 encoded. Without an
 * argument a usage line is printed instead.
 */
int main(int argc, char **argv)
{
  if (argc > 1)
  {
    parseDoc (argv[1]);
    return 0;
  }

  printf("Usage: %s docname\n", argv[0]);
  return(0);
}

xml文件

<?xml version="1.0" encoding="UTF-8" ?>

<Authentication>
  <user>
    <userid>001</userid>
    <username>root</username>
    <password>root</password>
  </user>
  <user>
    <userid>002</userid>
    <username>ctu</username>
    <password>ctu</password>
  </user>
  <right>
    <username>root</username>
    <password>123</password>
  </right>
</Authentication>

编译 gcc -o test test.c -I /usr/local/include/libxml2/ -L /usr/local/lib -lxml2 -lz
执行 ./test xml文件名.xml


2、库的安装以及小示例一

一、库的安装:
下载libxml2:http://xmlsoft.org/sources/ ,貌似最新的是2.7.2。但是一查发现它被爆出了一个漏洞(绿盟发布的公告):http://www.nsfocus.net/vulndb/12695 。自己用无所谓,商业使用就得考虑考虑了。
经典的./configure&&make&&make install。库就安装好了。
头文件在/usr/local/include/libxml2/libxml下,库文件在/usr/local/lib下。
写个helloworld测试一下能用不?
      1 #include <stdio.h>
      2 #include <libxml/parser.h>
      3 
      4 int main(int argc,char **argv)
      5 {
      6     xmlDocPtr doc;
      7     char *xmlfilename;
      8 
      9     if(argv[1] == NULL)
     10     {
     11         fprintf(stderr,"Give me a xml document!\n");
     12         exit(-1);
     13     }
     14     else
     15         xmlfilename = argv[1];
     16 
     17 /*先以GB2312编码打开,有可能是UTF—8 */
     18     doc = xmlReadFile(xmlfilename,"GB2312",XML_PARSE_RECOVER);
     19 
     20     return 0;
     21 }
保存为test.c
然后编译:$gcc -o test test.c -I /usr/local/include/libxml2/ -L /usr/local/lib -lxml2 -lz
如果编译成功,那就是安装成功了。
二、一个解析的例子。
没用到多复杂的东西,就写了个解析的函数。所解析的文件就是前面所贴的。我想要的就是把file字段的printf出来就OK了。
代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/xmlmemory.h>
#include <iconv.h>
int main(int argc,char **argv)
{
    xmlDocPtr doc;
    xmlNodePtr curNode;
    xmlChar *szKey;
    char *szDocName;
    szDocName=argv[1];
    doc = xmlReadFile(szDocName,"GB2312",XML_PARSE_RECOVER);
    if(NULL == doc)
    {
        fprintf(stderr,"Document not parsed sucessfully!\n");
        exit(-1);
    }
    curNode = xmlDocGetRootElement(doc);    
    if(NULL == curNode)
    {
        fprintf(stderr,"Empty document!\n");    
        xmlFreeDoc(doc);
        exit(-1);
    }
    if(xmlStrcmp(curNode->name,BAD_CAST"config"))
    {
        fprintf(stderr,"Document of the wrong type,root node != \"config\"");
        xmlFreeDoc(doc);
    }
    curNode = curNode->xmlChildrenNode;
    xmlNodePtr propNodePtr = curNode;
    
    while(curNode != NULL)
    {
        if (!(xmlStrcmp(curNode->name,(const xmlChar *)"file")))
        {
            szKey = xmlNodeGetContent(curNode);
            printf("file:%s\n",szKey);
            xmlFree(szKey);
        }
        curNode = curNode->next;
    }
    xmlFreeDoc(doc);
    return 0;
}
问题的继续。
1W多个文件怎么弄?
我的思路是扫描一下目录,将xml文件遍历一下交给解析函数,解析函数printf出来,然后在shell里面 ">"重定向到一个文件就OK了。
代码总共下来150行就搞定了.运行还顺。上来趟厕所回来就OK了。

三、小示例三

D. XPath例程代码
#include <libxml/parser.h>
#include <libxml/xpath.h>
/*
 * Parse the XML file `docname` with xmlParseFile().
 * Returns the document tree, or NULL after printing a message to stderr when
 * the file could not be parsed.
 */
xmlDocPtr
getdoc (char *docname) {
    xmlDocPtr parsed = xmlParseFile(docname);

    if (parsed != NULL) {
        return parsed;
    }
    fprintf(stderr,"Document not parsed successfully. \n");
    return NULL;
}
/*
 * Evaluate the XPath expression `xpath` against `doc`.
 *
 * Returns the XPath result object when it holds a non-empty node set; the
 * caller releases it with xmlXPathFreeObject(). Returns NULL (after printing
 * a message) when the context cannot be created, the expression cannot be
 * evaluated, or nothing matches. Nothing is leaked on the NULL paths.
 */
xmlXPathObjectPtr
getnodeset (xmlDocPtr doc, xmlChar *xpath){
    xmlXPathContextPtr context;
    xmlXPathObjectPtr result;

    context = xmlXPathNewContext(doc);
    if (context == NULL) {
        printf("Error in xmlXPathNewContext\n");
        return NULL;
    }

    result = xmlXPathEvalExpression(xpath, context);
    /* The context is only needed for the evaluation itself. */
    xmlXPathFreeContext(context);
    if (result == NULL) {
        printf("Error in xmlXPathEvalExpression\n");
        return NULL;
    }

    if(xmlXPathNodeSetIsEmpty(result->nodesetval)){
        xmlXPathFreeObject(result);
        printf("No result\n");
        return NULL;
    }
    return result;
}
int
main(int argc, char **argv) {
char *docname;
xmlDocPtr doc;
xmlChar *xpath = ("//keyword");
xmlNodeSetPtr nodeset;
xmlXPathObjectPtr result;
int i;
xmlChar *keyword;
if (argc <= 1) {
printf("Usage: %s docname\n", argv[0]);
return(0);
}
docname = argv[1];
doc = getdoc(docname);
result = getnodeset (doc, xpath);
if (result) {
nodeset = result->nodesetval;
for (i=0; i < nodeset->nodeNr; i++) {
keyword = xmlNodeListGetString(doc, nodeset->nodeTab[i]->printf
("keyword: %s\n", keyword);
xmlFree(keyword);
}
xmlXPathFreeObject (result);
}
xmlFreeDoc(doc);
xmlCleanupParser();
return (1);
}
E. 添加keyword例程代码
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
/*
 * Append a <keyword> child element whose text is `keyword` to the node `cur`.
 * `doc` is not needed for this and is accepted only to keep the signature.
 */
void
parseStory (xmlDocPtr doc, xmlNodePtr cur, char *keyword) {
    (void)doc;
    /* libxml2 strings are xmlChar (unsigned char), hence the explicit casts. */
    xmlNewTextChild (cur, NULL, (const xmlChar *)"keyword", (const xmlChar *)keyword);
}
/*
 * Parse `docname`, verify that its root element is <story>, and call
 * parseStory() with `keyword` on every <storyinfo> child of the root.
 * Returns the document tree, or NULL (after reporting on stderr and freeing
 * the tree) when parsing fails, the document is empty, or the root is wrong.
 */
xmlDocPtr
parseDoc(char *docname, char *keyword) {
    xmlDocPtr tree = xmlParseFile(docname);
    xmlNodePtr node;

    if (tree == NULL) {
        fprintf(stderr,"Document not parsed successfully. \n");
        return (NULL);
    }

    node = xmlDocGetRootElement(tree);
    if (node == NULL) {
        fprintf(stderr,"empty document\n");
        xmlFreeDoc(tree);
        return (NULL);
    }

    if (xmlStrcmp(node->name, (const xmlChar *) "story") != 0) {
        fprintf(stderr,"document of the wrong type, root node != story");
        xmlFreeDoc(tree);
        return (NULL);
    }

    for (node = node->xmlChildrenNode; node != NULL; node = node->next) {
        if (xmlStrcmp(node->name, (const xmlChar *)"storyinfo") == 0) {
            parseStory (tree, node, keyword);
        }
    }
    return (tree);
}
/*
 * Add the keyword argv[2] to the document argv[1] and write the document
 * back to the same file. Returns 0 on success (also after printing the usage
 * line) and 1 when the document cannot be processed or saved.
 */
int
main(int argc, char **argv) {
    char *docname;
    char *keyword;
    xmlDocPtr doc;
    int status = 0;

    if (argc <= 2) {
        printf("Usage: %s docname, keyword\n", argv[0]);
        return(0);
    }
    docname = argv[1];
    keyword = argv[2];

    doc = parseDoc (docname, keyword);
    if (doc == NULL) {
        /* parseDoc() has already reported the problem. */
        return (1);
    }

    /* xmlSaveFormatFile() returns -1 when the file cannot be written. */
    if (xmlSaveFormatFile (docname, doc, 0) < 0) {
        fprintf(stderr,"could not save %s\n", docname);
        status = 1;
    }
    xmlFreeDoc(doc);
    return (status);
}
F. 添加属性例程代码
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
/*
 * Parse `docname`, verify that its root element is <story>, and append an
 * empty <reference> child carrying the attribute uri="<uri>" to the root.
 * Returns the document tree, or NULL (after reporting on stderr and freeing
 * the tree) when parsing fails, the document is empty, the root is wrong, or
 * the new element cannot be created.
 */
xmlDocPtr
parseDoc(char *docname, char *uri) {
    xmlDocPtr doc;
    xmlNodePtr cur;
    xmlNodePtr newnode;

    doc = xmlParseFile(docname);
    if (doc == NULL ) {
        fprintf(stderr,"Document not parsed successfully. \n");
        return (NULL);
    }
    cur = xmlDocGetRootElement(doc);
    if (cur == NULL) {
        fprintf(stderr,"empty document\n");
        xmlFreeDoc(doc);
        return (NULL);
    }
    if (xmlStrcmp(cur->name, (const xmlChar *) "story")) {
        fprintf(stderr,"document of the wrong type, root node != story");
        xmlFreeDoc(doc);
        return (NULL);
    }

    /* libxml2 strings are xmlChar (unsigned char), hence the explicit casts. */
    newnode = xmlNewTextChild (cur, NULL, (const xmlChar *)"reference", NULL);
    if (newnode == NULL) {
        fprintf(stderr,"could not create reference element\n");
        xmlFreeDoc(doc);
        return (NULL);
    }
    xmlNewProp (newnode, (const xmlChar *)"uri", (const xmlChar *)uri);
    return(doc);
}
/*
 * Add a <reference uri="argv[2]"> element to the document argv[1] and write
 * the document back to the same file with indentation. Returns 0 on success
 * (also after printing the usage line) and 1 when the document cannot be
 * processed or saved.
 */
int
main(int argc, char **argv) {
    char *docname;
    char *uri;
    xmlDocPtr doc;
    int status = 0;

    if (argc <= 2) {
        printf("Usage: %s docname, uri\n", argv[0]);
        return(0);
    }
    docname = argv[1];
    uri = argv[2];

    doc = parseDoc (docname, uri);
    if (doc == NULL) {
        /* parseDoc() has already reported the problem. */
        return (1);
    }

    /* xmlSaveFormatFile() returns -1 when the file cannot be written. */
    if (xmlSaveFormatFile (docname, doc, 1) < 0) {
        fprintf(stderr,"could not save %s\n", docname);
        status = 1;
    }
    xmlFreeDoc(doc);
    return (status);
}
G. 取得属性值例程代码
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
/*
 * Print the "uri" attribute of every <reference> child of `cur`.
 * <reference> elements without a uri attribute are skipped. `doc` is not
 * needed and is accepted only to keep the signature.
 */
void
getReference (xmlDocPtr doc, xmlNodePtr cur) {
    xmlChar *uri;

    (void)doc;
    for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
        if (xmlStrcmp(cur->name, (const xmlChar *)"reference") != 0) {
            continue;
        }
        uri = xmlGetProp(cur, (const xmlChar *)"uri");
        if (uri == NULL) {
            /* No such attribute: passing NULL to %s would be undefined. */
            continue;
        }
        printf("uri: %s\n", uri);
        xmlFree(uri);
    }
}
/*
 * Parse `docname`, verify that its root element is <story>, and pass the
 * root to getReference(). Problems are reported on stderr; the document tree
 * is freed before returning on every path where it was created.
 */
void
parseDoc(char *docname) {
    xmlDocPtr tree = xmlParseFile(docname);
    xmlNodePtr root;

    if (tree == NULL) {
        fprintf(stderr,"Document not parsed successfully. \n");
        return;
    }

    root = xmlDocGetRootElement(tree);
    if (root == NULL) {
        fprintf(stderr,"empty document\n");
    } else if (xmlStrcmp(root->name, (const xmlChar *) "story") != 0) {
        fprintf(stderr,"document of the wrong type, root node != story");
    } else {
        getReference (tree, root);
    }
    xmlFreeDoc(tree);
}
/*
 * Print the reference URIs of the document named by argv[1].
 * Returns 0; parseDoc() reports its own errors on stderr and gives no status
 * back, so a failure there cannot be reflected in the exit status.
 */
int
main(int argc, char **argv) {
    char *docname;

    if (argc <= 1) {
        printf("Usage: %s docname\n", argv[0]);
        return(0);
    }
    docname = argv[1];
    parseDoc (docname);
    /* 0, not 1: a non-zero exit status means failure to the calling shell. */
    return (0);
}
H. 编码转换例程代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/parser.h>
/*
 * Convert the NUL-terminated string `in` from `encoding` to UTF-8 using the
 * matching libxml2 encoding handler.
 *
 * Returns a malloc()ed, NUL-terminated buffer that the caller must free(),
 * or NULL (after printing a message) when an argument is NULL, memory runs
 * out, no handler exists for `encoding`, or the input cannot be converted
 * completely.
 *
 * NOTE(review): handler->input is called directly, as in the original
 * listing; confirm that the installed libxml2 still exposes that member as a
 * plain function pointer.
 */
unsigned char*
convert (unsigned char *in, char *encoding)
{
    unsigned char *out;
    unsigned char *shrunk;
    int ret,size,out_size,temp;
    xmlCharEncodingHandlerPtr handler;

    if (in == NULL || encoding == NULL) {
        return (NULL);
    }

    size = (int)strlen((const char *)in)+1;
    /* Worst case assumed by the listing: two output bytes per input byte. */
    out_size = size*2-1;
    out = malloc((size_t)out_size);
    if (out == NULL) {
        printf("no mem\n");
        return (NULL);
    }

    handler = xmlFindCharEncodingHandler(encoding);
    if (handler == NULL || handler->input == NULL) {
        printf("no usable encoding handler for %s\n", encoding);
        if (handler != NULL) {
            xmlCharEncCloseFunc(handler);
        }
        free(out);
        return (NULL);
    }

    temp = size-1;
    ret = handler->input(out, &out_size, in, &temp);
    xmlCharEncCloseFunc(handler);

    /* A negative result is an error; a positive one is a byte count. */
    if (ret < 0 || temp != size-1) {
        if (ret < 0) {
            printf("conversion wasn't successful.\n");
        } else {
            printf("conversion wasn't successful. converted: %i octets.\n", temp);
        }
        free(out);
        return (NULL);
    }

    /* Resize to the converted length plus terminator; keep `out` on failure. */
    shrunk = realloc(out, (size_t)out_size+1);
    if (shrunk == NULL) {
        free(out);
        printf("no mem\n");
        return (NULL);
    }
    shrunk[out_size] = 0; /* null terminating out */
    return (shrunk);
}
/*
 * Convert argv[1] from ISO-8859-1 to UTF-8, wrap it in a <root> element and
 * write the resulting document to standard output in ISO-8859-1.
 * Returns 0 on success (also after printing the usage line) and 1 when the
 * conversion or the document construction fails.
 */
int
main(int argc, char **argv) {
    unsigned char *content, *out;
    xmlDocPtr doc;
    xmlNodePtr rootnode;
    char *encoding = "ISO-8859-1";

    if (argc <= 1) {
        printf("Usage: %s content\n", argv[0]);
        return(0);
    }
    content = (unsigned char *)argv[1];

    out = convert(content, encoding);
    if (out == NULL) {
        /* convert() has already reported the problem. */
        return (1);
    }

    doc = xmlNewDoc ((const xmlChar *)"1.0");
    if (doc == NULL) {
        free(out);
        return (1);
    }
    rootnode = xmlNewDocNode(doc, NULL, (const xmlChar*)"root", out);
    /* The node holds its own copy of the text, so the buffer can go now. */
    free(out);
    if (rootnode == NULL) {
        xmlFreeDoc(doc);
        return (1);
    }
    xmlDocSetRootElement(doc, rootnode);
    xmlSaveFormatFileEnc("-", doc, encoding, 1);
    xmlFreeDoc(doc);
    return (0);
}
…………………………………………………………………………………………
/*
 * Convert the NUL-terminated string `instr` from `encoding` to UTF-8 using
 * libxml2 buffers and xmlCharEncInFunc().
 *
 * Returns a newly allocated copy of the converted text that the caller
 * releases with xmlFree(), or NULL when an argument is NULL, no handler
 * exists for `encoding`, a buffer cannot be created, or the conversion fails.
 */
char *convert(char *instr,char *encoding)
{
    xmlCharEncodingHandlerPtr handler;
    xmlBufferPtr in,out;
    char *result = NULL;

    if(NULL == instr || NULL == encoding)
    {
        return NULL;
    }
    handler = xmlFindCharEncodingHandler(encoding);
    if(NULL == handler)
    {
        return NULL;
    }

    in = xmlBufferCreate();
    out = xmlBufferCreate();
    if(NULL != in && NULL != out)
    {
        xmlBufferWriteChar(in,instr);
        /* Negative means failure; zero or more is the number of bytes written. */
        if(xmlCharEncInFunc(handler, out, in) >= 0)
        {
            /* Copy the text out so both buffers can be released below. */
            result = (char *)xmlStrdup(xmlBufferContent(out));
        }
    }
    if(NULL != in)
    {
        xmlBufferFree(in);
    }
    if(NULL != out)
    {
        xmlBufferFree(out);
    }
    xmlCharEncCloseFunc(handler);
    return result;
}

你可能感兴趣的:(嵌入式 Linux环境下C使用的XML解析库:libxml2)