libxml是一个实现了读,创建和操作xml数据等功能的c语言库,对于其具体的API,可参考官方文档,这里只介绍一些常用的用法。
libxml在操作xml数据时,定义了几种节点类型:
/*
 * Node type tags used by libxml2. Every tree node carries one of these
 * values in its `type` field. The numeric values are kept exactly as in the
 * listing this tutorial reproduces from the libxml2 documentation.
 */
typedef enum {
    XML_ELEMENT_NODE       = 1,   /* element, e.g. <story> */
    XML_ATTRIBUTE_NODE     = 2,   /* attribute of an element */
    XML_TEXT_NODE          = 3,   /* character data inside an element */
    XML_CDATA_SECTION_NODE = 4,
    XML_ENTITY_REF_NODE    = 5,
    XML_ENTITY_NODE        = 6,
    XML_PI_NODE            = 7,
    XML_COMMENT_NODE       = 8,
    XML_DOCUMENT_NODE      = 9,
    XML_DOCUMENT_TYPE_NODE = 10,
    XML_DOCUMENT_FRAG_NODE = 11,
    XML_NOTATION_NODE      = 12,
    XML_HTML_DOCUMENT_NODE = 13,
    XML_DTD_NODE           = 14,
    XML_ELEMENT_DECL       = 15,
    XML_ATTRIBUTE_DECL     = 16,
    XML_ENTITY_DECL        = 17,
    XML_NAMESPACE_DECL     = 18,
    XML_XINCLUDE_START     = 19,
    XML_XINCLUDE_END       = 20,
    XML_DOCB_DOCUMENT_NODE = 21
} xmlElementType;
比较常用的是XML_ELEMENT_NODE,XML_TEXT_NODE和XML_ATTRIBUTE_NODE,可称为元素节点,文本节点和属性节点;元素节点和文本节点都是xmlNode结构体类型的(属性节点实际使用xmlAttr结构体,但其开头的公共字段与xmlNode一致),它们都可以通过curnode->type来获得类型。这几种节点分别对应xml的不同数据,以下面xml文档来说明,story和storyinfo是元素节点,而John Fleck就是文本节点。在结构上John Fleck文本节点是<author>元素节点的子节点。
测试xml:
<?xml version="1.0"?> <story> <storyinfo> <author>John Fleck</author> <datewritten>June 2, 2002</datewritten> <keyword>example keyword</keyword> </storyinfo> <body> <headline>This is the headline</headline> <para>This is the body text.</para> </body> </story>
1. 解析文档:
/*
 * Parse the file named by docname and check that its root element is
 * <story>. On any failure a message is written to stderr, the document (if
 * one was created) is freed, and the enclosing function returns.
 */
xmlDocPtr doc;
xmlNodePtr cur;

doc = xmlParseFile(docname);
if (doc == NULL) {
    fprintf(stderr, "Document not parsed successfully. \n");
    return;
}

cur = xmlDocGetRootElement(doc);
if (cur == NULL) {
    fprintf(stderr, "empty document\n");
    xmlFreeDoc(doc);    /* release the parsed tree before bailing out */
    return;
}

/* A non-zero xmlStrcmp() result means the root name is not "story". */
if (xmlStrcmp(cur->name, (const xmlChar *)"story") != 0) {
    /* Newline added so this message ends its line like the ones above. */
    fprintf(stderr, "document of the wrong type, root node != story\n");
    xmlFreeDoc(doc);
    return;
}
2. 检索节点:
/**
 * Print the text content of every <keyword> element that is a direct child
 * of cur, one "keyword: ..." line per element on stdout.
 *
 * @param doc  document that owns cur; passed through to xmlNodeListGetString()
 * @param cur  parent node whose children are scanned; NULL is a no-op
 */
void parseStory(xmlDocPtr doc, xmlNodePtr cur)
{
    xmlNodePtr child;

    if (cur == NULL) {
        return;
    }

    for (child = cur->xmlChildrenNode; child != NULL; child = child->next) {
        xmlChar *key;

        if (xmlStrcmp(child->name, (const xmlChar *)"keyword") != 0) {
            continue;
        }

        key = xmlNodeListGetString(doc, child->xmlChildrenNode, 1);
        /*
         * key is NULL when the element has no text (e.g. <keyword/>);
         * passing NULL to %s is undefined, so print an empty string instead.
         */
        printf("keyword: %s\n", key != NULL ? (const char *)key : "");
        xmlFree(key);
    }
}
3. XPATH方式检索文档:
除了遍历文档树来寻找某个节点外,libxml2还支持使用XPATH表达式来寻找符合指定搜索规则的节点集。xpath的内容可参见:http://www.w3schools.com/xpath/xpath_operators.asp 和 http://www.w3.org/TR/xpath/ 。测试可参考:http://www.xpathtester.com/test 。这里是几个简单的用法,"/Information/CameraSet"取的是从根节点开始的绝对路径,"//CameraItem"则匹配文档中任意位置的CameraItem节点(不必写出完整路径),数字可直接用等号比较,文本则要用text()取其内容进行比较。
char expr[128]; sprintf(expr, "/Information/CameraSet/CameraItem[IP[text()='%s']]", pItem->sIP); sprintf(expr, "/Information/CameraSet/CameraItem[Index=%d]/VarTrafficLine", iIndex); xpObjPtr = getnodeset(doc, (xmlChar*)expr);
4. 添加节点和获取文本:
设置元素节点的文本可使用xmlNodeSetContent(cur, (const xmlChar*)"sssss");也有其他方式,如下面代码所示。获取文本可使用xmlNodeGetContent(cur)和xmlNodeListGetString(doc, nodelist, inLine):第一个方法获取的是cur的子孙节点中类型是TEXT或者ENTITY_REF的节点的内容拼接的字符串;第二个方法只取当前节点链表中类型是TEXT或者ENTITY_REF的节点的内容。
/* Create a <NewNode> element and attach it as a child of cur. */
newnode = xmlNewNode(NULL, (const xmlChar *)"NewNode");
xmlAddChild(cur, newnode);

/* Give the new element two child elements, each with text content. */
xmlNewTextChild(newnode, NULL,
                (const xmlChar *)"Code",
                (const xmlChar *)"1001");
xmlNewTextChild(newnode, NULL,
                (const xmlChar *)"Name",
                (const xmlChar *)"anewnode");

/*
 * Add text to an element node that already exists.
 * NOTE(review): the libxml2 documentation says xmlAddChild() may merge a
 * text node into an adjacent text node and free it, so avoid using textnode
 * after this call — confirm against the libxml2 version in use.
 */
textnode = xmlNewText((const xmlChar *)"abcdefg");
xmlAddChild(cur, textnode);
5. 删除节点:
xmlUnlinkNode(cur); xmlFreeNode(cur);
这样删除指定节点后,会自动形成新的节点树结构,而不会打乱原来的结构,如:删除<author>节点后,<storyinfo>的第一个子节点就变成<datewritten>,<datewritten>的前驱节点是NULL,而不是删除前的<author>了。
代码:
#include <stdio.h>

#include <libxml/parser.h>
#include <libxml/xpath.h>

/**
 * Parse the XML file at docname.
 *
 * @param docname  path of the file to parse
 * @return the parsed document, or NULL after reporting the failure on
 *         stderr. main() releases the document with xmlFreeDoc().
 */
xmlDocPtr getdoc(const char *docname)
{
    xmlDocPtr doc = xmlParseFile(docname);

    if (doc == NULL) {
        fprintf(stderr, "Document not parsed successfully. \n");
    }
    return doc;
}

/**
 * Evaluate an XPath expression against doc.
 *
 * @param doc    document to search
 * @param xpath  XPath expression to evaluate
 * @return the evaluation result when it holds a non-empty node set; NULL if
 *         the context could not be created, evaluation failed, or nothing
 *         matched (a message is printed in each case). main() releases a
 *         non-NULL result with xmlXPathFreeObject().
 */
xmlXPathObjectPtr getnodeset(xmlDocPtr doc, const xmlChar *xpath)
{
    xmlXPathContextPtr context;
    xmlXPathObjectPtr result;

    context = xmlXPathNewContext(doc);
    if (context == NULL) {
        printf("Error in xmlXPathNewContext\n");
        return NULL;
    }

    result = xmlXPathEvalExpression(xpath, context);
    xmlXPathFreeContext(context);    /* not needed once evaluation is done */
    if (result == NULL) {
        printf("Error in xmlXPathEvalExpression\n");
        return NULL;
    }

    if (xmlXPathNodeSetIsEmpty(result->nodesetval)) {
        xmlXPathFreeObject(result);
        printf("No result\n");
        return NULL;
    }
    return result;
}

/**
 * Print the leading text of every node matched by "//root" in the document
 * named on the command line.
 *
 * @return 0 on success (including "no match"), 1 on a usage error or when
 *         the document cannot be parsed.
 */
int main(int argc, char **argv)
{
    const xmlChar *xpath = (const xmlChar *)"//root";
    xmlXPathObjectPtr result;
    xmlDocPtr doc;

    if (argc <= 1) {
        printf("Usage: %s docname\n", argv[0]);
        return 1;
    }

    doc = getdoc(argv[1]);
    if (doc == NULL) {
        /* getdoc() already reported the error; nothing to query. */
        xmlCleanupParser();
        return 1;
    }

    result = getnodeset(doc, xpath);
    if (result != NULL) {
        xmlNodeSetPtr nodeset = result->nodesetval;
        int i;

        for (i = 0; i < nodeset->nodeNr; i++) {
            xmlChar *keyword =
                xmlNodeListGetString(doc, nodeset->nodeTab[i]->xmlChildrenNode, 1);

            /* keyword is NULL for a node without text; never pass NULL to %s. */
            printf("keyword1: %s\n", keyword != NULL ? (const char *)keyword : "");
            xmlFree(keyword);
        }
        xmlXPathFreeObject(result);
    }

    xmlFreeDoc(doc);
    xmlCleanupParser();
    return 0;
}