埋点分析,果断搞起,早在oracle的9i推出xmltype类型,对xml在数据库存储提供了有力的支持,但这里先不用xmltype使用clob字段,xmltype11g 还是默认就是clob方式,12c中的
note:
XMLType
stored as CLOB
is deprecated as of Oracle Database 12c Release 1 (12.1).XMLType
tables and columns are now stored as binary XML. clob已经被废弃成不是默认。
xml文件如下:
<?xml version="1.0" encoding="utf-8"?> <client name="iphone" version="2.5.1.0" versiontype="0000"> <user account="18888888888"> <events count_date="2012-08-19"> <event network="WiFi" count="5" durations="20">time_reg</event> <event network="WiFi" count="9">send_soundim</event> </events> <events count_date="2012-08-18"> <event network="WiFi" count="4">send_sys_expression</event> <event network="WiFi" count="5">click_accessory</event> </events> </user> <user account=" noaccount"> <events count_date="2012-08-19"> <event network="CMCC3G" count="2">click_reg_next</event> </events> </user> </client>
自己写的解析代码第一种如下:
select /*+ NO_XML_QUERY_REWRITE */ t.name, t.version, t.versiontype, t.account, t.network, t.count_date, t.cnt, t.duration, t.action from (SELECT XMLQuery('//client' PASSING res RETURNING CONTENT) OBJECT_VALUE FROM xdb.resource_view r where any_path like '%client.xml' and under_path(RES, '/home/SCOTT/demonstrations/introduction') = 1)p, XMLTable('for $i in /client let $a:=$i/@name let $b:=$i/@version let $c:=$i/@versiontype let $d:=$i/user for $j in $i/user let $e:=$j/@account for $x in $i/user/events let $f:= $x/@count_date for $y in $i/user/events/event let $g:= $y/@network let $h:= $y/@count let $k:= $y/@duration let $l:=$y/text() return <client name="{$a}" version="{$b}" versiontype="{$c}" account="{$e}" count_date="{$f}" network="{$g}" count="{$h}" duration="{$k}" action="{$l}" > </client> ' PASSING p.OBJECT_VALUE COLUMNS name VARCHAR2(20) PATH '@name', version VARCHAR2(24) PATH '@version', versiontype VARCHAR2(16) PATH '@versiontype', account VARCHAR2(50) PATH '@account', network VARCHAR2(20) PATH '@network', count_date VARCHAR2(48) PATH '@count_date', cnt number(10) PATH '@count', duration number(10) PATH '@duration', action VARCHAR2(50) PATH '@action' ) t;
第二种:
CREATE OR REPLACE PROCEDURE sp_xmlparser(datestr IN VARCHAR2) AS /* datestr format 'yyyymmdd' * * edited by H.S vsersion 1.0 */ ------------------------------------------------- --以下变量用于获取XML节点的值,对应表中的字段值 --xmlClobData CLOB; client VARCHAR2(20); version VARCHAR2(24); versionType VARCHAR2(16); account VARCHAR2(50); network VARCHAR2(20); action VARCHAR2(50); count_date VARCHAR2(20); cnt INTEGER; duration INTEGER; ------------------------------------------------ --步骤1:定义或确定要解析的XML规则 --步骤2:创建XML解析器实例XMLPARSER.parser,如下: xmlPar XMLPARSER.parser := XMLPARSER.NEWPARSER; --步骤3:定义DOM文档对象,如下: doc xmldom.DOMDocument; --步骤4:定义解析XML所需要的其他对象,如下: lenUser INTEGER; eventItem INTEGER; networkcount INTEGER; eventlen INTEGER; tempNode_event xmldom.DOMNode; eventItem xmldom.DOMNode; clientnode xmldom.DOMNode; usernode xmldom.DOMNode; eventnetwork_node xmldom.DOMNode; unitNodes xmldom.DOMNodeList; eventNodes xmldom.DOMNodeList; userNodes xmldom.DOMNodeList; eventnetwork_Nodes xmldom.DOMNodeList; tempArrMap xmldom.DOMNamedNodeMap; tempArrMap2 xmldom.DOMNamedNodeMap; tempArrMap3 xmldom.DOMNamedNodeMap; tempArrMap4 xmldom.DOMNamedNodeMap; CURSOR cur IS SELECT xmlstring FROM xml_test WHERE loadtime=datestr; v_xmlstring xml_test.xmlstring%TYPE; BEGIN OPEN cur; LOOP FETCH cur INTO v_xmlstring; EXIT WHEN cur%NOTFOUND; --步骤5:获取xml数据,以下假设从数据表的clob字段中获取取,如下: -- SELECT xmlstring INTO xmlClobData FROM xml_test; --步骤6:解析xml数据,如下 xmlPar := xmlparser.newParser; xmlparser.parseClob(xmlPar,v_xmlstring); doc := xmlparser.getDocument(xmlPar); -- 释放解析器实例 xmlparser.freeParser(xmlPar); -- 获取所有client元素 unitNodes := xmldom.getElementsByTagName(doc,'client'); -- 遍历clinet 中的 name=”iphone” version="2.5.1.0" versiontype="0000"属性 clientNode := xmldom.item(unitNodes,0); tempArrMap := xmldom.getAttributes(clientnode); client := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'name')); version := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'version')); versionType := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'versiontype')); --DBMS_OUTPUT.PUT_LINE('client'||lenUnit); --user account子节点个数 userNodes := xmldom.getChildNodes(clientNode); lenUser := xmldom.getLength(userNodes); --DBMS_output.Put_Line('lenuser'||lenUser); --第i个user account子节点 FOR i in 0..lenUser-1 LOOP --获取第i个user account子节点元素值 usernode:=xmldom.item(userNodes,i); tempArrMap2:=xmldom.getAttributes(usernode); account:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap2,'account')); --event子节点个数 eventNodes:= xmldom.getChildNodes(usernode); eventlen:=xmldom.getLength(eventNodes); --DBMS_output.Put_Line('eventlen'||eventlen); FOR j in 0..eventlen-1 LOOP --获取j个event子节点 tempNode_event:=xmldom.item(eventNodes,j); --获取第j个event子节点元素值 tempArrMap3 := xmldom.getAttributes(tempNode_event); count_date := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap3,'count_date')); --event network子节点个数 eventnetwork_Nodes:= xmldom.getChildNodes(tempNode_event); networkcount := xmldom.getLength(eventnetwork_Nodes); --DBMS_output.Put_Line('networkcount'||networkcount); FOR k in 0.. networkcount-1 LOOP eventnetwork_node:=xmldom.item(eventnetwork_Nodes,k); tempArrMap4 := xmldom.getAttributes(eventnetwork_node); --event network子节点元素值 network := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'network')); cnt:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'count')); duration:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'duration')); action:= xmldom.getNodeValue(xmldom.getfirstChild(xmldom.item(eventnetwork_Nodes,0))); --DBMS_output.Put_Line(account||' '||count_date||' '||client||' '||version||' '||versionType||' '||network||' '||cnt||' '||duration||' '||action); INSERT INTO mobile_login(account,login_date,client,version,versiontype,network,cnt,duration,action)VALUES(account,count_date,client,version,versionType,network,cnt,duration,action); END LOOP; END LOOP; END LOOP; COMMIT; END LOOP; --步骤7:释放文档对象 xmldom.freeDocument(doc); --步骤8:异常与错误处理 EXCEPTION WHEN OTHERS THEN DBMS_output.PUT_LINE(SQLERRM); END;再写个shell实现加载
#!/bin/sh datestr=`date +%Y%m%d` ls -l ./$datestr|awk '{ if(NR==1){} else print NR-1"|"'$datestr'"|./'$datestr'/"$9}'>xml_list.txt echo " load data infile 'xml_list.txt' append into table SB_250.xml_test fields terminated by '|' trailing nullcols ( file_id integer external, loadtime char, file_name FILLER char, xmlstring LOBFILE(file_name) TERMINATED BY EOF )">control.ctl sqlldr oracle/oracle control=control.ctl if [ $? -eq 0 ]; then echo "$datestr:import successfully.">>xml_import.log else echo "$datestr:import failed.">>xml_import.log fi