XML解析

埋点分析,果断搞起,早在oracle的9i推出xmltype类型,对xml在数据库存储提供了有力的支持,但这里先不用xmltype使用clob字段,xmltype11g 还是默认就是clob方式,12c中的

note:

XMLType stored as CLOB is deprecated as of Oracle Database 12c Release 1 (12.1).XMLType tables and columns are now stored as binary XML. clob已经被废弃成不是默认。

xml文件如下:

<?xml version="1.0" encoding="utf-8"?>
<client name="iphone" version="2.5.1.0" versiontype="0000">
<user account="18888888888">
	<events count_date="2012-08-19">
		<event network="WiFi" count="5" durations="20">time_reg</event>
		<event network="WiFi" count="9">send_soundim</event>
	</events>
	<events count_date="2012-08-18">
		<event network="WiFi" count="4">send_sys_expression</event>
		<event network="WiFi" count="5">click_accessory</event>	
	</events>
</user>
<user account=" noaccount">
	<events count_date="2012-08-19">
		<event network="CMCC3G" count="2">click_reg_next</event>
	</events>
</user>
</client>

自己写的解析代码第一种如下:

  select   /*+ NO_XML_QUERY_REWRITE */ t.name,
   t.version,
   t.versiontype,
   t.account,
   t.network,
   t.count_date,
   t.cnt, 
   t.duration,
   t.action
from
(SELECT XMLQuery('//client' PASSING res  RETURNING CONTENT) OBJECT_VALUE
FROM xdb.resource_view r where any_path like '%client.xml'
and under_path(RES, '/home/SCOTT/demonstrations/introduction') = 1)p,
 XMLTable('for $i in /client
              let $a:=$i/@name
              let $b:=$i/@version
              let $c:=$i/@versiontype
              let $d:=$i/user
              for $j in $i/user
              let $e:=$j/@account
              for $x in $i/user/events
              let $f:= $x/@count_date
              for $y in $i/user/events/event
              let $g:= $y/@network
              let $h:= $y/@count
              let $k:= $y/@duration
              let $l:=$y/text()
                 return <client name="{$a}" version="{$b}" versiontype="{$c}" account="{$e}" count_date="{$f}" network="{$g}" count="{$h}" duration="{$k}" action="{$l}" > 
                        </client> ' PASSING p.OBJECT_VALUE
          COLUMNS name          VARCHAR2(20)       PATH '@name',
                  version          VARCHAR2(24)   PATH '@version',
                  versiontype      VARCHAR2(16)   PATH '@versiontype',
                  account          VARCHAR2(50)   PATH '@account',
                  network          VARCHAR2(20)   PATH '@network',
                  count_date       VARCHAR2(48)   PATH '@count_date',
                  cnt              number(10)     PATH '@count',
                  duration         number(10)     PATH  '@duration',
                  action           VARCHAR2(50)   PATH '@action'
) t;

第二种:

CREATE OR REPLACE PROCEDURE sp_xmlparser(datestr IN VARCHAR2)
AS
/* datestr format 'yyyymmdd'
*
* edited by H.S vsersion 1.0
*/ 
-------------------------------------------------
  --以下变量用于获取XML节点的值,对应表中的字段值
    --xmlClobData CLOB;
    client VARCHAR2(20);
    version VARCHAR2(24);
    versionType VARCHAR2(16);
    account VARCHAR2(50);
    network VARCHAR2(20);
    action VARCHAR2(50);
    count_date VARCHAR2(20);
    cnt INTEGER;
    duration INTEGER;
------------------------------------------------
--步骤1:定义或确定要解析的XML规则
--步骤2:创建XML解析器实例XMLPARSER.parser,如下:
 xmlPar XMLPARSER.parser := XMLPARSER.NEWPARSER;
--步骤3:定义DOM文档对象,如下:
 doc xmldom.DOMDocument;
--步骤4:定义解析XML所需要的其他对象,如下:
 lenUser INTEGER;
 eventItem INTEGER;
 networkcount INTEGER;
 eventlen INTEGER;
 tempNode_event xmldom.DOMNode;
 eventItem  xmldom.DOMNode;
 clientnode  xmldom.DOMNode;
 usernode xmldom.DOMNode;
 eventnetwork_node xmldom.DOMNode;
 unitNodes xmldom.DOMNodeList;
 eventNodes xmldom.DOMNodeList;
 userNodes xmldom.DOMNodeList;
 eventnetwork_Nodes xmldom.DOMNodeList;
 tempArrMap xmldom.DOMNamedNodeMap;
 tempArrMap2 xmldom.DOMNamedNodeMap;
 tempArrMap3 xmldom.DOMNamedNodeMap;
 tempArrMap4 xmldom.DOMNamedNodeMap;
 CURSOR cur IS SELECT xmlstring FROM xml_test WHERE loadtime=datestr;
 v_xmlstring xml_test.xmlstring%TYPE;
 BEGIN 
  OPEN cur;
  LOOP
    FETCH cur INTO v_xmlstring;
    EXIT WHEN cur%NOTFOUND;
--步骤5:获取xml数据,以下假设从数据表的clob字段中获取取,如下:
 -- SELECT xmlstring INTO xmlClobData FROM xml_test;
 --步骤6:解析xml数据,如下
  xmlPar := xmlparser.newParser;  
  xmlparser.parseClob(xmlPar,v_xmlstring);
  doc := xmlparser.getDocument(xmlPar);
  -- 释放解析器实例
  xmlparser.freeParser(xmlPar);
  -- 获取所有client元素
  unitNodes := xmldom.getElementsByTagName(doc,'client');
  -- 遍历clinet 中的 name=”iphone” version="2.5.1.0" versiontype="0000"属性
  clientNode := xmldom.item(unitNodes,0); 
  tempArrMap := xmldom.getAttributes(clientnode);
     
  client := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'name'));
     
  version := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'version')); 
     
  versionType := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap,'versiontype'));
     
  --DBMS_OUTPUT.PUT_LINE('client'||lenUnit);
  --user account子节点个数
  userNodes := xmldom.getChildNodes(clientNode);
  lenUser := xmldom.getLength(userNodes);
  --DBMS_output.Put_Line('lenuser'||lenUser);
  --第i个user account子节点 
  FOR i in 0..lenUser-1
     LOOP
  --获取第i个user account子节点元素值
  usernode:=xmldom.item(userNodes,i);
  tempArrMap2:=xmldom.getAttributes(usernode);
  account:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap2,'account'));
  --event子节点个数
  eventNodes:= xmldom.getChildNodes(usernode);
  eventlen:=xmldom.getLength(eventNodes);
   --DBMS_output.Put_Line('eventlen'||eventlen);
     FOR j in 0..eventlen-1
       LOOP
       --获取j个event子节点
       tempNode_event:=xmldom.item(eventNodes,j);
       --获取第j个event子节点元素值
       tempArrMap3 := xmldom.getAttributes(tempNode_event);
       count_date := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap3,'count_date'));
       --event network子节点个数
       eventnetwork_Nodes:= xmldom.getChildNodes(tempNode_event);
       networkcount := xmldom.getLength(eventnetwork_Nodes);
       --DBMS_output.Put_Line('networkcount'||networkcount);
          FOR k in 0.. networkcount-1
          LOOP
             eventnetwork_node:=xmldom.item(eventnetwork_Nodes,k);
             tempArrMap4 := xmldom.getAttributes(eventnetwork_node);
             --event network子节点元素值
             network := xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'network')); 
             cnt:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'count'));
             duration:= xmldom.getNodeValue(xmldom.getNamedItem(tempArrMap4,'duration'));  
             action:= xmldom.getNodeValue(xmldom.getfirstChild(xmldom.item(eventnetwork_Nodes,0)));
             --DBMS_output.Put_Line(account||' '||count_date||' '||client||' '||version||' '||versionType||' '||network||' '||cnt||' '||duration||' '||action);
             INSERT INTO mobile_login(account,login_date,client,version,versiontype,network,cnt,duration,action)VALUES(account,count_date,client,version,versionType,network,cnt,duration,action);
          END LOOP;
       END LOOP;
   END LOOP;
   COMMIT;
END LOOP;
--步骤7:释放文档对象
   xmldom.freeDocument(doc);
--步骤8:异常与错误处理
   EXCEPTION
    WHEN OTHERS THEN
      DBMS_output.PUT_LINE(SQLERRM);
END;
再写个shell实现加载

#!/bin/sh
datestr=`date +%Y%m%d`
ls -l ./$datestr|awk '{ if(NR==1){} else print NR-1"|"'$datestr'"|./'$datestr'/"$9}'>xml_list.txt
echo "
load data
infile 'xml_list.txt'
append
into table SB_250.xml_test
fields terminated by '|'
trailing nullcols
(
file_id integer external,
loadtime char,
file_name FILLER char,
xmlstring LOBFILE(file_name) TERMINATED BY EOF
)">control.ctl
        sqlldr oracle/oracle control=control.ctl
        if [ $? -eq 0 ]; then
           echo "$datestr:import successfully.">>xml_import.log
        else
           echo "$datestr:import failed.">>xml_import.log
        fi



性能测试4秒1万多条,满给力

你可能感兴趣的:(XML解析)