[xmlparser]Code-XMLParser

package com.xml;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * A small hand-written recursive-descent XML parser.
 *
 * <p>The input is first split into tokens by {@link Lexer}; the parser then
 * walks the token list with a cursor, so it never has to re-run the lexer.
 *
 * <p>Notes from the original author:
 * the doctype is not matched against the root node, and the grammar below is
 * only an approximation (the text of the grammar nodes differs and start/end
 * are not separated here, although the program does distinguish them).
 *
 * Grammar:
 *   doc     -> xml {doctype} root
 *   xml     -> &lt;?xml version {encoding} ?&gt;
 *   doctype -> &lt;!DOCTYPE {text}+ &gt;
 *   root    -> node
 *   node    -> (doctype | tag | text | remark | cdata) node
 *   tag     -> &lt;name {attr}&gt;
 *   remark  -> &lt;!-- text --&gt;
 *   cdata   -> &lt;![CDATA[ text ]]&gt;
 *   attr    -> name = ("text" | 'text')
 *   name    -> [^\s]+
 *   text    -> charset
 *
 * @author zfzheng
 */
public class XMLParser {

    private Lexer lex;

    /** All tokens of the document, collected up front to avoid backtracking into the lexer. */
    private List nodeList = null;

    /** Index of the next token to be returned by {@link #next()}. */
    private int cur = 0;

    /** Number of tokens in {@link #nodeList}, captured when parsing starts. */
    private int size = 0;

    /**
     * Tokenizes the given text, parses it into a document and dumps the result.
     *
     * @param xml the complete XML text
     * @return the parsed document
     * @throws Exception if the token stream ends prematurely or a value cannot be parsed
     */
    private XMLDocument parse(StringBuffer xml) throws Exception {
        lex = new Lexer(xml);
        nodeList = new ArrayList();
        while (!lex.eof()) {
            nodeList.add(lex.nextToken());
        }
        XMLDocument doc = doc();
        doc.dump();
        return doc;
    }

    /**
     * Parses {@code doc -> xml {doctype} root}.
     *
     * @return the populated document
     * @throws InnerEndException if the tokens run out before the document is complete
     */
    private XMLDocument doc() throws InnerEndException {
        XMLDocument doc = new XMLDocument();
        cur = 0;
        size = nodeList.size();

        // XML declaration
        Node xml = xml();
        String version = xml.getAttributeValue("version");
        // The declaration (or its version attribute) may be absent; only parse a real value.
        if (version != null && version.trim().length() > 0) {
            doc.setVersion(Float.parseFloat(version));
        }
        doc.setEncoding(xml.getAttributeValue("encoding"));

        // document type
        doc.setDocType(doctype());

        // content
        doc.setRoot(root());
        return doc;
    }

    /**
     * Collects consecutive tokens up to (but not including) the next token that
     * starts with {@code <} and returns them as one text node.
     */
    private Node text() throws InnerEndException {
        StringBuffer sb = new StringBuffer();
        String v = next();
        while (!v.startsWith("<")) {
            sb.append(v);
            v = next();
        }
        back(); // the '<' token belongs to whatever follows the text
        Node node = new Node();
        node.setValue(sb.toString());
        node.setType(Node.NODE_TEXT);
        return node;
    }

    /** Reads a comment: every token up to and including the one ending with {@code -->}. */
    private Node remark() throws InnerEndException {
        Node node = new Node();
        node.setValue(readThrough("-->"));
        node.setType(Node.NODE_REMARK);
        return node;
    }

    /** Reads a CDATA section: every token up to and including the one ending with {@code ]]>}. */
    private Node cdata() throws InnerEndException {
        Node node = new Node();
        node.setValue(readThrough("]]>"));
        node.setType(Node.NODE_CDATA);
        return node;
    }

    /**
     * Concatenates tokens starting at the cursor up to and including the first
     * token that ends with {@code terminator}.
     */
    private String readThrough(String terminator) throws InnerEndException {
        StringBuffer sb = new StringBuffer();
        String v = next();
        while (!v.endsWith(terminator)) {
            sb.append(v);
            v = next();
        }
        sb.append(v);
        return sb.toString();
    }

    /**
     * Parses one tag with its attributes and, unless it is self-closing, all of
     * its children up to the matching end tag.
     *
     * @return the tag node
     * @throws InnerEndException if the tokens run out inside the tag
     */
    private Node node() throws InnerEndException {
        boolean closed = false;
        Node root = new Node();
        String v = nextNotSpace();
        if (v.startsWith("<")) {
            if (v.endsWith("/>")) {
                // "<name/>" in a single token
                closed = true;
                root.setValue(takeOff(v, 1, 2));
            } else if (v.endsWith(">")) {
                // "<name>" in a single token
                root.setValue(takeOff(v));
            } else {
                // "<name" followed by attribute tokens and a closing ">" or "/>"
                root.setValue(v.substring(1));
                v = nextNotSpace();
                while (!v.endsWith(">")) { // attributes
                    back();
                    root.addAttribute(attribute());
                    v = nextNotSpace();
                }
                if (v.endsWith("/>")) {
                    closed = true;
                }
            }
        }
        if (!closed) {
            String matchEnd = "</" + root.getValue() + '>';
            v = next();
            while (!matchEnd.equals(v)) {
                // Each sub-parser re-reads the token that was just peeked.
                back();
                Node subNode;
                if (v.startsWith("<!--")) {
                    subNode = remark();
                } else if (v.startsWith("<![CDATA[")) {
                    subNode = cdata();
                } else if (v.startsWith("<")) { // startsWith is safe for an empty token, charAt(0) is not
                    subNode = node();
                } else {
                    subNode = text();
                }
                root.addChild(subNode);
                if (eof()) {
                    break;
                }
                v = next();
            }
        }
        root.setType(Node.NODE_TAG);
        return root;
    }

    /** Parses {@code root -> node}. */
    private Node root() throws InnerEndException {
        return node();
    }

    /**
     * Parses an optional doctype declaration.
     *
     * @return the raw doctype text from {@code <!DOCTYPE} through {@code >}, or an
     *         empty string when the next non-blank token is not a doctype (in which
     *         case that token is pushed back)
     */
    private String doctype() throws InnerEndException {
        String v = nextNotSpace();
        StringBuffer sb = new StringBuffer();
        if ("<!DOCTYPE".equals(v)) {
            sb.append(v);
            v = next();
            while (!">".equals(v)) {
                sb.append(v);
                v = next();
            }
            sb.append(v);
        } else {
            backToNotSpace();
        }
        return sb.toString();
    }

    /**
     * Parses the optional {@code <?xml ... ?>} declaration into a node whose
     * attributes carry version/encoding. When the document does not start with
     * a declaration, the token is pushed back so the rest of the document can
     * still be parsed, and a node without attributes is returned.
     */
    private Node xml() throws InnerEndException {
        String v = nextNotSpace();
        Node node = new Node();
        if ("<?xml".equals(v)) {
            node.setValue("xml");
            v = nextNotSpace();
            while (!"?>".equals(v)) {
                back();
                node.addAttribute(attribute());
                v = nextNotSpace();
            }
        } else {
            back(); // not a declaration: leave the token for doctype()/root()
        }
        node.setType(Node.NODE_XML);
        return node;
    }

    /** Parses {@code name = "value"}; the surrounding quote characters are stripped from the value. */
    private Attribute attribute() throws InnerEndException {
        Attribute att = new Attribute();
        att.setName(nextNotSpace());
        nextNotSpace(); // skip the '=' token
        att.setValue(takeOff(nextNotSpace()));
        return att;
    }

    /** Returns the next token that is not blank, consuming any blank tokens before it. */
    private String nextNotSpace() throws InnerEndException {
        String v = next();
        while (isEmpty(v)) {
            v = next();
        }
        return v;
    }

    /**
     * Moves the cursor back onto the closest preceding non-blank token, so that
     * token is returned again by the next read. Stops at the first token instead
     * of running past the start of the list.
     */
    private void backToNotSpace() {
        while (cur > 0) {
            cur--;
            if (!isEmpty((String) nodeList.get(cur))) {
                return;
            }
        }
    }

    /** Steps the cursor back by one token, if possible. */
    private void back() {
        if (cur > 0) {
            cur--;
        }
    }

    /** Returns whether every token has been consumed. */
    private boolean eof() {
        return cur >= size;
    }

    /**
     * Returns the token at the cursor and advances.
     *
     * @throws InnerEndException if no tokens are left
     */
    private String next() throws InnerEndException {
        if (cur >= size) {
            throw new InnerEndException();
        }
        return (String) nodeList.get(cur++);
    }

    /** Returns whether the token is empty or consists solely of whitespace. */
    private boolean isEmpty(String v) {
        return v.replaceAll("\\s", "").length() == 0;
    }

    /** Returns {@code s} without its first {@code beginLen} and last {@code endLen} characters. */
    private String takeOff(String s, int beginLen, int endLen) {
        return s.substring(beginLen, s.length() - endLen);
    }

    /**
     * Returns {@code s} without its first and last character; strings that are
     * {@code null} or shorter than two characters are returned unchanged.
     */
    private String takeOff(String s) {
        if (s == null || s.length() < 2) {
            return s;
        }
        return s.substring(1, s.length() - 1);
    }

    /** Debug helper: prints every token on its own line. */
    private void dumpNodeList() {
        for (int i = 0, n = nodeList.size(); i < n; i++) {
            System.out.println(nodeList.get(i));
        }
    }

    /** Thrown when the parser needs another token but the token list is exhausted. */
    class InnerEndException extends Exception {
    }

    /**
     * Reads the whole file (line terminators normalized to {@code '\n'}) and parses it.
     * The file is decoded with the platform default charset.
     *
     * @param fileName path of the XML file
     * @return the parsed document
     * @throws Exception on I/O failure or malformed input
     */
    public static XMLDocument parseFromFile(String fileName) throws Exception {
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
        StringBuffer sb = new StringBuffer();
        try {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } finally {
            br.close(); // release the file handle even if reading fails
        }
        return parseFrom(sb);
    }

    /**
     * Parses XML held in memory.
     *
     * @param xml the complete XML text
     * @return the parsed document
     * @throws Exception on malformed input
     */
    public static XMLDocument parseFrom(StringBuffer xml) throws Exception {
        return new XMLParser().parse(xml);
    }

    /**
     * Command-line entry point: {@code java com.xml.XMLParser xmlFile}.
     * The file is taken from the last argument, so the historical two-argument
     * invocation (which read the second argument) keeps working.
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1 || args.length > 2) {
            System.out.println("Usage: java com.xml.XMLParser xmlFile");
        } else {
            XMLParser.parseFromFile(args[args.length - 1]);
        }
    }
}

你可能感兴趣的:(xml,exception,String,null,import,encoding)