// JavaParseXml

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;



/**
 * Demo of a tiny regex-driven parser for a simplified XML-like syntax.
 *
 * <p>Supported input: opening tags with unquoted {@code name=value} attributes
 * (both sides made of word characters), text directly after an opening tag,
 * and closing tags. Self-closing tags, quoted attribute values, comments,
 * entities and text that follows a closing tag are not interpreted.
 */
public class T4 {

    /**
     * Matches the next token of the input, which is either
     * <ul>
     *   <li>an opening tag plus the text that follows it — group 1 is the tag
     *       name, group 2 the raw attribute section, group 3 the text; or</li>
     *   <li>a closing tag — group 4 is the tag name.</li>
     * </ul>
     * The attribute section excludes {@code '>'} so that the tag ends at the
     * first {@code '>'}; a later {@code '>'} stays part of the text.
     */
    private static final Pattern TOKEN =
            Pattern.compile("<(\\w+)([^<>]*)>([^<]*)|</(\\w+)>");

    /** Matches one unquoted {@code name=value} attribute pair. */
    private static final Pattern ATTRIBUTE = Pattern.compile("(\\w+)=(\\w+)");

    /** A parsed element: tag name, attributes, leading text and child elements. */
    static class Node {

        /** Tag name. */
        String nodename;

        /** Attributes in the order they appear in the tag. */
        Map<String, String> attributes = new LinkedHashMap<String, String>();

        /** Text found directly after the opening tag (may be empty, or null if never parsed). */
        String text;

        /** Child elements in document order. */
        List<Node> childers = new ArrayList<Node>();

        /**
         * Renders the node as
         * {@code tagname:NAME,attribute:[k=v,...],children:[child,...]}.
         * Only the names of the children are listed, not their content.
         */
        @Override
        public String toString() {
            StringBuilder out = new StringBuilder();
            out.append("tagname:").append(nodename);

            out.append(",attribute:[");
            boolean first = true;
            for (Map.Entry<String, String> attribute : attributes.entrySet()) {
                if (!first) {
                    out.append(',');
                }
                out.append(attribute.getKey()).append('=').append(attribute.getValue());
                first = false;
            }

            out.append("],children:[");
            first = true;
            for (Node child : childers) {
                if (!first) {
                    out.append(',');
                }
                out.append(child.nodename);
                first = false;
            }
            out.append(']');

            return out.toString();
        }
    }

    /**
     * Parses {@code xml} and returns every element whose closing tag matches
     * its opening tag, in the order the elements were closed (children before
     * their parent).
     *
     * <p>The parser is lenient: a closing tag with no open element is ignored,
     * and a closing tag whose name differs from the innermost open element
     * discards that element instead of reporting an error.
     *
     * @param xml the text to parse; {@code null} is treated as empty input
     * @return the well-formed elements, never {@code null}
     */
    static List<Node> parse(String xml) {
        List<Node> wellFormed = new ArrayList<Node>();
        if (xml == null) {
            return wellFormed;
        }

        // Elements that have been opened but not yet closed, innermost on top.
        Deque<Node> open = new ArrayDeque<Node>();

        Matcher token = TOKEN.matcher(xml);
        while (token.find()) {
            if (token.group(1) != null) {
                // Opening tag: create the node and attach it to its parent, if any.
                Node node = new Node();
                node.nodename = token.group(1);
                node.text = token.group(3);

                // Look for attributes only inside the tag itself, so text such
                // as "k=v" after the tag is not mistaken for an attribute.
                Matcher attribute = ATTRIBUTE.matcher(token.group(2));
                while (attribute.find()) {
                    node.attributes.put(attribute.group(1), attribute.group(2));
                }

                if (!open.isEmpty()) {
                    open.peek().childers.add(node);
                }
                open.push(node);
            } else if (!open.isEmpty()) {
                // Closing tag: pop the innermost open element and keep it only
                // when the names agree.
                Node closed = open.pop();
                if (closed.nodename.equals(token.group(4))) {
                    wellFormed.add(closed);
                }
            }
            // A closing tag with nothing open is a stray tag and is skipped.
        }
        return wellFormed;
    }

    /**
     * Parses a built-in sample document and prints each well-formed element
     * on its own line.
     *
     * @param args ignored
     * @throws IOException never thrown; kept so the signature is unchanged
     */
    public static void main(String[] args) throws IOException {
        String xml = "<xml a=b c=d e=5>testxml" +
                "<a>testa</a>" +
                "<b>testb</b>" +
                "</xml>";

        for (Node node : parse(xml)) {
            System.out.println(node);
        }
    }
}


// You may also be interested in: (JavaParseXml)