要解析的 XML 文件有 1G 大,解析时报内存溢出,怎么办?

上个月工作中,我遇到了一个超大 XML 文件的解析任务。按照以往的做法先用 JDOM 试了一下,结果 JVM 内存溢出了——JDOM 会把整个文档一次性加载进内存。没办法,只好上网向万能的网友们求助,最后改用 SAX 流式解析(边读边处理,不需要把整个文档放进内存)。花了将近一上午的时间,终于写好了一个实用且符合编码习惯的工具,在这里分享给大家。

需要引入的jar包:

 
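<dependency>
    <groupId>javax.persistence</groupId>
    <artifactId>persistence-api</artifactId>
    <version>1.0.2</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.4</version>
</dependency>
<dependency>
    <groupId>org.jdom</groupId>
    <artifactId>jdom</artifactId>
    <version>2.0.2</version>
</dependency>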
 

第一段代码,

package com.kernel.util;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * 自定义xml解析工具
 * 更新时间:2019/9/6
 */
public class MyXmlHandler extends DefaultHandler {
    private Class clazz;             //反射对象
    private List list;          //解析结果
    private String element;             //获取的节点的名称
    private List fieldList;     //反射对象的字段的名称
    private Object entity;              //实例化的反射对象
    private String rootName;            //xml中相对根节点
    private StringBuffer value;         //现有标签里的值,预防ch[]数组里面获取到的不是一个完整的xml

    public MyXmlHandler(Object entity) {
        initHandler(entity.getClass());
    }

    public MyXmlHandler(Class clazz) {
        initHandler(clazz);
    }

    public MyXmlHandler(Object entity, String rootName) {
        initHandler(entity.getClass(), rootName);
    }

    public MyXmlHandler(Class clazz, String rootName) {
        this.initHandler(clazz, rootName);
    }

    private void initHandler(Class clazz) {
        initHandler(clazz, clazz.getSimpleName());
    }

    private void initHandler(Class clazz, String rootName) {
        this.clazz = clazz;
        this.rootName = rootName;
        fieldList = new ArrayList<>();

        Field[] fields = clazz.getDeclaredFields();
        for (Field f : fields) {
            fieldList.add(f.getName());
        }
    }

    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        list = new ArrayList<>();
    }

    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
        fieldList.clear();
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if (qName.equalsIgnoreCase(rootName)) {
            try {
                entity = clazz.newInstance();
            } catch (Exception e) {
                System.out.println("init new entity error:" + clazz.getName() + "\t" + qName + "\t" + e.getMessage());
            }
        }
        value = new StringBuffer();
        element = qName;
    }

    @Override
    public void endElement(String uri, String localName, String qName) {
        if (element != null && entity != null) {
            initEntityField(value.toString());
        }
        if (qName.equalsIgnoreCase(rootName)) {
            list.add(entity);
        }
        element = null;
    }

    @Override
    public void characters(char[] ch, int start, int length) {
        String content = new String(ch, start, length);
        value.append(content);
    }

    /**
     * 给对象的属性赋值
     *
     * @param elementValue 现有字段的值
     */
    private void initEntityField(String elementValue) {
        try {
            if (fieldList.stream().anyMatch(s -> s.equalsIgnoreCase(element))) {
                String name = fieldList.stream().filter(s -> s.equalsIgnoreCase(element)).findFirst().get();
                Field field = clazz.getDeclaredField(name);
                field.setAccessible(true);
                Object value = getValidValue(field, elementValue);
                field.set(entity, value);
            }

        } catch (Exception e) {
            System.out.println("set entity`s value error:" + clazz.getName() + "\t" + element + "\t" + e.getMessage());
        }

    }

    /**
     * 获取实例对象的属性的有效值,可根据需要继续增加其他基本类型
     */
    private static Object getValidValue(Field field, String content) {
        Object value = null;
        String flag = field.getGenericType().getTypeName();
        switch (flag) {
            case "java.lang.String":
                value = content;
                break;
            case "int":
                value = Integer.parseInt(content);
                break;
            case "java.util.Date":
                value = DateUtil.parseDate(content);
                break;
            default:
                break;
        }
        return value;
    }


    public List getList() {
        return list;
    }

}

第二段代码:工具类 XmlUtil。注意其中的 readXml 方法封装了上面的 MyXmlHandler。

package com.kernel.util;

import com.kernel.entity.User;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * XML helpers: writing an object's fields as child elements with JDOM2, and reading large XML
 * files into objects through the streaming {@link MyXmlHandler}.
 */
public class XmlUtil {

    /**
     * Appends one child element per declared field of {@code obj} to {@code element}.
     *
     * <p>The child's name is the upper-cased field name. {@code Date} values are formatted as
     * {@code yyyy-MM-dd HH:mm:ss}; every other value is written with {@code String.valueOf}, so a
     * {@code null} field is written as the text {@code "null"}.
     *
     * @param element parent element that receives the generated children
     * @param obj     object whose declared fields are written
     */
    public static void initCreateXML(Element element, Object obj) {
        // Reflect over every declared field, regardless of visibility.
        for (Field f : obj.getClass().getDeclaredFields()) {
            try {
                f.setAccessible(true);
                Element e = new Element(f.getName().toUpperCase());
                Object o = f.get(obj);
                if (o instanceof Date) {
                    // DateFormatUtils comes from commons-lang3.
                    e.setText(DateFormatUtils.format((Date) o, "yyyy-MM-dd HH:mm:ss"));
                } else {
                    e.setText(String.valueOf(o));
                }
                element.addContent(e);
            } catch (IllegalAccessException e1) {
                e1.printStackTrace();
            }
        }
    }

    /**
     * Parses an XML file into objects, using the simple class name of {@code entity} as the name
     * of the record element.
     *
     * @param file   XML file to read
     * @param entity sample object whose class the records are mapped onto
     * @return the parsed records; empty if parsing failed
     */
    public static List readXml(File file, Object entity) {
        return readXml(file, entity, null);
    }

    /**
     * Parses an XML file into objects with a streaming SAX parser. Element names are matched
     * against the field names of the class of {@code entity}.
     *
     * <p>The document is processed as a stream of events instead of being loaded as a whole tree
     * the way JDOM2 does, which keeps memory usage low for very large files.
     *
     * <p>Any failure is printed to standard error and whatever was collected before the failure
     * is discarded, i.e. an empty list is returned.
     *
     * @param file     XML file to read
     * @param entity   sample object whose class the records are mapped onto
     * @param rootName name of the element that delimits one record; {@code null} selects the
     *                 simple class name of {@code entity}
     * @return the parsed records; empty if parsing failed
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // raw List kept for compatibility with callers
    public static List readXml(File file, Object entity, String rootName) {
        List list = new ArrayList<>();
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            // NOTE(review): MyXmlHandler does not override error(), so validation problems are
            // not surfaced; external entities are also left enabled. Confirm that validation is
            // wanted and that input files are trusted.
            factory.setValidating(true);
            SAXParser parser = factory.newSAXParser();
            MyXmlHandler handler;
            if (rootName == null) {
                handler = new MyXmlHandler(entity);
            } else {
                handler = new MyXmlHandler(entity, rootName);
            }
            parser.parse(file, handler);
            list.addAll(handler.getList());
        } catch (Exception e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * Loads the whole file with JDOM2 and prints the number of children of the root element.
     * Kept for comparison with {@link #readXml(File, Object)}.
     *
     * @param file XML file to read
     */
    public static void readXmlByJDOM(File file) {
        SAXBuilder saxBuilder = new SAXBuilder();
        // try-with-resources so the stream is closed even when building the document fails.
        try (InputStream in = new FileInputStream(file)) {
            Document doc = saxBuilder.build(in);
            Element root = doc.getRootElement();
            List<Element> elementList = root.getChildren();
            System.out.println(elementList.size());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Demo entry point: parses a sample file and prints the number of records read. */
    public static void main(String[] args) {
        File file = new File("F:\\test\\fileTest\\user01.xml");
//        readXmlByJDOM(file);
        List users = readXml(file, new User());

        System.out.println(users.size());
    }

}

第三处代码:user实体类,字段上的一些注解可以先不管

package com.kernel.entity;

import com.fasterxml.jackson.annotation.JsonFormat;
import org.springframework.context.annotation.PropertySource;
import org.springframework.format.annotation.DateTimeFormat;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;
import java.io.Serializable;
import java.util.Date;
//persistence-api
/**
 * User entity mapped to the {@code user} table.
 *
 * <p>Besides the column-mapped properties, the class declares a public field {@code aq} and a
 * static field {@code ad}. Code that enumerates fields with {@code getDeclaredFields()} sees
 * those two as well, in addition to the mapped ones.
 */
@Entity
@Table(name = "user")
public class User implements Serializable {
    /** Primary key. */
    @Id
    @Column(name = "id")
    private String id;
    @Column(name = "username")
    private String username;
    @Column(name = "gender")
    private String gender;
    @Column(name = "age")
    private int age;
    @Column(name = "phone")
    private String phone;
    @Column(name = "email")
    private String email;
    @Column(name = "birth")
    @DateTimeFormat(pattern = "yyyy-MM-dd")// conversion of values coming from the front end to the back end
    private Date birth;
    @Column(name = "addtime")
    private Date addtime;
    @Column(name = "role")
    private String role;

    /** Public field without a column mapping, getter or setter. */
    public String aq;
    
    /** Static field; not read or written anywhere in this class. */
    private static String ad;
//    private static final String ad1 = "123tttvvv";

    /** Creates a user with all fields at their default values. */
    public User() {
    }

    /**
     * Creates a user with every column-mapped property set.
     *
     * @param id       primary key
     * @param username value for {@code username}
     * @param gender   value for {@code gender}
     * @param age      value for {@code age}
     * @param phone    value for {@code phone}
     * @param email    value for {@code email}
     * @param birth    value for {@code birth}
     * @param addtime  value for {@code addtime}
     * @param role     value for {@code role}
     */
    public User(String id, String username, String gender, int age, String phone, String email, Date birth, Date addtime, String role) {
        this.id = id;
        this.username = username;
        this.gender = gender;
        this.age = age;
        this.phone = phone;
        this.email = email;
        this.birth = birth;
        this.addtime = addtime;
        this.role = role;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public String getGender() {
        return gender;
    }

    public void setGender(String gender) {
        this.gender = gender;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public String getPhone() {
        return phone;
    }

    public void setPhone(String phone) {
        this.phone = phone;
    }

    public String getEmail() {
        return email;
    }

    public void setEmail(String email) {
        this.email = email;
    }
    @JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd")// conversion of values going from the back end to the front end
    public Date getBirth() {
        return birth;
    }

    public void setBirth(Date birth) {
        this.birth = birth;
    }

    public Date getAddtime() {
        return addtime;
    }

    public void setAddtime(Date addtime) {
        this.addtime = addtime;
    }

    public String getRole() {
        return role;
    }

    public void setRole(String role) {
        this.role = role;
    }
}

测试用的 XML 文件模板(为节省篇幅,这里只展示两条记录):



  
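<?xml version="1.0" encoding="UTF-8"?>
<users>
  <user>
    <ID>a0</ID>
    <USERNAME>jack0c5e5e8e-64f5-4346-9c3f-83aad186c7d3</USERNAME>
    <GENDER>m</GENDER>
    <AGE>12</AGE>
    <PHONE>110</PHONE>
    <EMAIL>86da9724-70a5-443f-a9f8-5cf86f7c4cec110@com</EMAIL>
    <BIRTH>2019-06-02 14:51:16</BIRTH>
    <ADDTIME>2019-06-02 14:51:16</ADDTIME>
    <ROLE>员工0</ROLE>
    <AQ>null</AQ>
    <AD>null</AD>
  </user>
  <user>
    <ID>a1</ID>
    <USERNAME>jack9bdc3093-efa6-46ad-b877-da3c256f5b6a</USERNAME>
    <GENDER>m</GENDER>
    <AGE>12</AGE>
    <PHONE>110</PHONE>
    <EMAIL>c0d1e737-2cec-41d5-8629-b5db44ccc640110@com</EMAIL>
    <BIRTH>2019-06-02 14:51:17</BIRTH>
    <ADDTIME>2019-06-02 14:51:17</ADDTIME>
    <ROLE>员工1</ROLE>
    <AQ>null</AQ>
    <AD>null</AD>
  </user>
</users>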
  

你可能感兴趣的:(java,超大xml文件解析,xml)