上个月工作中,我遇到了一个超大 XML 文件的解析任务。按照以往的做法先用 JDOM 试了一下,结果 JVM 内存溢出了。没办法,只好上网向万能的网友们求助。花了将近一上午的时间,终于写好了一个实用且符合编码习惯的工具,在这里分享给大家。
需要引入的jar包:
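<dependency>
    <groupId>javax.persistence</groupId>
    <artifactId>persistence-api</artifactId>
    <version>1.0.2</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.4</version>
</dependency>
<dependency>
    <groupId>org.jdom</groupId>
    <artifactId>jdom</artifactId>
    <version>2.0.2</version>
</dependency>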
第一段代码:基于 SAX 的自定义解析处理器 MyXmlHandler。
package com.kernel.util;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
/**
* 自定义xml解析工具
* 更新时间:2019/9/6
*/
public class MyXmlHandler extends DefaultHandler {
private Class> clazz; //反射对象
private List
第二段代码:工具类 XmlUtil,其中的 readXml 方法封装了上面的 MyXmlHandler。
package com.kernel.util;
import com.kernel.entity.User;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * XML helpers: reflection-based element generation with JDOM2, streaming
 * (SAX) parsing into a list through {@link MyXmlHandler}, and a whole-document
 * JDOM2 read kept for comparison.
 */
public class XmlUtil {

    /** Layout used when a {@link Date} field is written out as element text. */
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Appends one child element to {@code element} for every field declared on
     * the class of {@code obj}.
     *
     * <p>Each child is named after the field, upper-cased. {@link Date} values
     * are formatted as {@code yyyy-MM-dd HH:mm:ss}; every other value,
     * including numbers, is written with {@link String#valueOf(Object)}, so a
     * {@code null} field produces the text {@code "null"}. Static and private
     * fields are included because the loop uses {@code getDeclaredFields()}
     * together with {@code setAccessible(true)}.
     *
     * @param element parent element that receives the generated children
     * @param obj     instance whose declared fields are written out
     */
    public static void initCreateXML(Element element, Object obj) {
        for (Field field : obj.getClass().getDeclaredFields()) {
            try {
                field.setAccessible(true);
                Element child = new Element(field.getName().toUpperCase());
                Object value = field.get(obj);
                if (value instanceof Date) {
                    // DateFormatUtils comes from commons-lang3.
                    child.setText(DateFormatUtils.format((Date) value, DATE_PATTERN));
                } else {
                    // Covers Number as well: String.valueOf yields the same text
                    // as concatenating the value with an empty string.
                    child.setText(String.valueOf(value));
                }
                element.addContent(child);
            } catch (IllegalAccessException e) {
                // Best effort: report the field that could not be read, keep going.
                e.printStackTrace();
            }
        }
    }

    /**
     * Parses an XML file without specifying a relative node name.
     *
     * @param file   XML file to parse
     * @param entity instance of the type the XML records are mapped onto
     * @return the parsed objects; empty if parsing failed
     * @see #readXml(File, Object, String)
     */
    @SuppressWarnings("rawtypes")
    public static List readXml(File file, Object entity) {
        return readXml(file, entity, null);
    }

    /**
     * Parses an XML file with a SAX parser and {@link MyXmlHandler}, so the
     * document is streamed instead of being loaded whole as JDOM2 does.
     *
     * <p>Any exception raised while configuring the parser or parsing is
     * printed and an empty list is returned. The list is left raw because the
     * element type produced by {@code MyXmlHandler#getList()} is not visible
     * here (presumably instances of {@code entity}'s class; confirm against
     * the handler).
     *
     * @param file     XML file to parse
     * @param entity   instance of the type the XML records are mapped onto
     * @param rootName relative node name handed to the handler, or {@code null}
     *                 to use the handler's single-argument constructor
     * @return the parsed objects; empty if parsing failed
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static List readXml(File file, Object entity, String rootName) {
        List list = new ArrayList<>();
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            factory.setValidating(true);
            SAXParser parser = factory.newSAXParser();
            MyXmlHandler handler = (rootName == null)
                    ? new MyXmlHandler(entity)
                    : new MyXmlHandler(entity, rootName);
            parser.parse(file, handler);
            list.addAll(handler.getList());
        } catch (Exception e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * Loads the whole document with JDOM2 and prints how many children the
     * root element has. Kept as the full-in-memory counterpart of
     * {@link #readXml(File, Object, String)}.
     *
     * @param file XML file to load
     */
    public static void readXmlByJDOM(File file) {
        SAXBuilder saxBuilder = new SAXBuilder();
        // try-with-resources closes the stream even when build() throws.
        try (InputStream in = new FileInputStream(file)) {
            Document doc = saxBuilder.build(in);
            Element root = doc.getRootElement();
            List<Element> elementList = root.getChildren();
            System.out.println(elementList.size());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: parses a user XML file and prints the record count.
     *
     * @param args optional; {@code args[0]} overrides the default sample path
     */
    @SuppressWarnings("rawtypes")
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "F:\\test\\fileTest\\user01.xml";
        File file = new File(path);
        // Swap in readXmlByJDOM(file) to compare against full DOM loading.
        List users = readXml(file, new User());
        System.out.println(users.size());
    }
}
第三段代码:User 实体类,字段上的一些注解可以先不管。
package com.kernel.entity;
import com.fasterxml.jackson.annotation.JsonFormat;
import org.springframework.context.annotation.PropertySource;
import org.springframework.format.annotation.DateTimeFormat;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Table;
import java.io.Serializable;
import java.util.Date;
//persistence-api
/**
 * Persistent user record mapped to the {@code user} table.
 *
 * <p>{@code XmlUtil.initCreateXML} writes one XML element per declared field
 * of the object it is given, so adding, removing or renaming a field here
 * changes the XML generated for this class.
 */
@Entity
@Table(name = "user")
public class User implements Serializable {

    @Id
    @Column(name = "id")
    private String id;

    @Column(name = "username")
    private String username;

    @Column(name = "gender")
    private String gender;

    @Column(name = "age")
    private int age;

    @Column(name = "phone")
    private String phone;

    @Column(name = "email")
    private String email;

    // Conversion of values coming from the front end to the back end.
    @Column(name = "birth")
    @DateTimeFormat(pattern = "yyyy-MM-dd")
    private Date birth;

    @Column(name = "addtime")
    private Date addtime;

    @Column(name = "role")
    private String role;

    // Fields without a column mapping; presumably here to exercise the
    // reflection-based XML utilities (confirm before removing).
    public String aq;
    private static String ad;
    // private static final String ad1 = "123tttvvv";

    /** Creates an empty user; every field keeps its default value. */
    public User() {
    }

    /**
     * Creates a user with every mapped column populated.
     *
     * @param id       primary key
     * @param username value of the {@code username} column
     * @param gender   value of the {@code gender} column
     * @param age      value of the {@code age} column
     * @param phone    value of the {@code phone} column
     * @param email    value of the {@code email} column
     * @param birth    value of the {@code birth} column
     * @param addtime  value of the {@code addtime} column
     * @param role     value of the {@code role} column
     */
    public User(String id, String username, String gender, int age, String phone, String email, Date birth, Date addtime, String role) {
        this.id = id;
        this.username = username;
        this.gender = gender;
        this.age = age;
        this.phone = phone;
        this.email = email;
        this.birth = birth;
        this.addtime = addtime;
        this.role = role;
    }

    /** @return the primary key */
    public String getId() {
        return this.id;
    }

    /** @param id the primary key to set */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the user name */
    public String getUsername() {
        return this.username;
    }

    /** @param username the user name to set */
    public void setUsername(String username) {
        this.username = username;
    }

    /** @return the gender */
    public String getGender() {
        return this.gender;
    }

    /** @param gender the gender to set */
    public void setGender(String gender) {
        this.gender = gender;
    }

    /** @return the age */
    public int getAge() {
        return this.age;
    }

    /** @param age the age to set */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the phone number */
    public String getPhone() {
        return this.phone;
    }

    /** @param phone the phone number to set */
    public void setPhone(String phone) {
        this.phone = phone;
    }

    /** @return the e-mail address */
    public String getEmail() {
        return this.email;
    }

    /** @param email the e-mail address to set */
    public void setEmail(String email) {
        this.email = email;
    }

    /**
     * Conversion of the value going from the back end to the front end.
     *
     * @return the birth date
     */
    @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd")
    public Date getBirth() {
        return this.birth;
    }

    /** @param birth the birth date to set */
    public void setBirth(Date birth) {
        this.birth = birth;
    }

    /** @return the value of the {@code addtime} column */
    public Date getAddtime() {
        return this.addtime;
    }

    /** @param addtime the {@code addtime} value to set */
    public void setAddtime(Date addtime) {
        this.addtime = addtime;
    }

    /** @return the role */
    public String getRole() {
        return this.role;
    }

    /** @param role the role to set */
    public void setRole(String role) {
        this.role = role;
    }
}
测试用的 XML 文件模板,为节省篇幅,这里只展示两条记录:
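(注:原文的 XML 标签在排版时丢失;下面的字段节点名按 initCreateXML 的规则(字段名转大写)还原,根节点 USERS 与记录节点 USER 的名称仅为示意。)
<USERS>
    <USER>
        <ID>a0</ID>
        <USERNAME>jack0c5e5e8e-64f5-4346-9c3f-83aad186c7d3</USERNAME>
        <GENDER>m</GENDER>
        <AGE>12</AGE>
        <PHONE>110</PHONE>
        <EMAIL>86da9724-70a5-443f-a9f8-5cf86f7c4cec110@com</EMAIL>
        <BIRTH>2019-06-02 14:51:16</BIRTH>
        <ADDTIME>2019-06-02 14:51:16</ADDTIME>
        <ROLE>员工0</ROLE>
        <AQ>null</AQ>
        <AD>null</AD>
    </USER>
    <USER>
        <ID>a1</ID>
        <USERNAME>jack9bdc3093-efa6-46ad-b877-da3c256f5b6a</USERNAME>
        <GENDER>m</GENDER>
        <AGE>12</AGE>
        <PHONE>110</PHONE>
        <EMAIL>c0d1e737-2cec-41d5-8629-b5db44ccc640110@com</EMAIL>
        <BIRTH>2019-06-02 14:51:17</BIRTH>
        <ADDTIME>2019-06-02 14:51:17</ADDTIME>
        <ROLE>员工1</ROLE>
        <AQ>null</AQ>
        <AD>null</AD>
    </USER>
</USERS>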