帮助测试做的工具。
测试需要对多个导出的结果进行比较,并将比较的结果写入到一个Excel中。
导出的文件格式包括:txt, csv, xls, xlsx
写入的文件格式为:xlsx
由于最终的写入方式是一致的,先说写入。
涉及到客户产品信息,代码仅举例子说明,具体的逻辑嵌套请自行处理(测试组要求比较数据的时候需要读取的时候同步写入)。
由于写入的数据量不确定,最少的时候可能只有几条,最多的时候将近两千万。因此,只需要考虑最多就行了,根据测试组要求,运行时间不是问题,但是数据记录要齐全(代码为将100000000个左右的数字写入Excel中)。
/**
 * Writes the demo data set (10001 * 1001 rows of 10 string cells each) into a
 * single .xlsx workbook, starting a new sheet every 1,000,000 rows.
 *
 * <p>The workbook is streamed through {@link SXSSFWorkbook} with a window of
 * 3000 rows kept in memory. The output stream, the workbook and the temporary
 * files backing the workbook are released even when writing fails.
 *
 * @param path directory the file is written to
 * @param name file name without the ".xlsx" extension
 * @throws IOException if the workbook cannot be written or closed
 */
public void writeInOneFile(String path, String name) throws IOException {
    // A sheet holds a little over 1.04 million rows; roll over at 1 million.
    final int maxRowsPerSheet = 1000000;
    SXSSFWorkbook wb = new SXSSFWorkbook(3000);
    try {
        int count = 1;
        int number = 0;
        Sheet sheet = wb.createSheet("result " + count);
        for (int i = 0; i < 10001; i++) {
            for (int j = 0; j < 1001; j++) {
                // Roll over lazily, right before the row that would not fit, so that
                // neither an empty trailing row nor an empty trailing sheet is created.
                if (number >= maxRowsPerSheet) {
                    count = count + 1;
                    sheet = wb.createSheet("result " + count);
                    System.out.println(sheet.getSheetName());
                    number = 0;
                }
                Row row = sheet.createRow(number);
                for (int k = 0; k < 10; k++) {
                    Cell cell = row.createCell(k);
                    cell.setCellValue("" + (i + j + k));
                }
                number = number + 1;
            }
        }
        try (FileOutputStream fos = new FileOutputStream(path + "\\" + name + ".xlsx")) {
            wb.write(fos);
            fos.flush();
        }
    } finally {
        try {
            wb.close();
        } finally {
            // Delete the temporary files SXSSF used to spill rows to disk.
            wb.dispose();
        }
    }
}
/**
 * Builds the full file name of an .xlsx file inside the given directory.
 *
 * @param path directory part; joined to the name with a backslash
 * @param name file name without extension
 * @return {@code path + "\" + name + ".xlsx"}
 */
public String createExcel(String path, String name) {
    StringBuilder fullName = new StringBuilder();
    fullName.append(path);
    fullName.append("\\");
    fullName.append(name);
    fullName.append(".xlsx");
    return fullName.toString();
}
/**
 * Writes the demo data set (10001 * 1001 rows of 10 string cells each) into a
 * series of .xlsx files named {@code name + " " + index}, starting a new file
 * every 1,000,000 rows.
 *
 * <p>Each workbook is written, closed and its temporary files are disposed
 * before the next one is created, so finished workbooks do not pile up.
 *
 * @param path directory the files are written to
 * @param name base file name; a space and a running index are appended
 * @throws IOException if a workbook cannot be written or closed
 * @throws InterruptedException never thrown by this implementation; kept in the
 *         signature so existing callers that catch it still compile
 */
public void writeInMoreFile(String path, String name) throws IOException, InterruptedException {
    // A sheet holds a little over 1.04 million rows; roll over at 1 million.
    final int maxRowsPerFile = 1000000;
    int index = 0;
    String fileName = createExcel(path, name + " " + index);
    SXSSFWorkbook wb = new SXSSFWorkbook(5000);
    try {
        Sheet sheet = wb.createSheet();
        int number = 0;
        for (int i = 0; i < 10001; i++) {
            for (int j = 0; j < 1001; j++) {
                // Roll over lazily, right before the row that would not fit, so that
                // neither an empty trailing row nor an empty trailing file is created.
                if (number >= maxRowsPerFile) {
                    saveWorkbook(wb, fileName);
                    releaseWorkbook(wb);
                    index = index + 1;
                    fileName = createExcel(path, name + " " + index);
                    wb = new SXSSFWorkbook(5000);
                    sheet = wb.createSheet();
                    System.out.println(fileName);
                    number = 0;
                }
                Row row = sheet.createRow(number);
                for (int k = 0; k < 10; k++) {
                    Cell cell = row.createCell(k);
                    cell.setCellValue("" + (i + j + k));
                }
                number = number + 1;
            }
        }
        saveWorkbook(wb, fileName);
    } finally {
        releaseWorkbook(wb);
    }
}

/** Writes the workbook to the given file, closing the stream even on failure. */
private void saveWorkbook(SXSSFWorkbook wb, String fileName) throws IOException {
    try (FileOutputStream fos = new FileOutputStream(fileName)) {
        wb.write(fos);
        fos.flush();
    }
}

/** Closes the workbook and deletes the temporary files that back it. */
private void releaseWorkbook(SXSSFWorkbook wb) throws IOException {
    try {
        wb.close();
    } finally {
        wb.dispose();
    }
}
最终的结果为写入一个excel的文件大小为380M左右,分开的文件一个为38M左右。
对于不同的文件采取不同的读取方式
txt和csv属于简单的文本读取,直接使用BufferedReader就可以了,这一类的代码很多,也是很基础的功能,这里不展示代码了。
先准备一些载体对象
/**
 * In-memory copy of one worksheet: its name and its rows.
 *
 * <p>Equality and hash code are based on the rows only; the sheet name is
 * ignored, so two sheets with different names but equal rows are equal.
 */
class CustomSheet {
    private String name;
    private List<CustomRow> rows;

    public CustomSheet(String name, List<CustomRow> rows) {
        this.name = name;
        this.rows = rows;
    }

    public CustomSheet() {
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public List<CustomRow> getRows() {
        return rows;
    }

    public void setRows(List<CustomRow> rows) {
        this.rows = rows;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;
        CustomSheet that = (CustomSheet) o;
        return Objects.equals(rows, that.rows);
    }

    @Override
    public int hashCode() {
        return rows != null ? rows.hashCode() : 0;
    }

    @Override
    public String toString() {
        return "CustomSheet{" +
                "name='" + name + '\'' +
                ", rows=" + rows +
                '}';
    }

    /**
     * Sorts the rows in place by ascending row number. Does nothing when no
     * row list has been set.
     */
    public void sort() {
        if (this.rows == null) {
            return;
        }
        Collections.sort(this.rows, new Comparator<CustomRow>() {
            @Override
            public int compare(CustomRow o1, CustomRow o2) {
                // Integer.compare cannot overflow, unlike subtracting the two numbers.
                return Integer.compare(o1.getNo(), o2.getNo());
            }
        });
    }
}
/**
 * In-memory copy of one worksheet row: its row number and its cells.
 *
 * <p>Equality and hash code are based on the cells only; the row number is
 * ignored. The cell list may be {@code null} (two of the constructors leave it
 * unset), and every method here tolerates that.
 */
class CustomRow {
    private int no;
    private List<CustomCell> cells;

    public CustomRow(int no, List<CustomCell> cells) {
        this.no = no;
        this.cells = cells;
    }

    public CustomRow(int no) {
        this.no = no;
    }

    public CustomRow() {
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;
        CustomRow row = (CustomRow) o;
        // Null-safe: a row created without a cell list must not throw here.
        return Objects.equals(cells, row.cells);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(cells);
    }

    @Override
    public String toString() {
        return "CustomRow{" +
                "no=" + no +
                ", cells=" + cells +
                '}';
    }

    /**
     * Sorts the cells in place by ascending column number. Does nothing when
     * no cell list has been set.
     */
    public void sort() {
        if (this.cells == null) {
            return;
        }
        Collections.sort(this.cells, new Comparator<CustomCell>() {
            @Override
            public int compare(CustomCell o1, CustomCell o2) {
                // Integer.compare cannot overflow, unlike subtracting the two numbers.
                return Integer.compare(o1.getNo(), o2.getNo());
            }
        });
    }

    public int getNo() {
        return no;
    }

    public void setNo(int no) {
        this.no = no;
    }

    public List<CustomCell> getCells() {
        return cells;
    }

    public void setCells(List<CustomCell> cells) {
        this.cells = cells;
    }
}
/**
 * In-memory copy of one worksheet cell: its column number and its value as text.
 *
 * <p>Equality and hash code are based on the value only; the column number is
 * ignored. The value may be {@code null}, and two cells with a {@code null}
 * value are equal.
 */
class CustomCell {
    private int no;
    private String value;

    public CustomCell(int no, String value) {
        this.no = no;
        this.value = value;
    }

    public CustomCell() {
    }

    public int getNo() {
        return no;
    }

    public void setNo(int no) {
        this.no = no;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;
        CustomCell cell = (CustomCell) o;
        // Null-safe: a cell without a value must not throw when compared.
        return Objects.equals(value, cell.value);
    }

    @Override
    public int hashCode() {
        // Same result as value.hashCode() for non-null values, 0 for null.
        return Objects.hashCode(value);
    }

    @Override
    public String toString() {
        return "CustomCell{" +
                "no=" + no +
                ", value='" + value + '\'' +
                '}';
    }
}
这里插入一句,在新建一个对象的时候,重写equals方法和toString方法是很有必要的,加入带参数的构造方法也是可以提高后续效率的(如果加入带参构造函数一定要同时加入无参构造函数)。
针对xls和xlsx,xls需要使用HSSFWorkbook来解析,xlsx使用XSSFWorkbook来解析,但是xlsx文件过大的时候,使用XSSFWorkbook会出现OOM的异常。
/**
 * Reads every sheet of an .xls workbook into {@link CustomSheet} objects keyed
 * by sheet name, then prints the number of sheets and the collected map.
 *
 * <p>Missing rows and missing cells are skipped. The file stream and the
 * workbook are closed when reading finishes or fails.
 *
 * @param name path of the .xls file to read
 * @throws IOException if the file cannot be opened or parsed
 */
public void read(String name) throws IOException {
    Map<String, CustomSheet> map = new HashMap<>();
    try (InputStream in = new FileInputStream(name);
         Workbook wb = new HSSFWorkbook(in)) {
        for (int i = 0, st = wb.getNumberOfSheets(); i < st; i++) {
            Sheet sheet = wb.getSheetAt(i);
            if (null == sheet) {
                continue;
            }
            String sheetName = sheet.getSheetName();
            CustomSheet cs = new CustomSheet();
            cs.setName(sheetName);
            cs.setRows(new ArrayList<>());
            // getLastRowNum() is the 0-based index of the last row, so the bound is
            // inclusive; "j < rt" would silently drop the last row of every sheet.
            for (int j = 0, rt = sheet.getLastRowNum(); j <= rt; j++) {
                Row row = sheet.getRow(j);
                if (null == row) {
                    continue;
                }
                CustomRow cr = new CustomRow(j);
                cr.setCells(new ArrayList<>());
                // getLastCellNum() is the last cell index PLUS ONE, so this bound is exclusive.
                for (int k = 0, ct = row.getLastCellNum(); k < ct; k++) {
                    Cell cell = row.getCell(k);
                    if (null == cell) {
                        continue;
                    }
                    cr.getCells().add(new CustomCell(k, getCellValue(cell)));
                }
                cr.sort();
                cs.getRows().add(cr);
            }
            cs.sort();
            map.put(sheetName, cs);
        }
    }
    System.out.println(map.size());
    System.out.println(map);
}
/**
 * Returns the content of a cell as text by trying the typed accessors in
 * order: string, numeric, boolean, date. The first accessor that does not
 * throw {@link IllegalStateException} wins.
 *
 * <p>NOTE(review): the numeric accessor is tried before the date accessor, so
 * date cells presumably come back as their numeric value - confirm whether
 * that is intended.
 *
 * @param cell the cell to read; may be {@code null}
 * @return the cell content as text, or an empty string for a {@code null} cell
 *         or when every accessor rejects the cell
 */
static String getCellValue(Cell cell) {
    if (cell == null) {
        return "";
    }
    try {
        return cell.getStringCellValue();
    } catch (IllegalStateException notString) {
        // Not a string cell: fall through to the numeric accessor.
    }
    try {
        return String.valueOf(cell.getNumericCellValue());
    } catch (IllegalStateException notNumeric) {
        // Not a numeric cell: fall through to the boolean accessor.
    }
    try {
        return String.valueOf(cell.getBooleanCellValue());
    } catch (IllegalStateException notBoolean) {
        // Not a boolean cell: fall through to the date accessor.
    }
    try {
        return String.valueOf(cell.getDateCellValue());
    } catch (IllegalStateException notDate) {
        notDate.printStackTrace();
        return "";
    }
}
第一种方式,就是直接使用XSSFWorkbook读取,但是容易出现OOM。但是, xlsx格式的文件本质上是一个ZIP文件,将XLSX(或者XLSM)文件扩展名改为zip,用解压软件解压,可以看到OOXML格式的文件内容。因此,使用SAX来读取,以避免OOM
首先,需要重写Handler
/**
 * Kinds of cell content the SAX sheet handler distinguishes when it converts
 * a raw cell value to text.
 */
enum XssfDataType {
    BOOL,
    ERROR,
    FORMULA,
    INLINESTR,
    SSTINDEX,
    NUMBER
}
/**
 * SAX content handler that collects the cells of a worksheet XML part into a
 * {@link CustomSheet}.
 *
 * <p>For every {@code <row>} element a {@link CustomRow} is created, numbered
 * with the element's {@code r} attribute. For every cell value ({@code <v>},
 * or {@code <is>} for inline strings) a {@link CustomCell} holding the value
 * as text is added to the current row. Rows that end without any cell are not
 * stored.
 *
 * <p>An instance keeps appending to the same {@link CustomSheet} for every
 * document it parses; use a fresh instance per sheet to keep sheets apart.
 */
class CustomHander extends DefaultHandler {
    /** Row currently being parsed; created when a {@code <row>} element starts. */
    private CustomRow row;
    /** Everything collected so far. */
    private CustomSheet data;
    private SharedStringsTable sharedStringsTable;
    private StylesTable stylesTable;
    /** True while character data belongs to a cell value and must be collected. */
    private boolean vIsOpen;
    /** Type of the cell currently being parsed; decides how its value is converted. */
    private XssfDataType nextDataType;
    /** Raw text of the cell value currently being collected. */
    private final StringBuilder value;
    /** 0-based column index of the cell currently being parsed. */
    private int cellNumber;
    private int rowNumber = -1;
    /** Number-format index of the current cell's style, or -1 when it has none. */
    private short formatIndex;

    public CustomHander(SharedStringsTable sharedStringsTable, StylesTable stylesTable) {
        this.sharedStringsTable = sharedStringsTable;
        this.stylesTable = stylesTable;
        this.data = new CustomSheet();
        this.data.setRows(new ArrayList<>());
        this.value = new StringBuilder();
    }

    @Override
    public void startElement(String uri, String localName,
                             String qName, Attributes attributes)
            throws SAXException {
        if ("v".equals(qName) || "is".equals(qName)) {
            // <v> holds the value of ordinary cells; cells with t="inlineStr" keep
            // their text inside <is> instead. Start collecting from an empty buffer.
            vIsOpen = true;
            value.setLength(0);
        } else if ("row".equals(qName)) {
            rowNumber = Integer.parseInt(attributes.getValue("r"));
            this.row = new CustomRow(rowNumber);
            this.row.setCells(new ArrayList<CustomCell>());
        } else if ("c".equals(qName)) {
            String r = attributes.getValue("r");
            int firstDigit = -1;
            // The cell reference is column letters followed by the row number, e.g. "AB12".
            for (int c = 0; c < r.length(); c++) {
                if (Character.isDigit(r.charAt(c))) {
                    firstDigit = c;
                    break;
                }
            }
            cellNumber = nameToColumn(r.substring(0, firstDigit));
            this.nextDataType = XssfDataType.NUMBER;
            this.formatIndex = -1;
            String cellType = attributes.getValue("t");
            String cellStyleStr = attributes.getValue("s");
            if ("b".equals(cellType)) {
                nextDataType = XssfDataType.BOOL;
            } else if ("e".equals(cellType)) {
                nextDataType = XssfDataType.ERROR;
            } else if ("inlineStr".equals(cellType)) {
                nextDataType = XssfDataType.INLINESTR;
            } else if ("s".equals(cellType)) {
                nextDataType = XssfDataType.SSTINDEX;
            } else if ("str".equals(cellType)) {
                nextDataType = XssfDataType.FORMULA;
            } else if (cellStyleStr != null) {
                // It's a number, but almost certainly one with a special style or
                // format; remember the format index to recognise dates and times.
                int styleIndex = Integer.parseInt(cellStyleStr);
                XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                this.formatIndex = style.getDataFormat();
            }
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if ("v".equals(qName) || "is".equals(qName)) {
            vIsOpen = false;
            if ("is".equals(qName) && nextDataType != XssfDataType.INLINESTR) {
                // <is> only carries the value of inline-string cells.
                return;
            }
            // Convert now: characters() may have been called more than once.
            this.row.getCells().add(new CustomCell(cellNumber, convertValue()));
        } else if ("row".equals(qName)) {
            // Keep every row that produced at least one cell, including rows whose
            // only cell is in the first column.
            if (!this.row.getCells().isEmpty()) {
                data.getRows().add(this.row);
            }
        }
    }

    /**
     * Converts the collected raw value to text according to the current cell type.
     *
     * @return the text, or {@code null} when a shared-string index cannot be parsed
     */
    private String convertValue() {
        String thisStr = null;
        switch (nextDataType) {
            case BOOL:
                char first = value.charAt(0);
                thisStr = first == '0' ? "FALSE" : "TRUE";
                break;
            case ERROR:
                thisStr = "\"ERROR:" + value.toString() + '"';
                break;
            case FORMULA:
                // The cached result of the formula, taken as is.
                thisStr = value.toString();
                break;
            case INLINESTR:
                XSSFRichTextString rtsi = new XSSFRichTextString(
                        value.toString());
                thisStr = rtsi.toString();
                break;
            case SSTINDEX:
                String sstIndex = value.toString();
                try {
                    int idx = Integer.parseInt(sstIndex);
                    XSSFRichTextString rtss = new XSSFRichTextString(
                            sharedStringsTable.getEntryAt(idx));
                    thisStr = rtss.toString();
                } catch (NumberFormatException ex) {
                    System.out.println("Failed to parse SST index '" + sstIndex
                            + "': " + ex.toString());
                }
                break;
            case NUMBER:
                thisStr = convertNumber(value.toString());
                break;
            default:
                thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                break;
        }
        return thisStr;
    }

    /**
     * Converts the raw text of a numeric cell: cells whose format index is one
     * of the date or time indexes listed below are formatted as
     * {@code yyyy-MM-dd} or {@code HH:mm}; other numbers are returned as
     * written, with exponent notation expanded to plain digits.
     */
    private String convertNumber(String n) {
        if (formatIndex == 14 || formatIndex == 31 || formatIndex == 57 || formatIndex == 58
                || (176 <= formatIndex && formatIndex <= 178)
                || (182 <= formatIndex && formatIndex <= 196)
                || (210 <= formatIndex && formatIndex <= 213) || (208 == formatIndex)) {
            // Date formats.
            SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
            Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(n));
            return dateFormat.format(date);
        }
        if (formatIndex == 20 || formatIndex == 32 || formatIndex == 183
                || (200 <= formatIndex && formatIndex <= 209)) {
            // Time formats.
            SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm");
            Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(n));
            return timeFormat.format(date);
        }
        if (n.contains("E") || n.contains("e")) {
            // Expand e.g. "1.5E+10" to "15000000000" without losing the magnitude.
            try {
                return new java.math.BigDecimal(n).stripTrailingZeros().toPlainString();
            } catch (NumberFormatException ex) {
                return n;
            }
        }
        return n;
    }

    public CustomSheet getData() {
        return data;
    }

    public void setData(CustomSheet data) {
        this.data = data;
    }

    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        // The parser may deliver one text node in several chunks, so append rather
        // than replace; text outside a cell value (e.g. formula text) is ignored.
        if (vIsOpen) {
            value.append(ch, start, length);
        }
    }

    /** Converts column letters to a 0-based column index ("A" is 0, "AA" is 26). */
    private int nameToColumn(String name) {
        int column = -1;
        for (int i = 0; i < name.length(); ++i) {
            int c = name.charAt(i);
            column = (column + 1) * 26 + c - 'A';
        }
        return column;
    }
}
然后,读取文件
/**
 * Reads every sheet of an .xlsx file with the SAX-based event API and returns
 * the sheets keyed by sheet name.
 *
 * <p>Each sheet is parsed with its own handler so that every map entry holds
 * only the rows of its own sheet. The package is opened read-only and is
 * released, together with each sheet stream, even when parsing fails.
 *
 * @param path path of the .xlsx file to read
 * @return sheet name mapped to the content of that sheet
 * @throws IOException if the file or one of its parts cannot be read
 * @throws OpenXML4JException if the file is not a valid OOXML package
 * @throws SAXException if a sheet part cannot be parsed
 */
private Map<String, CustomSheet> readXLSX(String path) throws IOException, OpenXML4JException, SAXException {
    Map<String, CustomSheet> map = new HashMap<>();
    OPCPackage opc = OPCPackage.open(path, PackageAccess.READ);
    try {
        XSSFReader reader = new XSSFReader(opc);
        XSSFReader.SheetIterator iterator = (XSSFReader.SheetIterator) reader.getSheetsData();
        SharedStringsTable sst = reader.getSharedStringsTable();
        StylesTable st = reader.getStylesTable();
        XMLReader xml = XMLReaderFactory.createXMLReader();
        while (iterator.hasNext()) {
            System.out.println("=====================================================================================================================");
            try (InputStream is = iterator.next()) {
                // A handler accumulates rows for as long as it lives, so a new one
                // is needed for each sheet.
                CustomHander handler = new CustomHander(sst, st);
                xml.setContentHandler(handler);
                xml.parse(new InputSource(is));
                String name = iterator.getSheetName();
                System.out.println(name);
                CustomSheet sheet = handler.getData();
                sheet.setName(name);
                map.put(name, sheet);
            }
        }
    } finally {
        // The package was opened read-only: discard it without saving.
        opc.revert();
    }
    System.out.println(map.size());
    return map;
}
最后,感谢网上的各位大神。