xlsx 又名 SheetJS 是一个强大的 JS 库,用于各种 Excel 操作
这里只记录一下解析 Excel 的方法
Excel
假设有如下表格:
解析成原始数据
/**
* 获取原始数据
* @param {File} file
* @param {Array} schemas
* schemas = [
* {
* name: '用户信息',
* schema: [
* {
* title: 'ID',
* key: 'id'
* },
* {
* title: '姓名',
* key: 'username'
* }
* ]
* }
* ]
*/
export function getRawExcelSheets (file, schemas = []) {
return new Promise(resolve => {
const reader = new FileReader()
reader.onload = (e) => {
const fileData = new Uint8Array(e.target.result)
const workbook = XLSX.read(fileData, { type: 'array' })
const result = []
// validate sheet names
const validSheetNamesError = validateSheetNames(workbook, schemas)
if (validSheetNamesError) {
return resolve([validSheetNamesError])
}
workbook.SheetNames.forEach((sheetName, sheetIndex) => {
// get sheet
const worksheet = workbook.Sheets[sheetName]
// convert to json data
// api ref: https://github.com/SheetJS/sheetjs#json
let rows = XLSX.utils.sheet_to_json(worksheet, {
header: 1,
defval: '',
blankrows: false
})
rows = trimSheetData(rows)
result.push(rows)
})
// validate sheet titles
const validSheetTitlesError = validateSheetTitles(result, schemas)
if (validSheetTitlesError) {
return resolve([validSheetTitlesError])
}
resolve([null, result])
}
reader.readAsArrayBuffer(file)
})
}
结果如下:
可以看到,直接变成了二维数组的结构
可是这样操作起来并不是非常的明了,比如 result[0]
代表工作表1,这里也就是 用户信息
这个工作表,而 result[0][0]
则是这个工作表的第一行也就是标题,这数组下表操作写起来太不明了了,如果直接是表格打印倒也没啥
解析成键值对的形式
好吧,根据上面的方法,再加利用一下,变成:
/**
* 获取 Excel 数据(变成键值对的形式)
* @param {File} file
* @param {Array} sheets
*/
export function getExcelSheets (file, sheets) {
return new Promise(async resolve => {
let [error, result] = await getRawExcelSheets(file, sheets)
if (error) {
return resolve([error])
}
result = result.map((rows, sheetIndex) => {
rows.shift()
return rows.map(row => {
const item = {}
// convert each row to Object
sheets[sheetIndex].columns.forEach(({ key }, colIndex) => {
// key => value
item[key] = row[colIndex]
})
return item
})
})
resolve([null, result])
})
}
结果如下:
完整代码
import XLSX from 'xlsx'
/**
* 验证工作表名称
* @param {Object}} workbook
* @param {Array} schemas
*/
function validateSheetNames (workbook, schemas) {
// get sheets names and titles
const sheetNames = schemas.map(({ name }) => name)
// validate sheet names
if (JSON.stringify(sheetNames) !== JSON.stringify(workbook.SheetNames)) {
return new Error('Sheet Names Mismatch!')
}
return null
}
/**
* 验证工作表的标题
* @param {Array} result
* @param {Array} schemas
*/
function validateSheetTitles (result, schemas) {
const sheetTitles = []
const workbookSheetTitles = []
schemas.forEach(({ schema }, sheetIndex) => {
sheetTitles[sheetIndex] = schema.map(({ title }) => title)
})
result.forEach((rows, sheetIndex) => {
const titles = []
for (let i = 0; i < schemas[sheetIndex].schema.length; i++) {
titles.push(rows[0][i])
}
workbookSheetTitles.push(titles)
})
if (JSON.stringify(workbookSheetTitles) !== JSON.stringify(sheetTitles)) {
return new Error('Sheet Header Titles Mismatch!')
}
return null
}
/**
* 清除表格空行、字段前后空白符
* @param rows
* @param colLength 每行总列数
* @returns {*}
*/
function trimSheetData (rows) {
return rows.filter(row => row.length > 0 && row.filter(option => option !== '').length > 0)
.map(row => row.map(content => {
if (Object.prototype.toString.call(content) === '[object String]') {
return content.trim()
}
return content
}))
}
/**
* 获取原始数据
* @param {File} file
* @param {Array} schemas
* schemas = [
* {
* name: '用户信息',
* schema: [
* {
* title: 'ID',
* key: 'id'
* },
* {
* title: '姓名',
* key: 'username'
* }
* ]
* }
* ]
*/
export function getRawExcelSheets (file, schemas = []) {
return new Promise(resolve => {
const reader = new FileReader()
reader.onload = (e) => {
const fileData = new Uint8Array(e.target.result)
const workbook = XLSX.read(fileData, { type: 'array' })
const result = []
// validate sheet names
const validSheetNamesError = validateSheetNames(workbook, schemas)
if (validSheetNamesError) {
return resolve([validSheetNamesError])
}
workbook.SheetNames.forEach((sheetName, sheetIndex) => {
// get sheet
const worksheet = workbook.Sheets[sheetName]
// convert to json data
// api ref: https://github.com/SheetJS/sheetjs#json
let rows = XLSX.utils.sheet_to_json(worksheet, {
header: 1,
defval: '',
blankrows: false
})
rows = trimSheetData(rows)
result.push(rows)
})
// validate sheet titles
const validSheetTitlesError = validateSheetTitles(result, schemas)
if (validSheetTitlesError) {
return resolve([validSheetTitlesError])
}
resolve([null, result])
}
reader.readAsArrayBuffer(file)
})
}
/**
* 获取 Excel 数据(变成键值对的形式)
* @param {File} file
* @param {Array} schemas
*/
export function getExcelSheets (file, schemas = []) {
return new Promise(async resolve => {
let [error, result] = await getRawExcelSheets(file, schemas)
if (error) {
return resolve([error])
}
result = result.map((rows, sheetIndex) => {
rows.shift()
return rows.map(row => {
const item = {}
// convert each row to Object
schemas[sheetIndex].schema.forEach(({ key }, colIndex) => {
// key => value
item[key] = row[colIndex]
})
return item
})
})
resolve([null, result])
})
}
调用如下:
const schemas = [
{
name: '用户信息',
schema: [
{
title: 'ID',
key: 'id'
},
{
title: '性别',
key: 'gender'
},
{
title: '姓名',
key: 'username'
},
{
title: '年龄',
key: 'age'
}
],
},
{
name: '城市信息',
schema: [
{
title: 'ID',
key: 'id'
},
{
title: '城市名称',
key: 'cityName'
},
{
title: '城市首字母',
key: 'cityInitial'
}
]
}
]
const [err, data] = await getRawExcelSheets(file, schemas)
if (err) return alert(err)
console.log(JSON.stringify(data, null, 4))
总结
主要是使用了一个 schemas 参数去描述这个 Excel 文件的结构,用于表格验证,也用于键值对的输出
demo 源码网址:https://github.com/RoamIn/Doraemon/tree/master/Excel
—— 2019/12/12 By Vinci, Mostly Sunny.