xlsx 大刀小用,Excel 表格解析

xlsx 又名 SheetJS 是一个强大的 JS 库,用于各种 Excel 操作

这里只记录一下解析 Excel 的方法

Excel

假设有如下表格:

xlsx 大刀小用,Excel 表格解析_第1张图片

解析成原始数据

/**
 * 获取原始数据
 * @param {File} file
 * @param {Array} schemas
 * schemas = [
 *   {
 *       name: '用户信息',
 *       schema: [
 *           {
 *               title: 'ID',
 *               key: 'id'
 *           },
 *           {
 *               title: '姓名',
 *               key: 'username'
 *           }
 *       ]
 *  }
 * ]
 */
export function getRawExcelSheets (file, schemas = []) {
    return new Promise(resolve => {
        const reader = new FileReader()

        reader.onload = (e) => {
            const fileData = new Uint8Array(e.target.result)
            const workbook = XLSX.read(fileData, { type: 'array' })
            const result = []

            // validate sheet names
            const validSheetNamesError = validateSheetNames(workbook, schemas)
            if (validSheetNamesError) {
                return resolve([validSheetNamesError])
            }

            workbook.SheetNames.forEach((sheetName, sheetIndex) => {
                // get sheet
                const worksheet = workbook.Sheets[sheetName]

                // convert to json data
                // api ref: https://github.com/SheetJS/sheetjs#json
                let rows = XLSX.utils.sheet_to_json(worksheet, {
                    header: 1,
                    defval: '',
                    blankrows: false
                })

                rows = trimSheetData(rows)

                result.push(rows)
            })


            // validate sheet titles
            const validSheetTitlesError = validateSheetTitles(result, schemas)
            if (validSheetTitlesError) {
                return resolve([validSheetTitlesError])
            }

            resolve([null, result])
        }

        reader.readAsArrayBuffer(file)
    })
}

结果如下:


xlsx 大刀小用,Excel 表格解析_第2张图片

可以看到,直接变成了二维数组的结构

可是这样操作起来并不是非常的明了,比如 result[0] 代表工作表1,这里也就是 用户信息 这个工作表,而 result[0][0] 则是这个工作表的第一行也就是标题,这数组下表操作写起来太不明了了,如果直接是表格打印倒也没啥

解析成键值对的形式

好吧,根据上面的方法,再加利用一下,变成:

/**
 * 获取 Excel 数据(变成键值对的形式)
 * @param {File} file 
 * @param {Array} sheets 
 */
export function getExcelSheets (file, sheets) {
    return new Promise(async resolve => {
        let [error, result] = await getRawExcelSheets(file, sheets)

        if (error) {
            return resolve([error])
        }

        result = result.map((rows, sheetIndex) => {
            rows.shift()

            return rows.map(row => {
                const item = {}

                // convert each row to Object
                sheets[sheetIndex].columns.forEach(({ key }, colIndex) => {
                    // key => value
                    item[key] = row[colIndex]
                })

                return item
            })
        })

        resolve([null, result])
    })
}

结果如下:


xlsx 大刀小用,Excel 表格解析_第3张图片

完整代码

import XLSX from 'xlsx'

/**
 * 验证工作表名称
 * @param {Object}} workbook 
 * @param {Array} schemas 
 */
function validateSheetNames (workbook, schemas) {
    // get sheets names and titles
    const sheetNames = schemas.map(({ name }) => name)

    // validate sheet names
    if (JSON.stringify(sheetNames) !== JSON.stringify(workbook.SheetNames)) {
        return new Error('Sheet Names Mismatch!')
    }

    return null
}

/**
 * 验证工作表的标题
 * @param {Array} result 
 * @param {Array} schemas 
 */
function validateSheetTitles (result, schemas) {
    const sheetTitles = []
    const workbookSheetTitles = []

    schemas.forEach(({ schema }, sheetIndex) => {
        sheetTitles[sheetIndex] = schema.map(({ title }) => title)
    })

    result.forEach((rows, sheetIndex) => {
        const titles = []

        for (let i = 0; i < schemas[sheetIndex].schema.length; i++) {
            titles.push(rows[0][i])
        }

        workbookSheetTitles.push(titles)
    })

    if (JSON.stringify(workbookSheetTitles) !== JSON.stringify(sheetTitles)) {
        return new Error('Sheet Header Titles Mismatch!')
    }

    return null
}

/**
 * 清除表格空行、字段前后空白符
 * @param rows
 * @param colLength 每行总列数
 * @returns {*}
 */
function trimSheetData (rows) {
    return rows.filter(row => row.length > 0 && row.filter(option => option !== '').length > 0)
        .map(row => row.map(content => {
            if (Object.prototype.toString.call(content) === '[object String]') {
                return content.trim()
            }

            return content
        }))
}

/**
 * 获取原始数据
 * @param {File} file
 * @param {Array} schemas
 * schemas = [
 *   {
 *       name: '用户信息',
 *       schema: [
 *           {
 *               title: 'ID',
 *               key: 'id'
 *           },
 *           {
 *               title: '姓名',
 *               key: 'username'
 *           }
 *       ]
 *  }
 * ]
 */
export function getRawExcelSheets (file, schemas = []) {
    return new Promise(resolve => {
        const reader = new FileReader()

        reader.onload = (e) => {
            const fileData = new Uint8Array(e.target.result)
            const workbook = XLSX.read(fileData, { type: 'array' })
            const result = []

            // validate sheet names
            const validSheetNamesError = validateSheetNames(workbook, schemas)
            if (validSheetNamesError) {
                return resolve([validSheetNamesError])
            }

            workbook.SheetNames.forEach((sheetName, sheetIndex) => {
                // get sheet
                const worksheet = workbook.Sheets[sheetName]

                // convert to json data
                // api ref: https://github.com/SheetJS/sheetjs#json
                let rows = XLSX.utils.sheet_to_json(worksheet, {
                    header: 1,
                    defval: '',
                    blankrows: false
                })

                rows = trimSheetData(rows)

                result.push(rows)
            })


            // validate sheet titles
            const validSheetTitlesError = validateSheetTitles(result, schemas)
            if (validSheetTitlesError) {
                return resolve([validSheetTitlesError])
            }

            resolve([null, result])
        }

        reader.readAsArrayBuffer(file)
    })
}

/**
 * 获取 Excel 数据(变成键值对的形式)
 * @param {File} file 
 * @param {Array} schemas 
 */
export function getExcelSheets (file, schemas = []) {
    return new Promise(async resolve => {
        let [error, result] = await getRawExcelSheets(file, schemas)

        if (error) {
            return resolve([error])
        }

        result = result.map((rows, sheetIndex) => {
            rows.shift()

            return rows.map(row => {
                const item = {}

                // convert each row to Object
                schemas[sheetIndex].schema.forEach(({ key }, colIndex) => {
                    // key => value
                    item[key] = row[colIndex]
                })

                return item
            })
        })

        resolve([null, result])
    })
}

调用如下:

const schemas = [
    {
        name: '用户信息',
        schema: [
            {
                title: 'ID',
                key: 'id'
            },
            {
                title: '性别',
                key: 'gender'
            },
            {
                title: '姓名',
                key: 'username'
            },
            {
                title: '年龄',
                key: 'age'
            }
        ],
    },
    {
        name: '城市信息',
        schema: [
            {
                title: 'ID',
                key: 'id'
            },
            {
                title: '城市名称',
                key: 'cityName'
            },
            {
                title: '城市首字母',
                key: 'cityInitial'
            }
        ]
    }
]

const [err, data] = await getRawExcelSheets(file, schemas)

if (err) return alert(err)

console.log(JSON.stringify(data, null, 4))

总结

主要是使用了一个 schemas 参数去描述这个 Excel 文件的结构,用于表格验证,也用于键值对的输出

demo 源码网址:https://github.com/RoamIn/Doraemon/tree/master/Excel

—— 2019/12/12 By Vinci, Mostly Sunny.

你可能感兴趣的:(xlsx 大刀小用,Excel 表格解析)