node使用json2csv将大型json转化为csv存入本地失败

node 使用 json2csv 将大型 json 转化为 csv 存入本地失败

  • 原始方法
  • 思路1:换插件,使用 papaparse(失败)
  • 思路2:采用循环的方式批量转换成csv,然后拼接成字符串,最后将字符串写入文件(失败)
  • 思路3:分批次转换成csv,然后采用流式写入的方式(成功)
  • 分析:归根结底还是字符串长度太大

原始方法

/**
 * Convert `data` to CSV in a single pass and save it to a local file.
 *
 * The complete CSV text is produced as one string before anything is written.
 * Conversion errors are caught and reported with `console.error`; the outcome
 * of the write is only reported through `console.log`.
 *
 * @param data   Records handed to `Parser#parse`.
 * @param fields Value used as the Parser's `fields` option.
 * @param path   Destination file path.
 */
async function writeFile(data, fields, path) {
    // Reports the result of the asynchronous write.
    const reportWriteResult = (err) => {
        console.log(err ? '写入失败' : '写入成功')
    };

    try {
        const csv = new Parser({fields}).parse(data);
        console.log('开始写入文件……')
        fs.writeFile(path, csv, 'utf8', reportWriteResult);
    } catch (err) {
        console.error(err);
    }
}

问题:RangeError: Invalid string length

RangeError: Invalid string length
    at D:\zj\nodejs-server -model\node_modules\json2csv\dist\json2csv.cjs.js:1096:45
    at Array.reduce (<anonymous>)
    at fastJoin (D:\zj\nodejs-server -model\node_modules\json2csv\dist\json2csv.cjs.js:1086:14)
    at JSON2CSVParser.processData (D:\zj\nodejs-server -model\node_modules\json2csv\dist\json2csv.cjs.js:1507:14)
    at JSON2CSVParser.parse (D:\zj\nodejs-server -model\node_modules\json2csv\dist\json2csv.cjs.js:1465:23)
    at writeFile (D:\zj\nodejs-server -model\app\controller\test\util.js:52:28)
    at parseTrainingData (D:\zj\nodejs-server -model\app\controller\test\testData.js:153:11)
    at process._tickCallback (internal/process/next_tick.js:68:7)

思路1:换插件,使用 papaparse(失败)

papaparse

// Convert `data` back to CSV with papaparse; the whole result is held in the single variable `csv`.
var csv = Papa.unparse(data);

结果:RangeError: Invalid string length(注意:下面的堆栈与"原始方法"中的完全相同,指向的仍是 json2csv,似乎是直接沿用了之前的报错信息,而不是 papaparse 自身的堆栈)

RangeError: Invalid string length
    at D:\zj\nodejs-server -model\node_modules\json2csv\dist\json2csv.cjs.js:1096:45
    at Array.reduce (<anonymous>)
    at fastJoin (D:\zj\nodejs-server -model\node_modules\json2csv\dist\json2csv.cjs.js:1086:14)
    at JSON2CSVParser.processData (D:\zj\nodejs-server -model\node_modules\json2csv\dist\json2csv.cjs.js:1507:14)
    at JSON2CSVParser.parse (D:\zj\nodejs-server -model\node_modules\json2csv\dist\json2csv.cjs.js:1465:23)
    at writeFile (D:\zj\nodejs-server -model\app\controller\test\util.js:52:28)
    at parseTrainingData (D:\zj\nodejs-server -model\app\controller\test\testData.js:153:11)
    at process._tickCallback (internal/process/next_tick.js:68:7)

思路2:采用循环的方式批量转换成csv,然后拼接成字符串,最后将字符串写入文件(失败)

/**
 * Convert `data` to CSV in batches, join every batch into one string, and
 * write that string to a file with a single `fs.writeFile` call.
 *
 * Only the first batch is converted with the header row; every later batch is
 * converted with `header: false` and appended after a newline. The full CSV
 * text is still held in one string before it is written.
 *
 * @param data   Records to convert; batches are taken with `data.slice`.
 * @param fields Value used as the Parser's `fields` option.
 * @param path   Destination file path.
 */
async function writeFileStream(data, fields, path) {
    const BATCH_SIZE = 1000;
    const BATCH_NUM = data.length / BATCH_SIZE;
    console.log("BATCH_NUM:", BATCH_NUM);

    // First batch: converted with the header row.
    const firstRows = data.slice(0, BATCH_SIZE);
    const headerParser = new Parser({fields});
    let csv = '';
    csv += headerParser.parse(firstRows);

    // Remaining batches: no header, each one appended after a line break.
    for (let batchIndex = 1; batchIndex < BATCH_NUM; batchIndex++) {
        console.log('batch_index:', batchIndex)
        const start = batchIndex * BATCH_SIZE;
        const rows = data.slice(start, start + BATCH_SIZE);
        const rowParser = new Parser({fields: fields, header: false});
        const csvPart = rowParser.parse(rows);
        csv += '\n';
        csv += csvPart;
    }

    console.log('开始写入文件……')
    fs.writeFile(path, csv, 'utf8', (err) => {
        console.log(err ? '写入失败' : '写入成功')
    });
}

结果:RangeError: Invalid string length

思路3:分批次转换成csv,然后采用流式写入的方式(成功)


/**
 * Convert a large array of records to CSV and stream it to a local file.
 *
 * Rows are converted in batches of BATCH_SIZE and each batch is written to a
 * write stream straight away, so the complete CSV never has to exist as one
 * string (which is what raises "RangeError: Invalid string length").
 *
 * The first batch is always written, with the header row; later batches are
 * written without a header and are preceded by a newline.
 *
 * @param {Array} data - Records to export; batches are taken with `data.slice`.
 * @param {Array} fields - Value used as the Parser's `fields` option.
 * @param {string} path - Destination file path.
 * @returns {Promise<void>} Resolves once the stream has emitted 'finish'
 *     (all data handed to the file system); rejects if conversion or writing fails.
 */
async function writeFileStream(data, fields, path) {
    const BATCH_SIZE = 1000;
    const BATCH_NUM = Math.ceil(data.length / BATCH_SIZE);
    console.log("BATCH_NUM:", BATCH_NUM);

    const writerStream = fs.createWriteStream(path);

    // Settles when the stream is completely flushed or fails. The listeners are
    // attached before any write so that no 'error' event can go unhandled.
    const finished = new Promise((resolve, reject) => {
        writerStream.on('finish', resolve);
        writerStream.on('error', reject);
    });
    // Prevents an "unhandled rejection" report when we bail out for another
    // reason before `finished` is awaited.
    finished.catch(() => {});

    // Waits until the stream can accept more data, or fails together with it.
    const waitForDrain = () => Promise.race([
        new Promise((resolve) => writerStream.once('drain', resolve)),
        finished,
    ]);

    try {
        // The parsers only depend on `fields`, so build them once.
        const headerParser = new Parser({fields});
        const rowParser = new Parser({fields: fields, header: false});

        let batchIndex = 0;
        do {
            if (batchIndex > 0) {
                console.log('batch_index:', batchIndex)
            }
            const from = batchIndex * BATCH_SIZE;
            const rows = data.slice(from, from + BATCH_SIZE);
            // Only the first chunk carries the header; later chunks need a
            // leading line break because parse() output has no trailing one.
            const chunk = batchIndex === 0
                ? headerParser.parse(rows)
                : `\n${rowParser.parse(rows)}`;

            // write() returning false means the internal buffer is full:
            // pause until 'drain' instead of queueing every batch in memory.
            if (!writerStream.write(chunk, 'utf8')) {
                await waitForDrain();
            }
            batchIndex++;
        } while (batchIndex < BATCH_NUM);
    } catch (err) {
        // Release the file descriptor before reporting the failure to the caller.
        writerStream.destroy();
        throw err;
    }

    // Mark the end of the file and wait until everything has been flushed.
    writerStream.end();
    await finished;
    console.log("文件写入完成");
}

结果:成功!!!

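分析:归根结底还是单个字符串的长度超过了 V8 允许的上限(该上限可通过 require('buffer').constants.MAX_STRING_LENGTH 查看)。原始方法、思路1 和思路2 最终都要把全部 CSV 内容放进同一个字符串,所以都会抛出 RangeError: Invalid string length;思路3 每次只转换并写入一小批数据,任何时刻单个字符串都很小,因此能够成功。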

你可能感兴趣的:(深度学习)