python解析excel到文件系统,php解析入库

phpphpspreadsheet解析excel是真的垃圾…非常耗内存,比如一个10W条的excel文件,内存一定溢出,现在的解决方案是用pythonxlrd(比openpyxl快的多) 解析excel到文件系统,php读取文件按照业务配置文件校验单元格并写入数据库。

  1. ajax上传excel文件。
  2. php执行python脚本解析excel到文件系统 (>/dev/null & 为了立即执行exec的下一行代码),ajax立刻返回。
   exec(PYTHON." " . realpath(ROOT_PATH . '/../') . '/shell/excel.py' . " {
       $file} >/dev/null &");
  1. 设置定时器每3秒请求一次批量导入的状态。

效果如下:业务导入代码继承Product抽象类即可。
python解析excel到文件系统,php解析入库_第1张图片

解析excel并生成文件

#解析excel并生成文件
import xlrd,time,json,os,sys

# if(len(sys.argv)!=2) :
#     print('需要1个参数')
#     print('***********************')
#     print('用法: python3 '+sys.argv[0] + ' 文件名 \n')
#     sys.exit(2)

# 数字转字符
def changeNumToChar(s):
        '''
        把数字转换成相应的字符,1-->'A' 27-->'AA'
        '''
        a = [chr(i) for i in range(65, 91)]
        ss = ''
        b = []
        if s <= 26:
            b = [s]
        else:
            while s > 26:
                c = s // 26  # 商
                d = s % 26  # 余数
                b = b + [d]
                s = c
            b = b + [s]
        b.reverse()
        for i in b:
            ss = ss + a[i - 1]
        return ss

full = sys.argv[1]  #文件名
if not os.path.exists(full):
    sys.exit(3)
filename = os.path.basename(full)

statusname = sys.path[0] + '/status/lock'  #锁文件  保证只有一个请求处理excel
with open(statusname, 'w') as fo:
    fo.write(json.dumps({
     'file':filename,'python': 'start'}))
start = time.time()
wb = xlrd.open_workbook(full)
sheet = wb.sheets()[0] 
num = 0
txtname = sys.path[0]+'/txt/' + filename[0:filename.rfind('.')]
if os.path.exists(txtname):
    os.remove(txtname)

with open(txtname, 'a',encoding='utf8') as file_object:
    for i in range(1,sheet.nrows):
        sub_data = {
     }  #行数据
        for j in range(0,sheet.ncols):
            sub_data[changeNumToChar(j+1)] = sheet.cell_value(i, j)
        # print(sub_data)
        file_object.write(json.dumps(sub_data)+"\r\n")
end = time.time()
#导入成功,生成txt文件之后 python解析excel成功 状态写入lock文件
with open(statusname, 'w') as foa:
    foa.write(json.dumps({
     'file':filename,'python': 'ok'}))

sys.exit(1)

php处理文件parse.php


include_once realpath(__DIR__.'/bootstrap.php');
include_once realpath(__DIR__.'/Factory.php');
set_time_limit(0);
// if(count($argv)!==3){
     
//     $msg=<<
//         需要2个参数
//         ***********************
//         用法: php $argv[0] 文件名 类型
//     eof;
//     echo $msg.PHP_EOL;
//     die;
// }
$file = $argv[1]; //文件名
$type = strtolower($argv[2]); //类型 site等
$full = realpath(__DIR__).'/txt/'.substr($file,0,strrpos($file,".")); //文件的路径+文件名
if (!file_exists($full)) die(4);  //txt文件不存在
$fileError = realpath(__DIR__).'/error/'.substr($file,0,strrpos($file,".")); //错误日志
$fileStatus = realpath(__DIR__).'/status/lock'; //完成状态log

if(file_exists($fileError)){
     
    unlink($fileError);
}

$product = Factory::getFactory($type);
if(!$product) die(5);
$product->setDb($db);
$config = $product->getImportConfig();
if(!$config) die(6);
//唯一的字段
$unique = [];
foreach ($config['col'] as $v) {
     
    if(!isset($v['check']) || !is_array($v['check'])) continue;
    if(in_array($product::check_is_exist_no,$v['check'])){
     
        $unique[$v['name']] = [];
    }
}
file_put_contents($fileStatus,json_encode(['file'=>$file,'python'=> 'ok','php'=>'start']));

$flag = true;
$num = 1; //实际是从第二行开始的
$allData = [];  //总数据
$file_handle = fopen($full, "r");
while (!feof($file_handle)) {
     
    $line = fgets($file_handle);
    $num++;
    if(is_null($line)||$line =='') continue;
    $tmpRowData = json_decode($line,true);
    if(!is_array($tmpRowData)) continue;
    $rowData = [];  //数组键值格式化成配置文件中键值
    foreach ($config['col'] as $v) {
     
        $rowData[$v['name']] = $tmpRowData[$v['index']]; //数组键值格式化成配置文件中键值
        if(!isset($v['check']) || !is_array($v['check'])) continue;
        $result = $product->check($tmpRowData[$v['index']],$v['name'],$v['check']);
        if(count($result)>0){
     
            // 本行校验未通过 写入数据库
            if($flag) $flag = false;
            file_put_contents($fileError, '第'.$num.'行 '.$v['title'].': '.$tmpRowData[$v['index']].' '.implode('、',$result).PHP_EOL, FILE_APPEND);
        }
        //文件系统校验唯一字段
        if(in_array($product::check_is_exist_no,$v['check'])){
     
            if(in_array($tmpRowData[$v['index']],$unique[$v['name']])){
     
                if(count($result)==0 || ($count($result)>0 &&!in_array($product::check_is_exist_text,$result))){
     
                    // 本行校验未通过 写入数据库
                    if($flag) $flag = false;
                    file_put_contents($fileError, '第'.$num.'行 '.$v['title'].': '.$tmpRowData[$v['index']].' '.$product::check_is_exist_text.PHP_EOL, FILE_APPEND);
                }
            }
        }
        if(in_array($v['name'],array_keys($unique))){
     
            array_push($unique[$v['name']],$tmpRowData[$v['index']]);
        }
    }
    array_push($allData,$rowData);
}
fclose($file_handle);
if($flag){
     
    $tmp = array_chunk($allData,50);
    // $db->query('BEGIN');
    foreach ($tmp as $v) {
     
        $st = $product->import($v);
        if(!$st){
     
            // $db->query('ROLLBACK');
            file_put_contents($fileStatus,json_encode(['file'=>$file,'python'=> 'ok','php'=>'ok','check'=>true,'import'=>false]));
            die(25);
        }
    }
    // $db->query('COMMIT');
    file_put_contents($fileStatus,json_encode(['file'=>$file,'python'=> 'ok','php'=>'ok','check'=>true,'import'=>true]));
    die(1);
}else{
     
    //校验失败
    file_put_contents($fileStatus,json_encode(['file'=>$file,'python'=> 'ok','php'=>'ok','check'=>false]));
    die(25);
}

Factory.php

 
class Factory
{
     

    const dir = 'code';

    static function getFactory($product)
    {
     
        $file = realpath(__DIR__.'/'.self::dir.'/'.$product.'/Import'.$product.'.php');
        if(!$file) return false;
        require_once $file;
        $cla = 'Import'.$product;
        return new $cla;
    }
}

Product.php



abstract class Product
{
     
    public $dir = 'code';

    const check_not_empty_no = 1;
    const check_not_empty_text = '不能为空';
    const check_is_exist_no = 2;
    const check_is_exist_text = '信息已存在'; //唯一
    const check_no_relevance_no = 3;
    const check_no_relevance_text = '关联信息不存在';
    const check_num_no = 4;
    const check_num_text = '格式必须是数字';
    const check_date_no = 5;
    const check_date_text = '格式必须是 年-月-日';
    const check_time_no = 6;
    const check_time_text = '格式必须是 年-月-日 时:分:秒';


    abstract function import($rows);

    /**
     * 检查唯一
     *
     * @param [type] $val 值
     * @param [type] $col 列名
     * @return boolean
     */
    abstract function checkUnique($val, $col);

    /**
     * 检查关联信息是否存在
     *
     * @param [type] $val 值
     * @param [type] $col 列名
     * @return boolean
     */
    abstract function checkRelevance($val, $col);

    /**
     * 校验单元格
     *
     * @param [type] $val   值
     * @param [type] $col   列名
     * @param [type] $rule  规则
     * @return array
     */
    function check($val,$col,$rule){
     
        $result = [];
        if (!isset($rule) || !is_array($rule)) return $result;
        foreach ($rule as $v) {
     
            switch ($v) {
     
                case self::check_not_empty_no:
                    if ($val == '') array_push($result, self::check_not_empty_text);
                    break;
                case self::check_is_exist_no:
                    if (!$this->checkUnique($val, $col)) array_push($result, self::check_is_exist_text);
                    break;
                case self::check_no_relevance_no:
                    if (!$this->checkRelevance($val, $col)) array_push($result, self::check_no_relevance_text);
                    break;
                case self::check_num_no:
                    if (!is_numeric($val)) array_push($result, self::check_num_text);
                    break;
                // case self::check_date_no:
                //     if (!is_numeric($val)) array_push($result, self::check_date_text);
                //     break;
                // case self::check_time_no:
                //     if (!is_numeric($val)) array_push($result, self::check_time_text);
                //     break;
            }
        }
        return $result;
    }

    function getImportConfig(){
     
        $file = realpath(__DIR__.'/'.$this->dir.'/'.$this->product.'/import.json');
        if(!$file) return false;
        $c = file_get_contents($file);
        if(!$c) return false;
        $j = json_decode($c,true);
        return is_null($j)?false:$j;
    }

    function getExportConfig(){
     
        $file = realpath(__DIR__.'/'.$this->dir.'/'.$this->product.'/export.json');
        if(!$file) return false;
        $c = file_get_contents($file);
        if(!$c) return false;
        $j = json_decode($c,true);
        return is_null($j)?false:$j;
    }

    function setDb($db){
     
        CW::SetDb('',$db);
    }
}

importBase.vue

<template>
  <div>
    <b-overlay :show="loading">
      <template v-slot:overlay>
        <div class="text-center"><b-spinner></b-spinner></div>
        <div><span style="font-size: 20px;font-weight: 500;color: #007bff;">{
     {
      msg }}</span></div>
      </template>
      <b-row>
        <b-col cols="12">
          <iq-card>
            <template v-slot:headerTitle>
              <h4 class="card-title">{
     {
      title }}</h4>
            </template>
            <template v-slot:body>
              <b-row>
                <b-col offset="1" cols="3">
                  <b-file id="upload" v-model="file" placeholder="未选择文件" accept="" browse-text="选择文件"></b-file>
                </b-col>
                <b-col cols="8">
                  <div>
                    <b-button variant="success" class="mb-3 mr-1" @click="upload">导入</b-button>
                    <!-- <b-button variant="danger" class="mb-3 mr-1">清除</b-button> -->
                  </div>
                </b-col>
              </b-row>
              <b-row>
                <b-col offset="1">
                  <b-alert v-if="msg" style="width: max-content;" show variant="dark">{
     {
      msg }}</b-alert>
                </b-col>
              </b-row>
              <b-row>
                <b-col offset="1">
                  <span v-for="(v,i) in error" :key="i" class="error-span">{
     {
      v }}</span>
                </b-col>
              </b-row>
            </template>
          </iq-card>
        </b-col>
      </b-row>
    </b-overlay>
  </div>
</template>
<script>
import {
      vito } from '@/config/pluginInit'
import {
      uploadAndParse, checkStatus } from '@/api/excel.js'
export default {
     
  name: 'ImportBase',
  components: {
      },
  props: {
     
    title: {
     
      type: String,
      default: '导入'
    },
    type: {
     
      type: String,
      default: ''
    }
  },
  mounted() {
     
    vito.index()
  },
  data() {
     
    return {
     
      loading: false,
      file: null,
      msg: '',
      error: []
    }
  },
  methods: {
     
    async upload() {
     
      this.loading = true
      this.msg = '正在上传,请稍后...'
      try {
     
        const fd = new FormData()
        fd.append('file', this.file)
        const {
      data } = await uploadAndParse(fd)
        if (data.status === 1) {
     
          this.msg = '上传成功,正在解析...'
          this.a = setInterval(() => {
     
            this.checkStatus(data.data.src)
          }, 3000)
        } else {
     
          this.msg = data.msg
          this.loading = false
        }
      } catch (err) {
     
        this.msg = '出错了,请重试'
        this.loading = false
      }
    },
    async checkStatus(src) {
     
      const {
      data } = await checkStatus(this.type, src)
      this.msg = data.msg
      this.error = data.data
      if (!data.reRequest) {
     
        clearInterval(this.a)
        this.loading = false
      }
    }
  }
}
</script>
<style scoped>
.error-span{
     
    display: block;
    margin: 5px;
    color: #b82e2e;
}
</style>

控制器主要方法

	/**
     * 上传excel
     *
     * @return void
     */
    function uploadAndParse()
    {
     
        if(!isset($_FILES['file'])){
     
            return $this->response([
                'status' => 2,
                'msg' => '系统错误',
                'data' => []
            ], 1, '成功');
        }
        $fileArr = $_FILES['file'];
        if( !isset($fileArr['type']) || $fileArr['type'] != 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ){
     
            return $this->response([
                'status' => 2,
                'msg' => '文件类型错误',
                'data' => []
            ], 1, '成功');
        }
        $file = CW_Unit::UploadExcel($fileArr);
        $path = realpath(ROOT_PATH . '/../') . "/shell/status/lock";
        if (file_exists($path)) {
     
            return $this->response([
                'status' => 2,
                'msg' => '有正在导入的文件,请稍后重试!',
                'data' => []
            ], 1, '成功');
        }
        exec(PYTHON." " . realpath(ROOT_PATH . '/../') . '/shell/excel.py' . " {
       $file} >/dev/null &");
        return $this->response([
            'status' => 1,
            'msg' => '成功',
            'data' => ['src'=>$file]
        ], 1, '成功');
    }

    /**
     * 每3秒钟发送请求,查看上传进度
     *
     * @return void
     */
    function checkStatus()
    {
     
        $src = $this->input->post('src');
        $type = $this->input->post('type');
        if(!isset($src)||$src == ''||!isset($type)||$type==''){
     
            return $this->response([
                'status' => 8,
                'msg' => '文件或者导入类型不能为空',
                'data' => [],
                'reRequest' => false
            ], 1, '成功');
        }
        $file = basename($src);
        // $file = '2.xlsx';
        // $type = 'site';
        $a = substr($file, 0, strrpos($file, "."));
        $path = realpath(ROOT_PATH . '/../') . "/shell/status/lock";
        if (file_exists($path)) {
     
            $content = file_get_contents($path);
            $res = json_decode($content, true);
            if (isset($res['file']) && $res['file'] == $file) {
     
                if (isset($res['python']) && $res['python'] == 'ok') {
     
                    if (isset($res['php'])) {
     
                        if ($res['php'] == 'ok') {
     
                            if (isset($res['check']) && $res['check']) {
     
                                if (isset($res['import']) && $res['import']) {
     
                                    //导入成功
                                    //删除锁文件 txt error
                                    $this->delLockTxtError($file);
                                    return $this->response([
                                        'status' => 1,
                                        'msg' => '导入成功',
                                        'data' => [],
                                        'reRequest' => false
                                    ], 1, '成功');
                                } else {
     
                                    //导入失败
                                    //删除锁文件 txt error
                                    $this->delLockTxtError($file);
                                    return $this->response([
                                        'status' => 5,
                                        'msg' => '导入数据库失败!请重试!',
                                        'data' => [],
                                        'reRequest' => false
                                    ], 1, '成功');
                                }
                            } else {
     
                                //校验未通过
                                $error =  [];
                                $errPath = realpath(ROOT_PATH . '/../') . "/shell/error/{
       $a}";
                                $file_handle = fopen($errPath, "r");
                                while (!feof($file_handle)) {
     
                                    $line = fgets($file_handle);
                                    if (is_null($line) || $line == '') continue;
                                    array_push($error, $line);
                                }
                                fclose($file_handle);
                                //删除锁文件 txt error
                                $this->delLockTxtError($file);
                                return $this->response([
                                    'status' => 3,
                                    'msg' => '校验未通过!请修改后重试!',
                                    'data' => $error,
                                    'reRequest' => false
                                ], 1, '成功');
                            }
                        } else {
     
                            return $this->response([
                                'status' => 6,
                                'msg' => '正在校验excel单元格...',
                                'data' => [],
                                'reRequest' => true
                            ], 1, '成功');
                        }
                    } else {
     
                        //执行php脚本
                        exec(PHP." " . realpath(ROOT_PATH . '/../') . '/shell/parse.php' . " {
       $file} {
       $type} >/dev/null &");
                        return $this->response([
                            'status' => 6,
                            'msg' => '正在校验excel单元格...',
                            'data' => [],
                            'reRequest' => true
                        ], 1, '成功');
                    }
                } else {
     
                    // python暂时还没解析完成 返回
                    return $this->response([
                        'status' => 2,
                        'msg' => '正在解析excel...',
                        'data' => [],
                        'reRequest' => true
                    ], 1, '成功');
                }
            } else {
     
                //删除锁文件 txt error
                $this->delLockTxtError($file);
                return $this->response([
                    'status' => 4,
                    'msg' => '锁异常',
                    'data' => [],
                    'reRequest' => false
                ], 1, '成功');
            }
        } else {
     
            // python暂时还没解析完成 返回
            return $this->response([
                'status' => 2,
                'msg' => '正在解析excel...',
                'data' => [],
                'reRequest' => true
            ], 1, '成功');
        }
    }

    /**
     * 删除锁文件  临时txt error文件
     *
     * @param [type] $filename 文件名 1.xlsx
     * @return void
     */
    private function delLockTxtError($filename){
     
        $file = substr($filename, 0, strrpos($filename, "."));
        //删除excel
        @unlink(ROOT_PATH."/".CWConfig::_upload_path . "/excel/{
       $filename}");
        //删除lock
        @unlink(realpath(ROOT_PATH . "/../") . "/shell/status/lock");
        //删除临时txt
        @unlink(realpath(ROOT_PATH . "/../") . "/shell/txt/{
       $file}");
        //删除error
        $errorPath = realpath(ROOT_PATH . "/../") . "/shell/error/{
       $file}";
        if(file_exists($errorPath)){
     
            @unlink($errorPath);
        }
        return true;
    }

你可能感兴趣的:(php,python,python,php,异步导入,大excel)