Python 遍历文件夹下的csv文件导入到数据库

目录

代码

【question1】UnicodeDecodeError:'utf8'编解码器无法解码位置0中的字节0xa5

【question2】python文件、数据库读写编码的问题

【question3】Mysql执行insert语句,不报错,数据也插不进去

【question4】python代码出现OSError:[WinError 123] 文件名、目录名或卷标语法不正确


代码

import pymysql
import os,csv
#os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

''' 1、循环读取选定文件夹下的文件'''
def readAllFiles(filePath):
    """Recursively walk ``filePath`` and hand every recognised CSV to its parser.

    A regular file is recognised when its full path (directory part
    included) contains one of the markers ``workflow``, ``opinion``,
    ``wd_24`` or ``wd_25``; the markers are checked in that order and the
    first match wins. Recognised files are opened as GBK text and passed to
    the matching ``analysis*Csv`` function. Every regular file's path is
    printed, whether or not it is recognised.

    Args:
        filePath: Directory to scan. Sub-directories are scanned too.
    """
    for name in os.listdir(filePath):
        path = os.path.join(filePath, name)

        if os.path.isdir(path):
            readAllFiles(path)
            continue
        if not os.path.isfile(path):
            # Broken symlinks / special files can be neither listed nor parsed.
            continue

        print(path)
        # Keep this an if/elif chain: the parser names are looked up only
        # for the branch that is actually taken.
        if "workflow" in path:
            # Workflow records.
            parse = analysisWorkflowCsv
        elif "opinion" in path:
            # Opinion records.
            parse = analysisOpinionCsv
        elif "wd_24" in path:
            # Outgoing documents.
            parse = analysisWd24Csv
        elif "wd_25" in path:
            # Incoming documents.
            parse = analysisWd25Csv
        else:
            # Unrecognised file: do not even open it.
            continue

        # The context manager closes the handle even if the parser raises;
        # newline='' is what the csv module asks for so that quoted fields
        # containing line breaks are read correctly.
        with open(path, 'r', encoding='gbk', newline='') as csv_file:
            parse(csv_file)
 



'''2、解析文件内容,首行为标题栏需要跳过。入库操作每满1000条commit一次主要是python频繁提交执行次数达到1000+就会报错。1000条commit一次可以避免错误并缓解内存压力'''
def analysisWorkflowCsv(file):
    """Insert the workflow rows of an open CSV file into the database.

    The first line is treated as a header and skipped. Each following row
    is read positionally as ID, subject, signdate, UnitName, UnitUser,
    UnitUserTitle, UnitEndTime, UnitAction, UnitToTitle and passed to
    ``insertWorkflows``. Blank lines are skipped; a non-blank row with fewer
    than nine columns still raises ``IndexError``.

    Successful inserts are committed every 1000 rows and once more after the
    last row. The connection is always closed, even when a row raises.

    Args:
        file: Iterable of text lines, e.g. an open text file. It is not
            closed here; that is left to the caller.
    """
    columns = (
        'ID', 'subject', 'signdate', 'UnitName', 'UnitUser',
        'UnitUserTitle', 'UnitEndTime', 'UnitAction', 'UnitToTitle',
    )
    reader = csv.reader(file)
    counter = 0

    # Consume the header; the default avoids StopIteration on an empty file.
    if next(reader, None) is None:
        print("已经插入工作流数据: %d 条。"%counter)
        return

    # NOTE(review): getConnect_old() is defined outside this block; it is
    # presumably a ConnectionDatabase (commitData/closeConn are used) - confirm.
    conn = getConnect_old()
    try:
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line.
                continue
            workflow = {key: row[index] for index, key in enumerate(columns)}
            if insertWorkflows(conn, workflow):
                counter += 1
                # Commit only when a new row actually completed a batch, so a
                # failed insert never triggers a redundant commit.
                if counter % 1000 == 0:
                    conn.commitData()
        print("已经插入工作流数据: %d 条。"%counter)
        conn.commitData()
    finally:
        conn.closeConn()
 



'''3、数据入库'''
def insertWorkflows(__conn, workflow):
    """Insert a single workflow record through ``__conn``.

    Args:
        __conn: Object exposing ``mysql_exe_sql(sql, params)``.
        workflow: Mapping that provides the keys ID, UnitName, UnitUser,
            UnitUserTitle, UnitEndTime, UnitAction, UnitToTitle, subject
            and signdate.

    Returns:
        Whatever ``__conn.mysql_exe_sql`` returns for the INSERT.
    """
    # Same order as the column list in the statement below.
    column_order = (
        'ID', 'UnitName', 'UnitUser', 'UnitUserTitle', 'UnitEndTime',
        'UnitAction', 'UnitToTitle', 'subject', 'signdate',
    )
    statement = '''
        INSERT INTO workflows (
            ID, UnitName, UnitUser, UnitUserTitle, UnitEndTime, UnitAction, UnitToTitle, subject, signdate
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    '''
    values = tuple(workflow[column] for column in column_order)
    return __conn.mysql_exe_sql(statement, values)
 





'''4、数据库连接,python操作sqlserver代码'''
class ConnectionDatabase(object):
    """Small wrapper around one pymysql (MySQL) connection.

    The connection is stored in ``MsSQL_db``; the attribute name is kept
    for compatibility with existing callers even though the server is MySQL.
    """

    def __init__(self, ip, user_name, passwd, db, char='utf8'):
        """Store the connection settings and open the MySQL connection.

        Args:
            ip: Host passed to ``pymysql.connect`` as ``host``.
            user_name: Passed as ``user``.
            passwd: Passed as ``password``.
            db: Passed as ``database``.
            char: Passed as ``charset``; defaults to ``'utf8'``.
        """
        self.ip = ip
        self.username = user_name
        self.passwd = passwd
        self.mysqldb = db
        self.char = char

        self.MsSQL_db = pymysql.connect(
            host=self.ip,
            user=self.username,
            password=self.passwd,
            database=self.mysqldb,
            charset=self.char)

    def mysql_findList(self, sql):
        """Run a query and return all of its rows.

        Args:
            sql: Statement to execute (no parameters).

        Returns:
            The value of ``cursor.fetchall()``, or ``None`` when the query
            produced no rows or failed. A failure is printed, not raised.

        Raises:
            NameError: If the connection hands back a falsy cursor.
        """
        cursor = self.MsSQL_db.cursor()
        if not cursor:
            raise NameError("数据库连接失败")
        results = None
        try:
            cursor.execute(sql)
            results = cursor.fetchall()
        except Exception as e:
            # Best effort: report and fall through to None. The connection
            # stays open so its owner can keep using it and close it once
            # through closeConn().
            print(e)
        finally:
            cursor.close()
        return results if results else None

    def mysql_exe_sql(self, sql, params):
        """Execute one data-modifying statement (insert/update/delete).

        Nothing is committed here; call ``commitData()`` for that.

        Args:
            sql: Statement with ``%s`` placeholders.
            params: Values bound to the placeholders.

        Returns:
            ``True`` when ``cursor.rowcount`` is greater than zero, ``False``
            otherwise, including when the statement failed. A failure is
            printed and the current transaction is rolled back, which
            discards everything executed since the last commit.

        Raises:
            NameError: If the connection hands back a falsy cursor.
        """
        cursor = self.MsSQL_db.cursor()
        if not cursor:
            raise NameError("数据库连接失败")
        result = 0
        try:
            cursor.execute(sql, params)
            result = cursor.rowcount
        except Exception as e:
            print(e)
            # Roll back but keep the connection open: closing it here would
            # make every later statement of the batch, and the caller's own
            # closeConn(), fail.
            self.MsSQL_db.rollback()
        finally:
            cursor.close()

        return result > 0

    def commitData(self):
        """Commit the current transaction; a failure is printed, not raised."""
        try:
            self.MsSQL_db.commit()
        except Exception as e:
            print(e)

    def closeConn(self):
        """Close the database connection if one is set."""
        if self.MsSQL_db:
            self.MsSQL_db.close()
 

5、执行代码

if __name__ == "__main__":
    # Parent folder that holds the CSV files to import.
    # Raw strings keep the backslashes of a Windows path literal: in a normal
    # string "\w" or "\c" is an invalid escape and "\1" would be read as an
    # octal escape.
    # testFilePath = r"G:\数据解析\csv\workflowcsv"
    testFilePath = r"G:\数据解析\csv\wd25csv"
    readAllFiles(testFilePath)

【question1】UnicodeDecodeError:'utf8'编解码器无法解码位置0中的字节0xa5

该错误是因为被读取的csv文件并不是utf-8编码(本例中是gbk编码),文件中存在一些非ascii字符,按默认的utf-8无法对其进行解码。避免此错误的一种简单方法是在打开文件时显式指定文件的实际编码:

file = open(path, 'r', encoding='gbk')

【question2】python文件、数据库读写编码的问题

1.读写文件时显式指定文件的实际编码(本例的csv文件是gbk编码;如果文件是utf-8编码,则改为 encoding = 'utf-8')

  fh = open(file_name, 'r', encoding = 'gbk')

2.读写数据库utf-8格式

  connect = pymysql.connect(host='127.0.0.1', user='root', passwd='123456', db='code_bank', charset='utf8')

【question3】Mysql执行insert语句,不报错,数据也插不进去

1.注意Commit()数据提交到数据库

2.数据库中某条数据的某列为空,后面为这列创建了索引,插入的时候没有指定这列就会造成不报错也插不进去。可以清空数据库或者重建试试

【question4】python代码出现OSError:[WinError 123] 文件名、目录名或卷标语法不正确

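文件路径的写法出了问题:在普通字符串中反斜杠"\"是转义符,"\"后面紧跟数字会被当作八进制转义(例如"\1"),因此路径中以数字开头的文件名或目录名会被解析成非法字符。注意文件名不能用数字开头,或者改用原始字符串(例如 r"G:\data\1.csv")或正斜杠"/"来书写路径。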

你可能感兴趣的:(数据库,Python,Mysql,csv,错误预警)