目录
代码
【question1】UnicodeDecodeError:'utf8'编解码器无法解码位置0中的字节0xa5
【question2】python文件、数据库读写编码的问题
【question3】Mysql执行insert语句,不报错,数据也插不进去
【question4】python代码出现OSError:[WinError 123] 文件名、目录名或卷标语法不正确
import pymysql
import os,csv
#os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
''' 1、循环读取选定文件夹下的文件'''
def readAllFiles(filePath):
    """Recursively walk *filePath* and hand each file to its CSV parser.

    Every regular file is opened as GBK text. The parser is chosen by a
    substring of the file's full path (directory names count too, exactly
    as in the original lookup):

    * ``workflow`` -> analysisWorkflowCsv  (workflow records)
    * ``opinion``  -> analysisOpinionCsv   (opinion records)
    * ``wd_24``    -> analysisWd24Csv      (outgoing documents)
    * ``wd_25``    -> analysisWd25Csv      (incoming documents)

    Files matching none of these are opened and closed without processing.
    Sub-directories are processed recursively.

    Args:
        filePath: Directory to scan.

    Returns:
        None.
    """
    for entry in os.listdir(filePath):
        path = os.path.join(filePath, entry)

        if os.path.isdir(path):
            readAllFiles(path)
            continue
        if not os.path.isfile(path):
            # Broken link / special file: neither readable as CSV nor
            # listable as a directory, so skip it instead of crashing.
            continue

        # ``with`` guarantees the handle is closed even if a parser raises.
        # newline='' is what the csv module requires so that line breaks
        # embedded in quoted fields are parsed correctly.
        with open(path, 'r', encoding='gbk', newline='') as csvHandle:
            print(path)
            # Keep this as an if/elif chain: the parser names are resolved
            # lazily, only for the branch that is actually taken.
            if "workflow" in path:
                analysisWorkflowCsv(csvHandle)
            elif "opinion" in path:
                analysisOpinionCsv(csvHandle)
            elif "wd_24" in path:
                analysisWd24Csv(csvHandle)
            elif "wd_25" in path:
                analysisWd25Csv(csvHandle)
'''2、解析文件内容,首行为标题栏需要跳过。入库操作每满1000条commit一次主要是python频繁提交执行次数达到1000+就会报错。1000条commit一次可以避免错误并缓解内存压力'''
def analysisWorkflowCsv(file):
    """Parse a workflow CSV stream and insert its rows into the database.

    The first row is treated as a header and skipped. Each following row
    is mapped by position to the workflow columns and passed to
    ``insertWorkflows``. Successful inserts are committed in batches of
    1000 (the author's note says batching avoids an error seen with
    per-row work and eases memory pressure), with a final commit at the end.

    Args:
        file: An open text file (or any iterable of lines) containing the
            CSV data. The caller owns it; it is not closed here.

    Returns:
        None.
    """
    reader = csv.reader(file)

    # Consume the header row. The default avoids StopIteration when the
    # file is completely empty; in that case there is nothing to import,
    # so no connection is opened.
    if next(reader, None) is None:
        return

    # getConnect_old() is defined elsewhere; presumably it returns a
    # ConnectionDatabase-like object (commitData / closeConn) -- confirm.
    conn = getConnect_old()
    counter = 0
    try:
        for row in reader:
            if len(row) < 9:
                # Blank or truncated line: indexing row[8] would raise.
                if row:
                    print("跳过列数不足的行: %r" % (row,))
                continue

            workflow = {
                'ID': row[0],
                'subject': row[1],
                'signdate': row[2],
                'UnitName': row[3],
                'UnitUser': row[4],
                'UnitUserTitle': row[5],
                'UnitEndTime': row[6],
                'UnitAction': row[7],
                'UnitToTitle': row[8],
            }
            if insertWorkflows(conn, workflow):
                counter += 1
                if counter % 1000 == 0:
                    conn.commitData()
                    print("已经插入工作流数据: %d 条。"%counter)
        # Flush the last, partially filled batch.
        conn.commitData()
    finally:
        # Release the connection even when a row fails unexpectedly.
        conn.closeConn()
'''3、数据入库'''
def insertWorkflows(__conn, workflow):
    """Insert one workflow record through a parameterized INSERT.

    Args:
        __conn: Object exposing ``mysql_exe_sql(sql, params)``.
        workflow: Mapping with the keys ``ID``, ``UnitName``, ``UnitUser``,
            ``UnitUserTitle``, ``UnitEndTime``, ``UnitAction``,
            ``UnitToTitle``, ``subject`` and ``signdate``.

    Returns:
        Whatever ``__conn.mysql_exe_sql`` returns for this statement.

    Raises:
        KeyError: If *workflow* lacks one of the keys listed above.
    """
    # Same statement text as before, split over adjacent literals so the
    # function body can be indented normally.
    statement = (
        "\nINSERT INTO workflows (\n"
        "ID, UnitName, UnitUser, UnitUserTitle, UnitEndTime, UnitAction, UnitToTitle, subject, signdate\n"
        ") VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)\n"
    )
    # Order must match the column list in the statement above.
    values = (
        workflow['ID'],
        workflow['UnitName'],
        workflow['UnitUser'],
        workflow['UnitUserTitle'],
        workflow['UnitEndTime'],
        workflow['UnitAction'],
        workflow['UnitToTitle'],
        workflow['subject'],
        workflow['signdate'],
    )
    return __conn.mysql_exe_sql(statement, values)
'''4、数据库连接,python操作mysql代码(使用pymysql)'''
class ConnectionDatabase(object):
    """Small wrapper around a single pymysql (MySQL) connection.

    The connection is opened in ``__init__`` and stored in ``MsSQL_db``
    (the attribute name is historical; the driver is pymysql). Statements
    are not committed automatically by this class: call ``commitData()``
    to commit and ``closeConn()`` when finished.
    """

    def __init__(self, ip, user_name, passwd, db, char='utf8'):
        """Open the connection.

        Args:
            ip: Database host.
            user_name: Login user.
            passwd: Login password.
            db: Database (schema) name.
            char: Connection character set passed to pymysql.
        """
        self.ip = ip
        self.username = user_name
        self.passwd = passwd
        self.mysqldb = db
        self.char = char
        self.MsSQL_db = pymysql.connect(
            host=self.ip,
            user=self.username,
            password=self.passwd,
            database=self.mysqldb,
            charset=self.char)

    def mysql_findList(self, sql):
        """Run a query and return all rows.

        Args:
            sql: The SQL text to execute.

        Returns:
            The rows from ``cursor.fetchall()``, or ``None`` when the query
            fails or yields no rows. Errors are printed, not raised.
        """
        cursor = self.MsSQL_db.cursor()
        if not cursor:
            raise NameError("数据库连接失败")
        results = None
        try:
            cursor.execute(sql)
            results = cursor.fetchall()
        except Exception as e:
            # Best-effort by design: report and fall through to None.
            print(e)
        finally:
            cursor.close()
        return results if results else None

    def mysql_exe_sql(self, sql, params):
        """Execute one data-modifying statement with bound parameters.

        The statement is not committed here; see ``commitData()``.

        Args:
            sql: SQL text with ``%s`` placeholders.
            params: Sequence of values for the placeholders.

        Returns:
            ``True`` if the statement affected at least one row, ``False``
            otherwise (including when execution failed; the error is
            printed and the current transaction is rolled back).
        """
        cursor = self.MsSQL_db.cursor()
        if not cursor:
            raise NameError("数据库连接失败")
        result = 0
        try:
            cursor.execute(sql, params)
            result = cursor.rowcount
        except Exception as e:
            print(e)
            # NOTE: this discards every uncommitted statement on the
            # connection, not only the one that failed.
            self.MsSQL_db.rollback()
            # The connection is deliberately left open so a caller running
            # a batch can keep going; closing is closeConn()'s job.
        finally:
            cursor.close()
        return result > 0

    def commitData(self):
        """Commit the current transaction; errors are printed, not raised."""
        try:
            self.MsSQL_db.commit()
        except Exception as e:
            print(e)

    def closeConn(self):
        """Close the database connection."""
        if self.MsSQL_db:
            self.MsSQL_db.close()
'''5、执行代码'''
if __name__ == "__main__":
    # Parent folder that holds the CSV files to import.
    # Raw strings keep the Windows backslashes literal: in a normal string
    # a backslash followed by a digit or by letters such as t/n is an
    # escape sequence and silently corrupts the path.
    # Alternative input folder: r"G:\数据解析\csv\workflowcsv"
    testFilePath = r"G:\数据解析\csv\wd25csv"
    readAllFiles(testFilePath)
该错误是因为字典等数据中存在一些非ASCII字符,并且无法按utf-8对其进行编码/解码。避免此错误的一种简单方法是使用encode()函数对此类字符串进行编码(如果a是具有非ASCII字符的字符串,例如 a.encode('utf-8'));读取文件时,则应在open()中指定文件的实际编码,例如:
file = open(path, 'r', encoding='gbk')
1.读写文件时指定文件的实际编码(本例的文件为gbk编码;若文件是utf-8编码,则使用 encoding='utf-8')
fh = open(file_name, 'r', encoding = 'gbk')
2.读写数据库utf-8格式
connect = pymysql.connect(host='127.0.0.1', user='root', passwd='123456', db='code_bank', charset='utf8')
1.注意Commit()数据提交到数据库
2.数据库中某条数据的某列为空,后面为这列创建了索引,插入的时候没有指定这列就会造成不报错也插不进去。可以清空数据库或者重建试试
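文件路径的命名规则出了问题,注意文件名不能用数字开头。原因通常是:在普通字符串中,反斜杠后面跟数字(如 "\1")或某些字母(如 "\t"、"\n")会被当作转义字符,导致路径被改写。把路径写成原始字符串(如 r"G:\data\1.csv")或改用正斜杠 "/" 即可避免。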