目录复制
1、如果已知dbf文件的编码格式,可以采用以下代码读取(下面以“GBK”为例):
import dbfread
import pandas as pd
from pathlib import PureWindowsPath

# Location of the DBF file to read.
dbf_path = r'F:\python_demo\1、pyqt5_projects\25_读取dbf文件\1.DBF'
# Output workbook path: the source path with its extension replaced by .xlsx.
# The previous expression split on a doubled backslash (which never occurs in
# the path) and then cut at the first '.', so any dot in a directory name
# truncated the result. with_suffix() only touches the final extension and
# yields the same value as before for the path above.
xls_filename = str(PureWindowsPath(dbf_path).with_suffix('.xlsx'))

# Read every record of the DBF file, decoded as GBK, into a DataFrame.
with dbfread.DBF(dbf_path, encoding='gbk') as table:
    records = list(table)
df = pd.DataFrame(records)
2、如果不知道dbf文件的编码格式,可以通过逐一尝试多种编码的方式找到可用的编码。Python中常见的编码格式包括:
1. ASCII
:ASCII编码是Python 2默认的源码编码格式,只能表示128个字符(英文字母、数字和常用符号),无法表示中文。
2. UTF-8
:UTF-8编码可以表示世界上几乎所有的字符,是Python中最常用的编码格式。
3. GBK
:GBK编码是中国大陆使用的编码格式,可以表示中文字符。
4. Unicode
:Unicode是涵盖世界上几乎所有字符的字符集(本身并不是一种具体的字节编码),Python 3中的字符串(str)即以Unicode表示文本。
5. Latin-1
:Latin-1编码可以表示西欧语言的字符。
6. Base64
:Base64编码是一种二进制编码格式,可以将二进制数据转换为可打印的字符。
7. Hex
:Hex编码可以将二进制数据转换为十六进制字符。
需要注意的是,Python 2的默认源码编码是ASCII,而Python 3的默认源码编码是UTF-8(字符串类型为Unicode)。在使用Python时,应该根据具体的需求选择适当的编码格式。
import dbfread
import pandas as pd
# Location of the DBF file to read.
dbf_path = r'D:\python_demo\读取dbf文件\1.DBF'
# Output workbook path. Written as .xlsx without an `encoding` argument:
# pandas 2.x no longer accepts `encoding=` in to_excel() and can no longer
# write the legacy .xls format, so the previous call always raised there.
xls_filename = r"test.xlsx"

# Candidate codecs, tried in the order listed.
# NOTE(review): three names that Python does not recognise ('big5hk',
# 'iso2022_jp_2ext' and a final 'iso8') were replaced by the closest standard
# codec names; they look like typos/truncation - confirm against the intended
# list.
encodings = [
    'ascii', 'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437',
    'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855',
    'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863',
    'cp864', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1250',
    'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256',
    'cp1257', 'cp1258', 'euc_cn', 'euc_jis_2004', 'euc_jisx0213',
    'euc_jp', 'euc_kr', 'gb18030', 'gb2312', 'hz', 'iso2022_jp',
    'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004',
    'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr', 'iso8859_10',
    'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_16', 'iso8859_2',
    'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7',
    'iso8859_8',
]

# Try each candidate until one decodes the whole file.
# NOTE(review): many single-byte code pages accept almost any byte sequence,
# so the first candidate that does not fail is not necessarily the correct
# one - check the printed records for mojibake.
for encoding in encodings:
    try:
        # Load every record up front so that a decode error anywhere in the
        # file rejects this candidate before anything is printed or written.
        records = list(dbfread.DBF(dbf_path, encoding=encoding))
    except (UnicodeDecodeError, LookupError):
        # Wrong or unknown codec: move on to the next candidate. Other errors
        # (for example a missing file) are no longer swallowed.
        continue

    print(f"Decoded with encoding: {encoding}")
    # Print all records.
    for record in records:
        print(record)
    # Convert to a workbook and stop at the first candidate that worked.
    pd.DataFrame(records).to_excel(xls_filename, index=False)
    break
else:
    print(f"None of the {len(encodings)} candidate encodings could decode {dbf_path}")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023-09-09 15:43
# @Author : Leuanghing Chen
# @Blog : https://blog.csdn.net/weixin_46153372?spm=1010.2135.3001.5421
# @File : 读取dbf并转成xls.py
# @Software : PyCharm
import dbfread
from openpyxl import Workbook
import pandas as pd
from pathlib import PureWindowsPath

# Location of the DBF file to read.
dbf_path = r'F:\python_demo\1、pyqt5_projects\25_读取dbf文件\1.DBF'
# Output workbook path: the source path with its extension replaced by .xlsx.
# The previous split on a doubled backslash never matched and the following
# split('.') truncated at the first dot anywhere in the path; with_suffix()
# gives the same result for the path above without that fragility.
xls_filename = str(PureWindowsPath(dbf_path).with_suffix('.xlsx'))

# Read every record of the DBF file, decoded as GBK, into a DataFrame.
with dbfread.DBF(dbf_path, encoding='gbk') as table:
    records = list(table)
df = pd.DataFrame(records)

wb = Workbook()  # new workbook
ws = wb.active   # its active worksheet

# Header row: a serial-number column followed by the DBF field names.
ws.append(["序号", *df.columns])

# Data rows: 1-based serial number (stored as text, as before) followed by the
# record's values. Iterating the rows once avoids rebuilding the index/column
# lists for every cell.
for serial, values in enumerate(df.itertuples(index=False, name=None), start=1):
    ws.append([str(serial), *values])

# Save as an Excel file.
wb.save(xls_filename)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023-09-09 15:04
# @Author : Leuanghing Chen
# @Blog : https://blog.csdn.net/weixin_46153372?spm=1010.2135.3001.5421
# @File : 读取dbf并转成csv.py
# @Software : PyCharm
import dbfread
import pandas as pd
from pathlib import PureWindowsPath

# Location of the DBF file to read.
dbf_path = r'F:\python_demo\1、pyqt5_projects\25_读取dbf文件\1.DBF'
# Output CSV path: the source path with its extension replaced by .csv.
# The previous split on a doubled backslash never matched and the following
# split('.') truncated at the first dot anywhere in the path; with_suffix()
# gives the same result for the path above without that fragility.
csv_filename = str(PureWindowsPath(dbf_path).with_suffix('.csv'))

# Read every record of the DBF file, decoded as GBK, into a DataFrame.
with dbfread.DBF(dbf_path, encoding='gbk') as table:
    records = list(table)
df = pd.DataFrame(records)

# Insert a 1-based serial-number column as the first column.
df.insert(0, '序号', list(range(1, len(df) + 1)))

# Save as CSV, encoded as GBK like the source data, without the index column.
df.to_csv(csv_filename, index=False, encoding='gbk')
print(df.columns.tolist())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023-09-09 15:27
# @Author : Leuanghing Chen
# @Blog : https://blog.csdn.net/weixin_46153372?spm=1010.2135.3001.5421
# @File : 读取dbf并转成word.py
# @Software : PyCharm
import pandas as pd
import dbfread
from docx import Document # 导入docx包
from docx.shared import Pt, RGBColor, Cm # 设置字体大小、颜色、页面边距
from docx.oxml.ns import qn # 中文字体
from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT # 导入单元格垂直对齐
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT # 导入段落对齐
from pathlib import PureWindowsPath

# Location of the DBF file to read.
dbf_path = r'F:\python_demo\1、pyqt5_projects\25_读取dbf文件\1.DBF'
# Output document path: the source path with its extension replaced by .docx.
# The previous split on a doubled backslash never matched and the following
# split('.') truncated at the first dot anywhere in the path; with_suffix()
# gives the same result for the path above without that fragility.
docx_filename = str(PureWindowsPath(dbf_path).with_suffix('.docx'))

# Read every record of the DBF file, decoded as GBK, into a DataFrame.
with dbfread.DBF(dbf_path, encoding='gbk') as table:
    records = list(table)
df = pd.DataFrame(records)

# Create the Word document.
Doc = Document()

# Page margins (left, right, top, bottom) of the first section.
sec = Doc.sections[0]
sec.left_margin = Cm(1.5)
sec.right_margin = Cm(1.5)
sec.top_margin = Cm(1.5)
sec.bottom_margin = Cm(1.5)

# One table row per record plus a header row; one extra leading column for
# the serial number.
column_names = df.columns.tolist()
tab = Doc.add_table(len(df) + 1, len(column_names) + 1, style="Table Grid")
table_rows = list(tab.rows)
header_cells = table_rows[0].cells

# Column widths in centimetres for the first six columns. zip() stops at the
# shorter sequence, so a table with fewer columns no longer raises IndexError
# and extra columns simply keep their default width.
column_widths_cm = (1.01, 3.34, 3.43, 5.76, 3.00, 2.45)
for cell, width_cm in zip(header_cells, column_widths_cm):
    cell.width = Cm(width_cm)

# Header row: serial-number caption followed by the DBF field names.
for cell, title in zip(header_cells, ["序号", *column_names]):
    cell.text = title

# Body rows: 1-based serial number followed by the record's values, all
# rendered as text.
body_rows = zip(table_rows[1:], df.itertuples(index=False, name=None))
for serial, (table_row, values) in enumerate(body_rows, start=1):
    for cell, value in zip(table_row.cells, (serial, *values)):
        try:
            cell.text = str(value)
        except AttributeError:
            # Retained from the original: a cell whose text cannot be set is
            # left blank. NOTE(review): with str() applied this is presumably
            # unreachable - confirm before removing.
            pass

# Apply uniform formatting to every cell of the table.
for table_row in table_rows:
    for cell in table_row.cells:
        # Centre the content vertically and horizontally.
        cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER
        cell.paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        for par in cell.paragraphs:
            # Single line spacing, no space before/after the paragraph.
            par.paragraph_format.line_spacing = 1.0
            par.paragraph_format.space_before = Pt(0)
            par.paragraph_format.space_after = Pt(0)
            for block in par.runs:
                # Reset all character effects.
                block.font.bold = False
                block.font.italic = False
                block.font.underline = False
                block.font.strike = False
                block.font.double_strike = False
                block.font.shadow = False
                # 12 pt black text.
                block.font.size = Pt(12)
                block.font.color.rgb = RGBColor(0, 0, 0)
                # Setting font.name first creates the rFonts element that the
                # East Asian font assignment below relies on.
                block.font.name = '宋体'
                block._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

# Bold the whole header row (previously only the first six cells, which
# raised IndexError on narrower tables).
for cell in header_cells:
    for run in cell.paragraphs[0].runs:
        run.font.bold = True

# Save as a Word document.
Doc.save(docx_filename)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023-09-09 18:02
# @Author : Leuanghing Chen
# @Blog : https://blog.csdn.net/weixin_46153372?spm=1010.2135.3001.5421
# @File : 读取dbf并转成txt.py
# @Software : PyCharm
import dbfread
import pandas as pd
from pathlib import PureWindowsPath

# Location of the DBF file to read.
dbf_path = r'F:\python_demo\1、pyqt5_projects\25_读取dbf文件\1.DBF'
# Output text path: the source path with its extension replaced by .txt.
# The previous split on a doubled backslash never matched and the following
# split('.') truncated at the first dot anywhere in the path; with_suffix()
# gives the same result for the path above without that fragility.
txt_filename = str(PureWindowsPath(dbf_path).with_suffix('.txt'))

# Read every record of the DBF file, decoded as GBK, into a DataFrame.
with dbfread.DBF(dbf_path, encoding='gbk') as table:
    records = list(table)
df = pd.DataFrame(records)

# Write a tab-separated text file. The encoding is explicit (GBK, the same
# codec the data was read with) so the result does not depend on the
# platform's default encoding.
with open(txt_filename, 'w', encoding='gbk') as file:
    # Header line: serial-number caption followed by the DBF field names.
    # Every cell, including the last, is followed by a tab, as before.
    header = ["序号", *df.columns]
    file.write("".join(f"{title}\t" for title in header) + "\n")

    # One line per record with a 1-based serial number. Previously the header
    # was written *instead of* the first record (the row == 0 branch), so that
    # record was silently dropped.
    for serial, values in enumerate(df.itertuples(index=False, name=None), start=1):
        file.write("".join(f"{cell}\t" for cell in (serial, *values)) + "\n")