最近经常需要在各种文件格式之间互相转换,这里把相关代码记录下来,方便日后查询。
xlsx文件转csv文件
import xlrd
import csv
def xlsx_to_csv():
    """Copy every row of the first sheet of ``1.xlsx`` into ``1.csv``.

    The workbook is read with ``xlrd`` and the rows are written, unchanged,
    through ``csv.writer`` into a UTF-8 encoded file in the current working
    directory.

    NOTE(review): xlrd 2.0+ only reads legacy ``.xls`` files -- confirm the
    installed xlrd version still accepts ``.xlsx`` input.
    """
    workbook = xlrd.open_workbook('1.xlsx')
    table = workbook.sheet_by_index(0)
    # Built-in open() replaces codecs.open(): ``codecs`` was never imported
    # by this snippet, and newline='' is what the csv module expects so that
    # it alone controls the line terminators.
    with open('1.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(
            table.row_values(row_num) for row_num in range(table.nrows)
        )


if __name__ == '__main__':
    xlsx_to_csv()
在两个文件之间转换时,需要注意文件的字符编码问题。
使用第三方库pandas将xlsx文件转csv文件
import pandas as pd
def xlsx_to_csv_pd():
    """Convert ``1.xlsx`` to ``1.csv`` (UTF-8) using pandas.

    The first spreadsheet column is used as the DataFrame index and is
    therefore written out as the leading CSV column.
    """
    pd.read_excel('1.xlsx', index_col=0).to_csv('1.csv', encoding='utf-8')


if __name__ == '__main__':
    xlsx_to_csv_pd()
csv文件转换成xlsx文件
import csv
import xlwt
def csv_to_xlsx():
    """Write every cell of ``1.csv`` into a sheet named ``data`` and save it.

    The CSV is read as UTF-8 and each field is written as-is (as a string)
    to the matching row/column of a new ``xlwt`` workbook, which is then
    saved as ``1.xlsx`` in the current working directory. Every row and
    every cell is also echoed to stdout.

    NOTE(review): xlwt emits the legacy ``.xls`` (BIFF) format; the
    ``.xlsx`` file name is kept unchanged here -- confirm that consumers
    can open the result.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('data')  # create the target worksheet
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields that contain embedded newlines are parsed correctly.
    with open('1.csv', 'r', encoding='utf-8', newline='') as f:
        for row_idx, line in enumerate(csv.reader(f)):
            print(line)
            for col_idx, cell in enumerate(line):
                print(cell)
                sheet.write(row_idx, col_idx, cell)  # one cell at a time
    workbook.save('1.xlsx')  # persist the workbook


if __name__ == '__main__':
    csv_to_xlsx()
使用pandas将csv文件转成xlsx文件
import pandas as pd
def csv_to_xlsx_pd():
    """Convert ``1.csv`` (UTF-8) to ``1.xlsx`` using pandas.

    The data is written to a sheet named ``data``; pandas also writes the
    DataFrame index as the first column.
    """
    frame = pd.read_csv('1.csv', encoding='utf-8')
    frame.to_excel('1.xlsx', sheet_name='data')


if __name__ == '__main__':
    csv_to_xlsx_pd()
xlsx文件转json文件
from collections import OrderedDict
import json
import codecs
# Convert the first sheet of positive_previous.xlsx into a JSON array of
# objects: row 0 supplies the keys, every following row becomes one object.
wb = xlrd.open_workbook('positive_previous.xlsx')
sh = wb.sheet_by_index(0)
title = sh.row_values(0)

convert_list = []
for rownum in range(1, sh.nrows):
    rowvalue = sh.row_values(rownum)
    single = OrderedDict()  # keeps the spreadsheet's column order
    for colnum, cell in enumerate(rowvalue):
        print(title[colnum], cell)
        single[title[colnum]] = cell
    convert_list.append(single)

j = json.dumps(convert_list)

with codecs.open('positive_previous.json', "w", "utf-8") as f:
    f.write(j)
csv文件转化为json文件
# 下面的工具可以方便的将CSV格式文件转换成json文件格式
import csv
import json
import os
import sys
from collections import OrderedDict

tip = """
请确保:
1. CSV格式是UTF-8
2. CSV第一行是键值
用法:
python csv2json.py foobar.csv
其中foobar.csv是需要转换的源数据文件
运行环境:
Python 3.4.3
日期:
2015年12月29日
"""
print(tip)

# Exit with a short usage message instead of an IndexError traceback when
# no input file was given on the command line.
if len(sys.argv) < 2:
    sys.exit("usage: python csv2json.py foobar.csv")

# Read the input data.
input_file = sys.argv[1]

# utf_8_sig transparently drops a leading BOM; newline="" is what the csv
# module expects. csv.reader (rather than str.split) handles quoted fields
# that contain commas, and it yields [] for blank lines, which are skipped.
with open(input_file, "r", encoding="utf_8_sig", newline="") as src:
    rows = [row for row in csv.reader(src) if row]

# The first row holds the keys; an empty file simply yields an empty list.
keys = rows[0] if rows else []

# OrderedDict keeps each record's keys in header order on every Python 3
# version, including the 3.4 runtime mentioned in the tip above.
parsed_datas = [OrderedDict(zip(keys, values)) for values in rows[1:]]
json_str = json.dumps(parsed_datas, ensure_ascii=False, indent=4)

# Swap only the file extension. A blanket str.replace("csv", "json") would
# also rewrite directory names containing "csv" and would leave names such
# as "data.CSV" unchanged, overwriting the input file.
output_file = os.path.splitext(input_file)[0] + ".json"

# Write the result to the output file.
with open(output_file, "w", encoding="utf-8") as dst:
    dst.write(json_str)
print("解析结束!")
这段代码生成的JSON中,各个键的顺序与CSV表头的顺序并不一致(Python 3.7 之前的 dict 不保证插入顺序),个人觉得可以改进一下,例如改用 collections.OrderedDict 来保持表头顺序。