# Fill two mappings from a single pass over one CSV file.
# NOTE(review): `文件路径`, `a` and `b` are not defined in this snippet; they are
# presumably a path string and two dicts created earlier -- confirm.
with open(文件路径, mode='r', encoding='utf-8') as src:
    # Each iteration yields one CSV row as a list of strings; an individual
    # column is selected by position, e.g. record[i].
    for record in csv.reader(src):
        key = record[0]
        a[key] = record[1]
        b[key] = record[3]
当需要将test3文件的部分内容写入到output文件的时候:
思路1:将数据少的文件写成字典,然后打开并写入到output中。
思路2:同时打开两个文件,然后再写入新的文件。但是这种方法需要不断地打开其中一个文件进行遍历,因为使用with open打开的文件遍历一次以后,读取位置不会自动回到文件开头,因此需要不断地重新打开。这种方法很浪费时间。
下述代码为思路1的代码:
# Attachment 3 processing: load the smaller file into a dict, then stream the
# larger file once and look every row up in that dict.
import csv

path3 = 'C:/Users/Administrator/Desktop/C0909/data/test3.csv'
path4 = 'C:/Users/Administrator/Desktop/C0909/output.csv'

# Maps "<column 0>,<column 1>" of test3.csv to its column 2.
# NOTE(review): presumably date, item code and wholesale price -- confirm
# against the actual file layout.
c = {}
# newline='' is what the csv module expects for files handed to csv.reader.
with open(path3, 'r', encoding='utf-8', newline='') as file:
    readers3 = csv.reader(file)
    for i in readers3:
        if not i:
            # Blank line in the CSV: nothing to index.
            continue
        k = i[0] + ',' + i[1]
        c[k] = i[2]

with open(path4, 'r', encoding='utf-8', newline='') as csvfile:
    reader4 = csv.reader(csvfile)
    headers2 = next(reader4)
    # Add the new column to the header row.
    headers2.append('批发价格')
    with open('./AllData1.csv', 'w', newline='', encoding='utf-8') as outfile1:
        writer_price = csv.writer(outfile1)
        # Write the header first.
        writer_price.writerow(headers2)
        for r in reader4:
            if not r:
                # Blank line in the CSV: no key to match on.
                continue
            # Same key format as used when building `c`, so a single dict
            # lookup replaces a scan over every entry of `c`.
            key = r[0] + ',' + r[2]
            if key in c:
                r.append(c[key])
                # NOTE(review): the source indentation was lost; rows are
                # written only when a price was found -- confirm that
                # unmatched rows are meant to be dropped.
                writer_price.writerow(r)
下述代码是思路2的代码:
# Merge Attachment 3 into output.csv (the merged Attachment 1 + Attachment 2
# file) to produce AllData.csv.
# This variant reopens Attachment 3 for every row of output.csv, because a
# csv.reader can only be consumed once per open file. It is intentionally the
# slow, file-rescanning approach.
path3 = 'C:/Users/86166/Desktop/C0908/data/附件3.csv'
path4 = 'C:/Users/86166/Desktop/C0908/output.csv'

# newline='' is what the csv module expects for files handed to csv.reader.
with open(path4, 'r', encoding='utf-8', newline='') as csvfile:
    reader4 = csv.reader(csvfile)
    headers2 = next(reader4)
    print(headers2)
    # Add the new column to the header row.
    headers2.append('批发价格')
    # Explicit encoding so the non-ASCII header does not depend on the
    # platform's default codec.
    with open('./AllData.csv', 'w', newline='', encoding='utf-8') as outfile1:
        writer_price = csv.writer(outfile1)
        # Write the header first.
        writer_price.writerow(headers2)
        for row4 in reader4:
            # Attachment 3 columns: row3[0] date, row3[1] item code,
            # row3[2] wholesale price.
            # output.csv columns: row4[0] date, row4[2] item code.
            # The key is built once per output row instead of once per
            # comparison.
            key4 = row4[0] + ',' + row4[2]
            with open(path3, 'r', encoding='utf-8', newline='') as csv_file:
                reader3 = csv.reader(csv_file)
                headers3 = next(reader3)  # skip the Attachment 3 header row
                for row3 in reader3:
                    if key4 != row3[0] + ',' + row3[1]:
                        continue
                    # Append the price as the new last column and write the row.
                    # NOTE(review): the source indentation was lost; rows are
                    # written only when a match is found -- confirm that
                    # unmatched rows are meant to be dropped.
                    row4.append(row3[2])
                    writer_price.writerow(row4)
                    # Stop at the first match so a row is written exactly once
                    # with exactly one price column.
                    break
import os
import pandas as pd
import matplotlib.pyplot as plt
# Load the workbook and inspect it. The bare expressions below only display a
# value in an interactive session / notebook; in a plain script they have no
# visible effect.
data = pd.read_excel('../data1/sale.xlsx')
data.head() # first 5 rows
data.tail() # last 5 rows
data.shape # (number of rows, number of columns)
data.dtypes # dtype of every column
# Select a single column
data['日销售额'] # attribute access (data.<column name>) also works
# Select one or several rows / columns by integer position:
data.iloc[4] # row at position 4 (0-based, i.e. the fifth row)
data.iloc[:,0:4] # all rows, columns at positions 0 to 3 (slice end is exclusive)
data['日销售额'].mean() # mean of the column
# Read the data one row at a time.
import pandas as pd

data = pd.read_excel('../data1/compare.xlsx')

# Take the 2-D array of cell values first, then walk over it row by row.
table = data.values
for record in table:
    print(record)     # the whole row at once
    print(record[1])  # the value at column position 1 of that row
import pandas as pd

# Pull selected columns out of every row by integer position.
data = pd.read_excel('../data1/compare.xlsx')
for i in range(data.shape[0]):
    print(data.iloc[i, 1])  # value at column position 1 of row i
    # Sum of the values at column positions 1 and 3 of row i. The original
    # line used an expression as the assignment target, which is a syntax
    # error; a plain identifier is used instead.
    sum_col1_col3 = data.iloc[i, 1] + data.iloc[i, 3]
# Read the data row by row (three approaches).
import pandas as pd

data = pd.read_excel('../data1/compare.xlsx')

# Approach 1: take the 2-D array of cell values, then walk over it.
for record in data.values:      # every row
    for cell in record:         # every column value of that row
        print(cell, end=",")
    print()

# Approach 2: address each cell by row label and column name.
for row_label in data.index:
    for col_label in data.columns:
        print(data.loc[row_label, col_label], end=",")
    print()

# Approach 3: address each cell by integer row / column position.
row_count, col_count = data.shape
for row_pos in range(row_count):
    for col_pos in range(col_count):
        print(data.iloc[row_pos, col_pos], end=",")
    print()