CONTENTS
- 一、CSV文件读取
-
- 1.读行标题
- 2.按行读取
- 3.按列读取
- 4.按位置读取
- 二、CSV文件筛选
-
- 1.“特定列”按数值筛选数据
- 2.统计min,max,std,
- 3.排序
- 三、CSV文件删除
- 四、CSV文件改变
-
- 1.行汇总
- 2.特定列,行汇总
- 3.列顺序转变
- 4.改列值
- 五、CSV文件转移
- 六、CONTINUE
一、CSV文件读取
1.读行标题
#reading headers
print(a.columns)
2.按行读取
#reading row 注意break右边取不到
print(a[1:2]) #第一行
print(a[:2]) #0、1行
print(a.head(2)) #头两行 尾两行用tail
print(a[:]) #All rows
print(a[-1:0]) #倒数第二行
3.按列读取
#reading column
print(a['T']) #a['T'][0:3] T列的特定行
print(a.T)
#reading multiple columns : change single str to a list
print(a[['T','TI']])
4.按位置读取
# reading the data in a specific location
print(a.iloc[2,1])
二、CSV文件筛选
1.“特定列”按数值筛选数据
#filtering data in a specific row
print(a['N'].loc[a['N']==3])
print(a.N.loc[a['NC']==3])
print(a.loc[a['N']<=2])
print(a.loc[(a['N']==2) | (a['S']==2)]) #| 并
print(a.loc[(a['N']==2) & (a['S']==2)]) #& 并
print(a.reset_index(drop=True)) # 去掉索引
print(a.loc[a['T'].str.contains('')]) # 包含特定字符串
2.统计min,max,std,
# max,min,std,0.5,0.75,0.25
print(a.describe())
3.排序
# sort 排序
print(a.sort_values('T',ascending=False)) #ascending升序
三、CSV文件删除
# drop a specific column
print(a.drop(columns=['TOTAL']).head(2))
四、CSV文件改变
1.行汇总
# making changes to data
a['TOTAL']=a['T']+a['TI']
print(a.head(2))
2.特定列,行汇总
# 特定列,逐行相加运算
a['TOTAL']=a.iloc[:,5:9].sum(axis=1) #:means all rows所有行,逗号后面为五到八列
print(a['TOTAL'].head(3))
3.列顺序转变
# 转换列的顺序,,提取特定几列
columns=list(a.columns.values)
print(a[columns[0:4]+columns[-2:-1]].head(3))
4.改列值
a.loc[a['RE']==-9,'RE']=0
print(a)
a.loc[a['RE']==0,'RE']= 'Bingo'
print(a)
a.loc[a['R']==0,['RE','REC']]= 'Bingo'
print(a)
五、CSV文件转移
# Saving to a new csv
b=a.to_csv('new_csv',index=False)
print(b)
六、CONTINUE
# a['count']=1
a.groupby(['RECO_SR']).count()['count']
print(a)
import pandas as pd
def readcsv(path):
for i in os.listdir(path):
wl=os.path.join(path,i)
a=pd.read_csv(wl)
##########d读、筛、删、改、转
#reading headers
print(a.columns)
#read a specific column
print(a['T'][:3])
print(a.T)
#read multi_columns :change single to a list of name
print(a[['T','T']])
#read row 注意读行读区间
print(a[1:2])
#find specific data in a specific row
print(a['NE'].loc[a['NEE']==3])
print(a.N.loc[a['N']==3])
# read row by row
for index, row in a.iterrows():
print(index,row['TI'])
# read the data in a specific location
print(a.iloc[2,1])
# max,min,std,0.5,0.75,0.25
print(a.describe())
# sort 排序
print(a.sort_values('T',ascending=False))
# ########Flitering data
print(a.loc[a['NEC']<=2])
print(a.loc[(a['NE']==2) | (a['S']==2)])
print(a.loc[(a['NE']==2) & (a['S']==2)])
print(a.reset_index(drop=True))
print(a.loc[a['T'].str.contains('')])
########## making changes to data
a['TOTAL']=a['T']+a['TI']
print(a.head(2))
drop a specific column
print(a.drop(columns=['TOTAL']).head(2))
# 特定列,逐行相加运算
:, means all rows所有行,逗号后面为五到九列
a['TOTAL']=a.iloc[:,5:9].sum(axis=1)
print(a['TOTAL'].head(3))
# 转换列的顺序,,提取特定几列
columns=list(a.columns.values)
print(a[columns[0:4]+columns[-2:-1]].head(3))
######### Saving to a new csv
b=a.to_csv('new_csv',index=False)
print(b)
# #########condional change
a.loc[a['R']==-9999,'R']=0
print(a)
a.loc[a['RE']==0,'RE']= 'Bingo'
print(a)
a.loc[a['RE']==0,['RE','RE']]= 'Bingo'
print(a)
# a['count']=1
a.groupby(['RE']).count()['count']
print(a)
readcsv(r'D:\副本 (2)')