pandas 常用

import pandas as pd
# Three rows of three strings each; every inner list becomes one DataFrame row
# and pandas assigns the default integer labels 0, 1, 2 to rows and columns.
rows = [
    ['abc', 'def', 'ghij'],
    ['klm', 'nop', 'qrs'],
    ['tuv', 'wxy', 'zzz'],
]
df = pd.DataFrame(data=rows)
print(df)

# Write only the cell values: header=False omits the column-label line and
# index=False omits the row-label column.
df.to_csv('temp.csv', sep=',', header=False, index=False)

# temp.csv里面的结果:
abc,def,ghij
klm,nop,qrs
tuv,wxy,zzz


# .to_csv()函数中header默认是True, index默认是True
import pandas as pd
# Same 3x3 frame built from a list of rows (default integer labels).
table = [
    ['abc', 'def', 'ghij'],
    ['klm', 'nop', 'qrs'],
    ['tuv', 'wxy', 'zzz'],
]
df = pd.DataFrame(table)
print(df)

# header and index both default to True; they are spelled out here to make
# clear why the file gains a label line and a leading row-label column.
df.to_csv(path_or_buf='temp.csv', sep=',', header=True, index=True)

# temp.csv里面的结果:
,0,1,2
0,abc,def,ghij
1,klm,nop,qrs
2,tuv,wxy,zzz



import pandas as pd
# When a DataFrame is built from a dict, each list is a COLUMN, not a row:
# q1 holds abc/def/ghij, q2 holds klm/nop/qrs, q3 holds tuv/wxy/zzz.
q1, q2, q3 = ['abc', 'def', 'ghij'], ['klm', 'nop', 'qrs'], ['tuv', 'wxy', 'zzz']
df = pd.DataFrame({'q1': q1, 'q2': q2, 'q3': q3})
print(df)

# Values only: no column-name line, no row-label column. Because the lists
# are columns, the first CSV line is abc,klm,tuv.
df.to_csv('temp.csv', sep=',', header=False, index=False)

# temp.csv里面的结果:
abc,klm,tuv
def,nop,wxy
ghij,qrs,zzz




import pandas as pd
# Dict-based construction: every list is one named column (q1, q2, q3).
q1, q2, q3 = ['abc', 'def', 'ghij'], ['klm', 'nop', 'qrs'], ['tuv', 'wxy', 'zzz']
df = pd.DataFrame({'q1': q1, 'q2': q2, 'q3': q3})
print(df)

# header and index default to True; passing them explicitly shows why the
# file starts with ",q1,q2,q3" and each data line starts with its row label.
df.to_csv(path_or_buf='temp.csv', sep=',', header=True, index=True)

# temp.csv里面的结果:
,q1,q2,q3
0,abc,klm,tuv
1,def,nop,wxy
2,ghij,qrs,zzz
pd.read_csv()


header参数: 0 表示取文件中的第一行作为列名(未指定 names 时的默认行为)
       None: 表示文件中没有表头行,系统自动生成 0,1,2,3 ... 作为列名
# dataframe的reset_index()函数

import pandas as pd
rows = [
    ['abc', 'def', 'ghij'],
    ['klm', 'nop', 'qrs'],
    ['tuv', 'wxy', 'zzz'],
]
# Alternative construction from named columns:
#   pd.DataFrame({'q1': rows[0], 'q2': rows[1], 'q3': rows[2]})
df = pd.DataFrame(rows)
print(df)

# frac=1.0 with axis=0 draws every row exactly once, i.e. a random row
# permutation. The sampled frame still carries the original ROW labels in
# shuffled order, so reset_index(drop=True) renumbers the rows 0..n-1 and
# discards the old labels instead of keeping them as an "index" column.
shuffled = df.sample(frac=1.0, axis=0)
df = shuffled.reset_index(drop=True)
print(df)

# drop = True (采样是随机的,每次运行的行顺序可能不同)
     0    1     2
0  abc  def  ghij
1  klm  nop   qrs
2  tuv  wxy   zzz
     0    1     2
0  tuv  wxy   zzz
1  abc  def  ghij
2  klm  nop   qrs

# drop = False

     0    1     2
0  abc  def  ghij
1  klm  nop   qrs
2  tuv  wxy   zzz
   index    0    1     2
0      1  klm  nop   qrs
1      2  tuv  wxy   zzz
2      0  abc  def  ghij


import pandas as pd
# Build the 3x3 frame from a list of rows (default integer labels).
# The same data could instead be passed as a dict of named columns
# ('q1', 'q2', 'q3') to get labelled columns.
data = [
    ['abc', 'def', 'ghij'],
    ['klm', 'nop', 'qrs'],
    ['tuv', 'wxy', 'zzz'],
]
df = pd.DataFrame(data)
print(df)

# Shuffle all rows (frac=1.0 along axis=0), then replace the shuffled row
# labels with a fresh 0..n-1 range; drop=True throws the old labels away.
df = (
    df
    .sample(frac=1.0, axis=0)
    .reset_index(drop=True)
)
print(df)

你可能感兴趣的:(机器学习)