Pandas - 数据的加载、存储与解析

  • Pandas加载数据
  • Pandas存储数据


Pandas加载数据

import pandas as pd
from pandas import DataFrame,Series
import sys
  • read_csv 从文件、URL、文件型对象中加载带分隔符的数据,默认分隔符为逗号(,)
  • read_table 从文件、URL、文件型对象中加载带分隔符的数据,默认分隔符为制表符(\t)
pd.read_csv('ex1.csv')
   a   b   c   d message
0  1   2   3   4   hello
1  5   6   7   8   world
2  9  10  11  12     foo
#pandas加载没有标题的数据
pd.read_csv('ex2.csv',header=None)
   0   1   2   3      4
0  1   2   3   4  hello
1  5   6   7   8  world
2  9  10  11  12    foo
#手动指定列名
pd.read_csv('ex2.csv',names =['a','b','c','d','message'])
   a   b   c   d message
0  1   2   3   4   hello
1  5   6   7   8   world
2  9  10  11  12     foo
#加载csv数据,并指定其中一列作为行索引
pd.read_csv('ex2.csv',
            names =['a','b','c','d','message'],
           index_col='message')
         a   b   c   d
message
hello    1   2   3   4
world    5   6   7   8
foo      9  10  11  12
#加载数据,创建层次化索引数据结构
parsed = pd.read_csv('csv_mindex.csv',index_col=['key1','key2'])
parsed
           value1  value2
key1 key2
one  a          1       2
     b          3       4
     c          5       6
     d          7       8
two  a          9      10
     b         11      12
     c         13      14
     d         15      16
#若CSV文件的分隔符是不规则的,可采取正则来进行分割
pd.read_csv('ex3.csv',sep='\s+')
            A         B         C
aaa -0.264438 -1.026059 -0.619500
bbb  0.927272  0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382  1.100491
#读取带有无效数据行的文件
pd.read_csv('ex4.csv',skiprows=[0,2,3])
   a   b   c   d message
0  1   2   3   4   hello
1  5   6   7   8   world
2  9  10  11  12     foo
pd.read_csv('ex5.csv',na_values=['world'])#na_values指定数据集里的哪些值应被视为缺失值(NaN)
  something  a   b     c   d message
0       one  1   2   3.0   4     NaN
1       two  5   6   NaN   8     NaN
2     three  9  10  11.0  12     foo
#加载csv数据,并按列指定不同的缺失值(NaN)标记
sen = {'message':['foo','NA'],'something':['two']}
data_ex5 = pd.read_csv('ex5.csv',na_values=sen)
data_ex5
  something  a   b     c   d message
0       one  1   2   3.0   4     NaN
1       NaN  5   6   NaN   8   world
2     three  9  10  11.0  12     NaN

Pandas存储数据

df = DataFrame({'name':['Rachel','Zack','Daniel'],
               'age':[15,22,35],
               'gemder':['F','M','M'],
               'job':['student','killer','doctor']})
df
   age gemder      job    name
0   15      F  student  Rachel
1   22      M   killer    Zack
2   35      M   doctor  Daniel
#将数据写入到文件
df.to_csv('Angelsof.csv',sep='\t')
#将写入的结果在控制台输出sys.stdout
df.to_csv(sys.stdout,sep='\t')
    age gemder  job name
0   15  F   student Rachel
1   22  M   killer  Zack
2   35  M   doctor  Daniel
#将数据写入到csv文件中,不保存行索引和列索引
df.to_csv(sys.stdout,index=False,header=False)
15,F,student,Rachel
22,M,killer,Zack
35,M,doctor,Daniel
#写入CSV文件,并替换NaN值
data_ex5.to_csv(sys.stdout,na_rep=0)
,something,a,b,c,d,message
0,one,1,2,3.0,4,0
1,0,5,6,0,8,world
2,three,9,10,11.0,12,0
#只保存部分数据
df.to_csv(sys.stdout,columns=['name'])
,name
0,Rachel
1,Zack
2,Daniel

你可能感兴趣的:(python,pandas,AI,之路)