Pandas加载数据
import pandas as pd
from pandas import DataFrame,Series
import sys
- read_csv 从文件,URL,文件型对象中加载带分隔符的数据,默认分隔符为逗号(,)
- read_table 从文件,URL,文件型对象中加载带分隔符的数据,默认分隔符为制表符(\t)
# Load a comma-separated file; the first line supplies the column names.
pd.read_csv('ex1.csv')
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
# header=None: no line is treated as a header, so the columns receive
# default integer labels (0, 1, 2, ...).
pd.read_csv('ex2.csv',header=None)
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
0 |
1 |
2 |
3 |
4 |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
# names=...: supply the column labels explicitly for a file without a header line.
pd.read_csv('ex2.csv',names =['a','b','c','d','message'])
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
# Label the columns explicitly and promote the 'message' column to the row index.
pd.read_csv(
    'ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
    index_col='message',
)
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
a |
b |
c |
d |
message |
|
|
|
|
hello |
1 |
2 |
3 |
4 |
world |
5 |
6 |
7 |
8 |
foo |
9 |
10 |
11 |
12 |
# Passing a list to index_col builds a hierarchical (two-level) row index
# from the 'key1' and 'key2' columns.
parsed = pd.read_csv('csv_mindex.csv',index_col=['key1','key2'])
parsed
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
|
value1 |
value2 |
key1 |
key2 |
|
|
one |
a |
1 |
2 |
b |
3 |
4 |
c |
5 |
6 |
d |
7 |
8 |
two |
a |
9 |
10 |
b |
11 |
12 |
c |
13 |
14 |
d |
15 |
16 |
# sep is a regular expression here: fields are split on runs of whitespace.
# A raw string is used so that '\s' is not parsed as an (invalid) string escape.
pd.read_csv('ex3.csv', sep=r'\s+')
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
A |
B |
C |
aaa |
-0.264438 |
-1.026059 |
-0.619500 |
bbb |
0.927272 |
0.302904 |
-0.032399 |
ccc |
-0.264273 |
-0.386314 |
-0.217601 |
ddd |
-0.871858 |
-0.348382 |
1.100491 |
# skiprows: skip the listed lines of the file before parsing
# (line numbers are 0-indexed per the pandas documentation).
pd.read_csv('ex4.csv',skiprows=[0,2,3])
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
# na_values: extra strings to read as missing (NaN) in every column;
# here each 'world' cell becomes NaN.
pd.read_csv('ex5.csv',na_values=['world'])
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
something |
a |
b |
c |
d |
message |
0 |
one |
1 |
2 |
3.0 |
4 |
NaN |
1 |
two |
5 |
6 |
NaN |
8 |
NaN |
2 |
three |
9 |
10 |
11.0 |
12 |
foo |
# Per-column missing-value markers: each key is a column name and each value
# is the list of strings to read as NaN in that column only.
sen = {
    'message': ['foo', 'NA'],
    'something': ['two'],
}
data_ex5 = pd.read_csv('ex5.csv', na_values=sen)
data_ex5
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
something |
a |
b |
c |
d |
message |
0 |
one |
1 |
2 |
3.0 |
4 |
NaN |
1 |
NaN |
5 |
6 |
NaN |
8 |
world |
2 |
three |
9 |
10 |
11.0 |
12 |
NaN |
Pandas存储数据
# Build a small three-row example frame from a dict of equal-length columns.
df = DataFrame(
    {
        'name': ['Rachel', 'Zack', 'Daniel'],
        'age': [15, 22, 35],
        'gemder': ['F', 'M', 'M'],
        'job': ['student', 'killer', 'doctor'],
    }
)
df
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
|
age |
gemder |
job |
name |
0 |
15 |
F |
student |
Rachel |
1 |
22 |
M |
killer |
Zack |
2 |
35 |
M |
doctor |
Daniel |
# Write df to Angelsof.csv using a tab as the field delimiter.
df.to_csv('Angelsof.csv',sep='\t')
# Same tab-delimited output, written to standard output instead of a file.
df.to_csv(sys.stdout,sep='\t')
age gemder job name
0 15 F student Rachel
1 22 M killer Zack
2 35 M doctor Daniel
# Omit both the row index and the header line from the output.
df.to_csv(sys.stdout,index=False,header=False)
15,F,student,Rachel
22,M,killer,Zack
35,M,doctor,Daniel
# Write missing values as the text "0". na_rep is documented as a string,
# so pass '0' rather than the integer 0; the emitted text is the same.
data_ex5.to_csv(sys.stdout, na_rep='0')
,something,a,b,c,d,message
0,one,1,2,3.0,4,0
1,0,5,6,0,8,world
2,three,9,10,11.0,12,0
# columns=...: write only the listed columns (the row index is still written).
df.to_csv(sys.stdout,columns=['name'])
,name
0,Rachel
1,Zack
2,Daniel