import pandas
import numpy as np
from pandas import Series , DataFrame
# Build a Series from a plain Python list; pandas assigns a default 0..n-1
# integer index. The np.nan entry makes the whole Series float64.
s = Series(data=[1, 2, 3, np.nan, 5, 1])
print(s)
# Output:
# 0    1.0
# 1    2.0
# 2    3.0
# 3    NaN
# 4    5.0
# 5    1.0
# dtype: float64
# numpy.random.randn(): creates an array of the given shape whose elements
# are drawn from the standard normal distribution N(0, 1).
# Ten samples from the standard normal distribution, wrapped in a named Series.
a = np.random.randn(10)
s = Series(data=a, name='Series 1')
print(s)
# Sample output (the values are random and differ on every run):
# 0    1.285564
# 1   -0.189391
# 2   -1.730828
# 3   -1.972853
# 4    0.104808
# 5   -0.225108
# 6    1.045197
# 7   -1.531798
# 8   -1.420962
# 9   -2.423736
# Name: Series 1, dtype: float64
# A dict becomes a Series whose index labels are the dict keys.
d = dict(a=1, b=2, c=3)
s = Series(data=d, name='Series from dict')
print(s)
# Output:
# a    1
# b    2
# c    3
# Name: Series from dict, dtype: int64
# A scalar is broadcast to every label of the supplied index.
s = Series(1.5, index=list('abcdefg'))
print(s)
# Output:
# a    1.5
# b    1.5
# c    1.5
# d    1.5
# e    1.5
# f    1.5
# g    1.5
# dtype: float64
# With both a dict and an explicit index, the index decides order and
# membership: 'd' has no entry in the dict, so it shows up as NaN and the
# values are stored as float64.
d = dict(a=1, b=2, c=3)
s = Series(data=d, index=list('acdb'), name='Seris from dict')
print(s)
# Output:
# a    1.0
# c    3.0
# d    NaN
# b    2.0
# Name: Seris from dict, dtype: float64
# DataFrame from a dict of equal-length lists: keys become column names,
# and the explicit index supplies the row labels.
d = dict(c_one=[1, 2, 3, 4], c_two=[4, 3, 2, 1])
df = DataFrame(data=d, index=[f'id{n}' for n in range(1, 5)])
print(df)
# Output:
#      c_one  c_two
# id1      1      4
# id2      2      3
# id3      3      2
# id4      4      1
# DataFrame from a dict of Series: rows are aligned on the union of the
# Series indexes, so column 'one' (which has no label 'd') gets NaN there.
d = {
    'one': Series(data=[1, 2, 3], index=list('abc')),
    'two': Series(data=[1, 2, 3, 4], index=list('abcd')),
}
df = DataFrame(data=d)
# Show the frame, then its row labels, column labels and raw ndarray values.
print(df, df.index, df.columns, df.values, sep='\n')
# Output:
#    one  two
# a  1.0    1
# b  2.0    2
# c  3.0    3
# d  NaN    4
# Index(['a', 'b', 'c', 'd'], dtype='object')
# Index(['one', 'two'], dtype='object')
# [[ 1.  1.]
#  [ 2.  2.]
#  [ 3.  3.]
#  [nan  4.]]
# DataFrame from a 2x5 ndarray holding 0..9, with explicit row/column labels.
d = DataFrame(
    data=np.arange(10).reshape(2, 5),
    index=['i1', 'i2'],
    columns=[f'c{k}' for k in range(1, 6)],
)
print(d)
# Output:
#     c1  c2  c3  c4  c5
# i1   0   1   2   3   4
# i2   5   6   7   8   9
# Rebuild the small two-column frame that the sorting examples operate on.
d = {
    'c_one': [1, 2, 3, 4],
    'c_two': [4, 3, 2, 1],
}
df = DataFrame(data=d, index=['id1', 'id2', 'id3', 'id4'])
print(df)
# Output:
#      c_one  c_two
# id1      1      4
# id2      2      3
# id3      3      2
# id4      4      1
# Sorting demos; each call returns a new frame and leaves df untouched:
#   1. rows ordered by index label, descending
#   2. rows ordered by the values of column 'c_two', ascending
#   3. rows ordered by the values of column 'c_one', ascending
print(
    df.sort_index(axis=0, ascending=False),
    df.sort_values(by='c_two'),
    df.sort_values(by='c_one'),
    sep='\n',
)
# Indexing and selection demo on a date-indexed frame.
# Six consecutive daily timestamps starting 2020-01-01 label the rows; the
# values 0..23 fill a 6x4 grid under columns A-D.
dates = pandas.date_range('20200101',periods=6)
df = DataFrame(np.arange(24).reshape(6,4),index=dates,columns=['A','B','C','D'])
print(df)
# A single column, via attribute access and via key access (same result).
print(df.A)
print(df['A'])
# Access the first three rows (a plain slice on a DataFrame selects rows).
print(df[0:3])
# A list of column names selects several columns at once.
print(df[['A','B','C']])
# Column first, then a row label: a single scalar.
print(df['A']['2020-01-02'])
# .loc is label-based: one row, all rows of some columns, one row of some columns.
print(df.loc['2020-01-03'])
print(df.loc[:,['A','C']])
print(df.loc['2020-01-01',['A','D']])
# .iloc is position-based: one cell, then rows 1 and 3 of column position 1.
print(df.iloc[0,0])
print(df.iloc[[1,3],1])
# Boolean mask: keep only the rows whose B value exceeds 5.
print(df[df.B>5])
from matplotlib import pyplot as plt
# Bar-chart demo: a 4x5 frame of non-negative random values (absolute values
# of standard-normal draws), one group of bars per row label.
df = DataFrame(
    data=np.abs(np.random.randn(4, 5)),
    index=['bj', 'sh', 'hz', 'sz'],
    columns=['St', 'Doc', 'Tea', 'Dri', 'Tra'],
)
df.plot(kind='bar')
plt.show()