Pandas - Series、DataFrame、plot (demo)

import pandas
import numpy as np

from pandas import Series , DataFrame

# np.nan marks a missing entry; the resulting Series is stored as float64.
s = Series(data=[1, 2, 3, np.nan, 5, 1])
print(s)

0    1.0
1    2.0
2    3.0
3    NaN
4    5.0
5    1.0
dtype: float64


# np.random.randn(n) creates an array of the given shape whose elements are
# drawn from the standard normal distribution N(0, 1).
a = np.random.randn(10)
# `name` labels the Series and is shown in the footer of its printed form.
s = Series(a, name='Series 1')
print(s)

0    1.285564
1   -0.189391
2   -1.730828
3   -1.972853
4    0.104808
5   -0.225108
6    1.045197
7   -1.531798
8   -1.420962
9   -2.423736
Name: Series 1, dtype: float64

# Building a Series from a dict: the keys become the index labels.
d = dict(a=1, b=2, c=3)
s = Series(data=d, name='Series from dict')
print(s)

a    1
b    2
c    3
Name: Series from dict, dtype: int64

# A scalar value is repeated for every label supplied in `index`.
s = Series(1.5, index=list('abcdefg'))
print(s)

a    1.5
b    1.5
c    1.5
d    1.5
e    1.5
f    1.5
g    1.5
dtype: float64

# With an explicit index the values are looked up label by label in the dict;
# 'd' is not a key, so that position is filled with NaN.
d = dict(a=1, b=2, c=3)
s = Series(data=d, index=['a', 'c', 'd', 'b'], name='Seris from dict')
print(s)

a    1.0
c    3.0
d    NaN
b    2.0
Name: Seris from dict, dtype: float64

# Each dict key becomes a column; `index` supplies the row labels.
d = {
    'c_one': [1, 2, 3, 4],
    'c_two': [4, 3, 2, 1],
}
df = DataFrame(data=d, index=['id%d' % n for n in range(1, 5)])
print(df)
     c_one  c_two
id1      1      4
id2      2      3
id3      3      2
id4      4      1

# Columns built from Series are aligned on their index labels: 'one' has no
# entry for 'd', so that cell is NaN in the resulting frame.
d = {
    'one': Series([1, 2, 3], index=list('abc')),
    'two': Series([1, 2, 3, 4], index=list('abcd')),
}

df = DataFrame(data=d)
# Show the frame, then its row labels, column labels and underlying array.
print(df, df.index, df.columns, df.values, sep='\n')

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4
Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['one', 'two'], dtype='object')
[[ 1.  1.]
 [ 2.  2.]
 [ 3.  3.]
 [nan  4.]]


# The integers 0..9 laid out as 2 rows x 5 columns, with explicit labels.
d = DataFrame(
    data=np.arange(10).reshape((2, 5)),
    index=['i1', 'i2'],
    columns=['c%d' % n for n in range(1, 6)],
)
print(d)

    c1  c2  c3  c4  c5
i1   0   1   2   3   4
i2   5   6   7   8   9

# Two-column frame with labelled rows, used by the sorting calls that follow.
d = dict(c_one=[1, 2, 3, 4], c_two=[4, 3, 2, 1])
df = DataFrame(data=d, index=['id1', 'id2', 'id3', 'id4'])
print(df)

     c_one  c_two
id1      1      4
id2      2      3
id3      3      2
id4      4      1

# Sort by the row labels (axis 0 is the default) in descending order.
print(df.sort_index(ascending=False))
# Sort the rows by the values of a single column; ascending is the default.
print(df.sort_values('c_two'))
print(df.sort_values('c_one'))


# Build a 6x4 frame of the integers 0..23, indexed by six consecutive days
# starting at 2020-01-01.
dates = pandas.date_range('20200101', periods=6)
df = DataFrame(np.arange(24).reshape(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
# Select column A (attribute access and key access)
print(df.A)
print(df['A'])
# Select the first three rows
print(df[0:3])
# Select the first three columns
print(df[['A', 'B', 'C']])
# Select by column name, then by row label
# (df.loc['2020-01-02', 'A'] does the same lookup in a single step)
print(df['A']['2020-01-02'])


# .ix has been removed from pandas and can no longer be used.
# iloc ("index locate") selects by integer position, so its arguments are
# integers, e.g. df.iloc[10:20, 3:5]
# loc selects by index labels and column names

print(df.loc['2020-01-03'])  # one row, by row label
print(df.loc[:, ['A', 'C']])  # all rows, columns A and C
print(df.loc['2020-01-01', ['A', 'D']])  # columns A and D of row 2020-01-01

print(df.iloc[0, 0])  # by integer position
print(df.iloc[[1, 3], 1])  # 2nd and 4th rows of the 2nd column
print(df[df.B > 5])  # rows whose B value is greater than 5

# Plotting with pandas (pyplot is only used to display the figure)
from matplotlib import pyplot as plt

# 4x5 frame of absolute values of standard-normal samples.
df = DataFrame(
    np.abs(np.random.randn(4, 5)),
    index=['bj', 'sh', 'hz', 'sz'],
    columns=['St', 'Doc', 'Tea', 'Dri', 'Tra'],
)

# `kind` is a string naming the chart type; 'bar' draws a bar chart.
df.plot(kind='bar')
plt.show()  # display the figure

你可能感兴趣的:(python)