import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
创建Series以及对Series的相关操作
# 自动添加索引
# np.nan: missing value (NaN); because of it the Series is stored as float64
>>>s1 = pd.Series([1,2,3,4,np.nan,5,6,7])
0 1.0
1 2.0
2 3.0
3 4.0
4 NaN
5 5.0
6 6.0
7 7.0
dtype: float64
>>>s1.values
array([ 1., 2., 3., 4., nan, 5., 6., 7.])
>>>s1.index
RangeIndex(start=0, stop=8, step=1)
# 主动添加索引
>>>s2 = pd.Series([21,23,42,21,23],index=['Jack','Lucy','Helen','Milky','Jasper'])
Jack 21
Lucy 23
Helen 42
Milky 21
Jasper 23
dtype: int64
>>>s2['Jack']
21
>>>s2.loc['Jack'] # 根据名字索引
21
>>>s2.iloc[0] # 根据位置索引
21
>>>print(s2.shape,s2.size)
(5,) 5
>>>s2.head(2) #选择头两行,默认前五行
Jack 21
Lucy 23
dtype: int64
>>>s2.describe()
count 5.0
mean 26.0
std 9.0
min 21.0
25% 21.0
50% 23.0
75% 23.0
max 42.0
dtype: float64
>>>s2.sort_values() # 对values排序
Jack 21
Milky 21
Lucy 23
Jasper 23
Helen 42
dtype: int64
>>>s2[s2>22] # Select the people who are older than 22
Lucy 23
Helen 42
Jasper 23
dtype: int64
>>>'Lucy' in s2
True
>>>s2_dict = s2.to_dict() # Series转换为字典
{'Helen': 42, 'Jack': 21, 'Jasper': 23, 'Lucy': 23, 'Milky': 21}
>>>s2_series = pd.Series(s2_dict) # 字典转换为Series
Helen 42
Jack 21
Jasper 23
Lucy 23
Milky 21
dtype: int64
# 指定index时,dict中没有的索引(如'Tom')会被自动添加,对应的值为NaN
>>>name = ['Jack','Lucy','Helen','Milky','Tom','Jasper','Helen']
>>>s2_new = pd.Series(s2_dict,index = name)
Jack 21.0
Lucy 23.0
Helen 42.0
Milky 21.0
Tom NaN
Jasper 23.0
Helen 42.0
dtype: float64
>>>s2_new.drop_duplicates() # drop duplicate values, keeping the first occurrence (去掉values中的重复值,保留第一个)
Jack 21.0
Lucy 23.0
Helen 42.0
Tom NaN
dtype: float64
>>>pd.isnull(s2_new) # 判断是否为空值,等价于 s2_new.isnull()
Jack False
Lucy False
Helen False
Milky False
Tom True
Jasper False
Helen False
dtype: bool