title: D05|Series详解,从构造开始,深度解析Series
author: Adolph Lee
categories: 数据挖掘基础
tags:
本文使用的是 pandas 0.25.3。从 0.25 版本起,pandas 只支持 Python 3.5.3 及以上版本,不再支持 Python 2.7;同时三维数据结构 Panel 在 0.25 中被官方彻底移除,新版本删除了很多方法和属性,也新增了不少。对于那些已经取消或者将要取消的接口,我们不再提及。时代在进步,一起向前看。
Series是pandas特有,能够保存任何类型数据(整数、字符串、浮点数、Python对象等)的一维数组。
pd.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)
构造一个Series
# Construct a Series with explicit labels, dtype and name.
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d)
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d)

# Assigning to a label that is not in the index yet adds a new element
# to the Series in place.
series_1d['d'] = 'Imp'
print(series_1d)
接下来的内容主要以Series的增、查、改、删为主线来剖析Series,而官方文档更倾向于以转换、索引、切片、运算、时序等API接口为主线来讲解。如果逐一讲解API,一是过于繁杂,二是不如直接去看官方文档来得直接,因此本文以数据的增、查、改、删为基准来讲解,更容易为大家理解和使用。想更深入了解的朋友,可以去阅读官方文档。
向Series中插入元素
利用索引
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d)

# Insert by label: 'd' does not exist yet, so the assignment enlarges
# the Series in place instead of overwriting an existing element.
series_1d['d'] = 'Imp'
print(series_1d)
Series.append(self, to_append, ignore_index=False, verify_integrity=False)
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d)

# NOTE: Series.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat accepts the same ignore_index / verify_integrity options and
# also exists in pandas 0.25, so it is used here to keep the example
# runnable on every version.
extra = pd.Series('Imp', index=['d'])
new_series_1d = pd.concat([series_1d, extra])
print(series_1d)  # the original Series is left untouched
print(new_series_1d)

# ignore_index=True drops the labels and renumbers the result 0..n-1.
new_series_1d = pd.concat([series_1d, extra], ignore_index=True)
print(new_series_1d)

# verify_integrity=True raises ValueError when labels collide ('c' already
# exists). Use a fresh label such as 'd' to make the call succeed.
duplicate = pd.Series('Imp', index=['c'])
try:
    new_series_1d = pd.concat([series_1d, duplicate], verify_integrity=True)
except ValueError as err:
    print('verify_integrity rejected the duplicate label:', err)
else:
    print(new_series_1d)
我们可以像访问列表一样通过索引访问Series中的元素。未指定index时,Series的索引默认从0开始;在切片或iloc、iat中,最后一个元素可以用-1表示(在默认整数索引下直接写series[-1]会被当作标签查找而抛出KeyError)。同时pandas还提供了许多其它的方法帮助我们访问Series中的元素,如at、iat、loc、iloc、get等。下面我们将一一讲解这些方法,并帮助大家剖析它们的异同。
import pandas as pd

# Single element by label.
series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d['a'])

# Single element on the default integer index (labels 0..n-1).
series_1d = pd.Series(['Arya', 'Jaime', 'Tyrone'], dtype='object', name='冰与火之歌')
print(series_1d[1])

# Several elements through a slice; a negative bound counts from the end.
series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone', 'Imp'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d[1:-1])
Series.get(self, key, default=None)
import pandas as pd

series_1d = pd.Series(['Arya', 'Jaime', 'Tyrone'], dtype='object', name='冰与火之歌')

# get() looks the key up by label and returns the element when it exists.
print(series_1d.get(0))

# A missing key does not raise: the supplied default is returned instead.
print(series_1d.get(5, default=9999))
Series.at[]
import pandas as pd

# .at fetches a single scalar by label; with the default index the labels
# are the integers 0..n-1.
series_1d = pd.Series(['Arya', 'Jaime', 'Tyrone'], dtype='object', name='冰与火之歌')
print(series_1d.at[0])

# With a custom index the label is whatever was passed as `index`.
series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d.at['a'])
Series.iat[]
import pandas as pd

# .iat fetches a single scalar by integer position, whatever the labels are.
series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d.iat[2])

series_1d = pd.Series(['Arya', 'Jaime', 'Tyrone'], dtype='object', name='冰与火之歌')
print(series_1d.iat[0])
Series.loc[]
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d.loc['a'])                   # single label -> scalar
print(series_1d.loc[['a', 'b']])            # list of labels -> Series
print(series_1d.loc[[True, False, True]])   # boolean mask -> Series

series_1d = pd.Series(['Arya', 'Jaime', 'Tyrone'], dtype='object', name='冰与火之歌')
# .loc slices by label and includes BOTH ends, so 0:1 yields labels 0 and 1
# (the same two elements the positional slice [0:-1] selects here).
print(series_1d.loc[0:1])
Series.iloc[]
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)
print(series_1d.iloc[0])                     # single position -> scalar
print(series_1d.iloc[[2, 0]])                # list of positions -> Series
print(series_1d.iloc[[True, False, True]])   # boolean mask -> Series

series_1d = pd.Series(['Arya', 'Jaime', 'Tyrone'], dtype='object', name='冰与火之歌')
# .iloc slices by position with Python semantics: the end is exclusive and
# negative bounds count from the end.
print(series_1d.iloc[0:-1])
小结:get、at、loc都是通过索引名称去定位元素,iat、iloc则是通过元素位置来定位。loc、iloc都可以通过布尔值来访问元素
import numpy as np
import pandas as pd

series_1d = pd.Series(np.arange(0, 20))

# head()/tail() return the first/last n elements; n defaults to 5.
print(series_1d.head())
print(series_1d.head(3))
print(series_1d.tail())
print(series_1d.tail(3))
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)

# Iterating a Series directly yields its values (not its labels).
for element in series_1d:
    print(element)
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)

# items() yields (label, value) tuples in index order.
for element in series_1d.items():
    print(element)
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)

# item() converts a Series holding exactly ONE element into a Python scalar;
# calling it on this three-element Series raises ValueError. Narrow the
# Series down to a single element first.
print(series_1d.loc[['a']].item())
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone'],
    index=['a', 'b', 'c'],
    dtype='object',
    name='冰与火之歌',
)

# keys() returns the index; .values exposes the underlying array of either
# the index or the Series itself.
print(series_1d.keys().values)
print(series_1d.values)
replace(self, to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad')
import numpy as np
import pandas as pd

series_1d = pd.Series(np.arange(0, 5), index=['a', 'b', 'c', 'd', 'e'])

# Replace one value; a new Series is returned and the original is unchanged.
new_series_1d = series_1d.replace(3, 3333)
print(new_series_1d)

# Replace several values at once: the two lists are matched pairwise.
new_series_1d = series_1d.replace([3, 2, 1], [333, 2222, 111])
print(new_series_1d)

# inplace=True modifies series_1d itself and returns None.
series_1d.replace([3, 2, 1], [333, 2222, 111], inplace=True)
print(series_1d)
import numpy as np
import pandas as pd

# Start from object dtype: the update below writes strings into the Series,
# which an int64 Series cannot hold (recent pandas versions warn about or
# reject the implicit upcast).
series_1d = pd.Series(np.arange(0, 5)).astype(object)

# update() aligns on labels and modifies series_1d in place. NaN entries in
# the argument are skipped (label 4 keeps its value) and labels missing from
# series_1d (5 here) are ignored.
series_1d.update(pd.Series(['a', 'b', np.nan, 'e'], index=[2, 3, 4, 5]))
print(series_1d)
drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')
import numpy as np
import pandas as pd

# drop() removes elements by label and returns a new Series.
series_1d = pd.Series(np.arange(0, 5))
new_series_1d = series_1d.drop(3)
print(new_series_1d)

# Several labels can be dropped at once.
new_series_1d = series_1d.drop([3, 2])
print(new_series_1d)

series_1d = pd.Series(np.arange(0, 5), index=['a', 'b', 'c', 'd', 'e'])
new_series_1d = series_1d.drop(['a', 'e'])
print(new_series_1d)

# The `index` keyword is an alternative spelling of `labels` for a Series.
new_series_1d = series_1d.drop(index=['a', 'e'])
print(new_series_1d)
import numpy as np
import pandas as pd

series_1d = pd.Series(np.arange(0, 5), index=['a', 'b', 'c', 'd', 'e'])

# pop() removes the element with the given label from series_1d IN PLACE and
# returns the removed value (a scalar, not a new Series).
popped_value = series_1d.pop('b')
print(popped_value)
print(series_1d)
dropna(self, axis=0, inplace=False, **kwargs)
import numpy as np
import pandas as pd

series_1d = pd.Series(
    ['Arya', 'Jaime', 'Tyrone', np.nan],
    index=['a', 'b', 'c', 'd'],
    dtype='object',
    name='冰与火之歌',
)

# dropna() returns a new Series without the missing values; the result has
# to be captured (or printed), because series_1d itself is not modified.
new_series_1d = series_1d.dropna()
print(new_series_1d)
drop_duplicates(self, keep='first', inplace=False)
import pandas as pd

series_1d = pd.Series(['a', 'a', 'v', 'b', 'c', 'a', 'b'])

# Default keep='first': the first occurrence of each value survives.
new_series_1d = series_1d.drop_duplicates()
print(new_series_1d)

# keep='last': the last occurrence of each value survives.
new_series_1d = series_1d.drop_duplicates(keep='last')
print(new_series_1d)

# keep=False removes every value that occurs more than once;
# inplace=True applies the change to series_1d itself.
series_1d.drop_duplicates(keep=False, inplace=True)
print(series_1d)