Python 她优雅迷人简洁,我毫不犹豫转投入到她的怀抱,她也让我吃了不少亏。即使如此,她似乎愈加迷人了。夜深时,总结使用Python索引或切片时遇到的坑,希望能够帮到爱好Python的您,让我们一起学习与进步。
In [4]: sentence = 'You are a nice girl'
In [5]: L = sentence.split()
In [6]: L
Out[6]: ['You', 'are', 'a', 'nice', 'girl']
# 从0开始索引
In [7]: L[2]
Out[7]: 'a'
# 负数索引,从列表右侧开始计数
In [8]: L[-2]
Out[8]: 'nice'
# -1表示列表最后一项
In [9]: L[-1]
Out[9]: 'girl'
# 当正整数索引超出范围时
In [10]: L[100]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-10-78da2f882365> in <module>()
----> 1 L[100]
IndexError: list index out of range
# 当负整数索引超出范围时
In [11]: L[-100]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-11-46b47b0ecb55> in <module>()
----> 1 L[-100]
IndexError: list index out of range
# slice 索引
In [193]: sl = slice(0,-1,1)
In [194]: L[sl]
Out[194]: ['You', 'are', 'a', 'nice']
In [199]: sl = slice(0,100)
In [200]: L[sl]
Out[200]: ['You', 'are', 'a', 'nice', 'girl']
In [14]: L = [[1,2,3],{'I':'You are a nice girl','She':'Thank you!'},(11,22),'My name is Kyles']
In [15]: L
Out[15]:
[[1, 2, 3],
{'I': 'You are a nice girl', 'She': 'Thank you!'},
(11, 22),
'My name is Kyles']
# 索引第1项,索引为0
In [16]: L[0]
Out[16]: [1, 2, 3]
# 索引第1项的第2子项
In [17]: L[0][1]
Out[17]: 2
# 索引第2项词典
In [18]: L[1]
Out[18]: {'I': 'You are a nice girl', 'She': 'Thank you!'}
# 索引第2项词典的 “She”
In [19]: L[1]['She']
Out[19]: 'Thank you!'
# 索引第3项
In [20]: L[2]
Out[20]: (11, 22)
# 索引第3项(元组)的第1个元素
In [22]: L[2][0]
Out[22]: 11
# 索引第4项
In [23]: L[3]
Out[23]: 'My name is Kyles'
# 索引第4项,前3个字符
In [24]: L[3][:3]
Out[24]: 'My '
# 切片选择,从1到列表末尾(此处 L 为最初的 ['You', 'are', 'a', 'nice', 'girl'])
In [13]: L[1:]
Out[13]: ['are', 'a', 'nice', 'girl']
# 负数索引,选取列表后两项
In [28]: L[-2:]
Out[28]: ['nice', 'girl']
# 异常测试,这里没有报错!
In [29]: L[-100:]
Out[29]: ['You', 'are', 'a', 'nice', 'girl']
# 返回空
In [30]: L[-100:-200]
Out[30]: []
# 正向索引
In [32]: L[-100:3]
Out[32]: ['You', 'are', 'a']
# 返回空
In [33]: L[-1:3]
Out[33]: []
# 返回空
In [41]: L[0:0]
Out[41]: []
看似简单的索引,有的人不以为然。我们这里采用的是明确的数字索引,很容易排查错误;若索引是经过计算得出的变量,就千万要小心了,否则失之毫厘,谬以千里。
In [34]: import numpy as np
In [35]: arr = np.arange(10)
In [36]: arr
Out[36]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [40]: arr.shape
Out[40]: (10,)
# [0,1)
In [37]: arr[0:1]
Out[37]: array([0])
# [0,0)
In [38]: arr[0:0]
Out[38]: array([], dtype=int32)
# 右侧超出范围之后
In [42]: arr[:1000]
Out[42]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# 左侧超出之后
In [43]: arr[-100:1000]
Out[43]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# 两侧都超出
In [44]: arr[100:101]
Out[44]: array([], dtype=int32)
# 左侧超出范围,右侧为负数索引(结果非空)
In [45]: arr[-100:-2]
Out[45]: array([0, 1, 2, 3, 4, 5, 6, 7])
# []
In [46]: arr[-100:-50]
Out[46]: array([], dtype=int32)
In [49]: arr = np.arange(15).reshape(3,5)
In [50]: arr
Out[50]:
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
In [51]: arr.shape
Out[51]: (3, 5)
# axis = 0 增长的方向
In [52]: arr[0]
Out[52]: array([0, 1, 2, 3, 4])
# 选取第2行
In [53]: arr[1]
Out[53]: array([5, 6, 7, 8, 9])
# axis = 1 增长的方向,选取每一行的第1列
In [54]: arr[:,0]
Out[54]: array([ 0, 5, 10])
# axis = 1 增长的方向,选取每一行的第2列
In [55]: arr[:,1]
Out[55]: array([ 1, 6, 11])
# 选取每一行的第1,2列
In [56]: arr[:,0:2]
Out[56]:
array([[ 0, 1],
[ 5, 6],
[10, 11]])
# 右侧超出范围之后
In [57]: arr[:,0:100]
Out[57]:
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
# 左侧超出范围之后
In [62]: arr[:,-10:2]
Out[62]:
array([[ 0, 1],
[ 5, 6],
[10, 11]])
# []
In [58]: arr[:,0:0]
Out[58]: array([], shape=(3, 0), dtype=int32)
# []
In [59]: arr[0:0,0:1]
Out[59]: array([], shape=(0, 1), dtype=int32)
# 异常
In [63]: arr[:,-10]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-63-2ffa6627dc7f> in <module>()
----> 1 arr[:,-10]
IndexError: index -10 is out of bounds for axis 1 with size 5
In [67]: import numpy as np
In [68]: arr = np.arange(30).reshape(2,3,5)
In [69]: arr
Out[69]:
array([[[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]],
[[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24],
[25, 26, 27, 28, 29]]])
# 根据 axis = 0 选取
In [70]: arr[0]
Out[70]:
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
In [71]: arr[1]
Out[71]:
array([[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24],
[25, 26, 27, 28, 29]])
# 根据 axis = 1 选取
In [72]: arr[:,0]
Out[72]:
array([[ 0, 1, 2, 3, 4],
[15, 16, 17, 18, 19]])
In [73]: arr[:,1]
Out[73]:
array([[ 5, 6, 7, 8, 9],
[20, 21, 22, 23, 24]])
# 异常指出 axis = 1 超出范围
In [74]: arr[:,4]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-74-9d489478e7c7> in <module>()
----> 1 arr[:,4]
IndexError: index 4 is out of bounds for axis 1 with size 3
# 根据 axis = 2 选取
In [75]: arr[:,:,0]
Out[75]:
array([[ 0, 5, 10],
[15, 20, 25]])
# 降维
In [76]: arr[:,:,0].shape
Out[76]: (2, 3)
In [78]: arr[:,:,0:2]
Out[78]:
array([[[ 0, 1],
[ 5, 6],
[10, 11]],
[[15, 16],
[20, 21],
[25, 26]]])
In [79]: arr[:,:,0:2].shape
Out[79]: (2, 3, 2)
# 左/右侧超出范围
In [81]: arr[:,:,0:100]
Out[81]:
array([[[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]],
[[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24],
[25, 26, 27, 28, 29]]])
# 异常 axis = 0
In [82]: arr[100,:,0:100]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-82-21efcc74439d> in <module>()
----> 1 arr[100,:,0:100]
IndexError: index 100 is out of bounds for axis 0 with size 2
In [84]: s = pd.Series(['You','are','a','nice','girl'])
In [85]: s
Out[85]:
0 You
1 are
2 a
3 nice
4 girl
dtype: object
# 按照索引选择
In [86]: s[0]
Out[86]: 'You'
# []
In [87]: s[0:0]
Out[87]: Series([], dtype: object)
In [88]: s[0:-1]
Out[88]:
0 You
1 are
2 a
3 nice
dtype: object
# 易错点:ix 切片为闭区间 [start, stop],两端都包含
In [91]: s.ix[0:0]
Out[91]:
0 You
dtype: object
In [92]: s.ix[0:1]
Out[92]:
0 You
1 are
dtype: object
# ix 索引不存在的 index 时,抛出 KeyError
In [95]: s.ix[400]
KeyError: 400
# 按照从0开始的索引
In [95]: s.iloc[0]
Out[95]: 'You'
In [96]: s.iloc[1]
Out[96]: 'are'
In [97]: s.iloc[100]
IndexError: single positional indexer is out-of-bounds
In [98]: s = pd.Series(['You','are','a','nice','girl'], index=list('abcde'))
In [99]: s
Out[99]:
a You
b are
c a
d nice
e girl
dtype: object
In [100]: s.iloc[0]
Out[100]: 'You'
In [101]: s.iloc[1]
Out[101]: 'are'
# 按照 label 索引
In [103]: s.loc['a']
Out[103]: 'You'
In [104]: s.loc['b']
Out[104]: 'are'
In [105]: s.loc[['b','a']]
Out[105]:
b are
a You
dtype: object
# loc切片索引
In [106]: s.loc['a':'c']
Out[106]:
a You
b are
c a
dtype: object
In [108]: s.index
Out[108]: Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
In [114]: import pandas as pd
In [115]: df = pd.DataFrame({'open':[1,2,3],'high':[4,5,6],'low':[6,3,1]}, index=pd.period_range('30/12/2017',periods=3,freq='H'))
In [116]: df
Out[116]:
high low open
2017-12-30 00:00 4 6 1
2017-12-30 01:00 5 3 2
2017-12-30 02:00 6 1 3
# 按列索引
In [117]: df['high']
Out[117]:
2017-12-30 00:00 4
2017-12-30 01:00 5
2017-12-30 02:00 6
Freq: H, Name: high, dtype: int64
In [118]: df.high
Out[118]:
2017-12-30 00:00 4
2017-12-30 01:00 5
2017-12-30 02:00 6
Freq: H, Name: high, dtype: int64
In [120]: df[['high','open']]
Out[120]:
high open
2017-12-30 00:00 4 1
2017-12-30 01:00 5 2
2017-12-30 02:00 6 3
In [122]: df.ix[:]
D:\CodeTool\Python\Python36\Scripts\ipython:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing
In [123]: df.iloc[0:0]
Out[123]:
Empty DataFrame
Columns: [high, low, open]
Index: []
In [124]: df.ix[0:0]
Out[124]:
Empty DataFrame
Columns: [high, low, open]
Index: []
# 按照 label 索引
In [127]: df.index
Out[127]: PeriodIndex(['2017-12-30 00:00', '2017-12-30 01:00', '2017-12-30 02:00'], dtype='period[H]', freq='H')
In [128]: df.loc['2017-12-30 00:00']
Out[128]:
high 4
low 6
open 1
Name: 2017-12-30 00:00, dtype: int64
# 检查参数:带秒的时间字符串会按索引的小时频率(freq='H')匹配到所在的小时
In [155]: df.loc['2017-12-30 00:00:11']
Out[155]:
high 4
low 6
open 1
Name: 2017-12-30 00:00, dtype: int64
In [156]: df.loc['2017-12-30 00:00:66']
KeyError: 'the label [2017-12-30 00:00:66] is not in the [index]'
In [158]: df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index=[2,3,4])
In [159]: df
Out[159]:
a b
2 1 4
3 2 5
4 3 6
# iloc 取第一行正确用法
In [160]: df.iloc[0]
Out[160]:
a 1
b 4
Name: 2, dtype: int64
# loc 正确用法
In [165]: df.loc[[2,3]]
Out[165]:
a b
2 1 4
3 2 5
# 注意此处 index 是什么类型
In [167]: df.loc['2']
KeyError: 'the label [2] is not in the [index]'
# 查看 df.index:索引类型为 Int64Index(整数),因此字符串 '2' 不是有效 label
Out[172]: Int64Index([2, 3, 4], dtype='int64')
# 索引为字符串
In [168]: df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index=list('234'))
In [169]: df
Out[169]:
a b
2 1 4
3 2 5
4 3 6
In [170]: df.index
Out[170]: Index(['2', '3', '4'], dtype='object')
# 此处没有报错,千万注意 index 类型
In [176]: df.loc['2']
Out[176]:
a 1
b 4
Name: 2, dtype: int64
# ix 是一个功能强大的索引器,但是争议很大(现已弃用),往往是错误之源
# 咦,怎么输出与预想不一致!此处 index 是字符串,整数 2 被当作位置而不是 label,所以返回的是第3行
In [177]: df.ix[2]
D:\CodeTool\Python\Python36\Scripts\ipython:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing
See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
Out[177]:
a 3
b 6
Name: 4, dtype: int64
# 注意开闭区间:loc 按 label 切片是闭区间,两端都包含
In [180]: df.loc['2':'3']
Out[180]:
a b
2 1 4
3 2 5