DataFrame的行和列:df.loc['行', '列']
DataFrame行和列的获取分三个维度
先随机生成一个dataframe
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.randn(10,5), index=list('abcdefghij'), columns=list('ABCDE'))
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
c -0.359802 -2.049489 -0.615742 -1.953994 0.685243
d 0.232557 1.768284 -0.447015 2.373358 1.220536
e -0.997380 -0.447236 0.632368 -0.352590 -0.064736
f -1.220178 -0.314304 1.202184 0.018326 1.072153
g -1.508916 0.380466 0.359506 -0.742657 -0.373764
h 1.031420 -3.236676 0.444769 1.396802 -0.405590
i 0.166133 -0.051614 -0.146943 0.609431 -0.351814
j 1.857521 -0.159101 0.899745 1.108722 -0.615379
df[:3]
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
c -0.359802 -2.049489 -0.615742 -1.953994 0.685243
df[1:3] # 前闭后开
df['b':'c'] # 前闭后闭
# Output
A B C D E
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
c -0.359802 -2.049489 -0.615742 -1.953994 0.685243
# 布尔数组 (数组长度需等于行数)
df[[True,False,True,False,False,False, True, True, False, True]]
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
c -0.359802 -2.049489 -0.615742 -1.953994 0.685243
g -1.508916 0.380466 0.359506 -0.742657 -0.373764
h 1.031420 -3.236676 0.444769 1.396802 -0.405590
j 1.857521 -0.159101 0.899745 1.108722 -0.615379
df[df.A > 0]
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
d 0.232557 1.768284 -0.447015 2.373358 1.220536
h 1.031420 -3.236676 0.444769 1.396802 -0.405590
i 0.166133 -0.051614 -0.146943 0.609431 -0.351814
j 1.857521 -0.159101 0.899745 1.108722 -0.615379
df[(df.A > 0) & (df.B > 0)]
# Output
A B C D E
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
d 0.232557 1.768284 -0.447015 2.373358 1.220536
df[(df.A > 0) | (df.B > 0)]
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
d 0.232557 1.768284 -0.447015 2.373358 1.220536
g -1.508916 0.380466 0.359506 -0.742657 -0.373764
h 1.031420 -3.236676 0.444769 1.396802 -0.405590
i 0.166133 -0.051614 -0.146943 0.609431 -0.351814
j 1.857521 -0.159101 0.899745 1.108722 -0.615379
# 获取A列
df['A'] # 输出为Series类型
df[['A']] # 输出为DataFrame类型
# 获取A列和B列
df[['A', 'B']]
df[df.columns[0:2]]
pandas.DataFrame.loc 官方文档
# 输出为Series类型
df.loc['a']
df.loc['a', :]
# Output
A 0.299206
B -0.383297
C -0.931467
D -0.591609
E -1.131105
Name: a, dtype: float64
# 输出为DataFrame类型
df.loc[['a']]
df.loc[['a'], :]
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
# 使用标签索引
df.loc[['a', 'b', 'd']]
df.loc[['a', 'b', 'd'], :]
# 使用布尔数组 (数组长度需等于行数)
df.loc[[True, True, False, True, False, False, False, False, False, False]]
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
d 0.232557 1.768284 -0.447015 2.373358 1.220536
df.loc['a':'d', :]
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
c -0.359802 -2.049489 -0.615742 -1.953994 0.685243
d 0.232557 1.768284 -0.447015 2.373358 1.220536
df.loc[df.A > 0]
df.loc[df.A > 0, :]
# Output
A B C D E
a 0.299206 -0.383297 -0.931467 -0.591609 -1.131105
b 0.074351 0.791849 1.637467 -1.408712 -1.376527
d 0.232557 1.768284 -0.447015 2.373358 1.220536
h 1.031420 -3.236676 0.444769 1.396802 -0.405590
i 0.166133 -0.051614 -0.146943 0.609431 -0.351814
j 1.857521 -0.159101 0.899745 1.108722 -0.615379
# 选取A列
df.loc[:, 'A']
# 选取A列和C列
df.loc[:, ['A', 'C']]
# 选取A列到C列
df.loc[:, 'A':'C']
# 选取c行B列的值
df.loc['c', 'B']
# 选取A列和B列同时大于0的C列和D列
df.loc[((df.A > 0) & (df.B > 0)), ['C', 'D']]
# 令a行为10
df.loc['a', :] = 10
# 令B列为50
df.loc[:, 'B'] = 50
# 令b, c行的C到E列为30
df.loc[['b', 'c'], 'C':'E'] = 30
# 令C列小于0的行赋值为0
df.loc[df.C < 0] = 0
Example
tuples = [
('cobra', 'mark i'), ('cobra', 'mark ii'),
('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
('viper', 'mark ii'), ('viper', 'mark iii')
]
index = pd.MultiIndex.from_tuples(tuples)
values = [[12, 2], [0, 4], [10, 20],
[1, 4], [7, 1], [16, 36]]
df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
# Output
df
max_speed shield
cobra mark i 12 2
mark ii 0 4
sidewinder mark i 10 20
mark ii 1 4
viper mark ii 7 1
mark iii 16 36
df.loc['cobra']
max_speed shield
mark i 12 2
mark ii 0 4
# return a Series
df.loc[('cobra', 'mark ii')]
max_speed 0
shield 4
Name: (cobra, mark ii), dtype: int64
# return a dataframe
df.loc[[('cobra', 'mark ii')]]
max_speed shield
cobra mark ii 0 4
# return a Series
df.loc['cobra', 'mark i']
max_speed 12
shield 2
Name: (cobra, mark i), dtype: int64
df.loc[('cobra', 'mark i'), 'shield']
2
df.loc[('cobra', 'mark i'):'viper']
max_speed shield
cobra mark i 12 2
mark ii 0 4
sidewinder mark i 10 20
mark ii 1 4
viper mark ii 7 1
mark iii 16 36
df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
max_speed shield
cobra mark i 12 2
mark ii 0 4
sidewinder mark i 10 20
mark ii 1 4
viper mark ii 7 1
pandas.DataFrame.iloc 官方文档
(注意:以下 iloc / at / iat 的示例使用的是文章开头生成的 10 行 5 列的 df,而不是上面 MultiIndex 示例中重新赋值的 df)
# return a Series
df.iloc[1]
df.iloc[1, :]
# return a dataframe
df.iloc[[1]]
df.iloc[[1], :]
df.iloc[:3, :]
df.iloc[:3]
df.iloc[[1, 3, 5]]
df.iloc[[1, 3, 5], :]
df.iloc[:, 1]
df.iloc[:, 0:3]
df.iloc[:,:3]
df.iloc[:, [0, 2, 3]]
df.iloc[0, 1]
df.iloc[[1,2], 1:4]
.ix 可以混合标签索引和整数索引(注意:.ix 已在 pandas 0.20 中弃用,并在 pandas 1.0 中移除,请改用 .loc 或 .iloc)
However, when an axis is integer based, ONLY label based access and not positional access is supported. Thus, in such cases, it’s usually better to be explicit and use .iloc or .loc.
pandas.DataFrame.at 官方文档
df.at['c', 'C']
df.at['c', 'C'] = 10
pandas.DataFrame.iat 官方文档
df.iat[2, 2]
df.iat[2, 2] = 10