###移除重复数据
data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4,
'k2': [1, 1, 2, 3, 3, 4, 4]})
data
    k1  k2
0  one   1
1  one   1
2  one   2
3  two   3
4  two   3
5  two   4
6  two   4
data.duplicated()
0    False
1     True
2    False
3    False
4     True
5    False
6     True
dtype: bool
data.drop_duplicates()
    k1  k2
0  one   1
2  one   2
3  two   3
5  two   4
data['v1'] = range(7)
data
    k1  k2  v1
0  one   1   0
1  one   1   1
2  one   2   2
3  two   3   3
4  two   3   4
5  two   4   5
6  two   4   6
data.drop_duplicates(['k1'])
    k1  k2  v1
0  one   1   0
3  two   3   3
data.drop_duplicates(['k1', 'k2'], take_last=True)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
    data.drop_duplicates(['k1', 'k2'], take_last=True)
TypeError: drop_duplicates() got an unexpected keyword argument 'take_last'
原因：take_last 参数自 pandas 0.17.0 起已被弃用，并在后续版本中被移除，取而代之的是 keep 参数。将 take_last=True 改为 keep='last'（保留每组重复项中的最后一行）即可：
data.drop_duplicates(['k1','k2'],keep='last')
    k1  k2  v1
1  one   1   1
2  one   2   2
4  two   3   4
6  two   4   6