# reindex函数 (the reindex function)
# reindex的method选项 (the method option of reindex):
# These names are imported here because this section runs before the
# import lines that appear later in the file.
import numpy as np
from pandas import DataFrame, Series

# Reindex onto a larger label set; labels absent from the original become NaN.
x = Series([4, 7, 5], index=['a', 'b', 'c'])
y = x.reindex(['a', 'b', 'c', 'd'])
print(y)
"""
a 4.0
b 7.0
c 5.0
d NaN
dtype: float64
"""
# fill_value sets the value used for labels that did not exist before.
# It is an argument of reindex(), not of print().
y_filled = x.reindex(['a', 'b', 'c', 'd'], fill_value=0)
print(y_filled)
"""
a 4
b 7
c 5
d 0
dtype: int64
"""
# Reindex and fill the new labels by carrying the previous value forward.
# The values are passed as a single list, and method= is an argument of
# reindex(), not of print().
x = Series(['blue', 'purple'], index=[0, 2])
colors_ffill = x.reindex(range(4), method='ffill')
print(colors_ffill)
'''
0 blue
1 blue
2 purple
3 purple
dtype: object
'''
# Reindex the rows and columns of a DataFrame.
x = DataFrame(np.arange(9).reshape(3, 3),
              index=['a', 'c', 'd'],
              columns=['A', 'B', 'C'])
print(x)
'''
A B C
a 0 1 2
c 3 4 5
d 6 7 8
'''
# Row 'b' is new; bfill copies the next existing row ('c') into it.
x = x.reindex(['a', 'b', 'c', 'd'], method='bfill')
print(x)
'''
A B C
a 0 1 2
b 3 4 5
c 3 4 5
d 6 7 8
'''
# Reindex the columns. reindex() returns a new object, so the result is
# assigned back to x; the new column 'D' is filled with 0.
states = ['A', 'B', 'C', 'D']
x = x.reindex(columns=states, fill_value=0)
print(x)
'''
A B C D
a 0 1 2 0
b 3 4 5 0
c 3 4 5 0
d 6 7 8 0
'''
# Select rows (in the listed order) and columns by label in one step.
# .loc replaces the .ix indexer, which was removed in pandas 1.0.
reordered = x.loc[['a', 'b', 'd', 'c'], states]
print(reordered)
'''
A B C D
a 0 1 2 0
b 3 4 5 0
d 6 7 8 0
c 3 4 5 0
'''
# drop函数 (the drop function)
from pandas import Index
from pandas import Series,DataFrame
import numpy as np
import pandas as pd
# Remove entries from a Series by index label.
x = Series(np.arange(4), index=list('abcd'))
without_c = x.drop('c')
print(without_c)
'''
a 0
b 1
d 3
dtype: int32
'''
# Passing a list of labels removes several entries at once.
without_a_b = x.drop(['a', 'b'])
print(without_a_b)
'''
c 2
d 3
dtype: int32
'''
# Remove rows or columns from a DataFrame by label.
row_labels = ['a', 'b', 'c', 'd']
column_labels = ['A', 'B', 'C', 'D']
x = DataFrame(np.arange(16).reshape((4, 4)),
              index=row_labels,
              columns=column_labels)
print(x)
'''
A B C D
a 0 1 2 3
b 4 5 6 7
c 8 9 10 11
d 12 13 14 15
'''
print(x.drop(columns=['A', 'B']))  # drop along the column axis: removes columns A and B
'''
C D
a 2 3
b 6 7
c 10 11
d 14 15
'''
print(x.drop(index='a'))  # drop along the row axis: removes row a
'''
A B C D
b 4 5 6 7
c 8 9 10 11
d 12 13 14 15
'''
print(x.drop(index=['a', 'b']))
'''
A B C D
c 8 9 10 11
d 12 13 14 15
'''
# Series indexing: by label (dict-style) and by position (array-style).
x = Series(np.arange(4), index=['a', 'b', 'c', 'd'])
print(x['b'])  # 1 -- dict-style lookup by label
# Integer positions on a string-labelled Series go through .iloc; plain
# x[1] relies on a positional fallback that pandas has deprecated.
second = x.iloc[1]
print(second)  # 1 -- array-style lookup by position
picked = x.iloc[[1, 3]]  # fancy indexing with a list of positions
print(picked)
'''
b 1
d 3
dtype: int32
'''
print(x[x < 2])  # boolean indexing
'''
a 0
b 1
dtype: int32
'''
# Series slicing by label
print(x['a':'c'])  # closed interval: both endpoint labels are included
'''
a 0
b 1
c 2
dtype: int32
'''
# Assigning to a label slice writes into x itself.
x['a':'c'] = 5
print(x)
'''
a 5
b 5
c 5
d 3
dtype: int32
'''
# DataFrame indexing. The .ix indexer was removed in pandas 1.0, so
# positional access uses .iloc and label access uses .loc.
data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['a', 'b', 'c', 'd'],
                 columns=['A', 'B', 'C', 'D'])
print(data)
'''
A B C D
a 0 1 2 3
b 4 5 6 7
c 8 9 10 11
d 12 13 14 15
'''
print(data['A'])  # select a single column
'''
a 0
b 4
c 8
d 12
Name: A, dtype: int32
'''
print(data[['A', 'B']])  # fancy indexing: several columns at once
'''
A B
a 0 1
b 4 5
c 8 9
d 12 13
'''
# Select a row by its position.
row_b = data.iloc[1]
print(row_b)
'''
A 4
B 5
C 6
D 7
Name: b, dtype: int32
'''
print(data[:2])  # a slice selects rows
'''
A B C D
a 0 1 2 3
b 4 5 6 7
'''
# First two rows by position, columns by label: the row positions are
# converted to labels so a single .loc call can take both.
first_two_ab = data.loc[data.index[:2], ['A', 'B']]
print(first_two_ab)
'''
A B
a 0 1
b 4 5
'''
# Rows by label, columns by position (converted to labels for .loc).
rows_ab_cols_dab = data.loc[['a', 'b'], data.columns[[3, 0, 1]]]
print(rows_ab_cols_dab)
'''
D A B
a 3 0 1
b 7 4 5
'''
# Rows from the start up to and including 'b', column 'A'.
col_a_upto_b = data.loc[:'b', 'A']
print(col_a_upto_b)
'''
a 0
b 4
Name: A, dtype: int32
'''
# Selecting and assigning by condition.
print(data)
'''
A B C D
a 0 1 2 3
b 4 5 6 7
c 8 9 10 11
d 12 13 14 15
'''
a_above_five = data['A'] > 5
print(data[a_above_five])  # keep only the rows whose A value is greater than 5
'''
A B C D
c 8 9 10 11
d 12 13 14 15
'''
below_five = data < 5
print(below_five)  # element-wise True/False
'''
A B C D
a True True True True
b True False False False
c False False False False
d False False False False
'''
data[below_five] = 0  # conditional assignment: every value below 5 becomes 0
print(data)
'''
A B C D
a 0 0 0 0
b 0 5 6 7
c 8 9 10 11
d 12 13 14 15
'''