pandas分层索引的操作

pandas文档

https://pandas.pydata.org/pandas-docs/version/0.25.0/user_guide/advanced.html

# Worked examples for pandas hierarchical indexing (MultiIndex) and a few
# related advanced-indexing features. Bare expressions are kept notebook-style:
# they are evaluated for demonstration and their results are discarded.
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype

# --- Creating a MultiIndex --------------------------------------------------
arrays = [
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
]
tuples = list(zip(*arrays))

index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
s = pd.Series(np.random.randn(8), index=index)

iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']]
pd.MultiIndex.from_product(iterables, names=['first', 'second'])

df = pd.DataFrame(
    [['bar', 'one'], ['bar', 'two'], ['foo', 'one'], ['foo', 'two']],
    columns=['first', 'second'],
)
pd.MultiIndex.from_frame(df)

# A list of arrays can be passed directly as the index of a Series/DataFrame.
arrays = [
    np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']),
    np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']),
]
s = pd.Series(np.random.randn(8), index=arrays)
df = pd.DataFrame(np.random.randn(8, 4), index=arrays)

# --- Inspecting levels ------------------------------------------------------
# Return the labels of one level of the MultiIndex, by position or by name.
index.get_level_values(0)
index.get_level_values('second')

# The examples below need a frame whose *columns* are the named MultiIndex;
# the frame built above only has plain integer columns.
df = pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index)
df.columns.levels
df[['foo', 'qux']].columns.levels
df[['foo', 'qux']].columns.to_numpy()
df[['foo', 'qux']].columns.get_level_values(0)
new_mi = df[['foo', 'qux']].columns.remove_unused_levels()
new_mi.levels

# --- Slicing a MultiIndex along one axis with .loc(axis=0) ------------------
miindex = pd.MultiIndex.from_product([
    ['A%d' % i for i in range(4)],
    ['B%d' % i for i in range(2)],
    ['C%d' % i for i in range(4)],
    ['D%d' % i for i in range(2)],
])
micolumns = pd.MultiIndex.from_tuples(
    [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
    names=['lvl0', 'lvl1'],
)
# Both axes are sorted up front so that label slicing on them is well defined.
dfmi = pd.DataFrame(
    np.arange(len(miindex) * len(micolumns)).reshape(
        (len(miindex), len(micolumns))
    ),
    index=miindex,
    columns=micolumns,
).sort_index().sort_index(axis=1)
dfmi.loc(axis=0)[:, :, ['C1', 'C3']]
# Assign through the same slicer on a copy, leaving ``dfmi`` untouched.
df2 = dfmi.copy()
df2.loc(axis=0)[:, :, ['C1', 'C3']] = -10

# --- Cross-sections ---------------------------------------------------------
# Select by label at a given level; the xs method is quite commonly used.
df = df.T  # put the named MultiIndex on the rows
df.xs('one', level='second')
df = df.T  # and back on the columns for the axis=1 examples
df.xs('one', level='second', axis=1)
df.xs(('one', 'bar'), level=('second', 'first'), axis=1)
df.xs('one', level='second', axis=1, drop_level=False)

# --- Data alignment and using reindex ---------------------------------------
midx = pd.MultiIndex.from_product([['one', 'zero'], ['y', 'x']])
df = pd.DataFrame(np.random.randn(4, 2), index=midx)
# groupby(level=0).mean() replaces the older df.mean(level=0) spelling.
df2 = df.groupby(level=0).mean()
df2.reindex(df.index, level=0)
df_aligned, df2_aligned = df.align(df2, level=0)

# --- Swapping the levels of a MultiIndex ------------------------------------
df[:5].swaplevel(0, 1, axis=0)
# Reorder the levels with reorder_levels.
df[:5].reorder_levels([1, 0], axis=0)

# --- Renaming labels and axis names -----------------------------------------
df.rename(columns={0: "col0", 1: "col1"})
df.rename(index={"one": "two", "y": "z"})
df.rename_axis(index=['abc', 'def'])

# --- Sorting a MultiIndex ---------------------------------------------------
s.sort_index()
s.sort_index(level=0)
s.sort_index(level=1)
# Assign the renamed index back instead of mutating it with inplace=True.
s.index = s.index.set_names(['L1', 'L2'])
df.T.sort_index(level=1, axis=1)

dfm = pd.DataFrame({
    'jim': [0, 0, 1, 1],
    'joe': ['x', 'x', 'z', 'y'],
    'jolie': np.random.rand(4),
})
dfm = dfm.set_index(['jim', 'joe'])
# is_monotonic_increasing replaces the older is_lexsorted()/lexsort_depth
# checks; the index has not been sorted yet at this point.
dfm.index.is_monotonic_increasing
dfm = dfm.sort_index()
dfm.index.is_monotonic_increasing
# Tuple-to-tuple label slicing, done here on the sorted index.
dfm.loc[(0, 'y'):(1, 'z')]

# --- Positional selection with take -----------------------------------------
index = pd.Index(np.random.randint(0, 1000, 10))
positions = [0, 9, 3]
index.take(positions)
ser = pd.Series(np.random.randn(10))
ser.iloc[positions]
ser.take(positions)
frm = pd.DataFrame(np.random.randn(5, 3))
frm.take([1, 4, 3])
frm.take([0, 2], axis=1)

# --- CategoricalIndex -------------------------------------------------------
df = pd.DataFrame({'A': np.arange(6), 'B': list('aabbca')})
df['B'] = df['B'].astype(CategoricalDtype(list('cab')))
df2 = df.set_index('B')

 

 

你可能感兴趣的:(pandas分层索引的操作)