pandas.api.types.union_categoricals
# Merge two categoricals into one; the combined categories are listed in
# order of first appearance across the inputs.
a = pd.Categorical(values=["b", "c"])
b = pd.Categorical(values=["a", "b"])
union_categoricals(to_union=[a, b])
[b, c, a, b]
Categories (3, object): [b, c, a]
不一定:只有当分类的数量和类别都完全相同时,结果才是分类变量。
还没测试出啥区别
缺陷是:修改 Series 变量时,原分类也会跟着改变。创建 Series 时设置参数 copy=True,这样之后再修改 Series,原分类就不会受影响了。
# Problem 1: replace the numeric '深度' (depth) column with seven labelled
# grades. The first edge is -0.1 rather than 0 — presumably so that a value
# of exactly 0 lands in the first (right-closed) bin.
data['深度'] = pd.cut(
    data['深度'],
    bins=[-0.1, 5, 10, 15, 20, 30, 50, np.inf],
    labels=['Ⅰ', 'Ⅱ', 'Ⅲ', 'Ⅳ', 'Ⅴ', 'Ⅵ', 'Ⅶ'],
)

# Problem 2: grade the '烈度' (intensity) column the same way, then show the
# table indexed and sorted by the two graded columns.
data['烈度'] = pd.cut(
    data['烈度'],
    bins=[-0.1, 3, 4, 5, np.inf],
    labels=['Ⅰ', 'Ⅱ', 'Ⅲ', 'Ⅳ'],
)
data.set_index(['深度', '烈度']).sort_index()
# Adapted from the reference answer.
def my_crosstab(foo, bar):
    """Cross-tabulate two equal-length categoricals, keeping unused categories.

    Every declared category of ``foo`` becomes a row and every declared
    category of ``bar`` becomes a column, so combinations that never occur
    are reported with a count of 0 instead of being dropped.

    Parameters
    ----------
    foo, bar : objects exposing ``.categories`` (e.g. ``pd.Categorical``)
        Paired observations; the k-th element of ``foo`` is counted together
        with the k-th element of ``bar``.

    Returns
    -------
    pd.DataFrame
        Integer counts. The index is named ``'1nd var'`` and the columns
        ``'2st var'``.

    Raises
    ------
    ValueError
        If ``foo`` and ``bar`` do not have the same length.
    """
    if len(foo) != len(bar):
        raise ValueError("foo and bar must have the same length")

    # Labels are the declared categories merged with the observed values, so
    # categories that never occur still get a (zero-filled) row or column.
    row_labels = list(foo.categories.union(set(foo)))
    col_labels = list(bar.categories.union(set(bar)))

    counts = pd.DataFrame(0, index=row_labels, columns=col_labels)

    # Walk the observations pairwise instead of indexing by position.
    for row_key, col_key in zip(foo, bar):
        counts.at[row_key, col_key] += 1

    # The axis names (including their '1nd'/'2st' spelling) are kept exactly
    # as before so existing callers see the same labels.
    return counts.rename_axis(index='1nd var', columns='2st var')
# Example invocation. `foo` and `bar` are not defined in this excerpt —
# presumably two categoricals created earlier; TODO confirm.
my_crosstab(foo,bar)