# Age bucket edges: every adjacent pair of numbers forms a left-open,
# right-closed interval, i.e. (-1, 3], (3, 11], ..., (80, 100].
bins = [-1, 3, 11, 17, 29, 40, 55, 65, 80, 100]
# One label per interval (one fewer than the number of edges).
labels = ['age_group%d' % idx for idx, _ in enumerate(bins[1:])]
# Bucket each age and store the bucket label as a plain string rather than
# a categorical.
df['age_group'] = pd.cut(x=df['age'], bins=bins, labels=labels).astype(str)
# Append one indicator column per distinct value found in 'age_group'.
df = df.join(pd.get_dummies(df['age_group']))
def stratified_sample_df(df, col, n_samples, random_state=None):
    """Draw an equally sized random sample from every group of ``col``.

    Each distinct (non-null) value of ``df[col]`` contributes the same
    number of rows: ``n_samples``, capped at the size of the smallest group
    so that sampling without replacement is always possible.

    Args:
        df: Source DataFrame.
        col: Name of the column whose values define the strata.
        n_samples: Requested number of rows per stratum.
        random_state: Optional seed / random state forwarded to pandas so
            the draw can be reproduced. ``None`` (the default) gives a
            fresh random sample on every call.

    Returns:
        A new DataFrame with the sampled rows, all original columns, and
        the original (single-level) row index labels. It is empty when
        ``df[col]`` has no non-null values.
    """
    group_sizes = df[col].value_counts()
    if group_sizes.empty:
        # No strata at all (empty frame or only null keys): there is
        # nothing to sample, so return an empty frame with the same schema.
        return df.iloc[0:0].copy()

    per_group = int(min(n_samples, group_sizes.min()))
    # GroupBy.sample keeps every column (including ``col``) and the
    # original row index, so no apply()/droplevel() round trip is needed.
    return df.groupby(col).sample(n=per_group, random_state=random_state)