python-featuretools生成新特征

import featuretools as ft

# 定义EntitySet
es = ft.EntitySet(id = 'clients')
# 添加entity到EntitySet
es = es.entity_from_dataframe(entity_id = 'app', dataframe = app, index = 'SK_ID_CURR', variable_types=app_types)
es = es.entity_from_dataframe(entity_id = 'bureau', dataframe = bureau, index = 'SK_ID_BUREAU')
es = es.entity_from_dataframe(entity_id = 'previous', dataframe = previous, index = 'SK_ID_PREV', variable_types= previous_types )
es = es.entity_from_dataframe(entity_id = 'bureau_balance', dataframe = bureau_balance, 
                             make_index = True, index = 'bureaubalance_index')

es = es.entity_from_dataframe(entity_id = 'cash', dataframe = cash, 
                             make_index = True, index = 'cash_index')

es = es.entity_from_dataframe(entity_id = 'installments', dataframe = installments,
                             make_index = True, index = 'installments_index')

es = es.entity_from_dataframe(entity_id = 'credit', dataframe = credit,
                             make_index = True, index = 'credit_index')
# 在表与表之间定义关系
r_app_bureau = ft.Relationship(es['app']['SK_ID_CURR'], es['bureau']['SK_ID_CURR'])
r_bureau_balance = ft.Relationship(es['bureau']['SK_ID_BUREAU'], es['bureau_balance']['SK_ID_BUREAU'])
r_app_previous = ft.Relationship(es['app']['SK_ID_CURR'], es['previous']['SK_ID_CURR'])
r_previous_cash = ft.Relationship(es['previous']['SK_ID_PREV'], es['cash']['SK_ID_PREV'])
r_previous_installments = ft.Relationship(es['previous']['SK_ID_PREV'], es['installments']['SK_ID_PREV'])
r_previous_credit = ft.Relationship(es['previous']['SK_ID_PREV'], es['credit']['SK_ID_PREV'])
# 把关系添加到EntitySet
es = es.add_relationships([r_app_bureau, r_bureau_balance,  r_app_previous, r_previous_cash, r_previous_installments, r_previous_credit])
# 定义feature primitives,primitives其实就是一种默认的计算规则
primitives = ft.list_primitives()
# 定义默认的premitives
agg_primitives = ["sum", "max", "min", "mean", "count", "percent_true", "num_unique", "mode"]
trans_primitives = ['percentile', 'and', 'diff']
# Deep feature synthesis,Deep feature synthesis就是产生新特征的一个过程
feature_names = ft.dfs(entityset = es, target_entity = 'app', 
                        trans_primitives = trans_primitives,
                        agg_primitives = agg_primitives, 
                        max_depth = 3, n_jobs = 1, verbose = 1,
                        features_only = True)
# 将新生成的特征保存起来
ft.save_features(features, '../input/features.txt')
# 导入新生成的features
feature_defs = ft.load_features(external_path + '/input/features.txt')

你可能感兴趣的:(机器学习)