from catboost import CatBoostClassifier
# Minimal CatBoostClassifier walkthrough on plain Python lists: each row has
# two categorical columns followed by four numeric columns, and the labels
# take the values 1 and -1.

# Dataset
cat_features = [0, 1]  # column indices of the categorical features
train_data = [
    ["a", "b", 1, 4, 5, 6],
    ["a", "b", 4, 5, 6, 7],
    ["c", "d", 30, 40, 50, 60],
]
train_labels = [1, 1, -1]

# Rows to score once the model is trained
eval_data = [
    ["a", "b", 2, 4, 6, 8],
    ["a", "d", 1, 4, 50, 60],
]

# Define the model
model = CatBoostClassifier(iterations=2, learning_rate=1, depth=2)

# Train; the categorical column indices are passed explicitly by keyword
model.fit(train_data, train_labels, cat_features=cat_features)

# Predicted class for each evaluation row
preds_class = model.predict(eval_data)
# Predicted probability of each class
preds_proba = model.predict_proba(eval_data)
# Raw formula value
preds_raw = model.predict(eval_data, prediction_type="RawFormulaVal")

# One print call, one result per line (class, probabilities, raw values)
print(preds_class, preds_proba, preds_raw, sep="\n")

# The string below is a no-op expression statement; presumably it records the
# output the three results printed above produced for the original author.
'''
[1 1]
[[0.37014499 0.62985501]
[0.4641579 0.5358421 ]]
[0.53159487 0.14361474]
'''
# Train using CatBoost's own dataset container, the Pool class
from catboost import CatBoostClassifier, Pool

# Dataset: features, labels and per-object weights bundled into a single Pool
train_data = Pool(
    data=[[1, 4, 5, 6], [4, 5, 6, 7], [30, 40, 50, 60]],
    label=[1, 1, -1],
    weight=[0.1, 0.2, 0.3],
)
# Define the model
model = CatBoostClassifier(iterations=10)
# Train; the labels are carried by the Pool, so no separate y argument is given
model.fit(train_data)
# Predict on the same Pool that was used for training
preds_class = model.predict(train_data)
print(preds_class)  # [ 1 1 -1]
# Result: the expected output is noted on the print line above.
# (This line was previously a bare, undefined identifier, which raised
# NameError once the script reached it.)