import pandas as pd
import numpy as np
import seaborn as sns
import gc
from tqdm import tqdm
import pylab as pl
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import lightgbm as lgb
from lightgbm import LGBMClassifier
import catboost as cat
from catboost import CatBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import feature_selection
from sklearn import model_selection
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn import feature_selection
from sklearn import preprocessing
from sklearn.metrics import f1_score, roc_auc_score,make_scorer
from sklearn.ensemble import RandomForestClassifier,VotingClassifier
import time
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import f_classif,chi2
from sklearn.preprocessing import Binarizer, scale,MinMaxScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from feature_selector import FeatureSelector
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
from itertools import product
import category_encoders as ce
from sklearn.model_selection import cross_val_score
import warnings
import toad
# Silence sklearn DeprecationWarnings and lift pandas' display truncation so
# printed frames show every row and column.
warnings.filterwarnings(module='sklearn*', action='ignore', category=DeprecationWarning)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Load the label table, the training features and the rows to predict.
train_label = pd.read_csv("D:/职业比赛/重庆赛道/train_label.csv", encoding="utf-8")
train = pd.read_csv("D:/职业比赛/重庆赛道/train_set.csv", encoding="utf-8")
test = pd.read_csv("D:/职业比赛/重庆赛道/result_predict_A.csv", encoding="utf-8")

# Attach the label to each training row by user_id. The left join keeps every
# training row even when no matching label row exists.
train = pd.merge(train, train_label, on='user_id', how='left', sort=False)

# Tag each row with its source ("orgin" is the column name used throughout
# this script) so the combined frame can be split back apart later.
train["orgin"] = "train"
test["orgin"] = "test"

# Stack train and test so the preprocessing below is applied to both at once.
df = pd.concat([train, test], axis=0).reset_index(drop=True)
# Integer-encode X1: cast to object first, then let LabelEncoder assign one
# integer per distinct value (fitted on train and test rows together).
encoder = LabelEncoder()
df["X1"] = encoder.fit_transform(df["X1"].astype(object))
# Integer code assigned to each known X5 label; anything else maps to 5.
_X5_CODES = {
    "大众用户": 1,
    "农村用户": 2,
    "集团用户": 3,
    "校园用户": 4,
}


def bian(s):
    """Return the integer code for an X5 label.

    Args:
        s: The raw X5 value.

    Returns:
        1, 2, 3 or 4 for the four known labels, and 5 for every other value
        (including missing values such as NaN or None).
    """
    try:
        return _X5_CODES.get(s, 5)
    except TypeError:
        # An unhashable value cannot be one of the known labels.
        return 5
# Replace the X5 text labels with the integer codes produced by bian().
# NOTE(review): Series.map calls bian() on missing values as well (default
# na_action=None), so they become 5 and the mode fill below presumably never
# changes X5 - confirm.
df["X5"] = df["X5"].map(bian)

# Columns whose missing values are filled with the column mean. The mean is
# computed over the combined train + test rows held in df.
mean_filled_cols = [
    'X6', 'X7', 'X8', 'X9', 'X10', 'X11', 'X12', 'X13', 'X14', 'X15',
    'X16', 'X17', 'X18', 'X19', 'X20', 'X21', 'X22', 'X23',
    "X32", "X33", "X34", "X35", "X36", "X38",
]
for col in mean_filled_cols:
    df[col] = df[col].fillna(df[col].mean())

# Columns whose missing values are filled with the most frequent value.
mode_filled_cols = ["X3", 'X5', 'X26', 'X27', 'X28', 'X29', 'X30', 'X31']
for col1 in mode_filled_cols:
    df[col1] = df[col1].fillna(df[col1].mode()[0])
#####################
# Business features: differences, ratios and sums of raw columns.
# The ratios can produce +/-inf or NaN when the denominator is 0; those values
# are left in place here.
df["a1"] = df["X7"] - df["X6"]
df["a3"] = df["X10"] - df["X9"]
df["a5"] = df["X13"] - df["X12"]
df["a7"] = df["X6"] / df["X15"]
df["a8"] = df["X9"] / df["X16"]
df["a9"] = df["X12"] / df["X17"]
df["a10"] = df["X19"] - df["X18"]
df["a12"] = df["X22"] - df["X21"]
df["a14"] = df["X35"] - df["X34"]
df["a16"] = df["X17"] / df["X16"]
df["a17"] = df["X8"] + df["X15"]
df["a18"] = df["X11"] + df["X16"]
df["a19"] = df["X14"] + df["X17"]
df["a24"] = df["X16"] / df["X17"]  # reciprocal of a16
df["a35"] = df["X32"] / df["X15"]
df["a36"] = df["X33"] / df["X15"]
# Columns excluded from the feature list: identifiers, the target, the source
# tag, and raw columns listed here for removal.
drop_tezheng1 = ["user_id", "label", "orgin", "product_no", "X7", "X8", "X10", "X11",
                 "X13", "X14", "X19", "X20", "X22", "X23", "X35", "X36"]

# Discrete columns: kept as features but cast to object and left unscaled.
lisan_tezheng1 = ['X1', "X3", "X4", "X5", "X24", "X25", "X26", "X27", "X28", "X29",
                  "X30", "X31", "X37", "X39", "X40", "X41", "X42", "X43"]

# cols1 starts as the continuous columns only: everything in df that is
# neither dropped nor discrete, in df's column order.
excluded = set(drop_tezheng1) | set(lisan_tezheng1)
cols1 = [col for col in df.columns if col not in excluded]

for col in lisan_tezheng1:
    df[col] = df[col].astype(object)

# The ratio features can contain +/-inf; turn those into NaN, then fill every
# remaining gap in the continuous columns with the sentinel -1.
df[cols1] = df[cols1].replace([np.inf, -np.inf], np.nan).fillna(-1)

# Standardize the continuous columns over the combined train + test rows.
df[cols1] = preprocessing.scale(df[cols1])

# Final column list: continuous + discrete columns, plus user_id.
# NOTE(review): user_id is therefore part of the column set handed to the
# feature selector downstream - confirm that is intended.
cols1 = cols1 + lisan_tezheng1
cols1.append("user_id")

# Split back into train and test rows. The copies let later code add columns
# to either frame without writing into a slice of df.
train_df = df.loc[df["orgin"] == "train"].copy()
test_df = df.loc[df["orgin"] == "test"].copy()
gc.collect()
#######################
# Feature selection on the training rows with FeatureSelector (imported at the
# top of the file).
train_features = train_df[cols1]
train_labels = train_df["label"]
fs = FeatureSelector(data=train_features, labels=train_labels)

# Flag features by three criteria: pairwise correlation above 0.99, zero
# importance, and low importance at a cumulative-importance cut of 0.999.
fs.identify_collinear(correlation_threshold=0.99, one_hot=False)
fs.identify_zero_importance(task="classification", eval_metric="auc", n_iterations=5, early_stopping=True)
fs.identify_low_importance(cumulative_importance=0.999)

# Drop everything flagged above.
train1 = fs.remove(methods=['collinear', 'zero_importance', "low_importance"], keep_one_hot=False)

# X16 and X9 are additionally excluded by hand. Filtering (rather than
# list.remove) keeps this working when the selector has already dropped them.
manually_dropped = {"X16", "X9"}
tezheng = [col for col in train1.columns if col not in manually_dropped]
# Train the model.
# NOTE(review): fit() below receives no eval_set, so od_type /
# early_stopping_rounds presumably have nothing to monitor - confirm against
# the CatBoost documentation.
model1 = cat.CatBoostClassifier(
    loss_function="Logloss",
    eval_metric="AUC",
    learning_rate=0.01,
    iterations=5000,
    random_seed=42,
    od_type="Iter",
    depth=10,
    early_stopping_rounds=500,
)
model1.fit(train_df[tezheng], train_df["label"])

# Predicted probabilities, one column per class.
y_pred_proba = model1.predict_proba(test_df[tezheng])

# Re-weight the two probability columns (x0.86 for column 0, x2.96 for
# column 1) before taking the argmax, which shifts the decision towards
# column 1.
class_weights = np.array([0.86, 2.96])
y_pred_tmp = y_pred_proba * class_weights
pred4 = np.argmax(y_pred_tmp, axis=1)

# Work on an explicit copy so the assignment never targets a slice of df.
test_df = test_df.copy()
test_df["label"] = pred4

# Write the submission file: user_id plus the integer label.
ss = test_df[["user_id", "label"]].copy()
ss["label"] = ss["label"].astype(int)
ss.to_csv('D:/职业比赛/重庆赛道/submit2.csv', index=False)

# Show the predicted class balance (a bare expression prints nothing when run
# as a script).
print(test_df["label"].value_counts())
pi币是注册邀请制货币,扫码安装Pi币,我的邀请码“lizifu02”,可免费获得一个Pi
图片:
带尺寸的图片:
居中的图片:
居中并且带尺寸的图片:
当然,我们为了让用户更加便捷,我们增加了图片拖拽功能。
去博客设置页面,选择一款你喜欢的代码片高亮样式,下面展示同样高亮的 代码片
.
// An highlighted block
var foo = 'bar';
一个简单的表格是这么创建的:
项目 | Value |
---|---|
电脑 | $1600 |
手机 | $12 |
导管 | $1 |
使用:---------:
居中
使用:----------
居左
使用----------:
居右
第一列 | 第二列 | 第三列 |
---|---|---|
第一列文本居中 | 第二列文本居右 | 第三列文本居左 |
SmartyPants将ASCII标点字符转换为“智能”印刷标点HTML实体。例如:
TYPE | ASCII | HTML |
---|---|---|
Single backticks | 'Isn't this fun?' | ‘Isn’t this fun?’ |
Quotes | "Isn't this fun?" | “Isn’t this fun?” |
Dashes | -- is en-dash, --- is em-dash | – is en-dash, — is em-dash |
一个具有注脚的文本。1
Markdown将文本转换为 HTML。
您可以使用 KaTeX 渲染 LaTeX 数学表达式:
Gamma公式展示 $\Gamma(n) = (n-1)!\quad\forall n\in\mathbb N$ 是通过欧拉积分
$$\Gamma(z) = \int_0^\infty t^{z-1}e^{-t}\,dt\,.$$
你可以在 here 找到更多关于 LaTeX 数学表达式的信息。
可以使用UML图表进行渲染。 Mermaid. 例如下面产生的一个序列图:
这将产生一个流程图。:
我们依旧会支持flowchart的流程图:
如果你想尝试使用此编辑器, 你可以在此篇文章任意编辑。当你完成了一篇文章的写作, 在上方工具栏找到 文章导出 ,生成一个.md文件或者.html文件进行本地保存。
如果你想加载一篇你写过的.md文件,在上方工具栏可以选择导入功能进行对应扩展名的文件导入,
继续你的创作。
注脚的解释 ↩︎