莫烦python sklearn笔记

莫烦python sklearn笔记

莫烦python bilibili视频
视频时长约一个小时,下面是分模块整理的笔记,需要哪块直接复制即可使用。

走过路过的仙女仙子,有用记得点个赞哦

安装

python3.6:
先安装numpy、scipy,再安装 scikit-learn

各个命令的库调用

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets   # sklearn自带数据集
from sklearn.model_selection import learning_curve   # 验证过拟合learning curve
from sklearn.model_selection import  validation_curve   # 选择超参数 validation curve 
from sklearn.model_selection  import cross_val_score   # 交叉验证 cross val score
from sklearn.model_selection import train_test_split   # 分训练集和测试集
from sklearn import preprocessing   # normalization 归一化
from sklearn.neighbors import KNeighborsClassifier   # KNN模型
from sklearn.linear_model import LinearRegression   # 线性回归模型(拟合预测)
from sklearn.linear_model import LogisticRegression   # 逻辑回归(分类)
from sklearn.ensemble import RandomForestClassifier   # 随机森林(分类)
from sklearn.svm import SVC   # SVC支持向量机模型(分类)

下面基本用的都是这个数据

# Iris dataset bundled with scikit-learn
iris = datasets.load_iris()

自己生成回归数据,以及划分数据集、归一化、训练各模型的代码

# Synthetic regression data: 100 samples, 1 feature, 1 target, noise=1.
x, y = datasets.make_regression(n_samples=100, n_features=1, n_targets=1, noise=1)

# Split the iris data into training and test sets (30% held out for testing).
iris_x = iris.data      # feature matrix, one row per sample
iris_y = iris.target    # class labels, one per sample
x_train, x_test, y_train, y_test = train_test_split(iris_x, iris_y, test_size=0.3)

# Standardize the synthetic regression features.
x = preprocessing.scale(x)

# KNN classifier trained on the iris training split.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)

# Linear regression trained on the synthetic data generated above.
# (Previously fit(data_x, data_y) was called with names that are never
# defined, which raises NameError.)
model = LinearRegression()
model.fit(x, y)

# SVC (support vector classifier) trained on the iris training split.
df = SVC()
df.fit(x_train, y_train)

测试集测试准确度

# Show the KNN predictions for the test split next to the true labels.
predicted_labels = knn.predict(x_test)
print(predicted_labels)  # predicted
print(y_test)            # actual

下面都是模型的选取和参数设置

# Cross-validation: score the estimator on 5 folds and report the mean.
# Comparing this mean across several estimators (e.g. KNN vs. others) is a
# way to choose between them.
scores = cross_val_score(
    estimator=knn,
    X=iris_x,
    y=iris_y,
    scoring="accuracy",
    cv=5,
)
mean_accuracy = scores.mean()
print(mean_accuracy)

# Pick the best n_neighbors: 10-fold cross-validated accuracy for k = 1..30.
k_range = range(1, 31)
k_scores = []
for k in k_range:
    # Use a separate name for the trial estimator so the fitted `knn` is not
    # replaced by an unfitted one (which would break later predict() calls).
    candidate = KNeighborsClassifier(n_neighbors=k)
    # Classification: score with "accuracy".
    fold_scores = cross_val_score(candidate, iris_x, iris_y, cv=10, scoring="accuracy")
    # Regression: use scoring="neg_mean_squared_error" instead and negate the
    # returned scores to obtain a loss.
    k_scores.append(fold_scores.mean())
plt.plot(k_range, k_scores)
# Choose the SVC hyper-parameter gamma with a validation curve.
# SVC is a classifier and needs class labels; the `x, y` produced by
# make_regression hold continuous targets and make fit() raise, so the
# digits classification dataset is used here instead.
digits = datasets.load_digits()
param_range = np.logspace(-6, -2.3, 5)   # candidate gamma values
train_loss1, test_loss1 = validation_curve(
    SVC(), digits.data, digits.target,
    param_name="gamma", param_range=param_range,
    cv=10, scoring="neg_mean_squared_error",
)
# Scores are negated MSE, one row per gamma and one column per fold:
# flip the sign and average over the folds.
train_loss_mean1 = -np.mean(train_loss1, axis=1)
test_loss_mean1 = -np.mean(test_loss1, axis=1)

plt.plot(param_range, train_loss_mean1, 'o-', color="r",
         label="Training")
plt.plot(param_range, test_loss_mean1, 'o-', color="g",
         label="Cross-validation")
plt.legend(loc="best")   # without a legend the labels above are never shown
# Learning curve: loss versus training-set size, used to spot over-fitting.
# SVC is a classifier and needs class labels; the `x, y` produced by
# make_regression hold continuous targets and make fit() raise, so the
# digits classification dataset is used here instead.
digits = datasets.load_digits()
train_sizes, train_loss, test_loss = learning_curve(
    SVC(gamma=0.001), digits.data, digits.target,
    cv=10, scoring="neg_mean_squared_error",
    # Fractions of the training set at which the loss is recorded.
    train_sizes=[0.1, 0.25, 0.75, 1.0],
)
# Scores are negated MSE, one row per training size and one column per fold:
# flip the sign and average over the folds.
train_loss_mean = -np.mean(train_loss, axis=1)
test_loss_mean = -np.mean(test_loss, axis=1)
plt.plot(train_sizes, train_loss_mean, color="orange", linestyle="--",
         label="Training")
plt.plot(train_sizes, test_loss_mean, color="cyan", linestyle="-",
         label="Cross-validation")
plt.legend(loc="best")
# Persist a trained model to disk and load it back.
import os
import pickle

import joblib

# Neither open() nor joblib.dump() creates missing directories, so make sure
# the target directory exists before writing.
os.makedirs("save", exist_ok=True)

# Method 1: pickle
with open("save/clf.pickle", "wb") as f:   # "wb" = write bytes
    pickle.dump(knn, f)   # dump: serialize the model into the file

# NOTE: only unpickle files you trust; loading a pickle can run arbitrary code.
with open("save/clf.pickle", "rb") as f:
    knn2 = pickle.load(f)   # load: read the model back
    print(knn2.predict(x_test[0:1]), y_test[0:1])

# Method 2: joblib
joblib.dump(knn, "save/clf.pkl")
knn3 = joblib.load("save/clf.pkl")

你可能感兴趣的:(python笔记,python,机器学习,逻辑回归,深度学习)