文章目录
- 线性回归与逻辑回归
-
- 朴素贝叶斯模型
-
- 例6-4-1(a) 以鸢尾花数据预分析
- 例6-4-1(b) 鸢尾花的高斯-贝叶斯模型
- 例6-4-1(c) 鸢尾花的高斯-贝叶斯模型性能评价
- 例6-4-2 ROC应用举例
- 决策树
- K-means迭代算法
线性回归与逻辑回归
线性回归
import pandas as pd
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn import metrics
data.corr(method = 'pearson')
x = data[[]]
y = data[]
data_lreg = ______________
data_lreg._____(x,y)
pred_y = data_lreg._______(x)
print('f(x)=',data_lreg.intercept_,'+',data_lreg.coef_[0],'x')
print('RMSE=',np.sqrt(metrics.mean_squared_error(y,pred_y)))
print('r_square=',data_lreg.______(x,y))
逻辑回归
import pandas as pd
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import _____________
X = data[['']]
Y = data[['']]
X_train,X_test,Y_train,Y_test = train_test_split(X,Y)
data_1reg = ________________
data_1reg.fit(X_train,Y_train)
print('分类准确率为:',data_1reg.score(X_test,Y_test))
data1_dummies = pd.get_dummies(my_data['sex'],prefix = 'sex_')
朴素贝叶斯模型
例6-4-1(a) 以鸢尾花数据预分析
import pandas as pd
import numpy as np
import _________.pyplot as plt
from ________ import stats
from sklearn import ________
iris = datasets.__________
plt.figure(figsize = (12,15))
for n in range(4):
    for m in range(3):
        x = (iris.data[m*50 : m*50 + 50,n] - iris.data[m*50 : m*50 + 50,n].mean())/iris.data[m*50 : m*50 + 50,n].std()
        plt.______(4,3,n*3 + m + 1)
        stats._________(x,dist = 'norm',plot = plt)
        plt.text(-2,2,iris.feature_names[n])
        if n == 0:
            plt.title(iris.target_names[m])
        else:
            plt.title('')
        plt.xlim([-2.5,2.5])
        plt.ylim([-2.5,2.5])
        plt.plot([-2.5,2.5],[-2.5,2.5],c ='g')
例6-4-1(b) 鸢尾花的高斯-贝叶斯模型
from sklearn._________ import train_test_split
from sklearn.naive_bayes import ________
my_data = iris.data[:,:2]
X_train,X_test,Y_train,Y_test = ___________(my_data, iris.target, test_size = 0.2, random_state = 0)
clf = ___________
clf._____(X_train,Y_train)
y_pred = clf.________(X_test)
Y = pd.DataFrame(np._______([Y_test,y_pred]),columns = ['true_type','predict_type'])
Y.head(10)
例6-4-1(c) 鸢尾花的高斯-贝叶斯模型性能评价
from sklearn.metrics import _________
print(confusion_matrix(Y_test,y_pred))
from sklearn.metrics import _______________
print(classification_report(Y_test,y_pred))
例6-4-2 ROC应用举例
from sklearn.metrics import roc_curve
from sklearn.metrics import _________
my_auc = []
for n in range(4):
    my_auc.append(__________(iris.target[:100], iris.data[:100,n]))
print('4个参数的ROC_AUC是',my_auc)
plt.plot(np.ones([50,1]), iris.data[:50,0],'or')
plt.plot(np.ones([50,1]) + 0.2, iris.data[50:100,0],'*g')
plt.plot(np.ones([50,1]) + 1, iris.data[:50,1],'or')
plt.plot(np.ones([50,1]) + 1.2, iris.data[50:100,1],'*g')
plt.plot(np.ones([50,1]) + 2, iris.data[:50,2],'or')
plt.plot(np.ones([50,1]) + 2.2, iris.data[50:100,2],'*g')
plt.plot(np.ones([50,1]) + 3, iris.data[:50,3],'or')
plt.plot(np.ones([50,1]) + 3.2, iris.data[50:100,3],'*g')
plt.xticks([1,2,3,4],iris._______)
plt._______(iris.target_names[:2])
决策树
import pandas as pd
import numpy as np
from sklearn import tree
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import ________________
X = data[[]]
Y = data[]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, __________ = 22, test_size = 0.2)
my_tree = DecisionTreeClassifier(max_depth = 3)
my_tree.fit(X_train,Y_train)
print('分类结果为:',my_tree._______(X_test),'\n')
print('平均准确率为:',my_tree._______(X_test,Y_test))
pd.DataFrame({'feature':X.columns,'importance':my_tree.feature_importances_})
plt.figure(figsize = ( , ))
tree.plot_tree(my_tree,fontsize = 12,feature_names = X.columns,class_names = ['Good','Bad'])
plt.________('my_tree')
K-means迭代算法
import numpy as np
import pandas as pd
from scipy import stats
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
X = my_data[['budget','popularity','revenue']]
km = KMeans(n_clusters = 3,random_state = 1)
km.fit(X)
my_cl = pd.DataFrame(data = km.labels_ ,columns = ['cluster'])
X = pd.______([X,my_cl],axis = 1)
X.________('cluster').mean()
x = X['budget']
y = X['popularity']
z = X['revenue']
colors = list()
palette = {0:"red",1:"green",2:"blue"}
for n ,row in _________(X['cluster']):
    colors.append(palette[X['cluster'][n]])
fig = plt.figure(figsize = (12,10))
ax = fig.____(projection = '3d')
ax.______(x,y,z,color = colors)
ax.set_xlim(0,2e8)
ax.set_zlim(0,1e9)
ax.set_xlabel('budget',size = 15)
ax.set_ylabel('popularity',size = 15)
ax.set_zlabel('revenue',size = 15)