课后题

第二章
1.
import numpy as np
# Build a 10-row, 5-column array of random samples.
arr = np.random.rand(10, 5)
arr  # bare expression: shown only when run interactively
2.
import numpy as np
# Build the two 2x2 matrices from their string form.  ``np.matrix`` is used
# instead of the ``np.mat`` alias because the alias was removed in NumPy 2.0;
# the resulting objects are the same matrix type, so ``*`` below is still a
# matrix product rather than an element-wise one.
matr1 = np.matrix("2 3; 4 5")
matr1

matr2 = np.matrix("5 6; 7 8")
matr2

# Matrix product of the two (bare expression: shown only interactively).
matr1 * matr2

第三章
1.
import numpy as  np
import matplotlib.pyplot as plt
%matplotlib inline
# Open the archive once and read both arrays from it; the ``with`` block
# closes the .npz file as soon as they have been extracted.
with np.load('../data/iris.npz') as iris_npz:
    iris = iris_npz['data'][:, :-1]  # every column except the last one
    name = iris_npz['features_name']

# NOTE(review): SimHei is presumably selected so the Chinese title renders;
# confirm the font is installed on the target machine.
plt.rcParams['font.sans-serif'] = 'SimHei'
p = plt.figure(figsize=(16, 16))  # set up the canvas
# Title the figure as a whole.  Calling plt.title() here, before any subplot
# exists, would act on an implicit full-figure axes instead of the 4x4 grid.
p.suptitle('iris散点图矩阵')
for i in range(4):
    for j in range(4):
        # Cell (row i, column j) of the 4x4 grid: column i of the data on
        # the x axis against column j on the y axis.
        ax = p.add_subplot(4, 4, i * 4 + j + 1)
        ax.scatter(iris[:, i], iris[:, j])  # draw the scatter plot
        ax.set_xlabel(name[i])
        ax.set_ylabel(name[j])
plt.show()
2.
# Box plot
import numpy as np
import matplotlib.pyplot as plt

# Draw 100 samples from a normal distribution with mean 0 and std 1.
data = np.random.normal(loc=0, scale=1, size=100)
# sym='o' and whis=0.05 are passed straight to boxplot; see matplotlib's
# boxplot documentation for how they affect fliers and whiskers.
plt.boxplot(data, sym='o', whis=0.05)
print(data)
plt.show()


第四章
1.
import pandas as pd
# Load the mtcars table from its CSV file.
mtcars = pd.read_csv(filepath_or_buffer='../data/mtcars.csv')

# Report the number of axes, then the shape tuple of the table.
print('mtcars的维度为:', mtcars.ndim)
print('mtcars的大小为:', mtcars.shape)
2.
# Print the summary statistics that DataFrame.describe() reports for mtcars.
print('mtcars的描述性统计为:',mtcars.describe())
3.
# Keep only the two grouping keys (cyl, carb) and the two measures (mpg, hp).
data = mtcars[['cyl', 'carb', 'mpg', 'hp']]
# Average the remaining columns within every (cyl, carb) combination.
mpgHp = data.groupby(by=['cyl', 'carb']).mean()
print('不同cyl(汽缸数),carb(化油器)对应的mpg(油耗)和hp(马力)的均值为:', mpgHp)



第五章
1.
import numpy as np
import pandas as pd
# Sample values for the scaling exercises, wrapped in a pandas Series.
data = [200, 300, 400, 600, 1000]
Ser1 = pd.Series(data=data)

# Min-max normalisation
def MinMaxScaler(data):
    """Rescale *data* linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        data: A pandas object exposing ``min()`` and ``max()`` (e.g. a Series).

    Returns:
        ``(data - min) / (max - min)`` as a new object; *data* is not modified.
    """
    lowest, highest = data.min(), data.max()
    return (data - lowest) / (highest - lowest)
# Apply min-max scaling to Ser1 (bare expression: the result is not stored).
MinMaxScaler(Ser1)

# Standard-deviation (z-score) standardisation
def StandardScaler(data):
    """Centre *data* on its mean and divide by its standard deviation.

    Args:
        data: A pandas object exposing ``mean()`` and ``std()`` (e.g. a Series).

    Returns:
        ``(data - data.mean()) / data.std()`` as a new object, with ``std()``
        called using pandas' default settings; *data* is not modified.
    """
    centred = data - data.mean()
    return centred / data.std()
# Apply z-score standardisation to Ser1 (bare expression: the result is not stored).
StandardScaler(Ser1)

# Decimal-scaling normalisation
def DecimalScaler(data):
    """Divide *data* by a power of ten so that every magnitude is below 1.

    The divisor is ``10 ** j`` with ``j = floor(log10(max|data|)) + 1``,
    i.e. the smallest integer for which ``max|data| / 10**j < 1``.  Using
    ``ceil(log10(...))`` instead would leave a scaled maximum of exactly 1.0
    whenever the largest magnitude is itself a power of ten (e.g. 1000).

    Args:
        data: A pandas object exposing ``abs()`` and ``max()`` (e.g. a Series).

    Returns:
        The scaled values as a new object; *data* is not modified.

    Note:
        An all-zero input is not special-cased: ``log10(0)`` is ``-inf``,
        so the divisor becomes 0 and the division is 0/0.
    """
    largest_magnitude = data.abs().max()
    exponent = np.floor(np.log10(largest_magnitude)) + 1
    return data / 10 ** exponent
# Apply decimal scaling to Ser1 (bare expression: the result is not stored).
DecimalScaler(Ser1)

2.
# Twelve price values to discretise, held as an array and as a Series.
price = np.array([5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215])
Ser2 = pd.Series(price)
# Cut the values into 3 bins (bare expression: the result is not stored).
pd.cut(Ser2, bins=3)

3.
def PreProcessing(data):
    """Drop duplicate rows and fill missing values with the median, in place.

    Args:
        data: The pandas object to clean (e.g. a DataFrame).  It is modified
            in place.

    Returns:
        The same *data* object, after de-duplication and filling.
    """
    # The pandas method is ``drop_duplicates``; the singular spelling used
    # previously does not exist and raised AttributeError on every call.
    data.drop_duplicates(inplace=True)
    # The medians are computed after de-duplication, so repeated rows do not
    # weight the fill value.
    data.fillna(data.median(), inplace=True)
    return data


第六章
1.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load the iris data set and pull out its 'data' and 'target' entries.
data = load_iris()
iris, target = data['data'], data['target']
# Hold out 20% of the samples for testing; random_state is fixed at 42.
X_train, X_test, y_train, y_test = train_test_split(
    iris, target, test_size=0.2, random_state=42
)
2.
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
# Learn the min-max scaling rule from the training set only, then apply that
# same rule to both the training and the test set.
Scaler = MinMaxScaler()
Scaler.fit(X_train)
X_trainScaler = Scaler.transform(X_train)
X_testScaler = Scaler.transform(X_test)

# Fit a 3-component PCA on the scaled training data, then project both sets
# with the fitted model.
pca_model = PCA(n_components=3)
pca_model.fit(X_trainScaler)
X_trainPca = pca_model.transform(X_trainScaler)
X_testPca = pca_model.transform(X_testScaler)
3.
from sklearn.svm import SVC
from sklearn.metrics import classification_report
# Train an SVC built with its default arguments on the PCA-reduced training set.
svm = SVC()
svm.fit(X_trainPca, y_train)
print('建立的SVM模型为:\n', svm)
# Predict the test set and print the classification report for it.
y_pred = svm.predict(X_testPca)
print('使用SVM预测iris数据的结果分析报告为:', classification_report(y_test, y_pred))

第七章
import numpy as np  
import pandas as pd
import matplotlib.pyplot as plt  
from sklearn.cluster import KMeans

# Read the data and keep every column except the first one as the features.
data = pd.read_csv("../data/data.csv")
X = data.iloc[:, 1:]
print(X)

# K-means clustering into 5 clusters; fit_predict returns one label per row of X.
clf = KMeans(n_clusters=5)
y_pred = clf.fit_predict(X)
print(y_pred)  # show the cluster labels

# The first two feature columns become the plot coordinates.
x, y = X.iloc[:, 0], X.iloc[:, 1]
print(x)
print(y)

# Visualisation: scatter the first feature column (x) against the second (y),
# colouring each point by its cluster label and drawing it with the 'o' marker.
plt.scatter(x, y, c=y_pred, marker='o')
plt.title("Kmeans-Basketball Data")  # figure title
plt.xlabel("assists_per_minute")  # x-axis label
plt.ylabel("points_per_minute")  # y-axis label
plt.legend(["Rank"])  # legend with the single entry "Rank"
plt.show()  # display the figure



第八章
import numpy as np
import pandas as pd
inputfile = '../data/data.csv'  # path of the input data file
data = pd.read_csv(inputfile)  # read the data
# Pearson correlation matrix of the columns, rounded to two decimals.
print('相关系数矩阵为:', data.corr(method='pearson').round(2))

第九章
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

## Read the data (the CSV is decoded as GBK)
data = pd.read_csv('../data/data.csv', encoding='gbk')

## Encode the target as integers: 1 where the label equals '是', 0 otherwise.
## One vectorised comparison yields the integer column directly, so no integer
## is ever written into the text column (a write that is rejected when pandas
## stores the column with its dedicated string dtype).
data['好瓜与否'] = (data['好瓜与否'] == '是').astype('int')

## Build X and y: one-hot encode every column between the first and the last,
## and take the last column as y.
## NOTE(review): this assumes '好瓜与否' is the last column of the file -- confirm.
X = pd.get_dummies(data.iloc[:, 1:-1]).values
y = data.iloc[:, -1].values

## Split the data set: 80% for training, with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=125
)

## Fit the MLP classifier and predict the held-out samples
BPNet = MLPClassifier(random_state=123)
BPNet.fit(X_train, y_train)
y_pred = BPNet.predict(X_test)

## Print the classification report for the predictions
print('预测报告为:\n', classification_report(y_test, y_pred))



你可能感兴趣的:(课后题)