import numpy as np
import json
# Load the training data
datafile = 'boston_house_price_english.xlsx'
data = np.fromfile(datafile, sep=' ')
data
c:\users\django\appdata\local\programs\python\python37\lib\site-packages\ipykernel_launcher.py:5: DeprecationWarning: string or file could not be read to its end due to unmatched data; this will raise a ValueError in the future.
"""
array([], dtype=float64)
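np.fromfile with sep=' ' only parses raw binary or whitespace-separated text, so pointing it at an .xlsx workbook produces the warning above and an empty array, which is what makes the reshape, indexing, and statistics cells below fail. A minimal sketch of reading the spreadsheet instead, assuming pandas (with an openpyxl engine) is installed and that the sheet contains exactly the 14 numeric columns listed next (the file name is the one used above; the sheet layout is my assumption):
import pandas as pd  # assumption: pandas + openpyxl are available locally

df = pd.read_excel('boston_house_price_english.xlsx')  # parse the workbook properly
data = df.to_numpy(dtype=np.float64)                    # expect shape (506, 14)
print(data.shape)
With a non-empty (506, 14) array in hand, the reshape and indexing cells below behave as intended.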
feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE','DIS',
'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
feature_num = len(feature_names)
data = data.reshape([data.shape[0] // feature_num, feature_num])
x = data[0] # inspect the data
print(x.shape)
print(x)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
in
4 data = data.reshape([data.shape[0] // feature_num, feature_num])
5
----> 6 x = data[0] # 查看数据
7 print(x.shape)
8 print(x)
IndexError: index 0 is out of bounds for axis 0 with size 0
ratio = 0.8
offset = int(data.shape[0] * ratio)
training_data = data[:offset]
training_data.shape
(0, 14)
# Compute the max, min, and mean of the training set
maximums, minimums, avgs = \
training_data.max(axis=0), \
training_data.min(axis=0), \
training_data.sum(axis=0) / training_data.shape[0]
# Normalize the data (shift by the training mean, scale by the training range)
for i in range(feature_num):
    # print(maximums[i], minimums[i], avgs[i])
    data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
1 # Compute the max, min, and mean of the training set
2 maximums, minimums, avgs = \
----> 3 training_data.max(axis=0), \
4 training_data.min(axis=0), \
5 training_data.sum(axis=0) / training_data.shape[0]
c:\users\django\appdata\local\programs\python\python37\lib\site-packages\numpy\core\_methods.py in _amax(a, axis, out, keepdims, initial, where)
28 def _amax(a, axis=None, out=None, keepdims=False,
29 initial=_NoValue, where=True):
---> 30 return umr_maximum(a, axis, None, out, keepdims, initial, where)
31
32 def _amin(a, axis=None, out=None, keepdims=False,
ValueError: zero-size array to reduction operation maximum which has no identity
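Both errors above are downstream of the empty array: training_data ends up with shape (0, 14), and max() over a zero-size axis has no identity element, hence the ValueError. As a side note, once the data actually loads, the per-column loop can be written in one broadcast expression; a sketch of the equivalent vectorized form (my rewrite, not code from the course):
# Same normalization as the loop above: shift by the training means,
# scale by the training ranges, applied column-wise via broadcasting.
data = (data - avgs) / (maximums - minimums)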
def load_data():
    # Import the data from file
    datafile = './work/housing.data'
    data = np.fromfile(datafile, sep=' ')
    # Each record has 14 items: the first 13 are explanatory features,
    # the 14th is the median house price (MEDV)
    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \
                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
    feature_num = len(feature_names)
    # Reshape the flat array into the shape [N, 14]
    data = data.reshape([data.shape[0] // feature_num, feature_num])
    # Split the original data into a training set and a test set:
    # 80% of the data is used for training, 20% for testing,
    # and the two sets must not overlap
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    # Compute the max, min, and mean of the training set
    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \
                               training_data.sum(axis=0) / training_data.shape[0]
    # Normalize the data using the training-set statistics
    for i in range(feature_num):
        # print(maximums[i], minimums[i], avgs[i])
        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
    # Re-split the normalized data into training and test sets
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data
training_data, test_data = load_data()
x = training_data[:, :-1]
y = training_data[:, -1:]
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
in
----> 1 training_data, test_data = load_data()
2 x = training_data[:, :-1]
3 y = training_data[:, -1:]
in load_data()
2 # Import the data from file
3 datafile = './work/housing.data'
----> 4 data = np.fromfile(datafile, sep=' ')
5
6 # Each record has 14 items: the first 13 are explanatory features,
FileNotFoundError: [Errno 2] No such file or directory: './work/housing.data'
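The './work/housing.data' path comes from the tutorial's hosted environment (Baidu AI Studio), so a local run fails unless the file is placed there first. A small guard makes that explicit before calling load_data() (a sketch; obtaining the whitespace-separated Boston housing file, 506 rows by 14 columns, is left to the reader):
import os  # not in the original notebook; added for a local run

datafile = './work/housing.data'
if not os.path.exists(datafile):
    os.makedirs('./work', exist_ok=True)
    print('Missing', datafile, '- copy the whitespace-separated housing data there first.')
else:
    training_data, test_data = load_data()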
# Define the Network class, which computes predictions and the loss. The variables x, w, b, z, error, etc. inside the class are all vectors (NumPy arrays).
class Network(object):
    def __init__(self, num_of_weights):
        # Initialize w with random values.
        # Fix the random seed so every run produces the same result.
        np.random.seed(0)
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0.

    def forward(self, x):
        z = np.dot(x, self.w) + self.b
        return z

    def loss(self, z, y):
        error = z - y
        cost = error * error
        cost = np.mean(cost)
        return cost
# Solve for the values of the parameters w and b; this process is also called model training.
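The course material extends the class with gradient computation and parameter updates at this point; below is a hedged, minimal sketch of what such a training step can look like (my own gradient-descent version for the mean-squared loss defined above, written as a hypothetical subclass so the class above stays unchanged; it is not the course's exact code):
class TrainableNetwork(Network):  # hypothetical subclass, added for illustration
    def gradient(self, x, y):
        # Analytic gradient of mean((x.w + b - y)^2) with respect to w and b
        z = self.forward(x)
        gradient_w = 2. * np.mean((z - y) * x, axis=0)[:, np.newaxis]
        gradient_b = 2. * np.mean(z - y)
        return gradient_w, gradient_b

    def update(self, gradient_w, gradient_b, eta=0.01):
        # One gradient-descent step with learning rate eta
        self.w = self.w - eta * gradient_w
        self.b = self.b - eta * gradient_b
Training then just alternates gradient() and update() for some number of iterations, tracking loss() along the way.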
net = Network(13)
losses = []
# Only plot the loss for parameters w5 and w9 over the interval [-160, 160]; this range already contains the extremum of the loss function
w5 = np.arange(-160.0, 160.0, 1.0)
w9 = np.arange(-160.0, 160.0, 1.0)
losses = np.zeros([len(w5), len(w9)])

# Compute the loss for every (w5, w9) value in the chosen region
for i in range(len(w5)):
    for j in range(len(w9)):
        net.w[5] = w5[i]
        net.w[9] = w9[j]
        z = net.forward(x)
        loss = net.loss(z, y)
        losses[i, j] = loss
# Make a 3D surface plot of the two parameters and the corresponding loss
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = Axes3D(fig)
w5, w9 = np.meshgrid(w5, w9)
ax.plot_surface(w5, w9, losses, rstride=1, cstride=1, cmap='rainbow')
plt.show()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
in
30 net.w[5] = w5[i]
31 net.w[9] = w9[j]
---> 32 z = net.forward(x)
33 loss = net.loss(z, y)
34 losses[i, j] = loss
NameError: name 'x' is not defined
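The NameError is again a downstream symptom: x and y were never defined because load_data() raised FileNotFoundError earlier. One more hedged aside, in case the data issue is fixed and this cell is re-run on a recent matplotlib: constructing Axes3D(fig) directly no longer attaches the axes to the figure automatically, so the surface may come out blank; the add_subplot form avoids that (a sketch, assuming matplotlib 3.4 or later):
# Assumes the loss grid above was computed successfully (i.e. x and y exist)
# and that w5, w9 are still the 1-D arrays defined before the loop.
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(projection='3d')   # instead of Axes3D(fig) on newer matplotlib
W5, W9 = np.meshgrid(w5, w9)
ax.plot_surface(W5, W9, losses, rstride=1, cstride=1, cmap='rainbow')
plt.show()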
I spent almost an entire afternoon on the Boston house price regression model and I still haven't fully understood it; the part that tripped me up most was the code. I do understand the loss function and gradient descent now, but my neck really hurts, so I'll pick it up again tomorrow. One more thing: I think Baidu AI's machine learning and deep learning course is really good. The main content of this post is my own impressions and summary, plus the outline of the course.
For as far into the future as humans can foresee, artificial intelligence will surely be a major driving force for the development of human society and civilization. Like the agricultural and industrial revolutions, it will profoundly change how all of humanity lives and how we understand the world to come; the future will no longer be humanity's history alone, but a new history of humans and machines coexisting. (The parameter-tuning work will be finished in the near future...)