def relu(x):
    """Rectified linear unit: element-wise max(x, 0)."""
    return np.maximum(x, 0)
def softmax_1(z):
    """Numerically stable softmax.

    For a 2-D input the softmax is taken row-wise (axis=1); for a 1-D
    input over the whole vector.  The max is subtracted first so that
    np.exp never overflows.
    """
    if z.ndim == 2:
        shifted = z - np.max(z, axis=1, keepdims=True)  # overflow guard
        e = np.exp(shifted)
        return e / np.sum(e, axis=1, keepdims=True)
    shifted = z - np.max(z)  # overflow guard
    e = np.exp(shifted)
    return e / e.sum()
# Cross-entropy error
def cross_entropy_error(p, y):
    """Mean cross-entropy loss over a batch.

    :param p: predicted probabilities, shape (batch, classes) or (classes,)
    :param y: one-hot true labels with the same shape as p
    :return: scalar loss averaged over the batch
    """
    delta = 1e-7  # avoids log(0)
    # Bug fix: for a single 1-D sample, p.shape[0] was the number of
    # classes, not the batch size, which mis-scaled the loss.  Promote
    # 1-D inputs to a batch of one.
    if p.ndim == 1:
        p = p.reshape(1, -1)
        y = y.reshape(1, -1)
    batch_size = p.shape[0]
    return -np.sum(y * np.log(p + delta)) / batch_size
def numerical_gradient(f, x):
    """Central-difference numerical gradient of f at x.

    x is perturbed in place one element at a time and restored
    afterwards, so f must be a function of the same array object.
    """
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)
    # np.ndindex walks the array in C order, same as nditer's multi_index.
    for idx in np.ndindex(*x.shape):
        saved = x[idx]
        x[idx] = float(saved) + h
        f_plus = f(x)   # f(x+h)
        x[idx] = saved - h
        f_minus = f(x)  # f(x-h)
        grad[idx] = (f_plus - f_minus) / (2 * h)
        x[idx] = saved  # restore the original value
    return grad
class two_layer:
    """A two-layer fully-connected classifier trained with numerical gradients."""

    # Define the network
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize weights; weight_init_std keeps the initial weights
        # small so the activations do not saturate.
        self.network = {}  # stores the weight and bias parameters
        self.network['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.network['b1'] = np.zeros(hidden_size)
        self.network['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.network['b2'] = np.zeros(output_size)

    # Forward pass / prediction
    def predict(self, x):
        """Return class probabilities for input x, shape (batch, output_size)."""
        W1, W2 = self.network['W1'], self.network['W2']
        b1, b2 = self.network['b1'], self.network['b2']
        a1 = np.dot(x, W1) + b1
        # NOTE(review): softmax as the *hidden* activation is unusual; the
        # unused relu() defined above was probably intended here — confirm
        # before changing, as it alters training results.
        z1 = softmax_1(a1)
        a2 = np.dot(z1, W2) + b2
        return softmax_1(a2)

    # Loss function
    def loss(self, x, y):
        """Cross-entropy loss of the predictions against one-hot targets y."""
        p = self.predict(x)
        return cross_entropy_error(p, y)

    # Gradient estimation (calls the module-level numerical_gradient)
    def numerical_gradient(self, x, y):
        """Numerically estimate the loss gradient for every parameter."""
        loss_W = lambda W: self.loss(x, y)
        grad = {}
        grad['W1'] = numerical_gradient(loss_W, self.network['W1'])
        grad['b1'] = numerical_gradient(loss_W, self.network['b1'])
        grad['W2'] = numerical_gradient(loss_W, self.network['W2'])
        grad['b2'] = numerical_gradient(loss_W, self.network['b2'])
        return grad

    def acc(self, x, t):
        """Classification accuracy of predictions on x against one-hot t."""
        p = self.predict(x)
        y = np.argmax(t, axis=1)
        # Bug fix: the original computed np.argmax(y, axis=1) — argmax of
        # the already-1-D label vector — which crashes and never compares
        # the predictions.  Take the argmax of the predictions instead.
        p = np.argmax(p, axis=1)
        return np.sum(p == y) / float(x.shape[0])
# Use the PyTorch framework to download and read the data
import numpy as np
import torch
from torch.utils.data import DataLoader
import torchvision.datasets as da
# Mini-batch size used by the DataLoaders below.
batch_size = 100
# MNIST dataset
train_dataset = da.MNIST(root='/pymnist',  # dataset root directory
                         train=True,       # training split
                         transform=None,   # no preprocessing applied
                         download=True)    # download from the internet if missing
test_dataset = da.MNIST(root='/pymnist',   # dataset root directory
                        train=False,       # test split
                        transform=None,    # no preprocessing applied
                        download=True)     # download from the internet if missing
# Load the data.
# NOTE(review): these loaders are never iterated below — training samples
# batches directly from numpy arrays instead; confirm whether the loaders
# are actually needed.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)  # shuffle the data
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
# Flatten the images to (N, 784) and one-hot encode the labels via scatter_.
# NOTE(review): train_data/train_labels are deprecated torchvision attribute
# names (newer releases use .data/.targets) — confirm the installed version.
x_train = train_dataset.train_data.numpy().reshape(-1, 28 * 28)
y_train_tmp = train_dataset.train_labels.reshape(train_dataset.train_labels.shape[0], 1)
y_train = torch.zeros(y_train_tmp.shape[0], 10).scatter_(1, y_train_tmp, 1).numpy()
x_test = test_dataset.test_data.numpy().reshape(-1, 28 * 28)
y_test_tmp = test_dataset.test_labels.reshape(test_dataset.test_labels.shape[0], 1)
y_test = torch.zeros(y_test_tmp.shape[0], 10).scatter_(1, y_test_tmp, 1).numpy()
# (Bug fix: the six conversion lines above previously appeared twice;
# the exact duplicate has been removed.)

train_size = x_train.shape[0]
iters_num = 600          # mini-batch updates per epoch
learning_rate = 0.001
epoch = 5
batch_size = 100
network = two_layer(input_size=784, hidden_size=50, output_size=10)

for i in range(epoch):
    print('current epoch is :', i)
    for num in range(iters_num):
        # Sample a random mini-batch.
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        y_batch = y_train[batch_mask]
        # Numerical gradients (slow — one forward pass per parameter element).
        grad = network.numerical_gradient(x_batch, y_batch)
        # Plain SGD parameter update.
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.network[key] -= learning_rate * grad[key]
        loss = network.loss(x_batch, y_batch)
        if num % 100 == 0:
            print(loss)
# Final accuracy on the test split.
print(network.acc(x_test, y_test))
# 期待大家和我交流,留言或者私信,一起学习,一起进步!
# (Looking forward to exchanging ideas — leave a comment or message me; let's learn and improve together!)