import numpy as np
# Input specification
# X - R(batch_size, nx), Y - R(batch_size, 1), W - R(1, nx), b - R(1, 1)
def layer(X, W, b):
    """Apply the affine map of a single linear unit to a batch of inputs.

    Args:
        X: Input batch, shape (batch_size, nx).
        W: Weight row vector, shape (1, nx).
        b: Bias, shape (1, 1).

    Returns:
        Pre-activations z, shape (batch_size, 1) -- the same layout as Y.
    """
    # (1, nx) @ (nx, batch_size) -> (1, batch_size); b broadcasts along the row.
    row_scores = np.matmul(W, X.T) + b
    # Flip to a column so that every sample occupies its own row.
    return row_scores.T
def sigmoid(z):
    """Element-wise logistic function, evaluated without floating-point overflow.

    The textbook form ``1 / (1 + exp(-z))`` overflows inside ``exp`` when ``z``
    is a large negative number. Here ``exp`` is only ever given a non-positive
    argument, and the algebraically equivalent form ``exp(z) / (1 + exp(z))``
    is used for negative ``z``.

    Args:
        z: Pre-activations, shape (batch_size, 1) (any array-like is accepted).

    Returns:
        Values in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1], so it can underflow to 0 but never overflow.
    shrink = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    numerator = np.where(z >= 0, 1.0, shrink)
    return numerator / (1.0 + shrink)
def lossfunction(pre_y, true_y):
    """Compute the per-sample half squared error.

    Args:
        pre_y: Predictions, shape (batch_size, 1).
        true_y: Targets, shape (batch_size, 1).

    Returns:
        Element-wise loss ``0.5 * (pre_y - true_y) ** 2``, shape (batch_size, 1).
    """
    residual = pre_y - true_y
    return 0.5 * residual ** 2
def forward(X, W, b):
    """Run the forward pass: affine layer followed by the sigmoid.

    Args:
        X: Input batch, shape (batch_size, nx).
        W: Weight row vector, shape (1, nx).
        b: Bias, shape (1, 1).

    Returns:
        Predictions pre_y, shape (batch_size, 1).
    """
    # layer() yields the (batch_size, 1) pre-activations that sigmoid() squashes.
    return sigmoid(layer(X, W, b))
def layer_grad(X, W, b):
    """Return the local derivatives of the affine layer w.r.t. W and b.

    Args:
        X: Input batch, shape (batch_size, nx).
        W: Weight row vector (not used by the computation).
        b: Bias (not used by the computation).

    Returns:
        A pair ``(dz_w, dz_b)``: ``dz_w`` is ``X.T`` with shape
        (nx, batch_size); ``dz_b`` is a column of ones with shape
        (batch_size, 1), the same layout as Y.
    """
    n_samples = X.shape[0]
    bias_grad = np.ones((n_samples, 1))
    return X.T, bias_grad
def sigmoid_grad(pre_y):
    """Derivative of the sigmoid, expressed through the sigmoid's own output.

    Args:
        pre_y: Sigmoid outputs, shape (batch_size, 1).

    Returns:
        ``pre_y * (1 - pre_y)``, shape (batch_size, 1).
    """
    complement = 1 - pre_y
    return pre_y * complement
def lossfunction_grad(pre_y, true_y):
    """Derivative of the half squared error with respect to the prediction.

    Args:
        pre_y: Predictions, shape (batch_size, 1).
        true_y: Targets, shape (batch_size, 1).

    Returns:
        dl_y = ``pre_y - true_y``, shape (batch_size, 1).
    """
    residual = pre_y - true_y
    return residual
def backward(X, W, b, dl_y):
    """Back-propagate the loss gradient to the weight and the bias.

    Args:
        X: Input batch, shape (batch_size, nx).
        W: Weight row vector, shape (1, nx).
        b: Bias, shape (1, 1).
        dl_y: Gradient of the loss w.r.t. the prediction, shape (batch_size, 1).

    Returns:
        A pair ``(dw, db)`` summed over the batch: ``dw`` has shape (1, nx)
        and ``db`` has shape (1, 1).
    """
    # The sigmoid derivative is written in terms of the sigmoid output, so the
    # forward pass is evaluated again here.
    activations = forward(X, W, b)              # (batch_size, 1)
    dy_z = sigmoid_grad(activations)            # (batch_size, 1)
    dz_w, _ = layer_grad(X, W, b)               # dz_w: (nx, batch_size)

    # Chain rule up to the pre-activation: dL/dz for every sample.
    delta = dl_y * dy_z                         # (batch_size, 1)

    # Broadcast against the inputs, then add the per-sample contributions.
    per_feature = np.sum(delta * dz_w.T, axis=0)            # (nx,)
    dw = per_feature.reshape([1, per_feature.shape[0]])     # (1, nx)

    db = np.sum(delta, axis=0).reshape([1, 1])              # (1, 1)
    return dw, db
class DNN(object):
    """A single sigmoid unit with a plain gradient-descent update rule."""

    def __init__(self, batch_size, nx, lr):
        """Store the hyper-parameters and draw the initial parameters.

        Args:
            batch_size: Mini-batch size; kept on the instance as ``bt``.
            nx: Number of input features.
            lr: Learning rate used by ``step``.
        """
        self.bt = batch_size
        self.lr = lr
        weights, bias = self.__init__weight(nx)
        self.w = weights
        self.b = bias

    def __init__weight(self, nx):
        """Draw a (1, nx) weight and a (1, 1) bias from a standard normal."""
        # The weight is sampled before the bias; the tuple is built left to right.
        return np.random.randn(1, nx), np.random.randn(1, 1)

    def forward(self, X):
        """Predict for batch ``X`` with the current parameters."""
        return forward(X, self.w, self.b)

    def backward(self, X, dl_y):
        """Return ``(dw, db)`` for batch ``X`` given the loss gradient ``dl_y``."""
        return backward(X, self.w, self.b, dl_y)

    def step(self, dw, db):
        """Take one gradient-descent step, updating ``w`` and ``b`` in place."""
        weight_delta = self.lr * dw
        bias_delta = self.lr * db
        self.w -= weight_delta
        self.b -= bias_delta
def train(model, X, Y, batch_size, epochs):
    """Train ``model`` with shuffled mini-batch gradient descent.

    Each epoch reshuffles the sample order, walks over it in chunks of
    ``batch_size`` (the final chunk may be shorter), and for every chunk runs
    forward -> loss gradient -> backward -> parameter step. The mean loss of
    every batch and a per-epoch summary are printed.

    Args:
        model: Object providing ``forward(X)``, ``backward(X, dl_y)`` and
            ``step(dw, db)``.
        X: Training inputs, shape (data_size, nx).
        Y: Training targets, shape (data_size,) or (data_size, 1).
        batch_size: Number of samples per mini-batch; must be positive.
        epochs: Number of passes over the data.

    Returns:
        1-D array of length ``epochs``; entry ``e`` is the sum over the
        batches of epoch ``e`` of the mean batch loss.

    Raises:
        ValueError: If ``batch_size`` is not positive (the batch loop could
            never advance).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    n_samples = X.shape[0]  # Data_size
    losses = []
    for e in range(epochs):
        count_loss = 0
        n_batches = 0
        indexs = list(range(n_samples))
        np.random.shuffle(indexs)  # new sample order every epoch
        for start in range(0, n_samples, batch_size):
            batch_indxs = indexs[start:start + batch_size]
            # Size the reshape from the actual batch: the last batch may be
            # shorter than batch_size, and the feature count comes from X
            # itself instead of a module-level global.
            n_batch = len(batch_indxs)
            batch_x = X[batch_indxs].reshape(n_batch, -1)  # (n_batch, nx)
            batch_y = Y[batch_indxs].reshape(n_batch, 1)   # (n_batch, 1)

            batch_pre_y = model.forward(batch_x)             # (n_batch, 1)
            dl_y = lossfunction_grad(batch_pre_y, batch_y)   # (n_batch, 1)
            dw, db = model.backward(batch_x, dl_y)           # (1, nx), (1, 1)
            model.step(dw, db)

            # The reported loss uses the predictions made before the step.
            loss = np.mean(lossfunction(batch_pre_y, batch_y))
            print("epoch: {}, batch: {}, loss: {}".format(e, start // batch_size, loss))
            count_loss += loss
            n_batches += 1
        # Average over the number of batches actually processed (not over
        # batch_size); max() guards the empty-dataset case.
        print('epochs:{},loss:{}'.format(e, count_loss / max(n_batches, 1)))
        losses.append(count_loss)
    return np.array(losses)
# Script section: build a synthetic two-class dataset, train the model, and
# plot the per-epoch loss.
# Hyper-parameters and dataset sizes.
batch_size = 100
nx = 10
epochs = 10
train_N = 10000
test_N =10000
lr = 0.001
# Per-feature means of the two classes.
mu0 = 0
mu1 = 10
# Each class gets (train_N + test_N) // 2 samples drawn as randn * 10 + mean.
X0 = np.random.randn((train_N + test_N) // 2, nx) * 10 + mu0
X1 = np.random.randn((train_N + test_N) // 2, nx) * 10 + mu1
# Labels: 0 for the samples in X0, 1 for the samples in X1 (both 1-D).
Y0 = np.zeros(X0.shape[0])
Y1 = np.ones(X1.shape[0])
X = np.concatenate((X0, X1))  # X - R(Data_size, nx); default axis=0 stacks the rows vertically
Y = np.concatenate((Y0, Y1))  # Y - 1-D, shape (Data_size,); train() reshapes each batch to a column
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.5)  # split into a training set and a test set
model = DNN(batch_size,nx,lr)
losses = train(model, X_train, y_train,batch_size, epochs)
# Plot the loss value returned for each epoch.
# NOTE(review): plt.show() is not called in the visible code -- confirm the
# figure is displayed by the environment (e.g. a notebook).
import matplotlib.pyplot as plt
plt.plot(range(len(losses)), losses)
# Check the results on the test set.
def test(test_X, test_y):
    """Return the classification accuracy of the global ``model`` on a test set.

    Predictions above 0.5 are labelled 1, all others 0.

    Args:
        test_X: Test inputs, shape (N, nx).
        test_y: True 0/1 labels, shape (N,) or (N, 1).

    Returns:
        Fraction of samples whose thresholded prediction equals the label.
    """
    probs = model.forward(test_X)  # (N, 1)
    # Flatten both sides before comparing: an (N, 1) prediction compared with
    # an (N,) label vector would broadcast to an (N, N) matrix and produce a
    # meaningless "accuracy".
    pred_labels = (np.ravel(probs) > 0.5).astype(float)
    true_labels = np.ravel(test_y)
    return np.mean(pred_labels == true_labels)
# Evaluate on the held-out split. The returned value is neither stored nor
# printed by this statement.
test(X_test, y_test)