关于梯度检测的一些代码及使用示例:
五种gradient_check:
from __future__ import print_function
from builtins import range
import numpy as np
from random import randrange
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
"""
a naive implementation of numerical gradient of f at x
- f should be a function that takes a single argument
- x is the point (numpy array) to evaluate the gradient at
"""
fx = f(x) # evaluate function value at original point
grad = np.zeros_like(x)
# iterate over all indexes in x
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
# evaluate function at x+h
ix = it.multi_index
oldval = x[ix]
x[ix] = oldval + h # increment by h
fxph = f(x) # evalute f(x + h)
x[ix] = oldval - h
fxmh = f(x) # evaluate f(x - h)
x[ix] = oldval # restore
# compute the partial derivative with centered formula
grad[ix] = (fxph - fxmh) / (2 * h) # the slope
if verbose:
print(ix, grad[ix])
it.iternext() # step to next dimension
return grad
def eval_numerical_gradient_array(f, x, df, h=1e-5):
"""
Evaluate a numeric gradient for a function that accepts a numpy
array and returns a numpy array.
"""
grad = np.zeros_like(x)
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
ix = it.multi_index
oldval = x[ix]
x[ix] = oldval + h
pos = f(x).copy()
x[ix] = oldval - h
neg = f(x).copy()
x[ix] = oldval
grad[ix] = np.sum((pos - neg) * df) / (2 * h)
it.iternext()
return grad
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
"""
Compute numeric gradients for a function that operates on input
and output blobs.
We assume that f accepts several input blobs as arguments, followed by a
blob where outputs will be written. For example, f might be called like:
f(x, w, out)
where x and w are input Blobs, and the result of f will be written to out.
Inputs:
- f: function
- inputs: tuple of input blobs
- output: output blob
- h: step size
"""
numeric_diffs = []
for input_blob in inputs:
diff = np.zeros_like(input_blob.diffs)
it = np.nditer(input_blob.vals, flags=['multi_index'],
op_flags=['readwrite'])
while not it.finished:
idx = it.multi_index
orig = input_blob.vals[idx]
input_blob.vals[idx] = orig + h
f(*(inputs + (output,)))
pos = np.copy(output.vals)
input_blob.vals[idx] = orig - h
f(*(inputs + (output,)))
neg = np.copy(output.vals)
input_blob.vals[idx] = orig
diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
it.iternext()
numeric_diffs.append(diff)
return numeric_diffs
def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
return eval_numerical_gradient_blobs(lambda *args: net.forward(),
inputs, output, h=h)
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
"""
sample a few random elements and only return numerical
in this dimensions.
"""
for i in range(num_checks):
ix = tuple([randrange(m) for m in x.shape])
oldval = x[ix]
x[ix] = oldval + h # increment by h
fxph = f(x) # evaluate f(x + h)
x[ix] = oldval - h # increment by h
fxmh = f(x) # evaluate f(x - h)
x[ix] = oldval # reset
grad_numerical = (fxph - fxmh) / (2 * h)
grad_analytic = analytic_grad[ix]
rel_error = (abs(grad_numerical - grad_analytic) /
(abs(grad_numerical) + abs(grad_analytic)))
print('numerical: %f analytic: %f, relative error: %e'
%(grad_numerical, grad_analytic, rel_error))
使用示例:
# Test the affine_backward function
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)
dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)
_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)
# The error should be around 1e-10
print('Testing affine_backward function:')
print('dx error: ', rel_error(dx_num.reshape(dx_num.shape[0],-1), dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))
rel_error函数:
def rel_error(x, y):
""" returns relative error """
return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))