算法工程师面试常见代码题汇总

主要参考:《深度学习入门:基于Python的理论与实现》

注:下文所有代码片段均假设已执行 `import numpy as np`。


文章目录

  • 激活函数
    • sigmoid
    • relu
    • softmax
    • better softmax
  • 损失函数
    • mean squared error
    • cross entropy error
    • detailed cross entropy error
  • 梯度下降
    • SGD
    • Momentum
    • Nesterov
    • AdaGrad
    • RMSProp
    • Adam
  • layers
    • multiply
    • add
    • Relu
    • sigmoid
    • Affine
    • softmaxwithloss
  • BN
  • weight decay
  • drop out
  • MLP
  • convolution
  • pooling
  • iou
    • bbox
    • bboxes
  • precision and recall

激活函数

sigmoid

def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: A scalar or array-like accepted by ``np.exp``.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

relu

def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``.

    Args:
        x: A scalar or array-like accepted by ``np.maximum``.

    Returns:
        ``x`` with every negative entry replaced by 0.
    """
    floor = 0
    return np.maximum(floor, x)

softmax

def softmax(a):
    """Naive softmax over ALL elements of ``a``.

    Exponentiates every element and divides by the total sum. No overflow
    protection is applied, so large inputs produce ``inf``/``nan``.

    Args:
        a: Array-like of scores.

    Returns:
        Array of the same shape whose entries sum to 1.
    """
    exponentials = np.exp(a)
    return exponentials / np.sum(exponentials)

better softmax

def better_softmax(a):
    """Overflow-safe softmax over ALL elements of ``a``.

    The global maximum is subtracted before exponentiation; this leaves the
    result mathematically unchanged but keeps ``exp`` from overflowing.

    Args:
        a: Array-like of scores.

    Returns:
        Array of the same shape whose entries sum to 1.
    """
    shifted = a - np.max(a)
    weights = np.exp(shifted)
    return weights / np.sum(weights)

损失函数

mean squared error

def mean_squared_error(y,t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Args:
        y: Predictions (scalar or ndarray).
        t: Targets, broadcastable against ``y``.

    Returns:
        ``0.5 * sum((y - t) ** 2)`` as a scalar.
    """
    residual = y - t
    return 0.5 * np.sum(residual ** 2)

cross entropy error

def cross_entropy_error(y,t):
    """Cross-entropy ``-sum(t * log(y))`` between a prediction and a target.

    A tiny constant is added inside the logarithm so that a predicted
    probability of exactly 0 yields a large finite loss instead of
    ``-inf`` (or ``nan`` when the matching target entry is 0).

    Args:
        y: Predicted probabilities (array-like).
        t: Target distribution of the same shape, e.g. a one-hot vector.

    Returns:
        The summed cross-entropy as a scalar.
    """
    delta = 1e-7  # guards against log(0)
    y = np.asarray(y, dtype=float)
    t = np.asarray(t)
    return -np.sum(t * np.log(y + delta))

detailed cross entropy error

下面给出一个更具体的实现:label 是每个样本的类别索引(一维整数数组),logit 是模型输出的未归一化得分(二维数组,每行对应一个样本)。
首先把 label 转成 one-hot 的形式;然后对 logit 按行做 softmax 并取对数;最后对每个样本求交叉熵,再对所有样本取平均得到 loss。

def np_onehot(nc, label):
    """One-hot encode a 1-D array of class indices.

    Args:
        nc: Number of classes (width of the output).
        label: 1-D ndarray of integer class indices.

    Returns:
        Integer ndarray of shape ``(len(label), nc)`` with a 1 in the column
        given by each label and 0 elsewhere.
    """
    class_ids = np.arange(nc)
    # Broadcast (N, 1) against (1, nc) to compare every label with every class.
    matches = label[:, None] == class_ids[None, :]
    return matches.astype(int)
# Replace the integer class indices with their one-hot encoding (4 classes).
label = np_onehot(nc=4, label=label)
def np_softmax(arr):
    """Row-wise softmax of a 2-D array of logits.

    The maximum of each row is subtracted before exponentiation. This does
    not change the result mathematically but prevents ``exp`` from
    overflowing to ``inf`` (and the output from becoming ``nan``).

    Args:
        arr: ndarray of shape ``(batch, classes)``.

    Returns:
        ndarray of the same shape in which every row sums to 1.
    """
    assert len(arr.shape)==2
    shifted = arr - np.max(arr, axis=1, keepdims=True)
    arr_exp = np.exp(shifted)
    return arr_exp / np.sum(arr_exp, axis=1, keepdims=True)
# Row-wise class probabilities and their logarithms.
soft_logit = np_softmax(logit)
log_logit = np.log(soft_logit)
# The one-hot mask keeps only the (negated) log-probability of the true class.
res = -(label * log_logit)
# Sum over classes for each sample, then average over the batch.
loss = res.sum(axis=1).mean()

梯度下降

SGD

class SGD:
    """Plain stochastic gradient descent: ``param <- param - lr * grad``."""

    def __init__(self, lr = 0.01):
        """Store the learning rate ``lr``."""
        self.lr = lr

    def update(self, params, grads):
        """Take one descent step on every entry of ``params``.

        Args:
            params: Dict mapping parameter names to values; updated in place.
            grads: Dict with a gradient for every key of ``params``.
        """
        for name in params:
            step = self.lr * grads[name]
            params[name] -= step

Momentum

class Momentum:
    """SGD with momentum.

    Update rule, per parameter::

        v <- momentum * v - lr * grad
        w <- w + v
    """

    def __init__(self, lr = 0.01, momentum=0.9):
        """Store the hyper-parameters; velocities are created lazily.

        Args:
            lr: Learning rate.
            momentum: Decay factor applied to the previous velocity.
        """
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        """Apply one momentum step to every entry of ``params``.

        Args:
            params: Dict mapping parameter names to values; updated in place.
            grads: Dict with a gradient for every key of ``params``.
        """
        if self.v is None:
            # First call: start every velocity at zero, shaped like its parameter.
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params:
            self.v[key] = self.momentum*self.v[key] - self.lr*grads[key]
            # The velocity must be added to the individual parameter, not to
            # the dict itself.
            params[key] += self.v[key]

Nesterov

class Nesterov:
    """Nesterov accelerated gradient.

    Each step first refreshes the velocity, then moves the parameter by a
    velocity term followed by a gradient term.
    """

    def __init__(self, lr = 0.01, momentum = 0.9):
        """Store the learning rate and momentum; velocities are created lazily."""
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        """Apply one Nesterov step to every entry of ``params``.

        Args:
            params: Dict mapping parameter names to values; updated in place.
            grads: Dict with a gradient for every key of ``params``.
        """
        if self.v is None:
            self.v = {name: np.zeros_like(value) for name, value in params.items()}
        mu = self.momentum
        for name in params:
            velocity = mu * self.v[name] - self.lr * grads[name]
            self.v[name] = velocity
            # Velocity part of the step, using the refreshed velocity.
            params[name] += mu * mu * velocity
            # Gradient part of the step.
            params[name] -= (1 + mu) * self.lr * grads[name]

AdaGrad

class AdaGrad:
    """AdaGrad: per-parameter learning-rate decay.

    The squared gradients are accumulated in ``h`` and every step is divided
    by ``sqrt(h)``, so parameters that have seen large gradients take
    smaller steps.
    """

    _EPS = 1e-7  # keeps the division finite while h is still zero

    def __init__(self, lr = 0.01):
        """Store the learning rate; accumulators are created lazily."""
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        """Apply one AdaGrad step to every entry of ``params``.

        Args:
            params: Dict mapping parameter names to values; updated in place.
            grads: Dict with a gradient for every key of ``params``.
        """
        if self.h is None:
            self.h = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params:
            self.h[key] += grads[key]*grads[key]
            # Without the epsilon a zero gradient gives 0/0 = NaN.
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + self._EPS)

RMSProp

class RMSprop:
    """RMSProp: AdaGrad with an exponentially decaying gradient history.

    ``h <- decay_rate * h + (1 - decay_rate) * grad**2`` and each step is
    divided by ``sqrt(h)``.
    """

    _EPS = 1e-7  # keeps the division finite while h is still zero

    def __init__(self, lr=0.01,decay_rate = 0.99):
        """Store the hyper-parameters; accumulators are created lazily.

        Args:
            lr: Learning rate.
            decay_rate: Weight kept from the previous squared-gradient average.
        """
        self.lr = lr
        self.decay_rate = decay_rate
        self.h = None

    def update(self,params, grads):
        """Apply one RMSProp step to every entry of ``params``.

        Args:
            params: Dict mapping parameter names to values; updated in place.
            grads: Dict with a gradient for every key of ``params``.
        """
        if self.h is None:
            self.h = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params:
            self.h[key] *= self.decay_rate
            # Squared gradient: element-wise product, not bitwise AND.
            self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + self._EPS)

Adam

class Adam:
    """Adam optimiser: momentum plus RMSProp-style scaling.

    ``m`` and ``v`` are exponential moving averages of the gradient and of
    the squared gradient. The bias correction for both is folded into the
    per-step learning rate ``lr_t``.
    """

    _EPS = 1e-7  # keeps the division finite when v is zero

    def __init__(self, lr = 0.001, beta1=0.9, beta2 = 0.99):
        """Store the hyper-parameters; moment estimates are created lazily.

        Args:
            lr: Base learning rate.
            beta1: Decay rate of the first-moment (mean) estimate.
            beta2: Decay rate of the second-moment (squared) estimate.
        """
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        """Apply one Adam step to every entry of ``params``.

        Args:
            params: Dict mapping parameter names to values; updated in place.
            grads: Dict with a gradient for every key of ``params``.
        """
        if self.m is None:
            self.m = {key: np.zeros_like(val) for key, val in params.items()}
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        self.iter += 1
        # Bias-corrected step size for the current iteration.
        lr_t = self.lr * np.sqrt(1 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
        for key in params:
            self.m[key] = (1 - self.beta1) * grads[key] + self.beta1 * self.m[key]
            self.v[key] = (1 - self.beta2) * (grads[key]**2) + self.beta2 * self.v[key]
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + self._EPS)

layers

multiply

class MulLayer:
    """Multiplication node of a computational graph: ``out = x * y``."""

    def __init__(self):
        """Create the layer with no cached inputs."""
        self.x = self.y = None

    def forward(self,x,y):
        """Cache both operands and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self,dout):
        """Return ``(dx, dy)``: the upstream gradient scaled by the OTHER operand."""
        return dout * self.y, dout * self.x

add

class AddLayer:
    """Addition node of a computational graph: ``out = x + y``."""

    def __init__(self):
        """The layer is stateless; nothing to initialise."""
        pass

    def forward(self, x, y):
        """Return the sum of the two operands."""
        return x + y

    def backward(self, dout):
        """Pass the upstream gradient unchanged to both inputs as ``(dx, dy)``."""
        return dout * 1, dout * 1

Relu

class ReluLayer:
    """ReLU layer: zeroes non-positive inputs in the forward pass and blocks
    the gradient at those positions in the backward pass."""

    def __init__(self):
        """Create the layer; ``mask`` is filled in by ``forward``."""
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every entry ``<= 0`` set to 0.

        Args:
            x: Input ndarray (left unmodified).

        Returns:
            ndarray of the same shape as ``x``.
        """
        # True where the unit is inactive; reused by backward().
        self.mask = x<=0
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return the gradient with respect to the input of ``forward``.

        Works on a copy so the caller's ``dout`` is not modified.

        Args:
            dout: Upstream gradient, same shape as the forward input.

        Returns:
            ``dout`` with entries at inactive positions set to 0.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

sigmoid

class SigmoidLayer:
    """Sigmoid layer; caches its output because the derivative is ``y * (1 - y)``."""

    def __init__(self):
        """Nothing to initialise; ``y`` is set by ``forward``."""
        pass

    def forward(self, x):
        """Compute, cache and return ``1 / (1 + exp(-x))``."""
        activation = 1 / (1 + np.exp(-x))
        self.y = activation
        return activation

    def backward(self,dout):
        """Return ``dout * y * (1 - y)`` using the cached forward output."""
        scaled = dout * self.y
        return scaled * (1 - self.y)

Affine

class Affine:
    """Fully connected layer: ``out = x @ W + b``."""

    def __init__(self,W,b):
        """Store the weight matrix ``W`` and bias ``b``.

        ``dW`` and ``db`` are filled in by ``backward``.
        """
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``x @ W + b``.

        Args:
            x: Input of shape ``(batch, in_features)``.

        Returns:
            ndarray of shape ``(batch, out_features)``.
        """
        self.x = x
        out = np.dot(self.x, self.W)+self.b
        return out

    def backward(self,dout):
        """Back-propagate ``dout`` and store the parameter gradients.

        Sets ``self.dW = x.T @ dout`` and ``self.db = sum(dout, axis=0)``.

        Args:
            dout: Upstream gradient of shape ``(batch, out_features)``.

        Returns:
            Gradient with respect to the input, ``dout @ W.T``.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout,axis=0)
        return dx

softmaxwithloss

class SoftmaxWithLoss:
    """Softmax activation fused with the cross-entropy loss.

    Fusing the two gives the simple input gradient ``(y - t) / batch_size``.
    """

    _EPS = 1e-7  # keeps log() finite when a probability underflows to 0

    def __init__(self):
        """Create the layer; ``t``, ``y`` and ``error`` are set by ``forward``."""
        self.t = None
        self.y = None
        self.error = None

    def softmax(self,x):
        """Overflow-safe softmax along the last axis of a 1-D or 2-D array."""
        if x.ndim == 2:
            shifted = x - np.max(x, axis=1, keepdims=True)
            exp_x = np.exp(shifted)
            return exp_x / np.sum(exp_x, axis=1, keepdims=True)
        shifted = x - np.max(x)
        exp_x = np.exp(shifted)
        return exp_x / np.sum(exp_x)

    def cross_entropy(self, y, t):
        """Mean cross-entropy ``-sum(t * log(y))`` over the batch.

        Args:
            y: Softmax output, shape ``(batch, classes)`` or ``(classes,)``.
            t: One-hot targets with the same shape as ``y``.

        Returns:
            Scalar loss averaged over the samples.
        """
        if y.ndim == 1:
            # A single sample is treated as a batch of one.
            y = y.reshape(1, -1)
            t = t.reshape(1, -1)
        log_prob = np.log(y + self._EPS)
        per_sample = np.sum(-t * log_prob, axis=1)
        return np.mean(per_sample)

    def forward(self,x,t):
        """Compute the loss for logits ``x`` against one-hot targets ``t``.

        Caches ``t`` and the softmax output for ``backward``.
        """
        self.t = t
        self.y = self.softmax(x)
        self.error = self.cross_entropy(self.y, self.t)
        return self.error

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the logits.

        Args:
            dout: Accepted for interface compatibility with the other layers;
                it is not used to scale the result.

        Returns:
            ``(y - t) / batch_size`` with the same shape as the logits.
        """
        batch_size = self.t.shape[0] if self.t.ndim > 1 else 1
        dx = (self.y - self.t)/batch_size
        return dx

BN

class BatchNormalization:
    """Batch normalisation over the batch axis of a flattened input.

    Inputs of any rank are reshaped to ``(batch, features)``, normalised per
    feature, scaled by ``gamma``, shifted by ``beta`` and reshaped back.
    """

    _EPS = 1e-7  # added to the variance so the division never hits zero

    def __init__(self, gamma, beta, momentum=0.9, running_mean = None, running_var = None):
        """Store the parameters and the optional running statistics.

        Args:
            gamma: Per-feature scale.
            beta: Per-feature shift.
            momentum: Weight kept from the previous running statistics.
            running_mean: Initial running mean; zeros are used when ``None``.
            running_var: Initial running variance; zeros are used when ``None``.
        """
        self.gamma = gamma
        self.beta = beta
        self.momentum = momentum
        self.running_mean = running_mean
        self.running_var = running_var
        # Caches written by forward(Train=True) and read by backward().
        self.input_shape = None
        self.batch_size = None
        self.xc = None   # centred input
        self.x = None    # normalised input
        self.std = None
        # Gradients written by backward().
        self.dgamma = None
        self.dbeta = None

    def forward(self, x, Train = True):
        """Normalise ``x``.

        Args:
            x: Input whose first axis is the batch axis.
            Train: When true, use the batch statistics and update the running
                averages; otherwise normalise with the running averages.

        Returns:
            ndarray with the same shape as ``x``.
        """
        batch_size = x.shape[0]
        self.input_shape = x.shape
        x = x.reshape(batch_size, -1)
        if self.running_mean is None:
            self.running_mean = np.zeros(x.shape[-1])
        if self.running_var is None:
            self.running_var = np.zeros(x.shape[-1])
        if Train:
            mu = np.mean(x, axis=0)
            xc = x - mu
            var = np.mean(xc**2, axis=0)
            std = np.sqrt(var + self._EPS)
            xn = xc / std
            self.batch_size = batch_size
            self.xc = xc
            self.x = xn
            self.std = std
            self.running_mean = self.momentum*self.running_mean + (1-self.momentum)*mu
            self.running_var = self.momentum*self.running_var + (1-self.momentum)*var
        else:
            xc = x - self.running_mean
            xn = xc / np.sqrt(self.running_var + self._EPS)
        out = self.gamma * xn + self.beta
        return out.reshape(*self.input_shape)

    def backward(self, dout):
        """Back-propagate through the most recent training-mode forward pass.

        Stores ``self.dgamma`` and ``self.dbeta``.

        Args:
            dout: Upstream gradient with the shape of the forward output.

        Returns:
            Gradient with respect to the forward input, in its original shape.
        """
        batch_size = dout.shape[0]
        dout = dout.reshape(batch_size, -1)
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(dout * self.x, axis=0)
        dxn = self.gamma * dout
        dxc = dxn / self.std
        # Path through the standard deviation / variance.
        dstd = -np.sum((dxn * self.xc) / (self.std**2), axis=0)
        dvar = 0.5 * dstd / self.std
        dxc = dxc + (2.0 / batch_size) * self.xc * dvar
        # Path through the batch mean.
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / batch_size
        self.dgamma = dgamma
        self.dbeta = dbeta
        return dx.reshape(*self.input_shape)

    def backwards(self, dout):
        """Alias of ``backward``, kept for callers using the older name."""
        return self.backward(dout)

weight decay

# weight decay
def weight_decay(x):
    """Return the L2 weight-decay penalty of a multi-layer network.

    NOTE(review): the original body referred to an undefined ``self`` and
    returned nothing. This version reads the same attributes from the
    argument ``x`` instead; confirm that matches the intended caller.

    Args:
        x: Network object exposing ``hidden_layer_num`` (int),
            ``weight_decay_lambda`` (float) and ``params``, a dict holding
            the weight matrices under the keys ``'W1'``, ``'W2'``, ...

    Returns:
        ``sum(0.5 * weight_decay_lambda * sum(W ** 2))`` over the weight
        matrices ``W1 .. W(hidden_layer_num + 1)``.
    """
    penalty = 0
    # hidden_layer_num hidden layers plus the output layer.
    for idx in range(1, x.hidden_layer_num + 2):
        W = x.params['W' + str(idx)]
        penalty += 0.5 * x.weight_decay_lambda * np.sum(W ** 2)
    return penalty

drop out

class Dropout:
    """Dropout layer.

    In training a fresh random mask zeroes some of the units. At inference
    every unit is kept and the output is scaled by ``1 - dropout_ratio``.
    """

    def __init__(self, dropout_ratio = 0.5):
        """Store the threshold ``dropout_ratio`` used to build the mask."""
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flag = True):
        """Apply dropout to ``x``.

        Args:
            x: Input ndarray.
            train_flag: True to sample a new mask and drop units; False to
                scale the input instead.

        Returns:
            ndarray with the same shape as ``x``.
        """
        if train_flag:
            # Keep a unit when its uniform [0, 1) draw exceeds the ratio.
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        """Pass the gradient only through the units kept by the last
        training-mode ``forward`` call."""
        return dout * self.mask

MLP

class TwoLayerNet:
    """Two-layer perceptron: affine -> sigmoid -> affine -> softmax,
    trained with cross-entropy."""

    _EPS = 1e-7  # keeps log() finite when a probability is 0

    def __init__(self, input_size, hidden_size, output_size, weights_init_std = 0.01):
        """Initialise weights from ``N(0, weights_init_std**2)`` and biases to 0.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of classes.
            weights_init_std: Standard deviation of the initial weights.
        """
        self.w1 = weights_init_std * np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros(hidden_size)
        self.w2 = weights_init_std * np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros(output_size)

    def sigmoid(self,x):
        """Element-wise logistic function."""
        return 1/(1+np.exp(-x))

    def softmax(self,x):
        """Overflow-safe softmax along the last axis."""
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exp_logits = np.exp(shifted)
        # keepdims so that each row is divided by its own sum.
        return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)

    @staticmethod
    def _onehot(target, num_classes):
        """One-hot encode a 1-D array of class indices."""
        classes = np.arange(num_classes)[None, :]
        return (classes == target[:, None]).astype(int)

    def _hidden(self, x):
        """Return the hidden-layer activations for input ``x``."""
        a1 = np.dot(x, self.w1) + self.b1
        return self.sigmoid(a1)

    def predict(self,x):
        """Return class probabilities of shape ``(batch, output_size)``."""
        z1 = self._hidden(x)
        a2 = np.dot(z1, self.w2) + self.b2
        return self.softmax(a2)

    def cross_entropy_error(self,pred, target):
        """Mean cross-entropy of ``pred`` against integer class labels.

        Args:
            pred: Probabilities of shape ``(batch, classes)``.
            target: 1-D ndarray of class indices.
        """
        num_classes = pred.shape[-1]
        one_hot_label = self._onehot(target, num_classes)
        log_prob = np.log(pred + self._EPS)
        return np.mean(np.sum(-log_prob * one_hot_label, axis=1))

    def loss(self, x, y):
        """Cross-entropy loss of the network on inputs ``x`` with labels ``y``."""
        y_hat = self.predict(x)
        return self.cross_entropy_error(y_hat, y)

    def accuracy(self, x, target):
        """Fraction of samples whose arg-max prediction equals ``target``."""
        y_hat = self.predict(x)
        pred = np.argmax(y_hat,axis=1)
        return np.sum(pred == target)/float(x.shape[0])

    def gradient(self, y,x,target):
        """Back-propagate the loss and return the parameter gradients.

        The weights themselves are not modified.

        Args:
            y: Output of ``predict(x)``.
            x: Inputs of shape ``(batch, input_size)``.
            target: Class indices (1-D) or one-hot labels (same shape as ``y``).

        Returns:
            Dict with keys ``'w1'``, ``'b1'``, ``'w2'``, ``'b2'``, each shaped
            like the corresponding parameter.
        """
        batch_size = x.shape[0]
        target = np.asarray(target)
        if target.ndim == 1:
            target = self._onehot(target, y.shape[-1])
        # Recompute the hidden activations needed for the backward pass.
        z1 = self._hidden(x)

        # Softmax + cross-entropy gives this gradient at the output pre-activation.
        dy = (y-target)/batch_size
        grad_z1 = np.dot(dy, self.w2.T)
        # Sigmoid derivative expressed through its output: z * (1 - z).
        grad_a1 = grad_z1 * z1 * (1 - z1)

        return {
            'w1': np.dot(x.T, grad_a1),
            'b1': np.sum(grad_a1, axis=0),
            'w2': np.dot(z1.T, dy),
            'b2': np.sum(dy, axis=0),
        }

convolution

def im2col(input_data, filter_h, filter_w, stride=1,pad=0):
    """Unfold the sliding windows of a 4-D batch into the rows of a 2-D matrix.

    Args:
        input_data: ndarray of shape ``(N, C, H, W)``.
        filter_h: Window height.
        filter_w: Window width.
        stride: Step between consecutive windows.
        pad: Zero padding added on every side of H and W.

    Returns:
        ndarray of shape ``(N * out_h * out_w, C * filter_h * filter_w)``;
        each row is one flattened window.
    """
    N,C,H,W = input_data.shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1

    # Pad only the two spatial axes; N and C are left untouched.
    img = np.pad(input_data,[(0,0),(0,0),(pad,pad),(pad,pad)],'constant')
    col = np.zeros((N,C,filter_h, filter_w, out_h, out_w))
    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            # For window offset (y, x), gather that pixel from every window.
            col[:,:,y,x,:,:] = img[:,:,y:y_max:stride, x:x_max:stride]
    col = col.transpose(0,4,5,1,2,3).reshape(N*out_h*out_w,-1)
    return col
def col2im(col, input_shape,fh, fw, stride=1, pad=0):
    """Fold a window matrix back into image form (the adjoint of ``im2col``).

    Values belonging to overlapping windows are summed.

    Args:
        col: ndarray of shape ``(N * out_h * out_w, C * fh * fw)``.
        input_shape: Target shape ``(N, C, H, W)``.
        fh: Window height.
        fw: Window width.
        stride: Step between consecutive windows.
        pad: Padding that was applied on every side of H and W.

    Returns:
        ndarray of shape ``(N, C, H, W)``.
    """
    N,C,H,W = input_shape
    out_h = (H + 2*pad - fh)//stride + 1
    out_w = (W + 2*pad - fw)//stride + 1
    # Rows are (N, out_h, out_w) and columns are (C, fh, fw); reorder to
    # (N, C, fh, fw, out_h, out_w) for the scatter below.
    col = col.reshape(N,out_h,out_w,C,fh, fw).transpose(0,3,4,5,1,2)

    img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
    for y in range(fh):
        y_max = y+stride*out_h
        for x in range(fw):
            x_max = x+stride*out_w
            img[:,:,y:y_max:stride, x:x_max:stride] += col[:,:,y,x,:,:]
    # Strip the padding border.
    return img[:,:,pad:H+pad,pad:W+pad]
    
class Convolution:
    """2-D convolution layer implemented with ``im2col`` and one matrix product."""

    def __init__(self,w, b, stride=1, pad=0):
        """Store the filters and hyper-parameters.

        Args:
            w: Filters of shape ``(out_channels, in_channels, fh, fw)``.
            b: Bias with one entry per output channel.
            stride: Step between consecutive windows.
            pad: Zero padding on every side of the spatial axes.
        """
        self.w = w
        self.b = b
        self.stride = stride
        self.pad = pad
        # Caches written by forward() and read by backward().
        self.x = None
        self.col = None
        self.col_w = None
        # Gradients written by backward().
        self.dw = None
        self.db = None

    def forward(self,x):
        """Convolve ``x`` with the filters.

        Args:
            x: Input of shape ``(N, C, H, W)``.

        Returns:
            ndarray of shape ``(N, out_channels, out_h, out_w)``.
        """
        oc, C, fh,fw = self.w.shape
        N, _, H, W = x.shape
        out_h = (H + 2*self.pad - fh)//self.stride + 1
        out_w = (W + 2*self.pad - fw)//self.stride + 1

        # col: (N*out_h*out_w, C*fh*fw); col_w: (C*fh*fw, out_channels)
        col = im2col(x,fh,fw,self.stride, self.pad)
        col_w = self.w.reshape(oc,-1).T
        # A matrix product, not an element-wise one, computes all windows at once.
        out = np.dot(col, col_w) + self.b
        out = out.reshape(N,out_h,out_w, -1).transpose(0,3,1,2)

        self.x = x
        self.col = col
        self.col_w = col_w
        return out

    def backward(self,dout):
        """Back-propagate ``dout``; stores ``self.dw`` and ``self.db``.

        Args:
            dout: Upstream gradient of shape ``(N, out_channels, out_h, out_w)``.

        Returns:
            Gradient with respect to the input, shape ``(N, C, H, W)``.
        """
        oc,C,fh,fw = self.w.shape
        dout = dout.transpose(0,2,3,1).reshape(-1,oc)
        self.db = np.sum(dout,axis=0)
        # (C*fh*fw, rows) @ (rows, out_channels) -> (C*fh*fw, out_channels)
        self.dw = np.dot(self.col.T, dout)
        self.dw = self.dw.transpose(1,0).reshape(oc,C,fh,fw)
        dcol = np.dot(dout,self.col_w.T)
        dx = col2im(dcol, self.x.shape, fh, fw, self.stride, self.pad)
        return dx

pooling

class Pooling:
    """Max-pooling layer."""

    def __init__(self, pool_h, pool_w, stride = 1, pad = 0):
        """Store the window size and hyper-parameters.

        Args:
            pool_h: Window height.
            pool_w: Window width.
            stride: Step between consecutive windows.
            pad: Padding on every side of the spatial axes.
        """
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad
        # Caches written by forward() and read by backward().
        self.x = None
        self.mask = None

    def forward(self,x):
        """Take the maximum of every pooling window.

        Args:
            x: Input of shape ``(N, C, H, W)``.

        Returns:
            ndarray of shape ``(N, C, out_h, out_w)``.
        """
        N,C,H,W = x.shape
        ph, pw = self.pool_h, self.pool_w
        stride, pad = self.stride, self.pad
        out_h = (H + 2*pad - ph)//stride + 1
        out_w = (W + 2*pad - pw)//stride + 1

        img = x
        if pad > 0:
            # Pad with -inf so that padding can never be selected as a maximum.
            img = np.pad(x.astype(float), [(0,0),(0,0),(pad,pad),(pad,pad)],
                         'constant', constant_values=-np.inf)

        # Unfold to (N, C, ph, pw, out_h, out_w): one slot per window offset.
        col = np.zeros((N,C,ph,pw,out_h,out_w))
        for i in range(ph):
            i_max = i + stride*out_h
            for j in range(pw):
                j_max = j + stride*out_w
                col[:,:,i,j,:,:] = img[:,:,i:i_max:stride,j:j_max:stride]
        # One row per (sample, out_y, out_x, channel), one column per offset.
        col = col.transpose(0,4,5,1,2,3).reshape(-1, ph*pw)

        self.x = x
        self.mask = np.argmax(col,axis=1)
        out = np.max(col,axis=1)
        out = out.reshape(N,out_h,out_w,C).transpose(0,3,1,2)
        return out

    def backward(self,dout):
        """Route each upstream gradient to the position that held the maximum.

        Args:
            dout: Upstream gradient of shape ``(N, C, out_h, out_w)``.

        Returns:
            Gradient with respect to the input, shape ``(N, C, H, W)``.
        """
        dout = dout.transpose(0,2,3,1)
        pool_size = self.pool_h*self.pool_w
        dmax = np.zeros((dout.size,pool_size))
        dmax[np.arange(self.mask.size), self.mask.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

        return dx

        

iou

bbox

def calculate_iou(pred, target):
    """Intersection-over-union of two axis-aligned boxes.

    Args:
        pred: Box given as ``(x1, y1, x2, y2)``.
        target: Box in the same format.

    Returns:
        IoU in ``[0, 1]``; ``0.0`` when the union has no area.
    """
    x1 = max(pred[0],target[0])
    y1 = max(pred[1],target[1])
    x2 = min(pred[2],target[2])
    y2 = min(pred[3], target[3])
    # Clamp at zero so that disjoint boxes give an empty intersection.
    inter = max(0,x2-x1)*max(0,y2-y1)
    area_pred = (pred[2]-pred[0]) * (pred[3]-pred[1])
    area_target = (target[2]-target[0]) * (target[3]-target[1])
    union = area_pred + area_target - inter
    if union <= 0:
        return 0.0
    return inter / union

bboxes

def bboxes_iou(boxes1, boxes2):
    """Pairwise IoU between two sets of axis-aligned boxes.

    Args:
        boxes1: Array-like of shape ``(n1, 4)`` with rows ``(x1, y1, x2, y2)``.
        boxes2: Array-like of shape ``(n2, 4)`` in the same format.

    Returns:
        ndarray of shape ``(n1, n2)``; entry ``[i, j]`` is the IoU of
        ``boxes1[i]`` and ``boxes2[j]`` (``0`` where the union has no area).
    """
    boxes1 = np.asarray(boxes1, dtype=float)
    boxes2 = np.asarray(boxes2, dtype=float)

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Broadcast (n1, 1, 2) against (1, n2, 2) to get the corners of every
    # pairwise intersection rectangle.
    top_left = np.maximum(boxes1[:, None, :2], boxes2[None, :, :2])
    bottom_right = np.minimum(boxes1[:, None, 2:4], boxes2[None, :, 2:4])
    # Negative extents mean the boxes do not overlap.
    extent = np.clip(bottom_right - top_left, 0, None)
    inter = extent[..., 0] * extent[..., 1]

    union = area1[:, None] + area2[None, :] - inter
    # Divide only where the union is positive; everything else stays 0.
    return np.divide(inter, union, out=np.zeros_like(inter), where=union > 0)

precision and recall

def computePR(pred, target, iou_threshold, iou_fn=None):
    """Compute detection precision and recall at a given IoU threshold.

    A target box counts as a true positive when at least one predicted box
    overlaps it with IoU ``>= iou_threshold``.

    Args:
        pred: Sequence of predicted boxes.
        target: Sequence of ground-truth boxes.
        iou_threshold: Minimum IoU for a prediction to match a target.
        iou_fn: Optional ``f(pred_box, target_box) -> float``. Defaults to
            the ``calculate_iou`` defined in this file.

    Returns:
        Tuple ``(precision, recall)``. Either value is ``0.0`` when its
        denominator would be zero (no predictions / no targets).
    """
    if iou_fn is None:
        iou_fn = calculate_iou

    TP = sum(
        1
        for target_box in target
        if any(iou_fn(p_bbox, target_box) >= iou_threshold for p_bbox in pred)
    )
    # One prediction may match several targets; keep FP from going negative.
    FP = max(len(pred) - TP, 0)
    FN = len(target) - TP

    precision = TP/(TP+FP) if (TP+FP) > 0 else 0.0
    recall = TP/(TP+FN) if (TP+FN) > 0 else 0.0
    return precision, recall

你可能感兴趣的:(深度学习,算法)