Facial Keypoint Detection

Results first, then the code. The main dependencies are OpenCV, Lasagne, and nolearn.
[Figure 1: sample images from the training set]

This figure shows images from the training dataset. The dataset is at https://www.kaggle.com/c/facial-keypoints-detection; the images themselves are stored inside a CSV file.

[Figure 2: keypoint detection results after training]

This figure shows detection results after training. They are decent overall, but profile faces come out noticeably worse, and a small number of images show large deviations. To be fair, the image quality of this dataset is really not great.

# coding:utf-8
import os
import cv2
import numpy as np
from pandas.io.parsers import read_csv
from sklearn.utils import shuffle
from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet
import theano.tensor as T

def load():
    df = read_csv(os.path.expanduser('training.csv'))  # load pandas dataframe
    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' '))
    print(df.count())  # prints the number of values for each column
    df = df.dropna()  # drop all rows that have missing values in them
    X = np.vstack(df['Image'].values) / 255.  # scale pixel values to [0, 1]
    X = X.astype(np.float32)
    y = df[df.columns[:-1]].values  # every column except 'Image' is a keypoint coordinate
    y = (y - 48) / 48.  # scale target coordinates to [-1, 1] (96x96 image, center at 48)
    X, y = shuffle(X, y, random_state=42)  # shuffle train data
    y = y.astype(np.float32)
    return X, y


def load_testdata():
    df = read_csv(os.path.expanduser('test.csv'))  # load pandas dataframe
    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' '))
    X = np.vstack(df['Image'].values)
    X = X.astype(np.float32)
    X = X / 255.  # scale pixel values to [0, 1]
    # Optional: dump the test images to disk for inspection.
    # for i in range(len(X)):
    #     img = (X[i] * 255).reshape(96, 96)
    #     cv2.imwrite('./test_data/' + str(i) + '.Bmp', img)
    return X
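
# Quick usage check (hypothetical; the Kaggle training CSV has 7049 rows, of
# which 2140 are complete after dropna()):
#   X, y = load()
#   print(X.shape, y.shape)  # expect (2140, 9216) and (2140, 30)
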
net1 = NeuralNet(
    layers=[  # three layers: one hidden layer
        ('input', layers.InputLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
        ],
    # layer parameters:
    input_shape=(None, 9216),  # 96x96 input pixels per batch
    hidden_num_units=100,  # number of units in hidden layer
    output_nonlinearity=None,  # output layer uses identity function
    output_num_units=30,  # 30 target values

    # optimization method:
    update=nesterov_momentum,   
    update_learning_rate=0.01,
    update_momentum=0.9,

    regression=True,  # flag to indicate a regression problem, not classification
    max_epochs=400,  # we want to train this many epochs
    verbose=1,
    )

net2 = NeuralNet(
    layers=[
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('dropout1', layers.DropoutLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool2', layers.MaxPool2DLayer),
        ('dropout2', layers.DropoutLayer),
        ('conv3', layers.Conv2DLayer),
        ('pool3', layers.MaxPool2DLayer),
        ('dropout3', layers.DropoutLayer),
        ('hidden4', layers.DenseLayer),
        ('dropout4', layers.DropoutLayer),
        ('hidden5', layers.DenseLayer),
        ('output', layers.DenseLayer),
        ],
    input_shape=(None, 1, 96, 96),
    conv1_num_filters=32, conv1_filter_size=(3, 3), pool1_pool_size=(2, 2),
    conv2_num_filters=64, conv2_filter_size=(3, 3), pool2_pool_size=(2, 2),
    conv3_num_filters=128, conv3_filter_size=(3, 3), pool3_pool_size=(2, 2),
    hidden4_num_units=500, hidden5_num_units=500,
    dropout1_p=0.1, dropout2_p=0.2, dropout3_p=0.2, dropout4_p=0.5,
    output_num_units=30, output_nonlinearity=None,

    update_learning_rate=0.01,
    update_momentum=0.9,

    regression=True,
    max_epochs=200,
    verbose=1,
    )
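
# Sanity check on net2's feature-map sizes (valid 3x3 convolutions, 2x2 max
# pooling): 96 -> 94 -> 47 -> 45 -> 22 -> 20 -> 10, so hidden4 receives
# 128 * 10 * 10 = 12800 inputs.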

net3 = NeuralNet(
    # four layers: two hidden layers
    layers=[
        ('input', layers.InputLayer),
        ('h1', layers.DenseLayer),
        ('h2', layers.DenseLayer),
        ('output', layers.DenseLayer),
        ],
    # layer parameters (variable batch size):
    input_shape=(None, 9216),  # 96x96 input pixels per batch
    h1_num_units=300,  # number of units in the first hidden layer
    h1_nonlinearity=T.tanh,

    h2_num_units=100,
    h2_nonlinearity=None,  # identity; rectify would be another option

    output_nonlinearity=T.tanh,  # targets are scaled to [-1, 1]
    output_num_units=30,  # 30 target values

    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,

    regression=True,  # flag to indicate a regression problem
    max_epochs=1000,  # we want to train this many epochs
    eval_size=0.1,
    verbose=1,
    )


def train_mlp():
    x, y = load()
    if os.path.exists('face_weight_mlp'):  # resume from saved weights, if any
        net1.load_params_from('face_weight_mlp')
    net1.fit(x, y)
    net1.save_params_to('face_weight_mlp')

def test_mlp():
    net1.load_params_from('face_weight_mlp')
    x = load_testdata()
    y = net1.predict(x)
    # save annotated images
    x = x * 255
    y = y * 48 + 48  # map predictions from [-1, 1] back to pixel coordinates
    for i in range(len(x)):
        tmp = x[i].reshape(96, 96).astype(np.uint8)
        img = cv2.cvtColor(tmp, cv2.COLOR_GRAY2BGR)
        add = './predict/' + str(i) + '.Bmp'
        for j in range(15):  # 15 keypoints stored as (x, y) pairs
            point = (int(y[i][2 * j]), int(y[i][2 * j + 1]))
            cv2.circle(img, point, 1, (0, 255, 0))
        cv2.imwrite(add, img)

def train_ann():
    x, y = load()
    if os.path.exists('face_weight_ann'):  # resume from saved weights, if any
        net3.load_params_from('face_weight_ann')
    net3.fit(x, y)
    net3.save_params_to('face_weight_ann')

def test_ann():
    net3.load_params_from('face_weight_ann')
    x = load_testdata()
    y = net3.predict(x)
    # save annotated images
    x = x * 255
    y = y * 48 + 48  # map predictions from [-1, 1] back to pixel coordinates
    for i in range(len(x)):
        tmp = x[i].reshape(96, 96).astype(np.uint8)
        img = cv2.cvtColor(tmp, cv2.COLOR_GRAY2BGR)
        add = './predict/' + str(i) + '.Bmp'
        for j in range(15):  # 15 keypoints stored as (x, y) pairs
            point = (int(y[i][2 * j]), int(y[i][2 * j + 1]))
            cv2.circle(img, point, 1, (0, 255, 0))
        cv2.imwrite(add, img)

def train_cnn():
    x, y = load()
    x = x.reshape(-1, 1, 96, 96)
    if os.path.exists('face_weight_cnn'):  # resume from saved weights, if any
        net2.load_params_from('face_weight_cnn')
    net2.fit(x, y)
    net2.save_params_to('face_weight_cnn')

def test_cnn():
    net2.load_params_from('face_weight_cnn')
    x = load_testdata()
    x = x.reshape(-1, 1, 96, 96)
    y = net2.predict(x)
    # save annotated images
    x = x * 255
    y = y * 48 + 48  # map predictions from [-1, 1] back to pixel coordinates
    for i in range(len(x)):
        tmp = x[i].reshape(96, 96).astype(np.uint8)
        img = cv2.cvtColor(tmp, cv2.COLOR_GRAY2BGR)
        add = './predict/' + str(i) + '.Bmp'
        for j in range(15):  # 15 keypoints stored as (x, y) pairs
            point = (int(y[i][2 * j]), int(y[i][2 * j + 1]))
            cv2.circle(img, point, 1, (0, 255, 0))
        cv2.imwrite(add, img)

if __name__ == '__main__':
    #train_ann()
    #test_ann()

    #train_mlp()
    #test_mlp()
    
    #train_cnn()
    test_cnn()

    print('OK')
The code is fairly simple, but debugging still turned up plenty of problems. After reading about the algorithm I thought it looked easy enough to write myself in Keras, but the model completely failed to converge. The reason is simple: this is a regression problem, not a classification problem, and the Keras functions I had used before only exposed a classification interface (if anyone knows how to solve regression problems with Keras, please let me know; much appreciated).
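
(For what it's worth, regression does work in Keras once the output layer is linear and the loss is mean squared error. Below is a rough net1-style sketch, not the code I used; the layer sizes mirror net1, and the `epochs` argument assumes a recent Keras, which older releases spell `nb_epoch`.)

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD

model = Sequential()
model.add(Dense(100, input_dim=9216, activation='relu'))
model.add(Dense(30))  # linear activation: raw keypoint coordinates
model.compile(loss='mse', optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True))
# model.fit(X, y, epochs=400, batch_size=128)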

With no alternative I switched to Lasagne, but once the program ran it errored out like this:

File "/usr/local/lib/python2.7/dist-packages/theano/gradient.py", line 432, in grad
raise TypeError("cost must be a scalar.")
TypeError: cost must be a scalar.

A fix is described at https://github.com/dnouri/kfkd-tutorial/issues/16 (apparently a version mismatch between nolearn and Lasagne).

It boils down to these three commands:

pip uninstall Lasagne
pip uninstall nolearn
pip install -r https://raw.githubusercontent.com/dnouri/kfkd-tutorial/master/requirements.txt

At the time I couldn't find that fix (Baidu was no help), so I tried to work it out myself. The error message was a real trap; I went and read the source code specifically for it, without much to show for it. In the end my plan was to write the model in raw Theano myself, starting from the tutorial's logistic regression class; the main part that needs changing is the following:

        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # symbolic description of how to compute prediction as class whose
        # probability is maximal
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # end-snippet-1

        # parameters of the model
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input

    def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
        # end-snippet-2


The cost function here measures class probabilities for a classification problem, whereas what we need is a cost that measures the gap between predicted and true values; a least-mean-squares (LMS) cost seems like the right fit (admittedly it is the only one I know). But Theano is tedious to debug and my Python is weak, so it looked like it would take too long; I gave up and went back to hunting for the earlier problem. Honestly, part of it is laziness: after using these libraries I had little appetite for raw Theano again.
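What that change could look like: keep the tutorial's class structure (self.W, self.b) but give it a linear output and a mean-squared-error cost. A minimal sketch, with mean_squared_error being a hypothetical method name:

        # linear output instead of softmax: predict the coordinates directly
        self.y_pred = T.dot(input, self.W) + self.b

    def mean_squared_error(self, y):
        """Mean squared error between predicted and target keypoints.

        Unlike an elementwise loss, this is a scalar, so T.grad accepts it.
        """
        return T.mean(T.sqr(self.y_pred - y))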



