深度学习实践-人脸活体检测CelebA_Spoof数据集

1.下载数据集

原数据集:
链接:https://pan.baidu.com/s/19dTr...
提取码:sngl
原数据集有70多G,体积太大,因此已加工成较小的.npz文件,下载地址如下:
链接:https://pan.baidu.com/s/1CBjP...
提取码:ranb

2. 加工代码

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt


def standard_width_height_scaling(real_w, real_h, bbox0, bbox1, bbox2, bbox3):
    """Rescale a bounding box from a 224x224 reference grid to the real image size.

    Args:
        real_w: Width of the real image.
        real_h: Height of the real image.
        bbox0, bbox1: x and y of the box on the 224x224 grid (int-convertible).
        bbox2, bbox3: width and height of the box on the 224x224 grid.

    Returns:
        Tuple ``(x1, y1, w1, h1)`` of ints, each truncated toward zero.
    """
    # Horizontal values scale with the width, vertical values with the height.
    scale_x = float(real_w) / 224
    scale_y = float(real_h) / 224
    return (
        int(int(bbox0) * scale_x),
        int(int(bbox1) * scale_y),
        int(int(bbox2) * scale_x),
        int(int(bbox3) * scale_y),
    )


def get_area_bbox_indices(bound_box_path, real_w, real_h):
    """Read the first bounding box of a ``*_BB.txt`` file, scaled to the image.

    Args:
        bound_box_path: Path of the text file; its first line must start with
            four whitespace-separated values (x, y, width, height).
        real_w: Width of the real image.
        real_h: Height of the real image.

    Returns:
        Tuple ``(x1, y1, w1, h1)`` as produced by
        ``standard_width_height_scaling``.

    Raises:
        IndexError: If the first line holds fewer than four values
            (including an empty file).
    """
    # Only the first line is used, so read just that line and make sure the
    # file handle is closed even when parsing fails.
    with open(bound_box_path, "r") as bound_box_read:
        first_line = bound_box_read.readline()
    bbox = first_line.split()
    return standard_width_height_scaling(real_w, real_h,
                                         bbox[0], bbox[1], bbox[2], bbox[3])


def get_ratio_bbox_and_image(full_img_path, bound_box_path):
    """Return the bounding-box area divided by the whole image area.

    Args:
        full_img_path: Path of the image, read with ``cv2.imread``.
        bound_box_path: Path of the matching bounding-box text file.

    Returns:
        ``(w1 * h1) / (image_height * image_width)`` as a float.
    """
    image = cv2.imread(full_img_path)
    height, width, _ = image.shape
    # Only the scaled box width and height matter for the area.
    _, _, box_w, box_h = get_area_bbox_indices(bound_box_path, width, height)
    return (box_w * box_h) / (height * width)


def get_padding_bbox_indices(x1, y1, w1, h1, real_w, real_h, ratio_bbox_and_image):
    """Enlarge a bounding box by a margin that grows with its relative area.

    The margin on each axis is ``int(size * (1 + ratio_bbox_and_image))``.
    It is subtracted from the origin and added to the size.

    Args:
        x1, y1: Origin of the box.
        w1, h1: Width and height of the box.
        real_w, real_h: Image width and height, used as upper bounds.
        ratio_bbox_and_image: Box area divided by image area.

    Returns:
        Tuple ``(x1_padding, y1_padding, w1_padding, h1_padding)`` where the
        origin is clamped to be non-negative and the padded width/height are
        clamped to ``real_w`` / ``real_h``.
    """
    growth = 1 + ratio_bbox_and_image
    margin_w = int(w1 * growth)
    margin_h = int(h1 * growth)

    # Clamp the shifted origin at the image's top-left corner.
    left = max(x1 - margin_w, 0)
    top = max(y1 - margin_h, 0)
    # Clamp the enlarged size at the image dimensions.
    width = min(w1 + margin_w, real_w)
    height = min(h1 + margin_h, real_h)
    return left, top, width, height


def read_crop_img_with_bbox(full_img_path, bound_box_path):
    """Load an image together with its bounding box scaled to the image size.

    Args:
        full_img_path: Path of the image, read with ``cv2.imread``.
        bound_box_path: Path of the matching bounding-box text file.

    Returns:
        Tuple ``(x1, y1, w1, h1, img, real_w, real_h)``: the scaled box, the
        loaded image array, and the image width and height.
    """
    img = cv2.imread(full_img_path)
    # The array shape is (height, width, ...).
    real_h, real_w = img.shape[0], img.shape[1]
    left, top, box_w, box_h = get_area_bbox_indices(bound_box_path, real_w, real_h)
    return left, top, box_w, box_h, img, real_w, real_h


# ---------------------------------------------------------------------------
# Dataset generation script.
#
# Walks <rootdir_train>/<subject>/<live|spoof>/*.jpg, crops each image around
# its padded bounding box, resizes the crop to `dim`, and stores up to
# `count_limit_live` live samples (label 1) and `count_limit_spoof` spoof
# samples (label 0) in a single .npz archive.
# ---------------------------------------------------------------------------
original_cropped_storage = []
padding_cropped_storage = []
img_names = []
original_cropped_labels = []
padding_cropped_labels = []

count_live = 0
count_spoof = 0
dim = (100, 100)
count_limit_live = 5000
count_limit_spoof = 5000

rootdir_train = r'E:\mldata\CelebA_Spoof_zip\CelebA_Spoof\Data\train'
for file in os.listdir(rootdir_train):
    d = os.path.join(rootdir_train, file)
    print(d)
    if os.path.isdir(d):
        for e in os.listdir(d):
            # Only 'live' and 'spoof' folders have a label; storing images
            # from any other folder would leave X and y with different lengths.
            if e not in ('live', 'spoof'):
                continue
            # Skip a class whose quota is already full before doing any
            # image I/O for it.
            if (e == 'live' and count_live >= count_limit_live) or (
                    e == 'spoof' and count_spoof >= count_limit_spoof):
                continue

            imgs_path = os.path.join(d, e)
            for img_path in os.listdir(imgs_path):
                if not img_path.endswith(".jpg"):
                    continue
                full_img_path = os.path.join(imgs_path, img_path)
                bound_box_path = full_img_path[0:-4] + '_BB.txt'
                x1, y1, w1, h1, img, real_w, real_h = read_crop_img_with_bbox(full_img_path, bound_box_path)
                # Same value get_ratio_bbox_and_image() returns, computed from
                # the data already loaded instead of decoding the image again.
                ratio_bbox_and_image = (w1 * h1) / (real_h * real_w)
                x1_padding, y1_padding, w1_padding, h1_padding = get_padding_bbox_indices(x1, y1, w1, h1,
                                                                                          real_w, real_h,
                                                                                          ratio_bbox_and_image)
                padding_img = img[y1_padding:y1 + h1_padding, x1_padding:x1 + w1_padding]
                try:
                    resized_padding_img = cv2.resize(padding_img, dim, interpolation=cv2.INTER_AREA)
                except cv2.error:
                    # An empty crop (degenerate bounding box) cannot be
                    # resized; skip this sample and keep going.
                    continue

                padding_cropped_storage.append(resized_padding_img)
                if e == 'live':
                    count_live = count_live + 1
                    padding_cropped_labels.append(1)
                else:
                    count_spoof = count_spoof + 1
                    padding_cropped_labels.append(0)
                img_names.append(img_path)

                # Stop scanning this folder once its class quota is reached.
                if (count_live == count_limit_live and e == 'live') or (
                        count_spoof == count_limit_spoof and e == 'spoof'):
                    break
            if count_live >= count_limit_live and count_spoof >= count_limit_spoof:
                break
    if count_live >= count_limit_live and count_spoof >= count_limit_spoof:
        print("生成完成 ")
        break

X = np.asarray(padding_cropped_storage)
y = np.asarray(padding_cropped_labels)
# Arrays are saved positionally, so they are stored under the keys 'arr_0'
# (images) and 'arr_1' (labels). The file name matches the one fit_model()
# loads.
os.makedirs('../data', exist_ok=True)
np.savez('../data/anti_spoofing_data.npz', X, y)
print("生成npz完成")

3.创建并训练模型

def fit_model():
    """Train a small CNN live/spoof classifier and save it to disk.

    Loads the images and labels from ``../data/anti_spoofing_data.npz``
    (keys ``arr_0`` and ``arr_1``), splits them 70/15/15 into training,
    validation and test sets, trains for 10 epochs, prints the test loss and
    accuracy, and writes the model to ``./model/live_model.h5``.
    """
    # Close the archive as soon as both arrays have been read.
    with np.load('../data/anti_spoofing_data.npz') as anti_spoofing_data:
        X, y = anti_spoofing_data['arr_0'], anti_spoofing_data['arr_1']

    # Label 1 is counted as live and label 0 as spoof.
    check_live_label = int(np.count_nonzero(y == 1))
    check_spoof_label = int(np.count_nonzero(y == 0))
    print(
        f"There are 2 classes including number of live is {check_live_label} and number of spoof is {check_spoof_label}")

    # Hold out 30% of the data, then split that part evenly into validation
    # and test sets.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_valid, y_valid, test_size=0.5, random_state=42)

    print(f'Training dataset size of X_train: {len(X_train)}')
    print(f'Training : {X_train.shape}')
    print(f'Testing dataset size of X_test: {len(X_test)}')
    print(f'Validation dataset size of X_valid: {len(X_valid)}')
    print(f'Training dataset size of y_train: {len(y_train)}')
    print(f'Testing dataset size of y_test: {len(y_test)}')
    print(f'Validation dataset size of y_valid: {len(y_valid)}')

    # NOTE: X is passed to the network exactly as stored in the archive, with
    # no rescaling; inference code must feed the saved model the same way.
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    # Two raw logits (no activation); the loss below uses from_logits=True.
    model.add(layers.Dense(2))

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=10,
              validation_data=(X_valid, y_valid))
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print(test_loss, test_acc)

    # Make sure the target directory exists before saving.
    os.makedirs('./model', exist_ok=True)
    model.save('./model/live_model.h5')

4.训练结果

Epoch 1/10
219/219 [==============================] - 45s 203ms/step - loss: 8.3599 - accuracy: 0.7795 - val_loss: 0.2634 - val_accuracy: 0.9073
Epoch 2/10
219/219 [==============================] - 45s 203ms/step - loss: 0.1445 - accuracy: 0.9432 - val_loss: 0.2837 - val_accuracy: 0.9227
Epoch 3/10
219/219 [==============================] - 45s 205ms/step - loss: 0.1026 - accuracy: 0.9586 - val_loss: 0.2011 - val_accuracy: 0.9367
Epoch 4/10
219/219 [==============================] - 45s 205ms/step - loss: 0.1586 - accuracy: 0.9619 - val_loss: 0.1472 - val_accuracy: 0.9493
Epoch 5/10
219/219 [==============================] - 45s 204ms/step - loss: 0.1232 - accuracy: 0.9554 - val_loss: 0.0931 - val_accuracy: 0.9653
Epoch 6/10
219/219 [==============================] - 46s 210ms/step - loss: 0.0491 - accuracy: 0.9832 - val_loss: 0.1221 - val_accuracy: 0.9567
Epoch 7/10
219/219 [==============================] - 47s 214ms/step - loss: 0.0468 - accuracy: 0.9823 - val_loss: 0.0854 - val_accuracy: 0.9660
Epoch 8/10
219/219 [==============================] - 45s 204ms/step - loss: 0.0254 - accuracy: 0.9933 - val_loss: 0.1062 - val_accuracy: 0.9640
Epoch 9/10
219/219 [==============================] - 45s 204ms/step - loss: 0.0291 - accuracy: 0.9901 - val_loss: 0.1596 - val_accuracy: 0.9647
Epoch 10/10
219/219 [==============================] - 45s 205ms/step - loss: 0.0254 - accuracy: 0.9923 - val_loss: 0.1037 - val_accuracy: 0.9720
47/47 - 2s - loss: 0.1117 - accuracy: 0.9653
0.11169376969337463 0.9653333425521851

5.完整代码

from sklearn.model_selection import train_test_split
import numpy as  np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import os
import cv2
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Sequential

def fit_model():
    """Train a small CNN live/spoof classifier and save it to disk.

    Loads the images and labels from ``../data/anti_spoofing_data.npz``
    (keys ``arr_0`` and ``arr_1``), splits them 70/15/15 into training,
    validation and test sets, trains for 10 epochs, prints the test loss and
    accuracy, and writes the model to ``./model/live_model.h5``.
    """
    # Close the archive as soon as both arrays have been read.
    with np.load('../data/anti_spoofing_data.npz') as anti_spoofing_data:
        X, y = anti_spoofing_data['arr_0'], anti_spoofing_data['arr_1']

    # Label 1 is counted as live and label 0 as spoof.
    check_live_label = int(np.count_nonzero(y == 1))
    check_spoof_label = int(np.count_nonzero(y == 0))
    print(
        f"There are 2 classes including number of live is {check_live_label} and number of spoof is {check_spoof_label}")

    # Hold out 30% of the data, then split that part evenly into validation
    # and test sets.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_valid, y_valid, test_size=0.5, random_state=42)

    print(f'Training dataset size of X_train: {len(X_train)}')
    print(f'Training : {X_train.shape}')
    print(f'Testing dataset size of X_test: {len(X_test)}')
    print(f'Validation dataset size of X_valid: {len(X_valid)}')
    print(f'Training dataset size of y_train: {len(y_train)}')
    print(f'Testing dataset size of y_test: {len(y_test)}')
    print(f'Validation dataset size of y_valid: {len(y_valid)}')

    # NOTE: X is passed to the network exactly as stored in the archive, with
    # no rescaling; inference code must feed the saved model the same way.
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    # Two raw logits (no activation); the loss below uses from_logits=True.
    model.add(layers.Dense(2))

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=10,
              validation_data=(X_valid, y_valid))
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print(test_loss, test_acc)

    # Make sure the target directory exists before saving.
    os.makedirs('./model', exist_ok=True)
    model.save('./model/live_model.h5')

# Script entry point: train the classifier only when this file is run
# directly, not when it is imported.
if __name__ == '__main__':
    # fit2()  # NOTE(review): fit2 is not defined in the visible source.
    fit_model()

你可能感兴趣的:(深度学习实践-人脸活体检测CelebA_Spoof数据集)