1.下载数据集
原数据集:
链接:https://pan.baidu.com/s/19dTr...
提取码:sngl
原数据集有 70 多 GB,体积过大,因此预先将其加工成体积更小的 .npz 文件:
链接:https://pan.baidu.com/s/1CBjP...
提取码:ranb
2. 加工代码
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt
def standard_width_height_scaling(real_w, real_h, bbox0, bbox1, bbox2, bbox3):
    """Rescale bounding-box values to the real image dimensions.

    Each bbox value is multiplied by real_dimension / 224 and truncated
    to an int (the annotations are presumably relative to a 224x224
    reference frame — TODO confirm against the dataset docs).

    Returns a tuple (x, y, w, h) of ints in real-image coordinates.
    """
    scale_x = float(real_w) / 224
    scale_y = float(real_h) / 224
    x1 = int(int(bbox0) * scale_x)  # bbox[0]
    y1 = int(int(bbox1) * scale_y)  # bbox[1]
    w1 = int(int(bbox2) * scale_x)  # bbox[2]
    h1 = int(int(bbox3) * scale_y)  # bbox[3]
    return x1, y1, w1, h1
def get_area_bbox_indices(bound_box_path, real_w, real_h):
    """Read the bbox file and return the box scaled to image coordinates.

    Parameters:
        bound_box_path: path to the '*_BB.txt' file; only its first line
            is used and it must contain at least four whitespace-separated
            numbers (x, y, w, h).
        real_w, real_h: real image width/height used for scaling.

    Returns a tuple (x, y, w, h) of ints.
    """
    # Bug fix: the original opened the file without ever closing it
    # (leaked handle) and read every line although only the first one is
    # used. A context manager + readline() fixes both.
    with open(bound_box_path, "r") as bound_box_file:
        bbox = bound_box_file.readline().split()
    return standard_width_height_scaling(real_w, real_h,
                                         bbox[0], bbox[1], bbox[2], bbox[3])
def get_ratio_bbox_and_image(full_img_path, bound_box_path):
    """Return area(bbox) / area(image) as a float for the given image."""
    image = cv2.imread(full_img_path)
    height, width, _ = image.shape
    # Only the bbox width/height are needed for the area ratio.
    _, _, bbox_w, bbox_h = get_area_bbox_indices(bound_box_path, width, height)
    return (bbox_w * bbox_h) / (height * width)
def get_padding_bbox_indices(x1, y1, w1, h1, real_w, real_h, ratio_bbox_and_image):
    """Expand a bbox by a margin proportional to its own size.

    The per-axis margin is int(size * (1 + ratio_bbox_and_image)).
    The shifted x/y are clamped at 0 and the grown width/height are
    clamped at the image dimensions.

    Returns the tuple (x_padded, y_padded, w_padded, h_padded).
    """
    margin_w = int(w1 * (1 + ratio_bbox_and_image))
    margin_h = int(h1 * (1 + ratio_bbox_and_image))
    x1_padding = max(x1 - margin_w, 0)
    y1_padding = max(y1 - margin_h, 0)
    w1_padding = min(w1 + margin_w, real_w)
    h1_padding = min(h1 + margin_h, real_h)
    return x1_padding, y1_padding, w1_padding, h1_padding
def read_crop_img_with_bbox(full_img_path, bound_box_path):
    """Load an image plus its scaled bbox.

    Returns (x, y, w, h, img, real_w, real_h) where img is the BGR array
    read by cv2 and real_w/real_h are its pixel dimensions.
    """
    img = cv2.imread(full_img_path)
    real_h, real_w = img.shape[0], img.shape[1]
    x1, y1, w1, h1 = get_area_bbox_indices(bound_box_path, real_w, real_h)
    return x1, y1, w1, h1, img, real_w, real_h
# Preprocessing script: walk the CelebA-Spoof training tree, crop a padded
# face region from every .jpg using its sibling '*_BB.txt' bounding box,
# resize the crop to 100x100, and save up to 5000 'live' + 5000 'spoof'
# samples into one .npz file consumed later by fit_model().
original_cropped_storage = []   # NOTE(review): never filled nor saved below
padding_cropped_storage = []    # resized padded crops -> X
img_names = []
original_cropped_labels = []    # NOTE(review): never filled nor saved below
padding_cropped_labels = []     # 1 = live, 0 = spoof -> y
count_live = 0
count_spoof = 0
dim = (100, 100)                # cv2.resize target size (width, height)
count_limit_live = 5000
count_limit_spoof = 5000
rootdir_train = r'E:\mldata\CelebA_Spoof_zip\CelebA_Spoof\Data\train'
for file in os.listdir(rootdir_train):
    d = os.path.join(rootdir_train, file)
    print(d)
    if os.path.isdir(d):
        # The sub-folder name doubles as the class label: 'live' / 'spoof'.
        for e in os.listdir(d):
            imgs_path = d + '/' + e + '/'
            for img_path in os.listdir(imgs_path):
                if img_path.endswith(".jpg"):
                    full_img_path = imgs_path + img_path
                    # Each image has a sibling '<name>_BB.txt' bbox file.
                    bound_box_path = full_img_path[0:-4] + '_BB.txt'
                    x1, y1, w1, h1, img, real_w, real_h = read_crop_img_with_bbox(
                        full_img_path, bound_box_path)
                    ratio_bbox_and_image = get_ratio_bbox_and_image(full_img_path, bound_box_path)
                    x1_padding, y1_padding, w1_padding, h1_padding = get_padding_bbox_indices(
                        x1, y1, w1, h1, real_w, real_h, ratio_bbox_and_image)
                    padding_img = img[y1_padding:y1 + h1_padding, x1_padding:x1 + w1_padding]
                    # Skip classes whose quota is already full (this check
                    # cannot raise, so it no longer needs to sit in the try).
                    if (e == 'live' and count_live >= count_limit_live) or (
                            e == 'spoof' and count_spoof >= count_limit_spoof):
                        continue
                    try:
                        # Bug fix: the original used a bare `except:` around
                        # this whole section, which also swallowed
                        # KeyboardInterrupt/SystemExit. Catch Exception only,
                        # and only around the call that actually fails on
                        # degenerate (empty) crops.
                        resized_padding_img = cv2.resize(padding_img, dim, interpolation=cv2.INTER_AREA)
                    except Exception:
                        continue
                    padding_cropped_storage.append(resized_padding_img)
                    if e == 'live':
                        count_live = count_live + 1
                        padding_cropped_labels.append(1)
                    elif e == 'spoof':
                        count_spoof = count_spoof + 1
                        padding_cropped_labels.append(0)
                    img_names.append(img_path)
                    if (count_live == count_limit_live and e == 'live') or (
                            count_spoof == count_limit_spoof and e == 'spoof'):
                        break
            if count_live >= count_limit_live and count_spoof >= count_limit_spoof:
                break
    if count_live >= count_limit_live and count_spoof >= count_limit_spoof:
        print("生成完成 ")
        break
X = np.asarray(padding_cropped_storage)
y = np.asarray(padding_cropped_labels)
# Bug fix: save under the same name that fit_model() loads
# ('anti_spoofing_data.npz'); the original wrote 'spoofing_data.npz',
# so training could never find the freshly generated file.
np.savez('../data/anti_spoofing_data.npz', X, y)
print("生成npz完成")
3.创建模型
def fit_model():
    """Train a small CNN live/spoof classifier on the prepared dataset.

    Loads X (100x100x3 crops) and y (1 = live, 0 = spoof) from
    '../data/anti_spoofing_data.npz', splits 70/15/15 into
    train/validation/test, trains for 10 epochs and saves the model to
    './model/live_model.h5'.
    """
    anti_spoofing_data = np.load('../data/anti_spoofing_data.npz')
    X, y = anti_spoofing_data['arr_0'], anti_spoofing_data['arr_1']
    # Sanity-check the class balance before training (vectorized instead
    # of the original Python loop over every label).
    check_live_label = int(np.count_nonzero(y == 1))
    check_spoof_label = int(np.count_nonzero(y == 0))
    print(
        f"There are 2 classes including number of live is {check_live_label} and number of spoof is {check_spoof_label}")
    # 70% train, then the remaining 30% split evenly into validation/test.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_valid, y_valid, test_size=0.5, random_state=42)
    print(f'Training dataset size of X_train: {len(X_train)}')
    print(f'Training : {X_train.shape}')
    print(f'Testing dataset size of X_test: {len(X_test)}')
    print(f'Validation dataset size of X_valid: {len(X_valid)}')
    # Bug fix: the y_train/y_valid labels below wrongly said "Testing".
    print(f'Training dataset size of y_train: {len(y_train)}')
    print(f'Testing dataset size of y_test: {len(y_test)}')
    print(f'Validation dataset size of y_valid: {len(y_valid)}')
    # Small CNN; Dense(2) emits raw logits, hence from_logits=True below.
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(2))
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=10,
                        validation_data=(X_valid, y_valid))
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print(test_loss, test_acc)
    # Bug fix: model.save() fails when './model' does not exist yet.
    os.makedirs('./model', exist_ok=True)
    model.save('./model/live_model.h5')
4.训练结果
Epoch 1/10
219/219 [==============================] - 45s 203ms/step - loss: 8.3599 - accuracy: 0.7795 - val_loss: 0.2634 - val_accuracy: 0.9073
Epoch 2/10
219/219 [==============================] - 45s 203ms/step - loss: 0.1445 - accuracy: 0.9432 - val_loss: 0.2837 - val_accuracy: 0.9227
Epoch 3/10
219/219 [==============================] - 45s 205ms/step - loss: 0.1026 - accuracy: 0.9586 - val_loss: 0.2011 - val_accuracy: 0.9367
Epoch 4/10
219/219 [==============================] - 45s 205ms/step - loss: 0.1586 - accuracy: 0.9619 - val_loss: 0.1472 - val_accuracy: 0.9493
Epoch 5/10
219/219 [==============================] - 45s 204ms/step - loss: 0.1232 - accuracy: 0.9554 - val_loss: 0.0931 - val_accuracy: 0.9653
Epoch 6/10
219/219 [==============================] - 46s 210ms/step - loss: 0.0491 - accuracy: 0.9832 - val_loss: 0.1221 - val_accuracy: 0.9567
Epoch 7/10
219/219 [==============================] - 47s 214ms/step - loss: 0.0468 - accuracy: 0.9823 - val_loss: 0.0854 - val_accuracy: 0.9660
Epoch 8/10
219/219 [==============================] - 45s 204ms/step - loss: 0.0254 - accuracy: 0.9933 - val_loss: 0.1062 - val_accuracy: 0.9640
Epoch 9/10
219/219 [==============================] - 45s 204ms/step - loss: 0.0291 - accuracy: 0.9901 - val_loss: 0.1596 - val_accuracy: 0.9647
Epoch 10/10
219/219 [==============================] - 45s 205ms/step - loss: 0.0254 - accuracy: 0.9923 - val_loss: 0.1037 - val_accuracy: 0.9720
47/47 - 2s - loss: 0.1117 - accuracy: 0.9653
0.11169376969337463 0.9653333425521851
5.完整代码
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import os
import cv2
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Sequential
def fit_model():
    """Train a small CNN live/spoof classifier on the prepared dataset.

    Loads X (100x100x3 crops) and y (1 = live, 0 = spoof) from
    '../data/anti_spoofing_data.npz', splits 70/15/15 into
    train/validation/test, trains for 10 epochs and saves the model to
    './model/live_model.h5'.
    """
    anti_spoofing_data = np.load('../data/anti_spoofing_data.npz')
    X, y = anti_spoofing_data['arr_0'], anti_spoofing_data['arr_1']
    # Sanity-check the class balance before training (vectorized instead
    # of the original Python loop over every label).
    check_live_label = int(np.count_nonzero(y == 1))
    check_spoof_label = int(np.count_nonzero(y == 0))
    print(
        f"There are 2 classes including number of live is {check_live_label} and number of spoof is {check_spoof_label}")
    # 70% train, then the remaining 30% split evenly into validation/test.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_valid, y_valid, test_size=0.5, random_state=42)
    print(f'Training dataset size of X_train: {len(X_train)}')
    print(f'Training : {X_train.shape}')
    print(f'Testing dataset size of X_test: {len(X_test)}')
    print(f'Validation dataset size of X_valid: {len(X_valid)}')
    # Bug fix: the y_train/y_valid labels below wrongly said "Testing".
    print(f'Training dataset size of y_train: {len(y_train)}')
    print(f'Testing dataset size of y_test: {len(y_test)}')
    print(f'Validation dataset size of y_valid: {len(y_valid)}')
    # Small CNN; Dense(2) emits raw logits, hence from_logits=True below.
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(2))
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=10,
                        validation_data=(X_valid, y_valid))
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print(test_loss, test_acc)
    # Bug fix: model.save() fails when './model' does not exist yet.
    os.makedirs('./model', exist_ok=True)
    model.save('./model/live_model.h5')
# Script entry point; removed the stale commented-out `fit2()` call.
if __name__ == '__main__':
    fit_model()