笔者是一个痴迷于挖掘数据价值的学习者,希望在平日的工作和学习中挖掘数据的价值、找寻数据的秘密。笔者认为,数据的价值不仅体现在企业中,个人也可以体会到数据的魅力:用技术力量探索行为密码,让大数据助跑每一个人。欢迎同学们关注我的公众号,大家一起讨论数据中那些有趣的事情。
我的公众号为:livandata
本案例主要用了CNN算法进行实现,对七种表情进行判别,具体的七种表情为:
# 0:生气;
# 1:厌恶;
# 2:害怕;
# 3:高兴;
# 4:伤心;
# 5:惊讶;
# 6:正常;
本案例使用的数据集为fer2013.csv,该数据集包含三列:表情类别、图片像素、用途划分(取值为 Training / PublicTest / PrivateTest,下文分别将其作为训练集、验证集和测试集)。
1、face_data.py:
#!/usr/bin/env python
# _*_ UTF-8 _*_
import csv
import os
# 主要是将原始数据集分成三部分:
# 其一是做训练集;
# 其二是做验证集;
# 其三是做测试集;
# Where the raw FER2013 dump lives and where the three derived CSV files go.
databases_path = 'E:/Python_workspace/1cfz/fer2013'
datasets_path = 'E:/Python_workspace/1cfz'
csv_file = os.path.join(databases_path, 'fer2013.csv')
train_csv = os.path.join(datasets_path, 'train.csv')
val_csv = os.path.join(datasets_path, 'val.csv')
test_csv = os.path.join(datasets_path, 'test.csv')

# Load the whole source file once. newline='' is what the csv module
# documentation asks for on files handed to csv.reader / csv.writer.
with open(csv_file, newline='') as f:
    csvr = csv.reader(f)
    # The first row is the column header.
    header = next(csvr)
    print(header)
    rows = [row for row in csvr]

# The last column names the split a row belongs to; it is dropped from the
# output (row[:-1]) because each output file holds exactly one split.
trn = [row[:-1] for row in rows if row[-1] == 'Training']
val = [row[:-1] for row in rows if row[-1] == 'PublicTest']
tst = [row[:-1] for row in rows if row[-1] == 'PrivateTest']

for target_csv, subset in ((train_csv, trn), (val_csv, val), (test_csv, tst)):
    # A context manager guarantees the file is flushed and closed; the
    # previous inline open() left that to garbage collection.
    with open(target_csv, 'w', newline='') as out:
        csv.writer(out, lineterminator='\n').writerows([header[:-1]] + subset)
    print(len(subset))
数据分类后,需要将数据恢复成照片,恢复的时候同时将数据按照表情进行分类:
Img_recover.py:
#!/usr/bin/env python
# _*_ UTF-8 _*_
import csv
import os
from PIL import Image
import numpy as np
import face_data
# 主要是将分开的三个数据集转化成图片,即将三个数据集转化成可以查看的图片
# 0:生气;
# 1:厌恶;
# 2:害怕;
# 3:高兴;
# 4:伤心;
# 5:惊讶;
# 6:正常;
datasets_path = 'E:/Python_workspace/1cfz'
# Input CSV files, one per split.
train_csv = os.path.join(datasets_path, 'train.csv')
val_csv = os.path.join(datasets_path, 'val.csv')
test_csv = os.path.join(datasets_path, 'test.csv')
# Output image roots, one per split; each gets one sub-folder per label.
train_set = os.path.join(datasets_path, 'train')
val_set = os.path.join(datasets_path, 'val')
test_set = os.path.join(datasets_path, 'test')

for save_path, csv_file in [(train_set, train_csv), (val_set, val_csv), (test_set, test_csv)]:
    # exist_ok avoids the separate exists() check and the race that comes with it.
    os.makedirs(save_path, exist_ok=True)
    with open(csv_file, newline='') as f:
        csvr = csv.reader(f)
        # Skip the header row.
        next(csvr)
        for i, (label, pixel) in enumerate(csvr):
            # `pixel` is a whitespace-separated list of intensities; parse it
            # and lay it out as a 48x48 matrix.
            pixel = np.asarray([float(p) for p in pixel.split()]).reshape(48, 48)
            # Images are grouped by label: <save_path>/<label>/<row index>.jpg
            subfolder = os.path.join(save_path, label)
            os.makedirs(subfolder, exist_ok=True)
            # 'L' is PIL's 8-bit greyscale mode.
            im = Image.fromarray(pixel).convert('L')
            image_name = os.path.join(subfolder, '{:05d}.jpg'.format(i))
            print(image_name)
            im.save(image_name)
数据分类完成后,需要对数据进行分批,直接使用每一批次的数据进行训练:
train_batchs.py:
#!/usr/bin/env python
# _*_ UTF-8 _*_
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import face_data
import Img_recover
from tensorflow.python.training.queue_runner_impl import start_queue_runners
# 0:生气;
# 1:厌恶;
# 2:害怕;
# 3:高兴;
# 4:伤心;
# 5:惊讶;
# 6:正常;
# 将数据细分成7类,每类又分成训练集和标签
# Per-class buckets of image paths and their labels. They are refreshed on
# every get_file() call and therefore describe the most recently scanned
# directory only.
anger_0 = []
anger_0_labels = []
disgust_1 = []
disgust_1_label = []
fear_2 = []
fear_2_label = []
happy_3 = []
happy_3_label = []
sad_4 = []
sad_4_label = []
surprised_5 = []
surprised_5_label = []
normal_6 = []
normal_6_label = []

# (paths, labels) pairs; the position in this tuple is the class label and
# also the name of the sub-folder that holds the class images.
_CLASS_BUCKETS = (
    (anger_0, anger_0_labels),
    (disgust_1, disgust_1_label),
    (fear_2, fear_2_label),
    (happy_3, happy_3_label),
    (sad_4, sad_4_label),
    (surprised_5, surprised_5_label),
    (normal_6, normal_6_label),
)


def get_file(file_dir):
    """Collect and shuffle the image paths found under ``file_dir``.

    ``file_dir`` must end with a path separator and contain the sub-folders
    ``0`` .. ``6``; every file in sub-folder ``k`` is given label ``k``.

    Args:
        file_dir: directory prefix, e.g. ``'.../train/'``.

    Returns:
        ``(all_image_list, all_label_list)``: two equally long lists, shuffled
        with the same permutation, holding file paths and integer labels.

    Raises:
        OSError: if one of the seven class sub-folders cannot be listed.
    """
    image_list = []
    label_list = []
    for label, (paths, labels) in enumerate(_CLASS_BUCKETS):
        # Start from empty buckets. Without this a second call (for example
        # for the validation folder) would also return everything gathered by
        # the first call.
        del paths[:]
        del labels[:]
        class_dir = file_dir + str(label)
        for file_name in os.listdir(class_dir):
            paths.append(class_dir + '/' + file_name)
            labels.append(label)
        image_list.extend(paths)
        label_list.extend(labels)

    # One permutation applied to both lists keeps every path paired with its label.
    order = np.random.permutation(len(image_list))
    all_image_list = [image_list[i] for i in order]
    all_label_list = [label_list[i] for i in order]
    return all_image_list, all_label_list
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a queue-based input pipeline that yields batches of decoded images.

    Only graph ops are created here; nothing is read until the queue runners
    are started in a session.

    Args:
        image: list of JPEG file paths.
        label: list of integer labels, aligned with ``image``.
        image_W: target image width in pixels.
        image_H: target image height in pixels.
        batch_size: number of examples per batch.
        capacity: maximum number of examples held by the batching queue.

    Returns:
        ``(image_batch, label_batch)``: a float32 tensor of shape
        ``[batch_size, image_H, image_W, 1]`` and an int32 tensor of shape
        ``[batch_size]``.
    """
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    # Pipeline: file names on disk -> (path, label) queue -> decoded example
    # -> batching queue. slice_input_producer only describes how (path, label)
    # pairs are fed; start_queue_runners performs the actual filling.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    # Decode as a single-channel (greyscale) JPEG.
    image = tf.image.decode_jpeg(image_contents, channels=1)
    # Centre-crop or zero-pad to the target size. The API expects
    # (target_height, target_width), so height is passed first.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)
    # Scale each image to zero mean and unit variance.
    image = tf.image.per_image_standardization(image)
    # 32 threads enqueue examples into the batching queue.
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=32, capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
# [[0对应的图片list],[0]
# [1对应的图片list],[1]
# [2对应的图片list],[2]
# [3对应的图片list],[3]
# [4对应的图片list],[4]
# [5对应的图片list],[5]
# [6对应的图片list],[6]]
# file_path = 'E:/Python_workspace/1cfz/test/'
# all_image_list, all_label_list = get_file(file_path)
# print(all_image_list)
# print(all_label_list)
# image_batch, label_batch = get_batch(all_image_list, all_label_list, 24, 24, 100, 256)
# print(image_batch)
# print(label_batch)
数据分批完成后,开始构建模型cnn:
face_cnn.py:
#!/usr/bin/env python
# _*_ UTF-8 _*_
import tensorflow as tf
import face_data
import Img_recover
from tensorflow.python.ops.distributions.kullback_leibler import cross_entropy
from keras.models import load_model
from keras.models import Model
from keras import backend as K
from keras.backend.common import image_dim_ordering
# 前面三个face_data, img_recover, train_batch用来进行数据的处理:
# face_data:将数据集分成三类;
# img_recover:将像素数据转换成图片,并分类;
# train_batch:将分类好的数据再进行分批处理,形成多个批次;
# 然后进行第二段:即编写训练模型;
# Edge length in pixels of the square image produced by resize_image() and
# expected by face_predict().
IMAGE_SIZE = 64
# Default location handed to load_models().
MODEL_PATH = "E:/Python_workspace/1cfz/train_log/model.ckpt"
def inference(images, batch_size, n_classes, regularizer, reuse):
    """Build the forward pass: conv 3x3x16 -> 2x2 max-pool -> fc 2048 -> fc 512 -> logits.

    Args:
        images: input batch; the first kernel has shape ``[3, 3, 1, 16]``, so a
            single input channel is expected.
        batch_size: static batch size used to flatten the pooled feature map.
        n_classes: width of the final (logits) layer.
        regularizer: callable applied to the two fully connected weight
            matrices; its results are added to the ``"losses"`` collection.
            ``None`` disables this.
        reuse: forwarded to the variable scopes so that a second call shares
            the weights of the first; dropout is only inserted when it is falsy.

    Returns:
        The un-normalised logits tensor of shape ``[batch_size, n_classes]``.
    """
    # Convolution followed directly by a ReLU.
    with tf.variable_scope('conv1', reuse=reuse) as scope:
        kernel = tf.get_variable(
            "weights", shape=[3, 3, 1, 16], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        kernel_bias = tf.get_variable(
            "biases", shape=[16], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        convolved = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding="SAME")
        hidden = tf.nn.relu(tf.nn.bias_add(convolved, kernel_bias), name=scope.name)

    # 2x2 max-pooling with stride 2.
    with tf.variable_scope('pool2') as scope:
        pooled = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                padding="SAME", name=scope.name)

    # First fully connected layer on the flattened feature map.
    with tf.variable_scope('fc1', reuse=reuse) as scope:
        flat = tf.reshape(pooled, shape=[batch_size, -1])
        flat_dim = flat.get_shape()[1].value
        fc1_w = tf.get_variable(
            "weights", shape=[flat_dim, 2048], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        if regularizer is not None:
            # Register the weight penalty so the loss can pick it up later.
            tf.add_to_collection("losses", regularizer(fc1_w))
        fc1_b = tf.get_variable(
            "biases", shape=[2048], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        hidden = tf.nn.relu(tf.matmul(flat, fc1_w) + fc1_b, name=scope.name)
        if not reuse:
            hidden = tf.nn.dropout(hidden, keep_prob=0.5)

    # Second fully connected layer.
    with tf.variable_scope('fc2', reuse=reuse) as scope:
        fc2_w = tf.get_variable(
            "weights", shape=[2048, 512], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        if regularizer is not None:
            tf.add_to_collection("losses", regularizer(fc2_w))
        fc2_b = tf.get_variable(
            "biases", shape=[512], dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))
        hidden = tf.nn.relu(tf.matmul(hidden, fc2_w) + fc2_b, name=scope.name)
        if not reuse:
            hidden = tf.nn.dropout(hidden, keep_prob=0.5)

    # Linear output layer; softmax itself is applied inside the loss.
    with tf.variable_scope('softmax', reuse=reuse) as scope:
        out_w = tf.get_variable(
            "weights", shape=[512, n_classes], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        out_b = tf.get_variable(
            "biases", shape=[n_classes], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(hidden, out_w), out_b, name=scope.name)
    return softmax_linear
def losses(logits, labels):
    """Return mean cross-entropy plus whatever sits in the ``"losses"`` collection.

    Args:
        logits: un-normalised class scores, one row per example.
        labels: integer class ids, one per example.

    Returns:
        A scalar tensor: the mean sparse softmax cross-entropy, plus the sum
        of the tensors registered in the ``"losses"`` collection when that
        collection is not empty.
    """
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(per_example, name=scope.name)
        penalties = tf.get_collection("losses")
        # tf.add_n rejects an empty list, which is what the collection is
        # when the network was built without a regularizer.
        if penalties:
            loss = tf.add_n(penalties) + cross_entropy_mean
        else:
            loss = cross_entropy_mean
        # The summary tracks the cross-entropy part only.
        tf.summary.scalar(scope.name + '/loss', cross_entropy_mean)
    return loss
def training(loss, learning_rate):
    """Create the op that performs one Adam update on ``loss``.

    Args:
        loss: scalar tensor to minimise.
        learning_rate: step size handed to the Adam optimizer.

    Returns:
        The training op; running it also increments a non-trainable
        ``global_step`` variable created here.
    """
    with tf.variable_scope('optimizer') as scope:
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, trainable=False, name='global_step')
        train_op = adam.minimize(loss, global_step=step_counter, name=scope.name)
    return train_op
def evaluation(logits, labels):
    """Return the fraction of examples whose top-1 prediction matches the label.

    Args:
        logits: un-normalised class scores, one row per example.
        labels: integer class ids, one per example.

    Returns:
        A scalar float16 tensor holding the batch accuracy; it is also
        registered as a scalar summary.
    """
    with tf.variable_scope('accuracy') as scope:
        hits = tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float16)
        accuracy = tf.reduce_mean(hits)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
def load_models(file_path = MODEL_PATH):
    """Load a Keras model and publish it as the module-level ``model``.

    Args:
        file_path: location passed to ``keras.models.load_model``.

    Returns:
        The loaded model (also stored in the global ``model`` that
        ``face_predict`` reads).
    """
    # NOTE(review): the default path ends in ".ckpt"; load_model presumably
    # needs a model saved by Keras itself -- confirm the file format.
    global model
    # The parameter is `file_path`; the former `filepath` was an undefined name.
    model = load_model(file_path)
    return model
def resize_image(image, height = IMAGE_SIZE, width = IMAGE_SIZE):
    """Pad ``image`` with black borders to a square, then scale it.

    Args:
        image: array whose first two axes are (rows, columns); an optional
            trailing channel axis is left untouched.
        height: output height in pixels.
        width: output width in pixels.

    Returns:
        The padded image resized to ``height`` x ``width``.
    """
    # Imported here because the import list of this script, as shown, does
    # not bring cv2 into scope.
    import cv2

    # Slice so that both greyscale (H, W) and colour (H, W, C) inputs unpack.
    h, w = image.shape[:2]
    longest_edge = max(h, w)

    top, bottom, left, right = (0, 0, 0, 0)
    if h < longest_edge:
        # Shorter than wide: split the missing rows between top and bottom.
        dh = longest_edge - h
        top = dh // 2
        bottom = dh - top
    elif w < longest_edge:
        # Narrower than tall: split the missing columns between left and right.
        dw = longest_edge - w
        left = dw // 2
        right = dw - left

    black = [0, 0, 0]
    constant = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=black)
    # cv2.resize expects dsize as (width, height).
    return cv2.resize(constant, (width, height))
def face_predict(image):
    """Classify one image and return the predicted class index.

    The image is brought to the layout reported by the Keras backend
    (channels-first for ``'th'``, channels-last for ``'tf'``), converted to
    float32 and divided by 255 before prediction.

    Args:
        image: image array; it is resized and reshaped unless it already has
            the batched target shape.

    Returns:
        The first entry of ``model.predict_classes(image)``.
    """
    # NOTE(review): `model` is not defined in this function; presumably a
    # module-level Keras model set up elsewhere -- confirm before calling.
    ordering = K.image_dim_ordering()
    if ordering == 'th':
        target_shape = (1, 3, IMAGE_SIZE, IMAGE_SIZE)
    elif ordering == 'tf':
        target_shape = (1, IMAGE_SIZE, IMAGE_SIZE, 3)
    else:
        target_shape = None

    if target_shape is not None and image.shape != target_shape:
        image = resize_image(image)
        image = image.reshape(target_shape)

    image = image.astype('float32')
    image /= 255
    # predict_classes returns one class index per input row.
    predicted = model.predict_classes(image)
    return predicted[0]
模型构建完成后,需要进行模型的训练:
face_train.py:
#!/usr/bin/env python
# _*_ UTF-8 _*_
import os
import numpy as np
import tensorflow as tf
import train_batchs
import face_cnn
from tensorflow.contrib.layers import l2_regularizer
# 模型构建完成后,需要对模型进行训练,即确定对应的参数
N_CLASSES = 7
IMG_W = 48
IMG_H = 48
TRAIN_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 100
CAPACITY = 256
MAX_STEP = 50000
LEARNING_RATE = 0.0001
REGULARIZATION_RATE = 0.0001
train_dir = "E:/Python_workspace/1cfz/train/"
logs_train_dir = "E:/Python_workspace/1cfz/train_log/"
# Used both as the root of the validation images and as the directory the
# validation summaries are written to.
logs_validation_dir = "E:/Python_workspace/1cfz/val/"

# Training set: shuffled file paths and labels.
train, train_label = train_batchs.get_file(file_dir=train_dir)
# Validation set.
validation, validation_label = train_batchs.get_file(file_dir=logs_validation_dir)
print(validation)

# Batched input pipelines for both sets.
train_batch, train_label_batch = train_batchs.get_batch(train, train_label, IMG_W, IMG_H, TRAIN_BATCH_SIZE, CAPACITY)
validation_batch, validation_label_batch = train_batchs.get_batch(validation, validation_label, IMG_W, IMG_H,
                                                                  VALIDATION_BATCH_SIZE, CAPACITY)

# Two measures against over-fitting are used: an L2 weight penalty (here) and
# dropout (inside the network).
regularizer = l2_regularizer(REGULARIZATION_RATE)

# The validation network reuses the training weights and gets no regularizer.
train_logits_op = face_cnn.inference(images=train_batch, batch_size=TRAIN_BATCH_SIZE, n_classes=N_CLASSES,
                                     regularizer=regularizer, reuse=False)
validation_logits_op = face_cnn.inference(images=validation_batch, batch_size=VALIDATION_BATCH_SIZE,
                                          n_classes=N_CLASSES, regularizer=None, reuse=True)

train_losses_op = face_cnn.losses(logits=train_logits_op, labels=train_label_batch)
validation_losses_op = face_cnn.losses(logits=validation_logits_op, labels=validation_label_batch)

# Optimisation step and accuracy metrics.
train_op = face_cnn.training(train_losses_op, learning_rate=LEARNING_RATE)
train_accuracy_op = face_cnn.evaluation(logits=train_logits_op, labels=train_label_batch)
validation_accuracy_op = face_cnn.evaluation(logits=validation_logits_op, labels=validation_label_batch)

# A single op that evaluates every summary registered so far.
summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    # Event files for the training and validation curves.
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph, max_queue=3)
    val_writer = tf.summary.FileWriter(logs_validation_dir, sess.graph, max_queue=3)
    Saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    # The coordinator lets the main loop and the input threads agree on when
    # to stop; start_queue_runners launches the threads that fill the queues.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in np.arange(MAX_STEP):
            # Leave early if any thread has asked for a stop.
            if coord.should_stop():
                break
            # One optimisation step; the value returned for train_op is not
            # needed and is discarded via `_`.
            _, train_loss, train_accuracy = sess.run([train_op, train_losses_op, train_accuracy_op])
            if step % 100 == 0:
                print('step %d, train loss = %.2f, train accuracy = %.2f' % (step, train_loss, train_accuracy*100.0))
                # Evaluate the merged summaries and log them every 100 steps.
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)
            if step % 500 == 0 or (step+1) == MAX_STEP:
                # Checkpoint every 500 steps and at the very end.
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                Saver.save(sess, checkpoint_path, global_step=step)
            if step % 2000 == 0 or (step+1) == MAX_STEP:
                val_loss, val_accuracy = sess.run([validation_losses_op, validation_accuracy_op])
                print('** step %d, val loss = %.2f, val accuracy = %.2f' % (step, val_loss, val_accuracy*100.0))
                summary_str = sess.run(summary_op)
                val_writer.add_summary(summary_str, step)
    except tf.errors.OutOfRangeError:
        print("Done training -- epoch limit reached")
    finally:
        # Ask the input threads to stop and wait for them before the session
        # closes, then flush the event files.
        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        val_writer.close()
# train_op:是训练之后的模型;
# 导入要识别的图片,然后用上面的模型进行分类
# pred = train_op.eval(feed_dict={x:[result],keep_prob:1.0}, session=sess)
截至此处,模型已经构建完成,此时需要对验证数据和测试数据进行一下处理(下面的脚本通过摄像头采集人脸图片,并调用模型对采集到的人脸进行判别):
camera_face.py:
#!/usr/bin/env python
# _*_ UTF-8 _*_
import cv2
def CatchPICFromVideo(window_name, camera_idx, catch_pic_num, path_name):
    """Detect faces in a video stream, classify each one and save the crops.

    Args:
        window_name: title of the preview window.
        camera_idx: source handed to ``cv2.VideoCapture``.
        catch_pic_num: capture stops once more than this many crops are saved.
        path_name: directory the crops are written to as ``<n>.jpg``.
    """
    # Imported here because the import list of this script, as shown, only
    # contains cv2.
    import face_cnn

    cv2.namedWindow(window_name)
    # Open the video source.
    cap = cv2.VideoCapture(camera_idx)
    # NOTE(review): OpenCV ships this cascade as
    # "haarcascade_frontalface_default.xml" -- confirm the file name on disk.
    classfier = cv2.CascadeClassifier("E:/Python_workspace/face_Recognition/fer2013/harracascade_frontalface_default.xml")
    color = (0, 255, 0)
    num = 0
    try:
        while cap.isOpened():
            # ok is the read status, frame the image matrix.
            ok, frame = cap.read()
            if not ok:
                break
            # The cascade works on a greyscale image.
            grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # scaleFactor: growth of the search window between passes (1.2 = 20%);
            # minNeighbors: neighbouring hits needed to accept a detection;
            # minSize: smallest face size considered.
            faceRects = classfier.detectMultiScale(grey, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
            if len(faceRects) > 0:
                for faceRect in faceRects:
                    # Top-left corner plus width and height of the detection.
                    x, y, w, h = faceRect
                    img_name = "%s/%d.jpg" % (path_name, num)
                    # Add a 10 px margin, clamped at 0: a negative start index
                    # would be read as "from the end" and yield a wrong crop.
                    image = frame[max(y - 10, 0):y + h + 10, max(x - 10, 0):x + w + 10]
                    # Run the classifier on the crop.
                    faceID = face_cnn.face_predict(image)
                    # NOTE(review): class 0 is labelled 'ME' and everything
                    # else 'others' -- confirm this is the intended labelling.
                    caption = 'ME' if faceID == 0 else 'others'
                    cv2.rectangle(frame, (x-10, y-10), (x+w+10, y+h+10), color, thickness=2)
                    cv2.putText(frame, caption, (x+30, y+30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
                    # Save the crop.
                    cv2.imwrite(img_name, image, [int(cv2.IMWRITE_PNG_COMPRESSION), 9])
                    num += 1
                    if num > (catch_pic_num):
                        break
                    cv2.rectangle(frame, (x-10, y-10), (x+w+10, y+h+10), color, 2)
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    cv2.putText(frame, 'num:%d/100' % (num), (x+30, y+30), font, 1, (255, 0, 255), 4)
            if num > (catch_pic_num):
                break
            cv2.imshow(window_name, frame)
            c = cv2.waitKey(10)
            if c & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    # The former call targeted `IdentifyFace`, which this script never
    # defines; CatchPICFromVideo is the only function it provides.
    # NOTE(review): camera index 0, the limit of 100 (taken from the
    # 'num:%d/100' overlay) and the output directory are assumptions --
    # confirm them before running.
    CatchPICFromVideo('IdentifyFace', 0, 100, 'E:/Python_workspace/face_Recognition/pic')
另外在导入图片的时候,有可能遇到图片不符合(48*48)规格的情况,需要对图片进行修剪:
cut_face.py:
#!/usr/bin/env python
# _*_ UTF-8 _*_
import tensorflow as tf
def process():
    """Resize one JPEG to 48x48, convert it to greyscale and save the result.

    Reads ``pic/0.jpg`` and writes ``pic/60.jpg``; both paths are fixed.
    """
    # JPEG data is binary, so the file must be opened with "rb".
    with tf.gfile.GFile("E:/Python_workspace/face_Recognition/pic/0.jpg", "rb") as src:
        img = src.read()
    with tf.Session() as sess:
        img_data = tf.image.decode_jpeg(img)
        # NOTE(review): method=1 is presumably nearest-neighbour resizing --
        # confirm against the installed TensorFlow version.
        resized = tf.image.resize_images(img_data, [48, 48], method=1)
        # rgb_to_grayscale is a function of tf.image; tf.image itself is not callable.
        image_data = sess.run(tf.image.rgb_to_grayscale(resized))
        encoded_image = tf.image.encode_jpeg(image_data)
        with tf.gfile.GFile("E:/Python_workspace/face_Recognition/pic/60.jpg", "wb") as f:
            f.write(encoded_image.eval())
至此,模型使用完毕。