用python加机器学习的方法批量判断图片是否有logo

爬取的10万张图片需要放到自家商城上,但其中有一部分图片没有加品牌logo,所以需要先判断每张图片是否已有logo,再给没有logo的图片加上logo
用python加机器学习的方法批量判断图片是否有logo_第1张图片

数据准备

import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Per-class image folders. flow_from_directory below is pointed at their
# parent folders and derives one class per sub-folder.
train_yes_dir = 'E:\\ml\\tr\\logo_yes\\'  # training data - with logo, 315 images
train_no_dir = 'E:\\ml\\tr\\logo_no\\'  # training data - without logo, 121 images
validation_yes_dir = 'E:\\ml\\v\\yes_v\\'  # validation data - with logo, 64 images
validation_no_dir = 'E:\\ml\\v\\no_v\\'  # validation data - without logo, 47 images
# Normalisation: every pixel value is multiplied by 1/255.
train_image_generator = ImageDataGenerator(rescale=1./255)
validation_image_generator = ImageDataGenerator(rescale=1./255)
# Training settings
batch_size = 32  # number of images per batch
epochs = 15  # number of passes over the training data
IMG_HEIGHT = 32  # height the images are resized to
IMG_WIDTH = 32  # width the images are resized to
# Load the images through the generators.
# target_size is (height, width) in Keras, matching the model's
# input_shape=(IMG_HEIGHT, IMG_WIDTH, 3).
train_data_gen = train_image_generator.flow_from_directory(
    batch_size=batch_size,
    directory='E:\\ml\\tr\\',
    # RGB; per the author grayscale would suit the task better but caused
    # errors later on, so RGB is kept.
    color_mode='rgb',
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary')  # binary labels (two classes)
val_data_gen = validation_image_generator.flow_from_directory(
    batch_size=batch_size,
    directory='E:\\ml\\v\\',
    color_mode='rgb',
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary')

可视化训练图像

# Pull one batch from the training generator; the labels are not needed here.
sample_training_images, _ = next(train_data_gen)


def plotImages(images_arr):
    """Display the given images in a single row of five axes-free panels."""
    _fig, panel_grid = plt.subplots(1, 5, figsize=(20, 20))
    panels = panel_grid.flatten()
    # zip stops at the shorter input, so at most five images are drawn.
    for picture, panel in zip(images_arr, panels):
        panel.imshow(picture, plt.cm.gray)
        panel.axis('off')
    plt.tight_layout()
    plt.show()


plotImages(sample_training_images[:5])

用python加机器学习的方法批量判断图片是否有logo_第2张图片

建立模型

# Small CNN: three Conv2D + MaxPooling2D stages, then a dense classifier.
# Conv2D(16, 3, ...) = 16 filters with a 3x3 kernel; padding='same' zero-pads
# the borders; input_shape is (height, width, channels) with 3 RGB channels.
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # sigmoid output for binary classification
])
model.compile(optimizer='RMSprop',
              # The output layer already applies a sigmoid, so the loss is fed
              # probabilities, not logits: from_logits must be False.
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy'])  # accuracy as the classification metric
model.summary()

用python加机器学习的方法批量判断图片是否有logo_第3张图片

# Train the model. Step counts are derived from the generators' own sample
# counts instead of hard-coded totals, so they stay correct when the image
# folders change; max(1, ...) guards against a dataset smaller than one batch.
# Model.fit accepts generators directly (fit_generator is deprecated).
history = model.fit(
    train_data_gen,
    steps_per_epoch=max(1, train_data_gen.samples // batch_size),
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=max(1, val_data_gen.samples // batch_size),
)

用python加机器学习的方法批量判断图片是否有logo_第4张图片

训练过程查看

# Per-epoch metrics recorded by Keras during training.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs_range = range(epochs)

# One row, two panels: accuracy on the left, loss on the right.
# Each entry: (panel index, training curve, validation curve,
#              training label, validation label, legend position, title).
panels = (
    (1, acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, val_loss, 'Training Loss', 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 8))
for position, train_curve, val_curve, train_label, val_label, legend_loc, title in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

用python加机器学习的方法批量判断图片是否有logo_第5张图片
训练过程的准确率和损失曲线无异常

预测并保存结果至excel

# E:\\pyrunning5\\ is the folder holding the images to classify.
import os
import shutil

from openpyxl import load_workbook


def predict(model, img_path, target_size):
    """Return the model's output for a single image file.

    The image goes through the same preprocessing as the training data:
    loaded as RGB, resized to ``target_size`` and scaled by 1/255.

    Args:
        model: the trained Keras model.
        img_path: path of the image file to classify.
        target_size: ``(height, width)`` the image is resized to.

    Returns:
        The model output for this image; element ``[0]`` is the sigmoid
        score (below 0.5 means "no logo", see the loop below).
    """
    img = tf.keras.preprocessing.image.load_img(img_path, target_size=target_size)
    pixels = tf.keras.preprocessing.image.img_to_array(img) / 255.0
    # The model expects a batch, so add a leading batch axis of size 1.
    return model.predict(pixels[None, ...])[0]


filename = r'C:\\Users\\Administrator\\Desktop\\空白.xlsx'
wb = load_workbook(filename)
ws = wb.active

# Make sure the output folder exists before copying files into it.
os.makedirs('E:\\running\\', exist_ok=True)

imglist = os.listdir('E:\\pyrunning5\\')
# Worksheet rows are 1-based, so row numbers start at 1.
for n, i in enumerate(imglist, start=1):
    print(n - 1)  # progress: zero-based index of the current image
    file = 'E:\\pyrunning5\\' + i
    # A score below 0.5 means "no logo".
    if predict(model, file, (IMG_HEIGHT, IMG_WIDTH))[0] < 0.5:
        prefix, label = '0000_', 'no_logo'
    else:
        prefix, label = '1111_', 'yes_logo'
    # Copy the file unchanged under a prefix that encodes the prediction.
    shutil.copyfile(file, 'E:\\running\\' + prefix + i)
    ws.cell(n, 1, i)
    ws.cell(n, 2, label)
wb.save('yuce_logo.xlsx')

最后再用PIL给被判定为无logo的图片加上logo就可以了

你可能感兴趣的:(数据分析学习,机器学习,分类算法)