因为这次的煤矸识别程序是在之前的猫狗识别程序上直接修改的,而且数据集已经被替换为煤、矸石的数据集,所以猫狗识别的程序已经没有完整的版本了。下面将详细介绍我实战煤矸识别的整个流程。
我这次采用的是经典网络模型中的VGG19、ResNet50、Inception三种,可以直接通过程序在网上下载预训练模型,也就是网络结构中的参数都是已经训练好的,只需下载下来使用即可。
下载网络模型代码如下:
import torch.nn as nn
import torchvision.models as models
class feature_net(nn.Module):
    """Pretrained backbone + fully connected classifier for 2-class coal/gangue recognition.

    The backbone (all layers of a torchvision model except its final
    classifier layer) acts as a feature extractor; a VGG-style stack of
    Linear/ReLU/Dropout layers classifies the flattened features.

    Args:
        model: backbone name. Currently 'resnet50' is active; the
            'vgg' / 'inceptionv3' variants from the original experiments
            are kept below as comments.
        dim: flattened feature dimension fed to the classifier
            (2048 for resnet50's global-average-pooled output).
        n_classes: number of output classes (2: coal vs gangue).

    Raises:
        ValueError: if *model* names an unsupported backbone.
    """

    def __init__(self, model, dim, n_classes):
        super(feature_net, self).__init__()
        # if model == 'vgg':
        #     vgg = models.vgg19(pretrained=True)
        #     self.feature = nn.Sequential(*list(vgg.children())[:-1])
        #     self.feature.add_module('global average', nn.AvgPool2d(3))
        if model == 'resnet50':
            # Everything except the final fc layer; the last kept module is
            # the global average pool, so the output flattens to 2048 dims.
            resnet = models.resnet50(pretrained=True)
            self.feature = nn.Sequential(*list(resnet.children())[:-1])
        # elif model == 'inceptionv3':
        #     inception = models.inception_v3(pretrained=True)
        #     self.feature = nn.Sequential(*list(inception.children())[:-1])
        #     self.feature._modules.pop('13')
        #     self.feature.add_module('global average', nn.AvgPool2d(18))
        else:
            # Fail fast: the original code silently skipped unknown names
            # and only crashed later in forward() with an AttributeError
            # because self.feature was never assigned.
            raise ValueError('unsupported backbone model: {}'.format(model))
        self.classifier = nn.Sequential(
            nn.Linear(dim, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, n_classes),
        )

    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, dim)
        x = self.classifier(x)
        return x
# Demo construction, guarded so that `from network import feature_net`
# in the training script does not download pretrained weights as an
# import side effect.
if __name__ == '__main__':
    # dim must match the backbone's flattened feature size (2048 for
    # resnet50); the original dim=10 would make the first Linear layer
    # reject the 2048-d input at forward time.
    # model = feature_net('vgg', 25088, 2)
    model = feature_net('resnet50', 2048, 2)
    # model = feature_net('inceptionv3', 2048, 2)
    # print(model)
上述代码放置在一个新的py文件(network.py)下
因为之前在学校获取的煤、矸石图片数量有限,所以需要对图片数据集进行扩充。之前利用OpenCV做过单张图片的增强处理,所以这次仍然用OpenCV对整个文件夹的图片进行批量增强。
步骤如下:
代码如下:
import glob
import os.path

import cv2
import numpy as np
def convertjpg(jpgfile, outdir, width=320, height=256):
    """Apply one augmentation to *jpgfile* and save the result in *outdir*.

    The seven augmentations from the original experiments are toggled by
    (un)commenting; the active one here is a (200, 200) pixel translation.
    Any per-file error (unreadable image, bad path) is printed and the
    file is skipped so a batch run keeps going.

    Args:
        jpgfile: path of the source .jpg image.
        outdir: destination directory (must exist); the file keeps its name.
        width, height: target size for the resize augmentation (#1).
    """
    src1 = cv2.imread(jpgfile, cv2.IMREAD_ANYCOLOR)
    # Second copy of the image, only needed by the contrast variant (#5).
    src2 = cv2.imread(jpgfile, cv2.IMREAD_ANYCOLOR)
    # rows, cols, channel = src1.shape[:3]
    try:
        # 1. resize to 320x256.  (The original called cv2.resize(src, ...)
        #    with the undefined name `src`, which always raised NameError
        #    into the except block; fixed to use src1.)
        dst = cv2.resize(src1, (width, height), interpolation=cv2.INTER_CUBIC)
        # # 2. rotate 180 degrees around the center
        # M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 180, 1)
        # dst = cv2.warpAffine(src1, M, (cols, rows))
        # # 3. rotate 90 degrees around the center
        # M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 90, 1)
        # dst = cv2.warpAffine(src1, M, (cols, rows))
        # # 4. flip around the x axis
        # dst = cv2.flip(src1, 0)
        # # 5. adjust contrast
        # dst = cv2.addWeighted(src1, 0.5, src2, 0.2, 1)
        # # 6. crop a sub-region
        # dst = src1[50:200, 100:220]
        # 7. translate by (200, 200).  NOTE(review): this overwrites the
        #    resized image from step 1 — only the last `dst` gets saved.
        M = np.float32([[1, 0, 200], [0, 1, 200]])
        dst = cv2.warpAffine(src1, M, (src1.shape[1], src1.shape[0]))
        # Save under the original file name in the output directory.
        cv2.imwrite(os.path.join(outdir, os.path.basename(jpgfile)), dst)
    except Exception as e:
        print(e)
# Batch-convert: run the augmentation over every .jpg in the source folder.
source_pattern = r'D:\图像\gangue-cai4\*.jpg'
output_dir = r'D:\图像\gangue-ping'
for image_path in glob.glob(source_pattern):
    convertjpg(image_path, output_dir)
增强后的部分文件夹:此次使用了程序注释中的7种增强方法,另外还有是先裁剪后旋转,或者先裁剪后平移等混合方法增强。
增强后选取煤、矸石各1000张图片,分别放到coal-test、gangue-test两个文件夹下,等待下一步命名处理。
因为这是自己的数据集,所以命名比较乱,因此要命名成像猫狗数据集一样的方式。
猫狗命名方式如下:
自己的煤矸数据集命名格式如下:coal或gangue + "." + 有序数字 + ".jpg"(例如coal.0.jpg、gangue.37.jpg)。
命名程序代码为:
import os

# Folder whose files get renamed to the dogs-vs-cats style
# "<class>.<index>.jpg" scheme expected by the split/training code.
path_name = r'D:\dogs_vs_cats1\train2\coal'
i = 0  # first index to assign
# sorted() makes the numbering deterministic: os.listdir order is
# filesystem-dependent, so the original gave a different file->index
# mapping on every machine.
# NOTE(review): if the folder already contains files named coal.N.jpg,
# renaming can clobber them — run this on freshly collected files only.
for item in sorted(os.listdir(path_name)):
    # Join path_name with each entry to get its absolute path.
    os.rename(os.path.join(path_name, item),
              os.path.join(path_name, ('coal.' + str(i) + '.jpg')))
    i += 1
上面已经完成了图片的增强,得到了命名好的煤、矸石共2000张图片,下面要对这2000张图片进行预处理。
在硬盘中建立一个项目(coal-gangue),然后在项目下建立一个名称为datanet的空文件夹,接着把刚命名好的2000张图片(coal.0.jpg~coal.999.jpg和gangue.0.jpg~gangue.999.jpg)复制到此文件夹下。最后在与datanet同级的目录下创建train、val两个空文件夹,用来存放训练集、测试集。然后在这两个文件夹里分别再创建两个空文件夹:coal、gangue。如下图所示:
下面将通过运行程序把2000张图片按照90%、10%随机划分为训练样本、测试样本。
900张煤的训练样本被放在了D:\coal-gangue\train\coal文件夹下;同样,900张矸石的训练样本被放在了D:\coal-gangue\train\gangue文件夹下。
100张煤的测试样本被放在了D:\coal-gangue\val\coal文件夹下;100张矸石的测试样本被放在了D:\coal-gangue\val\gangue文件夹下。
# NOTE(review): this first listing omits the `import os` / `import shutil`
# lines; the complete version (with imports) follows below.
def redistribution():
    """Split the renamed coal/gangue images 90% train / 10% val.

    Moves every file out of D:/dogs_vs_cats1/dataset1 into the
    per-class train2/ and val2/ subfolders (which must already exist),
    so the source folder is empty afterwards.
    """
    # All file names in the source folder (coal.N.jpg / gangue.N.jpg).
    data_file = os.listdir('D:/dogs_vs_cats1/dataset1')
    # Separate the two classes by file-name prefix.  sorted() keeps the
    # train/val split deterministic — os.listdir order is arbitrary, so
    # the original moved a different 10% to val on every run.
    coals_file = sorted(filter(lambda x: x[:4] == 'coal', data_file))
    gangues_file = sorted(filter(lambda x: x[:6] == 'gangue', data_file))
    print(len(coals_file))    # per the post: 1000 before the move, 0 after
    print(len(gangues_file))
    data_root = 'D:/dogs_vs_cats1/'
    train_root = 'D:/dogs_vs_cats1/train2'
    val_root = 'D:/dogs_vs_cats1/val2'
    # First 90% of each class -> train2/<class>/, the rest -> val2/<class>/.
    for i in range(len(gangues_file)):
        image_path = data_root + 'dataset1/' + gangues_file[i]
        if i < len(gangues_file) * 0.9:
            new_path = train_root + '/gangue/' + gangues_file[i]
        else:
            new_path = val_root + '/gangue/' + gangues_file[i]
        shutil.move(image_path, new_path)
    for i in range(len(coals_file)):
        image_path = data_root + 'dataset1/' + coals_file[i]
        if i < len(coals_file) * 0.9:
            new_path = train_root + '/coal/' + coals_file[i]
        else:
            new_path = val_root + '/coal/' + coals_file[i]
        shutil.move(image_path, new_path)


if __name__ == '__main__':
    redistribution()
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import os #用来打开文件夹
import shutil #用来移动图片的库
def redistribution():
    """Split the renamed coal/gangue images 90% train / 10% val.

    Moves every file out of D:/dogs_vs_cats1/dataset1 into the
    per-class train2/ and val2/ subfolders (which must already exist),
    so the source folder is empty afterwards.
    """
    # All file names in the source folder (coal.N.jpg / gangue.N.jpg).
    data_file = os.listdir('D:/dogs_vs_cats1/dataset1')
    # Separate the two classes by file-name prefix.  sorted() keeps the
    # train/val split deterministic — os.listdir order is arbitrary, so
    # the original moved a different 10% to val on every run.
    coals_file = sorted(filter(lambda x: x[:4] == 'coal', data_file))
    gangues_file = sorted(filter(lambda x: x[:6] == 'gangue', data_file))
    print(len(coals_file))    # per the post: 1000 before the move, 0 after
    print(len(gangues_file))
    data_root = 'D:/dogs_vs_cats1/'
    train_root = 'D:/dogs_vs_cats1/train2'
    val_root = 'D:/dogs_vs_cats1/val2'
    # First 90% of each class -> train2/<class>/, the rest -> val2/<class>/.
    for i in range(len(gangues_file)):
        image_path = data_root + 'dataset1/' + gangues_file[i]
        if i < len(gangues_file) * 0.9:
            new_path = train_root + '/gangue/' + gangues_file[i]
        else:
            new_path = val_root + '/gangue/' + gangues_file[i]
        shutil.move(image_path, new_path)
    for i in range(len(coals_file)):
        image_path = data_root + 'dataset1/' + coals_file[i]
        if i < len(coals_file) * 0.9:
            new_path = train_root + '/coal/' + coals_file[i]
        else:
            new_path = val_root + '/coal/' + coals_file[i]
        shutil.move(image_path, new_path)


if __name__ == '__main__':
    redistribution()
##3.训练
import torch
from torch.autograd import Variable
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import os
import time
import argparse #
from tensorboardX import SummaryWriter
from network import feature_net
# ---- Training hyper-parameters / command-line arguments ----
total_epoch = 10  # number of training epochs
parser = argparse.ArgumentParser(description='cifar10')
parser.add_argument('--dataset1_dir', default='D:\dogs_vs_cats1')  # root folder holding train2/ and val2/
parser.add_argument('--checkpoint_dir', default='./checkpoint')
parser.add_argument('--record_dir', default='./record')  # where acc.txt / log.txt go
parser.add_argument('--log_dir', default='./log')  # TensorBoard log root
# parser.add_argument('--model', default='vgg', help='model for training')
parser.add_argument('--model', default='resnet50', help='model for training')
# parser.add_argument('--model', default='inceptionv3', help='model for training')
parser.add_argument('--outf', default='./model', help='folder to output images and model checkpoints')
parser.add_argument('--pre_model', default=False, help='use pre-model')  # resume-from-checkpoint flag (not used below)
args = parser.parse_args()
# The backbone name selected above, e.g. 'resnet50'.
model = args.model
# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ---- Image loading ----
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder  # folder-per-class image dataset
path = args.dataset1_dir  # e.g. 'D:\\dogs_vs_cats1'
# Preprocessing: center-crop to 224x224, convert the PIL image to a
# [0, 1] tensor (C, H, W), then normalize to [-1, 1] (a one-element
# mean/std tuple is broadcast over all channels).
transform = transforms.Compose([transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])
# One ImageFolder per split; expects <path>/train2/<class>/*.jpg and
# <path>/val2/<class>/*.jpg (os.path.join appends the split name).
data_image = {x: ImageFolder(root=os.path.join(path, x),
                             transform=transform)
              for x in ["train2", "val2"]}
# Print the dataset sizes.
print("train data set:", len(data_image["train2"]))
print("val data set:", len(data_image["val2"]))
# train_dataset = torchvision.datasets.ImageFolder(root='D:\dogs_vs_cats1\train2', transform=data_transform)
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
#
# val_dataset = torchvision.datasets.ImageFolder(root='D:\dogs_vs_cats1\val2', transform=data_transform)
# val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=16, shuffle=True, num_workers=0)
# print("train data set:", len(train_dataset))
# print("val data set:", len(val_dataset))
# Batch the images; batch_size must fit into GPU memory.
dataloader_image = {x: DataLoader(dataset=data_image[x],
                                  batch_size=16,
                                  shuffle=True)
                    for x in ["train2", "val2"]}
classes = data_image["train2"].classes  # class names from folder names, e.g. ['coal', 'gangue']
classes_index = data_image["train2"].class_to_idx  # folder name -> label index, e.g. {'coal': 0, 'gangue': 1}
print(classes)
print(classes_index)
# Fetch one training batch (16 images) and its labels via the iterator.
image_train2, label_train2 = next(iter(dataloader_image["train2"]))
print([classes[i] for i in label_train2])  # class name of each image in the batch
# ---- Visualize the batch ----
# Undo the (0.5, 0.5) normalization and convert the CHW tensor grid to
# an HWC numpy image so matplotlib can display it.
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
img = torchvision.utils.make_grid(image_train2)  # stitch the 16 images into one grid image
# print(img.shape)  # e.g. [3, 228, 906] after stitching
img = img.numpy().transpose((1, 2, 0))  # (C, H, W) -> (H, W, C)
# print(img.shape)  # e.g. (228, 906, 3)
img = img * std + mean  # from [-1, 1] back to [0, 1]
plt.imshow(img)  # imshow expects float images normalized to [0, 1]
plt.show()
# Flattened feature size each backbone feeds into the classifier head.
classifier_factory = {
    'vgg': 25088,
    'resnet50': 2048,
    'inceptionv3': 2048
}
# Build the network: pretrained backbone + fully connected classifier.
use_model = feature_net(model, dim=classifier_factory[args.model], n_classes=2)
# Freeze the backbone so only the classifier head is trained.
for parma in use_model.feature.parameters():
    parma.requires_grad = False
for index, parma in enumerate(use_model.classifier.parameters()):
    print(index)
    if index == 6:
        # NOTE(review): classifier parameters already default to
        # requires_grad=True, so this assignment is a no-op.
        parma.requires_grad = True
use_model = use_model.to(device)
# Cross-entropy loss; Adam (adaptive moment estimation) updates the
# classifier parameters only.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(use_model.classifier.parameters())
if __name__ == '__main__':
    # Make sure the checkpoint folder for this backbone exists.
    if not os.path.exists(args.checkpoint_dir + '/' + args.model):
        os.makedirs(args.checkpoint_dir + '/' + args.model)
        print("Checkpoint directory has been created:", str(args.checkpoint_dir + '/' + args.model))
    else:
        print("Checkpoint directory is already existed:", str(args.checkpoint_dir + '/' + args.model))
    # Make sure the model-output folder exists.
    if not os.path.exists(args.outf + '/' + args.model):
        os.makedirs(args.outf + '/' + args.model)
        print("Model directory has been created:", str(args.outf + '/' + args.model))
    else:
        print("Model directory is already existed:", str(args.outf + '/' + args.model))
    writer = SummaryWriter(log_dir=args.log_dir + '/' + args.model)
    print("Start Training, {}...".format(model))
    # Text records of training progress and per-epoch accuracy.
    record_path = args.record_dir
    acc_path = args.record_dir + '/' + args.model + '/acc.txt'
    log_path = args.record_dir + '/' + args.model + '/log.txt'
    if not os.path.exists(args.record_dir + '/' + args.model):
        os.makedirs(args.record_dir + '/' + args.model)
        print("acc.txt and log.txt will be recorded into:", str(args.record_dir + '/' + args.model))
    else:
        print("record directory is already existed:", str(args.record_dir + '/' + args.model))
    with open(acc_path, "w") as acc_f:
        with open(log_path, "w") as log_f:
            start_time = time.time()
            for epoch in range(0, total_epoch):
                print("epoch{}/{}:".format(epoch, total_epoch))
                # ---- training phase ----
                use_model.train()
                sum_loss = 0.0  # running sum of per-batch mean losses
                accuracy = 0.0  # running count of correctly classified images
                total = 0       # running count of images seen this epoch
                # NOTE(review): the original also initialized an unused
                # best_test_acc here; removed.
                for i, (image, label) in enumerate(dataloader_image["train2"]):
                    image, label = image.to(device), label.to(device)
                    label_prediction = use_model(image)
                    # Index of the max logit in each row = predicted class.
                    _, prediction = torch.max(label_prediction.data, 1)
                    current_loss = loss(label_prediction, label)
                    sum_loss += current_loss.item()
                    accuracy += torch.sum(prediction == label.data)
                    total += label.size(0)
                    current_loss.backward()  # backpropagate the batch loss
                    optimizer.step()         # update the classifier weights
                    optimizer.zero_grad()    # clear grads before the next batch
                    # Report every 5 batches: total grows by 16 per batch,
                    # so total % 5 == 0 fires at 80, 160, ... images.
                    if total % 5 == 0:
                        print("total {}, train loss:{:.4f}, train accuracy:{:.4f}".format(
                            total, sum_loss / total, 100 * accuracy / total))
                        # Mirror the progress line into the log file.
                        log_f.write("total {}, train loss:{:.4f}, train accuracy:{:.4f}".format(
                            total, sum_loss / total, 100 * accuracy / total))
                        log_f.write('\n')
                        log_f.flush()
                # TensorBoard curves for this epoch.
                writer.add_scalar('loss/train', sum_loss / (i + 1), epoch)
                writer.add_scalar('accuracy/train', 100. * accuracy / total, epoch)
                # ---- validation phase: accuracy on val2, no gradients ----
                print("Waiting for test...")
                with torch.no_grad():
                    accuracy = 0
                    total = 0
                    for image, label in dataloader_image["val2"]:
                        use_model.eval()  # inference mode: dropout disabled, no parameter updates
                        image, label = Variable(image.to(device)), Variable(label.to(device))
                        label_prediction = use_model(image)
                        _, prediction = torch.max(label_prediction.data, 1)
                        total += label.size(0)
                        accuracy += torch.sum(prediction == label.data)
                    print('测试准确率为: %.3f%%' % (100 * accuracy / total))
                    acc = 100. * accuracy / total
                    writer.add_scalar('accuracy/test', acc, epoch)
                    # Save a checkpoint every epoch.  %03d zero-pads the
                    # epoch number (the original %3d produced file names
                    # containing spaces, e.g. 'net_  1.pth').
                    model_path = args.outf + '/' + args.model + '/net_%03d.pth' % (epoch + 1)
                    torch.save(use_model.state_dict(), model_path)
                    print("Model has been saved in:", model_path)
                    acc_f.write("epoch = %03d, accuracy = %.3f%%" % (epoch + 1, acc))
                    acc_f.write('\n')
                    acc_f.flush()
            end_time = time.time() - start_time
            print("training time is:{:.0f}m {:.0f}s".format(end_time // 60, end_time % 60))
    # Close the TensorBoard writer once, after all epochs.  The original
    # also called close() inside the epoch loop, which invalidated the
    # add_scalar calls of every epoch after the first.
    writer.close()
另外再加第1节中的network.py文件代码。
运行结果的截图暂时没有找到,电脑也快要没电了,,,后续会补充运行结果的可视化。
(by dxz 2020 06 02 17:21)