首先将训练的图片和标签制作成数据集,我用的是numpy库里的savez函数,可以将numpy数组保存为.npz文件(无压缩,所以文件较大)。
import cv2 as cv
import numpy as np
import os
import glob
#调整图像的大小、制作数据集
def img_process(subpath, path_list, size=1145):
    """Load images as grayscale, resize them to a square, and build labels.

    Args:
        subpath: Class name of the images. 'pos' gives every image the
            label 0; any other value gives every image the label 1.
        path_list: Paths of the image files to load.
        size: Side length in pixels that every image is resized to.

    Returns:
        A tuple (img, label). img is a uint8 array of shape
        (len(path_list), size, size); label is a uint8 array of shape
        (len(path_list),).

    Raises:
        IOError: If OpenCV cannot read one of the image files.
    """
    print('This is', subpath, 'dataset.')
    total = len(path_list)
    img = np.zeros((total, size, size), dtype=np.uint8)
    if subpath == 'pos':
        label = np.zeros(total, dtype=np.uint8)
    else:
        label = np.ones(total, dtype=np.uint8)
    for finish, img_path in enumerate(path_list, start=1):
        # Read as a single-channel grayscale image.
        aimg = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
        if aimg is None:
            # imread signals a missing/undecodable file by returning None;
            # fail here with the offending path instead of inside resize.
            raise IOError('Cannot read image: %s' % img_path)
        img[finish - 1] = cv.resize(aimg, (size, size))
        # Progress report every 100 images.
        if finish % 100 == 0:
            print('...', finish, '/', total)
    print(img.shape)
    return (img, label)
def make_npz(img, label, wpath):
    """Write an image array and its label array into one .npz archive.

    The images are stored under the key 'trainx' and the labels under the
    key 'trainy'. wpath is passed to np.savez unchanged.
    """
    arrays = {'trainx': img, 'trainy': label}
    np.savez(wpath, **arrays)
# Build up to 20 dataset chunks named c0.npz ... c19.npz. Each chunk takes the
# next 500 positive images and interleaves them with the negative images so
# that positives sit at even indices and negatives at odd indices.
path=""
wpath=""
chunk_size = 500
num_chunks = 20
# glob returns files in arbitrary order; sorting makes the chunks reproducible.
path_list = sorted(glob.glob(path+'pos/*.jpg'))
path_list2 = sorted(glob.glob(path+'neg/*.jpg'))
# The negative set is the same for every chunk, so decode it once instead of
# once per chunk.
i2, l2 = img_process('neg', path_list2)
for i in range(num_chunks):
    sublist = path_list[i*chunk_size:(i+1)*chunk_size]
    if not sublist:
        # No positive images left; further chunks would be empty.
        print('No more positive images, stopping at chunk', i)
        break
    i1, l1 = img_process('pos', sublist)
    # Pair only as many images as both classes can provide, so the output
    # never indexes out of range and never contains blank padding rows.
    n = min(len(sublist), len(path_list2))
    img = np.zeros((n*2, 1145, 1145), dtype=np.uint8)
    label = np.zeros(n*2, dtype=np.uint8)
    # Even slots: positives, odd slots: negatives.
    img[0::2] = i1[:n]
    label[0::2] = l1[:n]
    img[1::2] = i2[:n]
    label[1::2] = l2[:n]
    md_name = 'c%d.npz' % i
    make_npz(img, label, wpath+md_name)
训练网络选择了AlexNet,注意要根据图片大小调整网络的输入,根据分类个数调整网络输出。训练前需要把输入数据转化为float类型。因为没有测试集标注,所以训练时把训练集作为验证集(validation_data)输入(也可以不输入)。训练过程中出现了loss突然增大的现象,暂时还没搞明白原因。
#coding=utf-8
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense,Flatten,Dropout
from keras.layers.convolutional import Conv2D,MaxPooling2D
from keras.utils.np_utils import to_categorical
import numpy as np
from keras.callbacks import TensorBoard
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "4,5"
# Training configuration and AlexNet-style model definition.
# Seed NumPy's global random generator.
seed = 7
np.random.seed(seed)
num_classes=2
img_rows=1145
img_cols=1145
path=''
# Put the single grayscale channel where the backend expects it, matching the
# reshape applied to the training data.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
model = Sequential()
# Extra layer in front of the AlexNet stack: a 15x15 convolution with stride 5
# and 'valid' padding shrinks 1145x1145 to 227x227 with 3 feature maps.
model.add(Conv2D(3,(15,15),strides=(5,5),input_shape=input_shape,padding='valid',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(96,(11,11),strides=(4,4),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
model.add(Conv2D(256,(5,5),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
model.add(Conv2D(384,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(384,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(256,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
# Fully connected classifier head.
model.add(Flatten())
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1000,activation='relu'))
# One softmax output per class.
model.add(Dense(num_classes,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.summary()
# Train on the dataset chunks c0.npz ... c19.npz one after another, then save
# the trained model so that it can be loaded for prediction.
# NOTE(review): every chunk is trained for 20 epochs before the next chunk is
# loaded; the sudden loss increase seen during training may come from these
# chunk switches. Consider looping epochs over all chunks instead; confirm.
for i in range(20):
    md_name = 'c%d.npz' % i
    # np.load keeps the .npz archive open; the with-block closes it as soon
    # as both arrays have been read into memory.
    with np.load(path+md_name) as t:
        img = t['trainx']
        label = t['trainy']
    # Add the single grayscale channel axis where the backend expects it.
    if K.image_data_format() == 'channels_first':
        img = img.reshape(img.shape[0], 1, img_rows, img_cols)
    else:
        img = img.reshape(img.shape[0], img_rows, img_cols, 1)
    # Convert to float and scale pixel values from [0, 255] to [0, 1].
    img = img.astype('float32')
    img /= 255.0
    label = keras.utils.to_categorical(label, num_classes)
    print('>>>>>>>>>begin... i=',i+1,'shape =',img.shape,label.shape)
    # Start training. The training chunk is also passed as validation data.
    model.fit(img, label, batch_size=128, epochs=20, validation_data=(img, label))
    score = model.evaluate(img, label)
    print(' Test loss:',score[0])
    print(' Test accuracy:',score[1])
# Persist the trained network under the file name the prediction script loads.
model.save('model.h5')
测试只需要加载训练好的模型,调用predict函数即可完成。注意predict输出的是每个类别的softmax概率(形状为(样本数, 类别数)),需要用argmax转化为类别标签。
from keras.models import load_model
from keras import backend as K
import cv2 as cv
import numpy as np
import os
import glob
img_rows=1145
img_cols=1145
num_classes=2
#调整图像的大小、制作数据集
def img_process(path_list, size=1145):
    """Load images as grayscale and resize them to a common square size.

    Args:
        path_list: Paths of the image files to load.
        size: Side length in pixels that every image is resized to.

    Returns:
        A uint8 array of shape (len(path_list), size, size).

    Raises:
        IOError: If OpenCV cannot read one of the image files.
    """
    total = len(path_list)
    img = np.zeros((total, size, size), dtype=np.uint8)
    for finish, img_path in enumerate(path_list, start=1):
        # Read as a single-channel grayscale image.
        aimg = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
        if aimg is None:
            # imread signals a missing/undecodable file by returning None;
            # fail here with the offending path instead of inside resize.
            raise IOError('Cannot read image: %s' % img_path)
        img[finish - 1] = cv.resize(aimg, (size, size))
        # Progress report every 100 images.
        if finish % 100 == 0:
            print('...', finish, '/', total)
    return img
# Predict the class of the test images 1.jpg ... 2000.jpg with the trained
# model and write the results to op.txt (labels) and op2.txt (probabilities).
path=' '
path_list = [path+'%d.jpg'%(i+1) for i in range(2000)]
img = img_process(path_list)
model = load_model('model.h5')
# Add the single grayscale channel axis where the backend expects it.
if K.image_data_format() == 'channels_first':
    img = img.reshape(img.shape[0], 1, img_rows, img_cols)
else:
    img = img.reshape(img.shape[0], img_rows, img_cols, 1)
# Apply the same preprocessing as in training (float32 scaled to [0, 1]);
# feeding raw 0-255 uint8 pixels would not match what the network was
# trained on.
img = img.astype('float32')
img /= 255.0
print('>>>>>>>>>>>>Read image finished!>>>>>>>>>>>>>')
pre = model.predict(img)
print(pre.shape,type(pre[0][0]))
# NOTE(review): records are terminated with '\r' only; kept unchanged in case
# a consumer depends on it, but '\n' is probably what was intended; confirm.
with open('op.txt','w') as f, open('op2.txt','w') as f2:
    for idx, probs in enumerate(pre, start=1):
        # Training labels are 0 for 'pos' and 1 for 'neg', so 1 - argmax
        # writes 1 for a predicted positive and 0 for a predicted negative.
        f.write("%d.jpg %d\r"%(idx,1-np.argmax(probs)))
        f2.write("%d.jpg %0.3f %0.3f \r" % (idx,probs[0],probs[1]))
继续改进中……