In the previous post we recognized single characters with a traditional template-matching algorithm. This post covers simple machine-learning approaches instead, mainly SVM and CNN, at an introductory level that beginners can follow. The full open-source code and dataset are linked at the end of the article for reference.
The data is labeled by ten folders named 0-9, each containing the segmented character images for that digit.
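For reference, the scripts below assume a directory layout like the following (the file names are illustrative, not from the original dataset):

data/
├── train/
│   ├── 0/    # segmented images of the digit 0, e.g. 0_001.jpg
│   ├── 1/
│   │   ...
│   └── 9/
└── test/
    ├── 0/
    │   ...
    └── 9/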
With the training and test sets split this way, we can move on to building the models. First, the SVM:
import numpy as np
from tqdm import tqdm
import glob
import os
from PIL import Image
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
import joblib  # sklearn.externals.joblib was removed in newer scikit-learn versions
def img2vec(filename):
    """Flatten a 20x40 character image into a 1x800 feature vector."""
    dataVec = np.zeros((1, 800))
    img = Image.open(filename)
    img = img.crop((0, 0, 20, 40))  # keep a fixed 20x40 region
    img = np.array(img)
    for i in range(40):
        for j in range(20):
            dataVec[0, 20 * i + j] = img[i][j]
    return dataVec
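# Aside (not in the original code): assuming the inputs are single-channel
# grayscale images, the double loop above can be replaced by one vectorized
# call; img2vec_fast is a hypothetical equivalent:
# def img2vec_fast(filename):
#     img = Image.open(filename).convert("L").crop((0, 0, 20, 40))
#     return np.asarray(img, dtype=np.float64).reshape(1, 800)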
def make_dataset(parent_dir, sub_dirs, file_ext="*.jpg"):
    label = []
    feature = []
    for sub_dir in sub_dirs:
        for fn in tqdm(glob.glob(os.path.join(parent_dir, sub_dir, file_ext))):
            # The enclosing folder name is the label; os.path keeps this
            # working on both Windows and POSIX paths.
            label_name = os.path.basename(os.path.dirname(fn))
            imgdata = img2vec(fn)
            label.extend([label_name])
            feature.extend([imgdata])
    return [feature, label]
parent_dir = "./data/train"
sub_dirs = np.array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
temp = make_dataset(parent_dir, sub_dirs)
temp = np.array(temp, dtype=object)  # ragged [features, labels] pair
data = temp.transpose()
# Features
X = np.vstack(data[:, 0])
# Labels
Y = np.array(data[:, 1])
print('Shape of X:', X.shape)
print('Shape of Y:', Y.shape)
# Split first, then standardize: fit the scaler on the training data only and
# reuse its statistics on the test data.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# RBF kernel, with class weights balanced by frequency
svc = SVC(kernel='rbf', class_weight='balanced')
c_range = np.logspace(-5, 4, 10, base=2)
gamma_range = np.logspace(-9, 0, 10, base=2)
# Grid-search parameter ranges; cv=3 for 3-fold cross-validation, n_jobs=-1
# to use all cores
param_grid = [{'kernel': ['rbf'], 'C': c_range, 'gamma': gamma_range}]
grid = GridSearchCV(svc, param_grid, cv=3, n_jobs=-1)
# Train the model
clf = grid.fit(x_train, y_train)
# Save both the classifier and the fitted scaler, so inference can apply the
# same standardization instead of refitting on test data.
joblib.dump(clf, "./model_save/svm.pkl")
joblib.dump(scaler, "./model_save/scaler.pkl")
The trained model and scaler are saved in the model_save folder, ready for later inference.
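To sanity-check the saved model, you can load it back and classify a single character. A minimal sketch, reusing the img2vec helper above; the image path char.jpg is a hypothetical example:

import joblib

clf = joblib.load("./model_save/svm.pkl")        # the fitted GridSearchCV
scaler = joblib.load("./model_save/scaler.pkl")  # the scaler fitted on training data
print(clf.best_params_)                          # C and gamma chosen by the grid search
vec = scaler.transform(img2vec("char.jpg"))      # hypothetical example image
print(clf.predict(vec))                          # prints the predicted digit label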
Next, the CNN baseline code:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout
from tensorflow.keras.utils import to_categorical
import numpy as np
from PIL import Image
from tqdm import tqdm
import glob
import os
from sklearn.model_selection import train_test_split
def img2vec(filename):
    """Flatten a 20x40 character image into a 1x800 feature vector."""
    dataVec = np.zeros((1, 800))
    img = Image.open(filename)
    img = img.crop((0, 0, 20, 40))
    img = np.array(img)
    for i in range(40):
        for j in range(20):
            dataVec[0, 20 * i + j] = img[i][j]
    return dataVec
def make_dataset(parent_dir, sub_dirs, file_ext="*.jpg"):
    label = []
    feature = []
    for sub_dir in sub_dirs:
        for fn in tqdm(glob.glob(os.path.join(parent_dir, sub_dir, file_ext))):
            # Folder name as label, portable across path separators
            label_name = os.path.basename(os.path.dirname(fn))
            imgdata = img2vec(fn)
            label.extend([label_name])
            feature.extend([imgdata])
    return [feature, label]
parent_dir = "./data/train"
sub_dirs = np.array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
temp = make_dataset(parent_dir, sub_dirs)
temp = np.array(temp, dtype=object)
data = temp.transpose()
# Features
X = np.vstack(data[:, 0])
# Labels: cast the folder-name strings to integers, then one-hot encode
Y = np.array(data[:, 1]).astype(int)
Y = to_categorical(Y)
print('Shape of X:', X.shape)
print('Shape of Y:', Y.shape)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
# Reshape the flat 800-dim vectors back into 40x20 single-channel images
x_train = x_train.reshape(-1, 40, 20, 1)
x_test = x_test.reshape(-1, 40, 20, 1)
# Input size
input_dim = (40, 20, 1)
model = Sequential()
model.add(Conv2D(64, (3, 3), padding="same", activation="relu", input_shape=input_dim))  # convolution
model.add(MaxPool2D(pool_size=(2, 2)))  # max pooling
model.add(Conv2D(256, (3, 3), padding="same", activation="relu"))  # convolution
model.add(MaxPool2D(pool_size=(2, 2)))  # max pooling
model.add(Dropout(0.3))
model.add(Flatten())  # flatten to a vector
model.add(Dense(512, activation="relu"))
model.add(Dense(10, activation="softmax"))  # 10 digit classes
# Compile the model: loss function, optimizer, and evaluation metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
# Train the model
model.fit(x_train, y_train, epochs=16, batch_size=16, validation_data=(x_test, y_test))
model.save("./model_save/base_cnn_model")
Likewise, with both models saved, we can measure accuracy on the test set. The code is as follows:
import joblib  # replaces the removed sklearn.externals.joblib
from tensorflow.keras.models import load_model
import numpy as np
from tqdm import tqdm
import os
import glob
from PIL import Image
def img2vec(filename):
    """Flatten a 20x40 character image into a 1x800 feature vector."""
    dataVec = np.zeros((1, 800))
    img = Image.open(filename)
    img = img.crop((0, 0, 20, 40))
    img = np.array(img)
    for i in range(40):
        for j in range(20):
            dataVec[0, 20 * i + j] = img[i][j]
    return dataVec
def make_dataset(parent_dir, sub_dirs, file_ext="*.jpg"):
    """Evaluation variant: collect features only; folder order gives the labels."""
    feature = []
    for sub_dir in sub_dirs:
        for fn in tqdm(glob.glob(os.path.join(parent_dir, sub_dir, file_ext))):
            imgdata = img2vec(fn)
            feature.extend([imgdata])
    return feature
def svm_pred(root_dir, sub_dirs):
    img_total = 0
    acc_total = 0
    clf = joblib.load("./model_save/svm.pkl")
    # Reuse the scaler fitted at training time; refitting on the test data
    # would standardize with the wrong statistics.
    scaler = joblib.load("./model_save/scaler.pkl")
    data = make_dataset(root_dir, sub_dirs)
    data = np.array(data)
    X = np.vstack(data[:, 0])
    x_std = scaler.transform(X)
    pred = clf.predict(x_std)
    k = 0
    for sub_dir in sub_dirs:
        acc = 0
        sub_path = os.path.join(root_dir, sub_dir)
        num = len(os.listdir(sub_path))
        img_total += num
        for i in range(num):
            if pred[k] == sub_dir:
                acc += 1
            k += 1
        print("Digit:", sub_dir, " correct:", acc, " total:", num, " accuracy:", float(acc / num))
        acc_total += acc
    print("Total images:", img_total, " total correct:", acc_total, " accuracy:", float(acc_total / img_total))
def cnn_pred(root_dir, sub_dirs):
    model = load_model("./model_save/base_cnn_model")
    img_total = 0
    acc_total = 0
    for sub_dir in sub_dirs:
        acc = 0
        sub_path = os.path.join(root_dir, sub_dir)
        num = len(os.listdir(sub_path))
        img_total += num
        imgfile = [os.path.join(sub_path, x) for x in os.listdir(sub_path)]
        for img in imgfile:
            pred_img = img2vec(img)
            pred_img = pred_img.reshape(-1, 40, 20, 1)
            predlabel = np.argmax(model.predict(pred_img))
            if str(predlabel) == sub_dir:
                acc += 1
        print("Digit:", sub_dir, " correct:", acc, " total:", num, " accuracy:", float(acc / num))
        acc_total += acc
    print("Total images:", img_total, " total correct:", acc_total, " accuracy:", float(acc_total / img_total))
def main():
    i = input("Choose the split to evaluate: 1. train  2. test ")
    if i == '1':
        root_dir = "./data/train"
        sub_dirs = np.array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
        print("svm+++++++TRAIN++++++++")
        svm_pred(root_dir, sub_dirs)
        print("cnn+++++++TRAIN+++++++")
        cnn_pred(root_dir, sub_dirs)
    if i == '2':
        root_dir = "./data/test"
        sub_dirs = np.array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
        print("svm+++++++TEST++++++++")
        svm_pred(root_dir, sub_dirs)
        print("cnn+++++++TEST+++++++")
        cnn_pred(root_dir, sub_dirs)

if __name__ == "__main__":
    main()
Beginners are welcome to study and build on this!
Source resources: code + dataset + pretrained models