import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
import matplotlib.pyplot as plt
# Training hyper-parameters.
epochs = 5
batch_size = 128

# Image geometry: unknown until an image has been read, so start as None.
img_rows = img_cols = None
# Number of characters contained in each captcha image.
digits_in_img = 4

# Sample / label accumulators for the full set and the individual splits.
x_list, y_list = [], []
x_train, y_train = [], []
x_val, y_val = [], []
x_test, y_test = [], []

# Directories holding the training and test captcha images.
train_image = "Task3/internet-verification-code/train_images"
test_image = "Task3/internet-verification-code/test_images"

# Lower-case letters 'a'..'z' map to class indices 10..35.
alphabetdic = {
    letter: 10 + offset
    for offset, letter in enumerate("abcdefghijklmnopqrstuvwxyz")
}
print("alphabetdic", alphabetdic["a"])
def _label_for_char(char, label_map):
    """Return the integer class index for one character of a captcha name.

    Decimal digits map to their own value; every other character is looked
    up in ``label_map``. A ``KeyError`` propagates for unknown characters.
    """
    if char.isdecimal():
        return int(char)
    return label_map[char]


def _split_and_label(img_array, samples, labels, filename, num_digits, label_map):
    """Cut ``img_array`` into ``num_digits`` equal-width tiles and label them.

    The label of tile ``k`` is derived from ``filename[k]``. Scaled tiles
    (pixel values divided by 255) are appended to ``samples`` and integer
    class indices to ``labels``.
    """
    # Resolve every label first: a short or unexpected filename then raises
    # (IndexError / KeyError) before either list is touched, so samples and
    # labels can never end up with different lengths.
    tile_labels = [
        _label_for_char(filename[pos], label_map) for pos in range(num_digits)
    ]
    # Width of one character tile, taken from the array itself (axis 1).
    # Columns left over when the width is not divisible are ignored.
    step = img_array.shape[1] // num_digits
    for pos in range(num_digits):
        samples.append(img_array[:, pos * step:(pos + 1) * step] / 255)
    labels.extend(tile_labels)


def split_digits_in_img(img_array, x_list, y_list, filename=None, *,
                        num_digits=None, label_map=None):
    """Split one training captcha into per-character samples and labels.

    Args:
        img_array: image array sliced along its second axis (columns).
        x_list: list that receives one scaled tile per character.
        y_list: list that receives one integer class index per character.
        filename: name whose leading characters spell the captcha text.
            When omitted, the module-level ``img_filename`` (the loading
            loop's variable) is used, as the original code did.
        num_digits: characters per image; defaults to ``digits_in_img``.
        label_map: letter -> class index table; defaults to ``alphabetdic``.

    Raises:
        IndexError: if ``filename`` has fewer than ``num_digits`` characters.
        KeyError: if a non-digit character is missing from ``label_map``.
    """
    if filename is None:
        filename = img_filename
    _split_and_label(
        img_array, x_list, y_list, filename,
        digits_in_img if num_digits is None else num_digits,
        alphabetdic if label_map is None else label_map,
    )


def split_digits_testimage(img_array, x_test, y_test, filename=None, *,
                           num_digits=None, label_map=None):
    """Split one test captcha into per-character samples and labels.

    Same contract as ``split_digits_in_img``; the only difference is that an
    omitted ``filename`` falls back to the module-level ``test_filename``.
    """
    if filename is None:
        filename = test_filename
    _split_and_label(
        img_array, x_test, y_test, filename,
        digits_in_img if num_digits is None else num_digits,
        alphabetdic if label_map is None else label_map,
    )
# load all img filenames
img_filenames = os.listdir(train_image)
test_filenames = os.listdir(test_image)

# load training images as arrays
# NOTE: the loop variable must stay named `img_filename`; split_digits_in_img
# reads it to derive the labels from the file name.
for img_filename in img_filenames:
    # Match on the extension only; a substring test would also accept names
    # such as "x.jpeg.bak".
    if not img_filename.endswith('.jpeg'):
        continue
    img = load_img(os.path.join(train_image, img_filename), color_mode='grayscale')
    img_array = img_to_array(img)
    img_rows, img_cols, _ = img_array.shape
    split_digits_in_img(img_array, x_list, y_list)
if not x_list:
    # Fail fast with a clear message instead of an obscure error further down.
    raise RuntimeError('no .jpeg training images found in {0!r}'.format(train_image))
# One-hot encode once, after all labels are collected (36 classes: 0-9, a-z).
y_list = keras.utils.to_categorical(y_list, num_classes=36)

# load test images as arrays
# NOTE: the loop variable must stay named `test_filename`; it is read by
# split_digits_testimage.
for test_filename in test_filenames:
    if not test_filename.endswith('.jpeg'):
        continue
    img = load_img(os.path.join(test_image, test_filename), color_mode='grayscale')
    img_array = img_to_array(img)
    img_rows, img_cols, _ = img_array.shape
    split_digits_testimage(img_array, x_test, y_test)
if not x_test:
    raise RuntimeError('no .jpeg test images found in {0!r}'.format(test_image))
y_test = keras.utils.to_categorical(y_test, num_classes=36)

# split data into training set and validation set
x_train, x_val, y_train, y_val = train_test_split(x_list, y_list)
# model
model_path = 'model/cnn_model.h5'
if os.path.isfile(model_path):
    # recreate the exact same model purely from the file if it exists
    model = models.load_model(model_path)
    print('Model loaded from file.')
else:
    # otherwise, create a new cnn model; its input is one character tile
    # (full image height, 1/digits_in_img of the image width, 1 channel)
    model = models.Sequential()
    model.add(layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(img_rows, img_cols // digits_in_img, 1)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Dropout(rate=0.25))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(rate=0.5))
    model.add(layers.Dense(36, activation='softmax'))
    print('New model created.')

model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

# start training
# Fit on the training split only: fitting on x_list/y_list would also train
# on the validation samples and make the validation metrics meaningless.
model.fit(
    np.array(x_train), np.array(y_train),
    batch_size=batch_size, epochs=epochs, verbose=1,
    validation_data=(np.array(x_val), np.array(y_val)),
)

# evaluate model on the held-out test images
loss, accuracy = model.evaluate(np.array(x_test), np.array(y_test), verbose=0)
print('Test loss:', loss)
print('Test accuracy:', accuracy)

# save the model (create the target directory on first run)
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)