# -*- coding: utf-8 -*-
"""Handwritten Chinese character recognition on GNT-format data
(as in CASIA-HWDB), written against the TensorFlow 1.x API."""
import os
import struct

import cv2
import numpy as np
import PIL.Image
import tensorflow as tf
from sklearn.utils import shuffle
tf.app.flags.DEFINE_string("checkpoint", "ckpt/", "dir of checkpoint")
tf.app.flags.DEFINE_bool("restore", False, "restore from previous checkpoint")
FLAGS = tf.app.flags.FLAGS
train_data_dir = "../trn_gnt"
test_data_dir = "../tst_gnt"
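# Label set: 100 frequently used simplified Chinese characters.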
char_set = "的一是了我不人在他有这个上们来到时大地为子中你说生国年着就那和要她出也得里后自以会家可下而过天去能对小多然于心学么之都好看起发当没成只如事把还用第样道想作种开美总从无情己面最女但现前些所同日手又行意动"
print(len(char_set))
def read_from_gnt_dir(gnt_dir=train_data_dir):
    """Yield (image, tagcode) samples from every .gnt file in gnt_dir."""
    def one_file(f):
        # GNT record header: 4-byte sample size (little-endian), 2-byte
        # GB2312 tag code (big-endian), 2-byte width, 2-byte height.
        header_size = 10
        while True:
            header = np.fromfile(f, dtype='uint8', count=header_size)
            if not header.size:
                break
            sample_size = header[0] + (header[1] << 8) + (header[2] << 16) + (header[3] << 24)
            tagcode = header[5] + (header[4] << 8)
            width = header[6] + (header[7] << 8)
            height = header[8] + (header[9] << 8)
            if header_size + width * height != sample_size:
                break
            image = np.fromfile(f, dtype='uint8', count=width * height).reshape((height, width))
            yield image, tagcode

    for file_name in os.listdir(gnt_dir):
        if file_name.endswith('.gnt'):
            file_path = os.path.join(gnt_dir, file_name)
            with open(file_path, 'rb') as f:
                for image, tagcode in one_file(f):
                    yield image, tagcode
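# Example usage (assumes ../trn_gnt contains .gnt files):
#   for i, (img, tag) in enumerate(read_from_gnt_dir()):
#       if i >= 3:
#           break
#       print(struct.pack('>H', tag).decode('gb2312'), img.shape)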
def extract_images():
    """Dump the first 1000 training samples as PNGs and count test samples."""
    train_counter = 0
    test_counter = 0
    for image, tagcode in read_from_gnt_dir(gnt_dir=train_data_dir):
        tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
        if train_counter < 1000:
            im = PIL.Image.fromarray(image)
            im.convert('RGB').save('images/' + tagcode_unicode + str(train_counter) + '.png')
        else:
            break
        train_counter += 1
    for image, tagcode in read_from_gnt_dir(gnt_dir=test_data_dir):
        test_counter += 1
    print(train_counter, test_counter)
def resize_and_normalize_image(img):
    """Pad to a square, resize to 56x56, add a 4px white border (64x64 total),
    then scale pixel values into roughly [-1, 1)."""
    pad_size = abs(img.shape[0] - img.shape[1]) // 2
    if img.shape[0] < img.shape[1]:
        pad_dims = ((pad_size, pad_size), (0, 0))
    else:
        pad_dims = ((0, 0), (pad_size, pad_size))
    img = np.pad(img, pad_dims, mode='constant', constant_values=255)
    # cv2.resize replaces the removed scipy.misc.imresize.
    img = cv2.resize(img, (64 - 4 * 2, 64 - 4 * 2))
    img = np.pad(img, ((4, 4), (4, 4)), mode='constant', constant_values=255)
    # Cast before subtracting so uint8 arithmetic does not wrap around.
    img = (img.flatten().astype(np.float32) - 128) / 128
    return img
def convert_to_one_hot(char):
    vector = np.zeros(len(char_set))
    vector[char_set.index(char)] = 1
    return vector
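# Example: '的' is char_set[0], so convert_to_one_hot('的') returns a
# length-100 vector with a 1 at index 0.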
train_data_x = []
train_data_y = []
train_data_count = 0
batch_size = 64
num_batch = 0
def preProcessImg(image):
    """Binarize and resize an external test image, using the same
    normalization as resize_and_normalize_image."""
    if len(image.shape) == 3 or len(image.shape) == 4:
        image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        image1 = image
    ret, image2 = cv2.threshold(image1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    image3 = cv2.resize(image2, (64, 64))
    # Match the training normalization so inference sees the same input range.
    image = (image3.flatten().astype(np.float32) - 128) / 128
    return image
def load_train_data():
    global train_data_x
    global train_data_y
    global num_batch
    global train_data_count
    for image, tagcode in read_from_gnt_dir(gnt_dir=train_data_dir):
        tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
        if tagcode_unicode in char_set:
            train_data_count += 1
            train_data_x.append(resize_and_normalize_image(image))
            train_data_y.append(convert_to_one_hot(tagcode_unicode))
    print(np.shape(train_data_x))
    print(np.shape(train_data_y))
    num_batch = len(train_data_x) // batch_size
    print("num_batch=", num_batch)
def shuffleData():
    global train_data_x
    global train_data_y
    train_data_x, train_data_y = shuffle(train_data_x, train_data_y, random_state=0)
test_data_x = []
test_data_y = []
test_data_count = 0
def load_test_data():
    global test_data_x
    global test_data_y
    global test_data_count
    for image, tagcode in read_from_gnt_dir(gnt_dir=test_data_dir):
        tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
        if tagcode_unicode in char_set:
            test_data_count += 1
            test_data_x.append(resize_and_normalize_image(image))
            test_data_y.append(convert_to_one_hot(tagcode_unicode))
    print(np.shape(test_data_x))
    print(np.shape(test_data_y))
X = tf.placeholder(tf.float32, [None, 64 * 64])
Y = tf.placeholder(tf.float32, [None, len(char_set)])
keep_prob = tf.placeholder(tf.float32)
def chinese_hand_write_cnn():
    x = tf.reshape(X, shape=[-1, 64, 64, 1])
    # conv1: 64x64x1 -> 64x64x32, then 2x2 max-pool -> 32x32x32
    w_c1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
    b_c1 = tf.Variable(tf.zeros([32]))
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    # conv2: 32x32x32 -> 32x32x64, then 2x2 max-pool -> 16x16x64
    w_c2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
    b_c2 = tf.Variable(tf.zeros([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    """
    w_c3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
    b_c3 = tf.Variable(tf.zeros([128]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)
    """
    # Fully connected layer over the flattened 16x16x64 feature map.
    w_d = tf.Variable(tf.random_normal([16 * 16 * 64, 1024], stddev=0.01))
    b_d = tf.Variable(tf.zeros([1024]))
    dense = tf.reshape(conv2, [-1, w_d.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob)
    w_out = tf.Variable(tf.random_normal([1024, len(char_set)], stddev=0.01))
    b_out = tf.Variable(tf.zeros([len(char_set)]))
    out = tf.nn.softmax(tf.add(tf.matmul(dense, w_out), b_out))
    return out
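# Note: if the commented-out conv3 block above is re-enabled, the feature
# map entering the dense layer becomes 8x8x128, so w_d's first dimension
# must change to 8 * 8 * 128.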
label_size = len(char_set)
input_size = 64 * 64
hidden_size = 1024
def bp_nn():
    """A one-hidden-layer baseline network (not used by default)."""
    w1 = tf.Variable(tf.random_normal([input_size, hidden_size], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.1, shape=[hidden_size]))
    hidden = tf.nn.relu(tf.matmul(X, w1) + b1)
    w2 = tf.Variable(tf.random_normal([hidden_size, label_size], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.1, shape=[label_size]))
    # Softmax (not ReLU) on the output so the result is a probability
    # distribution compatible with the cross-entropy loss used in training.
    output = tf.nn.softmax(tf.matmul(hidden, w2) + b2)
    return output
def train_hand_write_nn():
    output = chinese_hand_write_cnn()
    # Cross-entropy on the softmax output; clipping guards against log(0).
    loss = -tf.reduce_sum(Y * tf.log(tf.clip_by_value(output, 1e-15, 1.0)))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1)), tf.float32))
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("accuracy", accuracy)
    merged_summary_op = tf.summary.merge_all()
    saver = tf.train.Saver(max_to_keep=1)
    max_acc = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        if FLAGS.restore:
            checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint)
            if checkpoint:
                saver.restore(sess, checkpoint)
                step += int(checkpoint.split('-')[-1])
                print("step=", step)
                print("Train from checkpoint")
        summary_writer = tf.summary.FileWriter('./log', sess.graph)
        for e in range(50):
            for i in range(num_batch):
                batch_x = train_data_x[i * batch_size: (i + 1) * batch_size]
                batch_y = train_data_y[i * batch_size: (i + 1) * batch_size]
                _, loss_, summary = sess.run([optimizer, loss, merged_summary_op],
                                             feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.5})
                step = e * num_batch + i
                summary_writer.add_summary(summary, step)
                print(step, "loss=", loss_)
                if step % 10 == 0:
                    # Dropout disabled (keep_prob=1.0) when measuring accuracy.
                    acc = sess.run(accuracy, feed_dict={X: test_data_x[:100], Y: test_data_y[:100], keep_prob: 1.})
                    print(step, "accuracy=", acc)
                    if acc > max_acc:
                        max_acc = acc
                        saver.save(sess, 'ckpt/nn-model.ckpt', global_step=step + 1)
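# Suggested call order for training (main() below only runs inference);
# the loaders must run first so num_batch and the data lists are populated:
#   load_train_data()
#   shuffleData()
#   load_test_data()
#   train_hand_write_nn()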
def predict():
    # Stub kept as a placeholder for a batch-prediction entry point.
    return 0
def test(path):
    tst_image = cv2.imread(path)
    tst_image = preProcessImg(tst_image)
    with tf.Session() as sess:
        output = chinese_hand_write_cnn()
        # Renamed from `predict` to avoid shadowing the function above.
        predict_op = tf.nn.top_k(output, 10)
        saver = tf.train.Saver()
        saver.restore(sess=sess, save_path=tf.train.latest_checkpoint('ckpt-85/'))
        # keep_prob must be 1.0 at inference time so dropout is disabled.
        value_topk, index_topk = sess.run(predict_op, feed_dict={X: [tst_image], keep_prob: 1.0})
        index_topk = index_topk.flatten()
        value_topk = value_topk.flatten()
        print("value_topk:", value_topk)
        print("index_topk:", index_topk)
        for i in range(len(index_topk)):
            print("predicted character:", char_set[index_topk[i]], " probability:", value_topk[i])
def main():
    print("main")
    test('testimages/ta.jpg')

if __name__ == '__main__':
    main()