http://www.vlfeat.org/matconvnet/models/beta16/
2.ImageNet 1000种分类以及排列
https://github.com/sh1r0/caffe-android-demo/blob/master/app/src/main/assets/synset_words.txt
3.vgg模型
说明:
1.使用vgg19模型进行图片识别
2.下载的是.mat 格式的vgg模型
具体代码
utils.py
import numpy as np
import os
import scipy.misc
def get_img(src, img_size=False):
    """Load an image file as an RGB pixel array, optionally resized.

    Args:
        src: Path of the image file, read with ``scipy.misc.imread`` in
            ``'RGB'`` mode.
        img_size: Target size handed to ``scipy.misc.imresize``; the default
            ``False`` keeps the image at its original size.

    Returns:
        The pixel array produced by ``scipy.misc``.
    """
    pixels = scipy.misc.imread(src, mode='RGB')

    # Anything that is not already H x W x 3 is replicated into three channels.
    has_three_channels = len(pixels.shape) == 3 and pixels.shape[2] == 3
    if not has_three_channels:
        pixels = np.dstack((pixels, pixels, pixels))

    if img_size == False:  # noqa: E712 - equality (not identity) kept on purpose
        return pixels
    return scipy.misc.imresize(pixels, img_size)
def list_files(in_path):
    """Return the names of the files directly inside ``in_path``.

    Only the top directory is inspected; sub-directories are not descended
    into. An empty list is returned when ``os.walk`` yields nothing.
    """
    top_level = next(os.walk(in_path), None)
    if top_level is None:
        return []
    # os.walk yields (dirpath, dirnames, filenames); only the names are needed.
    return list(top_level[2])
def _get_files(img_dir):
    """Return ``img_dir`` joined with each file name that ``list_files`` finds in it."""
    return [os.path.join(img_dir, filename) for filename in list_files(img_dir)]
def save_img(out_path, img):
    """Write ``img`` to ``out_path`` after clamping it to the 0-255 byte range.

    Values are clipped to [0, 255] and cast to ``uint8`` before the array is
    handed to ``scipy.misc.imsave``.
    """
    clipped = np.clip(img, 0, 255)
    as_bytes = clipped.astype(np.uint8)
    scipy.misc.imsave(out_path, as_bytes)
vgg.py
import tensorflow as tf
import numpy as np
import scipy.io
import pdb
# Per-channel offsets that preprocess() subtracts from an image and
# unprocess() adds back.
# NOTE(review): presumably the ImageNet training-set mean in R, G, B order - confirm.
MEAN_PIXEL = np.array([ 123.68 , 116.779, 103.939])
def net(data_path, input_image):
    """Build the VGG-19 classification graph from a MatConvNet ``.mat`` file.

    Args:
        data_path: Path of the ``.mat`` weights file. It is read with
            ``scipy.io.loadmat`` and must contain a ``'layers'`` entry whose
            order matches the ``layers`` tuple below.
        input_image: Tensor fed to the first convolution. No mean
            subtraction happens here; callers are expected to have applied
            ``preprocess`` already.

    Returns:
        dict mapping every layer name (``'conv1_1'`` ... ``'softmax'``) to the
        tensor produced by that layer.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
        'fc6', 'relu6',
        'fc7', 'relu7',
        'fc8', 'softmax',
    )

    data = scipy.io.loadmat(data_path)
    weights = data['layers'][0]

    activations = {}
    current = input_image
    for i, name in enumerate(layers):
        if name.startswith('conv'):
            # Every layer has its own record in the .mat file, so the lookup
            # must be indexed by the layer position i.
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            current = _conv_layer(current, kernels, bias.reshape(-1))
        elif name.startswith('relu'):
            current = tf.nn.relu(current)
        elif name.startswith('pool'):
            current = _pool_layer(current)
        elif name.startswith('fc'):
            kernels, bias = weights[i][0][0][0][0]
            # Flatten the kernel to a 2-D matrix so it can be applied with a
            # plain matmul.
            # NOTE(review): unlike the conv branch, no width/height transpose
            # is applied before flattening - confirm the ordering matches the
            # flattened activations.
            kernels = kernels.reshape(-1, kernels.shape[-1])
            current = _fc_layer(current, kernels, bias.reshape(-1))
        elif name.startswith('soft'):
            current = _softmax_preds(current)
        activations[name] = current

    # Internal sanity check: every layer name must have produced one entry.
    assert len(activations) == len(layers)
    return activations
def _conv_layer(input, weights, bias):
    """Apply a stride-1, SAME-padded 2-D convolution and add ``bias``.

    ``weights`` is embedded in the graph as a constant filter.
    """
    filters = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input,
        filters,
        strides=(1, 1, 1, 1),
        padding='SAME',
    )
    return tf.nn.bias_add(convolved, bias)
def _pool_layer(input):
    """Apply 2x2 max pooling with stride 2 and SAME padding."""
    # The pooling window and the stride are the same 2x2 step.
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=window, padding='SAME')
def _fc_layer(input, weights, bias):
    """Flatten ``input`` per example and apply ``x @ weights + bias``.

    Every dimension after the first is collapsed into one, using the static
    shape reported by ``input.get_shape()``.
    """
    flat_size = 1
    for extent in input.get_shape().as_list()[1:]:
        flat_size *= extent

    flattened = tf.reshape(input, [-1, flat_size])
    return tf.nn.bias_add(tf.matmul(flattened, weights), bias)
def _softmax_preds(input):
    """Return the softmax of ``input`` as an op named ``"prediction"``."""
    return tf.nn.softmax(input, name="prediction")
def preprocess(image):
    """Return ``image`` with ``MEAN_PIXEL`` subtracted (inverse of ``unprocess``)."""
    centered = image - MEAN_PIXEL
    return centered
def unprocess(image):
    """Return ``image`` with ``MEAN_PIXEL`` added back (inverse of ``preprocess``)."""
    restored = image + MEAN_PIXEL
    return restored
vgg_classfication.py
# encoding: UTF-8
import tensorflow as tf, numpy as np
from utils import _get_files, get_img
import vgg
def _get_allClassificationName(file_path):
    """Read ``file_path`` and return its lines as a list.

    Each returned string keeps its trailing newline, exactly as
    ``readlines`` produces it. The file is closed even if reading fails.
    """
    with open(file_path, 'r') as f:
        return f.readlines()
if __name__ == "__main__":
    # Classify every image in testImages/ and print the five most probable
    # labels with their probabilities.
    lines = _get_allClassificationName('synset_words.txt')
    images = _get_files('testImages/')

    # Build the network once on a placeholder, so the weights file is loaded
    # a single time and the graph does not grow with every image.
    image_input = tf.placeholder(tf.float32, shape=(1, 224, 224, 3))
    net = vgg.net('data/imagenet-vgg-verydeep-19.mat', image_input)
    preds = net['softmax']

    with tf.Session() as sess:
        for imgPath in images:
            image = get_img(imgPath, (224, 224, 3)).astype(np.float32)
            print(imgPath)
            image_pre = np.expand_dims(vgg.preprocess(image), axis=0)

            # One forward pass per image; ranking and lookup are done on the
            # resulting NumPy array instead of re-evaluating the graph.
            probabilities = sess.run(preds, feed_dict={image_input: image_pre})[0]
            predsSortIndex = np.argsort(-probabilities)
            for rank in range(5):
                nIndex = predsSortIndex[rank]
                print(lines[nIndex])
                print(probabilities[nIndex])
vgg_mapVisual.py
# encoding: utf-8
import tensorflow as tf, numpy as np
import os
from utils import _get_files, get_img, save_img
import vgg
if __name__ == "__main__":
    # Save the four pool5 feature maps with the largest summed activation
    # as 0.png ... 3.png and print their channel indices.
    image = get_img('testImages/Bird-catching-the-fly.jpg', (224, 224, 3)).astype(np.float32)
    image_pre = np.expand_dims(vgg.preprocess(image), axis=0)
    image_preTensor = tf.to_float(tf.convert_to_tensor(image_pre))

    # Test pretrained model
    net = vgg.net('data/imagenet-vgg-verydeep-19.mat', image_preTensor)
    maps = net['pool5']
    # Total activation of each channel, summed over batch, height and width.
    mapSums = tf.reduce_sum(maps, [0, 1, 2])

    with tf.Session() as sess:
        # Evaluate both tensors in one forward pass; the ranking and slicing
        # below then work on plain NumPy arrays.
        mapValues, mapSumValues = sess.run([maps, mapSums])

    mapSumsIndex = np.argsort(-mapSumValues)
    for i in range(4):
        nIndex = mapSumsIndex[i]
        sPath = "%s.png" % i
        save_img(sPath, mapValues[0, :, :, nIndex])
        print(nIndex)
实验结果
1 使用vgg网络识别以下图片,识别结果中概率最大的5个分类名称及其概率值分别是什么?
答:
testImages/Bird-catching-the-fly.jpg
n02231487 walking stick, walkingstick, stick insect 0.103682
n01608432 kite 0.078659
n02236044 mantis, mantid 0.0750281
n01784675 centipede 0.0576766
n02226429 grasshopper, hopper 0.0568324
testImages/Crowd-of-people-008.jpg
n03598930 jigsaw puzzle 0.446802
n04435653 tile roof 0.222148
n04200800 shoe shop, shoe-shop, shoe store 0.126811
n03047690 clog, geta, patten, sabot 0.0362671
n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch 0.0311323
testImages/German_American_Kids_Bookshelf.jpg
n02870880 bookcase 0.49299
n02978881 cassette 0.224196
n03290653 entertainment center 0.0401046
n03529860 home theater, home theatre 0.0267266
n04392985 tape player 0.0224044
2 将A图片输入vgg,把最后一层卷积网络经 relu、maxpool 后(即 pool5 层)生成的特征map按激活度排序,并将激活度最高的4张保存为png。
答:
pool5 层 激活度最高的4个map位置是 498、477、393、280,激活度分别为899.205、731.777、647.197、613.698。
图片尺寸大小是7*7,比较小,可下载看大图。
参考网址:
https://github.com/lengstrom/fast-style-transfer/tree/master/src
http://machinethink.net/blog/con ... iphone-with-vggnet/
https://github.com/USTCchenjl/vgg_face_gender
http://blog.csdn.net/u013473520/article/details/50730620
https://github.com/boyw165/tensorflow-vgg
http://blog.csdn.net/qq_16949707/article/details/54837376
http://blog.csdn.net/u013473520/article/details/50730620