[Vision Engineering] Reverse Image Search: Feature Extraction

#!/usr/bin/env python
# coding: utf-8

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import numpy as np
from numpy.linalg import norm
import pickle
from tqdm import tqdm
import random
import time
import math
import tensorflow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D

os.system(u'mkdir -p ../datasets')
os.system(u'curl http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz --output ../datasets/caltech101.tar.gz')
os.system(u'tar -xvzf ../datasets/caltech101.tar.gz --directory ../datasets')
os.system(u'mv ../datasets/101_ObjectCategories ../datasets/caltech101')
os.system(u'rm -rf ../datasets/caltech101/BACKGROUND_Google')

# We will define a helper function that allows us to choose any pretrained model with all the necessary details for our experiments.

# In[ ]:


def model_picker(name):
    if (name == 'vgg16'):
        model = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=(224, 224, 3),
                      pooling='max')
    elif (name == 'vgg19'):
        model = VGG19(weights='imagenet',
                      include_top=False,
                      input_shape=(224, 224, 3),
                      pooling='max')
    elif (name == 'mobilenet'):
        model = MobileNet(weights='imagenet',
                          include_top=False,
                          input_shape=(224, 224, 3),
                          pooling='max',
                          depth_multiplier=1,
                          alpha=1)
    elif (name == 'inception'):
        model = InceptionV3(weights='imagenet',
                            include_top=False,
                            input_shape=(224, 224, 3),
                            pooling='max')
    elif (name == 'resnet'):
        model = ResNet50(weights='imagenet',
                         include_top=False,
                         input_shape=(224, 224, 3),
                         pooling='max')
    elif (name == 'xception'):
        model = Xception(weights='imagenet',
                         include_top=False,
                         input_shape=(224, 224, 3),
                         pooling='max')
    else:
        raise ValueError("Specified model not available")
    return model


# Now, let's put our function to use.

# In[ ]:


model_architecture = 'resnet'
model = model_picker(model_architecture)


# Let's define a function to extract image features given an image and a model. We developed a similar function in Chapter 2.

# In[ ]:


def extract_features(img_path, model):
    # Note: preprocess_input imported above is the ResNet50 variant; other
    # architectures (e.g. InceptionV3) expect their own preprocessing function.
    input_shape = (224, 224, 3)
    img = image.load_img(img_path,
                         target_size=(input_shape[0], input_shape[1]))
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    features = model.predict(preprocessed_img)
    flattened_features = features.flatten()
    # L2-normalize so that dot products between feature vectors equal cosine similarity
    normalized_features = flattened_features / norm(flattened_features)
    return normalized_features


# Let's see the feature length the model generates. 

# In[ ]:


features = extract_features('../sample-images/cat.jpg', model)
print(len(features))
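

# Since the features are L2-normalized, the cosine similarity between two images reduces to a dot product. A quick sketch (dog.jpg is a hypothetical second image under ../sample-images):

# In[ ]:


features_cat = extract_features('../sample-images/cat.jpg', model)
# dog.jpg is assumed to exist; swap in any other image path
features_dog = extract_features('../sample-images/dog.jpg', model)
print("Cosine similarity = ", np.dot(features_cat, features_dog))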


# Now, we will see how much time it takes to extract features of one image.

# In[ ]:

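
# Time a single feature-extraction call (numbers vary with hardware and image size)
start_time = time.time()
features = extract_features('../sample-images/cat.jpg', model)
end_time = time.time()
print("Time taken in sec = ", end_time - start_time)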

# The time taken to extract features depends on a few factors, such as image size and computing power. A better benchmark would be running the network over an entire dataset. A simple change to the existing code will allow this.
# 
# Let's make a handy function to recursively get all the image files under a root directory.

# In[ ]:


extensions = ['.jpg', '.jpeg', '.png']

def get_file_list(root_dir):
    file_list = []
    for root, directories, filenames in os.walk(root_dir):
        for filename in filenames:
            # Match on the file extension (case-insensitive) rather than a
            # substring anywhere in the name
            if any(filename.lower().endswith(ext) for ext in extensions):
                file_list.append(os.path.join(root, filename))
    return file_list


# Now, let's run the extraction over the entire dataset and time it.

# In[ ]:


# path to your dataset
root_dir = '../datasets/caltech101'
filenames = sorted(get_file_list(root_dir))

start_time = time.time()
feature_list = []
for i in tqdm(range(len(filenames))):
    feature_list.append(extract_features(filenames[i], model))
end_time = time.time()
print("Time taken in sec = ", end_time - start_time)


# Now, let's try the same with Keras's ImageDataGenerator.

# In[ ]:


batch_size = 64
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

generator = datagen.flow_from_directory(root_dir,
                                        target_size=(224, 224),
                                        batch_size=batch_size,
                                        class_mode=None,
                                        shuffle=False)

num_images = len(generator.filenames)
num_batches = int(math.ceil(num_images / batch_size))

start_time = time.time()
# model.predict replaces the deprecated predict_generator in TF 2.x
feature_list = model.predict(generator, steps=num_batches)
end_time = time.time()


# In[ ]:


# Normalize each feature vector so that comparisons reduce to dot products
for i, features in enumerate(feature_list):
    feature_list[i] = features / norm(features)

feature_list = feature_list.reshape(num_images, -1)

print("Num images   = ", len(generator.classes))
print("Shape of feature_list = ", feature_list.shape)
print("Time taken in sec = ", end_time - start_time)


# ### Effect of batch size on GPU utilization and time taken
# 
# 
# GPUs are optimized to parallelize the feature generation process, and hence give better throughput when multiple images are passed in a batch instead of one image at a time.
# Low GPU utilization indicates an opportunity to further improve throughput.
# 
# 
# GPU utilization can be inspected with the nvidia-smi command. To refresh it every half second:
# 
#     watch -n .5 nvidia-smi
#     
# To poll the GPU utilization every second and dump it into a file:
# 
#     nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -f gpu_utilization.csv -l 1
#     
# To compute the median GPU utilization from the generated file:
# 
#     sort -n gpu_utilization.csv | datamash median 1
# 
# |Model |Time (sec) | batch_size | % GPU Utilization | Implementation|
# |-|-|-|-|-|
# |Resnet50 | 124  | 1  | 52 | extract_features    |
# |Resnet50 | 98   | 1  | 72 | ImageDataGenerator |
# |Resnet50 | 57   | 2  | 81 | ImageDataGenerator |
# |Resnet50 | 40   | 4  | 88 | ImageDataGenerator |
# |Resnet50 | 34   | 8  | 94 | ImageDataGenerator |
# |Resnet50 | 29   | 16 | 97 | ImageDataGenerator |
# |Resnet50 | 28   | 32 | 97 | ImageDataGenerator |
# |Resnet50 | 28   | 64 | 98 | ImageDataGenerator |
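
# As an alternative to datamash, the median GPU utilization can also be computed in Python. A minimal sketch, assuming gpu_utilization.csv was produced by the nvidia-smi command above:

# In[ ]:


import statistics

# Each line of the file is a bare integer percentage (csv,noheader,nounits)
with open('gpu_utilization.csv') as f:
    readings = [int(line) for line in f if line.strip()]
print("Median GPU utilization = ", statistics.median(readings))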

# ### Some benchmarks on different model architectures to see relative speeds
# 
# Keeping a batch size of 64, we benchmark the different models:
# 
# |Model |items/second |
# |-|-|
# | VGG19     | 31.06 |
# | VGG16     | 28.16 | 
# | Resnet50  | 28.48 | 
# | Inception | 20.07 |
# | Mobilenet | 13.45 |

# Let's save the features as intermediate files to use later.

# In[ ]:


filenames = [root_dir + '/' + s for s in generator.filenames]


# In[ ]:


os.makedirs('./data', exist_ok=True)  # ensure the output directory exists
pickle.dump(generator.classes, open('./data/class_ids-caltech101.pickle',
                                    'wb'))
pickle.dump(filenames, open('./data/filenames-caltech101.pickle', 'wb'))
pickle.dump(
    feature_list,
    open('./data/features-caltech101-' + model_architecture + '.pickle', 'wb'))
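

# These pickles can be read back later with pickle.load. A quick sanity check (same paths as above):

# In[ ]:


class_ids = pickle.load(open('./data/class_ids-caltech101.pickle', 'rb'))
loaded_filenames = pickle.load(open('./data/filenames-caltech101.pickle', 'rb'))
loaded_features = pickle.load(
    open('./data/features-caltech101-' + model_architecture + '.pickle', 'rb'))
print(len(class_ids), len(loaded_filenames), loaded_features.shape)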


# Let's also train a fine-tuned model and save its features.

# In[ ]:


TRAIN_SAMPLES = 8677
NUM_CLASSES = 101
IMG_WIDTH, IMG_HEIGHT = 224, 224


# In[ ]:


train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.2)


# In[ ]:


train_generator = train_datagen.flow_from_directory(root_dir,
                                                    target_size=(IMG_WIDTH,
                                                                 IMG_HEIGHT),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    seed=12345,
                                                    class_mode='categorical')


# In[ ]:


def model_maker():
    # Freeze the ImageNet-pretrained convolutional base and train only the new head
    base_model = ResNet50(include_top=False,
                          input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))
    for layer in base_model.layers:
        layer.trainable = False
    input_layer = Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
    custom_model = base_model(input_layer)
    custom_model = GlobalAveragePooling2D()(custom_model)
    custom_model = Dense(64, activation='relu')(custom_model)
    custom_model = Dropout(0.5)(custom_model)
    predictions = Dense(NUM_CLASSES, activation='softmax')(custom_model)
    return Model(inputs=input_layer, outputs=predictions)


# In[ ]:


model_finetuned = model_maker()
model_finetuned.compile(loss='categorical_crossentropy',
                        optimizer=tensorflow.keras.optimizers.Adam(0.001),
                        metrics=['acc'])
# model.fit replaces the deprecated fit_generator in TF 2.x
model_finetuned.fit(
    train_generator,
    steps_per_epoch=math.ceil(TRAIN_SAMPLES / batch_size),
    epochs=10)


# In[ ]:


model_finetuned.save('./data/model-finetuned.h5')
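

# The saved model can be restored later with tensorflow.keras.models.load_model. A quick sketch:

# In[ ]:


from tensorflow.keras.models import load_model
model_restored = load_model('./data/model-finetuned.h5')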


# In[ ]:


start_time = time.time()
feature_list_finetuned = model_finetuned.predict(generator, steps=num_batches)
end_time = time.time()

for i, features_finetuned in enumerate(feature_list_finetuned):
    feature_list_finetuned[i] = features_finetuned / norm(features_finetuned)

feature_list_finetuned = feature_list_finetuned.reshape(num_images, -1)

print("Num images   = ", len(generator.classes))
print("Shape of feature_list = ", feature_list_finetuned.shape)
print("Time taken in sec = ", end_time - start_time)


# In[ ]:


pickle.dump(
    feature_list_finetuned,
    open('./data/features-caltech101-resnet-finetuned.pickle', 'wb'))

 
