声明:版权所有,转载请注明出处,谢谢!!!
地址:https://blog.csdn.net/a781751136/article/details/79725922
仅供参考,错误欢迎指正!!!
1.相信大家都已经在其他地方找到了cs231n的翻译课程,邻近算法原理和KNN自己看其他人博客,我就直接进入正题,解决操作上遇到的问题!!!
2.数据的下载http://www.cs.toronto.edu/~kriz/cifar.html,进入一个后下来下载python数据集。
3.解压数据放在Python的下路径里面。
4.重点来了,一般小白同学,刚开始学cs231n时候读取数据不会,python下没有读取数据的函数,这就需要去下载,但是下载下来是python2.x的代码,必须做简单的调整才可以在python3.6.4上运行,进而读取数据!下面我直接给出代码,保存为py文件,就可以python编译器里面运行。
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 27 16:03:04 2018
@author: 78175
"""
import pickle as pickle #2.x的版本是cpickle,这里从别人那里copy是有问题的,3.x版本是pickle
import numpy as np
import os
from scipy.misc import imread
def load_CIFAR_batch(filename):
""" load single batch of cifar """
with open(filename, 'rb') as f:
datadict = pickle.load(f,encoding='iso-8859-1')#encoding='iso-8859-1这个在2.x版本中不需要,3.x中必须需要,会出编码问题,也不需要懂,先copy
X = datadict['data']
Y = datadict['labels']
X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
Y = np.array(Y)
return X, Y
def load_CIFAR10(ROOT):#这是root是文件夹的根目录,在python工作空间里面,把数据集解压放在工作目录下。
""" load all of cifar """
xs = []
ys = []
for b in range(1,6):
f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
X, Y = load_CIFAR_batch(f)
xs.append(X)
ys.append(Y)
Xtr = np.concatenate(xs)
Ytr = np.concatenate(ys)
del X, Y
Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
return Xtr, Ytr, Xte, Yte
def load_tiny_imagenet(path, dtype=np.float32):
"""
Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
TinyImageNet-200 have the same directory structure, so this can be used
to load any of them.
Inputs:
- path: String giving path to the directory to load.
- dtype: numpy datatype used to load the data.
Returns: A tuple of
- class_names: A list where class_names[i] is a list of strings giving the
WordNet names for class i in the loaded dataset.
- X_train: (N_tr, 3, 64, 64) array of training images
- y_train: (N_tr,) array of training labels
- X_val: (N_val, 3, 64, 64) array of validation images
- y_val: (N_val,) array of validation labels
- X_test: (N_test, 3, 64, 64) array of testing images.
- y_test: (N_test,) array of test labels; if test labels are not available
(such as in student code) then y_test will be None.
"""
# First load wnids
with open(os.path.join(path, 'wnids.txt'), 'r') as f:
wnids = [x.strip() for x in f]
# Map wnids to integer labels
wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
# Use words.txt to get names for each class
with open(os.path.join(path, 'words.txt'), 'r') as f:
wnid_to_words = dict(line.split('\t') for line in f)
for wnid, words in wnid_to_words.iteritems():
wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
class_names = [wnid_to_words[wnid] for wnid in wnids]
# Next load training data.
X_train = []
y_train = []
for i, wnid in enumerate(wnids):
if (i + 1) % 20 == 0:
print ('loading training data for synset %d / %d' % (i + 1, len(wnids)))
# To figure out the filenames we need to open the boxes file
boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
with open(boxes_file, 'r') as f:
filenames = [x.split('\t')[0] for x in f]
num_images = len(filenames)
X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64)
for j, img_file in enumerate(filenames):
img_file = os.path.join(path, 'train', wnid, 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
## grayscale file
img.shape = (64, 64, 1)
X_train_block[j] = img.transpose(2, 0, 1)
X_train.append(X_train_block)
y_train.append(y_train_block)
# We need to concatenate all training data
X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train, axis=0)
# Next load validation data
with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
img_files = []
val_wnids = []
for line in f:
img_file, wnid = line.split('\t')[:2]
img_files.append(img_file)
val_wnids.append(wnid)
num_val = len(img_files)
y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
for i, img_file in enumerate(img_files):
img_file = os.path.join(path, 'val', 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
img.shape = (64, 64, 1)
X_val[i] = img.transpose(2, 0, 1)
# Next load test images
# Students won't have test labels, so we need to iterate over files in the
# images directory.
img_files = os.listdir(os.path.join(path, 'test', 'images'))
X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
for i, img_file in enumerate(img_files):
img_file = os.path.join(path, 'test', 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
img.shape = (64, 64, 1)
X_test[i] = img.transpose(2, 0, 1)
y_test = None
y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
if os.path.isfile(y_test_file):
with open(y_test_file, 'r') as f:
img_file_to_wnid = {}
for line in f:
line = line.split('\t')
img_file_to_wnid[line[0]] = line[1]
y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files]
y_test = np.array(y_test)
return class_names, X_train, y_train, X_val, y_val, X_test, y_test
def load_models(models_dir):
"""
Load saved models from disk. This will attempt to unpickle all files in a
directory; any files that give errors on unpickling (such as README.txt) will
be skipped.
Inputs:
- models_dir: String giving the path to a directory containing model files.
Each model file is a pickled dictionary with a 'model' field.
Returns:
A dictionary mapping model file names to models.
"""
models = {}
for model_file in os.listdir(models_dir):
with open(os.path.join(models_dir, model_file), 'rb') as f:
try:
models[model_file] = pickle.load(f)['model']
except pickle.UnpicklingError:
continue
return models
5.上面一长串代码,其实不用懂,只是读数据的函数,不影响后面的操作。
6.接下来是读取代码
Xtr, Ytr, Xte, Yte = load_CIFAR10('cifar-10-python/cifar-10-batches-py/')#这个位置一定是数据集文件夹,不是文件,understand
Xtr_rows = Xtr.reshape(Xtr.shape[0], 32 * 32 *3) # 这是训练集,这重新组成了一个新矩阵50000*3072,解释下3072是指图像每个像素下的每个颜色都表示一个特征32*32*3
Xte_rows = Xte.reshape(Xte.shape[0], 32 * 32 *3)#测试集,1000*32*32*3
7.开始训练
nn = NearestNeighbor() # 创建一个邻近算法对象
nn.train(Xtr_rows, Ytr) # 训练样本
Yte_predict = nn.predict(Xte_rows) # 预测集
print ('accuracy: %f' % ( np.mean(Yte_predict == Yte) ))# 平均分类精度计算
8.训练函数NearestNeighbor上面7所用到的函数
class NearestNeighbor(object):
def __init__(self):
pass
def train(self, X, y):
""" X is N x D where each row is an example. Y is 1-dimension of size N """
# the nearest neighbor classifier simply remembers all the training data
self.Xtr = X
self.ytr = y
def predict(self, X):
""" X is N x D where each row is an example we wish to predict label for """
num_test = X.shape[0]#获取输入样本数量
# lets make sure that the output type matches the input type
Ypred = np.zeros(num_test, dtype = self.ytr.dtype)
# loop over all test rows
for i in range(num_test):
# find the nearest training image to the i'th test image
# using the L1 distance (sum of absolute value differences)
distances = np.sum(np.abs(self.Xtr - X[i,:]), axis = 1)#计算距离,训练集每个图像特征与样本集特征的距离
min_index = np.argmin(distances) # argmin取最小值坐标
Ypred[i] = self.ytr[min_index] # 预测
return Ypred
距离选择:计算向量间的距离有很多种方法,另一个常用的方法是L2距离,从几何学的角度,可以理解为它在计算两个向量间的欧式距离。L2距离的公式如下:
换句话说,我们依旧是在计算像素间的差值,只是先求其平方,然后把这些平方全部加起来,最后对这个和开方。在Numpy中,我们只需要替换上面代码中的1行代码就行:
distances = np.sqrt(np.sum(np.square(self.Xtr - X[i,:]), axis = 1))
注意在这里使用了np.sqrt,但是在实际中可能不用。因为求平方根函数是一个单调函数,它对不同距离的绝对值求平方根虽然改变了数值大小,但依然保持了不同距离大小的顺序。所以用不用它,都能够对像素差异的大小进行正确比较。如果你在CIFAR-10上面跑这个模型,正确率是35.4%,比刚才低了一点。
L1和L2比较。比较这两个度量方式是挺有意思的。在面对两个向量之间的差异时,L2比L1更加不能容忍这些差异。也就是说,相对于1个巨大的差异,L2距离更倾向于接受多个中等程度的差异。L1和L2都是在p-norm常用的特殊形式。
9.KNN训练,KNearestNeighbor类,直接上代码
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 28 09:49:26 2018
@author: 78175
"""
import numpy as np
class KNearestNeighbor(object):#首先是定义一个处理KNN的类
""" a kNN classifier with L2 distance """
def __init__(self):
pass
def train(self, X, y):
"""
Train the classifier. For k-nearest neighbors this is just
memorizing the training data.
Inputs:
- X: A numpy array of shape (num_train, D) containing the training data
consisting of num_train samples each of dimension D.
- y: A numpy array of shape (N,) containing the training labels, where
y[i] is the label for X[i].
"""
self.X_train = X
self.y_train = y
def predict(self, X, k=1, num_loops=0):
"""
Predict labels for test data using this classifier.
Inputs:
- X: A numpy array of shape (num_test, D) containing test data consisting
of num_test samples each of dimension D.
- k: The number of nearest neighbors that vote for the predicted labels.
- num_loops: Determines which implementation to use to compute distances
between training points and testing points.
Returns:
- y: A numpy array of shape (num_test,) containing predicted labels for the
test data, where y[i] is the predicted label for the test point X[i].
"""
if num_loops == 0:#选择三种不同计算距离的方法
dists = self.compute_distances_no_loops(X)
elif num_loops == 1:
dists = self.compute_distances_one_loop(X)
elif num_loops == 2:
dists = self.compute_distances_two_loops(X)
else:
raise ValueError('Invalid value %d for num_loops' % num_loops)
return self.predict_labels(dists, k=k)
def compute_distances_two_loops(self, X):#两个循环
"""
Compute the distance between each test point in X and each training point
in self.X_train using a nested loop over both the training data and the
test data.
Inputs:
- X: A numpy array of shape (num_test, D) containing test data.
Returns:
- dists: A numpy array of shape (num_test, num_train) where dists[i, j]
is the Euclidean distance between the ith test point and the jth training
point.
"""
num_test = X.shape[0]
num_train = self.X_train.shape[0]
dists = np.zeros((num_test, num_train))
for i in range(num_test):
for j in range(num_train):
dists[i][j] = np.sqrt(np.sum(np.square(self.X_train[j,:] - X[i,:])))
#####################################################################
# TODO: #
# Compute the l2 distance between the ith test point and the jth #
# training point, and store the result in dists[i, j]. You should #
# not use a loop over dimension. #
#####################################################################
#####################################################################
# END OF YOUR CODE #
#####################################################################
return dists
def compute_distances_one_loop(self, X):
"""
Compute the distance between each test point in X and each training point
in self.X_train using a single loop over the test data.
Input / Output: Same as compute_distances_two_loops
"""
num_test = X.shape[0]
num_train = self.X_train.shape[0]
dists = np.zeros((num_test, num_train))
for i in range(num_test):
#######################################################################
# TODO: #
# Compute the l2 distance between the ith test point and all training #
# points, and store the result in dists[i, :]. #
#######################################################################
dists[i,:] = np.sqrt(np.sum(np.square(self.X_train-X[i,:]),axis = 1))
#######################################################################
# END OF YOUR CODE #
#######################################################################
return dists
def compute_distances_no_loops(self, X):#没用循环完全用的矩阵运行,可能会出现,Memory error,建议把编译器关了重新操作,是之前占用的内存太多,用64位的尽量
"""
Compute the distance between each test point in X and each training point
in self.X_train using no explicit loops.
Input / Output: Same as compute_distances_two_loops
"""
num_test = X.shape[0]
num_train = self.X_train.shape[0]
dists = np.zeros((num_test, num_train))
#########################################################################
# TODO: #
# Compute the l2 distance between all test points and all training #
# points without using any explicit loops, and store the result in #
# dists. #
# #
# You should implement this function using only basic array operations; #
# in particular you should not use functions from scipy. #
# #
# HINT: Try to formulate the l2 distance using matrix multiplication #
# and two broadcast sums. #
#########################################################################
dists = np.multiply(np.dot(X,self.X_train.T),-2)
sq1 = np.sum(np.square(X),axis=1,keepdims = True) #保持二维特性
sq2 = np.sum(np.square(self.X_train),axis=1)
dists = np.add(dists,sq1)
dists = np.add(dists,sq2)
dists = np.sqrt(dists)
#########################################################################
# END OF YOUR CODE #
#########################################################################
return dists
def predict_labels(self, dists, k=1):
"""
Given a matrix of distances between test points and training points,
predict a label for each test point.
Inputs:
- dists: A numpy array of shape (num_test, num_train) where dists[i, j]
gives the distance betwen the ith test point and the jth training point.
Returns:
- y: A numpy array of shape (num_test,) containing predicted labels for the
test data, where y[i] is the predicted label for the test point X[i].
"""
num_test = dists.shape[0]
y_pred = np.zeros(num_test)
for i in range(num_test):
# A list of length k storing the labels of the k nearest neighbors to
# the ith test point.
closest_y = []
#########################################################################
# TODO: #
# Use the distance matrix to find the k nearest neighbors of the ith #
# training point, and use self.y_train to find the labels of these #
# neighbors. Store these labels in closest_y. #
# Hint: Look up the function numpy.argsort. #
#########################################################################
closest_y = self.y_train[np.argsort(dists[i,:])[:k]]
#排序argsort,把排序后的位置返回为一个向量,
#后面的[:,k]取前k个值 #并且前K个值数对应y_train里面位置上,
#并取出值这些位置上的值,这些值代表了K个位置上的标签,给closest_y #########
################################################################
#########################################################################
# TODO: #
# Now that you have found the labels of the k nearest neighbors, you #
# need to find the most common label in the list closest_y of labels. #
# Store this label in y_pred[i]. Break ties by choosing the smaller #
# label. #
#########################################################################
y_pred[i] = np.argmax(np.bincount(closest_y))
# #统计closest_y,bincount就是统计函数,统计每个数出现个数####
# 返回最大个数所在的位置,这个位置的数值就是标签,标签就是预测的结果
# END OF YOUR CODE #
#########################################################################
return y_pred
10.上面代码直接复制用,对于小白来说,难点我已经中文备注,解释,不清楚的自行百度下函数用法,下面KNN训练。
Xval_rows = Xtr_rows[:1000, :] # take first 1000 for validation
Yval = Ytr[:1000]
Xtr_rows = Xtr_rows[1000:, :] # keep last 49,000 for train
Ytr = Ytr[1000:]#分成了49000个训练集1000验证集
validation_accuracies = []
for k in [1, 3, 5, 10, 20, 50, 100]:
# use a particular value of k and evaluation
#on validation data
nn = KNearestNeighbor()
nn.train(Xtr_rows, Ytr)
# here we assume a modified NearestNeighbor
#class that can take a k as input
Yval_predict = nn.predict(Xval_rows, k = k)
acc = np.mean(Yval_predict == Yval)
print ('accuracy: %f' % (acc,))
# keep track of what works on the validation
#set
validation_accuracies.append((k, acc))
结语:以上代码我已经完全调通可以直接用python3.6.4测试,可能这个过程都是现成的,但是小白会遇到很多问题,对比网上其他资料,主要就是直接从数据读取代码,到读取到训练,小白会反映不过来的。我也只是把该改的改了,运行通了,把我认为需要加强理解的地方备注了,大牛勿喷!!!
第一次写博客,坚持!!!