
1. 下载数据:点击链接(http://yann.lecun.com/exdb/mnist/)打开如下图所示的页面,下载 MNIST 手写数字识别数据集,包括训练集图像、训练集标签、测试集图像与测试集标签四个部分,并保存到指定位置。



2. 分析数据集并进行预处理。由于所提供的数据集格式为 .idx3-ubyte,不方便直接进行训练,因此需要将其转化为图片格式,并直接读取图片像素作为训练特征。数据集预处理的程序代码为(程序文件名:analysisdataset.py):

import numpy as np

import struct

import matplotlib.pyplot as plt


# Default locations of the four MNIST files; all of them live in the same
# 'user/mnist/' directory.

# Training-set image file
train_images_idx3_ubyte_file = 'user/mnist/train-images.idx3-ubyte'

# Training-set label file
train_labels_idx1_ubyte_file = 'user/mnist/train-labels.idx1-ubyte'

# Test-set image file
test_images_idx3_ubyte_file = 'user/mnist/t10k-images.idx3-ubyte'

# Test-set label file
test_labels_idx1_ubyte_file = 'user/mnist/t10k-labels.idx1-ubyte'



def decode_idx3_ubyte(idx3_ubyte_file):
    """Parse an idx3-ubyte image file into a NumPy array.

    The file begins with four big-endian 32-bit integers (magic number,
    number of images, rows per image, columns per image) followed by one
    unsigned byte per pixel. The header is printed as a sanity check.

    :param idx3_ubyte_file: path of the idx3 file
    :return: float64 array of shape (num_images, num_rows, num_cols)
    """
    # Read the raw bytes; the context manager guarantees the handle is closed.
    with open(idx3_ubyte_file, 'rb') as f:
        bin_data = f.read()

    # Header: magic number, image count, image height, image width.
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张, 图片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))

    # Decode every pixel in one pass instead of unpacking image by image.
    offset = struct.calcsize(fmt_header)
    pixels = np.frombuffer(bin_data, dtype=np.uint8,
                           count=num_images * num_rows * num_cols, offset=offset)
    # float64 matches the dtype of the array the callers previously received.
    return pixels.reshape(num_images, num_rows, num_cols).astype(np.float64)



def decode_idx1_ubyte(idx1_ubyte_file):
    """Parse an idx1-ubyte label file into a NumPy array.

    The file begins with two big-endian 32-bit integers (magic number and
    number of labels) followed by one unsigned byte per label. The header is
    printed as a sanity check.

    :param idx1_ubyte_file: path of the idx1 file
    :return: 1-D float64 array holding one label per entry
    """
    # Read the raw bytes; the context manager guarantees the handle is closed.
    with open(idx1_ubyte_file, 'rb') as f:
        bin_data = f.read()

    # Header: magic number and label count.
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张' % (magic_number, num_images))

    # Decode all labels in one pass instead of unpacking byte by byte.
    offset = struct.calcsize(fmt_header)
    labels = np.frombuffer(bin_data, dtype=np.uint8, count=num_images, offset=offset)
    # float64 matches the dtype of the array the callers previously received.
    return labels.astype(np.float64)



def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    """Load the training-set image file (train-images-idx3-ubyte).

    :param idx_ubyte_file: path of the idx file; defaults to the module-level
        training image path
    :return: n*row*col np.array, where n is the number of images
    """
    return decode_idx3_ubyte(idx_ubyte_file)



def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    """Load the training-set label file (train-labels-idx1-ubyte).

    :param idx_ubyte_file: path of the idx file; defaults to the module-level
        training label path
    :return: np.array with one label per image, as produced by
        decode_idx1_ubyte
    """
    return decode_idx1_ubyte(idx_ubyte_file)



def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    """Load the test-set image file (t10k-images-idx3-ubyte).

    :param idx_ubyte_file: path of the idx file; defaults to the module-level
        test image path
    :return: n*row*col np.array, where n is the number of images
    """
    return decode_idx3_ubyte(idx_ubyte_file)



def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    """Load the test-set label file (t10k-labels-idx1-ubyte).

    :param idx_ubyte_file: path of the idx file; defaults to the module-level
        test label path
    :return: np.array with one label per image, as produced by
        decode_idx1_ubyte
    """
    return decode_idx1_ubyte(idx_ubyte_file)




import analysisdataset

import numpy as np


def load_features():
    """Load the MNIST arrays and flatten every image into one feature row.

    Images and labels come from the ``analysisdataset`` loaders. Each image
    is flattened in row-major order, so the feature matrices have one row per
    image and one column per pixel.

    :return: tuple (training_features, training_labels, testing_features,
        testing_labels)
    """
    # Extract the training and testing datasets and labels.
    training_images = analysisdataset.load_train_images()
    training_labels = analysisdataset.load_train_labels()
    testing_images = analysisdataset.load_test_images()
    testing_labels = analysisdataset.load_test_labels()

    num_train = training_images.shape[0]
    num_test = testing_images.shape[0]
    # Pixels per image, computed as rows * cols so it does not rely on the
    # images being square.
    dimension = int(np.prod(training_images.shape[1:]))

    # Flatten all images at once; np.array makes a float64 copy so the
    # returned features do not alias the image arrays.
    training_features = np.array(training_images, dtype=np.float64).reshape(num_train, dimension)
    testing_features = np.array(testing_images, dtype=np.float64).reshape(num_test, dimension)

    return training_features, training_labels, testing_features, testing_labels



4. 由于任务为 0、1 二分类任务,因此还需要从提取好的特征中筛选出标签为 0 和 1 的样本,用于后续逻辑回归的优化问题,处理的代码为(程序文件名:logistregression.py):

import extraction_feature as ef

import numpy as np



# pre-processing the dataset

def extract_binary_features():
    """Select the samples labelled 0 or 1 and lay them out column-wise.

    Features are loaded through ``ef.load_features()``, filtered to the rows
    whose label is <= 1, divided by 255 and transposed so that each column is
    one sample. Labels are returned as column vectors.

    :return: tuple (training features of shape (dim, n_train), training
        labels of shape (n_train, 1), testing features of shape (dim, n_test),
        testing labels of shape (n_test, 1))
    """
    # Extract the features of the MNIST dataset.
    training_features, training_labels, testing_features, testing_labels = ef.load_features()

    # Keep only the samples whose label is 0 or 1.
    train_mask = training_labels <= 1
    test_mask = testing_labels <= 1

    # Divide by 255 to rescale the pixel values.
    bi_training_features = training_features[train_mask, :] / 255
    bi_testing_features = testing_features[test_mask, :] / 255

    num_training = bi_training_features.shape[0]
    num_testing = bi_testing_features.shape[0]
    bi_training_labels = training_labels[train_mask].reshape([num_training, 1])
    bi_testing_labels = testing_labels[test_mask].reshape([num_testing, 1])

    return (bi_training_features.T, bi_training_labels,
            bi_testing_features.T, bi_testing_labels)


5. 提取完特征,需要进行训练逻辑回归模型,这里对逻辑回归模型的优化利用梯度下降算法。其中,模型的优化目标为:

$p(y_i \mid x_i; w, b) = y_i\, p_1(\hat{x}_i; \beta) + (1 - y_i)\, p_0(\hat{x}_i; \beta)$

其中,$p_1(\hat{x}_i;\beta) = \frac{e^{\beta^{\rm T}\hat{x}_i}}{1 + e^{\beta^{\rm T}\hat{x}_i}}$,$p_0(\hat{x}_i;\beta) = \frac{1}{1 + e^{\beta^{\rm T}\hat{x}_i}}$,而 $\beta = (w;b)$,$w$ 和 $b$ 分别为特征向量的权重与偏置,$\hat{x} = (x;1)$。对以上模型利用“极大似然法”来估计 $w$ 和 $b$,对率回归模型最大化“对数似然”:

$l(w,b) = \sum\limits_{i = 1}^m {\ln p({y_i}|{x_i};w,b)} $


将 $p_1$、$p_0$ 代入后,最大化上式等价于最小化:

$l(\beta) = \sum\limits_{i = 1}^m \left( - y_i \beta^{\rm T} \hat{x}_i + \ln\left(1 + e^{\beta^{\rm T} \hat{x}_i}\right) \right)$



def cond_pro(w_b, x_hat):
    """Return the conditional probability p(y = 1 | x) of the logistic model.

    Computes e^z / (1 + e^z) with z the inner product of ``w_b`` and
    ``x_hat``.

    :param w_b: the combined weight and bias, with ``w_b.shape[0]`` entries
    :param x_hat: one expanded sample (features plus a trailing 1) with the
        same number of entries as ``w_b``
    :return: the probability as a Python float
    """
    dim = w_b.shape[0]
    z = np.inner(w_b.reshape([dim, ]), x_hat.reshape([dim, ]))
    # Pick the algebraically equivalent form whose exponent is never
    # positive, so exp() cannot overflow for large |z|.
    if z >= 0:
        return float(1.0 / (1.0 + np.exp(-z)))
    e_z = np.exp(z)
    return float(e_z / (1.0 + e_z))



def obj_fun(w_b, x_hat, y):
    """Evaluate the objective: the sum over samples of -y_i * a_i + ln(1 + e^{a_i}).

    Here a_i is the inner product of ``w_b`` with the i-th column of
    ``x_hat``.

    :param w_b: the combined weight and bias, ``dim`` entries
    :param x_hat: expanded training features of shape (dim, num); each column
        is one sample
    :param y: training labels, ``num`` entries (any shape that flattens to
        ``num``)
    :return: the objective value as a Python float
    """
    dim, num = x_hat.shape
    beta = np.asarray(w_b, dtype=np.float64).reshape(dim)
    targets = np.asarray(y, dtype=np.float64).reshape(num)
    a = np.dot(beta, x_hat)  # one linear response per sample, shape (num,)
    # logaddexp(0, a) == ln(1 + e^a) but does not overflow for large a.
    return float(np.sum(-targets * a + np.logaddexp(0.0, a)))



def first_order(w_b, x_hat, y):
    """Return the first-order derivative (gradient) of the objective function.

    The gradient is -sum_i x_i * (y_i - p_i), where x_i is the i-th column of
    ``x_hat`` and p_i = e^{a_i} / (1 + e^{a_i}) with a_i the inner product of
    ``w_b`` and x_i.

    :param w_b: the combined weight and bias, ``dim`` entries
    :param x_hat: expanded training features of shape (dim, num); each column
        is one sample
    :param y: training labels, ``num`` entries (any shape that flattens to
        ``num``)
    :return: the gradient as an array of shape (dim, 1)
    """
    dim, num = x_hat.shape
    beta = np.asarray(w_b, dtype=np.float64).reshape(dim)
    targets = np.asarray(y, dtype=np.float64).reshape(num)
    z = np.dot(beta, x_hat)
    # p = 1 / (1 + e^{-z}) written as exp(-ln(1 + e^{-z})); logaddexp keeps
    # the evaluation free of overflow for large |z|.
    p1 = np.exp(-np.logaddexp(0.0, -z))
    return -np.dot(x_hat, targets - p1).reshape(dim, 1)



def newton_optimal(x, y, max_iter, acc, step_size=0.001):
    """Learn the weight and bias by iteratively minimising the objective.

    Despite the name, each update is a fixed-step gradient step
    ``w_b <- w_b - step_size * gradient``; no second-order information is
    used. Iteration stops when the iteration counter reaches ``max_iter`` or
    when the decrease of the objective between two consecutive iterations is
    at most ``acc`` (this also stops the loop if the objective increases).

    :param x: training data features of shape (dim, num); each column is one
        sample
    :param y: training data labels
    :param max_iter: maximum iterations of the algorithm
    :param acc: objective accuracy (minimum per-iteration decrease) of the
        algorithm
    :param step_size: step length applied to the gradient; defaults to 0.001
    :return: the learnt weight and bias ``w_b`` of shape (dim + 1, 1)
    """
    dim, _num = x.shape
    # Initial weight and bias vector, drawn uniformly from [0, 1).
    w_b = np.random.random([dim + 1, 1])
    # Append a row of ones so the bias is learnt as the last weight.
    x_hat = np.insert(x, dim, 1, 0)

    # .item() turns a size-1 array result into a plain scalar so that the
    # comparison and the '%f' formatting below always receive a number.
    obj_result = np.asarray(obj_fun(w_b, x_hat, y)).item()
    flag = 1  # mark the number of iterations

    print('---------newton optimal process-----------')
    while True:
        fir = first_order(w_b, x_hat, y)
        w_b = w_b - step_size * fir
        new_obj_result = np.asarray(obj_fun(w_b, x_hat, y)).item()
        accuracy = obj_result - new_obj_result
        obj_result = new_obj_result
        print('iteration %d: error: %f(objective: %f)' % (flag, accuracy, new_obj_result))
        flag += 1
        if (flag >= max_iter) or (accuracy <= acc):
            break

    return w_b



def model_test(w_b, x, y):
    """Measure the accuracy of the learnt logistic regression model.

    A sample is predicted as 1 when w . x + b is positive and as 0 otherwise;
    the accuracy is the fraction of predictions equal to the given labels.

    :param w_b: learnt weight and bias of shape (dim + 1, 1); the last entry
        is the bias
    :param x: testing data features of shape (dim, num); a column represents
        an instance
    :param y: testing data labels, ``num`` entries
    :return: accuracy of the model as a float in [0, 1]
    """
    dim, num = x.shape
    w = w_b[:-1, 0]
    b = w_b[-1, 0]
    z = np.dot(w, x) + b
    # 1 / (1 + e^{-z}) > 0.5 exactly when z > 0, so thresholding z gives the
    # same decision without evaluating exp (which overflows for very
    # negative z).
    forecast_labels = (z > 0).astype(np.float64)
    flag = int(np.count_nonzero(forecast_labels == np.asarray(y).reshape(num)))
    accuracy = flag / num
    return accuracy



def main():
    """Train the 0/1 classifier on the training split and print its test accuracy."""
    max_iterations = 10000
    tolerance = 0.00001
    train_x, train_y, test_x, test_y = extract_binary_features()
    learnt_w_b = newton_optimal(train_x, train_y, max_iterations, tolerance)
    print('model accuracy is: %f' % model_test(learnt_w_b, test_x, test_y))



# Run the training/evaluation pipeline only when executed as a script.
if __name__ == '__main__':
    main()

