【PyTorch】计算图片数据集的均值和标准差

计算图片数据集的均值和标准差

因为 PyTorch 的 transforms.Normalize 方法需要输入数据集每个通道的均值和标准差，而网上许多方法都是直接把整个数据集读入内存中进行计算。对于小数据集这样做问题不大，但是对于稍大一点的数据集就可能会出现 MemoryError，即内存不足的问题。故本文采用对数据集进行多次随机抽样、分别计算每次抽样的均值和标准差、最后再求平均的方式，来近似计算整个数据集的均值和标准差。

import numpy as np
import cv2
import os
from glob import glob
from tqdm import tqdm , trange
import random

def calculate(sample):
    """Compute the per-channel mean and standard deviation of a sample of images.

    Every image is read with OpenCV, resized to a fixed size, scaled to
    [0, 1] and folded into running per-channel sums, so only one image is
    held in memory at a time instead of the whole sample.

    :param sample: iterable of image file paths drawn from the original dataset
    :return: ``(means, stdevs)``, two lists of three floats each, ordered R, G, B
    :raises ValueError: if no image in ``sample`` could be read
    """
    img_h, img_w = 480, 640  # set depends on your dataset

    # Running statistics in float64 to keep the sums accurate over many pixels.
    channel_sum = np.zeros(3, dtype=np.float64)
    channel_sq_sum = np.zeros(3, dtype=np.float64)
    pixel_count = 0

    for single_img_path in tqdm(sample, colour='green'):
        img = cv2.imread(single_img_path)
        if img is None:
            # cv2.imread signals an unreadable/non-image file by returning
            # None rather than raising; skip it instead of crashing in resize.
            print("skip unreadable image: {}".format(single_img_path))
            continue
        img = cv2.resize(img, (img_w, img_h))

        # One row per pixel, one column per channel (B, G, R), scaled to [0, 1].
        pixels = img.reshape(-1, 3).astype(np.float64) / 255.
        channel_sum += pixels.sum(axis=0)
        channel_sq_sum += np.square(pixels).sum(axis=0)
        pixel_count += pixels.shape[0]

    if pixel_count == 0:
        raise ValueError("no readable images in sample")

    mean_bgr = channel_sum / pixel_count
    # Population variance: E[x^2] - E[x]^2; clamp tiny negative rounding errors.
    var_bgr = np.maximum(channel_sq_sum / pixel_count - np.square(mean_bgr), 0.0)
    std_bgr = np.sqrt(var_bgr)

    # BGR --> RGB: images read with OpenCV need converting, PIL-read ones would not.
    means = mean_bgr[::-1].tolist()
    stdevs = std_bgr[::-1].tolist()
    print("normMean = {}".format(means))
    print("normStd = {}".format(stdevs))
    return means, stdevs

if __name__ == "__main__":
    # Approximate the dataset statistics by averaging the per-channel mean and
    # standard deviation of several random samples.
    n = 5  # number of sampling rounds
    sample_num = 1000  # number of images drawn per round
    TRAIN_DATASET_PATH = '../dataset'  # dataset location

    # Collect every file in the target folder and its subfolders. '**' only
    # descends into subfolders when it is a path component of its own and
    # recursive=True is passed.
    image_fns = [
        fn
        for fn in glob(os.path.join(TRAIN_DATASET_PATH, '**', '*.*'), recursive=True)
        if os.path.isfile(fn)
    ]
    print('The number of the dataset: ', len(image_fns))
    if not image_fns:
        raise SystemExit('No images found under {}'.format(TRAIN_DATASET_PATH))

    # random.sample raises ValueError when asked for more items than exist.
    sample_num = min(sample_num, len(image_fns))

    ave_mean = np.zeros(3, dtype=np.float64)
    ave_std = np.zeros(3, dtype=np.float64)
    for _ in trange(n):
        sample = random.sample(image_fns, sample_num)  # random sampling
        mean, stdev = calculate(sample)
        ave_mean += np.asarray(mean, dtype=np.float64)
        ave_std += np.asarray(stdev, dtype=np.float64)

    print("ave_normMean = {}".format(ave_mean / n))
    print("ave_normStd = {}".format(ave_std / n))
    ```

你可能感兴趣的:(Pytorch,深度学习,pytorch)