这是5幅图,加上背景共5类。
可以参考这篇文章https://blog.csdn.net/u012426298/article/details/81232386
对于一个多类别图片数据库,每个类别都有一个 class frequency:该类别的像素总数除以"包含该类别的所有图片"的像素总数(注意不是整个数据库的像素总数,下面的代码用 n_class*w*h 作为分母)。求出所有 class frequency 的 median 值,再除以该类别对应的 frequency,即得到该类别的 weight:
weight(c) = median(frequency) / frequency(c)
#coding:utf-8
from __future__ import print_function
import os
import numpy as np
import cv2
# Width and height, in pixels, assumed for every label image; compute_class
# multiplies them to get the per-image pixel count.
w,h=512,512
def find_pic(img, array_list, n_class, pixs):
    """Accumulate pixel and image counts for one label colour.

    Args:
        img: Image array whose last axis holds the colour channels; it is
            compared element-wise against ``array_list``. ``None`` (for
            example the result of a failed image read) is treated as
            containing no matching pixels.
        array_list: Per-channel colour values identifying the class.
        n_class: Running count of images in which the class has appeared.
        pixs: Running count of pixels belonging to the class.

    Returns:
        ``(pixs, n_class)``: ``pixs`` increased by the number of matching
        pixels in ``img`` and ``n_class`` increased by one when at least one
        pixel matched; both are returned unchanged otherwise.
    """
    if img is None:
        return pixs, n_class
    # A pixel belongs to the class only when every channel matches; counting
    # on the boolean mask avoids converting the whole image to a Python list.
    matches = np.all(img == array_list, axis=-1)
    pix_numbers = int(np.count_nonzero(matches))
    if pix_numbers:
        pixs += pix_numbers
        n_class += 1
    return pixs, n_class
def compute_class(pixs, n_class, image_area=None):
    """Return a class frequency: its pixels over the pixels of the images containing it.

    Args:
        pixs: Total number of pixels of the class.
        n_class: Number of images in which the class appears.
        image_area: Number of pixels per image. Defaults to the module-level
            ``w * h`` when omitted.

    Returns:
        ``pixs / (n_class * image_area)`` as a float, or ``0.0`` when
        ``n_class`` is zero (the class never appeared), instead of raising
        ``ZeroDivisionError``.
    """
    if not n_class:
        return 0.0
    if image_area is None:
        image_area = w * h
    # float() keeps this a true division even on Python 2, where int / int
    # would truncate every frequency to 0.
    return float(pixs) / (n_class * image_area)
def frequence():
    """Print and return median-frequency-balancing weights for the label images.

    Every file in the image directory is read with ``cv2.imread``; for each
    class colour the matching pixels and the number of images containing the
    class are accumulated with ``find_pic``. Each class frequency comes from
    ``compute_class`` and the weight of a class is the median frequency
    divided by that class's frequency.

    Returns:
        A NumPy array of weights ordered red, yellow, green, blue,
        background. A class that appears in no image gets weight ``0.0`` and
        is left out of the median.
    """
    # Alternative dataset directory: './trainannot_visual'
    images_path = './visual_example'
    # Label colours as channel triples in the order returned by cv2.imread
    # (OpenCV loads colour images as B, G, R).
    class_colors = [
        np.array([0, 0, 128]),    # red
        np.array([0, 128, 128]),  # yellow
        np.array([0, 128, 0]),    # green
        np.array([128, 0, 0]),    # blue
        np.array([0, 0, 0]),      # background
    ]
    # Sorted so the processing order does not depend on the file system.
    images_list_path = [os.path.join(images_path, i)
                        for i in sorted(os.listdir(images_path))]

    pix_totals = [0] * len(class_colors)
    image_counts = [0] * len(class_colors)
    for count, image_path in enumerate(images_list_path):
        print('{}image'.format(count))
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print('skip unreadable image: {}'.format(image_path))
            continue
        for idx, color in enumerate(class_colors):
            pix_totals[idx], image_counts[idx] = find_pic(
                img, color, image_counts[idx], pix_totals[idx])

    for pixs, n_images in zip(pix_totals, image_counts):
        print(pixs, n_images)

    # A class seen in no image has frequency 0; skip compute_class for it so
    # an absent class cannot trigger a division by zero.
    f_class = [compute_class(pixs, n_images) if n_images else 0.0
               for pixs, n_images in zip(pix_totals, image_counts)]
    print(*f_class)

    freqs = np.array(f_class, dtype=float)
    present = freqs > 0
    # The median is taken over the classes that actually occur.
    f_class_median = np.median(freqs[present]) if present.any() else 0.0
    print(f_class_median)

    weights = np.zeros_like(freqs)
    weights[present] = f_class_median / freqs[present]
    print(weights)
    return weights
# Run the weight computation only when this file is executed as a script.
if __name__ == '__main__':
    frequence()
这样可以保证 frequency 低于中位数的 class 权重大于 1,frequency 高于中位数的 class 权重小于 1,从而达到 balancing 的效果。