通常,事件A在事件B(发生)的条件下的概率,与事件B在事件A的条件下的概率是不一样的;然而,这两者是有确定的关系,贝叶斯法则就是这种关系的陈述。
作为一个规范的原理,贝叶斯法则对于所有概率的解释是有效的;然而,频率主义者和贝叶斯主义者对于在应用中概率如何被赋值有着不同的看法:频率主义者根据随机事件发生的频率,或者总体样本里面的个数来赋值概率;贝叶斯主义者要根据未知的命题来赋值概率。一个结果就是,贝叶斯主义者有更多的机会使用贝叶斯法则。
贝叶斯法则是关于随机事件A和B的条件概率和边缘概率的。
其中P(A|B)是在B发生的情况下A发生的可能性,即 $P(A|B)=\dfrac{P(B|A)\,P(A)}{P(B)}$。更一般地,设 $A_1,A_2,\ldots,A_n$ 为完备事件组,即 $\bigcup_{i=1}^{n}A_i=\Omega,\ A_iA_j=\varnothing\,(i\neq j),\ P(A_i)>0$,则 $P(A_i|B)=\dfrac{P(B|A_i)\,P(A_i)}{\sum_{j=1}^{n}P(B|A_j)\,P(A_j)}$。
在贝叶斯法则中,每个名词都有约定俗成的名称:
Pr(A)是A的先验概率或边缘概率。之所以称为"先验"是因为它不考虑任何B方面的因素。
Pr(A|B)是已知B发生后A的条件概率,也由于得自B的取值而被称作A的后验概率。
Pr(B|A)是已知A发生后B的条件概率,也由于得自A的取值而被称作B的后验概率。
Pr(B)是B的先验概率或边缘概率,也作标准化常量(normalizing constant)。
按这些术语,Bayes法则可表述为:
后验概率 = (似然度 * 先验概率)/标准化常量 也就是说,后验概率与先验概率和似然度的乘积成正比。
另外,比例Pr(B|A)/Pr(B)也有时被称作标准似然度(standardised likelihood),Bayes法则可表述为:
后验概率 = 标准似然度 * 先验概率。 [1]
对于有两个以上变量的情况,贝氏定理亦成立。例如:
这个式子可以由套用多次二个变量的贝氏定理及条件机率的定义导出。
本文使用数据集是Sort_1000pics,并对数据进行了分类,使用VGG对分类后的数据集特征提取,对得到的数据集使用贝叶斯原理进行训练识别。
数据集分类代码
import os
import shutil
import glob
def mycopyfile(srcfile, dstpath, numlist):  # copy helper
    """Copy ``srcfile`` into ``dstpath`` when its numeric name is in range.

    Sort_1000pics names images ``<number>.jpg`` where each run of 100
    consecutive numbers belongs to one category; ``numlist`` is the first
    number of the wanted block, so files with stems in
    ``[numlist, numlist + 99]`` are copied.

    Args:
        srcfile: path of the candidate image file.
        dstpath: destination directory, expected to end with a separator.
        numlist: first image number of the category's 100-image block.
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return
    fpath, fname = os.path.split(srcfile)  # split directory and file name
    parts = fname.split('.')
    num = parts[0]  # numeric stem, e.g. '123' for '123.jpg'
    if os.path.exists(dstpath + fname):
        print("{}.{} file exist!".format(parts[0], parts[1]))
        return
    if not num.isdigit():
        # Fix: skip files without a purely numeric stem instead of
        # crashing on int() below.
        return
    if numlist <= int(num) <= numlist + 99:
        # Fix: shutil.copyfile raises FileNotFoundError when the target
        # directory is missing; create it on demand.
        os.makedirs(dstpath, exist_ok=True)
        shutil.copyfile(srcfile, dstpath + fname)
        print("copy %s -> %s" % (srcfile, dstpath + fname))
# Map each category folder to the first image number of its 100-image block
# in the flat Sort_1000pics dump (e.g. Beach owns images 100-199).
dirlist = ['People', 'Architecture', 'Beach', 'BigTrucks', 'Dinosaurs', 'Elephant', 'Flowers', 'Food', 'Horse',
           'Mountain']
numlist = [0, 200, 100, 300, 400, 500, 600, 900, 700, 800]
dirnumdict = dict(zip(dirlist, numlist))
print(dirnumdict)

src_dir = os.getcwd() + '/dataset1/'
if os.path.isdir(src_dir):
    print(True)
src_file_list = glob.glob(src_dir + '*')

# Offer every source image to every category; mycopyfile keeps only the
# ones whose number falls inside that category's block.
for category, first_num in dirnumdict.items():
    dst_dir = os.getcwd() + '/dataset/{}/'.format(category)
    for image_path in src_file_list:
        mycopyfile(image_path, dst_dir, first_num)
VGG特征提取代码
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import models, transforms
from PIL import Image
import numpy as np
import os, glob
import csv
dir_root = './dataset/'  # root of the class-sorted image folders produced by the copy script
features_dir = './dataset/feature/'  # output root for extracted feature .txt files (original comment: Resnet_features_train — the model is actually VGG16)
class Encoder(nn.Module):
    """VGG16-based image feature extractor.

    Wraps an ImageNet-pretrained VGG16: the full convolutional stack plus
    the classifier truncated by dropping its last three modules, so
    ``forward`` returns one feature vector per input image (4096-d for the
    standard torchvision VGG16 classifier layout).
    """

    def __init__(self):
        super(Encoder, self).__init__()
        # Downloads the pretrained ImageNet weights on first use.
        VGG = models.vgg16(pretrained=True)
        self.feature = VGG.features
        # Keep classifier children up to (and including) the second fully
        # connected layer; the trailing ReLU/Dropout/1000-way Linear are cut.
        self.classifier = nn.Sequential(*list(VGG.classifier.children())[:-3])
        # NOTE(review): the child modules above are the *same objects* as in
        # VGG, so they already carry the pretrained weights. Additionally,
        # VGG.state_dict() keys ('classifier.0.weight', ...) do not match
        # self.classifier's keys ('0.weight', ...), so this filtered update
        # appears to be a no-op — confirm before relying on it.
        pretrained_dict = VGG.state_dict()
        model_dict = self.classifier.state_dict()
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        self.classifier.load_state_dict(model_dict)

    def forward(self, x):
        # conv features -> flatten to (batch, -1) -> truncated classifier.
        output = self.feature(x)
        output = output.view(output.size(0), -1)
        output = self.classifier(output)
        return output
# Build the feature-extraction network once at import time.
model = Encoder()
# model = model.cuda()  # uncomment to keep the encoder permanently on GPU
def extractor(img_path, saved_path, net, use_gpu=False):
    """Run one image through ``net`` and save its feature vector.

    The image is resized/center-cropped to 224x224, pushed through ``net``,
    and the squeezed output is written to ``saved_path`` as a comma-separated
    text file.

    Args:
        img_path: path of the input image.
        saved_path: destination .txt file for the feature vector.
        net: feature-extraction module (e.g. the ``Encoder`` above).
        use_gpu: move input and net to CUDA before the forward pass.

    Returns:
        The feature vector as a numpy array.
    """
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    # Fix: force 3-channel RGB so grayscale / RGBA images do not crash a
    # network that expects (N, 3, H, W) input.
    img = Image.open(img_path).convert('RGB')
    img = transform(img)
    print(img.shape)
    x = torch.unsqueeze(img, dim=0).float()  # add batch dim: (1, 3, 224, 224)
    print(x.shape)
    if use_gpu:
        x = x.cuda()
        net = net.cuda()
    # torch.no_grad() replaces the deprecated Variable(..., requires_grad=False):
    # inference only, no autograd bookkeeping.
    with torch.no_grad():
        y = net(x).cpu()
    feature_numpy = torch.squeeze(y).numpy()
    print(feature_numpy.shape)
    np.savetxt(saved_path, feature_numpy, delimiter=',')
    return feature_numpy
if __name__ == '__main__':
    print(os.listdir(dir_root))
    feature_list = []  # one feature vector (as list) per newly processed image
    lable_name = []    # parallel class labels (original spelling kept)
    use_gpu = torch.cuda.is_available()  # hoisted: loop-invariant
    for eachfilename in os.listdir(dir_root):
        if eachfilename == 'feature':
            continue  # skip the feature output folder itself
        data_dir = './dataset/{}'.format(eachfilename)
        files_list = []
        for path, d, filelist in os.walk(data_dir):
            for filename in filelist:
                files_list.extend(glob.glob(os.path.join(path, filename)))
        print(files_list)
        for x_path in files_list:
            # Fix: the original split on hard-coded '\\' and '/' and only
            # worked on Windows; os.path handles both separator styles.
            file_name = os.path.splitext(os.path.basename(x_path))[0]
            class_name = os.path.basename(os.path.dirname(x_path))
            file_feature_dir = features_dir + class_name
            print(file_name)
            if not os.path.exists(file_feature_dir):
                os.makedirs(file_feature_dir)
            fx_path = file_feature_dir + '/' + file_name + '.txt'
            if not os.path.exists(fx_path):
                feature_numpy = extractor(x_path, fx_path, model, use_gpu)
                feature_list.append(list(feature_numpy))
                lable_name.append(eachfilename)
            else:
                print("{} file exist!".format(fx_path))
    # Fix: newline='' stops csv.writer from emitting blank rows on Windows.
    with open('labelset.csv', 'w', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(lable_name)
    with open('dataset.csv', 'w', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(feature_list)
贝叶斯图像分类识别代码
from sklearn.model_selection import train_test_split #对数据集进行切分
from sklearn.naive_bayes import GaussianNB # 朴素贝叶斯网络模型
import csv
# Load the feature matrix: each row of dataset.csv is one image feature vector.
X = []
with open('dataset.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        if len(row):  # tolerate blank rows left by writers lacking newline=''
            X.append([float(each) for each in row])
# Labels were written as a single csv row.
with open('labelset.csv', 'r') as f:
    f_csv = csv.reader(f)
    rows = [row for row in f_csv]
Y = rows[0]
# Random 8:2 train/test split (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
clf = GaussianNB()          # Gaussian naive Bayes classifier
clf.fit(X_train, y_train)   # train on the 80% split
pre = clf.predict(X_test)   # predict labels for the held-out 20%
print('预测集标签')
print(y_test)
print('预测结果')
print(pre)
# Accuracy = fraction of test samples whose prediction matches the truth.
num = 0
for truth, guess in zip(y_test, pre):
    if truth == guess:
        num += 1
print(num / len(pre))
# Fix: newline='' stops csv.writer from inserting blank rows on Windows.
with open('result.csv', 'w', newline='') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(['预测试集标签'])
    f_csv.writerow(y_test)
    f_csv.writerow(['预测结果标签'])
    f_csv.writerow(pre)
    f_csv.writerow(['精度:{}'.format(num / len(pre))])
测试结果:
预测试集标签 | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
People | Mountain | BigTrucks | Flowers | Food | People | Architecture | BigTrucks | Dinosaurs | Horse | Elephant | Flowers | Horse | BigTrucks | Flowers | Architecture | People | Beach | Architecture | Architecture | Architecture | Elephant | Dinosaurs | Dinosaurs | Mountain | Horse | Architecture | Beach | Horse | Architecture | Mountain | Horse | Food | Horse | Elephant | Horse | People | Dinosaurs | People | BigTrucks | BigTrucks | People | Food | Food | BigTrucks | Horse | Mountain | Beach | Dinosaurs | Flowers | Dinosaurs | BigTrucks | BigTrucks | Elephant | Mountain | Flowers | BigTrucks | BigTrucks | People | Dinosaurs | Flowers | Food | Beach | BigTrucks | Elephant | Beach | BigTrucks | Flowers | BigTrucks | Flowers | Horse | BigTrucks | Horse | BigTrucks | People | Elephant | Dinosaurs | Horse | Food | Food | BigTrucks | Elephant | Dinosaurs | People | People | Food | BigTrucks | Flowers | Elephant | Mountain | Elephant | Elephant | People | BigTrucks | Food | Elephant | Flowers | Beach | Dinosaurs | Food | Horse | Mountain | Architecture | People | Dinosaurs | Food | Food | Horse | People | BigTrucks | Architecture | Flowers | Elephant | Flowers | BigTrucks | Dinosaurs | Flowers | Dinosaurs | Elephant | Flowers | Horse | People | Beach | People | Architecture | Mountain | Dinosaurs | BigTrucks | People | Food | Elephant | Food | Flowers | Horse | Horse | Food | Dinosaurs | Architecture | Flowers | Mountain | Beach | Food | Flowers | BigTrucks | BigTrucks | Elephant | Dinosaurs | Dinosaurs | Architecture | Architecture | BigTrucks | Architecture | Elephant | Horse | People | People | Elephant | Dinosaurs | Mountain | Elephant | Flowers | People | Food | Mountain | People | Dinosaurs | Dinosaurs | People | Dinosaurs | Elephant | People | Horse | Beach | Mountain | Elephant | Horse | BigTrucks | Food | Mountain | Beach | Horse | People | Elephant | Dinosaurs | People | People | Horse | Elephant | Horse | BigTrucks | Mountain | Mountain | Elephant | 
Architecture | Horse | Food | Horse | BigTrucks | Food | Beach |
预测结果标签 | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
People | People | BigTrucks | Flowers | Food | Beach | Architecture | BigTrucks | Dinosaurs | Horse | Elephant | Flowers | Horse | BigTrucks | Flowers | Architecture | Beach | Beach | Architecture | Architecture | Architecture | Elephant | Dinosaurs | Dinosaurs | Mountain | Horse | Architecture | Beach | Horse | Architecture | Mountain | Horse | Food | Horse | Elephant | Horse | People | Dinosaurs | People | BigTrucks | BigTrucks | People | Food | Food | BigTrucks | Horse | Mountain | Mountain | Dinosaurs | Flowers | Dinosaurs | BigTrucks | BigTrucks | Elephant | Mountain | Flowers | BigTrucks | BigTrucks | People | Dinosaurs | Flowers | People | Beach | BigTrucks | Elephant | Beach | BigTrucks | Flowers | BigTrucks | Food | Horse | BigTrucks | Horse | BigTrucks | People | Elephant | Dinosaurs | Horse | Food | Food | BigTrucks | Elephant | Dinosaurs | People | Beach | Food | BigTrucks | Flowers | Elephant | Mountain | Elephant | Elephant | People | BigTrucks | Food | Elephant | Flowers | Food | Dinosaurs | Food | Horse | Mountain | Architecture | People | Dinosaurs | Food | Food | Horse | People | BigTrucks | Beach | Flowers | Elephant | Flowers | BigTrucks | Dinosaurs | Flowers | Dinosaurs | Elephant | Flowers | Horse | People | Beach | People | Architecture | Mountain | Dinosaurs | BigTrucks | People | People | Elephant | Food | Flowers | Beach | Horse | Food | Dinosaurs | Architecture | Flowers | Beach | Beach | Food | Flowers | BigTrucks | BigTrucks | Elephant | Dinosaurs | Dinosaurs | Beach | Beach | BigTrucks | Architecture | Elephant | Horse | People | People | Elephant | Dinosaurs | Beach | Elephant | Flowers | Food | Food | Elephant | People | Dinosaurs | Dinosaurs | People | Dinosaurs | Elephant | People | Horse | Beach | Mountain | Elephant | Horse | BigTrucks | People | Mountain | Beach | Elephant | People | Elephant | Dinosaurs | People | People | Horse | Elephant | Horse | BigTrucks | Mountain | Mountain | Elephant | Architecture | Horse | People | 
Horse | BigTrucks | Food | Beach |
精度:0.9 | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||