# Bayesian classification: see the previous blog post.
# -*- coding: utf-8 -*-
# @Time : 19-4-10 上午8:34
# @Author : MRB
# @File : demo2.py
# @Software: PyCharm Community Edition
from numpy import *
def str2list(data_str):
    """Parse a whitespace-separated string of integers into a list of ints.

    :param data_str: text such as ``"1 2 3"``; surrounding whitespace is ignored.
    :return: the parsed integers in input order (an empty list for blank input).
    :raises ValueError: if any token is not a valid integer literal.
    """
    # split() with no separator already discards leading/trailing whitespace.
    tokens = data_str.split()
    return list(map(int, tokens))
#数据格式化
def load_data(n):
    """Read a labelled training set and one test sample from standard input.

    Input is consumed line by line, each line holding whitespace-separated
    integers:

      1. one line with the class labels,
      2. ``n`` lines, one training row per line,
      3. one line with the test row.

    :param n: number of training samples (and therefore labels) expected.
    :return: ``(labels, train_set, test_set)`` on success, or ``-1`` when the
        number of labels read differs from ``n``. In that case nothing after
        the label line is read.
    """
    labels = str2list(input())
    if len(labels) != n:
        # NOTE(review): the -1 sentinel is kept for backward compatibility;
        # a caller that unpacks three values must check for it first.
        return -1

    train_set = [str2list(input()) for _ in range(n)]
    test_set = str2list(input())
    return labels, train_set, test_set
#数据统计---方便求出概率
def count(data_mat, labels):
    """Tally feature-value frequencies per class and compute the label mean.

    :param data_mat: feature matrix, one row per sample; every row is indexed
        by the column positions of the first row.
    :param labels: per-sample class labels, indexed in step with ``data_mat``.
        A label equal to 0 goes to class ``"0"``; any other value is counted
        under class ``"1"``.
    :return: ``(result, pA)`` where
        ``result[class_key][str(column)][str(value)]`` is the number of
        samples of that class holding ``value`` in that column, and ``pA`` is
        ``sum(labels) / len(data_mat)`` (the share of samples labelled 1 when
        labels are 0/1).
    """
    sample_total = len(data_mat)
    feature_total = len(data_mat[0])
    # `sum` is whatever the module scope provides (the file star-imports numpy).
    pA = sum(labels) / float(sample_total)

    result = {"0": {}, "1": {}}
    for col in range(feature_total):
        zero_tally = {}
        one_tally = {}
        for row_idx, row in enumerate(data_mat):
            # Pick the tally that matches this sample's class.
            tally = zero_tally if labels[row_idx] == 0 else one_tally
            value_key = str(row[col])
            tally[value_key] = tally.get(value_key, 0) + 1
        col_key = str(col)
        result['0'][col_key] = zero_tally
        result['1'][col_key] = one_tally
    return result, pA
def main():
# n = int(input('n:'))
# labels, train_set, test_set = load_data(n)
n = 9
labels = [0, 0, 0, 0, 1, 1, 1, 1, 1]
train_set = [[0, 0, 30, 450, 7], [1, 1, 5, 500, 3], [1, 0, 10, 150, 1], [0, 1, 40, 300, 6], [1, 0, 20, 100, 10],
[0, 1, 25, 180, 12], [0, 0, 32, 50, 11], [1, 0, 23, 120, 9], [0, 0, 27, 200, 8]]
test_set = [0, 0, 40, 180, 8]
# print(labels)
# print(train_set)
# print(test_set)
for index in range(2,5):
max_n = min_n = train_set[0][index]
for i in range(n):
if max_n < train_set[i][index]:
max_n = train_set[i][index]
if min_n > train_set[i][index]:
min_n = train_set[i][index]
step = (max_n - min_n)/3.0
for i in range(n):
if train_set[i][index]