# 一、感知机
import numpy as np
from matplotlib import pyplot as plt
# Build a two-class toy data set for the perceptron demo and plot it.
np.random.seed(3)  # fixed seed so the generated samples are reproducible

# Shared x coordinates: 0.0, 0.1, ... up to (but excluding) 5.
# Kept as a plain list on purpose: `x1 + x1` below must concatenate, not add.
# (np.linspace(0, 5, 50) was suggested as an alternative, but it includes the
# endpoint 5 and therefore produces different values.)
x1 = list(np.arange(0, 5, 0.1))

# Positive class: |N(0, 1)| samples, i.e. points close to the x axis.
x2 = np.abs(np.random.randn(50))
# Negative class: |N(0, 1) + 8| samples, i.e. points around y = 8.
x3 = np.abs(np.random.randn(50) + 8)

plt.scatter(x1, x2)
plt.scatter(x1, x3)

# Assemble the data matrix: one row per sample, columns = feature 1,
# feature 2, label.
# Feature 1: the x coordinates, once for each class.
x_1 = x1 + x1
# Feature 2: positive-class values followed by negative-class values.
x_2 = list(x2) + list(x3)
# Labels: +1 for the positive samples, -1 for the negative ones; the counts
# follow the sample arrays instead of being hard-coded.
y = [1] * len(x2) + [-1] * len(x3)
# The original pre-allocated `data` with np.zeros and immediately overwrote
# it; np.c_ already builds the full matrix (shape (100, 3) for this demo).
data = np.c_[x_1, x_2, y]
class Perceptron():
    """Perceptron trained by repeatedly correcting misclassified samples.

    :param data: ndarray of shape (N, P + 1); the first P columns are the
        features and the last column is the label (expected to be +1 or -1)
    :param lr: learning rate
    :param maxiter: maximum number of passes over the misclassified samples
    :param w_vect: initial weight column vector of shape (P + 1, 1); its last
        entry acts as the bias
    """

    def __init__(self, data, lr, maxiter, w_vect):
        self.data = data
        self.w = w_vect
        self.lr = lr
        self.maxiter = maxiter

    def get_wrong(self):
        """
        :return: the misclassified rows of ``self.data`` and their row indices
        """
        # Append a constant-1 column so the bias is part of the dot product.
        x = np.c_[self.data[:, :-1], np.ones(self.data.shape[0])]
        labels = self.data[:, -1].reshape(-1, 1)
        # label * score <= 0: the sample is on the wrong side of the
        # hyperplane or exactly on it.
        wrong_index = np.where(x.dot(self.w) * labels <= 0)[0]
        # Index the instance's own data; the original read the module-level
        # `data`, which only worked when that global happened to be the same
        # array.
        return self.data[wrong_index, :], wrong_index

    def fit(self):
        """Update ``self.w`` until nothing is misclassified or ``maxiter``
        passes have been made.

        Each pass collects the currently misclassified samples once and then
        applies one update per collected sample, in random order.
        """
        for _ in range(self.maxiter):
            wrong_data, _wrong_index = self.get_wrong()
            if wrong_data.shape[0] == 0:
                break
            # Shuffle whole rows so every feature vector stays paired with
            # its own label. The original shuffled only the feature matrix,
            # so updates mixed one sample's label with another's features.
            # Different orders can lead to different separating hyperplanes.
            order = np.random.permutation(wrong_data.shape[0])
            wrong_data = wrong_data[order]
            x = np.c_[wrong_data[:, :-1], np.ones(wrong_data.shape[0])]
            for features, label in zip(x, wrong_data[:, -1]):
                gradient = (-label * features).reshape(-1, 1)
                # Rebinding (not in-place) keeps an integer w_vect usable.
                self.w = self.w - self.lr * gradient
# Train the perceptron on `data` and draw the resulting decision boundary.
# Initial weights: a zero column vector with one entry per feature plus one
# for the bias (np.zeros((data.shape[1], 1)) would give the same values as
# floats).
w_vect = np.array([[0], [0], [0]])
a = Perceptron(data, 0.01, 200, w_vect)
a.fit()
print(a.w)

# Split the learned column vector into the two feature weights and the bias.
weights = a.w
w1, w2, bias = weights[0][0], weights[1][0], weights[-1][0]

# Boundary w1*x + w2*y + bias = 0, solved for y at every x in x1.
x6 = -w1 / w2 * np.array(x1) - bias / w2

# Both classes again, with the separating line on top.
plt.scatter(x1, x2)
plt.scatter(x1, x3)
plt.plot(x1, x6)
#建立kd树
import numpy as np
import matplotlib.pyplot as plt
class kdTree():
    """One node of a kd-tree over 2-D points; the root stands for the tree.

    Build the tree with ``createNextNode(points)`` on a root created as
    ``kdTree(None)``, then draw it with ``plotKdTree()``.
    """

    def __init__(self, parent_node):
        """Create an empty node attached to ``parent_node`` (None for root)."""
        self.nodedata = None       # point stored at this node (2-D)
        self.split = None          # split axis: 0 = along x, 1 = along y
        self.range = None          # split threshold on the split axis
        self.left = None           # left child node
        self.right = None          # right child node
        self.parent = parent_node  # parent node
        self.leftdata = None       # all points passed to the left subtree
        self.rightdata = None      # all points passed to the right subtree
        self.isinvted = False      # "visited" flag; never set in this class

    def print(self):
        """Print this node's point, split axis and split threshold."""
        print(self.nodedata, self.split, self.range)

    def getSplitAxis(self, all_data):
        """Return the axis (0 or 1) with the larger variance; ties give 1."""
        var_all_data = np.var(all_data, axis=0)
        return 0 if var_all_data[0] > var_all_data[1] else 1

    def getRange(self, split_axis, all_data):
        """Return the median value of ``all_data`` along ``split_axis``.

        For an even number of points the upper of the two middle values is
        used, so the result is always a coordinate of an actual point.
        """
        sorted_values = np.sort(all_data[:, split_axis])
        return sorted_values[sorted_values.shape[0] // 2]

    def getNodeLeftRigthData(self, all_data):
        """Split ``all_data`` into this node's point and the two subtrees.

        Points below the threshold go left. The first point that sits exactly
        on the threshold becomes this node's point. Everything else,
        including further points equal to the threshold, goes right.
        """
        ls_leftdata = []
        ls_rightdata = []
        for now_data in all_data:
            value = now_data[self.split]
            if value < self.range:
                ls_leftdata.append(now_data)
            # `is None`, not `== None`: once nodedata holds an ndarray, `==`
            # compares element-wise and the original raised ValueError on the
            # second point that equals the threshold.
            elif value == self.range and self.nodedata is None:
                self.nodedata = now_data
            else:
                ls_rightdata.append(now_data)
        self.leftdata = np.array(ls_leftdata)
        self.rightdata = np.array(ls_rightdata)

    def createNextNode(self, all_data):
        """Recursively build the subtree rooted at this node from ``all_data``.

        ``all_data`` is an (N, 2) ndarray. For empty input a message is
        printed and nothing is built.
        """
        if all_data.shape[0] == 0:
            print("create kd tree finished!")
            return None
        self.split = self.getSplitAxis(all_data)
        self.range = self.getRange(self.split, all_data)
        self.getNodeLeftRigthData(all_data)
        if self.leftdata.shape[0] != 0:
            self.left = kdTree(self)
            self.left.createNextNode(self.leftdata)
        if self.rightdata.shape[0] != 0:
            self.right = kdTree(self)
            self.right.createNextNode(self.rightdata)

    def _plotEdge(self, child, color):
        """Draw the edge to ``child`` plus an arrow halfway along it."""
        plt.plot([self.nodedata[0], child.nodedata[0]],
                 [self.nodedata[1], child.nodedata[1]], '-o', color=color)
        plt.arrow(x=self.nodedata[0], y=self.nodedata[1],
                  dx=(child.nodedata[0] - self.nodedata[0]) / 2.0,
                  dy=(child.nodedata[1] - self.nodedata[1]) / 2.0,
                  color=color, head_width=0.2)

    def plotKdTree(self):
        """Recursively draw the parent-to-child edges of the tree.

        The root opens a new figure limited to [0, 10] on both axes. Every
        node picks one random colour for the edges to its children. The
        splitting lines themselves are not drawn.
        """
        if self.parent is None:
            plt.figure(dpi=300)
            plt.xlim([0.0, 10.0])
            plt.ylim([0.0, 10.0])
        color = np.random.random(3)
        # Left subtree first, then right, matching the build order.
        for child in (self.left, self.right):
            if child is not None:
                self._plotEdge(child, color)
                child.plotKdTree()
# Demo: build a kd-tree over 30 random 2-D points and draw it.
# np.random.random yields values in [0, 1); scaling by 10 matches the
# 0..10 axis limits used by plotKdTree.
test_array = np.random.random([30, 2]) * 10.0
# A node without a parent is the root.
my_kd_tree = kdTree(parent_node=None)
my_kd_tree.createNextNode(all_data=test_array)
my_kd_tree.plotKdTree()