回顾:
实现ransac地面分割
DBSCNA python 复现-1- 距离矩阵法
DBSCNA python 复现-2-kd-_tree加速
链接:https://pan.baidu.com/s/1aTTBMqGsisnPJvQXEtHRVw 提取码:865H
因为数据点比较多,运行了26s左右 (cpu:AMD 3700x)
自写的聚类效果并不是很好
iters = 25.955452
segment datasize:55762
[-1 -1 -1 ... -1 -1 -1]
生成的聚类个数:224
dbscan time:27.400408
使用sklearn自带库 速度很快
可以很好的将人体轮廓聚类出来
自写
def dbscan_clustering(data):
#使用sklearn dbscan 库
#eps 两个样本的最大距离,即扫描半径; min_samples 作为核心点的话邻域(即以其为圆心,eps为半径的圆,含圆上的点)中的最小样本数(包括点本身);n_jobs :使用CPU格式,-1代表全开
# cluster_index = DBSCAN(eps=0.25,min_samples=5,n_jobs=-1).fit_predict(data)
# 使用自写
cluster_index = dbs.DBSCAN(data,eps=0.25,Minpts=5)
print(cluster_index)
return cluster_index
sklearn dbscan
def dbscan_clustering(data):
#使用sklearn dbscan 库
#eps 两个样本的最大距离,即扫描半径; min_samples 作为核心点的话邻域(即以其为圆心,eps为半径的圆,含圆上的点)中的最小样本数(包括点本身);n_jobs :使用CPU格式,-1代表全开
cluster_index = DBSCAN(eps=0.25,min_samples=5,n_jobs=-1).fit_predict(data)
# 使用自写
#cluster_index = dbs.DBSCAN(data,eps=0.25,Minpts=5)
print(cluster_index)
return cluster_index
#clustering.py
# 文件功能:
# 1. 从数据集中加载点云数据
# 2. 从点云数据中滤除地面点云
# 3. 从剩余的点云中提取聚类
import numpy as np
import open3d as o3d
import struct
from sklearn import cluster, datasets, mixture
import matplotlib.pyplot as plt
from pandas import DataFrame
from pyntcloud import PyntCloud
import math
import random
from collections import defaultdict
from sklearn.cluster import DBSCAN
import DBSCAN_fast as dbs
# matplotlib显示点云函数
def Point_Cloud_Show(points):
fig = plt.figure(dpi=150)
ax = fig.add_subplot(111, projection='3d')
ax.scatter(points[:, 0], points[:, 1], points[:, 2], cmap='spectral', s=2, linewidths=0, alpha=1, marker=".")
plt.title('Point Cloud')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
# 功能:从kitti的.bin格式点云文件中读取点云
# 输入:
# path: 文件路径
# 输出:
# 点云数组
def read_velodyne_bin(path):
'''
:param path:
:return: homography matrix of the point cloud, N*3
'''
pc_list = []
with open(path, 'rb') as f:
content = f.read()
pc_iter = struct.iter_unpack('ffff', content)
for idx, point in enumerate(pc_iter):
pc_list.append([point[0], point[1], point[2]])
return np.asarray(pc_list, dtype=np.float32)
# 功能:从点云文件中滤除地面点
# 输入:
# data: 一帧完整点云
# 输出:
# segmengted_cloud: 删除地面点之后的点云
def ground_segmentation(data):
# 作业1
# 屏蔽开始
#初始化数据
idx_segmented = []
segmented_cloud = []
iters = 100 #最大迭代次数 000002.bin:10
sigma = 0.4 #数据和模型之间可接受的最大差值 000002.bin:0.5 000001.bin: 0.2 000000.bin: 0.15 002979.bin:0.15 004443.bin:0.4
##最好模型的参数估计和内点数目,平面表达方程为 aX + bY + cZ +D= 0
best_a = 0
best_b = 0
best_c = 0
best_d = 0
pretotal = 0 #上一次inline的点数
#希望的到正确模型的概率
P = 0.99
n = len(data) #点的数目
outline_ratio = 0.6 #e :outline_ratio 000002.bin:0.6 000001.bin: 0.5 000000.bin: 0.6 002979.bin:0.6
for i in range(iters):
ground_cloud = []
idx_ground = []
#step1 选择可以估计出模型的最小数据集,对于平面拟合来说,就是三个点
sample_index = random.sample(range(n),3) #重数据集中随机选取3个点
point1 = data[sample_index[0]]
point2 = data[sample_index[1]]
point3 = data[sample_index[2]]
#step2 求解模型
##先求解法向量
point1_2 = (point1-point2) #向量 poin1 -> point2
point1_3 = (point1-point3) #向量 poin1 -> point3
N = np.cross(point1_3,point1_2) #向量叉乘求解 平面法向量
##slove model 求解模型的a,b,c,d
a = N[0]
b = N[1]
c = N[2]
d = -N.dot(point1)
#step3 将所有数据带入模型,计算出“内点”的数目;(累加在一定误差范围内的适合当前迭代推出模型的数据)
total_inlier = 0
pointn_1 = (data - point1) #sample(三点)外的点 与 sample内的三点其中一点 所构成的向量
distance = abs(pointn_1.dot(N))/ np.linalg.norm(N) #求距离
##使用距离判断inline
idx_ground = (distance <= sigma)
total_inlier = np.sum(idx_ground == True) #统计inline得点数
##判断当前的模型是否比之前估算的模型
if total_inlier > pretotal: # log(1 - p)
iters = math.log(1 - P) / math.log(1 - pow(total_inlier / n, 3)) #N = ------------
pretotal = total_inlier #log(1-[(1-e)**s])
#获取最好得 abcd 模型参数
best_a = a
best_b = b
best_c = c
best_d = d
# 判断是否当前模型已经符合超过 inline_ratio
if total_inlier > n*(1-outline_ratio):
break
print("iters = %f" %iters)
#提取分割后得点
idx_segmented = np.logical_not(idx_ground)
ground_cloud = data[idx_ground]
segmented_cloud = data[idx_segmented]
return ground_cloud,segmented_cloud
# 屏蔽结束
# print('origin data points num:', data.shape[0])
# print('segmented data points num:', segmengted_cloud.shape[0])
# return segmengted_cloud
# 功能:从点云中提取聚类
# 输入:
# data: 点云(滤除地面后的点云)
# 输出:
# clusters_index: 一维数组,存储的是点云中每个点所属的聚类编号(参考上一章内容容易理解)
def spectral_clustering(data,k):
# 作业2
# 屏蔽开始
n = len(data)
cluster_data = [[] for _ in range(k)]
cluster_data_index = []
#进行 Spectral 谱聚类
spectral = sp.Spectral(n_clusters=k)
spectral.fit(data) #聚类
cat = spectral.predict(data)
#将聚类好的点,相同类中得点得index 放到相同得列表中
d = defaultdict(list)
##构建字典,将key值相同的放到一起
for key , index in [(v,i) for i,v in enumerate(cat)]:
d[key].append(index)
for val in d.values():
cluster_data_index.append(val)
for i in range(k):
for j in cluster_data_index[i]:
cluster_data[i].append(data[j])
return cluster_data
def dbscan_clustering(data):
#使用sklearn dbscan 库
#eps 两个样本的最大距离,即扫描半径; min_samples 作为核心点的话邻域(即以其为圆心,eps为半径的圆,含圆上的点)中的最小样本数(包括点本身);n_jobs :使用CPU格式,-1代表全开
# cluster_index = DBSCAN(eps=0.25,min_samples=5,n_jobs=-1).fit_predict(data)
# 使用自写
cluster_index = dbs.DBSCAN(data,eps=0.25,Minpts=5)
print(cluster_index)
return cluster_index
# 屏蔽结束
# return clusters_index
# 功能:显示聚类点云,每个聚类一种颜色
# 输入:
# data:点云数据(滤除地面后的点云)
# cluster_index:一维数组,存储的是点云中每个点所属的聚类编号(与上同)
def plot_clusters(segmented_ground, segmented_cloud, cluster_index):
"""
Visualize segmentation results using Open3D
Parameters
----------
segmented_cloud: numpy.ndarray
Segmented surrounding objects as N-by-3 numpy.ndarray
segmented_ground: numpy.ndarray
Segmented ground as N-by-3 numpy.ndarray
cluster_index: list of int
Cluster ID for each point
"""
def colormap(c, num_clusters):
"""
Colormap for segmentation result
Parameters
----------
c: int
Cluster ID
C
"""
# outlier:
if c == -1:
color = [1]*3
# surrouding object:
else:
color = [0] * 3
color[c % 3] = c/num_clusters
return color
# ground element:
pcd_ground = o3d.geometry.PointCloud()
pcd_ground.points = o3d.utility.Vector3dVector(segmented_ground)
pcd_ground.colors = o3d.utility.Vector3dVector(
[
[0, 0, 255] for i in range(segmented_ground.shape[0])
]
)
# surrounding object elements:
pcd_objects = o3d.geometry.PointCloud()
pcd_objects.points = o3d.utility.Vector3dVector(segmented_cloud)
num_clusters = max(cluster_index) + 1
pcd_objects.colors = o3d.utility.Vector3dVector(
[
colormap(c, num_clusters) for c in cluster_index
]
)
# visualize:
o3d.visualization.draw_geometries([pcd_ground, pcd_objects])
def main():
iteration_num = 1 #文件数
# for i in range(iteration_num):
filename = 'data/004443.bin' #数据集路径
print('clustering pointcloud file:', filename)
origin_points = read_velodyne_bin(filename) #读取数据点
origin_points_df = DataFrame(origin_points,columns=['x', 'y', 'z']) # 选取每一列 的 第0个元素到第二个元素 [0,3)
point_cloud_pynt = PyntCloud(origin_points_df) # 将points的数据 存到结构体中
point_cloud_o3d = point_cloud_pynt.to_instance("open3d", mesh=False) # 实例化
#
# o3d.visualization.draw_geometries([point_cloud_o3d]) # 显示原始点云
# # 地面分割
ground_points, segmented_points = ground_segmentation(data=origin_points)
#
# ground_points_df = DataFrame(ground_points, columns=['x', 'y', 'z']) # 选取每一列 的 第0个元素到第二个元素 [0,3)
# point_cloud_pynt_ground = PyntCloud(ground_points_df) # 将points的数据 存到结构体中
# point_cloud_o3d_ground = point_cloud_pynt_ground.to_instance("open3d", mesh=False) # 实例化
# point_cloud_o3d_ground.paint_uniform_color([0, 0, 255])
#
#
# #显示segmentd_points示地面点云
# segmented_points_df = DataFrame(segmented_points, columns=['x', 'y', 'z']) # 选取每一列 的 第0个元素到第二个元素 [0,3)
# point_cloud_pynt_segmented = PyntCloud(segmented_points_df) # 将points的数据 存到结构体中
# point_cloud_o3d_segmented = point_cloud_pynt_segmented.to_instance("open3d", mesh=False) # 实例化
# point_cloud_o3d_segmented.paint_uniform_color([255, 0, 0])
#
# o3d.visualization.draw_geometries([point_cloud_o3d_ground,point_cloud_o3d_segmented])
# 显示聚类结果
# 显示segmentd_points 分割后每个类的点云
cluster_index = dbscan_clustering(segmented_points)
plot_clusters(ground_points, segmented_points, cluster_index)
if __name__ == '__main__':
main()
#DBSCAN_fast.py
# 文件功能:实现 Spectral 谱聚类 算法
import numpy as np
from numpy import *
import scipy
import pylab
import random, math
from numpy.random import rand
from numpy import square, sqrt
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from scipy.stats import multivariate_normal
from result_set import KNNResultSet, RadiusNNResultSet
from sklearn.cluster import KMeans
import kdtree as kdtree
import time
#from scipy.spatial import KDTree
from sklearn.neighbors import KDTree # KDTree 进行搜索
import copy
plt.style.use('seaborn')
# matplotlib显示点云函数
def Point_Show(point,point_index):
def colormap(c, num_clusters):
if c == -1:
color = [1] * 3
# surrouding object:
else:
color = [0] * 3
color[c % 3] = c / num_clusters
return color
x = []
y = []
num_clusters = max(point_index) + 1
point = np.asarray(point)
for i in range(len(point)):
x.append(point[i][0])
y.append(point[i][1])
plt.scatter(x, y,color=[colormap(c,num_clusters) for c in point_index])
plt.show()
#构建距离矩阵
def my_distance_Marix(data):
S = np.zeros((len(data), len(data))) # 初始化 关系矩阵 w 为 n*n的矩阵
# step1 建立关系矩阵, 每个节点都有连线,权重为距离的倒数
for i in range(len(data)): # i:行
for j in range(len(data)): # j:列
S[i][j] = np.linalg.norm(data[i] - data[j]) # 二范数计算两个点直接的距离,两个点之间的权重为之间距离的倒数
return S
# @profile
def DBSCAN(data, eps, Minpts):
"""
基于密度的点云聚类
:param d_bbox: 点与点之间的距离矩阵
:param eps: 最大搜索直径阈值
:param Minpts: 最小包含其他对象数量阈值
:return: 返回聚类结果,是一个嵌套列表,每个子列表就是这个区域的对象的序号
"""
n = len(data)
# 构建kd_tree
leaf_size = 4
tree = KDTree(data,leaf_size)
#step1 初始化核心对象集合T,聚类个数k,聚类集合C, 未访问集合P
T = set() #set 集合
k = 0 #第k类
cluster_index = np.zeros(n,dtype=int) #聚类集合
unvisited = set(range(n)) #初始化未访问集合
#step2 通过判断,通过kd_tree radius NN找出所有核心点
nearest_idx = tree.query_radius(data, eps) # 进行radius NN搜索,半径为epsion,所有点的最临近点储存在 nearest_idx中
for d in range(n):
if len(nearest_idx[d]) >= Minpts: #临近点数 > min_sample,加入核心点
T.add(d) #最初得核心点
#step3 聚类
while len(T): #visited core ,until all core points were visited
unvisited_old = unvisited #更新为访问集合
core = list(T)[np.random.randint(0,len(T))] #从 核心点集T 中随机选取一个 核心点core
unvisited = unvisited - set([core]) #把核心点标记为 visited,从 unvisited 集合中剔除
visited = []
visited.append(core)
while len(visited):
new_core = visited[0]
#kd-tree radius NN 搜索邻近
if new_core in T: #如果当前搜索点是核心点
S = unvisited & set(nearest_idx[new_core]) #当前 核心对象的nearest 与 unvisited 的交集
visited += (list(S)) #对该new core所能辐射的点,再做检测
unvisited = unvisited - S #unvisited 剔除已 visited 的点
visited.remove(new_core) #new core 已做检测,去掉new core
cluster = unvisited_old - unvisited #原有的 unvisited # 和去掉了 该核心对象的密度可达对象的visited就是该类的所有对象
T = T - cluster #去掉该类对象里面包含的核心对象,差集
cluster_index[list(cluster)] = k
k += 1 #类个数
noise_cluster = unvisited
cluster_index[list(noise_cluster)] = -1 #噪声归类为 1
print(cluster_index)
print("生成的聚类个数:%d" %k)
return cluster_index
# 生成仿真数据
def generate_X(true_Mu, true_Var):
# 第一簇的数据
num1, mu1, var1 = 400, true_Mu[0], true_Var[0]
X1 = np.random.multivariate_normal(mu1, np.diag(var1), num1)
# 第二簇的数据
num2, mu2, var2 = 600, true_Mu[1], true_Var[1]
X2 = np.random.multivariate_normal(mu2, np.diag(var2), num2)
# 第三簇的数据
num3, mu3, var3 = 1000, true_Mu[2], true_Var[2]
X3 = np.random.multivariate_normal(mu3, np.diag(var3), num3)
# 合并在一起
X = np.vstack((X1, X2, X3))
# 显示数据
plt.figure(figsize=(10, 8))
plt.axis([-10, 15, -5, 15])
plt.scatter(X1[:, 0], X1[:, 1], s=5)
plt.scatter(X2[:, 0], X2[:, 1], s=5)
plt.scatter(X3[:, 0], X3[:, 1], s=5)
#plt.show()
return X
if __name__ == '__main__':
# 生成数据
true_Mu = [[0.5, 0.5], [5.5, 2.5], [1, 7]]
true_Var = [[1, 3], [2, 2], [6, 2]]
X = generate_X(true_Mu, true_Var)
begin_t =time.time()
cluster_index = DBSCAN(X,eps=0.5,Minpts=15)
dbscan_time = time.time() - begin_t
print("dbscan time:%f"%dbscan_time)
Point_Show(X,cluster_index)