DBSCAN算法c++实现

 首先计算出距离每个点e以内的点,如果个数>=minPts,则找出它的直接密度可达和间接可达的点,用visited标记点是否已经在簇中,循环直到最后一个点。

#include 
#include 
#include 
#include 
#include 
#include 
using namespace std;

struct Point
{
    double x;
    double y;
};

double distance(const Point& a, const Point& b)
{
    return sqrt(pow(a.x - b.x,2) + pow(a.y - b.y,2));
}

vector > getPointSet(const vector data, double e)
{
    vector> PointSet(data.size());         //用到了PointSet[i],要先进行初始化
    //vector> PointSet;
    for(int i=0;i> DAIANA(vector> point,int MinPts)
{
    vector visted(point.size(),false);
    vector> cluster;
    for (int i = 0; i < point.size(); i++)
    {
        if (!visted[i])
        {
            if (point[i].size() >= MinPts)              //{几个}  
            {
                for (set::const_iterator j = point[i].begin(); j != point[i].end(); j++)
                {
                    visted[*j] = true;
                    if (point[*j].size() >= MinPts)         //间接相连
                    {
                        for (auto k = point[*j].begin(); k != point[*j].end(); k++)
                        {
                            visted[*k] = true;
                            point[i].insert(*k);
                        }
                        //point[i].insert(point[*j].begin(), point[*j].end());
                    }
                }
                cluster.push_back(point[i]);
            }
        }
       
    }
    return cluster;
}


vector ReadData(string filename)
{
    vector data;
    ifstream file(filename);
    if (file.is_open())
    {
        string line;
        while (getline(file, line))
        {
            istringstream iss(line);
            double x, y;
            string token;
            Point point;
            if (getline(iss, token, ',') && istringstream(token) >> point.x &&
                getline(iss, token, ',') && istringstream(token) >> point.y) {
                data.push_back(point);
            }
        }
    }
    else
    {
        cout << "open fail";
    }
    file.close();
    return data;
}

int main()
{ 
    vector dataset = ReadData("data.txt");
    double e;
    int MinPts;
    cin >> e >> MinPts;

    vector > point;
    vector> cluster;
    point=getPointSet(dataset, e);
    cluster=DAIANA(point, MinPts);
    for (int i = 0; i < cluster.size(); i++)
    {
        cout << "{";
        int count = 0;
        for (auto j = cluster[i].begin(); j != cluster[i].end(); j++)
        {
            
                cout << *j+1 ;
                if (++count < cluster[i].size())
                {
                    cout << ",";
                }
        }
        cout << "}" << endl;
    }
}

或 

判断是否是核心点,如果是,将它的直接密度可达点放进set neighbors;再判断这些直接密度可达点里如果有核心点则又将该直接密度可达点放进set subNeighbors,最后neighbors.insert(subNeighbors.begin(), subNeighbors.end());

std::vector> dbscan(const std::vector& dataset, double epsilon, int minPts) {
    std::vector> clusters;
    std::vector visited(dataset.size(), false);
    
    for (int i = 0; i < dataset.size(); ++i) {
        if (visited[i]) continue;
        
        visited[i] = true;
        std::vector cluster;
        
        std::set neighbors;
        for (int j = 0; j < dataset.size(); ++j) {
            if (calculateDistance(dataset[i], dataset[j]) <= epsilon) {
                neighbors.insert(j);
            }
        }
        
        if (neighbors.size() >= minPts) {
            for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
                int index = *it;
                if (!visited[index]) {
                    visited[index] = true;
                    std::set subNeighbors;
                    
                    for (int j = 0; j < dataset.size(); ++j) {
                        if (calculateDistance(dataset[index], dataset[j]) <= epsilon) {
                            subNeighbors.insert(j);
                        }
                    }
                    
                    if (subNeighbors.size() >= minPts) {
                        neighbors.insert(subNeighbors.begin(), subNeighbors.end());
                    }
                }
            }
        }
        
        for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
            cluster.push_back(dataset[*it]);
        }
        
        if (!cluster.empty()) {
            clusters.push_back(cluster);
        }
    }
    
    return clusters;
}

数据集:参考数据挖掘原理与算法第四版DBSCAN例子

1.0, 0.0
4.0, 0.0
0.0, 1.0
1.0, 1.0
2.0, 1.0
3.0,1.0    
4.0,1.0    
5.0,1.0    
0.0,2.0    
1.0,2.0    
4.0,2.0    
1.0,3.0

结果

DBSCAN算法c++实现_第1张图片

你可能感兴趣的:(数据挖掘,c++,算法,开发语言)