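/*
Similarity and distance measures between feature vectors.
Similarity:
- Cosine of the included angle
- Correlation coefficient
- Dice
- Jaccard
Distance:
- Minkowski distance
- Euclidean distance
- Manhattan distance
- Jeffreys & Matusita distance
- Mahalanobis distance (not implemented; requires a covariance matrix)
- Canberra distance (Lance distance, Williams distance)
*/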
#include <cassert>
#include <cmath>
#include <iostream>
#include <vector>
using namespace std;
// Inner (dot) product of two vectors.
//   v1, v2: operands; must have the same number of elements (checked by assert).
// Returns the sum of v1[i] * v2[i] over all indices; 0.0 when both are empty.
double dotProduct(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    double ret = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        ret += v1[i] * v2[i];
    }
    return ret;
}
// Euclidean norm (length) of a vector.
//   v: input vector; may be empty.
// Returns the square root of the sum of squared elements; 0.0 for an empty vector.
double module(const std::vector<double>& v)
{
    double sumSquares = 0.0;
    for (std::vector<double>::size_type i = 0; i != v.size(); ++i)
    {
        sumSquares += v[i] * v[i];
    }
    return std::sqrt(sumSquares);
}
// Cosine similarity: dot(v1, v2) / (|v1| * |v2|).
//   v1, v2: vectors of equal length (checked by assert).
// Returns the cosine of the angle between the two vectors. When either vector
// has zero norm the expression is 0/0, which yields NaN under IEEE-754
// arithmetic; callers that may pass zero vectors must handle that themselves.
double cosine(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    // Accumulate the dot product and both squared norms in one traversal
    // instead of walking the vectors three times.
    double dot = 0.0;
    double sumSq1 = 0.0;
    double sumSq2 = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        dot += v1[i] * v2[i];
        sumSq1 += v1[i] * v1[i];
        sumSq2 += v2[i] * v2[i];
    }
    return dot / (std::sqrt(sumSq1) * std::sqrt(sumSq2));
}
// Arithmetic mean of the elements of a vector.
//   v: input vector; must not be empty (checked by assert).
// Returns the sum of the elements divided by the element count.
double mean(const std::vector<double>& v)
{
    assert(!v.empty());
    double sum = 0.0;
    for (std::vector<double>::size_type i = 0; i != v.size(); ++i)
    {
        sum += v[i];
    }
    return sum / static_cast<double>(v.size());
}
double cov(const vector& v1, const vector& v2)
{
assert(v1.size() == v2.size() && v1.size() > 1);
double ret = 0.0;
double v1a = mean(v1), v2a = mean(v2);
for (vector::size_type i = 0; i != v1.size(); ++i)
{
ret += (v1[i] - v1a) * (v2[i] - v2a);
}
return ret / (v1.size() - 1);
}
// Correlation coefficient: cov(v1, v2) / sqrt(cov(v1, v1) * cov(v2, v2)).
//   v1, v2: series of equal length (checked by assert here); cov() additionally
//           asserts that there are at least two elements.
// Returns the covariance normalised by the product of the standard deviations.
// If either series has zero variance the denominator is 0 and the result is
// not a finite number.
double coefficient(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    return cov(v1, v2) / std::sqrt(cov(v1, v1) * cov(v2, v2));
}
// Dice coefficient: 2 * dot(v1, v2) / (dot(v1, v1) + dot(v2, v2)).
//   v1, v2: vectors of equal length (checked by assert).
// Returns the coefficient; when both vectors are all zeros the expression is
// 0/0, which yields NaN under IEEE-754 arithmetic.
double dice(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    // One traversal collects all three inner products.
    double dot12 = 0.0;
    double dot11 = 0.0;
    double dot22 = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        dot12 += v1[i] * v2[i];
        dot11 += v1[i] * v1[i];
        dot22 += v2[i] * v2[i];
    }
    return 2.0 * dot12 / (dot11 + dot22);
}
// Jaccard (Tanimoto) coefficient for real-valued vectors:
//   dot(v1, v2) / (dot(v1, v1) + dot(v2, v2) - dot(v1, v2)).
//   v1, v2: vectors of equal length (checked by assert).
// Returns the coefficient; 1.0 for identical non-zero vectors. When both
// vectors are all zeros the expression is 0/0, which yields NaN under
// IEEE-754 arithmetic.
double jaccard(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    double dot12 = 0.0;
    double dot11 = 0.0;
    double dot22 = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        dot12 += v1[i] * v2[i];
        dot11 += v1[i] * v1[i];
        dot22 += v2[i] * v2[i];
    }
    // The first denominator term must be |v1|^2; using v1.v2 there would
    // cancel against the subtracted term and leave only |v2|^2.
    return dot12 / (dot11 + dot22 - dot12);
}
// Minkowski distance of order m: (sum |v1[i] - v2[i]|^m)^(1/m).
//   v1, v2: vectors of equal length (checked by assert).
//   m:      order of the distance; must be positive (checked by assert),
//           since the result is raised to the power 1/m.
// Returns the distance; 0.0 for two empty vectors.
double minkowsky(const std::vector<double>& v1, const std::vector<double>& v2, double m)
{
    assert(v1.size() == v2.size());
    assert(m > 0.0);
    double ret = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        // std::abs selects the floating-point overload; an unqualified abs
        // may bind to the integer version and truncate the difference.
        ret += std::pow(std::abs(v1[i] - v2[i]), m);
    }
    return std::pow(ret, 1.0 / m);
}
// Euclidean distance: sqrt(sum (v1[i] - v2[i])^2).
//   v1, v2: vectors of equal length (checked by assert).
// Returns the distance; 0.0 for two empty vectors.
// This is the Minkowski distance of order 2, computed directly with a
// multiplication and sqrt rather than through the general pow routine.
double euclidean(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    double sumSquares = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        const double diff = v1[i] - v2[i];
        sumSquares += diff * diff;
    }
    return std::sqrt(sumSquares);
}
// Manhattan (city-block) distance: sum |v1[i] - v2[i]|.
//   v1, v2: vectors of equal length (checked by assert).
// Returns the distance; 0.0 for two empty vectors.
// This is the Minkowski distance of order 1, computed directly so that no
// pow call is needed.
double manhattan(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    double ret = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        ret += std::abs(v1[i] - v2[i]);
    }
    return ret;
}
// Jeffreys & Matusita distance: sqrt(sum (sqrt(v1[i]) - sqrt(v2[i]))^2).
//   v1, v2: vectors of equal length whose elements are non-negative (both
//           checked by assert); the square root of a negative element would
//           turn the whole result into NaN.
// Returns the distance; 0.0 for two empty vectors.
double jffreysMatusita(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    double ret = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        assert(v1[i] >= 0.0 && v2[i] >= 0.0);
        // Take each square root once and reuse the difference.
        const double diff = std::sqrt(v1[i]) - std::sqrt(v2[i]);
        ret += diff * diff;
    }
    return std::sqrt(ret);
}
// Mahalanobis distance -- NOT IMPLEMENTED.
//   v1, v2: vectors of equal length (checked by assert); otherwise unused.
// Always returns 0.0 as a placeholder. A real implementation needs a
// covariance matrix, which this signature does not supply.
// TODO: implement once a covariance matrix is available to the function.
double mahalanobis(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    // Keep the parameters referenced when assert is compiled out (NDEBUG).
    (void)v1;
    (void)v2;
    return 0.0;
}
// Canberra distance (Lance distance, Williams distance):
//   sum |v1[i] - v2[i]| / (|v1[i]| + |v2[i]|).
//   v1, v2: vectors of equal length (checked by assert).
// Returns the distance; 0.0 for two empty vectors.
// A coordinate where both elements are zero would be 0/0; by the usual
// convention such a term contributes 0 and is skipped.
double camberra(const std::vector<double>& v1, const std::vector<double>& v2)
{
    assert(v1.size() == v2.size());
    double ret = 0.0;
    for (std::vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        const double numerator = std::abs(v1[i] - v2[i]);
        // |x| + |y| rather than |x + y|: the two agree for same-sign data,
        // but |x + y| vanishes for x == -y and would divide by zero.
        const double denominator = std::abs(v1[i]) + std::abs(v2[i]);
        if (denominator != 0.0)
        {
            ret += numerator / denominator;
        }
    }
    return ret;
}
// Demo driver: builds two sample 5-element vectors and prints every
// similarity and distance measure defined in this file, one value per line.
int main()
{
    const double a[] = {1, 2, 3, 4, 5};
    const double b[] = {5, 4, 3, 2, 1};
    const std::vector<double> v1(a, a + sizeof (a) / sizeof (*a));
    const std::vector<double> v2(b, b + sizeof (b) / sizeof (*b));
    // '\n' instead of endl: same output without flushing after every line.
    std::cout << cosine(v1, v2) << '\n';
    std::cout << coefficient(v1, v2) << '\n';
    std::cout << dice(v1, v2) << '\n';
    std::cout << jaccard(v1, v2) << '\n';
    std::cout << minkowsky(v1, v2, 5.0) << '\n';
    std::cout << euclidean(v1, v2) << '\n';
    std::cout << manhattan(v1, v2) << '\n';
    std::cout << jffreysMatusita(v1, v2) << '\n';
    std::cout << mahalanobis(v1, v2) << '\n';  // placeholder, prints 0
    std::cout << camberra(v1, v2) << '\n';
    return 0;
}