以下是实验设计
设计一个一维的数据,14个数据,7个成一组,一个高斯分布,整体数据隐含了2个高斯分布。
系统最初给出第一个数属于z0的概率0.9,最后一个数属于在z1的概率0.9,其余数据不可判定。
迭代到最后,自动识别前7个数属于z0,后7个数属于z1。
实验代码
include "stdio.h"
#include "stdlib.h"
#include "stdint.h"
#include "math.h"
double PI = 3.1415926;
double Gauss(const double px,const double mu,const doublecsigma)
{
double val =sqrt(2*PI*csigma);
val = 1/val;
val =val*exp(-pow(px-mu,2)/(2*csigma));
return val ;
}
int main(void)
{
double x[] = {1.5,1.8,1.9,2.0,2.1,2.2,2.5,
3.0,3.9,3.9,4.0,4.1,4.1,5.0};
int m = sizeof(x)/sizeof(double);
const int k = 2;
double fei[k] ;
double mean[k] ;
double mao[k] ;
doubleprobability_x[][2]={{0.9,0.1},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.5,0.5},{0.
1,0.9}}; // first make the 1.5 belong to z0 ,wihle 5.0 belongto z1
bool improve_large = false;
int step = 0;
do{
//E-step
for(intj=0;j<k&&step!=0;++j)
{
for(inti=0;i<m;++i)
{
probability_x[i][j] =
Gauss(x[i],mean[j],mao[j])*fei[j]/ (Gauss(x[i],mean[0],mao[0])*fei[0]+Gauss(x[i],mean[1],mao[1])*fei[1] );
}
}
//M-step
for(intj=0;j<k;++j)
{
fei[j] =0.0;
doublesum_prob = 0.0;
for(inti=0;i<m;++i)
{
sum_prob += probability_x[i][j];
}
fei[j] =sum_prob / m;
}
for(intj=0;j<k;++j)
{
mean[j] =0.0;
doubleweight = 0.0;
doublesum_prob = 0.0;
for(inti=0;i<m;++i)
{
weight += x[i]*probability_x[i][j];
sum_prob += probability_x[i][j];
}
mean[j] =weight/sum_prob;
}
for(intj=0;j<k;++j)
{
mao[j] =0.0;
doubleweight = 0.0;
doublesum_prob = 0.0;
for(inti=0;i<m;++i)
{
weight +=(x[i]-mean[j])*(x[i]-mean[j])*probability_x[i][j];
sum_prob += probability_x[i][j];
}
mao[j] =weight/sum_prob;
}
printf("********************%d*************************\n",step++);
printf("%f,%f,%f\n",fei[0],mean[0],mao[0]);
printf("%f,%f,%f\n",fei[1],mean[1],mao[1]);
for(inti=0;i<m;++i)
{
printf("%f\t%f\n",probability_x[i][0],probability_x[i][1]);
}
printf("***********************************************\n");
}while(!improve_large&& step<100);
return 0;
}