参考文章
# 模拟两个正态分布(高斯分布)混合数据的均值估计:θ 是我们要估计的均值参数。
# 由于使用的是高斯分布,每个样本在给定其所属分量 z 的条件下服从高斯分布,即 p(x | z; θ) 为高斯密度。
# (原文此处配有示意图。)EM 算法交替执行两步:E 步固定 θ,优化 Q(隐变量的后验分布);
# M 步固定 Q,优化 θ;两步交替进行,不断把似然函数的下界推向(局部)极大值。
# # 模拟两个正态分布的均值估计
#
from numpy import *
import numpy as np
import random
import copy
# Standard deviation shared by both components in the EM likelihood
# (also used as the noise scale when the synthetic samples are generated).
SIGMA = 6
# Convergence threshold: EM stops once the summed absolute change of the
# estimated means between two iterations drops below this value.
EPS = 0.0001
# 生成方差相同,均值不同的样本
def generate_data(n=1000, miu1=20, miu2=40, sigma=None):
    """Draw samples from an equal-weight mixture of two Gaussians.

    Each sample picks one of the two components with probability 0.5 and is
    then drawn from a normal distribution centred on that component's mean.
    Both components share the same standard deviation.

    Args:
        n: Number of samples to generate.
        miu1: Mean of the first component.
        miu2: Mean of the second component.
        sigma: Shared standard deviation; defaults to the module-level
            ``SIGMA`` when omitted.

    Returns:
        An ``(n, 1)`` ``numpy.matrix`` holding the samples.
    """
    if sigma is None:
        sigma = SIGMA

    samples = np.empty((n, 1))
    for i in range(n):
        # The component choice and the noise must be independent draws;
        # reusing one uniform number for both yields non-Gaussian, biased data.
        center = miu1 if random.uniform(0, 1) > 0.5 else miu2
        samples[i, 0] = random.gauss(center, sigma)

    # np.asmatrix keeps the historical matrix return type (the bare `mat`
    # alias no longer exists in NumPy 2.x).
    return np.asmatrix(samples)
# EM算法
def my_EM(X, sigma=None, eps=None, max_iter=1000):
    """Estimate the means of a two-component Gaussian mixture with EM.

    The model uses equal mixing weights and a known standard deviation shared
    by both components, so the means are the only parameters being learned.
    The change of the means is printed after every iteration, and a summary
    is printed once the change falls below ``eps``.

    Args:
        X: Samples as an ``(N, 1)`` matrix/array or any array-like of N
            scalars.
        sigma: Shared standard deviation; defaults to the module-level
            ``SIGMA`` when omitted.
        eps: Convergence threshold on the summed absolute change of the
            means; defaults to the module-level ``EPS`` when omitted.
        max_iter: Maximum number of EM iterations.

    Returns:
        A ``(2, 1)`` ndarray with the estimated means (the last iterate if
        the iteration limit is reached before convergence).

    Raises:
        ValueError: If ``X`` contains no samples.
    """
    if sigma is None:
        sigma = SIGMA
    if eps is None:
        eps = EPS

    k = 2
    samples = np.asarray(X, dtype=float).reshape(-1, 1)  # (N, 1)
    if samples.shape[0] == 0:
        raise ValueError("my_EM needs at least one sample")

    miu = np.random.rand(k, 1)

    for iteration in range(max_iter):
        # E-step: posterior Q_i(z) of every component for every sample.
        log_lik = -((samples - miu.T) ** 2) / (2.0 * sigma ** 2)  # (N, k)
        # Shift by the row maximum before exponentiating so that samples far
        # from every current mean do not underflow to 0/0 = NaN.
        log_lik -= log_lik.max(axis=1, keepdims=True)
        posterior = np.exp(log_lik)
        posterior /= posterior.sum(axis=1, keepdims=True)

        old_miu = miu.copy()

        # M-step: each mean becomes the posterior-weighted sample average.
        weights = posterior.sum(axis=0)  # (k,)
        weighted_sums = posterior.T @ samples  # (k, 1)
        # A component that received no responsibility keeps its old mean
        # instead of turning into NaN.
        alive = weights > 0
        miu[alive] = weighted_sums[alive] / weights[alive][:, None]

        delta = np.abs(miu - old_miu).sum()
        print(delta)
        if delta < eps:
            print('-----------')
            print('两个正态分布的均值估计是', miu)
            print('迭代次数', iteration)
            break

    return miu
if __name__ == "__main__":
    # Script entry point: generate the synthetic sample set and run EM on it.
    my_EM(generate_data())