import numpy as np
from scipy.stats import norm
def gmm_em(X ,K=2, max_iter=100, epsilon=1e-4, T = 5):
N = len(X)
print('-----EM optimization for GMM-----')
# EM算法只能保证收敛到稳定点,不能保证收敛至极大值点
# 常用的办法是进行几次迭代,对结果做比较,并选择最好
Mu = []
Sigma = []
for t in range(T):
# 初始化
alpha = np.ones([K, 1]) /K # 权重
mu = np.ones([K, 1]) # 均值
sigma = np.ones([K, 1]) # 标准差
for k in range(K):
n = np.random.randint(K*3, K*30)
x = np.random.choice(X, n)
mu[k][0] = np.mean(x)
sigma[k][0] = np.std(x, ddof = 1)
alpha_, mu_, sigma_ = alpha, mu, sigma
for _iter in range(max_iter):
# expectation 隐变量估计
gamma = np.zeros([K, N])
for k in range(K):
gamma[k] = alpha[k] * norm.pdf(X, loc=mu[k][0], scale=sigma[k][0])
s = np.sum(gamma, axis=0, keepdims=True)
s[s==0] = np.inf
gamma /= s
# maximization 更新参数
mu_ = np.sum(gamma*X, axis=1, keepdims=True) / np.sum(gamma, axis=1, keepdims=True)
sigma_ = np.sqrt( np.sum(gamma*(X-mu)**2, axis=1, keepdims=True) / np.sum(gamma, axis=1, keepdims=True) )
alpha_ = np.sum(gamma, axis=1, keepdims=True) / N
# 计算Q函数
q = np.zeros([K,])
for k in range(K):
joint_dist = np.clip(alpha[k]*norm.pdf(X, loc=mu[k][0], scale=sigma[k][0]), 1e-300, 1)
q[k] = np.sum( gamma[k]*np.log( joint_dist ) )
Q = np.sum(q)
q_ = np.zeros([K,])
for k in range(K):
joint_dist_ = np.clip(alpha_[k]*norm.pdf(X, loc=mu_[k][0], scale=sigma_[k][0]), 1e-300, 1)
q_[k] = np.sum( gamma[k]*np.log( joint_dist_ ) )
Q_ = np.sum(q_)
# 对数似然函数
# L = np.zeros([K, N])
# for k in range(K):
# L[k] = alpha_[k] * norm.pdf(X, loc=mu_[k][0], scale=sigma_[k][0])
# L = np.sum(L, axis=0)
# L = np.sum(np.log(np.clip(L, 1e-300, 1)))
# if (_iter+1)%10 == 0:
print('T:', t+1, ' Iteration:', _iter+1, ' Q value:', Q_)
alpha = alpha_
mu = mu_
sigma = sigma_
# 判断是否收敛
if np.abs(Q_-Q) < epsilon:
break
idx = mu[:,0].argsort()
mu = mu[idx]
sigma = sigma[idx]
Mu.append(mu)
Sigma.append(sigma)
Mu = np.concatenate(Mu, axis=1)
Sigma = np.concatenate(Sigma, axis=1)
# 剔除离群值
index = np.array([True]*T)
for _ in range(int(T/K)):
Mu_ = Mu[:,index]
mean = np.mean(Mu_, axis=1, keepdims=True)
Mu_centered = np.abs( Mu_ - mean)
idx = np.argmax(Mu_centered, axis=1)
index[idx] = False
# 取均值
Mu = np.mean(Mu[:,index], axis=1)
Sigma = np.mean(Sigma[:,index], axis=1)
return Mu, Sigma
if __name__ == '__main__':
    # Synthetic benchmark: mixture of two well-separated 1-D Gaussians.
    X1 = norm.rvs(loc=5, scale=10, size=7000)
    X2 = norm.rvs(loc=65, scale=16, size=3000)
    X = np.concatenate([X1, X2])
    np.random.shuffle(X)
    # Ground-truth sample statistics of each component, for comparison.
    mu1, sigma1 = np.mean(X1), np.std(X1)
    mu2, sigma2 = np.mean(X2), np.std(X2)
    mu, sigma = gmm_em(X)
    # Report the fit against the known statistics (the original script
    # computed these values but never displayed them).
    print(f'true:      mu1={mu1:.3f} sigma1={sigma1:.3f}'
          f' mu2={mu2:.3f} sigma2={sigma2:.3f}')
    print(f'estimated: mu1={mu[0]:.3f} sigma1={sigma[0]:.3f}'
          f' mu2={mu[1]:.3f} sigma2={sigma[1]:.3f}')
# 参考资料 (References):
# 《统计学习方法》李航 (Li Hang, "Statistical Learning Methods" — EM algorithm chapter)