在使用 estimator.fit() 的过程中，出现 "float() argument must be a string or a number" 错误。经检查，是 means_init 的计算逻辑有误，导致模型没有得到初始均值。
# -*- coding:utf-8 -*-
import cPickle
import matplotlib as mpl
import matplotlib.pyplot as plt
from dataset import *
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import StratifiedKFold
# One plot colour per class / mixture component, looked up by index.
colors = [
    'aliceblue',
    'aquamarine',
    'brown',
    'coral',
    'cyan',
    'dimgray',
    'fuchsia',
]
def make_ellipses(gmm, ax):
    """Draw one covariance ellipse per mixture component on ``ax``.

    For every entry of the module-level ``colors`` list, the covariance of
    the component with the same index is turned into a matrix according to
    ``gmm.covariance_type``, eigen-decomposed, and drawn as a
    half-transparent ellipse centred on the first two coordinates of the
    component mean.

    Args:
        gmm: Fitted mixture model exposing ``covariance_type``,
            ``covariances_`` and ``means_``. It needs at least
            ``len(colors)`` components.
        ax: Matplotlib axes that receives the ellipse artists.

    Raises:
        ValueError: If ``gmm.covariance_type`` is not one of 'full',
            'tied', 'diag' or 'spherical'.
    """
    for n, color in enumerate(colors):
        if gmm.covariance_type == 'full':
            covariances = gmm.covariances_[n][:2, :2]
        elif gmm.covariance_type == 'tied':
            # A single covariance matrix is shared by all components.
            covariances = gmm.covariances_[:2, :2]
        elif gmm.covariance_type == 'diag':
            covariances = np.diag(gmm.covariances_[n][:2])
        elif gmm.covariance_type == 'spherical':
            covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n]
        else:
            # Without this branch `covariances` would be unbound (or stale
            # from a previous iteration) further down.
            raise ValueError(
                'unsupported covariance_type: %r' % (gmm.covariance_type,))

        v, w = np.linalg.eigh(covariances)
        # NOTE(review): w[0] is the first *row* of the eigenvector matrix,
        # while numpy.linalg.eigh returns eigenvectors as columns. Kept
        # unchanged from the original; confirm the intended orientation.
        u = w[0] / np.linalg.norm(w[0])
        angle = np.arctan2(u[1], u[0])
        angle = 180 * angle / np.pi  # radians -> degrees
        v = 2. * np.sqrt(2.) * np.sqrt(v)
        # `angle` is passed by keyword: newer Matplotlib releases reject it
        # as a positional argument, and the keyword form also works on
        # older releases.
        ell = mpl.patches.Ellipse(gmm.means_[n, :2], v[0], v[1],
                                  angle=180 + angle, color=color)
        ell.set_clip_box(ax.bbox)
        ell.set_alpha(0.5)
        ax.add_artist(ell)
def main():
    """Fit one GaussianMixture per covariance type and plot the results.

    Steps:
      1. Load the feature matrix from 'berlin_features.p' and the database
         object from 'berlin_db.p'; labels are read from ``db.targets``.
      2. Use the first of four stratified folds as the train/test split.
      3. Initialise every estimator's means with the per-class mean of the
         training features, fit it on the training features, and draw the
         component ellipses, all samples, the test samples and the
         train/test accuracy into one subplot per covariance type.

    Raises:
        ValueError: If one of the labels ``0 .. len(classes) - 1`` has no
            sample in the training fold, since no initial mean can be
            computed for that component.
    """
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open('berlin_features.p', 'rb') as f_Fglobal:
        Fglobal = cPickle.load(f_Fglobal)
    with open('berlin_db.p', 'rb') as f_db:
        db = cPickle.load(f_db)

    # Convert once instead of on every use.
    X = np.array(Fglobal)
    y = np.array(db.targets)
    classes = {0: 'W', 1: 'L', 2: 'E', 3: 'A', 4: 'F', 5: 'T', 6: 'N'}
    n_classes = len(classes)

    skf = StratifiedKFold(n_splits=4)
    # Only the first fold is used.
    train_index, test_index = next(iter(skf.split(X, y)))
    X_train = X[train_index]
    # Debug output kept from the original; a single-argument print(...)
    # produces the same text under the Python 2 print statement.
    print(X_train)
    print(type(X_train))
    Y_train = y[train_index]
    X_test = X[test_index]
    Y_test = y[test_index]

    # Per-class means are the same for every estimator, so compute them once.
    # An empty class would silently yield NaN means and a confusing failure
    # inside fit(), so report it explicitly instead.
    class_means = []
    for i in range(n_classes):
        members = X_train[Y_train == i]
        if len(members) == 0:
            raise ValueError(
                'no training samples with label %r; means_init needs every '
                'label in 0..%d to be present in the targets'
                % (i, n_classes - 1))
        class_means.append(members.mean(axis=0))
    means_init = np.array(class_means)

    estimators = {
        cov_type: GaussianMixture(n_components=n_classes,
                                  covariance_type=cov_type,
                                  max_iter=100, random_state=0)
        for cov_type in ['spherical', 'diag', 'tied', 'full']
    }
    n_estimators = len(estimators)

    plt.figure(figsize=(3 * n_estimators // 2, 6))
    plt.subplots_adjust(bottom=0.01, top=0.95, hspace=.15, wspace=.05,
                        left=.01, right=.99)

    for index, (name, estimator) in enumerate(estimators.items()):
        # Each estimator gets its own copy so they never share an array.
        estimator.means_init = means_init.copy()
        estimator.fit(X_train)

        h = plt.subplot(2, n_estimators // 2, index + 1)
        make_ellipses(estimator, h)

        # All samples as small dots, coloured by label.
        for n, color in enumerate(colors):
            data = X[y == n]
            plt.scatter(data[:, 0], data[:, 1], s=0.8, color=color,
                        label=classes[n])
        # Test samples as crosses.
        for n, color in enumerate(colors):
            data = X_test[Y_test == n]
            plt.scatter(data[:, 0], data[:, 1], marker='x', color=color)

        # Predicted component indices are compared directly with labels.
        y_train_pred = estimator.predict(X_train)
        train_accuracy = np.mean(y_train_pred.ravel() == Y_train.ravel()) * 100
        plt.text(0.05, 0.9, 'train_accuracy: %.1f' % train_accuracy,
                 transform=h.transAxes)

        y_test_pred = estimator.predict(X_test)
        test_accuracy = np.mean(y_test_pred.ravel() == Y_test.ravel()) * 100
        plt.text(0.05, 0.8, 'test_accuracy: %.1f' % test_accuracy,
                 transform=h.transAxes)

        plt.xticks(())
        plt.yticks(())
        plt.title(name)

    plt.legend(scatterpoints=1, loc='lower right', prop=dict(size=12))
    plt.show()
if __name__ == '__main__':
    # Run the experiment only when executed as a script, not on import.
    main()
其中 dataset.py 出自 emotion 项目。