GraphLassoCV & affinity_propagation

'''
Previous exercise (eigenfaces: PCA + SVC grid search on the LFW faces dataset),
left commented out here:
from sklearn import datasets, model_selection, svm, decomposition, pipeline, metrics
import matplotlib.pyplot as plt

lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=.4)
n_images, h, w = lfw_people.images.shape
x = lfw_people.images.reshape((n_images, -1))
n_feature = x.shape[1]
print(n_feature)
target_names = lfw_people.target_names
n_class = len(target_names)
print(n_class)
y = lfw_people.target
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=.25)

svc = svm.SVC(class_weight='balanced')
pca = decomposition.PCA(whiten=True, svd_solver='randomized')
pipe = pipeline.Pipeline([('pca', pca), ('svc', svc)])
gs = model_selection.GridSearchCV(pipe, {'pca__n_components': [8, 16, 24, 32, 48, 56, 64], 'svc__C': [1e3, 5e3, 1e4, 5e4, 1e5], 'svc__gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}, n_jobs=-1, cv=5, iid=False)
gs.fit(x_train, y_train)
print(gs.score(x_test, y_test))
y_pred = gs.predict(x_test)
print(metrics.classification_report(y_test, y_pred, target_names=target_names))
print(metrics.confusion_matrix(y_test, y_pred, labels=range(n_class)))


def gallery(titles, images, h, w, ncols=4, nrows=3):
    plt.figure(figsize=(1.8*ncols, 2.4*nrows))
    plt.subplots_adjust(hspace=.24, left=.01, right=.99, bottom=0)
    for i in range(ncols*nrows):
        plt.subplot(nrows, ncols, i+1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.xticks(())
        plt.yticks(())
        plt.title(titles[i])
    plt.show()


def title(target_names, y_true, y_pred, i):
    true_name = target_names[y_true[i]]
    pred_name = target_names[y_pred[i]]
    return '%s\n%s' % (true_name, pred_name)


titles = [title(target_names, y_test, y_pred, i) for i in range(len(x_test))]
gallery(titles, x_test, h, w)
eigenface = gs.best_estimator_.named_steps['pca'].components_
eigentitle = ['eigenface%i' % (i+1) for i in range(len(eigenface))]
gallery(eigentitle, eigenface, h, w)

print(gs.best_estimator_.named_steps['pca'].n_components_)
n = len(gs.best_estimator_.named_steps['pca'].explained_variance_ratio_)
print(n)
plt.figure()
plt.plot(range(1, n+1), gs.best_estimator_.named_steps['pca'].explained_variance_ratio_)
plt.show()
'''

import numpy
import pandas
from sklearn import cluster, covariance

symbol_dict = {
    'TOT': 'Total',
    'XOM': 'Exxon',
    'CVX': 'Chevron',
    'COP': 'ConocoPhillips',
    'VLO': 'Valero Energy',
    'MSFT': 'Microsoft',
    'IBM': 'IBM',
    'TWX': 'Time Warner',
    'CMCSA': 'Comcast',
    'CVC': 'Cablevision',
    'YHOO': 'Yahoo',
    'DELL': 'Dell',
    'HPQ': 'HP',
    'AMZN': 'Amazon',
    'TM': 'Toyota',
    'CAJ': 'Canon',
    'SNE': 'Sony',
    'F': 'Ford',
    'HMC': 'Honda',
    'NAV': 'Navistar',
    'NOC': 'Northrop Grumman',
    'BA': 'Boeing',
    'KO': 'Coca Cola',
    'MMM': '3M',
    'MCD': 'McDonald\'s',
    'PEP': 'Pepsi',
    'K': 'Kellogg',
    'UN': 'Unilever',
    'MAR': 'Marriott',
    'PG': 'Procter Gamble',
    'CL': 'Colgate-Palmolive',
    'GE': 'General Electric',
    'WFC': 'Wells Fargo',
    'JPM': 'JPMorgan Chase',
    'AIG': 'AIG',
    'AXP': 'American Express',
    'BAC': 'Bank of America',
    'GS': 'Goldman Sachs',
    'AAPL': 'Apple',
    'SAP': 'SAP',
    'CSCO': 'Cisco',
    'TXN': 'Texas Instruments',
    'XRX': 'Xerox',
    'WMT': 'Wal-Mart',
    'HD': 'Home Depot',
    'GSK': 'GlaxoSmithKline',
    'PFE': 'Pfizer',
    'SNY': 'Sanofi-Aventis',
    'NVS': 'Novartis',
    'KMB': 'Kimberly-Clark',
    'R': 'Ryder',
    'GD': 'General Dynamics',
    'RTN': 'Raytheon',
    'CVS': 'CVS',
    'CAT': 'Caterpillar',
    'DD': 'DuPont de Nemours'
}

# Sort by ticker symbol, then split into parallel arrays of symbols and names.
symbol_list, name_list = numpy.array(sorted(symbol_dict.items())).T

# Load historical quotes; each data/<symbol>.csv is expected to contain at least
# 'open' and 'close' columns (one row per trading day).
quote_list = []
for symbol in symbol_list:
    quote_list.append(pandas.read_csv('data/{}.csv'.format(symbol)))

# Daily open-to-close variation for each stock; clustering on variations
# captures co-movement rather than absolute price level.
close_price_list = numpy.vstack([q['close'] for q in quote_list])
open_price_list = numpy.vstack([q['open'] for q in quote_list])
variation_list = close_price_list - open_price_list

# Rows are trading days, columns are stocks; scale each stock to unit variance.
x = variation_list.T
x /= x.std(0)

# Sparse inverse-covariance estimate (graphical lasso), with the l1 penalty
# chosen by cross-validation. GraphLassoCV was renamed GraphicalLassoCV in
# scikit-learn 0.20 and removed in 0.22; see the sketch after the cluster loop.
edge_model = covariance.GraphLassoCV(cv=5)
edge_model.fit(x)

# Affinity propagation on the learned covariance picks the number of clusters
# automatically; print the company names grouped by cluster label.
_, label_list = cluster.affinity_propagation(edge_model.covariance_)
for i in range(label_list.max()+1):
    print('cluster%i %s' % (i+1, ' '.join(name_list[label_list == i])))
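
On a recent scikit-learn the code above no longer imports, since GraphLassoCV was
removed in 0.22. A minimal sketch of the same two steps, assuming scikit-learn
>= 0.23 (GraphicalLassoCV plays the same role, and affinity_propagation accepts
random_state for a reproducible clustering); x and name_list are the arrays
built above:

from sklearn import cluster, covariance

# l1-penalised covariance estimation, regularisation strength chosen by CV
edge_model = covariance.GraphicalLassoCV(cv=5)
edge_model.fit(x)  # x: (n_days, n_stocks), scaled to unit variance per stock

_, label_list = cluster.affinity_propagation(edge_model.covariance_, random_state=0)
for i in range(label_list.max() + 1):
    print('cluster%i %s' % (i + 1, ' '.join(name_list[label_list == i])))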
    

 
