ex7: K-Means与PCA

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import random

# Load the 'ex7data2' MATLAB file and use its 'X' entry as the sample matrix.
data = loadmat('ex7data2')
X = data['X']

def initialize(X, K=3):
    """Draw K rows of X at random (without replacement) as initial centroids.

    Parameters
    ----------
    X : numpy.ndarray
        2-D sample matrix, one example per row.
    K : int, optional
        Number of centroids to draw (default 3).

    Returns
    -------
    numpy.ndarray
        Array of shape (K, X.shape[1]) holding the sampled rows.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when K exceeds the number of rows.
    """
    # Sampling goes through plain Python lists on purpose: the returned array
    # then gets NumPy's default dtype for the values instead of inheriting
    # X's dtype (e.g. uint8), which keeps later subtractions from wrapping.
    rows = X.tolist()
    return np.array(random.sample(rows, K))


def findClosestCentroids(X, centroids):
    """Assign every example to its nearest centroid.

    Distances are squared Euclidean distances. When several centroids are
    equally close, the one with the lowest index wins (the previous
    ``np.where(d == min)`` lookup raised on such ties).

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix of shape (m, n).
    centroids : numpy.ndarray
        Centroid matrix of shape (K, n).

    Returns
    -------
    numpy.ndarray
        Float array of shape (m, 1); entry i is the index of the centroid
        closest to ``X[i]``.

    Raises
    ------
    ValueError
        If ``centroids`` has no rows.
    """
    # Work in floating point so unsigned integer inputs cannot wrap around
    # when subtracted.
    samples = np.asarray(X, dtype=float)
    centres = np.asarray(centroids, dtype=float)
    # Broadcasting yields an (m, K, n) array of differences in one step.
    diff = samples[:, np.newaxis, :] - centres[np.newaxis, :, :]
    sq_dist = np.sum(diff ** 2, axis=2)
    # argmin returns the first minimum per row, so ties are well defined.
    return np.argmin(sq_dist, axis=1).astype(float).reshape(-1, 1)

def computeMeans(X, idx, K):
    """Return the mean of the examples assigned to each of the K clusters.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix of shape (m, n).
    idx : numpy.ndarray
        Cluster index per example; flattened before use, so both (m,) and
        (m, 1) layouts work.
    K : int
        Number of clusters.

    Returns
    -------
    numpy.ndarray
        Array of shape (K, n). A cluster with no members yields a row of
        NaN, because the mean of an empty selection is NaN.
    """
    n_features = X.shape[1]
    means = np.zeros((K, n_features))
    for k in range(K):
        members = X[np.ravel(idx == k), :]
        means[k, :] = np.mean(members, axis=0).reshape(1, -1)
    return means
def kMeansInitCentroids(X, K, iterations=1):
    """Run K-Means from a random start and return the resulting centroids.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix, one example per row.
    K : int
        Number of clusters. It is now forwarded to ``initialize`` as well;
        the previous code always sampled 3 starting centroids regardless
        of K.
    iterations : int, optional
        Number of assign/update rounds. The default of 1 matches the
        single-step behaviour this function had; passing 10 reproduces the
        multi-step variant that used to be kept here as a disabled string
        block.

    Returns
    -------
    numpy.ndarray
        Centroids after the requested number of rounds.
    """
    centroids = initialize(X, K=K)
    for _ in range(iterations):
        idx = findClosestCentroids(X, centroids)
        centroids = computeMeans(X, idx, K)
    return centroids

def plot_data(X):
    """Scatter the samples and overlay ten centroid sets on top of them.

    Each overlay comes from a separate ``kMeansInitCentroids(X, K=3)`` call,
    so the ten lines are ten independent results rather than successive
    steps of one run. Only the first two columns of X are plotted.
    """
    n_runs = 10
    plt.figure()
    first_col, second_col = X[:, 0], X[:, 1]
    plt.scatter(first_col, second_col, c='blue', marker='o')
    for _ in range(n_runs):
        centres = kMeansInitCentroids(X, K=3)
        plt.plot(centres[:, 0], centres[:, 1], '->', linewidth=5)
    plt.show()
# Run the K-Means plot on the samples loaded above.
plot_data(X)

ex7: K-Means与PCA_第1张图片

二、K-means 压缩图片

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import random
import cv2 as cv
from matplotlib import colors
from scipy import io as spio
import scipy.misc

# Read the image file; img_data is the same array divided by 255.
# NOTE(review): cv.imread is not checked for failure here -- confirm the file exists.
img = cv.imread(r'bird_small.png')
img_data = img/255.0

# Reshape the image data into an M x 3 matrix (M is the number of pixels in the image).
# NOTE(review): the reshape uses the raw `img`, not the scaled `img_data` -- confirm this is intended.
X = img.reshape((-1,3))


def initialize(X, K=16):
    """Draw K rows of X at random (without replacement) as initial centroids.

    Parameters
    ----------
    X : numpy.ndarray
        2-D sample matrix, one example (here: one pixel) per row.
    K : int, optional
        Number of centroids to draw (default 16).

    Returns
    -------
    numpy.ndarray
        Array of shape (K, X.shape[1]) holding the sampled rows.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when K exceeds the number of rows.
    """
    # The detour through Python lists is deliberate: the result takes
    # NumPy's default dtype for the values rather than X's dtype, so
    # centroids sampled from a uint8 image do not wrap around when they are
    # subtracted from pixels later on.
    pixels = X.tolist()
    return np.array(random.sample(pixels, K))


def findClosestCentroids(X, centroids):
    """Return, for every row of X, the index of its nearest centroid.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix of shape (m, n).
    centroids : numpy.ndarray
        Centroid matrix of shape (K, n).

    Returns
    -------
    numpy.ndarray
        Float array of shape (m,) with the column index of the smallest
        squared distance per example (first one on ties).
    """
    n_samples = X.shape[0]
    n_centroids = centroids.shape[0]
    sq_dist = np.zeros((n_samples, n_centroids))
    for row in range(n_samples):
        for col in range(n_centroids):
            sq_dist[row, col] = np.linalg.norm(X[row, :] - centroids[col, :]) ** 2
    # Column index of the minimum in each row, stored as floats.
    return np.array([np.argmin(distances) for distances in sq_dist], dtype=float)

def computeMeans(X, idx, K):
    """Compute one centroid per cluster as the mean of its assigned rows.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix of shape (m, n).
    idx : numpy.ndarray
        Cluster index of every example; it is flattened before being used
        as a row mask.
    K : int
        Number of clusters.

    Returns
    -------
    numpy.ndarray
        Array of shape (K, n). The row of a cluster without any members is
        NaN, since it is the mean of an empty selection.
    """
    new_centroids = np.zeros((K, X.shape[1]))
    for cluster in range(K):
        mask = np.ravel(idx == cluster)
        cluster_mean = np.mean(X[mask, :], axis=0)
        new_centroids[cluster, :] = cluster_mean.reshape(1, -1)
    return new_centroids

def kMeansInitCentroids(X, K, iterations=10):
    """Run K-Means from a random start and return the final centroids.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix, one example per row.
    K : int
        Number of clusters. It is forwarded to ``initialize`` too; the
        previous code always sampled 16 starting centroids regardless of K.
    iterations : int, optional
        Number of assign/update rounds (default 10, the value that used to
        be hard-coded).

    Returns
    -------
    numpy.ndarray
        Centroids after the last round.
    """
    centroids = initialize(X, K=K)
    for inter in range(iterations):
        print(u'迭代计算次数:%d' % (inter + 1))
        # These two steps must sit inside the loop: previously they were
        # dedented, so the loop only printed and a single round was run.
        idx = findClosestCentroids(X, centroids)
        centroids = computeMeans(X, idx, K)
    return centroids


def plot_data(X):
    """Show the original image next to its K-Means colour-quantised version.

    Reads the module-level ``img`` (for the left panel) and ``img_data``
    (for the output shape). X is the pixel matrix that gets clustered into
    16 colours.
    """
    plt.figure()

    # Left panel: the original image, converted from BGR to RGB for display.
    plt.subplot(1, 2, 1)
    plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
    plt.title('original')
    plt.xticks([])
    plt.yticks([])

    centroids = kMeansInitCentroids(X,K=16)
    print('K-Means运行结束\n')
    print('压缩图片...\n')

    # Look up the cluster of every pixel and replace the pixel value with
    # the value of that cluster's centroid.
    labels = findClosestCentroids(X, centroids)
    quantised = np.zeros((X.shape[0], centroids.shape[1]))
    for k, colour in enumerate(centroids):
        pixel_rows = np.where(labels == k)[0]
        quantised[pixel_rows, :] = colour
    quantised = quantised.reshape(img_data.shape)
    # The array handed to OpenCV for display must be uint8.
    quantised = quantised.astype("uint8")

    # Right panel: the compressed image.
    plt.subplot(1, 2, 2)
    plt.imshow(cv.cvtColor(quantised, cv.COLOR_BGR2RGB))
    plt.title('Compress')
    plt.xticks([])
    plt.yticks([])
    plt.show()

# Cluster the pixel matrix and display the original and compressed images.
plot_data(X)

ex7: K-Means与PCA_第2张图片

三、图像 PCA

import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt

# Load 'ex7data1.mat' and use its 'X' entry as the sample matrix.
data = loadmat('ex7data1.mat')
X = data['X']

def feature_normalize(X):
    """Standardise every column of X to zero mean and unit standard deviation.

    The columns are overwritten in place, so the caller's array is modified;
    the same array object is returned.

    Parameters
    ----------
    X : numpy.ndarray
        2-D sample matrix of shape (m, n).

    Returns
    -------
    numpy.ndarray
        X itself, after normalisation.
    """
    _, n_features = X.shape
    for col in range(n_features):
        column = X[:, col]
        centred = column - np.mean(column)
        X[:, col] = centred / np.std(column)
    return X


def PCA(X):
    """Normalise X and return the SVD of its covariance matrix.

    X is passed through ``feature_normalize`` first, which overwrites the
    caller's array in place.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix of shape (m, n).

    Returns
    -------
    tuple
        The three outputs of ``np.linalg.svd`` applied to ``X.T @ X / m``,
        in the order they are returned by NumPy.
    """
    normalised = feature_normalize(X)
    n_samples = normalised.shape[0]
    covariance = normalised.T.dot(normalised) / n_samples
    left, singular_values, right = np.linalg.svd(covariance)
    return left, singular_values, right


def project_data(X,K):
    """Project X onto the first K columns of the U matrix returned by PCA.

    ``PCA(X)`` normalises X in place before this function multiplies it, so
    the projection is taken on the normalised values.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix of shape (m, n).
    K : int
        Number of leading columns of U to keep.

    Returns
    -------
    numpy.ndarray
        Projected data of shape (m, K).
    """
    components = PCA(X)[0]
    leading = components[:, 0:K]
    return X.dot(leading)

def recover_data(U, Z, K):
    """Map projected data back to the original feature space.

    The passed-in ``U`` is used directly. The previous code overwrote it
    by re-running PCA on the module-level ``X``, which ignored the argument
    and tied the function to a global.

    Parameters
    ----------
    U : numpy.ndarray
        Matrix of shape (n, n) whose columns are the principal directions.
    Z : numpy.ndarray
        Projected data of shape (m, K).
    K : int
        Number of leading columns of U that were used for the projection.

    Returns
    -------
    numpy.ndarray
        Approximate reconstruction of shape (m, n).
    """
    U_reduce = U[:, :K]
    return Z.dot(U_reduce.T)

def plot_data(X):
    """Plot the data, its rank-1 PCA reconstruction, and the links between them.

    The first panel is drawn before ``PCA`` runs; ``PCA`` normalises X in
    place, so the third panel plots the normalised values of X.
    """
    plt.figure()

    # Panel 1: the data as passed in.
    plt.subplot(1, 3, 1)
    plt.scatter(X[:, 0], X[:, 1], c='blue', marker='o')
    plt.title('original')

    U, S, V = PCA(X)
    Z = project_data(X,K=1)
    X_rec = recover_data(U, Z, K=1)

    # Panel 2: the data recovered from the 1-D projection.
    plt.subplot(1, 3, 2)
    plt.scatter(X_rec[:, 0], X_rec[:, 1], c='red', marker='+')
    plt.title('from 1D to 2D')
    plt.xticks([-2, 2])
    plt.yticks([-2, 2])

    # Panel 3: visualise the mapping from each point to its reconstruction.
    plt.subplot(1, 3, 3)
    plt.scatter(X[:, 0], X[:, 1], facecolors='none', c='blue', marker='o', s=20)
    plt.scatter(X_rec[:, 0], X_rec[:, 1], facecolors='none', c='red', marker='+', s=20)
    for point, recovered in zip(X, X_rec):
        xs = (point[0], recovered[0])
        ys = (point[1], recovered[1])
        plt.plot(xs, ys, 'k--')
    plt.title('draw line')

    plt.show()

# Run the PCA plots on the samples loaded above.
plot_data(X)

ex7: K-Means与PCA_第3张图片

四、人脸

from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt

# Load 'ex7faces.mat' and use its 'X' entry as the sample matrix.
data = loadmat('ex7faces.mat')
X = data['X']

def _build_display_grid(data, pad=1):
    """Tile the rows of *data* as small images inside one padded 2-D array.

    Each row is scaled by its own largest absolute value and reshaped in
    column-major order. For square tiles this gives the same orientation as
    the earlier index-broadcast assignment, and it also works for
    non-square tiles.

    Parameters
    ----------
    data : numpy.ndarray
        Array of shape (m, n); every row is one flattened example.
    pad : int, optional
        Width of the border between and around tiles (default 1).

    Returns
    -------
    numpy.ndarray
        2-D array holding all tiles; cells not covered by a tile keep the
        value -1.

    Raises
    ------
    ValueError
        If *data* has no rows.
    """
    m, n = data.shape
    if m == 0:
        raise ValueError("data must contain at least one example")

    # Plain int() replaces the np.int alias, which newer NumPy releases
    # no longer provide.
    example_width = int(np.round(np.sqrt(n)))
    example_height = int(n / example_width)
    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m / display_rows))

    grid = -np.ones((pad + display_rows * (example_height + pad),
                     pad + display_cols * (example_width + pad)))

    # range(m) stops exactly at the last example; the old `curr_ex > m`
    # guard allowed one index past the end.
    for curr_ex in range(m):
        j, i = divmod(curr_ex, display_cols)
        example = data[curr_ex]
        # Scale by the example being drawn (not by a module-level array),
        # and avoid dividing by zero for an all-zero row.
        max_val = np.max(np.abs(example))
        if max_val == 0:
            max_val = 1.0
        top = pad + j * (example_height + pad)
        left = pad + i * (example_width + pad)
        grid[top:top + example_height, left:left + example_width] = \
            example.reshape((example_height, example_width), order='F') / max_val
    return grid


def visual(data):
    """Display the rows of *data* as a grid of grayscale images.

    Parameters
    ----------
    data : numpy.ndarray
        Array of shape (m, n); every row is one flattened example.

    Raises
    ------
    ValueError
        If *data* has no rows.
    """
    display_array = _build_display_grid(data)
    plt.figure()
    plt.imshow(display_array, cmap='gray', extent=[-1, 1, -1, 1])
    plt.axis('off')
    plt.show()

def feature_normalize(X):
    """Scale each column of X to zero mean and unit standard deviation.

    The work is done in place: the array passed in is overwritten column by
    column and then returned.

    Parameters
    ----------
    X : numpy.ndarray
        2-D sample matrix of shape (m, n).

    Returns
    -------
    numpy.ndarray
        The same array object as X, now normalised.
    """
    _, width = X.shape
    for j in range(width):
        feature = X[:, j]
        mu = np.mean(feature)
        sigma = np.std(feature)
        X[:, j] = (feature - mu) / sigma
    return X


def PCA(X):
    """Return the SVD factors of the covariance matrix of the normalised X.

    ``feature_normalize`` is applied to X first; it modifies the caller's
    array in place.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix of shape (m, n).

    Returns
    -------
    tuple
        The three arrays produced by ``np.linalg.svd`` for ``X.T @ X / m``,
        in NumPy's return order.
    """
    scaled = feature_normalize(X)
    sample_count = scaled.shape[0]
    sigma_matrix = scaled.T.dot(scaled) / sample_count
    u_mat, s_vals, v_mat = np.linalg.svd(sigma_matrix)
    return u_mat, s_vals, v_mat


def project_data(X,K):
    """Return X multiplied by the first K columns of PCA's U matrix.

    The call to ``PCA(X)`` normalises X in place, so the product is formed
    from the normalised values.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix of shape (m, n).
    K : int
        Number of leading columns of U to project onto.

    Returns
    -------
    numpy.ndarray
        Array of shape (m, K).
    """
    u_mat, _, _ = PCA(X)
    return X.dot(u_mat[:, 0:K])

def recover_data(U, Z, K):
    """Reconstruct an approximation of the original data from its projection.

    Uses the ``U`` argument as given. Earlier the argument was discarded
    and PCA was re-run on the module-level ``X``, making the result depend
    on a global instead of the inputs.

    Parameters
    ----------
    U : numpy.ndarray
        Matrix of shape (n, n) whose columns are the principal directions.
    Z : numpy.ndarray
        Projected data of shape (m, K).
    K : int
        Number of leading columns of U used when projecting.

    Returns
    -------
    numpy.ndarray
        Reconstructed data of shape (m, n).
    """
    U_reduce = U[:, :K]
    return Z.dot(U_reduce.T)

# Show the first 100 rows of X as an image grid.
visual(X[0:100,:])
# Images after dimensionality reduction
U, S, V = PCA(X)
Z = project_data(X,K=100)
visual(Z[0:100,:])
# Recover the images
X_rec = recover_data(U, Z, K=100)
visual(X_rec[0:100,:])

 ex7: K-Means与PCA_第4张图片

你可能感兴趣的:(吴恩达机器学习)