import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import random
# Load the K-means example data; the samples are stored under the 'X' key.
data = loadmat('ex7data2')
X = data['X']
def initialize(X, K=3):
    """Choose K rows of X at random to serve as starting centroids.

    Args:
        X: 2-D array with one sample per row.
        K: number of centroids to draw.

    Returns:
        Array with K rows, each copied from a row of X.  Rows are drawn
        without replacement, so no row position is picked twice.

    Raises:
        ValueError: propagated from random.sample when K is larger than the
            number of rows in X.
    """
    # Going through a Python list lets random.sample pick whole rows.
    rows = X.tolist()
    return np.array(random.sample(rows, K))
def findClosestCentroids(X, centroids):
    """Assign every sample to its nearest centroid.

    Args:
        X: 2-D array with one sample per row.
        centroids: 2-D array with one centroid per row and the same number
            of columns as X.

    Returns:
        Float array of shape (X.shape[0], 1).  Entry i is the row index of
        the centroid with the smallest squared Euclidean distance to
        sample i; ties go to the lowest index.
    """
    # Work in floating point so unsigned-integer inputs cannot wrap around
    # when subtracted.
    samples = np.asarray(X, dtype=float)
    centers = np.asarray(centroids, dtype=float)
    # (m, 1, n) - (1, K, n) broadcasts to every sample/centroid pair.
    diff = samples[:, np.newaxis, :] - centers[np.newaxis, :, :]
    sq_dist = np.sum(diff ** 2, axis=2)
    # argmin returns exactly one index per row; matching the row minimum
    # with np.where returns several on a tie and cannot be stored.
    return np.argmin(sq_dist, axis=1).astype(float).reshape(-1, 1)
def computeMeans(X, idx, K):
    """Return the mean of the samples assigned to each of the K clusters.

    Args:
        X: 2-D array with one sample per row.
        idx: cluster label per sample; flattened before use.
        K: number of clusters.

    Returns:
        Array of shape (K, X.shape[1]); row k is the column-wise mean of
        the samples whose label equals k.
    """
    labels = np.ravel(idx)
    centroids = np.zeros((K, X.shape[1]))
    for k in range(K):
        members = X[labels == k, :]
        centroids[k] = np.mean(members, axis=0)
    return centroids
def kMeansInitCentroids(X, K, iterations=1):
    """Seed K random centroids and refine them with K-means update steps.

    Args:
        X: 2-D array with one sample per row.
        K: number of clusters; used for both the random seeding and the
            mean update.
        iterations: number of assign/update rounds to run.  The default of
            1 performs a single round; pass a larger value (e.g. 10) to
            run that many rounds.

    Returns:
        Array of K centroids after the last round.
    """
    # Seed with the requested K rather than a fixed count, so the seeding
    # and the mean update always agree on the number of clusters.
    centroids = initialize(X, K)
    for _ in range(iterations):
        idx = findClosestCentroids(X, centroids)
        centroids = computeMeans(X, idx, K)
    return centroids
def plot_data(X):
    """Scatter the samples and overlay centroids from ten K-means calls.

    The first two columns of X are drawn as blue dots.  kMeansInitCentroids
    is then called ten times with K=3 and each returned centroid set is
    drawn on the same axes before the figure is shown.
    """
    plt.figure()
    xs, ys = X[:, 0], X[:, 1]
    plt.scatter(xs, ys, c='blue', marker='o')

    iterations = 10
    for _ in range(iterations):
        now_centroids = kMeansInitCentroids(X, K=3)
        cx, cy = now_centroids[:, 0], now_centroids[:, 1]
        plt.plot(cx, cy, '->', linewidth=5)

    plt.show()
# Run the K-means demo on the loaded samples.
plot_data(X)
# Part 2: K-means image compression
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import random
import cv2 as cv
from matplotlib import colors
from scipy import io as spio
import scipy.misc
img = cv.imread(r'bird_small.png')
# cv.imread reports a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message.
if img is None:
    raise FileNotFoundError("cannot read image 'bird_small.png'")
img_data = img/255.0
# Reshape the image into an M x 3 matrix (M is the number of pixels).
X = img.reshape((-1,3))
def initialize(X, K=16):
    """Choose K rows of X at random to serve as starting centroids.

    Args:
        X: 2-D array with one sample per row.
        K: number of centroids to draw.

    Returns:
        Array with K rows, each copied from a row of X.  Rows are drawn
        without replacement, so no row position is picked twice.  The rows
        pass through Python lists, so the result's dtype is whatever
        np.array infers from those values.

    Raises:
        ValueError: propagated from random.sample when K is larger than the
            number of rows in X.
    """
    # Going through a Python list lets random.sample pick whole rows.
    rows = X.tolist()
    return np.array(random.sample(rows, K))
def findClosestCentroids(X, centroids):
    """Assign every sample to its nearest centroid.

    Args:
        X: 2-D array with one sample per row.
        centroids: 2-D array with one centroid per row and the same number
            of columns as X.

    Returns:
        Float array of shape (X.shape[0],).  Entry i is the row index of
        the centroid with the smallest squared Euclidean distance to
        sample i; ties go to the lowest index.
    """
    # Work in floating point so unsigned-integer inputs (e.g. uint8 pixel
    # values) cannot wrap around when subtracted.
    samples = np.asarray(X, dtype=float)
    centers = np.asarray(centroids, dtype=float)
    # (m, 1, n) - (1, K, n) broadcasts to every sample/centroid pair and
    # replaces the per-pair Python loop.
    diff = samples[:, np.newaxis, :] - centers[np.newaxis, :, :]
    sq_dist = np.sum(diff ** 2, axis=2)
    # Column index of the minimum in each row.
    return np.argmin(sq_dist, axis=1).astype(float)
def computeMeans(X, idx, K):
    """Return the mean of the samples assigned to each of the K clusters.

    Args:
        X: 2-D array with one sample per row.
        idx: cluster label per sample; flattened before use.
        K: number of clusters.

    Returns:
        Array of shape (K, X.shape[1]); row k is the column-wise mean of
        the samples whose label equals k.
    """
    labels = np.ravel(idx)
    centroids = np.zeros((K, X.shape[1]))
    for k in range(K):
        members = X[labels == k, :]
        centroids[k] = np.mean(members, axis=0)
    return centroids
def kMeansInitCentroids(X, K, iterations=10):
    """Run K-means from a random start and return the final centroids.

    Args:
        X: 2-D array with one sample per row.
        K: number of clusters; used for both the random seeding and the
            mean update.
        iterations: number of assign/update rounds to run (default 10).

    Returns:
        Array of K centroids after the last round.
    """
    # Seed with the requested K rather than a fixed count, so the seeding
    # and the mean update always agree on the number of clusters.
    centroids = initialize(X, K)
    for step in range(iterations):
        # Progress message: "iteration number: N".
        print(u'迭代计算次数:%d' % (step + 1))
        idx = findClosestCentroids(X, centroids)
        centroids = computeMeans(X, idx, K)
    return centroids
def plot_data(X):
    """Show the original image next to its 16-colour K-means version.

    Reads the module-level ``img`` (for the left panel) and ``img_data``
    (for the output shape).  X supplies the pixel rows that are clustered.
    """
    plt.figure()

    # Left panel: the untouched image, converted from BGR to RGB for display.
    plt.subplot(1, 2, 1)
    plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
    plt.title('original')
    plt.xticks([])
    plt.yticks([])

    centroids = kMeansInitCentroids(X, K=16)
    print('K-Means运行结束\n')
    print('压缩图片...\n')

    idx = findClosestCentroids(X, centroids)
    compress_img = np.zeros((X.shape[0], centroids.shape[1]))
    # Replace every pixel with the value of the centroid it is assigned to.
    for k, colour in enumerate(centroids):
        rows = np.nonzero(idx == k)[0]
        compress_img[rows, :] = colour

    # Back to image shape; the array must be uint8 for the OpenCV call below.
    compress_img = compress_img.reshape(img_data.shape).astype("uint8")

    # Right panel: the compressed image.
    plt.subplot(1, 2, 2)
    plt.imshow(cv.cvtColor(compress_img, cv.COLOR_BGR2RGB))
    plt.title('Compress')
    plt.xticks([])
    plt.yticks([])

    plt.show()
# Cluster the pixel rows and display the original and compressed images.
plot_data(X)
# Part 3: PCA on 2-D data
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
# Load the PCA example data; the samples are stored under the 'X' key.
data = loadmat('ex7data1.mat')
X = data['X']
def feature_normalize(X):
    """Standardise each column of X to zero mean and unit spread, in place.

    Args:
        X: array with one sample per row.  It is overwritten with the
            normalised values; callers in this file rely on that.

    Returns:
        The same array object that was passed in.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # A constant column has zero spread; dividing by 1 instead maps it to
    # zeros rather than producing NaN from 0/0.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X[:] = (X - mu) / sigma
    return X
def PCA(X):
    """Return the SVD of the covariance matrix of the normalised data.

    X is passed through feature_normalize first, which overwrites X with
    its normalised values.

    Returns:
        Tuple (U, S, V) as produced by np.linalg.svd on X.T.dot(X) / m,
        where m is the number of rows.
    """
    normalized = feature_normalize(X)
    sample_count = normalized.shape[0]
    covariance = np.dot(normalized.T, normalized) / sample_count
    left, singular, right = np.linalg.svd(covariance)
    return left, singular, right
def project_data(X, K):
    """Project X onto its first K principal components.

    PCA(X) runs first and normalises X in place, so the product below uses
    the normalised values.

    Returns:
        Array with one row per sample and K columns.
    """
    components = PCA(X)[0]
    leading = components[:, :K]
    return X.dot(leading)
def recover_data(U, Z, K):
    """Map K-dimensional projections back into the original feature space.

    Args:
        U: matrix whose columns are the principal components (the first
            value returned by PCA).
        Z: projected data, one row per sample, K columns.
        K: number of leading components that were used for the projection.

    Returns:
        Z.dot(U[:, :K].T), one reconstructed sample per row.
    """
    # Use the U supplied by the caller instead of recomputing it from a
    # module-level array, so the result depends only on the arguments.
    U_reduce = U[:, 0:K]
    return Z.dot(U_reduce.T)
def plot_data(X):
    """Plot the data, its 1-component PCA reconstruction, and both together.

    Three panels are drawn side by side: the points as passed in, the
    points recovered from a 1-D projection, and an overlay with a dashed
    line joining each point to its reconstruction.  PCA(X) overwrites X
    with normalised values, so the third panel uses X as it stands after
    that call.
    """
    plt.figure()

    # Panel 1: the input points.
    plt.subplot(1, 3, 1)
    plt.scatter(X[:, 0], X[:, 1], c='blue', marker='o')
    plt.title('original')

    U, _, _ = PCA(X)
    Z = project_data(X, K=1)
    X_rec = recover_data(U, Z, K=1)

    # Panel 2: the points recovered from the 1-D projection.
    plt.subplot(1, 3, 2)
    plt.scatter(X_rec[:, 0], X_rec[:, 1], c='red', marker='+')
    plt.title('from 1D to 2D')
    plt.xticks([-2, 2])
    plt.yticks([-2, 2])

    # Panel 3: both point sets with the projection lines between them.
    plt.subplot(1, 3, 3)
    plt.scatter(X[:, 0], X[:, 1], facecolors='none', c='blue',
                marker='o', s=20)
    plt.scatter(X_rec[:, 0], X_rec[:, 1], facecolors='none', c='red',
                marker='+', s=20)
    for point, rebuilt in zip(X, X_rec):
        plt.plot((point[0], rebuilt[0]), (point[1], rebuilt[1]), 'k--')
    plt.title('draw line')

    plt.show()
# Run the PCA demo on the loaded samples.
plot_data(X)
# Part 4: PCA on face images
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt
# Load the face data set; the samples are stored under the 'X' key.
data = loadmat('ex7faces.mat')
X = data['X']
def _tile_examples(data, pad=1):
    """Lay the rows of *data* out as image tiles in a single 2-D array."""
    m, n = data.shape
    # The builtin int replaces the np.int alias, which current NumPy
    # releases no longer provide.
    example_width = int(np.round(np.sqrt(n)))
    example_height = int(n / example_width)
    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m / display_rows))

    # Cells that receive no tile, and the padding between tiles, stay at -1.
    display_array = -np.ones((pad + display_rows * (example_height + pad),
                              pad + display_cols * (example_width + pad)))

    # Iterating over exactly m examples avoids reading one row past the end
    # when the grid has more cells than there are examples.
    for curr_ex in range(m):
        j, i = divmod(curr_ex, display_cols)
        example = data[curr_ex]
        # Scale by this example's own peak magnitude; an all-zero row is
        # left as zeros instead of dividing by zero.
        max_val = np.max(np.abs(example))
        if max_val == 0:
            max_val = 1
        top = pad + j * (example_height + pad)
        left = pad + i * (example_width + pad)
        # Column-major fill gives the same placement as transposing the
        # row-major reshape for square tiles, and also works when the
        # height and width differ.
        display_array[top:top + example_height, left:left + example_width] = (
            example.reshape((example_height, example_width), order='F')
            / max_val)
    return display_array


def visual(data):
    """Display the rows of *data* as a grid of grayscale image tiles.

    Args:
        data: 2-D array; each row is one flattened image.  The tile width
            is the rounded square root of the row length, and the grid is
            made roughly square in the number of examples.

    Each tile is divided by the largest absolute value of its own row
    before drawing.
    """
    display_array = _tile_examples(data)
    plt.figure()
    plt.imshow(display_array, cmap='gray', extent=[-1, 1, -1, 1])
    plt.axis('off')
    plt.show()
def feature_normalize(X):
    """Standardise each column of X to zero mean and unit spread, in place.

    Args:
        X: array with one sample per row.  It is overwritten with the
            normalised values; callers in this file rely on that.

    Returns:
        The same array object that was passed in.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # A constant column has zero spread; dividing by 1 instead maps it to
    # zeros rather than producing NaN from 0/0.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X[:] = (X - mu) / sigma
    return X
def PCA(X):
    """Return the SVD of the covariance matrix of the normalised data.

    X is passed through feature_normalize first, which overwrites X with
    its normalised values.

    Returns:
        Tuple (U, S, V) as produced by np.linalg.svd on X.T.dot(X) / m,
        where m is the number of rows.
    """
    normalized = feature_normalize(X)
    sample_count = normalized.shape[0]
    covariance = np.dot(normalized.T, normalized) / sample_count
    left, singular, right = np.linalg.svd(covariance)
    return left, singular, right
def project_data(X, K):
    """Project X onto its first K principal components.

    PCA(X) runs first and normalises X in place, so the product below uses
    the normalised values.

    Returns:
        Array with one row per sample and K columns.
    """
    components = PCA(X)[0]
    leading = components[:, :K]
    return X.dot(leading)
def recover_data(U, Z, K):
    """Map K-dimensional projections back into the original feature space.

    Args:
        U: matrix whose columns are the principal components (the first
            value returned by PCA).
        Z: projected data, one row per sample, K columns.
        K: number of leading components that were used for the projection.

    Returns:
        Z.dot(U[:, :K].T), one reconstructed sample per row.
    """
    # Use the U supplied by the caller instead of recomputing it from a
    # module-level array, so the result depends only on the arguments.
    U_reduce = U[:, 0:K]
    return Z.dot(U_reduce.T)
# Show the first 100 rows of the data set as image tiles.
visual(X[0:100,:])
# Images after dimensionality reduction (projection onto 100 components).
U, S, V = PCA(X)
Z = project_data(X,K=100)
visual(Z[0:100,:])
# Recovered images, rebuilt from the projection.
X_rec = recover_data(U, Z, K=100)
visual(X_rec[0:100,:])