# 数据集如下:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
# Console output: count East Asian and ambiguous-width characters as wide so
# that printed DataFrame columns stay aligned under CJK headers.
pd.set_option(
    'display.unicode.ambiguous_as_wide', True,
    'display.unicode.east_asian_width', True,
)
# Plot output: select the sans-serif face used for text (presumably chosen so
# the Chinese titles and labels render) and draw minus signs with the ASCII
# hyphen instead of the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['Arial Unicode MS'],
    'axes.unicode_minus': False,
})
# Read the workbook from the current working directory and echo it.
df = pd.read_excel("xyh.xlsx")
print(df)
# Number of variables (columns) in the data set.
n = len(df.columns)
# Pairwise correlation matrix of the columns.
R = df.corr()
print('相关系数矩阵为:')
print(R)
# A correlation matrix is symmetric, so use the symmetric solver: eigh always
# returns real eigenvalues and orthonormal eigenvectors, whereas the general
# eig solver may yield complex dtype from rounding (which breaks math.sqrt
# further down) and does not guarantee orthogonality for repeated eigenvalues.
# NOTE: the sign of each eigenvector is arbitrary and may differ from eig's.
w, v = np.linalg.eigh(R.to_numpy())
# eigh returns eigenvalues in ascending order; reorder largest-first and
# permute the eigenvector columns to match.
sorted_indices = np.argsort(w)[::-1]
topk_evecs = v[:, sorted_indices]
w = w[sorted_indices]
print('特征值为{}\n特征向量为{}'.format(w, topk_evecs))
# Total variance (sum of all eigenvalues), used to normalise the ratios below.
S = w.sum()
print("每个维度的方差贡献率")
print(w / S)
print("使用R得到的累积方差贡献率为:")
print(np.cumsum(w) / S)
print("-" * 100)
# Scree plot: eigenvalues in w plotted against the 1-based component number.
x = list(range(1, n + 1))
print(x)
print(w)
# Draw on the current axes (created on demand when no figure exists yet).
ax = plt.gca()
ax.plot(x, w)
ax.set_title("碎石图")
ax.set_xlabel("主成分个数")
ax.set_ylabel("主成分特征值")
ax.grid()
plt.show()
# Loadings of the first two components: each leading eigenvector, laid out as
# a 1 x n row, scaled by the square root of its eigenvalue.
# With complete data the correlation matrix is positive semi-definite, so a
# negative eigenvalue here is presumably rounding noise (e.g. two perfectly
# collinear variables); clamp at zero so math.sqrt does not raise
# "math domain error".
PC1 = pd.DataFrame(topk_evecs[:, 0].reshape(1, n)) * math.sqrt(max(w[0], 0.0))
PC2 = pd.DataFrame(topk_evecs[:, 1].reshape(1, n)) * math.sqrt(max(w[1], 0.0))
# Stack the two loading rows, label the columns with the variable names and
# number the rows from 1.
pc = pd.concat([PC1, PC2])
pc.columns = df.columns
pc.index = list(range(1, len(pc) + 1))
# Per-variable sum of squared loadings over the two components, computed in
# one vectorized step and reshaped to a single labelled row.
# NOTE(review): this quantity is commonly called the communality; the row
# label below is kept as-is because it is part of the program's output.
Y = (pc ** 2).sum(axis=0).to_frame().T
Y.index = ["贡献率"]
pc = pd.concat([pc, Y])
print('因子载荷矩阵和贡献率如下:')
print(pc)