读取文本数据,并计算特征向量

使用 numpy.linalg.eig() 计算矩阵的特征值和特征向量（对于协方差矩阵这类实对称矩阵，建议使用 numpy.linalg.eigh()，其结果保证为实数）

import glob
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
from numpy import linalg as LA

# Collect the candidate document paths. glob returns matches in arbitrary,
# filesystem-dependent order, so sort them to make the index-based
# train/test selection further down reproducible across runs and platforms.
# NOTE(review): 'data/feature.txt' also matches this pattern, so the keyword
# list itself is one of the entries in fcf -- confirm it is never meant to be
# picked up as a document.
fcf = sorted(glob.glob('data/*.txt'))

# Load the keyword vocabulary: whitespace-separated tokens in feature.txt.
# The context manager guarantees the handle is closed even if read() raises.
with open('data/feature.txt', 'r') as file_feature:
    Feature = file_feature.read().split()

# Number of globbed files minus one (presumably discounting feature.txt --
# TODO confirm; the value is not used by the code visible here).
total_files = len(fcf) - 1
total_keywords = len(Feature)
# Build the training matrix: one row per document (the first 40 entries of
# fcf), one column per keyword. Each cell holds how many non-overlapping
# times the keyword string occurs in the document text (str.count semantics,
# i.e. plain substring matching, not whole-word matching).
Xtrain = np.zeros([40, total_keywords])
for i in range(40):
    # `with` closes the file even if reading fails; errors="replace"
    # substitutes undecodable bytes instead of raising.
    with open(fcf[i], 'r', errors="replace") as doc:
        text = doc.read()
    # Fill the whole row at once instead of indexing cell by cell.
    Xtrain[i] = [text.count(keyword) for keyword in Feature]

# Build the test matrix from fcf[41] .. fcf[50]: one row per document, one
# column per keyword, each cell holding the substring occurrence count of the
# keyword in the document text.
# NOTE(review): the training rows use indices 0..39 and the test rows start
# at 41, so fcf[40] is used by neither set -- confirm whether that gap is
# intentional or an off-by-one.
Xtest = np.zeros([10, total_keywords])
for row, i in enumerate(range(41, 51)):
    # `with` closes the file even if reading fails; errors="replace"
    # substitutes undecodable bytes instead of raising.
    with open(fcf[i], 'r', errors="replace") as doc:
        text = doc.read()
    # Fill the whole row at once instead of indexing cell by cell.
    Xtest[row] = [text.count(keyword) for keyword in Feature]


# Sample covariance of the training features: center each column (keyword)
# on its mean, then form Ctrx^T . Ctrx scaled by 1 / (n_samples - 1).
meanXtrain = np.mean(Xtrain, axis=0)
Ctrx = Xtrain - meanXtrain
S1 = np.dot(np.transpose(Ctrx), Ctrx)
Cxx = 1.0 / (Xtrain.shape[0] - 1) * S1

# Cxx is real and symmetric by construction, so use the symmetric solver:
# eigh always returns real eigenvalues/eigenvectors, whereas the general
# eig can produce complex results from rounding noise when the matrix is
# rank-deficient (more keywords than samples).
# W: eigenvalues in ascending order; V: eigenvectors, where column V[:, k]
# corresponds to W[k].
W, V = LA.eigh(Cxx)
print(V)

你可能感兴趣的:(读取文本数据,并计算特征向量)