t-SNE.py
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.manifold import TSNE
# Directory that holds the per-class vector files. File names are appended
# to this string directly, so it has to end with a path separator.
PATH = 'D:/Code/test1/Vector/'
# PATH = 'D:/Code/Vector_mean/'

# Module-level placeholder list (not read by the functions in this file).
# ``my_label`` collects the class name of every sample loaded by get_mydata().
vector, my_label = [], []
# Load the reference digits data set
def get_data():
    """Load scikit-learn's bundled handwritten-digits data set (10 classes).

    :return: ``(data, label, n_samples, n_features)`` - the feature matrix,
        the target of every sample, and the two dimensions of the feature
        matrix.
    """
    digits = datasets.load_digits(n_class=10)
    data = digits.data  # image features, one row per sample
    label = digits.target  # image labels
    n_samples, n_features = data.shape  # shape of the data set
    return data, label, n_samples, n_features
def get_mydata():
    """Load the per-class vector files and build the matching label array.

    One text file per class (``CatVector.txt`` and ``DogVector.txt``) is read
    from ``PATH`` with ``np.loadtxt``; every row of a file is one sample of
    that class.

    Side effects: the module-level ``my_label`` list is overwritten with the
    labels of this call, and the shapes of both results are printed.

    :return: ``(allvector, labels)`` - the samples of all classes stacked
        row-wise, and a NumPy array holding one class name per row.
    """
    class_name = ['CatVector.txt', 'DogVector.txt']
    class_label = ['Cat', 'Dog']

    per_class = []
    labels = []
    for file_name, name in zip(class_name, class_label):
        # ndmin=2 keeps a file containing a single sample as one row instead
        # of letting loadtxt collapse it to a 1-D array, which would make the
        # feature count look like the sample count.
        samples = np.loadtxt(PATH + file_name, ndmin=2)
        per_class.append(samples)
        labels.extend([name] * samples.shape[0])

    allvector = np.concatenate(per_class, axis=0)

    # Replace the shared list's contents rather than appending to it, so that
    # calling this function more than once does not accumulate stale labels.
    my_label[:] = labels

    print(allvector.shape)
    print(np.array(my_label).shape)
    return allvector, np.array(my_label)
# Normalise the samples and draw them
def plot_embedding(data, label, title):
    """Min-max normalise an embedding and draw it as a class-coloured scatter.

    :param data: array with one sample per row; columns 0 and 1 are plotted
    :param label: per-sample class names; 'Cat' is drawn in brown, 'Dog' in
        red, and any other value in gray
    :param title: title of the figure
    :return: the matplotlib figure that was created
    """
    data = np.asarray(data, dtype=float)
    x_min, x_max = np.min(data, 0), np.max(data, 0)
    # A constant column has zero range; divide by 1 there so it maps to 0
    # instead of producing NaN.
    span = x_max - x_min
    span = np.where(span == 0, 1.0, span)
    data = (data - x_min) / span  # scale every column to [0, 1]

    # Resolve every colour up front so that an unexpected label can neither
    # raise nor silently reuse the colour of the previous sample.
    class_colors = {'Cat': 'brown', 'Dog': 'red'}
    colors = [class_colors.get(str(name), 'gray') for name in label]

    fig = plt.figure()  # create the figure
    ax = plt.subplot(111)  # create the single subplot
    # One call draws all points, in the same order as the samples.
    ax.scatter(data[:, 0], data[:, 1], c=colors, marker='o', s=50)
    ax.set_title(title, fontsize=14)
    return fig
# Main routine: run the t-SNE dimensionality reduction
def main():
    """Load the Cat/Dog vectors, embed them in 2-D with t-SNE and show the plot."""
    my_allvector, my_alllabel = get_mydata()
    print('Starting compute t-SNE Embedding...')

    # scikit-learn requires perplexity < n_samples; the default of 30 is kept
    # whenever the data set is large enough.
    n_samples = my_allvector.shape[0]
    perplexity = min(30.0, max(1.0, n_samples - 1))
    ts = TSNE(n_components=2, init='pca', random_state=0,
              perplexity=perplexity)

    # t-SNE dimensionality reduction
    result = ts.fit_transform(my_allvector)

    # Draw the embedding.
    # NOTE(review): the title still says "digits" although the Cat/Dog vectors
    # are what gets plotted - confirm whether it should be changed.
    plot_embedding(result, my_alllabel, 't-SNE Embedding of digits')

    # Show the figure
    plt.show()
# Script entry point: run main() only when executed directly, not on import.
if __name__ == '__main__':
    main()
get_data():加载 MNIST 风格的手写数字数据集(sklearn 的 load_digits),来源网络
def get_data():
    """Load scikit-learn's bundled handwritten-digits data set (10 classes).

    :return: ``(data, label, n_samples, n_features)`` - the feature matrix,
        the target of every sample, and the two dimensions of the feature
        matrix.
    """
    digits = datasets.load_digits(n_class=10)
    data = digits.data  # image features, one row per sample
    label = digits.target  # image labels
    n_samples, n_features = data.shape  # shape of the data set
    return data, label, n_samples, n_features
get_mydata():加载自己的数据集。class_name 为保存每类数据的文件名,文件中每一行代表一个样本;class_label 为对应的标签。
def get_mydata():
    """Load the per-class vector files and build the matching label array.

    One text file per class (``CatVector.txt`` and ``DogVector.txt``) is read
    from ``PATH`` with ``np.loadtxt``; every row of a file is one sample of
    that class.

    Side effects: the module-level ``my_label`` list is overwritten with the
    labels of this call, and the shapes of both results are printed.

    :return: ``(allvector, labels)`` - the samples of all classes stacked
        row-wise, and a NumPy array holding one class name per row.
    """
    class_name = ['CatVector.txt', 'DogVector.txt']
    class_label = ['Cat', 'Dog']

    per_class = []
    labels = []
    for file_name, name in zip(class_name, class_label):
        # ndmin=2 keeps a file containing a single sample as one row instead
        # of letting loadtxt collapse it to a 1-D array, which would make the
        # feature count look like the sample count.
        samples = np.loadtxt(PATH + file_name, ndmin=2)
        per_class.append(samples)
        labels.extend([name] * samples.shape[0])

    allvector = np.concatenate(per_class, axis=0)

    # Replace the shared list's contents rather than appending to it, so that
    # calling this function more than once does not accumulate stale labels.
    my_label[:] = labels

    print(allvector.shape)
    print(np.array(my_label).shape)
    return allvector, np.array(my_label)
TSNE() 的参数,这个博主整理得很好
链接:link
matplotlib.pyplot.scatter() 中的颜色问题,颜色对照表:
链接:link