# MNIST download URL:
# http://yann.lecun.com/exdb/mnist/
library("factoextra")
#' Read an IDX image file into a data frame
#'
#' Reads four big-endian 4-byte integers from the start of the file (a leading
#' value that is discarded, then the image count, the row count and the column
#' count), followed by one unsigned byte per pixel.
#'
#' @param file Path of the binary image file, passed to `file()`.
#' @return A data frame with one row per image and `rows * cols` integer
#'   columns holding the pixel bytes in the order they are stored in the file.
load_image_file <- function(file) {
  con <- file(file, "rb")
  # Release the connection even when one of the reads below fails.
  on.exit(close(con), add = TRUE)

  # header[1] is the leading value the format stores first; it is not used.
  header <- readBin(con, "integer", n = 4L, size = 4L, endian = "big")
  if (length(header) < 4L) {
    stop("IDX image header is truncated: ", file, call. = FALSE)
  }
  n_images <- header[2L]
  n_pixels <- header[3L] * header[4L]

  pixels <- readBin(con, "integer", n = n_images * n_pixels, size = 1L,
                    signed = FALSE)
  # byrow = TRUE: consecutive bytes belong to the same image (one row each).
  data.frame(matrix(pixels, ncol = n_pixels, byrow = TRUE))
}
#' Read an IDX label file into an integer vector
#'
#' Reads two big-endian 4-byte integers from the start of the file (a leading
#' value that is discarded, then the label count), followed by one unsigned
#' byte per label.
#'
#' @param file Path of the binary label file, passed to `file()`.
#' @return An integer vector with one element per label, in file order.
load_label_file <- function(file) {
  con <- file(file, "rb")
  # Release the connection even when one of the reads below fails.
  on.exit(close(con), add = TRUE)

  # header[1] is the leading value the format stores first; it is not used.
  header <- readBin(con, "integer", n = 2L, size = 4L, endian = "big")
  if (length(header) < 2L) {
    stop("IDX label header is truncated: ", file, call. = FALSE)
  }

  readBin(con, "integer", n = header[2L], size = 1L, signed = FALSE)
}
# Read the training images: one row per image, one column per pixel
train <- load_image_file("train-images.idx3-ubyte")
# Read the training labels and attach them as a factor column `y`
train$y <- as.factor(load_label_file("train-labels.idx1-ubyte"))
# Export pixels + label to CSV without a row-name column
# (FALSE rather than F: F is an ordinary variable that can be reassigned)
write.csv(train, "train.csv", row.names = FALSE)
# PCA on the pixel columns only; the label column `y` is excluded by name so
# the call does not depend on the label sitting at a fixed column index.
pca <- prcomp(train[, names(train) != "y", drop = FALSE])
# Explicit print() calls below: auto-printing only happens for top-level
# expressions at the console/Rscript, not when this file is run via source().
print(summary(pca))
# Scree plot: share of variance per principal component (ncp = 10)
print(fviz_screeplot(pca, ncp = 10))
# Save the factoextra biplot to a local JPEG
jpeg("biplot1.jpg")
print(fviz_pca_biplot(pca, label = "var"))
dev.off()
# Save the base-graphics biplot to a local JPEG
jpeg("biplot.jpg")
biplot(pca)
dev.off()
# Save the individuals plot, coloured by label with 95% ellipses; without
# print() the ggplot object is never drawn under source() and the JPEG is empty.
jpeg("individual.jpg")
print(fviz_pca_ind(pca, label = "none", habillage = train$y,
                   addEllipses = TRUE, ellipse.level = 0.95))
dev.off()