# Simulate three classes of 20 observations on 50 variables each; the classes
# differ only in their mean (0, 10 and 20).
x <- matrix(rnorm(1000, mean = 0), nrow = 20)
y <- matrix(rnorm(1000, mean = 10), nrow = 20)
z <- matrix(rnorm(1000, mean = 20), nrow = 20)

# Stack the classes row-wise: rows 1-20 are x, 21-40 are y, 41-60 are z.
d <- rbind(x, y, z)

# Class label of every row of d, in the stacking order used above.
true_class <- rep(c(1, 2, 3), each = 20)

# Scatter plot of the first two raw variables.
plot(d, main = "raw data")

# Principal-component scores; plot the first two, coloured by true class.
d_pca <- prcomp(d)[["x"]]
plot(d_pca[, 1:2], col = true_class)

# K-means with K = 3 on the raw data, cross-tabulated against the true labels.
res <- kmeans(d, centers = 3)
table(res$cluster, true_class)
# K-means with K = 3 on the first two principal-component score vectors.
# The previous call clustered d[, 1:2], i.e. the first two raw variables,
# which is not what this step is meant to do; d_pca holds the PC scores.
# `centers` is spelled out in full instead of relying on partial matching.
res <- kmeans(d_pca[, 1:2], centers = 3)

# True class of each row of d (20 rows per class, in stacking order).
true_class <- rep(c(1, 2, 3), each = 20)

# Cluster assignment versus true class.
table(res$cluster, true_class)
# Total within-cluster sum of squares for k = 1, ..., 20 clusters.
# Which k is the best choice?
withss <- vapply(
  seq_len(20),
  function(n_clusters) {
    fit <- kmeans(d, n_clusters)
    sum(fit$withinss)
  },
  numeric(1)
)
plot(withss)
# Reference: https://rpubs.com/ppaquay/65561
# Simulation study of the estimator
#   alpha = (var(Y) - cov(X, Y)) / (var(X) + var(Y) - 2 * cov(X, Y)).
# Draws 100 independent data sets of 100 (X, Y) pairs from a bivariate normal
# distribution and stores one estimate per data set in `alpha`.
# After the loop, rand1 / x / y hold the last simulated data set.
# Requires the MASS package (mvrnorm).

# Mean vector of (X, Y).
mu1 <- c(0, 0)

# 2 x 2 covariance matrix: var(X) = 1, var(Y) = 1.25, cov(X, Y) = 0.5.
# The previous definition passed five values (a stray 0), which produced a
# 2 x 3 matrix that mvrnorm() rejects as incompatible with a length-2 mean.
sigma1 <- matrix(c(1, 0.5, 0.5, 1.25), nrow = 2)

# One estimate per simulated data set; preallocated rather than grown.
alpha <- numeric(100)
for (i in seq_along(alpha)) {
  rand1 <- MASS::mvrnorm(n = 100, mu = mu1, Sigma = sigma1)
  x <- rand1[, 1]
  y <- rand1[, 2]
  alpha[i] <- (var(y) - cov(x, y)) / (var(x) + var(y) - 2 * cov(x, y))
}
# Bootstrap the alpha estimate from the single sample held in rand1.
#   rand1: observations drawn from the multivariate normal distribution.
#   ran:   rows of rand1 resampled at random with replacement, forming one
#          new (bootstrap) data set.
# NOTE: results are written to alpha[1:100], replacing whatever values are
# already stored at those positions.
for (b in seq_len(100)) {
  # First argument: the row indices 1..100 to draw from;
  # size: how many draws to make (100 in total).
  boot_rows <- sample.int(100, size = 100, replace = TRUE)
  ran <- rand1[boot_rows, ]
  x <- ran[, 1]
  y <- ran[, 2]
  cov_xy <- cov(x, y)
  alpha[b] <- (var(y) - cov_xy) / (var(x) + var(y) - 2 * cov_xy)
}