无标题文章

# Simulate three well-separated groups of 20 observations each, with 50
# features per observation (1000 normal draws arranged in 20 rows); the group
# means are 0, 10 and 20.
x <- matrix(rnorm(1000, mean = 0), nrow = 20)
y <- matrix(rnorm(1000, mean = 10), nrow = 20)
z <- matrix(rnorm(1000, mean = 20), nrow = 20)

# Stack the groups: rows 1-20 come from x, 21-40 from y, 41-60 from z.
d <- rbind(x, y, z)

# True group label of every row of d, in the same order as the rbind() above.
true_class <- c(rep(1, 20), rep(2, 20), rep(3, 20))

# A matrix passed to plot() is drawn as its first column against its second.
plot(d, main = "raw data")

# Principal component scores of the observations.
d_pca <- prcomp(d)$x

# First two principal components, coloured by true group.
plot(d_pca[, 1:2], col = true_class)

# K-means with three clusters on all raw features, cross-tabulated against
# the true groups.
res <- kmeans(d, centers = 3)
table(res$cluster, true_class)

# K-means with three clusters on the first two principal components.
# The scores live in d_pca; d[, 1:2] would only be the first two raw features.
res <- kmeans(d_pca[, 1:2], centers = 3)
table(res$cluster, true_class)

# Total within-cluster sum of squares for k = 1, ..., 20 clusters.
# Which k is best? Inspect the plot for the point where adding further
# clusters no longer reduces the total noticeably.
withss <- vapply(
  seq_len(20),
  function(k) kmeans(d, centers = k)$tot.withinss,
  numeric(1)
)

plot(withss)

# Reference: https://rpubs.com/ppaquay/65561

# Simulation: estimate alpha on 100 independent samples, each of 100
# observations drawn from a bivariate normal distribution.
mu1 <- c(0, 0)

# 2 x 2 covariance matrix: var(x) = 1, var(y) = 1.25, cov(x, y) = 0.5.
# matrix() needs exactly four values here; a fifth one produces a 2 x 3
# matrix, which is not a valid Sigma for mvrnorm().
sigma1 <- matrix(c(1, 0.5, 0.5, 1.25), nrow = 2)

# Preallocate the result instead of growing it inside the loop.
alpha <- numeric(100)

for (i in seq_len(100)) {
  # mvrnorm() lives in the MASS package; qualifying the call means the script
  # does not rely on library(MASS) having been attached beforehand.
  rand1 <- MASS::mvrnorm(n = 100, mu = mu1, Sigma = sigma1)

  x <- rand1[, 1]
  y <- rand1[, 2]

  # alpha = (var(y) - cov(x, y)) / (var(x) + var(y) - 2 * cov(x, y))
  alpha[i] <- (var(y) - cov(x, y)) / (var(x) + var(y) - 2 * cov(x, y))
}


# Bootstrap: re-estimate alpha on 100 resamples of the rows of rand1.
# rand1 holds the observations drawn from the multivariate normal
# distribution. Each estimate is written to alpha[j], replacing whatever was
# stored at that position before.
for (j in seq_len(100)) {
  # 1:100 is the pool of row indices to draw from; size = 100 is how many
  # indices are drawn in total, with replacement.
  idx <- sample(1:100, size = 100, replace = TRUE)

  # ran is the set of 100 rows of rand1 picked at random with replacement,
  # forming a new bootstrap sample.
  ran <- rand1[idx, ]

  x <- ran[, 1]
  y <- ran[, 2]

  var_x <- var(x)
  var_y <- var(y)
  cov_xy <- cov(x, y)

  alpha[j] <- (var_y - cov_xy) / (var_x + var_y - 2 * cov_xy)
}

你可能感兴趣的:(无标题文章)