Comparison of discriminant methods:
# Distance discriminant: classify x by comparing its (Mahalanobis) distance to each group mean, using a pooled covariance matrix when the group covariances are assumed equal and separate covariance matrices otherwise.
# Bayes discriminant: assumes some prior knowledge about the populations, expressed as prior probabilities; once a sample is observed, the priors are updated to posterior probabilities, and classification is based on those posteriors.
# Fisher discriminant: finds a discriminant function (a projection direction) that makes the within-class variance as small as possible and the between-class variance as large as possible.
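# Written out from the code below (two-class case; mu1, mu2 the group means,
# d2(x, mu, S) the squared Mahalanobis distance, W(x) = d2(x, mu2, S2) - d2(x, mu1, S1)):
#   Distance:  assign x to class 1 if W(x) > 0, otherwise to class 2.
#   Bayes:     assign x to class 1 if W(x) > beta, with
#              beta = 2*log(rate) + log(det(S1)/det(S2)),
#              where rate plays the role of the prior ratio p2/p1
#              (the log-determinant term vanishes when a pooled covariance is used).
#   Fisher:    project onto a = solve(S, mu2 - mu1), S the within-class
#              sum-of-squares matrix; assign x to class 1 if t(x - mu) %*% a <= 0,
#              where mu is the weighted overall mean.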
# Distance discriminant:
# Two-class case:
discriminiant.distance <- function(TrnX1, TrnX2, TstX = NULL, var.equal = FALSE){
  # If no test set is supplied, classify the training data themselves
  if (is.null(TstX) == TRUE) TstX <- rbind(TrnX1, TrnX2)
  if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
  else if (is.matrix(TstX) != TRUE)
    TstX <- as.matrix(TstX)
  if (is.matrix(TrnX1) != TRUE) TrnX1 <- as.matrix(TrnX1)
  if (is.matrix(TrnX2) != TRUE) TrnX2 <- as.matrix(TrnX2)
  nx <- nrow(TstX)
  blong <- matrix(rep(0, nx), nrow=1, byrow=TRUE,
                  dimnames=list("blong", 1:nx))
  mu1 <- colMeans(TrnX1); mu2 <- colMeans(TrnX2)
  if (var.equal == TRUE || var.equal == T){
    # Equal covariances: use the pooled covariance matrix
    S <- var(rbind(TrnX1, TrnX2))
    w <- mahalanobis(TstX, mu2, S) - mahalanobis(TstX, mu1, S)
  }
  else{
    # Unequal covariances: a separate covariance matrix per group
    S1 <- var(TrnX1); S2 <- var(TrnX2)
    w <- mahalanobis(TstX, mu2, S2) - mahalanobis(TstX, mu1, S1)
  }
  # Assign each observation to the class whose mean is closer
  for (i in 1:nx){
    if (w[i] > 0)
      blong[i] <- 1
    else
      blong[i] <- 2
  }
  blong
}
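# A minimal usage sketch (not from the original text): two simulated bivariate
# samples with different means; the function returns a 1 x n matrix of class labels.
set.seed(1)
X1 <- matrix(rnorm(40, mean = 0), ncol = 2)       # 20 training points for class 1
X2 <- matrix(rnorm(40, mean = 3), ncol = 2)       # 20 training points for class 2
discriminiant.distance(X1, X2)                    # classify the training data (separate covariances)
discriminiant.distance(X1, X2, var.equal = TRUE)  # same, with a pooled covariance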
# Multi-class case:
distinguish.distance <- function(TrnX, TrnG, TstX = NULL, var.equal = FALSE){
  # If TrnG is not a factor, treat TrnX and TrnG as the two training samples of a
  # two-class problem and build the grouping factor from them
  if (is.factor(TrnG) == FALSE){
    mx <- nrow(TrnX); mg <- nrow(TrnG)
    TrnX <- rbind(TrnX, TrnG)
    TrnG <- factor(rep(1:2, c(mx, mg)))
  }
  if (is.null(TstX) == TRUE) TstX <- TrnX
  if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
  else if (is.matrix(TstX) != TRUE)
    TstX <- as.matrix(TstX)
  if (is.matrix(TrnX) != TRUE) TrnX <- as.matrix(TrnX)
  nx <- nrow(TstX)
  blong <- matrix(rep(0, nx), nrow=1,
                  dimnames=list("blong", 1:nx))
  g <- length(levels(TrnG))
  # Group means
  mu <- matrix(0, nrow=g, ncol=ncol(TrnX))
  for (i in 1:g)
    mu[i,] <- colMeans(TrnX[TrnG==i,])
  D <- matrix(0, nrow=g, ncol=nx)
  if (var.equal == TRUE || var.equal == T){
    # Equal covariances: one pooled matrix for all groups
    for (i in 1:g)
      D[i,] <- mahalanobis(TstX, mu[i,], var(TrnX))
  }
  else{
    # Unequal covariances: a separate matrix per group
    for (i in 1:g)
      D[i,] <- mahalanobis(TstX, mu[i,], var(TrnX[TrnG==i,]))
  }
  # Assign each observation to the group with the smallest Mahalanobis distance
  for (j in 1:nx){
    dmin <- Inf
    for (i in 1:g)
      if (D[i,j] < dmin){
        dmin <- D[i,j]; blong[j] <- i
      }
  }
  blong
}
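# A minimal usage sketch (not from the original text), using the built-in iris data:
# four measurements, three species, 50 observations per species.
X <- iris[, 1:4]
G <- gl(3, 50)                                 # grouping factor with levels 1, 2, 3
distinguish.distance(X, G)                     # separate covariance matrix per species
distinguish.distance(X, G, var.equal = TRUE)   # pooled covariance matrix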
# Bayes discriminant (for the multi-class code, see Xue Yi, Statistical Modeling and R Software / 统计建模与R软件):
discriminiant.bayes <- function(TrnX1, TrnX2, rate = 1, TstX = NULL, var.equal = FALSE){
  if (is.null(TstX) == TRUE) TstX <- rbind(TrnX1, TrnX2)
  if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
  else if (is.matrix(TstX) != TRUE)
    TstX <- as.matrix(TstX)
  if (is.matrix(TrnX1) != TRUE) TrnX1 <- as.matrix(TrnX1)
  if (is.matrix(TrnX2) != TRUE) TrnX2 <- as.matrix(TrnX2)
  nx <- nrow(TstX)
  blong <- matrix(rep(0, nx), nrow=1, byrow=TRUE,
                  dimnames=list("blong", 1:nx))
  mu1 <- colMeans(TrnX1); mu2 <- colMeans(TrnX2)
  if (var.equal == TRUE || var.equal == T){
    # Equal covariances: pooled matrix, threshold depends only on the prior ratio
    S <- var(rbind(TrnX1, TrnX2)); beta <- 2*log(rate)
    w <- mahalanobis(TstX, mu2, S) - mahalanobis(TstX, mu1, S)
  }
  else{
    # Unequal covariances: the threshold picks up a log-determinant term
    S1 <- var(TrnX1); S2 <- var(TrnX2)
    beta <- 2*log(rate) + log(det(S1)/det(S2))
    w <- mahalanobis(TstX, mu2, S2) - mahalanobis(TstX, mu1, S1)
  }
  # Assign to class 1 when the discriminant score exceeds the threshold beta
  for (i in 1:nx){
    if (w[i] > beta)
      blong[i] <- 1
    else
      blong[i] <- 2
  }
  blong
}
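# A minimal usage sketch (not from the original text; simulated data assumed). From the
# threshold beta = 2*log(rate), rate plays the role of the prior ratio p2/p1, and
# rate = 1 corresponds to equal priors.
set.seed(2)
Y1 <- matrix(rnorm(40, mean = 0), ncol = 2)   # class 1, prior p1
Y2 <- matrix(rnorm(40, mean = 3), ncol = 2)   # class 2, prior p2
discriminiant.bayes(Y1, Y2, rate = 0.25)      # e.g. p1 = 0.8, p2 = 0.2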
# Fisher discriminant:
discriminiant.fisher <- function(TrnX1, TrnX2, TstX = NULL){
  if (is.null(TstX) == TRUE) TstX <- rbind(TrnX1, TrnX2)
  if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
  else if (is.matrix(TstX) != TRUE)
    TstX <- as.matrix(TstX)
  if (is.matrix(TrnX1) != TRUE) TrnX1 <- as.matrix(TrnX1)
  if (is.matrix(TrnX2) != TRUE) TrnX2 <- as.matrix(TrnX2)
  nx <- nrow(TstX)
  blong <- matrix(rep(0, nx), nrow=1, byrow=TRUE,
                  dimnames=list("blong", 1:nx))
  n1 <- nrow(TrnX1); n2 <- nrow(TrnX2)
  mu1 <- colMeans(TrnX1); mu2 <- colMeans(TrnX2)
  # Within-class sum-of-squares matrix and the weighted overall mean
  S <- (n1-1)*var(TrnX1) + (n2-1)*var(TrnX2)
  mu <- n1/(n1+n2)*mu1 + n2/(n1+n2)*mu2
  # Project the centred test data onto the Fisher direction solve(S, mu2-mu1)
  w <- (TstX - rep(1,nx) %o% mu) %*% solve(S, mu2-mu1)
  for (i in 1:nx){
    if (w[i] <= 0)
      blong[i] <- 1
    else
      blong[i] <- 2
  }
  blong
}
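# A minimal usage sketch (not from the original text): Fisher's rule on two simulated
# samples; new observations can be passed through TstX.
set.seed(3)
Z1 <- matrix(rnorm(40, mean = 0), ncol = 2)
Z2 <- matrix(rnorm(40, mean = 3), ncol = 2)
discriminiant.fisher(Z1, Z2)                      # classify the training data
discriminiant.fisher(Z1, Z2, TstX = c(1.5, 1.5))  # classify a single new point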