作为一个开源软件,R的一个非常大的优点就是我们可以随意查看所有算法的源代码,在对这些源代码进行分析的过程中不仅可以加深对算法的认识,而且可以大步提高对R语言的掌握程度。如果可以也能根据自己的需求,对算法进行改进。如下是函数源代码的查看的方法:
1. 最直接的方法当然是直接键入函数,大部分函数源代码就可以直接显现出来
eg:
> fivenum
function (x, na.rm = TRUE)
{
xna <- is.na(x)
if (any(xna)) {
if (na.rm)
x <- x[!xna]
else return(rep.int(NA, 5))
}
x <- sort(x)
n <- length(x)
if (n == 0)
rep.int(NA, 5)
else {
n4 <- floor((n + 3)/2)/2
d <- c(1, n4, (n + 1)/2, n + 1 - n4, n)
0.5 * (x[floor(d)] + x[ceiling(d)])
}
}
2. 有些函数直接键入出不来源代码,主要是因为R是面向对象设计的程序语言,不同的对象,计算方式也不同,所以要通过methods()来进一步定义具体的查看对象:
eg: 键入 mean函数 只会出现如下结果:
> mean
function (x, ...)
UseMethod("mean")
则需要用methods()来进一步查看
> methods(mean)
[1] mean.Date mean.default mean.difftime mean.POSIXct mean.POSIXlt
see '?methods' for accessing help and source code
此时mean包含多个函数,任选取一个函数作为查看
> mean.default
function (x, trim = 0, na.rm = FALSE, ...)
{
if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
warning("argument is not numeric or logical: returning NA")
return(NA_real_)
}
if (na.rm)
x <- x[!is.na(x)]
if (!is.numeric(trim) || length(trim) != 1L)
stop("'trim' must be numeric of length one")
n <- length(x)
if (trim > 0 && n) {
if (is.complex(x))
stop("trimmed means are not defined for complex data")
if (anyNA(x))
return(NA_real_)
if (trim >= 0.5)
return(stats::median(x, na.rm = FALSE))
lo <- floor(n * trim) + 1
hi <- n + 1 - lo
x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]
}
.Internal(mean(x))
}
3. 在函数包中的函数,则需要选调函数包,然后 再直接键入函数名。
eg:查看聚类中的DBSCAN算法
> library(fpc)
> dbscan
function (data, eps, MinPts = 5, scale = FALSE, method = c("hybrid",
"raw", "dist"), seeds = TRUE, showplot = FALSE, countmode = NULL)
{
distcomb <- function(x, data) {
data <- t(data)
temp <- apply(x, 1, function(x) {
sqrt(colSums((data - x)^2))
})
if (is.null(dim(temp)))
matrix(temp, nrow(x), ncol(data))
else t(temp)
}
method <- match.arg(method)
data <- as.matrix(data)
n <- nrow(data)
if (scale)
data <- scale(data)
classn <- cv <- integer(n)
isseed <- logical(n)
cn <- integer(1)
for (i in 1:n) {
if (i %in% countmode)
cat("Processing point ", i, " of ", n, ".\n")
unclass <- (1:n)[cv < 1]
if (cv[i] == 0) {
if (method == "dist") {
reachables <- unclass[data[i, unclass] <= eps]
}
else {
reachables <- unclass[as.vector(distcomb(data[i,
, drop = FALSE], data[unclass, , drop = FALSE])) <=
eps]
}
if (length(reachables) + classn[i] < MinPts)
cv[i] <- (-1)
else {
cn <- cn + 1
cv[i] <- cn
isseed[i] <- TRUE
reachables <- setdiff(reachables, i)
unclass <- setdiff(unclass, i)
classn[reachables] <- classn[reachables] + 1
while (length(reachables)) {
if (showplot)
plot(data, col = 1 + cv, pch = 1 + isseed)
cv[reachables] <- cn
ap <- reachables
reachables <- integer()
if (method == "hybrid") {
tempdist <- distcomb(data[ap, , drop = FALSE],
data[unclass, , drop = FALSE])
frozen.unclass <- unclass
}
for (i2 in seq(along = ap)) {
j <- ap[i2]
if (showplot > 1)
plot(data, col = 1 + cv, pch = 1 + isseed)
if (method == "dist") {
jreachables <- unclass[data[j, unclass] <=
eps]
}
else if (method == "hybrid") {
jreachables <- unclass[tempdist[i2, match(unclass,
frozen.unclass)] <= eps]
}
else {
jreachables <- unclass[as.vector(distcomb(data[j,
, drop = FALSE], data[unclass, , drop = FALSE])) <=
eps]
}
if (length(jreachables) + classn[j] >= MinPts) {
isseed[j] <- TRUE
cv[jreachables[cv[jreachables] < 0]] <- cn
reachables <- union(reachables, jreachables[cv[jreachables] ==
0])
}
classn[jreachables] <- classn[jreachables] +
1
unclass <- setdiff(unclass, j)
}
}
}
}
if (!length(unclass))
break
}
rm(classn)
if (any(cv == (-1))) {
cv[cv == (-1)] <- 0
}
if (showplot)
plot(data, col = 1 + cv, pch = 1 + isseed)
out <- list(cluster = cv, eps = eps, MinPts = MinPts)
if (seeds && cn > 0) {
out$isseed <- isseed
}
class(out) <- "dbscan"
out
}