# 1 Quick beginner ----
# Run another script, then write a histogram of 100 normal draws to a PDF.
source("z.R")
pdf("xh.pdf")
hist(rnorm(100))
dev.off()
# Shell command (not R): run z.R in batch mode, without opening a window.
#   $ R CMD BATCH z.R
# List the available data sets.
data()
# Count the odd numbers in a vector.
#
# x: numeric vector of integer-valued numbers.
# Returns the number of elements whose remainder modulo 2 is 1. NA elements
# are skipped instead of aborting the count.
oddcount <- function(x) {
  # Vectorised form of the element-by-element loop. Without an explicit
  # return(), a function returns the value of its last evaluated expression.
  sum(x %% 2 == 1, na.rm = TRUE)
}
# Inspecting objects and basic data structures.
mode(z)
u <- paste("abc", "de", "f")
v <- strsplit(u, "")
m <- rbind(c(1, 4), c(2, 2))
x <- list(u = 2, v = "abc")
str(hn)
d <- data.frame(list(kids = c("Jack", "Jill"), ages = c(12, 10)))
# Read a headerless table and regress column 2 on column 1, two ways.
examquiz <- read.table("ExamsQuiz.txt", header = FALSE)
class(examquiz)
head(examquiz)
lma <- lm(examquiz[, 2] ~ examquiz[, 1])
lma <- lm(examquiz$V2 ~ examquiz$V1)
attributes(lma)
lma$coef
options(editor = "/usr/bin/vim")
getwd()
# setwd("path/to/dir")  # setwd() needs the target directory as its argument
# Getting help.
help()
?"for"
example(seq)
help.search("multivariate normal")
help(package = MASS)
# Shell command (not R), no window:
#   $ R CMD INSTALL --help
# 向量
# Position of the first element of x that equals 1.
#
# x: vector; may be NULL or empty.
# Returns the index of the first 1, or NA when x contains no 1.
#
# The loop version iterated over length(x), a single number, so it looked at
# one index only. A 1:length(x) loop would still run once for NULL input.
# match() avoids both problems.
first1 <- function(x) {
  match(1, x)
}
# Creating and filling a vector.
y <- vector(length = 2)
y[1] <- 5
y[2] <- 12
c(1, 2, 4) + c(6, 0, 9, 20, 22)  # recycling: the shorter vector is repeated
"+"(2, 3)  # operators are ordinary functions
x <- c(4, 2, 17, 5)
y <- x[c(1, 1, 3)]  # duplicate indices are allowed
i <- 5
1:i - 1  # means (1:i) - 1: the colon binds tighter than minus
seq(from = 12, to = 30, by = 3)
seq(from = 1.1, to = 2, length.out = 10)
# for (i in seq(x)) ...  iterates zero times when x is NULL, which avoids the
# "runs once on empty input" bug of 1:length(x). seq_along(x) is the safer
# spelling because seq(x) treats a single number as a count.
rep(c(5, 12, 3), 3)
rep(c(5, 12, 3), each = 2)
all(x > 8)
any(x > 8)
# Find the start positions of runs of k consecutive 1s in x.
#
# x: vector of 0s and 1s.
# k: run length to look for.
# Returns the start indices of every window of length k made up only of 1s,
# or NULL when there is none (including when x is shorter than k).
findruns <- function(x, k) {
  n <- length(x)
  runs <- NULL
  # seq_len() gives an empty loop when n < k; 1:(n - k + 1) would count down.
  for (i in seq_len(max(n - k + 1, 0))) {
    if (all(x[i:(i + k - 1)] == 1)) runs <- c(runs, i)
  }
  # The loop alone evaluates to NULL, so the result must be returned.
  runs
}
# Improved version: preallocate the result instead of growing it.
#
# x: vector of 0s and 1s.
# k: run length to look for.
# Returns the start indices of every window of length k made up only of 1s,
# or NULL when there is none.
findruns <- function(x, k) {
  n <- length(x)
  # At most n runs can start in x, so n slots are always enough.
  runs <- integer(n)
  count <- 0
  for (i in seq_len(max(n - k + 1, 0))) {
    if (all(x[i:(i + k - 1)] == 1)) {
      count <- count + 1
      runs[count] <- i
    }
  }
  # Trim the unused tail of the preallocated vector.
  if (count > 0) runs[seq_len(count)] else NULL
}
# Vectors: filtering and ifelse() ----
z <- c(5, 2, -3, 8)
z[z * z > 8]  # keep the elements whose square exceeds 8
y <- 1:10
y <- ifelse(y %% 2 == 0, 5, 12)  # 5 for even values, 12 for odd ones
x <- c(5, 2, 9, 12)
ifelse(x > 6, 2 * x, 3 * x)
# Convert a numeric vector into a sequence of up/down moves.
#
# v: numeric vector.
# Returns a vector one element shorter than v: 1 where the next value is
# larger than the current one, -1 otherwise (ties count as -1).
findud <- function(v) {
  # diff(v) is v[-1] - v[-length(v)]: each element minus its predecessor.
  vud <- diff(v)
  ifelse(vud > 0, 1, -1)
}
# Fraction of steps at which x and y move in the same direction.
#
# x, y: numeric vectors of equal length.
# Returns the mean agreement between the up/down sequences that findud()
# produces for x and for y.
udcorr <- function(x, y) {
  ud <- lapply(list(x, y), findud)
  mean(ud[[1]] == ud[[2]])
}
sign(c(-1, -2, -4, 1, 3, 0))  # maps each value to -1, 1 or 0
# Collect, per level, the positions at which g takes that value.
grps <- list()
for (gen in c("M", "F", "I")) grps[[gen]] <- which(g == gen)
identical(x, y)
typeof(y)
names(r) <- c("a", "b", "c", "d")
rownames(t)[2] <- c("j")
# Matrices ----
y[row(y) == col(y)]  # elements whose row index equals their column index
# Applying a function to the rows or columns of a matrix
apply(y, 2, mean)
colMeans(y)
f <- function(x) x / c(2, 8)
y <- apply(z, 2, f)  # c(2, 8) is recycled along each column
# For every row of a matrix, find the entry farthest from that row's median.
#
# x: numeric matrix.
# Returns one column index per row of x.
findols <- function(x) {
  # Nested helper: index of the value with the largest absolute deviation
  # from the median of a single row.
  farthest_from_median <- function(v) which.max(abs(v - median(v)))
  apply(x, 1, farthest_from_median)
}  # note the function defined inside another function
# Inserting rows or columns into a matrix: cbind(), rbind(); recycling applies.
z[, 2, drop = FALSE]  # drop = FALSE keeps the result a matrix
"["(z, , 2)  # indexing is itself a function call
dimnames(f) <- list(c("a", "b", "c"), c("d", "e"), c("f", "g"))  # array names
z <- vector(mode = "list")
z$b
unlist(z)
as.list(z)  # as.list()/unlist() convert between the two; list(z) wraps z whole
# Build a word index for a text file.
#
# tf: path of the file to read.
# Returns a list with one element per distinct word, holding the positions
# (1-based, in reading order) at which that word occurs. An empty file gives
# an empty list.
findwords <- function(tf) {
  txt <- scan(tf, "")
  wl <- list()
  # seq_along() gives an empty loop for an empty file; 1:length(txt) would
  # iterate over c(1, 0).
  for (i in seq_along(txt)) {
    wrd <- txt[i]
    wl[[wrd]] <- c(wl[[wrd]], i)
  }
  wl
}
lapply # list apply
lapply(f,median)
sapply # simplified apply: the result is turned into a vector or matrix, so a matrix can be produced directly
# Order a word list by word frequency.
#
# wrdlist: list with one element per word, each holding that word's
#          occurrence positions.
# Returns wrdlist reordered from the least to the most frequent word.
freqwl <- function(wrdlist) {
  # vapply() always yields an integer vector, even for an empty list.
  freqs <- vapply(wrdlist, length, integer(1))
  wrdlist[order(freqs)]
}
c(list(a = 1, b = 2, c = list(d = 4, e = 9)))  # lists can be recursive (nested)
c(list(a = 1, b = 2, c = list(d = 4, e = 9)), recursive = TRUE)  # flattened
# complete.cases() flags the rows without NA; d4[df5, ] would drop the others.
df5 <- complete.cases(d4)
# assign("x", y, envir = .GlobalEnv)  # assign to a global variable by name
# NOTE(review): the original assign() line was cut off after "env"; the
# completion above is an assumption.
# Extract the records of one employer from the global data frame all2006.
#
# corpname: value to match against all2006$Employer_Name.
# Returns the matching rows of all2006.
makecorp <- function(corpname) {
  is_match <- all2006$Employer_Name == corpname
  all2006[is_match, ]
}
# Merging data frames on their common column, like an SQL join.
f <- data.frame(a = c(1, 2, 3), b = c(4, 5, 6))
z <- data.frame(a = c(1, 3, 4), r = c(5, 6, 7))
merge(f, z)
merge(f, z, all = TRUE)  # all = TRUE also keeps the unmatched rows
# Check the number of fields on each line of a file before reading it.
all(count.fields("DA", sep = "") > 5)
table(count.fields("DA", sep = ""))
# NOTE(review): this file is named "abaline.data" here and "alaone.data"
# further down; confirm the real file name.
aba <- read.csv("abaline.data", header = TRUE)
# Logistic regression of aba$Gender on one predictor column.
#
# clmn: predictor vector, one value per row of the global data frame aba.
# Returns the fitted coefficients.
lftn <- function(clmn) {
  fit <- glm(aba$Gender ~ clmn, family = binomial)
  fit$coef
}
# sapply() is the friendlier, simplifying version of lapply(); on a data frame
# it works column by column.
loall <- sapply(aba[, -1], lftn)
# Merge two pronunciation tables and add the split-up sound components.
#
# fy1, fy2: data frames whose first column is the character and whose second
#           column is its pronunciation.
# Returns the merge of fy1 and fy2, extended with three columns per input:
# "<name>cons", "<name>sound" and "<name>tone", where <name> is the name of
# that input's second column.
merge2fy <- function(fy1, fy2) {
  outdf <- merge(fy1, fy2)
  for (fy in list(fy1, fy2)) {
    # One column per pronunciation, three rows: consonant, rest, tone.
    saplout <- vapply(fy[[2]], sepsoundtone, character(3))
    tmpdf <- data.frame(fy[, 1], t(saplout),
                        row.names = NULL, stringsAsFactors = FALSE)
    prefix <- names(fy)[[2]]
    names(tmpdf) <- c("Ch Char", paste0(prefix, c("cons", "sound", "tone")))
    outdf <- merge(outdf, tmpdf)
  }
  outdf
}
# Split a romanised pronunciation into consonant, remaining sound and tone.
#
# pronun: a single character string such as "yat1".
# Returns a character vector of length 3: the leading consonants (NA if there
# are none), the rest of the sound, and the trailing tone character (NA when
# the string ends in a letter, i.e. carries no tone mark).
sepsoundtone <- function(pronun) {
  nchr <- nchar(pronun)
  vowels <- c("a", "e", "i", "o", "u")
  # Count the characters in front of the first vowel.
  numcons <- 0
  for (i in seq_len(nchr)) {
    ltr <- substr(pronun, i, i)
    if (!ltr %in% vowels) numcons <- numcons + 1 else break
  }
  cons <- if (numcons > 0) substr(pronun, 1, numcons) else NA
  tone <- substr(pronun, nchr, nchr)
  # A tone is present only when the last character is not a letter. The
  # earlier code had this flag inverted, so it cut the last letter off
  # tone-less words and left the tone digit attached to the sound.
  hastone <- nchr > 0 && !(tone %in% letters)
  if (!hastone) tone <- NA
  therest <- substr(pronun, numcons + 1, nchr - hastone)
  c(cons, therest, tone)
}
# split(): split data into groups.
# unsplit(): put split data back together.
# Factors and tables ----
x <- c(5, 12, 13, 12)
xf <- factor(x)
unclass(xf)  # the integer codes stored in the factor, plus its levels
xff <- factor(x, levels = c(5, 12, 13, 88))  # levels listed explicitly
xff[2] <- 88  # changes a value; a value outside the levels is rejected
# tapply(x, f, g): apply g to x within the groups of factor f.
# x must be a vector, not a data frame.
ages <- c(25, 26, 55, 37, 21, 42)
affils <- c("R", "D", "D", "R", "U", "D")
tapply(ages, affils, mean)
d <- data.frame(list(
  gender = c("M", "M", "F", "M", "F", "F"),
  age = c(47, 59, 21, 32, 33, 24),
  income = c(55000, 88000, 32450, 76500, 12300, 45650)
))
d$over25 <- ifelse(d$age > 25, 1, 0)
tapply(d$income, list(d$gender, d$over25), mean)
split(d$income, list(d$gender, d$over25))
# Build a word index for a text file using split().
#
# tf: path of the file to read.
# Returns a list with one element per distinct word, holding the positions at
# which that word occurs. An empty file gives an empty list.
findwords <- function(tf) {
  txt <- scan(tf, "")
  # seq_along() stays empty for an empty file; 1:length(txt) would be c(1, 0).
  split(seq_along(txt), txt)
}
# by() also works on objects other than vectors (e.g. data frames), whereas
# tapply() only takes vectors.
aba <- read.csv("alaone.data", header = TRUE)
by(aba, aba$Gender, function(m) lm(m[, 2] ~ m[, 3]))
u <- c(22, 8, 33, 6, 8, 29, -2)
fl <- list(c(5, 12, 13, 12, 13, 5, 13), c("a", "bc", "a", "a", "bc", "a", "a"))
tapply(u, fl, length)
table(fl)  # contingency table
table(c(5, 12, 13, 12, 8, 5))
class(cttab)
cttab[1, 1]  # a table can be indexed the same way as a matrix
apply(cttab, 1, sum)
addmargins(cttab)  # adds the marginal totals of the variables
# Extract a sub-table from a table.
#
# tbl:      a table.
# subnames: list with one character vector per dimension of tbl, naming the
#           levels to keep in that dimension.
# Returns a table restricted to the requested levels.
subtable <- function(tbl, subnames) {
  tblarray <- unclass(tbl)
  # Argument list for "[": the array followed by one index vector per
  # dimension. do.call(f, argslist) calls f with a variable number of
  # arguments.
  dcargs <- c(list(tblarray), unname(as.list(subnames)))
  subarray <- do.call("[", dcargs)
  # Plain integer extents; array() expects a vector, not a list.
  dims <- unname(lengths(subnames))
  subtbl <- array(subarray, dims, dimnames = subnames)
  class(subtbl) <- "table"
  subtbl
}
# The k most frequent cells of a table.
#
# tbl: a table.
# k:   number of cells to report.
# Returns the first k rows of the table's data-frame form after sorting by
# decreasing Freq.
tabdom <- function(tbl, k) {
  cells <- as.data.frame(tbl)
  ranked <- cells[order(cells$Freq, decreasing = TRUE), ]
  ranked[1:k, ]
}
# Median of every column except the first, computed per Gender group.
aggregate(aba[,-1],list(aba$Gender),median)
Z <- stats::rnorm(10000)
table(cut(Z, breaks = -6:6)) # cut() creates a factor that groups the values into bins
x <- 2:18
v <- c(5, 10, 15)
t<-cbind(x, findInterval(x, v)) # findInterval() groups values by the interval of v they fall into
# Loop control ----
# Three equivalent loops: each one adds 4 to i until it passes the limit.
i <- 1
while (i < 10) i <- i + 4
i <- 1
while (TRUE) {
  i <- i + 4
  if (i > 10) break
}
i <- 1
repeat {
  i <- i + 4
  if (i > 10) break
}
# Simulate repeated committee selection and print an estimated probability.
#
# nreps: number of repetitions.
# Each repetition starts from 20 candidates and calls choosecomm() for
# committees of size 5, 4 and 3. It stops early once numabchosen is positive.
# Prints countabsamecomm / nreps.
sim <- function(nreps) {
  commdata <- list()
  commdata$countabsamecomm <- 0
  for (iter in seq_len(nreps)) {
    commdata$whosleft <- 1:20
    commdata$numabchosen <- 0
    commdata <- choosecomm(commdata, 5)
    if (commdata$numabchosen > 0) next
    commdata <- choosecomm(commdata, 4)
    if (commdata$numabchosen > 0) next
    # The result used to go to the misspelled name "cmmdata", which discarded
    # the outcome of the third committee.
    commdata <- choosecomm(commdata, 3)
  }
  print(commdata$countabsamecomm / nreps)
}
a <- matrix(1:4, 2, 2)
b <- matrix(1:6, 3, 2)
# R has no direct way to loop over a set of non-vector objects. Loop over
# their names instead and fetch each object with get() (or use lapply()).
for (m in c("a", "b")) {
  z <- get(m)
  print(lm(z[, 2] ~ z[, 1]))
}
# At top level `else` must sit on the same line as the closing brace of the
# `if` branch; otherwise the `if` statement is already complete and the
# `else` is a syntax error.
if (r == 4) {
  x <- 1
} else {
  x <- 3
  y <- 4
}
x && y  # scalar "and"
x & y   # vector (element-wise) "and"
x || y  # scalar "or"
x | y   # vector (element-wise) "or"
# Add one to x.
#
# x: numeric vector.
# Returns x + 1.
g <- function(x) {
  x + 1
}
# Functions are objects: their parts can be inspected.
formals(g)    # the argument list
body(g)       # the body
abline        # typing a function's name prints its definition
page(abline)  # view the definition in the pager
# Functions can be assigned, passed as arguments and looped over.
f1 <- function(a, b) {
  a + b
}
f2 <- function(a, b) {
  a - b
}
f <- f1
f(3, 2)
# g applies whichever two-argument function it is handed.
g <- function(h, a, b) {
  h(a, b)
}
g(f1, 3, 2)
g1 <- function(x) {
  sin(x)
}
g2 <- function(x) {
  sqrt(x^2 + 1)
}
g3 <- function(x) {
  2 * x - 1
}
plot(c(0, 1), c(-1, 1.5))
# Loop over function objects, adding each curve to the plot.
for (f in c(g1, g2, g3)) {
  plot(f, 0, 1, add = TRUE)
}
g <- function(h, a, b) h(a, b)
body(g) <- quote(2 * x + 3)  # replace the body of an existing function
ls()  # objects in the top-level environment
ls.str()
environment(f)
print(ls(envir = parent.frame(n = 1)))  # for use inside a function
print(ls())
# f, g and h form a call chain: f calls g, and g calls h. Each keeps a local
# variable of its own (a, b, c).
f<-function(){
a<-1
return(g(a)+1)
}
# Adds its local b to the argument, passes the sum to h and returns h's result.
g<-function(aa){
b<-2
aab<-h(aa+b)
return(aab)
}
# Adds its local c to the argument. The local c does not stop calls such as
# c(1, 2) from finding the function.
h<-function(aaa){
c<-3
return(aaa+c)
}
# Print the non-function objects of a frame further up the call stack.
#
# upn: how many frames above the caller to look; a negative value selects
#      the global environment.
# Prints each variable's name followed by its value.
showframe <- function(upn) {
  env <- if (upn < 0) .GlobalEnv else parent.frame(n = upn + 1)
  for (nm in ls(envir = env)) {
    value <- get(nm, envir = env)
    if (is.function(value)) next
    cat(nm, ":\n", sep = "")
    print(value)
  }
}
# Demonstrates the superassignment operator.
#
# u: numeric value.
# Side effect: stores 2 * u in a variable u outside the function.
# Returns (invisibly) the square of the non-local z.
two <- function(u) {
  # `<<-` searches the enclosing environments level by level for u and
  # assigns there, creating it at the top level if it is not found.
  u <<- 2 * u
  # Plain `<-` only creates a local z; the z on the right is the outer one.
  z <- z * z
}
# Demonstrates writing a global variable with assign().
#
# u: numeric value.
# Side effect: stores 2 * u in the global variable u.
# Returns (invisibly) twice the non-local z.
two <- function(u) {
  assign("u", 2 * u, pos = .GlobalEnv)  # assign at the top level
  z <- 2 * z
}
# Build a one-row data frame describing an event.
#
# evnttm: event time.
# evntty: event type.
# appin:  optional list of extra, application-specific fields.
# Returns a data frame with columns evnttime and evnttype followed by the
# fields of appin.
eventrow <- function(evnttm, evntty, appin = NULL) {
  rw <- c(list(evnttime = evnttm, evnttype = evntty), appin)
  # Convert the row just built; the earlier code converted base::rm instead.
  as.data.frame(rw)
}
# Insert a new event into the global event list, keeping it sorted by time.
#
# evnttm: event time.
# evntty: event type.
# appin:  optional list of extra, application-specific fields.
# Side effect: updates sim$evnts in the enclosing environment.
schedevnt <- function(evnttm, evntty, appin = NULL) {
  newevnt <- eventrow(evnttm, evntty, appin)
  # First event: the list is simply the new row.
  if (is.null(sim$evnts)) {
    sim$evnts <<- newevnt
    return()
  }
  # Find the insertion point, then splice the row between the two parts.
  inspt <- binsearch(sim$evnts$evnttime, evnttm)
  before <- if (inspt == 1) NULL else sim$evnts[1:(inspt - 1), ]
  nr <- nrow(sim$evnts)
  after <- if (inspt <= nr) sim$evnts[inspt:nr, ] else NULL
  sim$evnts <<- rbind(before, newevnt, after)
}
# Binary search for the insertion point of y in a sorted vector.
#
# x: numeric vector sorted in ascending order.
# y: value to locate.
# Returns the index at which y should be inserted to keep x sorted: the index
# of a matching element when the search lands on one, otherwise a position
# between 1 and length(x) + 1.
binsearch <- function(x, y) {
  n <- length(x)
  if (n == 0) return(1)
  lo <- 1
  hi <- n
  # Halve the interval until lo and hi are adjacent.
  while (lo + 1 < hi) {
    mid <- floor((lo + hi) / 2)
    if (y == x[mid]) return(mid)
    if (y < x[mid]) hi <- mid else lo <- mid
  }
  if (y <= x[lo]) return(lo)
  if (y < x[hi]) return(hi)
  hi + 1
}
# Create a counter.
#
# Returns a function that, each time it is called, increments a private count
# and prints its current value.
counter <- function() {
  ctr <- 0
  f <- function() {
    # `<<-` updates the ctr of the enclosing counter() call.
    ctr <<- ctr + 1
    cat("this count currently has value", ctr, "\n")
  }
  f
}  # closure
c1 <- counter()
c2 <- counter()
c1()  # c1 is a copy of f that keeps its own ctr alive between calls
# Recursive quicksort.
#
# x: numeric vector.
# Returns x sorted in ascending order.
qs <- function(x) {
  if (length(x) <= 1) return(x)
  pivot <- x[1]
  therest <- x[-1]
  # Partition the remaining elements around the pivot.
  sv1 <- therest[therest < pivot]
  sv2 <- therest[therest >= pivot]
  c(qs(sv1), pivot, qs(sv2))
}
# Replacement functions ----
"["(x, 1)  # same as x[1]
x <- "names<-"(x, value = c("a", "b"))  # same as names(x) <- c("a", "b")
"[<-"(x, 2:3, value = 55:56)  # the value x has after x[2:3] <- 55:56
# A "bookkeeping" vector: a vector that counts the writes to each element.

# Constructor.
# x: the initial vector.
# Returns an object of class "bookvec" with the data in $vec and a zeroed
# write counter per element in $wrts.
newbookvec <- function(x) {
  structure(list(vec = x, wrts = rep(0, length(x))), class = "bookvec")
}

# Read access: bv[subs] returns the selected elements of the stored vector.
"[.bookvec" <- function(bv, subs) {
  bv$vec[subs]
}

# Write access: bv[subs] <- value stores the values and adds one to the write
# count of every touched element.
"[<-.bookvec" <- function(bv, subs, value) {
  bv$vec[subs] <- value
  bv$wrts[subs] <- bv$wrts[subs] + 1
  bv
}
# Tools for writing function code ----
edit()
source("zyx.R")
# Binary operators: defining your own ----
"%a2b%" <- function(a, b) return(a + b)
3 %a2b% 5
# Anonymous functions ----
# Numerical computation and simulation ----
pmax(c(1, 2, 3), c(4, 1, 3))  # element-wise maximum
pmin(c(1, 2, 3), c(4, 1, 3))  # element-wise minimum
prod(c(1, 2, 3, 4))
cumsum(c(12, 1, 2))
cumprod(c(1, 2, 3))
nlm(function(x) return(x^2 - sin(x)), 8)  # minimum of a function
# Sorting ----
order(x)  # the indices that sort x
sort(x)
d[order(d$kids), ]
crossprod(1:3, c(5, 12, 13))  # inner product of two vectors
# %*% is matrix multiplication; the outer product is %o% or outer().
a <- matrix(c(1, 1, -1, 1), nrow = 2, ncol = 2)
b <- c(2, 4)
solve(a, b)  # solve the linear system a %*% x = b
det(a)
eigen(a)
sweep(m, 1, c(1, 4, 7), "+")
# Set operations ----
union(x, y)
intersect(x, y)
setdiff(x, y)
choose(x, y)
# Object-oriented programming ----
# deparse(expr)  # turns an expression into character strings
# NOTE(review): the original line read "daparse()", presumably a typo for
# deparse(); confirm.
methods(print)
getAnywhere(print)
invisible(x)
getAnywhere(aspell)
utils:::print.aspell(word)
methods(class = "default")
t <- c(if (is.matrix(x)) "mlm", "lm")
z <- c(if (is.matrix(x)) matrix(, 0, 3) else numeric())
# An S3 object is a list with a class attribute.
j <- list(name = "Joe", salary = 55000, union = TRUE)
class(j) <- "employee"
attributes(j)
# print() method for objects of class "employee".
#
# wrkr: an employee object with fields name, salary and union.
# ...:  accepted for compatibility with the print() generic; unused.
# Prints the three fields and returns wrkr invisibly, as print methods
# conventionally do.
print.employee <- function(wrkr, ...) {
  cat(wrkr$name, "\n")
  cat("salary", wrkr$salary, "\n")
  cat("union member", wrkr$union, "\n")
  invisible(wrkr)
}
methods(, "employee")  # list the methods defined for class "employee"
# Using inheritance ----
k <- list(name = "Kate", salary = 68000, union = FALSE, hrsthismonth = 2)
class(k) <- c("hrlyemplyee", "employee")
# Case study: compressed matrix storage
# Regression class
# Fit polynomial regressions of degree 1..maxdeg with leave-one-out
# predictions.
#
# y:      response vector.
# x:      predictor vector.
# maxdeg: highest polynomial degree to fit.
# Returns a list whose first maxdeg elements are lm fits (each with an extra
# component fitted.cvvalues), followed by x and y. The class vector carries
# both "polyreg", which print.polyreg() dispatches on, and the older spelling
# "ployreg".
ployfit <- function(y, x, maxdeg) {
  pwrs <- powers(x, maxdeg)
  lmout <- list()
  class(lmout) <- c("polyreg", "ployreg")
  for (i in seq_len(maxdeg)) {
    lmo <- lm(y ~ pwrs[, 1:i])
    # print.polyreg() reads this component under the name fitted.cvvalues.
    lmo$fitted.cvvalues <- lvoneout(y, pwrs[, 1:i, drop = FALSE])
    lmout[[i]] <- lmo
  }
  lmout$x <- x
  lmout$y <- y
  lmout
}
# print() method for "polyreg" objects.
#
# fits: list holding one fit per degree followed by the elements x and y.
# ...:  accepted for compatibility with the print() generic; unused.
# Prints the mean squared prediction error per degree, computed from each
# fit's fitted.cvvalues, and returns fits invisibly.
print.polyreg <- function(fits, ...) {
  # The last two elements of fits are x and y, not fits.
  maxdeg <- length(fits) - 2
  n <- length(fits$y)
  tbl <- matrix(nrow = maxdeg, ncol = 1)
  colnames(tbl) <- "MSPE"
  for (i in seq_len(maxdeg)) {
    fi <- fits[[i]]
    errs <- fits$y - fi$fitted.cvvalues
    spe <- crossprod(errs, errs)
    tbl[i, 1] <- spe / n
  }
  cat("mean squared prediction errors, by degree\n")
  print(tbl)
  invisible(fits)
}
# Matrix of the powers of a vector.
#
# x:  numeric vector.
# dg: highest power.
# Returns a matrix with length(x) rows whose k-th column is x^k, k = 1..dg.
powers <- function(x, dg) {
  pw <- matrix(x, nrow = length(x))
  # The local is kept under the name `prod` because cbind() uses it as the
  # column name.
  prod <- x
  # dg - 1 further columns are needed; 2:dg would count down when dg is 1.
  for (i in seq_len(max(dg - 1, 0))) {
    prod <- prod * x
    pw <- cbind(pw, prod)
  }
  pw
}
# Leave-one-out predictions from a linear regression.
#
# y:    response vector.
# xmat: predictor matrix with one row per observation.
# Returns a numeric vector: element i is the prediction for observation i
# from the model fitted to all other observations.
lvoneout <- function(y, xmat) {
  n <- length(y)
  predy <- numeric(n)
  for (i in seq_len(n)) {
    # Refit without observation i, then predict it (1 is the intercept term).
    lmo <- lm(y[-i] ~ xmat[-i, ])
    betahat <- as.vector(lmo$coef)
    predy[i] <- betahat %*% c(1, xmat[i, ])
  }
  predy
}
# Evaluate a polynomial.
#
# x:   point(s) at which to evaluate.
# cfs: coefficients in increasing order of power (cfs[1] is the constant).
# Returns cfs[1] + cfs[2] * x + cfs[3] * x^2 + ...
# Note: this definition masks stats::poly() while it is in scope.
poly <- function(x, cfs) {
  val <- cfs[1]
  prod <- 1
  dg <- length(cfs) - 1
  # seq_len() skips the loop for a constant polynomial (dg = 0).
  for (i in seq_len(dg)) {
    prod <- prod * x
    val <- val + cfs[i + 1] * prod
  }
  # The loop alone evaluates to NULL, so the result must be returned.
  val
}
# S4 classes ----
setClass("employee",
         representation(name = "character", salary = "numeric", union = "logical"))
joe <- new("employee", name = "Joe", salary = 55000, union = TRUE)
joe@salary  # slots are read with @
joe@salary <- 65000
slot(joe, "salary") <- 88000  # slot() is the functional form of @
show(joe)
# show() is the S4 counterpart of print(); define it for the class.
setMethod("show", "employee",
  function(object) {
    inorout <- ifelse(object@union, "is", "is not")
    cat(object@name, "has a salary of", object@salary,
        "and", inorout, "in the union", "\n")
  }
)
ls(pattern = "notebook")
# page(x)  # page() needs the object to display as its argument
exists("j")
# Input/output: keyboard and screen ----
scan("z4.txt", what = "")  # what = "" reads the items as character strings
v <- scan("")  # an empty file name reads from the keyboard
inits <- readline("type your initials:")
# print(x)  # print() needs the object to print as its argument
cat("abc\n")
x <- c(5, 12, 13, 8, 88)
cat(x, sep = c(".", ".", ",", "\n", "\n"))  # a different separator per gap
x <- matrix(scan("x"), nrow = 5, byrow = TRUE)
# Read a whitespace-separated file into a matrix.
#
# filename: path of the file to read.
# Returns the file's contents as a matrix.
read.matrix <- function(filename) {
  tbl <- read.table(filename)
  as.matrix(tbl)
}
z1 <- readLines("z1")  # read a whole file, one string per line
# Reading line by line through a connection. The earlier code opened the
# connection as `z` but read from `c`; one name is used throughout here.
con <- file("z4.txt", "r+")
readLines(con, n = 1)
while (TRUE) {
  rl <- readLines(con, n = 1)
  if (length(rl) == 0) {
    print("reached the end")
    break
  } else {
    print(rl)
  }
}
seek(con = con, where = 0)  # rewind to the start of the file
close(con)
# Read person records from a file of household and person records.
#
# pf:   path of the input file.
# flds: named list; each element is a c(start, end) character range of a
#       field to extract from a person record.
# Returns a data frame with one row per person record. Each row holds the
# household serial number and the requested fields.
extractpums <- function(pf, flds) {
  dtf <- data.frame()
  con <- file(pf, "r")
  # Close the connection however the function exits.
  on.exit(close(con), add = TRUE)
  repeat {
    hrec <- readLines(con, 1)
    if (length(hrec) == 0) break
    # The household record carries the serial number and the person count.
    serno <- intextract(hrec, c(2, 8))
    npr <- intextract(hrec, c(106, 107))
    if (npr > 0) {
      # The next npr lines are this household's person records.
      for (i in seq_len(npr)) {
        prec <- readLines(con, 1)
        person <- makerow(serno, prec, flds)
        dtf <- rbind(dtf, person)
      }
    }
  }
  dtf
}
# Build one output row from a person record.
#
# srn: household serial number.
# pr:  the person record (a character string).
# fl:  named list of c(start, end) character ranges.
# Returns a list with element serno plus one integer per named field.
makerow <- function(srn, pr, fl) {
  row <- list()
  row[["serno"]] <- srn
  for (field in names(fl)) {
    row[[field]] <- intextract(pr, fl[[field]])
  }
  row
}
# Extract an integer field from a fixed-width record.
#
# s:   the record (a character string).
# rng: c(start, end) character positions of the field.
# Returns the field converted to integer.
intextract <- function(s, rng) {
  as.integer(substr(s, rng[1], rng[2]))
}
# Reading a data file straight from a URL.
uci <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
# The file name was misspelled "echocardigram.data"; it matches its directory.
uci <- paste0(uci, "echocardiogram/echocardiogram.data")
ecc <- read.csv(uci)
# Writing files.
write.table(xc, "xcnew", row.names = FALSE, col.names = FALSE)
cat("abc\n", file = "u")
cat("de\n", file = "u", append = TRUE)
con <- file("www", "w")
writeLines(c("abc", "de", "f"), con)
close(con)
# Sum all numbers stored in the files of a directory tree.
#
# drtr: path of the root directory.
# Returns the total of the numbers read with scan() from every
# non-directory entry found under drtr.
sumtree <- function(drtr) {
  paths <- file.path(drtr, dir(drtr, recursive = TRUE))
  total <- 0
  for (p in paths[!file.info(paths)$isdir]) {
    total <- total + sum(scan(p, quiet = TRUE))
  }
  total
}
# Accessing the Internet: sockets ----
# NOTE(review): this looks like the inside of a function (hence `<<-`); ncon
# and port must already be defined.
cons <<- vector(mode = "list", length = ncon)
options("timeout" = 10000)
# Accept ncon connections and read one check-in message from each.
for (i in 1:ncon) {
  cons[[i]] <<-
    socketConnection(port = port, server = TRUE, blocking = TRUE, open = "a+b")
  checkin <- unserialize(cons[[i]])
}
# Send every connection its own index and the total count.
for (i in 1:ncon) {
  serialize(c(i, ncon), cons[[i]])
}
# String manipulation ----
grep("Pole", c("Equator", "North Pole", "South Pole"))
nchar("South Pole")
paste("North", "Pole", sep = "")
paste("North", "Pole")
i <- 8
s <- sprintf("the square of %d is %d", i, i^2)
substring("Equator", 3, 5)
strsplit("6-16-2011", split = "-")
regexpr("uat", "Equator")
gregexpr("iss", "Mississippi")
# Regular expressions ----
# Used by grep(), grepl(), regexpr(), gregexpr(), sub(), gsub(), strsplit().
grep("[au]", c("Equator", "North Pole", "South Pole"))
grep("o.e", c("Equator", "North Pole", "South Pole"))
grep(".", c("abc", "de", "f.g"))    # "." matches any character
grep("\\.", c("abc", "de", "f.g"))  # escaped: a literal dot
# Test whether a file name ends in a given suffix.
#
# fn:   file name.
# suff: suffix to test for, without the dot.
# Returns TRUE when the part of fn after its last dot equals suff.
testsuffix <- function(fn, suff) {
  pieces <- strsplit(fn, ".", fixed = TRUE)[[1]]
  pieces[length(pieces)] == suff
}
# Write five histograms to q1.pdf ... q5.pdf, building the file name with
# paste0(). Plain paste() inserted spaces and omitted the dot.
for (i in 1:5) {
  fname <- paste0("q", i, ".pdf")
  pdf(fname)
  hist(rnorm(100, sd = i))
  dev.off()
}
# The same with sprintf(). Each device is closed so the file is completed.
for (i in 1:5) {
  fname <- sprintf("q%d.pdf", i)
  pdf(fname)
  hist(rnorm(100, sd = i))
  dev.off()
}
# Graphics ----
plot(c(-3, 3), c(-1, 5), type = "n", xlab = "x", ylab = "y")  # empty frame
plot(x, y)
lmout <- lm(y ~ x)
abline(lmout)    # add the fitted regression line
abline(c(2, 1))  # line with intercept 2 and slope 1
lines(c(1.5, 2.5), c(3, 3))
plot(x, y, type = "l")
windows()  # open a new graphics window
# Overlay two density estimates.
d1 <- density(testscores$Exam1, from = 0, to = 100)
d2 <- density(testscores$Exam2, from = 0, to = 100)
plot(d1, main = "", xlab = "")
lines(d2)
# plot() method for polynomial regression fits.
#
# fits: list holding one lm fit per degree followed by the elements x and y.
# ...:  accepted for compatibility with the plot() generic; unused.
# Plots the data, then repeatedly asks which degree to draw: RETURN draws the
# current degree, a number draws that degree, "q" quits. Returns NULL
# invisibly.
plot.ployreg <- function(fits, ...) {
  plot(fits$x, fits$y, xlab = "X", ylab = "Y")
  # The last two elements of fits are x and y, not fits.
  maxdg <- length(fits) - 2
  cols <- c("red", "green", "blue")
  dg <- curvecount <- 1
  while (dg <= maxdg) {
    prompt <- paste("Return for XV fit for degree", dg,
                    "or type degree", "or q for quit")
    rl <- readline(prompt)
    if (rl == "q") break
    if (rl != "") {
      requested <- suppressWarnings(as.integer(rl))
      # Ask again rather than fail on input that is not a usable degree.
      if (is.na(requested) || requested < 1 || requested > maxdg) {
        message("degree must be an integer between 1 and ", maxdg)
        next
      }
      dg <- requested
    }
    # Cycle through the three colours from one curve to the next.
    lines(fits$x, fits[[dg]]$fitted.values, col = cols[curvecount %% 3 + 1])
    dg <- dg + 1
    curvecount <- curvecount + 1
  }
  invisible(NULL)
}
# Same method under the spelling used by print.polyreg(), so that objects of
# class "polyreg" dispatch to it as well.
plot.polyreg <- plot.ployreg
# Adding to an existing plot: points drawn with "+" as the plotting symbol.
points(testscores$Exam1,testscores$Exam3,pch="+")
points(c(1,2),c(2,3),pch="+")
legend()
# Place the label "Exam1" at coordinates (12, 23).
text(12,23,"Exam1")
locator(1)
recordPlot()
replayPlot()
# Draw f on [0, 2], then a gray rectangle of height f(1.3) over [1.2, 1.4].
f<-function(x) return(1-exp(-x))
curve(f,0,2)
polygon(c(1.2,1.4,1.4,1.2),c(0,0,f(1.3),f(1.3)),col="gray")
# Square root of t^2 + 1.
#
# t: numeric vector.
# Returns (t^2 + 1)^0.5. The power used to sit outside return(), so it was
# never applied and the function returned t^2 + 1.
g <- function(t) {
  (t^2 + 1)^0.5
}
# Plot g as a line through 10000 points on [0, 5].
x<-seq(0,5,length=10000)
y<-g(x)
plot(x,y,type="l")
# Working with several graphics devices.
pdf("d12.pdf")
dev.list()
dev.set(2)
dev.copy(which=3)
dev.off()
# Surface plot of z = x^2 + x*y over a 10 x 15 grid.
library(lattice)
a<-1:10
b<-1:15
eg<-expand.grid(x=a,y=b)
eg$z<-eg$x^2+eg$x*eg$y
wireframe(z~x+y,eg)
# Debugging ----
debug(g)  # debug() takes the function itself, not a call such as g(2, 3)
stopifnot(x > 0)
g <- function(x, y) {
  t <<- list()
  stopifnot(x > 0)
  t[[1]] <- x + y
}
browser()
untrace(g)
setBreakpoint("x.R", 28)
trace(rt, browser)
untrace(rt)
options(error = dump.frames)
# Find runs of k consecutive 1s in x: the variant used for the debugging
# session below.
# NOTE(review): this looks like a deliberately buggy copy of findruns() kept
# as debugging practice. It differs from the earlier definitions in two
# places; confirm before "fixing" it.
findruns<-function(x,k){
n<-length(x)
runs<-NULL
# Stops at n-k, whereas the earlier definitions loop to n-k+1.
for ( i in 1:(n-k)){
# i:i+k-1 parses as (i:i)+k-1 because the colon binds tighter than plus and
# minus, so only the single element x[i+k-1] is tested.
if(all(x[i:i+k-1]==1)) runs<-c(runs,i)
}
return(runs)
}
findruns(c(1,0,0,1,1,0,1,1,1),2)
debug(findruns)
# Speed and memory ----
# Vectorised addition versus an explicit loop.
x <- runif(100000)
y <- runif(100000)
z <- vector(length = 100000)
system.time(z <- x + y)
system.time(for (i in 1:length(x)) z[i] <- x[i] + y[i])
":"(1, 10)  # even the colon operator is a function call
# Estimate E[max(X, Y)] for two standard normals: loop version ...
sum <- 0
nreps <- 100000
for (i in 1:nreps) {
  xy <- rnorm(2)
  sum <- sum + max(xy)
}
print(sum / nreps)
# ... and vectorised version.
nreps <- 100000
xymat <- matrix(rnorm(2 * nreps), ncol = 2)
maxs <- pmax(xymat[, 1], xymat[, 2])
print(mean(maxs))
# Vectorised Monte Carlo estimate of a two-stage probability.
#
# nreps: number of simulated repetitions.
# Returns the fraction of repetitions in which the second uniform draw falls
# below its threshold. That threshold is (nb2 + 1) / n2 when the first draw
# is at most nb1 / n1, and nb2 / n2 otherwise.
sim3 <- function(nreps) {
  nb1 <- 10
  n1 <- 18
  nb2 <- 6
  n2 <- 13
  draws <- matrix(c(runif(2 * nreps)), nrow = nreps, ncol = 2)
  first_low <- draws[, 1] <= nb1 / n1
  hit_after_low <- first_low & draws[, 2] <= (nb2 + 1) / n2
  hit_after_high <- draws[, 1] > nb1 / n1 & draws[, 2] <= nb2 / n2
  mean(hit_after_low | hit_after_high)
}
outer(1:2, 2:3, "*")
# Profiling: start the profiler, run the code, stop, then summarise.
Rprof()
invisible(powers1(x, 8))
Rprof(NULL)
summaryRprof()
# Interfacing R with other languages ----
# Rpy
# Parallel R ----
# The snow package
library(snow)
# Sum of absolute differences between a vector and each of several vectors.
#
# x: numeric vector.
# y: the vectors to compare against, laid out so that consecutive runs of
#    length(x) values form one vector each.
# Returns one value per run: the sum of |x - run|.
dst <- function(x, y) {
  absdiff <- abs(x - y)
  rowSums(matrix(absdiff, ncol = length(x), byrow = TRUE))
}
# Assign each row of this worker's chunk to its nearest centre and accumulate
# the per-group sums and counts.
#
# currctrs: matrix of current centres, one row per group.
# Uses the global matrix mchunk, this worker's share of the data.
# Returns a matrix with one row per group: the column sums of the rows
# assigned to that group, followed by their count in the last column.
findnewgrps <- function(currctrs) {
  ngrps <- nrow(currctrs)
  spacedim <- ncol(currctrs)
  sumcounts <- matrix(0, nrow = ngrps, ncol = spacedim + 1)
  for (i in seq_len(nrow(mchunk))) {
    dsts <- dst(mchunk[i, ], t(currctrs))
    j <- which.min(dsts)
    # Add the point's coordinates plus a 1 for the count. The earlier code
    # assigned to the misspelled name "sumcount".
    sumcounts[j, ] <- sumcounts[j, ] + c(mchunk[i, ], 1)
  }
  sumcounts
}
# Parallel k-means on a snow cluster.
#
# cls:         the snow cluster.
# m:           data matrix, one row per observation.
# niters:      number of iterations.
# initcenters: matrix of initial centres, one row per group.
# Returns the matrix of centres after niters iterations.
parkm <- function(cls, m, niters, initcenters) {
  n <- nrow(m)
  spacedim <- ncol(m)
  # Deal the row indices out to the workers. split() warns when n is not a
  # multiple of the worker count, so that warning is silenced locally and the
  # caller's options are left alone.
  ichunks <- suppressWarnings(split(seq_len(n), seq_along(cls)))
  # drop = FALSE keeps a one-row chunk a matrix, which findnewgrps() needs.
  mchunks <- lapply(ichunks, function(ichunk) m[ichunk, , drop = FALSE])
  # Store each worker's chunk in a variable mchunk on that worker.
  mcf <- function(mchunk) mchunk <<- mchunk
  invisible(clusterApply(cls, mchunks, mcf))
  clusterExport(cls, "dst")
  centers <- initcenters
  for (i in seq_len(niters)) {
    sumcounts <- clusterCall(cls, findnewgrps, centers)
    # Add up the per-worker sums and counts, then divide to get the means.
    tmp <- Reduce("+", sumcounts)
    centers <- tmp[, 1:spacedim] / tmp[, spacedim + 1]
    # A group with no members gives 0 / 0 = NaN; reset those entries to 0.
    centers[is.nan(centers)] <- 0
  }
  centers
}