R语言实战 - 手打代码总结
- ch1 入门
- ch2 数据结构
- ch3 图形初阶
- ch4 数据预处理
- ch5 各类函数
ch1 入门
# Two paired numeric vectors used for a first descriptive-statistics demo.
age <- c(1, 2, 3, 2, 11, 9, 3, 9, 12, 3)
weight <- c(4.4, 5.3, 7.2, 5.2, 8.5, 7.3, 6.0, 10.4, 10.2, 6.1)

# Mean and standard deviation of age.
mean(x = age)
sd(x = age)

# Correlation between the two vectors, followed by a scatter plot.
cor(x = age, y = weight)
plot(x = age, y = weight)
# Session, help and workspace commands. They are meant to be typed one at a
# time at an interactive console, not sourced as a script.
# q() ends the R session.
q()
# demo() with no argument lists the available demos; a named call runs one.
demo()
demo(Hershey)
demo(persp)
demo(image)
# Help: ? opens a help page, help(package = ) shows a package index,
# example() runs the examples from a help page.
# "package_name" is a placeholder.
?apply
help(package="package_name")
example(apply)
example("Arthritis")
# data() with no argument lists the available datasets.
data()
# "foo" is a placeholder vignette name.
vignette("foo")
# Names of functions whose name contains "apply".
apropos("apply",mode = "function")
help.start()
# Command history: show it, write it to a file, read it back.
history()
savehistory("myfile")
loadhistory("myfile")
# Save object `a` to a file.
# NOTE(review): `a` is not defined before this point in this file.
save(a,file="myfile2")
# options() shows the current settings; digits sets how many significant
# digits are printed.
options()
options(digits = 3)
# Packages: library search paths, installed/attached packages, installing
# and updating.
.libPaths()
library()
search()
installed.packages()
install.packages("glue")
update.packages()
update.packages(ask = FALSE)
# `name` is a placeholder for a package name.
library(name)
# Working directory. The two setwd() calls spell the same Windows path,
# once with forward slashes and once with escaped backslashes.
getwd()
setwd("D:/data")
setwd("D:\\data")
# Workspace: list objects, remove one (`zz` is a placeholder name).
ls()
rm(zz)
x<-runif(20)
hist(x)
# Run a script file ("filename" is a placeholder).
source("filename")
# Send console output to a file: append = TRUE adds to the file instead of
# overwriting it, split = TRUE keeps the output on the console as well.
sink("filename", append = TRUE, split = TRUE)
# Close the current graphics device.
dev.off()
# Open a file-based graphics device of the given format.
bmp("filename.bmp")
jpeg("filename.jpg")
pdf("filename.pdf")
png("filename.png")
# Worked example: install the vcd package and explore its Arthritis dataset.
help.start()
install.packages("vcd")
library(vcd)
help(package="vcd")
help(Arthritis)
Arthritis
data("Arthritis")
example("Arthritis")
ch2 数据结构
# Vectors: numeric, character, and an integer sequence.
a <- c(1, 2, 4)
b <- c("one", "two", "three")
# The values are quoted, so this is a character vector, not a logical one.
c <- c("TRUE", "FALSE")
d <- c(2:6)

# Matrices. matrix() fills column by column unless byrow = TRUE.
y <- matrix(1:20, nrow = 5)
rnames <- c("R1", "R2", "R3", "R4")
cnames <- c("C1", "C2", "C3", "C4", "C5")
mymatrix <- matrix(1:20, nrow = 4, byrow = TRUE,
                   dimnames = list(rnames, cnames))
# rownames() and row.names() both set the row names of a matrix.
rownames(y) <- c("a", "b", "c", "d", "e")
row.names(y) <- c("a", "b", "c", "d", "e")
y[1, c(2, 3)]
y[1:3, ]
# Select a row by name. "a" is a row name of `y` (the rows of `mymatrix` are
# R1..R4), and the trailing comma is what makes this a row selection; a
# single index without the comma looks up element names and returns NA.
y["a", ]

# Three-dimensional array with named dimensions.
dim1 <- c("A1", "A2")
dim2 <- c("B1", "B2", "B3")
dim3 <- c("C1", "C2", "C3", "C4")
myarray <- array(1:24, c(2, 3, 4), dimnames = list(dim1, dim2, dim3))

# Data frames: built from existing vectors, or from inline columns.
patientID <- c(1, 2, 3, 4)
age <- c(24, 34, 55, 34)
mydataframe <- data.frame(patientID, age)
mydataframe2 <- data.frame(
  diabetes = c("Type1", "Type2", "Type3", "Type4"),
  status = c("Poor", "Improved", "Excellent", "Poor")
)
class(mymatrix)
class(myarray)
class(mydataframe)

# Column selection by position, by name, and with $; then a two-way table.
mydataframe[1:2]
mydataframe[c("patientID", "age")]
mydataframe$patientID
table(mydataframe2$diabetes, mydataframe2$status)
# attach() puts the columns of mtcars on the search path so they can be used
# by bare name; detach() removes them again.
attach(mtcars)
plot(mpg,disp)
detach(mtcars)
# with() evaluates an expression using the data frame's columns without
# attaching it.
with(mtcars,
plot(mpg,disp)
)
# Assignments made with <- inside with() are local to that call ...
with(mtcars,{
stats<-summary(mpg)
stats
})
# ... so unless `stats` was created elsewhere, this line fails with
# "object 'stats' not found". It is kept as a demonstration of that scoping.
stats
# Factors.
# `sex` is stored as the numeric codes 1/2 so that levels = c(1, 2) matches
# the data; labels then map the codes to "Male"/"Female". Passing the strings
# "Male"/"Female" with numeric levels would turn every element into NA.
sex <- c(1, 2, 2, 1)
sex <- factor(sex, levels = c(1, 2), labels = c("Male", "Female"))

# Ordered factor with an explicit level order (Poor < Improved < Excellent).
status <- c("Poor", "Improved", "Excellent", "Poor")
status <- factor(status, ordered = TRUE,
                 levels = c("Poor", "Improved", "Excellent"))
attributes(status)

# Factors inside a data frame. Without `levels`, factor() sorts the levels
# alphabetically, so `status` below is ordered Excellent < Improved < Poor.
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 55, 66)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
diabetes <- factor(diabetes)
status <- factor(status, ordered = TRUE)
patientdata <- data.frame(patientID, age, diabetes, status)
str(diabetes)
str(patientdata)
summary(patientdata)
# sum() returns NA as soon as the input contains an NA.
a <- c(1, 2, 3, 4, NA)
sum(a)

# Interactive data entry: start from an empty data frame and edit it in the
# spreadsheet-like editor. edit() returns the edited copy; fix() edits in
# place.
mydata <- data.frame()
mydata <- edit(mydata)
fix(mydata)

# Delimited text: comma-separated file with a header row, the StudentID
# column used as row names, and explicit column types.
data <- read.table(
  "student.csv",
  header = TRUE,
  row.names = "StudentID",
  sep = ",",
  colClasses = c("character", "character", "character",
                 "numeric", "numeric", "numeric")
)
?file

# Excel: the xlsx package exports read.xlsx() (read_xlsx() belongs to the
# readxl package, which is not loaded here). The second argument is the
# sheet index.
library(xlsx)
setwd("D:/data")
data2 <- read.xlsx("Numbers2.xlsx", 1)

# SPSS file via Hmisc; use.value.labels = TRUE converts labelled values to
# factors.
library(Hmisc)
data3 <- spss.get("li2.1.sav", use.value.labels = TRUE)

# Stata file via foreign.
library(foreign)
data4 <- read.dta("mydata.dta")
ch3 图形初阶
# Write a scatter plot of mpg against weight, with its least-squares line and
# a title, to "mpgonweight.pdf".
data(mtcars)
pdf("mpgonweight.pdf")
with(mtcars, {
  plot(wt, mpg)
  abline(lm(mpg ~ wt))
  title("Regression of MPG on Weight")
})
# Closing the device is what actually finishes the PDF file.
dev.off()
# Sample vectors reused by the plotting examples in this chapter.
a <- c(20, 30, 40, 45, 60)
b <- c(16, 20, 27, 40, 60)
plot(x = a, y = b, type = "b")

# Open a new graphics device and snapshot the modifiable par() settings so
# they can be restored after the temporary change below.
dev.new()
opar <- par(no.readonly = TRUE)
par(lty = 2, pch = 17)
plot(x = a, y = b, type = "b")
par(opar)

# The same kind of settings passed per call instead of through par().
plot(x = a, y = b, type = "b", lty = 2, pch = 21, bg = "red", col = "black")

# Built-in colour names and palette generators.
colors()
rainbow(n = 5)
heat.colors(n = 10)
terrain.colors(n = 10)
topo.colors(n = 10)
cm.colors(n = 10)
# RColorBrewer palettes: show all of them, print the palette information
# table, then take 7 colours from the "BrBG" palette and draw one bar per
# colour.
library(RColorBrewer)
display.brewer.all()
brewer.pal.info
mycol<-brewer.pal(7,"BrBG")
barplot(rep(1,7),col=mycol)
# rainbow is a function; it has to be called to obtain the ten colour codes
# that pie() uses both as slice fill and as slice labels.
mycol2 <- rainbow(10)
pie(rep(1, 10), col = mycol2, labels = mycol2)
# Text settings: italic (3) axis labels at 1.5x size, bold-italic (4) main
# title at 2x size.
par(font.lab=3,cex.lab=1.5,font.main=4,cex.main=2)
# Plot region of 3 x 3 inches; margins in inches given as
# bottom, left, top, right.
par(pin=c(3,3),mai=c(0.5,0.5,0.5,2))
plot(a,b,type="b")
# Restore the settings saved in `opar` earlier in the file.
par(opar)
# Everything set in one call: colour, line type, symbol, line width,
# titles, axis labels and axis limits.
plot(a,b,type="b",
col="red",lty=2,pch=2,lwd=2,
main="test",sub="test_sub",
xlab="Dosage",ylab="Drug regression",
xlim=c(0,60),ylim=c(0,70))
# Draw the points without the default annotation, then add the titles and
# axis labels with title(). Without ann = FALSE, plot() already writes the
# axis labels "a" and "b" and title() would print on top of them.
plot(a, b, ann = FALSE)
title(main = "标题", col.main = "red",
      sub = "副标题", col.sub = "blue",
      xlab = "x轴", ylab = "y轴",
      col.lab = "green", cex.lab = 0.75)
# Custom axes: two series drawn on one plot with hand-built axes.
x<-1:10
y<-x
z<-10/x
# yaxt = "n" suppresses the default y axis, ann = FALSE the default labels.
plot(x,y,type="b",yaxt="n",ann=FALSE)
# Left axis (side 2) in red; las = 2 draws the labels perpendicular to the axis.
axis(2,at=x+1,labels = x+1,col.axis="red",las=2)
lines(x,z,type="b",col="blue")
# Right axis (side 4) with ticks at the z values, smaller blue labels and
# tick marks drawn outward (negative tck).
axis(4,at=z,labels = round(z,digits = 2),col.axis="blue",las=2,cex.axis=0.7,tck=-0.05)
# minor.tick() comes from Hmisc and adds minor tick marks to both axes.
library(Hmisc)
minor.tick(nx=2,ny=2,tick.ratio = 0.5)
# Reference lines, legends and text annotations on the a/b plot.
plot(a, b, type = "b")
# Horizontal lines at y = 30, 40, 50 and vertical lines at x = 40, 50, 60.
# The line type is left at the par() default; pass lty = ... to change it.
abline(h = c(30, 40, 50), v = c(40, 50, 60), col = "yellow")
# Legend placed by keyword, moved 5% of the plot size away from the corner.
legend("topleft", inset = 0.05, title = "图例标题", legend = c("A", "B"),
       lty = c(1, 6), pch = c(15, 17), col = c("red", "blue"))
# Legend placed interactively: locator(1) waits for one mouse click.
legend(locator(1), title = "图例标题", c("A", "B"),
       lty = c(1, 6), pch = c(15, 17), col = c("red", "blue"))

plot(a, b, type = "b")
# Text inside the plot region: pos = 4 puts it to the right of (23, 23),
# offset moves it further away from that point.
text(23, 23, "morenziti", pos = 4, offset = 5)
# Text in the right-hand margin (side 4).
mtext(side = 4, "mtext", line = 0.5)
# Label every point of the wt/mpg scatter plot with its car name.
attach(mtcars)
plot(wt, mpg)
text(wt, mpg, row.names(mtcars), pos = 4)
detach(mtcars)

# par(mfrow = c(2, 3)): a 2 x 3 grid of plots filled row by row.
par(mfrow = c(2, 3))
attach(mtcars)
plot(wt, mpg)
barplot(c(1, 1, 1, 2, 2, 2, 3, 3))
boxplot(wt)
hist(wt)
pie(rep(1, 5))
hist(wt, ann = FALSE)
detach(mtcars)

# layout(): plot 1 spans the whole first row, plots 2 and 3 share the second
# row; widths/heights give the relative column and row sizes.
layout(matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE),
       widths = c(2, 1), heights = c(1, 2))
attach(mtcars)
hist(wt)
hist(mpg)
hist(disp)
# Every attach() needs its detach(); otherwise the mtcars columns stay on the
# search path for the rest of the session.
detach(mtcars)
# Restore the settings saved in `opar` earlier in the file.
par(opar)

# par(fig = ): place plots by figure-region coordinates (x1, x2, y1, y2 as
# fractions of the device); new = TRUE adds to the existing page.
par(fig = c(0, 0.8, 0, 0.8))
plot(mtcars$wt, mtcars$mpg)
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
boxplot(mtcars$wt, horizontal = TRUE, axes = FALSE)
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
boxplot(mtcars$mpg, axes = FALSE)
mtext("标题", side = 3, outer = TRUE, line = -6)
ch4 数据预处理
# Three ways to add a column that is the sum of x1 and x2.
# `mydata` is expected to have columns x1 and x2 (not created in this file).
mydata$sum<-mydata$x1+mydata$x2
attach(mydata)
mydata$sum<-x1+x2
detach(mydata)
mydata<-transform(mydata,sum=x1+x2)
# transform() templates: add a column, drop a column (NULL), derive a column.
# `data`, a, b, c, somecol, col1 and col2 are placeholders, and the results
# are not assigned here.
transform(data,newcol=c(a,b,c))
transform(data,somecol=NULL)
transform(data,newcol=col1+col2)
# within() evaluates the block with df's columns visible and returns df with
# the new column added: an age category derived from `age`.
df<-within(df,{
newcol<-NA
newcol[age>75]<-"Elder"
newcol[age>=55&age<=75]<-"Middle Aged"
newcol[age<55]<-"Young"
})
# Rename the second column by position.
names(df)[2]<-"v2"
# plyr::rename() takes a named vector of the form c(old = "new").
# NOTE(review): `oldname` appears twice here; this reads as a template, so
# replace both with the real column names.
library(plyr)
rename(df,c(oldname="newname",oldname="newname2"))
# Tests for special values, shown on a small vector. The vector is defined
# first because each of these functions requires an argument.
y <- c(1, 2, 3, NA)
is.infinite(y)
is.nan(y)
is.na(y)
# Missing-value handling on a data frame `df` and a vector `x` that are
# defined elsewhere.
# Element-wise NA test on columns 2 to 4.
is.na(df[,2:4])
# Recode the value 99 in `age` as missing.
df$age[df$age==99]<-NA
# na.rm = TRUE drops the NAs before summing.
s<-sum(x,na.rm = TRUE)
# Returns df without the rows that contain an NA (the result is not assigned).
na.omit(df)
# Dates.
# Strings in "yyyy-mm-dd" form parse without a format.
date1 <- as.Date(c("2020-04-01", "2020-04-02"))
date1
class(date1)

# Any other layout needs an explicit format string.
date2 <- as.Date(c("04/01/2020", "04/02/2020"), format = "%m/%d/%Y")
date2
class(date2)

# Current date, current date-time, and the current date-time as a string.
Sys.Date()
Sys.time()
date()

# Formatting a date for output: full month name, day and year; weekday name.
today <- Sys.Date()
format(today, format = "%B %d %Y")
format(today, format = "%A")

# Date arithmetic: subtraction gives a difference in days; difftime() lets
# the unit be chosen.
startdate <- as.Date("2000-01-01")
enddate <- as.Date("2020-04-01")
days <- enddate - startdate
days
difftime(enddate, startdate, units = "weeks")

# Date back to character.
strDates <- as.character(startdate)
# Type tests (is.*) and the matching conversions (as.*).
# One call per line, because R cannot parse two calls separated only by a
# space. `val` is a small sample value so that every call has an argument.
val <- c(1, 2, 3)
is.numeric(val)
as.numeric(val)
is.character(val)
as.character(val)
is.vector(val)
as.vector(val)
is.matrix(val)
as.matrix(val)
is.data.frame(val)
as.data.frame(val)
is.factor(val)
as.factor(val)
is.logical(val)
as.logical(val)
# order() returns the index permutation that sorts x; sort() returns the
# sorted values. Neither changes x itself, as the last line shows.
x <- c(3, 1, 6, 32, 7, 2, 9)
order(x)
sort(x)
x

# Sorting the rows of a data frame: by one key, by two keys, and by gender
# ascending with age descending (the minus sign reverses a numeric key).
# Assumes `data` has `age` and `gender` columns -- TODO confirm against the
# file that was read into `data`.
ordered_data <- data[order(data$age), ]
ordered_data2 <- data[order(data$gender, data$age), ]
ordered_data3 <- data[order(data$gender, -data$age), ]
# Two small tables that share an `id` column and a `country` column.
df1 <- data.frame(
  id = c(1, 2, 5, 6, 8, 9),
  country = c("a", "b", "d", "g", "h", "k"),
  age = c(34, 66, 27, 77, 56, 45)
)
df2 <- data.frame(
  id = c(2, 3, 5, 6, 7, 8),
  country = c("b", "d", "g", "l", "r", "w"),
  # Alternating F/M across the six rows.
  gender = rep(c("F", "M"), times = 3)
)

# Inner join on id only: the two country columns are kept side by side with
# .x/.y suffixes.
total <- merge(x = df1, y = df2, by = "id")
# Inner join on both id and country.
total2 <- merge(x = df1, y = df2, by = c("id", "country"))
# Keeping columns: by position, by name (several or one).
df <- iris
df[, c(1:3)]
df[c("Sepal.Length", "Sepal.Width", "Petal.Length")]
df["Sepal.Length"]

# Dropping columns: build a logical mask of the names to remove and negate it.
v_logical <- names(df) %in% c("Sepal.Length", "Sepal.Width")
df <- df[!v_logical]

# Selecting rows: by position, then by condition. The comma after the
# condition is required; without it the logical vector is treated as a
# column selector and the call fails.
newdata <- women[1:3, ]
newdata2 <- women[women$height > 50 & women$weight < 120, ]
# Rows of `leadership` whose date falls inside a range. `leadership` is
# expected to exist already with a `date` column holding "mm/dd/yy" strings.
leadership$date <- as.Date(leadership$date, "%m/%d/%y")
startdate <- as.Date("2009-01-01")
enddate <- as.Date("2009-10-31")
# which() turns the condition into row numbers; the trailing comma makes
# them select rows rather than columns.
newdate <- leadership[which(leadership$date >= startdate &
                              leadership$date <= enddate), ]

# subset(): row condition plus a column selection in one call.
newdata <- subset(data, age >= 35 | age < 24, select = c(q1, q2, q3, q4))

# Random sample of 100 rows without replacement. seq_len() is used instead
# of 1:nrow(data) so that an empty data frame yields an empty index rather
# than c(1, 0).
testdata <- data[sample(seq_len(nrow(data)), 100, replace = FALSE), ]

# The same kind of selection and aggregation written as SQL via sqldf.
library(sqldf)
sqldf("select * from mtcars where carb=1 order by mpg", row.names = TRUE)
sqldf("select avg(mpg) as avg_mpg,avg(disp) as avg_disp,gear from mtcars where cyl in (4,6) group by gear")
ch5 各类函数
# Reference list of numeric functions, applied to a numeric vector `x` that
# is defined earlier in the file.
abs(x)
sqrt(x)
ceiling(x)
floor(x)
trunc(x)
round(x, digits = 2)
signif(x, digits = 2)

# Trigonometric, inverse, hyperbolic and inverse hyperbolic functions.
# One call per line: R cannot parse several calls separated only by spaces.
cos(x)
sin(x)
tan(x)
acos(x)
asin(x)
atan(x)
cosh(x)
sinh(x)
tanh(x)
acosh(x)
asinh(x)
atanh(x)

# Logarithms and exponential. `n` is a placeholder for the base; assign it
# before running the first line.
log(x, base = n)
log(x)
log10(x)
exp(x)

# Descriptive statistics. `n` in diff() is again a placeholder (the lag).
mean(x)
median(x)
sd(x)
var(x)
mad(x)
quantile(x, probs = 0.75)
range(x)
sum(x)
diff(x, lag = n)
min(x)
max(x)

# Standardising: scale() centres each column to mean 0 and scales it to
# standard deviation 1.
scale(x, center = TRUE, scale = TRUE)
newdata <- scale(mydata)
# NOTE(review): `sd` and `m` look like placeholders for the target standard
# deviation and mean. As written, `sd` resolves to the base function unless a
# numeric `sd` has been assigned first -- confirm and substitute real values.
newdata2 <- scale(mydata) * sd + m
newdata2 <- transform(mydata, x = scale(x) * 10 + 50)

# Normal distribution family: density, distribution function, quantile
# function and random generation. Typing a name without parentheses prints
# the function definition.
dnorm
pnorm
qnorm
rnorm
# Standard normal density curve over evenly spaced points between -3 and 3.
x <- pretty(c(-3, 3), 30)
y <- dnorm(x)
plot(x, y, type = "l")

# Fixing the seed makes the random draws reproducible.
set.seed(100)
runif(10)

# Multivariate normal sample: 100 draws with the given mean vector and
# covariance matrix. The mean vector is called `mu` rather than `mean`:
# a variable named `mean` would shadow base::mean for any later code that
# passes `mean` as a value.
library(MASS)
mu <- c(230.7, 146.7, 3.6)
sigma <- matrix(c(15360.8, 6721.2, -47.1,
                  6721.2, 4700.9, -16.5,
                  -47.1, -16.5, 0.3), nrow = 3)
mvdata <- mvrnorm(100, mu, sigma)
mvdata <- as.data.frame(mvdata)
names(mvdata)
# Character functions: length of each string and a substring by position.
x <- c("ab", "deg", "defrs")
nchar(x)
substr(x, start = 2, stop = 4)

# Regular-expression matching with grep(). The numbers are converted to
# strings before matching; value = TRUE returns the matching elements
# instead of their positions.
x <- c(310, 456, 311, 431, 435, 534, 312, 313, 320, 321, 322, 323, 314, 324, 317, 3231)
grep(pattern = "^3", x = x, value = TRUE)
grep(pattern = "4$", x = x, value = TRUE)
grep(pattern = "3.2", x = x, value = TRUE)
grep(pattern = "*31", x = x, value = TRUE)
grep(pattern = "3.*1", x = x, value = TRUE)
grep(pattern = "3|1", x = x, value = TRUE)
grep(pattern = "[1]", x = x, value = TRUE)

# Joining strings without and with a separator, and changing case.
paste0("x", 1:3)
paste("x", 1:3, sep = "M")
toupper("abc")
tolower("ABC")
# Mixing numbers with a string makes the whole vector character.
x <- c(1, 2, 3, 4, "abc")
length(x)
seq(1, 10, 2)
rep(1:3, 3)
# cut() and pretty() need numeric input, so they are shown on the numeric
# part of x; the 2 is the number of intervals requested.
cut(as.numeric(x[1:4]), 2)
pretty(as.numeric(x[1:4]), 2)
# cat() with no arguments prints nothing.
cat()
# apply() over the rows (MARGIN = 1) of a 6-row matrix of random normals.
set.seed(11)
m <- matrix(rnorm(30), nrow = 6)
result <- apply(X = m, MARGIN = 1, FUN = mean)
class(result)
# Extra arguments are passed on to FUN: here a 20% trimmed mean per row.
apply(X = m, MARGIN = 1, FUN = mean, trim = 0.2)
# Worked example: combine three test scores into one standardised score,
# grade by quintile, split the names and sort by last name.
options(digits = 2)

Student <- c(
  "John Davids", "Angela Williams", "Bullwinkle Moose",
  "David Jones", "Janice Markhammer", "Cheryl Cushing",
  "Reuven Ytzrhak", "Greg Knox", "Joel England", "Mary Rayburn"
)
Math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
Science <- c(95, 99, 80, 82, 75, 85, 80, 95, 89, 86)
English <- c(25, 22, 18, 15, 20, 28, 15, 30, 27, 18)
roster <- data.frame(Student, Math, Science, English, stringsAsFactors = FALSE)

# Standardise the three score columns, then average them per student.
z <- scale(roster[, 2:4])
score <- apply(X = z, MARGIN = 1, FUN = mean)
roster <- cbind(roster, score)

# Cut points at the 80th, 60th, 40th and 20th percentiles of the score.
y <- quantile(score, probs = c(0.8, 0.6, 0.4, 0.2))
roster$grade[score >= y[1]] <- "A"
roster$grade[score >= y[2] & score < y[1]] <- "B"
roster$grade[score >= y[3] & score < y[2]] <- "C"
roster$grade[score >= y[4] & score < y[3]] <- "D"
roster$grade[score < y[4]] <- "F"

# Split "First Last" on the space; "[" picks the 1st / 2nd piece per student.
name <- strsplit(roster$Student, split = " ")
Lastname <- sapply(name, "[", 2)
Firstname <- sapply(name, "[", 1)

# Replace the Student column with the two name columns, then sort.
roster <- cbind(Firstname, Lastname, roster[, -1])
roster <- roster[order(Lastname, Firstname), ]
# for: repeat once per element.
for (i in 1:10) {
  print(i)
}

# while: repeat as long as the condition holds; i ends at 0.
i <- 10
while (i > 0) {
  print("Helloworld")
  i <- i - 1
}

# if / else if / else take a single TRUE/FALSE, so the combined condition
# uses the scalar && rather than the elementwise &.
if (i < 0) {
  print("a")
} else if (i > 0 && i < 1) {
  print("b")
} else {
  print("c")
}

# ifelse() is the vectorised form: one result per element of the test.
ifelse(i > 0, "positive", "negative")

# switch() picks the value whose name matches the first argument.
switch("Tom",
       Tom = "lawyer",
       Mike = "Teacher",
       Karl = "Student")
# Group-wise summaries with aggregate() and the formula interface
# (value ~ grouping variables).
# NOTE(review): `diamonds` is presumably the dataset shipped with ggplot2;
# no library() call for it is visible in this file -- confirm it is loaded
# before running.
diamonds
# Mean price per cut, then per cut/color combination.
aggregate(price~cut,diamonds,mean)
aggregate(price~cut+color,diamonds,mean)
# cbind() on the left-hand side summarises several columns at once.
aggregate(cbind(price,carat)~cut,diamonds,mean)
aggregate(cbind(price,carat)~cut+color,diamonds,mean)
# Number of rows per color, then number of distinct cuts per color.
aggregate(cut~color,diamonds,length)
aggregate(cut~color,diamonds,function(x) length(unique(x)))
# Install reshape2 only when it is missing, instead of on every run.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library(reshape2)

# melt(): wide to long. ID and Time stay as identifier columns; X1 and X2
# are stacked into `variable` / `value`. The argument is spelled out as
# id.vars rather than relying on partial matching of `id`.
originaldata <- data.frame(ID = c(1, 1, 2, 2), Time = c(1, 2, 1, 2),
                           X1 = c(5, 3, 6, 2), X2 = c(6, 5, 1, 4))
meltdata <- melt(originaldata, id.vars = c("ID", "Time"))

mydata <- data.frame(ID = c(1, 1, 2, 2), Time = c(1, 2, 1, 2),
                     X1 = c(5, 3, 0, 2), X2 = c(6, 5, 1, 4))
md <- melt(mydata, id.vars = c("ID", "Time"))

# dcast() with an aggregation function: rows ~ columns, cells are means.
dcast(md, ID ~ variable, mean)
dcast(md, Time ~ variable, mean)
dcast(md, ID ~ Time, mean)

# dcast() without aggregation: each formula gives one cell per observation.
dcast(md, ID + Time ~ variable)
dcast(md, ID + variable ~ Time)
dcast(md, ID ~ variable + Time)