# R语言入门 (Introduction to R)
# 1. 基本操作 (Basic operations) ----
# Assignment: `<-` binds in the current environment; `<<-` assigns in an
# enclosing environment (at top level both end up in the global workspace).
x <- 3
x <<- 3

# sum() adds all of its arguments, but mean() averages only its first
# argument; the remaining values are matched to other parameters.
sum(1, 2, 3, 4, 5)
mean(1, 2, 3, 4, 5)     # not the average of the five numbers
mean(c(1, 2, 3, 4, 5))  # wrap the values in one vector instead

# Inspect the workspace.
ls()
ls.str()
str(x)
ls(all.names = TRUE)    # also lists names that start with "."

# Remove objects; rm() warns about any name it cannot find.
rm(x)
rm(a, b, x1, mtcars)
rm(list = ls())         # clears every visible object

# Save the workspace image to the working directory.
save.image()
# 2. R包的安装 (Installing packages) ----
# Install a single package.
install.packages("vcd")

# Show the library directories R searches, then list what is installed.
.libPaths()
library()

# Several packages can be installed in one call.
install.packages(c("AER", "ca"))

# Bring installed packages up to date.
update.packages()
# 3. R包的使用 (Using packages) ----
# Attach the package. library() stops with an error when the package is
# missing, whereas require() returns FALSE with a warning.
library(vcd)
require(vcd)

# Package-level documentation.
help(package = "vcd")
help(package = "ggplot2")
library(help = "vcd")

# Explore what the attached package provides.
Arthritis
ls("package:vcd")
data(package = "vcd")

# Remove the package from the search path again.
detach("package:vcd")

# Matrix describing every installed package; column 1 holds the names.
installed.packages()
installed.packages()[, 1]
# Record the names of the installed packages so the same set can be
# restored later (e.g. on another machine or after upgrading R).
Rpack0 <- installed.packages()[, 1]

# save() must be given an object that exists; the object holding the names
# is `Rpack0`.
save(Rpack0, file = "Rpack.Rdata")

# load() restores the saved object under its original name (`Rpack0`) and
# returns only the *names* of the restored objects, so its return value must
# not be used as the package list.
load(file = "Rpack.Rdata")
Rpack1 <- Rpack0

# Install whatever is on the saved list but absent from the current library.
missing_pkgs <- setdiff(Rpack1, installed.packages()[, 1])
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# 4. 获取帮助 (Getting help) ----
# Open the HTML help index.
help.start()

# Two equivalent ways to open a help page.
help(sum)
?plot

# Show a function's argument list without opening its help page.
args(plot)
# Scatter plot of the built-in `cars` data; the grid is drawn first so the
# points are painted on top of it.
Speed <- cars$speed
Distance <- cars$dist
plot(
  Speed, Distance,
  panel.first = grid(8, 8),
  pch = 0, cex = 1.2, col = "blue"
)

# Same plot with a dashed lowess smoother drawn underneath the points.
plot(
  Speed, Distance,
  panel.first = lines(stats::lowess(Speed, Distance), lty = "dashed"),
  pch = 0, cex = 1.2, col = "blue"
)

# Show each value of plot()'s `type` argument in a 3 x 3 grid of panels.
x <- 0:12
y <- sin(pi / 5 * x)
op <- par(mfrow = c(3, 3), mar = .1 + c(2, 2, 3, 1))
for (tp in c("p", "l", "b", "c", "o", "h", "s", "S", "n")) {
  plot(y ~ x, type = tp, main = paste0("plot(*, type = \"", tp, "\")"))
  if (tp == "S") {
    # Overlay the lower-case "s" steps for comparison.
    lines(x, y, type = "s", col = "red", lty = 2)
    mtext("lines(*, type = \"s\", ...)", col = "red", cex = 0.8)
  }
}
par(op)  # restore the graphics settings saved above
# Run the examples from a help page, or a packaged demo.
example("plot")
example("hist")
demo(graphics)

# Package documentation and the list of available vignettes.
help(package = "ggplot2")
vignette()

# Look up a topic after attaching the package.
library("vcd")
help("vcd")

# `??topic` searches the installed documentation, like help.search().
??vcd
help.search("heatmap")
??heatmap

# Find objects whose names contain "sum"; optionally restrict to functions.
apropos("sum")
apropos("sum", mode = "function")

# Search the online R site search.
RSiteSearch("matlab")
# 5. Excel案例 (Excel case study) ----
# 6. 内置数据集 (Built-in datasets) ----
# Overview of the bundled `datasets` package and of all loadable data sets.
help(package = "datasets")
data()

# Print and plot a built-in data set.
rivers
plot(rivers)

help(mtcars)
names(euro)

# Combine several parallel state.* objects into one data frame.
state <- data.frame(
  state.name, state.abb, state.area, state.division, state.region
)
state

heatmap(volcano)

# A few more built-in data sets.
Titanic
cars
iris
iris3

# Data sets shipped with other packages.
data(package = "MASS")
data(package = .packages(all.available = TRUE))
data(Chile, package = "carData")
Chile
# 7. 数据结构 (Data structures) ----
# 8. 向量 (Vectors) ----
# Creating vectors: numeric, character and logical.
x <- c(1, 2, 3, 4, 5)
x
y <- c("one", "two", "three")
y
z <- c(TRUE, FALSE, TRUE)
z

# Sequences and repetition.
c(1:100)
seq(from = 1, to = 100, by = 2)
seq(from = 1, to = 100, length.out = 10)
rep(c(1, 2, 3), 10)
rep(x, each = 3)
rep(x, each = 2, times = 3)

# Arithmetic works element by element.
y <- c(6, 7, 8, 9, 10)
x * 2 + y

# Indexing by position: positive picks, negative drops.
x <- c(1:100)
x[5]
x[-1]
x[c(4:18)]

# Indexing by logical vector; a shorter index is recycled.
y <- c(1:10)
y[c(TRUE, FALSE, TRUE, TRUE, FALSE)]
y[y > 5]
y[y > 5 & y < 9]

# Membership tests with %in%.
z <- c("one", "two", "three", "four", "five")
"one" %in% z
z["one" %in% z]         # a single TRUE index is recycled over all of z
z %in% c("one", "two")
k <- z %in% c("one", "two")
z[k]

# Indexing by name.
names(y) <- c(
  "one", "two", "three", "four", "five",
  "six", "seven", "eight", "nine", "ten"
)
y
y["ten"]
euro["ATS"]

# Assigning past the end grows the vector; skipped positions become NA.
x[101] <- 101
x
v <- c(1:5)
v[20] <- 4
v
append(v, 99, after = 4)
rm(v)

# Drop the first three elements.
y <- y[-c(1:3)]
# Element-wise arithmetic on two vectors of equal length.
x <- c(1:10)
x + 1
y <- seq(1, 100, length.out = 10)
x + y
x * y
x**y       # `**` is accepted as another spelling of `^`
x^y
y
x
y %% x     # remainder
y %/% x    # integer division

# Recycling: the shorter operand is repeated to match the longer one.
z <- c(1, 2)
x + z
z <- c(1, 2, 3)
x + z      # lengths 10 and 3 do not divide evenly, so R warns

# Comparisons return logical vectors.
x > 5
x > y
x == y

# Common math functions.
abs(x)
sqrt(25)
log(16, 2)   # second argument is the base
log10(10)
exp(x)

# Rounding family.
ceiling(-2.3)
floor(c(-2.3, 4.5))
trunc(c(-2.3, 4.5))
round(c(-2.3, 4.5464), 3)
signif(c(-2.3, 4.5464), digits = 4)

# Trigonometry.
sin(x)
cos(x)
# Summary statistics of a numeric vector.
vec <- c(1:100)
vec
sum(vec)
max(vec)
min(vec)
range(vec)
mean(vec)
var(vec)
round(var(vec), 2)
round(sd(vec), 2)
prod(vec)
median(vec)
quantile(vec)
quantile(vec, c(0.1, 0.2, 0.5, 0.6))

# which*() functions return positions, which can then be used as an index.
t <- c(1, 4, 2, 5, 7, 9, 6)
which.max(t)
which(t == 7)
which(t < 7)
t[which(t < 7)]
# 9. 矩阵与数组 (Matrices and arrays) ----
# A numeric matrix can be visualised directly as a heat map.
heatmap(state.x77)

# Build a 4 x 5 matrix; the default fill order is column by column,
# byrow = TRUE fills row by row instead.
x <- 1:20
m <- matrix(x, nrow = 4, ncol = 5)
m
m <- matrix(x, nrow = 4, ncol = 5, byrow = TRUE)
m

# Attach row and column names.
rnames <- c("r1", "r2", "r3", "r4")
cnames <- c("c1", "c2", "c3", "c4", "c5")
dimnames(m) <- list(rnames, cnames)
m

# A plain vector has no dim attribute (NULL); setting one reshapes it.
dim(x)
dim(x) <- c(4, 5)
x
dim(m)

# Three dimensions give an array.
x <- 1:20
dim(x) <- c(2, 2, 5)
x
dim1 <- c("a1", "a2")
dim2 <- c("b1", "b2", "b3")
dim3 <- c("c1", "c2", "c3", "c4")
z <- array(1:24, dim = c(2, 3, 4), dimnames = list(dim1, dim2, dim3))
z

# Indexing. Rebuilding `m` discards the dimnames assigned earlier, so they
# are supplied again here; without them m["r1", "c2"] below fails with
# "subscript out of bounds".
m <- matrix(
  x, nrow = 4, ncol = 5, byrow = TRUE,
  dimnames = list(rnames, cnames)
)
m
m[1, 2]
m[1, c(2, 3, 4)]
m[c(2, 3), c(1, 4)]
m["r1", "c2"]
head(state.x77)
state.x77[, "Income"]
state.x77["Alabama", ]

# Row and column summaries.
t <- m[1, ]
sum(t)
colSums(m)
rowSums(m)
rowMeans(m)
colMeans(m)

# Matrix arithmetic.
n <- matrix(1:9, 3, 3)
t <- matrix(2:10, 3, 3)
n * t      # element-wise product
n %*% t    # matrix product
diag(n)
n
t(n)       # transpose; the function t() is still found despite the variable `t`
# 10. 列表 (Lists) ----
# state.center is a built-in example of a list.
state.center

# A list can hold objects of different types and shapes.
a <- 1:20
b <- matrix(1:20, 4, 5)
c <- mtcars
d <- "This is a test list"
mlist <- list(a, b, c, d)
mlist

# Named elements (the first name was misspelt "fisrt").
mlist <- list(first = a, second = b, third = c, fourth = d)
mlist

# `[` returns a sub-list; `$` and `[[` return the element itself.
mlist[1]
mlist[c(1, 4)]
mlist$second
class(mlist[1])
class(mlist[[1]])

# Add a fifth element, then delete it again by assigning NULL.
mlist[[5]] <- iris
mlist[[5]] <- NULL
# 11. 数据框 (Data frames) ----
# Build a data frame from parallel vectors.
state <- data.frame(state.name, state.abb, state.area)
state[1]            # `[` keeps a one-column data frame
state$state.area    # `$` extracts the column itself

# Columns can be passed to plotting and modelling functions.
plot(women$height, women$weight)
lm(weight ~ height, data = women)

# attach() puts the columns on the search path so `mpg` resolves without the
# `mtcars$` prefix; detach() removes them again.
attach(mtcars)
mpg
detach(mtcars)
# 12. 因子 (Factors) ----
# Counting the distinct values of a column.
mtcars
table(mtcars$cyl)

# An unordered factor.
f <- factor(c("red", "red", "blue", "green", "blue"))
f

# An ordered factor with an explicit level order.
week <- factor(
  c("Mon", "Fri", "Thu", "Wed", "Mon", "Fri", "Sun"),
  ordered = TRUE,
  levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
)
week

# plot() draws numeric vectors and factors differently.
fcyl <- factor(mtcars$cyl)
plot(mtcars$cyl)
plot(fcyl)

# cut() bins a numeric vector into interval factors.
num <- 1:100
cut(num, c(seq(0, 100, 10)))

# Built-in factors.
state.division
state.region
# 13. 缺失数据 (Missing data) ----
# NA propagates through arithmetic and aggregates unless it is removed.
1 + NA
a <- c(NA, 1:20)
sum(a)
sum(a, na.rm = TRUE)
mean(1:20)
mean(a, na.rm = TRUE)

# Element-wise test for missing values.
is.na(a)
# Install VIM together with the colorspace and data.table packages, attach
# VIM, and search the R site for material about it.
install.packages("colorspace")
install.packages("data.table")
install.packages("VIM")
library(VIM)
RSiteSearch("VIM")
# na.omit() drops the missing values, so the result has none left and
# sum() no longer needs na.rm.
c <- na.omit(a)
c
is.na(c)
sum(c)

# 0/0 is NaN ("not a number").
is.nan(0 / 0)
# 14. 字符串 (Strings) ----
# nchar() counts characters per element; length() counts elements.
nchar("Hello world")
month.name
nchar(month.name)
length(month.name)
nchar(c(12, 2, 345))

# paste() joins its arguments element-wise, separated by `sep`.
paste(c("Everybody", "loves", "stats"))
paste("Everybody", "loves", "stats", sep = "-")
names <- c("alt", "ctrl", "shift")
paste(names, "love 1")

# Substrings and case conversion.
temp <- substr(x = month.name, start = 1, stop = 3)
tolower(temp)
# Upper-case the first word character again (\\U needs perl = TRUE).
gsub("^(\\w)", "\\U\\1", tolower(temp), perl = TRUE)

# Pattern matching: fixed = TRUE treats "A+" literally, fixed = FALSE as a
# regular expression.
x <- c("b", "A+", "AC")
grep("A+", x, fixed = TRUE)
grep("A+", x, fixed = FALSE)
match("AC", x)

# Splitting strings; the result is a list with one entry per input string.
path <- "user/local/bin/R"
strsplit(path, "/")
strsplit(c(path, path), "/")

# outer() applies paste() to every suit/face combination.
face <- 1:13
suit <- c("spades", "clubs", "hearts", "diamonds")
outer(suit, face, FUN = paste)
# 15. 日期和时间 (Dates and times) ----
# Install timeSeries, then attach it together with timeDate
# (timeDate is assumed to be available once timeSeries is installed).
install.packages("timeSeries")
library(timeDate)
library(timeSeries)
# Compare the classes of a few built-in data sets.
airquality
class(airquality)
presidents
class(presidents)
airmiles
class(airmiles)

# Dates: a character string is converted with as.Date().
Sys.Date()
a <- "2027-01-01"
as.Date(a)
class(as.Date(a))
as.Date(a, format = "%Y-%m-%d")
class(as.Date(a, format = "%Y-%m-%d"))

# A sequence of dates five days apart.
seq(as.Date("2022-01-03"), as.Date("2022-04-08"), by = 5)

# 48 random values turned into time series.
sales <- round(runif(48, min = 50, max = 100))
sales

# Quarterly series: with frequency = 4 the second element of start/end is a
# quarter (1-4), so end = c(2025, 12) is not a valid period and cut the
# series down to its first 24 values. Leaving `end` out keeps all 48.
ts(sales, start = c(2022, 1), frequency = 4)

# Monthly series: 48 months from January 2022 to December 2025.
ts(sales, start = c(2022, 1), end = c(2025, 12), frequency = 12)
# 16. 常见错误 (Common errors) ----
# 17. 获取数据 (Getting data) ----
# Enter a small data set by hand: one vector per column.
patientID <- c(1, 2, 3, 4)
admdate <- c("10/15/2009", "11/01/2009", "10/21/2009", "10/28/2009")
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")

# Combine the columns into a data frame and print it.
data <- data.frame(patientID, admdate, age, diabetes, status)
data
# An empty data frame with typed columns, to be filled in interactively.
data2 <- data.frame(
  patientID = character(0),
  admdate = character(0),
  age = numeric(),
  diabetes = character(),
  status = character()
)

# edit() returns the edited copy; fix() edits the object in place.
data2 <- edit(data2)
data2
fix(data2)

# Install and attach RODBC, then open its help page.
install.packages("RODBC")
library(RODBC)
help("RODBC")
# 18. 读写文件 (Reading and writing files) ----
# NOTE(review): hard-coded working directory; every relative path below is
# resolved against it.
setwd("E:/RLanguage")

# Whitespace-separated text.
x <- read.table("input.txt")
head(x)
head(x, n = 10)
tail(x)

# Comma-separated text.
read.csv("input.csv", sep = ",")
x <- read.csv("input.csv", sep = ",", header = TRUE)
head(x)

# Skip leading lines and/or limit the number of rows read.
read.csv("input.txt", header = TRUE, skip = 5)
# A stray empty argument ("header=T,,skip") was removed from this call.
read.table("input 1.txt", header = TRUE, skip = 50, nrows = 200)
read.csv("input.csv")

# Fixed-width fields.
read.fwf("fwf.txt", widths = c(3, 3))

# HTML tables via the XML package.
library(XML)
readHTMLTable("https://en.wikipedia.org/wiki/World_population", which = 3)

help(package = "foreign")

# Read from the clipboard.
read.table("clipboard", header = TRUE, sep = ",")
read.table("clipboard", header = TRUE, sep = "\t")
readClipboard()

# Compressed files, raw lines and scan().
read.table(gzfile("input.txt.gz"))
readLines("input.csv", n = 15)
scan("scan.txt", what = list(character(3), numeric(0), numeric(0)))

cat(rivers)
# Files are written relative to the current working directory.
getwd()
x <- read.table("input.txt", header = TRUE)

# Default output is space-separated; sep = "," writes comma-separated text.
write.table(x, file = "newfile.txt")
write.table(x, file = "newfile.csv", sep = ",")
read.table("newfile.csv")
read.csv("newfile.csv")

# Leave out the row names, or append to an existing file.
write.table(x, file = "newfile.csv", sep = ",", row.names = FALSE)
write.table(iris, file = "newfile.csv", sep = ",", append = TRUE)

# Write through a gzip connection.
write.table(mtcars, gzfile("mtcars.txt.gz"))
# 19. 读写Excel (Reading and writing Excel files) ----
# Install and attach rJava and XLConnect.
install.packages("rJava")
install.packages("XLConnect")
library(rJava)
library(XLConnect)

# Read: load the workbook, then read its first sheet.
ex <- loadWorkbook("data.xlsx")
edata <- readWorksheet(ex, 1)
head(edata)
readWorksheetFromFile("data.xlsx", 1)   # the same in a single call

# Write: create the workbook and sheet, write the data, then save.
wb <- loadWorkbook("file.xlsx", create = TRUE)
createSheet(wb, "Sheet 1")
writeWorksheet(wb, data = mtcars, sheet = "Sheet 1")
saveWorkbook(wb)
writeWorksheetToFile("file1.xlsx", data = mtcars, sheet = "Sheet 1")

vignette("XLConnect")

# Detach both packages again.
detach("package:XLConnect")
detach("package:rJava")
# Install and attach the xlsx package.
install.packages("xlsx")
library(xlsx)

# Package help; the package name is "xlsx" (it was misspelt "xlxs").
help(package = "xlsx")

# Read the first sheet, then write `x` to a new workbook.
read.xlsx("data.xlsx", sheetIndex = 1)
write.xlsx(x, file = "rdata.xlsx", sheetName = "Sheet 1", append = FALSE)
# 20. 读写R文件 (Reading and writing R data files) ----
iris

# saveRDS()/readRDS() store a single object; the reader chooses its name.
saveRDS(iris, "iris.RDS")
x <- readRDS("iris.RDS")
x

# load() restores objects under the names they were saved with; save() can
# store several objects in one file.
load(".RData")
save(iris, iris3, file = "iris3.Rdata")
# 21. 数据转换 (Data transformation) ----
# Read the first sheet of a workbook and check the class of the result.
library(xlsx)
cars32 <- read.xlsx("mtcars.xlsx", sheetIndex = 1, header = TRUE)
cars32
class(cars32)
# is.*() functions test a type; as.*() functions convert to it.
is.data.frame(state.x77)
dstate.x77 <- as.data.frame(state.x77)
is.data.frame(dstate.x77)
as.matrix(data.frame(state.region, state.x77))
methods(is)
methods(as)

# Reshape a character vector into a 5 x 10 matrix and convert it further.
x <- state.abb
x
dim(x) <- c(5, 10)
x
as.factor(x)
as.list(x)

# Combine into a data frame, then pull out a column and a row.
state <- data.frame(x, state.region, state.x77)
state
state$Income
state["Nevada", ]
is.data.frame(state["Nevada", ])

# Strip the names from a one-row data frame, or flatten it to a vector.
y <- state["Nevada", ]
unname(y)
unlist(y)
# Load the WHO data set.
who <- read.csv("WHO.csv", header = TRUE)
head(who)

# Subset by row and column positions.
who1 <- who[c(1:50), c(1:10)]
View(who1)
who2 <- who[c(1, 5, 3, 8), c(2, 14, 16, 18)]
View(who2)

# Subset rows by condition. The trailing comma is required: without it the
# index selects columns instead of rows.
who$Continent
who3 <- who[which(who$Continent == 7), ]
who3
who4 <- who[which(who$CountryID > 50 & who$CountryID <= 100), ]
who4

# Random sampling, without and with replacement.
?sample
x <- 1:100
sample(x, 30)
sort(sample(x, 60, replace = TRUE))
sample(who$CountryID, 30, replace = TRUE)

# Draw 30 random rows. Row positions are sampled rather than CountryID
# values, so this stays correct even if the IDs are not exactly 1..nrow(who).
who[sample(nrow(who), 30), ]
# Drop rows with negative indices and a column by assigning NULL.
x <- mtcars[-1:-5, ]
x
x$mpg <- NULL
head(x)

# Add a column: data.frame() and cbind() give the same result here.
state.division
data.frame(USArrests, state.division)
?cbind
cbind(USArrests, state.division)

# Stack rows with rbind(); both inputs need the same columns.
data1 <- head(USArrests)
data2 <- tail(USArrests)
rbind(data1, data2)
data3 <- head(cbind(USArrests, state.division), 20)
# data3 has one column more than data2, so this rbind() cannot succeed;
# try() reports the error without aborting the rest of the script.
try(rbind(data3, data2))

# Overlapping slices produce duplicated rows.
data1 <- head(USArrests, 30)
data2 <- tail(USArrests, 30)
data4 <- rbind(data1, data2)
duplicated(data4)
data4[duplicated(data4), ]
data4[!duplicated(data4), ]
length(row.names(data4[!duplicated(data4), ]))
unique(data4)

# Transpose.
sractm <- t(mtcars)
sractm

# Reverse a vector, and use the reversed row names to flip a data frame.
?rev
letters
rev(letters)
women
row.names(women)
rev(row.names(women))
women[rev(row.names(women)), ]
# Derive a new column by hand ...
height_cm <- women$height * 2.54
data.frame(height_cm, women$height)

# ... or with transform(), either overwriting `height` or adding a column.
?transform
transform(women, height = height * 2.54)
transform(women, height_cm = height * 2.54)

# sort() returns the sorted values.
sort(rivers)
sort(state.name)
rev(sort(rivers))
sort(rev(rivers))

# order() returns the positions that would sort the vector, which makes it
# usable as a row index.
order(rivers)
mtcars[sort(row.names(mtcars)), ]
mtcars[order(mtcars$mpg), ]
mtcars[order(-mtcars$mpg), ]   # descending
?rank
mtcars[order(mtcars$mpg, mtcars$disp), ]   # ties in mpg broken by disp
# Row totals appended as an extra column.
WorldPhones
wp <- as.data.frame(WorldPhones)
rs <- rowSums(wp)
rs
total <- cbind(wp, total = rs)
total

# Column means appended as an extra row. They are computed after the `total`
# column exists, so the new row has one value per column, and they are kept
# in `col_means` so that no variable named `mean` shadows base::mean().
col_means <- colMeans(total)
total <- rbind(total, mean = col_means)
total

# apply(): MARGIN = 1 works over rows, MARGIN = 2 over columns.
?apply
apply(WorldPhones, MARGIN = 1, FUN = sum)
apply(WorldPhones, MARGIN = 2, FUN = mean)

# lapply() returns a list; sapply() simplifies the result where it can.
?lapply
?sapply
lapply(state.center, FUN = length)
sapply(state.center, FUN = length)

# tapply() applies a function within the groups given by a factor.
?tapply
tapply(state.name, state.division, FUN = length)
# Standardise a vector by hand: subtract the mean, divide by the standard
# deviation. The mean must be taken of `x` itself (the code previously
# called mean(c)).
x <- c(3, 5, 2, 5, 7, 8, 6)
mean(x)
x - mean(x)
sd(x)
(x - mean(x)) / sd(x)

# scale() does the same for every column of a matrix.
?scale
scale(state.x77, center = TRUE)
x <- scale(state.x77, center = TRUE, scale = TRUE)
head(x)
heatmap(x)
# Install reshape2; it is attached with library(reshape2) further below,
# before melt() and dcast() are used.
install.packages("reshape2")
# Two small data frames whose key columns contain missing values.
x <- data.frame(
  k1 = c(NA, NA, 3, 4, 5),
  k2 = c(1, NA, NA, 4, 5),
  data = 1:5
)
y <- data.frame(
  k1 = c(NA, 2, NA, 4, 5),
  k2 = c(NA, NA, 3, 4, 5),
  data = 1:5
)

# By default NA keys match each other.
merge(x, y, by = "k1")

# `incomparables` lists key values that must never match. Passing NA stops
# the missing keys from pairing up (the call previously passed T, which is
# not a key value of interest here).
merge(x, y, by = "k2", incomparables = NA)

# Merge on both key columns at once.
merge(x, y, by = c("k1", "k2"))
library(reshape2)
help(package = "reshape2")

# Work on a copy of airquality with lower-case column names.
airquality
names(airquality) <- tolower(names(airquality))

# Wide -> long: month and day are kept as identifier columns.
melt(airquality)
aql <- melt(airquality, id.vars = c("month", "day"))
head(aql, 50)

# Long -> wide again.
aqw <- dcast(aql, month + day ~ variable)
aqw

# With fewer identifier columns the values are aggregated per month.
aqw <- dcast(aql, month ~ variable, fun.aggregate = mean, na.rm = TRUE)
aqw <- dcast(aql, month ~ variable, fun.aggregate = sum, na.rm = TRUE)
install.packages(c("tidyr", "dplyr"))
library(tidyr)
library(dplyr)
help(package = "tidyr")

# A small example table with the row names turned into a proper column.
mtcars[1:10, 1:3]
tdata <- mtcars[1:10, 1:3]
tdata <- data.frame(names = rownames(tdata), tdata)
tdata

# gather(): wide -> long. Columns can be chosen by name, excluded with a
# minus sign, or given by position.
gather(tdata, key = "Key", value = "Value", cyl, disp, mpg)
gather(tdata, key = "Key", value = "Value", cyl, -disp)
gdata <- gather(tdata, key = "Key", value = "Value", 2:4)

# spread(): long -> wide.
spread(gdata, key = "Key", value = "Value")

# separate(): split one column into several.
df <- data.frame(x = c(NA, "a.b", "a.d", "b.c"))
df
separate(df, col = x, into = c("A", "B"))
df <- data.frame(x = c(NA, "a.b-c", "a-d", "b-c"))
x <- separate(df, col = x, into = c("A", "B"), sep = "-")

# unite(): paste several columns back into one.
unite(x, col = "AB", A, B, sep = "-")
iris

# List everything the attached dplyr package exports.
ls("package:dplyr")

# filter() keeps rows that satisfy a condition; slice() keeps rows by position.
dplyr::filter(iris, Sepal.Length > 7)
dplyr::slice(iris, 10:15)