条形图和饼图:可视化基于频率的分类变量
- 条形图
barplot()
# Preview the first five rows of mtcars.
head(mtcars, n = 5)
# Tabulate how many cars fall into each cylinder count, show the table,
# and draw one bar per cylinder count.
cyl.freq <- table(mtcars[["cyl"]])
cyl.freq
barplot(height = cyl.freq)
- 根据其他分类变量进一步分割每一条频率
# Cross-tabulate transmission (rows, mtcars$am) against cylinder count
# (columns, mtcars$cyl) and print the resulting 2 x 3 table.
cyl.freq.matrix <- table(mtcars$am, mtcars$cyl)
cyl.freq.matrix
# Grouped (beside = TRUE), horizontal bars: one group per cylinder count,
# one bar per transmission type; las = 1 keeps axis labels horizontal.
barplot(
  cyl.freq.matrix,
  beside = TRUE,
  horiz = TRUE,
  las = 1,
  main = "Performance car counts\nby transmission and cylinders",
  names.arg = c("V4", "V6", "V8"),
  legend.text = c("auto", "manual"),
  args.legend = list(x = "bottomright")
)
- ggplot2 绘制条形图
# ggplot2 bar charts of the mtcars cylinder counts.
# Written with ggplot() + geom_bar() because qplot() is deprecated
# (ggplot2 >= 3.4.0) and referring to `mtcars$...` inside aesthetics is
# discouraged; columns are mapped by name from the data frame instead.
library("ggplot2")

# Quick look: number of cars per cylinder count.
ggplot(mtcars, aes(x = factor(cyl))) +
  geom_bar()

# Counts per cylinder count, split side by side (position = "dodge") by
# transmission. The fill labels follow the factor level order of am (0, 1).
ggplot(mtcars, aes(x = factor(cyl), fill = factor(am))) +
  geom_bar(position = "dodge") +
  scale_x_discrete(labels = c("V4", "V6", "V8")) +
  scale_y_continuous(breaks = seq(0, 12, 2)) +
  scale_fill_grey(name = "Trans.", labels = c("auto", "manual")) +
  labs(
    x = "",
    y = "",
    title = "Performance car counts\nby transmission and cylinders"
  ) +
  theme_bw() +
  coord_flip()
- 饼图
pie()
# Pie chart of the share of cars per cylinder count, one grey shade per slice.
pie(
  x = table(mtcars$cyl),
  labels = c("V4", "V6", "V8"),
  col = c("white", "gray", "black"),
  main = "Performance cars by cylinders"
)
直方图:可视化连续变量的分布
hist()
# Horsepower values that the histograms below summarise.
mtcars$hp
# Histogram with R's default choice of bins.
hist(mtcars$hp)
# Custom bin breaks: 25-HP-wide classes from 0 to 400.
hist(
  mtcars$hp,
  breaks = seq(from = 0, to = 400, by = 25),
  col = "gray",
  main = "Horsepower",
  xlab = "HP"
)
# Mark the mean (dashed, lty 2) and the median (dotted, lty 3) on the
# histogram and explain both lines in a legend.
abline(v = with(mtcars, c(mean(hp), median(hp))), lty = 2:3, lwd = 2)
legend("topright", legend = c("mean HP", "median HP"), lty = 2:3, lwd = 2)
- ggplot2绘制直方图
# ggplot2 histograms of mtcars horsepower.
# Written with ggplot() + geom_histogram() because qplot() is deprecated
# (ggplot2 >= 3.4.0).
library("ggplot2")

# Quick look with ggplot2's default binning.
ggplot(mtcars, aes(x = hp)) +
  geom_histogram()

# Custom 25-HP-wide, right-closed bins, plus vertical reference lines for the
# mean and the median. The reference lines get their own two-row data frame so
# each aesthetic has exactly one value per line drawn.
ggplot(mtcars, aes(x = hp)) +
  geom_histogram(
    color = "black",
    fill = "white",
    breaks = seq(0, 400, 25),
    closed = "right"
  ) +
  geom_vline(
    data = data.frame(
      value = c(mean(mtcars$hp), median(mtcars$hp)),
      stat = factor(c("mean", "median"))
    ),
    mapping = aes(xintercept = value, linetype = stat),
    show.legend = TRUE
  ) +
  # Dashed line for the mean, dotted for the median; no legend title.
  scale_linetype_manual(values = c(2, 3)) +
  labs(title = "Horsepower", x = "HP", linetype = "")
箱线图:五位数概括法的可视化表示
- 独立箱线图
boxplot()
默认情况下,低于下四分位数或高于上四分位数超过 1.5 倍四分位差(IQR)的观测值被定义为异常点;boxplot() 的 range 参数可以控制此判定,默认 range=1.5
# Distribution of earthquake magnitudes, first as a histogram ...
hist(quakes$mag)
# ... then as a stand-alone boxplot (whiskers use the default range = 1.5).
boxplot(x = quakes$mag, range = 1.5)
- 并列箱线图
比较不同组的五数概括分布(分组用公式符号 ~ 指定)
# Grouping factor: bin the number of detecting stations into
# (0,50], (50,100] and (100,150].
stations.fac <- cut(quakes$stations, breaks = seq(from = 0, to = 150, by = 50))
# Show the group assigned to the first five events.
head(stations.fac, n = 5)
# Side-by-side boxplots of magnitude, one box per station-count group.
boxplot(
  quakes$mag ~ stations.fac,
  xlab = "# stations detected",
  ylab = "Magnitude",
  col = "gray"
)
- ggplot2绘制箱线图
# ggplot2 side-by-side boxplots of quake magnitude per station-count group.
# Written with ggplot() + geom_boxplot() because qplot() is deprecated
# (ggplot2 >= 3.4.0). Relies on the grouping factor stations.fac created
# earlier in this file.
library("ggplot2")
ggplot(
  data.frame(stations = stations.fac, magnitude = quakes$mag),
  aes(x = stations, y = magnitude)
) +
  geom_boxplot() +
  labs(x = "# stations detected", y = "Magnitude")
散点图:识别两个不同连续型数值变量的观测值之间的关系
- 单一散点图
plot(), points()
# First five rows of iris, for orientation.
head(iris, n = 5)
# Open an empty plotting region with the right dimensions for all
# observations (type = "n" draws axes and labels but no points).
plot(
  x = iris$Petal.Width,
  y = iris$Petal.Length,
  type = "n",
  xlab = "Petal Width (cm)",
  ylab = "Petal Length (cm)"
)
# Add the setosa observations: filled black circles.
with(
  subset(iris, Species == "setosa"),
  points(Petal.Width, Petal.Length, pch = 19, col = "black")
)
# Add the virginica observations: filled gray circles.
with(
  subset(iris, Species == "virginica"),
  points(Petal.Width, Petal.Length, pch = 19, col = "gray")
)
# Add the versicolor observations: open black circles.
with(
  subset(iris, Species == "versicolor"),
  points(Petal.Width, Petal.Length, pch = 1, col = "black")
)
# Legend mapping each species to its symbol and colour.
legend(
  "topleft",
  legend = c("setosa", "virginica", "versicolor"),
  col = c("black", "gray", "black"),
  pch = c(19, 19, 1)
)
- 上面散点图的简化版:先设置向量指定每个观测个体所需点的字符和颜色
# Per-observation plotting symbol: open circle (1) for versicolor,
# filled circle (19) for every other species.
iris_pch <- ifelse(iris$Species == "versicolor", 1, 19)
# Per-observation colour: gray for virginica, black for every other species.
iris_col <- ifelse(iris$Species == "virginica", "gray", "black")
# A single plot() call now draws all species at once.
plot(
  x = iris$Petal.Width,
  y = iris$Petal.Length,
  col = iris_col,
  pch = iris_pch,
  xlab = "Petal Width (cm)",
  ylab = "Petal Length (cm)"
)
- 散点图矩阵
pairs()
# Scatterplot matrix of the four numeric iris columns, reusing the
# per-observation symbol and colour vectors (iris_pch, iris_col) set up earlier
# in this file; cex = 0.75 shrinks the points.
pairs(
  iris[1:4],
  pch = iris_pch,
  col = iris_col,
  cex = 0.75
)
- ggplot 来绘制散点图
# ggplot2 scatterplot of iris petal length against petal width, one point
# shape per species.
# Written with ggplot() + geom_point() because qplot() is deprecated
# (ggplot2 >= 3.4.0); columns are mapped by name instead of by position.
library("ggplot2")
ggplot(iris, aes(x = Petal.Width, y = Petal.Length, shape = Species)) +
  geom_point() +
  # Shapes 4, 5 and 6 are assigned to the three species in level order.
  scale_shape_manual(values = 4:6) +
  labs(x = "Petal width", y = "Petal length", shape = "Species")
- 使用 GGally 包的 ggpairs() 绘制散点图矩阵
# Scatterplot matrix of all iris columns via GGally, coloured by species;
# axisLabels = "internal" is passed through to ggpairs() unchanged.
library("GGally")
ggpairs(
  data = iris,
  mapping = aes(colour = Species),
  axisLabels = "internal"
)