这次给大家带来的是R语言绘图神器——ggplot2绘图包。根据Hadley的说法,这个包的核心思想来源于 Leland Wilkinson 的《The Grammar of Graphics》。Hadley在他的书中建议大家都去读一读这本书,以便对ggplot2的绘图思想有一个了解。我读了这本书的一小部分,发现这本书理解起来还是比较困难的。
我学习ggplot2绘图主要是通过《R Graphics Cookbook》这本书,这本书目前已经出到了第二版,网上也有它的电子版本可供免费阅读,链接如下:
https://r-graphics.org/
简单地说,ggplot2的绘图方式可以理解为元素映射和图像叠加。
元素映射是指数据和图像之间的对应关系、图像中图形和其属性(大小、颜色、透明度、形状)的对应关系以及图形和图例的对应关系。
图像叠加就是图像中的坐标轴、背景主题、图形元素(组)、图例等图形的透视叠加,这个是遵循透视法则的。比如最新的图形会叠加在原有的图形之上,可能会对原有图形造成遮挡,也可能和原有图形共同组成一幅新的图形,需要使用代码定义设置。
在ggplot2输出的图形中,坐标轴、图形元素、图例、标题、文本、背景主题等等,都是可以自定义编辑设置的,这就给了创作者很大的发挥空间,能够根据自己的需要绘制精美的图形。
对于ggplot2的学习,虽然我已经掌握了基本的绘图方法,但还有更多内容需要在实践中进一步学习才行,以下是我的读书笔记。
# R Graphics Cookbook notes (ggplot2)
#
# NOTE(review): this script was recovered from a listing whose line breaks
# and most `<- ...` right-hand sides had been stripped. Statements tagged
# `[reconstructed]` were rebuilt from the surviving fragments, from how the
# object is used afterwards, and from the matching cookbook recipe -- confirm
# them against the author's original before relying on exact values.
# Calls that newer ggplot2 releases report as deprecated (`fun.y`,
# `..count..`-style variables, `size` for line widths, numeric
# `legend.position`) are left as the author wrote them.
library(ggplot2)
library(gcookbook)
library(data.table)
library(MASS)

# 2.1 Scatter plot
ggplot(data = mtcars, aes(x = wt, y = mpg)) + geom_point()

# 2.2 Line graph
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point()

# 2.3 Bar graph
ggplot(BOD, aes(x = Time, y = demand)) + geom_col()
ggplot(BOD, aes(x = factor(Time), y = demand)) + geom_col()

# 2.4 Histogram
ggplot(mtcars, aes(x = mpg)) + geom_histogram(binwidth = 4)

# 2.5 Box plot
ggplot(ToothGrowth, aes(x = supp, y = len)) + geom_boxplot()

# 2.6 Function curve
# [reconstructed] only `myfun ... x){` survived; a logistic curve is assumed.
myfun <- function(x) {
  1 / (1 + exp(-x))
}
ggplot(data.frame(x = c(-10, 10)), aes(x)) +
  stat_function(fun = myfun, geom = "line")

# 3. Bar graphs
# 3.2 Grouping bars together
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = "dodge", color = "black") +
  scale_fill_brewer(palette = "Pastel1")

# 3.4 Colouring bars
# [reconstructed] ten states with the largest change (only `1:` survived).
upc <- as.data.table(uspopchange)[order(-Change)][1:10]
ggplot(upc, aes(x = Abb, y = Change, fill = Region)) +
  geom_col(color = "Black")
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) +
  geom_col(color = "Black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")

# 3.5 Colouring negative and positive bars differently
# [reconstructed] year cut-off and the `Pos` flag were lost.
climate_sub <- as.data.table(climate)[Source == "Berkeley" & Year >= 1900]
climate_sub[, Pos := Anomaly10y >= 0]
ggplot(climate_sub, aes(x = Year, y = Anomaly10y, fill = Pos)) +
  geom_col(position = "identity", color = "Black", size = 0.1) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

# 3.6 Adjusting bar width and spacing
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(width = 0.5, position = position_dodge(0.7))

# 3.7 Stacked bar graph (shows each bar's composition)
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col() +
  guides(fill = guide_legend(reverse = TRUE))

# 3.8 Proportional (100%) stacked bar graph
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(color = "Black", position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Pastel1")

# 3.9 Adding labels to a bar graph
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_col() +
  geom_text(aes(label = Weight), vjust = 1.5, color = "White")
ggplot(mtcars, aes(x = factor(cyl))) +
  geom_bar() +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1.5,
            color = "white")
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_col() +
  geom_text(aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.15)
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_col() +
  geom_text(aes(y = Weight - 0.2, label = Weight))
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = "dodge") +
  geom_text(aes(label = Weight), color = "white", vjust = 1.5,
            position = position_dodge(.9))

# 3.10 Cleveland dot plot
# [reconstructed] top 25 hitters (only `1:` survived).
tophit <- tophitters2001[1:25, ]
ggplot(tophit, aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey60", linetype = "dashed")
  )
ggplot(tophit, aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey60", linetype = "dashed"),
    axis.text.x = element_text(angle = 60, hjust = 1)
  )

# 4. Line graphs
# 4.1 Basic line graph
ggplot(BOD, aes(x = Time, y = demand)) + geom_line()

# 4.2 Adding points to a line graph
ggplot(BOD, aes(x = Time, y = demand)) + geom_line() + geom_point()
ggplot(worldpop, aes(x = Year, y = Population)) + geom_line() + geom_point()

# 4.3 Line graph with multiple lines
ggplot(tg, aes(x = dose, y = length, color = supp)) + geom_line()
ggplot(tg, aes(x = dose, y = length, linetype = supp, color = supp)) +
  geom_line() +
  geom_point(shape = 21)

# 4.4 Changing the appearance of lines
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line(linetype = "dashed", size = 1, color = "blue")
ggplot(tg, aes(x = dose, y = length, color = supp)) +
  geom_line() +
  scale_color_brewer(palette = "Set1")
ggplot(tg, aes(x = dose, y = length, group = supp)) +
  geom_line(color = "darkblue", size = 1.5)
ggplot(tg, aes(x = dose, y = length, color = supp)) +
  theme_bw() +
  geom_line(linetype = "dashed") +
  geom_point(shape = 22, size = 3, fill = "white")

# 4.5 Changing the appearance of points
# [reconstructed] dodge width (only `0.` survived).
pd <- position_dodge(0.2)
ggplot(tg, aes(x = dose, y = length, fill = supp)) +
  geom_line(position = pd) +
  geom_point(shape = 21, size = 3, position = pd) +
  scale_fill_manual(values = c("Black", "White"))

# 4.6 Graph with a shaded area
# [reconstructed] start of the data.frame() call was lost.
sunspotyear <- data.frame(
  Year = as.numeric(time(sunspot.year)),
  Sunspots = as.numeric(sunspot.year)
)
ggplot(sunspotyear, aes(x = Year, y = Sunspots)) + geom_area()
ggplot(sunspotyear, aes(x = Year, y = Sunspots)) +
  theme_bw() +
  geom_area(color = "black", fill = "blue", alpha = .2)
ggplot(sunspotyear, aes(x = Year, y = Sunspots)) +
  theme_bw() +
  geom_area(fill = "blue", alpha = .2) +
  geom_line()

# 4.7 Stacked area graph
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  theme_bw() +
  geom_area()
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  theme_bw() +
  geom_area(alpha = .4) +
  scale_fill_brewer(palette = "Blues") +
  geom_line(position = "stack", size = .2)

# 4.8 Proportional stacked area graph
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  theme_bw() +
  geom_area(position = "fill", color = "black", size = .2, alpha = .4) +
  scale_fill_brewer(palette = "Blues") +
  scale_y_continuous(labels = scales::percent)

# 4.9 Adding a confidence region
# [reconstructed] table and filter column were lost.
climate_mod <- as.data.table(climate)[
  Source == "Berkeley", .(Year, Anomaly10y, Unc10y)
]
ggplot(climate_mod, aes(x = Year, y = Anomaly10y)) +
  geom_ribbon(aes(ymin = Anomaly10y - Unc10y, ymax = Anomaly10y + Unc10y),
              alpha = .2) +
  geom_line()
ggplot(climate_mod, aes(x = Year, y = Anomaly10y)) +
  geom_line(aes(y = Anomaly10y - Unc10y), color = "grey50",
            linetype = "dotted") +
  geom_line(aes(y = Anomaly10y + Unc10y), color = "grey50",
            linetype = "dotted") +
  geom_line()

# 5. Scatter plots
# 5.1 Basic scatter plot
# [reconstructed] whole assignment was lost.
HW <- as.data.table(heightweight)[, .(ageYear, heightIn)]
ggplot(HW, aes(x = ageYear, y = heightIn)) + geom_point(shape = 21, size = 1.5)

# 5.2 Grouping points by a data label
# [reconstructed] whole assignment was lost.
HW1 <- as.data.table(heightweight)[, .(sex, ageYear, heightIn)]
ggplot(HW1, aes(x = ageYear, y = heightIn, shape = sex, color = sex)) +
  geom_point()
ggplot(HW1, aes(x = ageYear, y = heightIn, shape = sex, color = sex)) +
  geom_point() +
  scale_shape_manual(values = c(1, 2)) +
  scale_color_brewer(palette = "Set1")

# 5.3 Using different point shapes
ggplot(HW1, aes(x = ageYear, y = heightIn, shape = sex, color = sex)) +
  geom_point(shape = 5)

# 5.4 Mapping a continuous variable to colour or size
# [reconstructed] whole assignment was lost.
HW2 <- as.data.table(heightweight)[, .(sex, ageYear, heightIn, weightLb)]
ggplot(HW2, aes(x = ageYear, y = heightIn, color = weightLb)) + geom_point()
ggplot(HW2, aes(x = ageYear, y = heightIn, size = weightLb)) + geom_point()
ggplot(HW2, aes(x = ageYear, y = heightIn, size = weightLb)) +
  geom_point() +
  scale_size_area()

# 5.5 Dealing with overplotting
# [reconstructed] only `x=carat,` survived.
diamonds_sp <- ggplot(diamonds, aes(x = carat, y = price))
diamonds_sp +
  stat_bin2d(bins = 50) +
  scale_fill_gradient(low = "lightblue", high = "red", limits = c(0, 6000))
# [reconstructed] only `x=Time,` survived.
CW_SP <- ggplot(ChickWeight, aes(x = Time, y = weight))
CW_SP + geom_point(position = position_jitter(width = .5, height = 0))
CW_SP + geom_boxplot(aes(group = Time))

# 5.6 Adding fitted regression lines
# [reconstructed] only `x=ageYear,` survived.
hw_sp <- ggplot(heightweight, aes(x = ageYear, y = heightIn))
hw_sp +
  geom_point(color = "lightblue") +
  stat_smooth(method = lm, color = "black")

# 5.7 Adding fitted lines from a custom regression model
# 5.8 Adding fitted lines from multiple models
# 5.9 Adding annotations with model coefficients
# 5.10 Adding marginal rugs to a scatter plot

# 5.11 Labelling points in a scatter plot
# [reconstructed] health-expenditure threshold was lost.
countries_sub <- as.data.table(countries)[Year == 2009 & healthexp > 2000]
# [reconstructed] only `x=healthexp,` survived.
countries_sp <- ggplot(countries_sub, aes(x = healthexp, y = infmortality)) +
  geom_point()
countries_sp +
  annotate("text", x = 4350, y = 5.4, label = "Canada") +
  annotate("text", x = 7400, y = 6.8, label = "USA")
countries_sp + geom_text(aes(label = Name), size = 4)

# 5.12 Balloon plot
# [reconstructed] only the first country name survived.
countrylist <- c(
  "Canada", "Ireland", "United Kingdom", "United States", "New Zealand",
  "Iceland", "Japan", "Luxembourg", "Netherlands", "Switzerland"
)
# [reconstructed] table and `Year ==` were lost.
cdat <- as.data.table(countries)[Year == 2009 & Name %in% countrylist]
# [reconstructed] only `x=healthexp,` survived.
cdat_sp <- ggplot(cdat, aes(x = healthexp, y = infmortality, size = GDP)) +
  geom_point(shape = 21, color = "black", fill = "cornsilk")
cdat_sp + scale_size_area(max_size = 15)

# 5.13 Scatter plot matrix
# [reconstructed] table and `Year ==` were lost.
c2009 <- as.data.table(countries)[
  Year == 2009, .(Name, GDP, laborrate, healthexp, infmortality)
]
pairs(c2009[, .(GDP, laborrate, healthexp, infmortality)])

# 6. Summarised data distributions
# 6.1 Basic histogram
ggplot(faithful, aes(x = waiting)) +
  geom_histogram(binwidth = 5, fill = "white", color = "black")
# [reconstructed] only `15` survived; range split into 15 bins is assumed.
binsize <- diff(range(faithful$waiting)) / 15
ggplot(faithful, aes(x = waiting)) +
  geom_histogram(binwidth = binsize, fill = "white", color = "black",
                 boundary = 47)

# 6.2 Multiple histograms from grouped data
ggplot(birthwt, aes(x = bwt)) +
  geom_histogram(fill = "white", color = "black") +
  facet_grid(smoke ~ .)

# 6.3 Density curve
ggplot(faithful, aes(x = waiting)) + geom_density()
ggplot(faithful, aes(x = waiting)) +
  geom_line(stat = "density") +
  expand_limits(y = 0)

# 6.4 Multiple density curves from grouped data
# [reconstructed] table name was lost.
birthwt_mod <- as.data.table(birthwt)[, "smoke" := as.factor(smoke)]
ggplot(birthwt_mod, aes(bwt, fill = smoke)) + geom_density(alpha = .3)

# 6.5 Frequency polygon
ggplot(faithful, aes(waiting)) + geom_freqpoly(binwidth = 4)

# 6.6 Basic box plot
ggplot(birthwt, aes(x = factor(race), y = bwt)) +
  geom_boxplot(width = .5, outlier.size = 1.5, outlier.shape = 21)

# 6.7 Adding notches to a box plot
ggplot(birthwt, aes(x = factor(race), y = bwt)) + geom_boxplot(notch = TRUE)

# 6.8 Adding means to a box plot
ggplot(birthwt, aes(x = factor(race), y = bwt)) +
  geom_boxplot() +
  stat_summary(fun.y = "mean", geom = "point", shape = 23, size = 3,
               fill = "white")

# 6.9 Violin plot

# 6.10 Wilkinson dot plot (similar to a frequency plot)
# [reconstructed] health-expenditure threshold was lost.
c2009 <- as.data.table(countries)[Year == 2009 & healthexp > 2000]
ggplot(c2009, aes(infmortality)) + geom_dotplot()

# 6.11 Wilkinson dot plots for grouped data

# 6.12 Density plot of two-dimensional data
# [reconstructed] only `x=eruptions,` survived.
faithful_p <- ggplot(faithful, aes(x = eruptions, y = waiting))
faithful_p + geom_point() + stat_density2d(aes(color = ..level..))
faithful_p +
  stat_density2d(aes(fill = ..density..), geom = "raster", contour = FALSE)
faithful_p +
  geom_point() +
  stat_density2d(aes(alpha = ..density..), geom = "tile", contour = FALSE)

# 7. Annotations
# 7.1 Adding text annotations
# [reconstructed] only `x=eruptions,` survived.
p <- ggplot(faithful, aes(x = eruptions, y = waiting)) + geom_point()
p + annotate(geom = "text", x = 3, y = 48, label = "Group1",
             family = "serif", fontface = "italic", color = "darkred",
             size = 3)

# 7.2 Using mathematical expressions in annotations
# [reconstructed] only `x=c(-` survived; a standard normal curve is assumed.
p <- ggplot(data.frame(x = c(-3, 3)), aes(x = x)) + stat_function(fun = dnorm)
p + annotate("text", x = 2, y = 0.3, parse = TRUE,
             label = "frac(1,sqrt(2*pi))*e^{-x^2/2}")
p + annotate("text", x = 0, y = 0.05, parse = TRUE, size = 4,
             label = "'Function: '* y==frac(1,sqrt(2*pi))*e^{-x^2/2}")

# 7.3 Adding lines
# [reconstructed] only `x=ageYear,` survived; `color = sex` is needed by the
# per-group mean lines below.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn, color = sex)) +
  geom_point()
hw_plot + geom_hline(yintercept = 60) + geom_vline(xintercept = 14)
hw_plot + geom_abline(intercept = 37.4, slope = 1.75)
# [reconstructed] table name was lost.
hw_means <- as.data.table(heightweight)[
  , .("heightIn" = mean(heightIn)), keyby = sex
]
hw_plot +
  geom_hline(data = hw_means, aes(yintercept = heightIn, color = sex),
             linetype = "dashed", size = 1)

# 7.4 Adding line segments and arrows
# [reconstructed] table, filter column and aesthetics were lost.
p <- ggplot(as.data.table(climate)[Source == "Berkeley"],
            aes(x = Year, y = Anomaly10y)) +
  geom_line()
p + annotate("segment", x = 1950, xend = 1980, y = -0.25, yend = -0.25)
p +
  annotate(geom = "segment", x = 1850, xend = 1820, y = -0.8, yend = -0.95,
           color = "blue", size = 2, arrow = arrow()) +
  annotate("segment", x = 1950, xend = 1980, y = -0.25, yend = -.25,
           arrow = arrow(ends = "both", angle = 90,
                         length = unit(0.3, "cm")))

# 7.5 Adding a shaded rectangle
p + annotate("rect", xmin = 1950, xmax = 1980, ymin = -1, ymax = 1,
             alpha = 0.1, fill = "blue")

# 7.6 Highlighting an item
# [reconstructed] highlighted group and labels were lost.
pg_mod <- as.data.table(PlantGrowth)[
  , "hl" := fifelse(group == "trt2", "yes", "no")
]
ggplot(pg_mod, aes(x = group, y = weight, fill = hl)) +
  geom_boxplot() +
  scale_fill_manual(values = c("grey85", "#FFDDCC"), guide = "none")

# 7.7 Adding error bars
# [reconstructed] table and filter column were lost.
ce_mod <- as.data.table(cabbage_exp)[Cultivar == "c39"]
ggplot(ce_mod, aes(x = Date, y = Weight)) +
  geom_col(fill = "white", color = "black") +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se), width = 0.2)
ggplot(ce_mod, aes(x = Date, y = Weight)) +
  geom_line(aes(group = 1)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se), width = 0.2)
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se),
                position = position_dodge(0.9), width = 0.2)
# [reconstructed] dodge width (only `0.` survived).
pd <- position_dodge(0.3)
ggplot(cabbage_exp,
       aes(x = Date, y = Weight, color = Cultivar, group = Cultivar)) +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se), width = 0.2,
                size = 0.25, color = "black", position = pd) +
  geom_line(position = pd) +
  geom_point(position = pd, size = 2.5)

# 7.8 Adding annotations to individual facets
# [reconstructed] only `x=displ,` survived.
mpg_plot <- ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ drv)
# [reconstructed] only `"4",` survived.
f_labels <- data.frame(
  drv = c("4", "f", "r"),
  label = c("4wd", "Front", "Rear")
)
mpg_plot + geom_text(x = 6, y = 40, aes(label = label), data = f_labels)
mpg_plot + annotate("text", x = 6, y = 42, label = "label text")

# 8. Axes
# 8.1 Swapping the x- and y-axes
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_boxplot() +
  coord_flip()

# 8.2 Setting the range of a continuous axis
# [reconstructed] only `x=group,` survived.
pg_plot <- ggplot(PlantGrowth, aes(x = group, y = weight)) + geom_boxplot()
pg_plot + ylim(0, max(PlantGrowth$weight))

# 8.3 Reversing a continuous axis
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_boxplot() +
  scale_y_reverse()
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_boxplot() +
  ylim(6.5, 3.5)

# 8.4 Changing the order of items on a categorical axis
pg_plot + scale_x_discrete(limits = c("ctrl", "trt2"))

# 8.5 Setting the scaling ratio of the x- and y-axes
# [reconstructed] only `x=Half,` survived.
m_plot <- ggplot(marathon, aes(x = Half, y = Full)) + geom_point()
m_plot +
  coord_fixed() +
  scale_y_continuous(breaks = seq(0, 420, 30)) +
  scale_x_continuous(breaks = seq(0, 420, 30))

# 8.6 Setting the positions of tick marks
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_boxplot() +
  scale_y_continuous(breaks = c(4, 4.25, 4.5, 5, 6, 8))

# 8.7 Removing tick marks and labels
pg_plot + theme(axis.text.y = element_blank())
pg_plot + theme(axis.ticks = element_blank(), axis.text.y = element_blank())
pg_plot + scale_y_continuous(breaks = NULL)

# 8.8 Changing the text of tick labels
# [reconstructed] only `x=ageYear,` survived.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) + geom_point()
hw_plot +
  scale_y_continuous(
    breaks = c(50, 56, 60, 66, 72),
    labels = c("Tiny", "Really\nshort", "Short", "Medium", "Tallish")
  )

# 8.9 Changing the appearance of tick labels
pg_plot +
  scale_x_discrete(
    breaks = c("ctrl", "trt1", "trt2"),
    labels = c("Control", "Treatment 1", "Treatment 2")
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1))

# 8.10 Changing the text of axis labels
# [reconstructed] only `x=ageYear,` survived.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) + geom_point()
hw_plot + xlab("Age in years") + ylab("Height in inches")
hw_plot + labs(x = "Age in years", y = "Height in inches")
hw_plot +
  scale_x_continuous(name = "Age in years") +
  scale_y_continuous(name = "Height in inches")

# 8.11 Removing axis labels
pg_plot + xlab(NULL)

# 8.12 Changing the appearance of axis labels
hw_plot +
  theme(axis.title.x = element_text(face = "italic", color = "darkred",
                                    size = 14))
hw_plot +
  ylab("Height\n(inches)") +
  theme(axis.title.y = element_text(angle = 0, face = "italic", size = 14))

# 8.13 Showing axis lines (colour, thickness, line type)
hw_plot + theme(axis.line = element_line(color = "black"))

# 8.14 Using a logarithmic axis
# [reconstructed] only `x=body,` survived.
animals_plot <- ggplot(Animals,
                       aes(x = body, y = brain, label = rownames(Animals))) +
  geom_text(size = 3)
animals_plot + scale_x_log10() + scale_y_log10()

# 8.15 Adding ticks for a logarithmic axis

# 8.16 Radar (polar) plot
ggplot(wind, aes(x = DirCat, fill = SpeedCat)) +
  geom_histogram(binwidth = 15, boundary = -7.5) +
  coord_polar() +
  scale_x_continuous(limits = c(0, 360))

# 8.17 Using dates on an axis
# 8.18 Using relative times on an axis

# 9. Controlling the overall appearance of graphs
# 9.1 Setting the title of a graph
# [reconstructed] only `x=ageYear,` survived.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) + geom_point()
hw_plot +
  ggtitle("Age and Height of Schoolchildren", "——11.5 to 17.5 years old")

# 9.2 Changing the appearance of text
# [reconstructed] only `x=ageYear,` survived.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) + geom_point()
# NOTE(review): `size = 0.9` is an absolute size in points and renders almost
# invisibly; `rel(0.9)` may have been intended -- confirm with the author.
hw_plot +
  theme(axis.title.x = element_text(size = 0.9, lineheight = 0.9,
                                    family = "Microsoft YaHei",
                                    face = "bold.italic", color = "red"))
hw_plot +
  ggtitle("这是图标题") +
  theme(plot.title = element_text(size = rel(1.5), lineheight = 0.9,
                                  family = "Microsoft YaHei"))

# 9.3 Using themes
# [reconstructed] only `x=ageYear,` survived.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) + geom_point()
hw_plot + theme_classic()

# 9.4 Changing the appearance of theme elements
# [reconstructed] only `x=ageYear,` survived; `color = sex` is assumed so that
# the legend styled below actually exists.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn, color = sex)) +
  geom_point()
hw_plot +
  theme(
    panel.grid.major = element_line(color = "red"),
    panel.grid.minor = element_line(color = "red", linetype = "dashed",
                                    size = 0.2),
    panel.background = element_rect(fill = "lightblue"),
    panel.border = element_rect(color = "blue", fill = NA, size = 2)
  )
hw_plot +
  theme(
    legend.background = element_rect(fill = "grey85", color = "red"),
    panel.grid.minor = element_line(color = "red")
  )
hw_plot +
  ggtitle("Plot title here") +
  theme(
    axis.title.x = element_text(color = "red", size = 14),
    axis.text.x = element_text(color = "blue"),
    axis.title.y = element_text(color = "red", size = 14, angle = 90),
    axis.text.y = element_text(color = "blue"),
    plot.title = element_text(color = "red", size = 20, face = "bold")
  )

# 9.5 Creating your own theme
# [reconstructed] base theme and the `text` element name were lost.
mytheme <- theme_bw() +
  theme(
    text = element_text(color = "red"),
    axis.title = element_text(size = rel(1.25))
  )
# [reconstructed] only `x=ageYear,` survived; applying the theme is assumed.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) + geom_point()
hw_plot + mytheme

# 9.6 Hiding grid lines
# [reconstructed] only `x=ageYear,` survived.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) + geom_point()
hw_plot +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
hw_plot +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank())

# 10. Legends
# 10.1 Removing the legend
# [reconstructed] only `x=group,` survived.
pg_plot <- ggplot(PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot()
pg_plot + guides(fill = "none")
pg_plot + scale_fill_discrete(guide = "none")
pg_plot + theme(legend.position = "none")

# 10.2 Changing the position of the legend
# [reconstructed] only `x=group,` survived.
pg_plot <- ggplot(PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot()
pg_plot + theme(legend.position = "bottom")
pg_plot + theme(legend.position = c(1, 0), legend.justification = c(1, 0))
pg_plot +
  theme(legend.position = c(0.85, 0.2),
        legend.background = element_rect(fill = "white", color = "black"))
pg_plot +
  theme(legend.position = c(0.85, 0.2)) +
  theme(legend.background = element_rect(fill = "white", color = "black"))
pg_plot +
  theme(legend.position = c(0.85, 0.2)) +
  theme(legend.background = element_blank()) +
  theme(legend.key = element_blank())

# 10.3 Changing the order of items in a legend
pg_plot + scale_fill_discrete(limits = c("trt1", "trt2", "ctrl"))
pg_plot +
  scale_fill_grey(start = .5, end = 1, limits = c("trt1", "trt2", "ctrl"))
pg_plot +
  scale_fill_brewer(palette = "Pastel2", limits = c("trt1", "trt2", "ctrl"))

# 10.4 Reversing the order of items in a legend
pg_plot + guides(fill = guide_legend(reverse = TRUE))

# 10.5 Changing a legend title
# [reconstructed] only `x=group,` survived.
pg_plot <- ggplot(PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot()
pg_plot + labs(fill = "Condition")
pg_plot + scale_fill_discrete(name = "Condition")
# [reconstructed] the ggplot() call was lost; the two layers survived.
hw_plot <- ggplot(heightweight, aes(x = ageYear, y = heightIn, color = sex)) +
  geom_point(aes(size = weightLb)) +
  scale_size_continuous(range = c(1, 4))
hw_plot + labs(color = "Male/Female", size = "Weight\n(pounds)")
# [reconstructed] only `x=ageYear,` survived.
hw_plot2 <- ggplot(heightweight,
                   aes(x = ageYear, y = heightIn, shape = sex, color = sex)) +
  geom_point()
hw_plot2 + labs(shape = "Male/Female", color = "Male/Female")

# 10.6 Changing the appearance of a legend title
# [reconstructed] only `x=group,` and `geom_boxplot()` survived.
pg_plot <- ggplot(PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot()
pg_plot +
  theme(legend.title = element_text(face = "italic", family = "Times",
                                    color = "red", size = 14))

# 10.7 Removing a legend title
ggplot(PlantGrowth, aes(group, weight, fill = group)) +
  geom_boxplot() +
  guides(fill = guide_legend(title = NULL))

# 10.8 Changing the labels in a legend
# [reconstructed] only `geom_boxplot()` survived.
pg_plot <- ggplot(PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot()
pg_plot +
  scale_fill_discrete(labels = c("Control", "Treatment1", "Treatment2"))

# 10.9-10.10 Changing the appearance of legend labels
# (font, colour, size, multi-line text)
pg_plot +
  scale_fill_discrete(
    labels = c("Control", "Type 1\nTreatment", "Type 2\nTreatment")
  ) +
  theme(legend.text = element_text(lineheight = .8),
        legend.key.height = unit(1, "cm"))

# 11. Facets
# 11.1 Splitting data into subplots with facets
# [reconstructed] whole right-hand side was lost.
mpg_plot <- ggplot(mpg, aes(x = displ, y = hwy)) + geom_point()
mpg_plot + facet_grid(drv ~ .)
mpg_plot + facet_grid(. ~ cyl)
mpg_plot + facet_grid(drv ~ cyl)
mpg_plot + facet_wrap(~class)

# 11.2 Using facets with different axes
mpg_plot + facet_grid(drv ~ cyl, scales = "free_y")

# 11.3 Changing the text of facet labels
# 11.4 Changing the appearance of facet labels and headers

# 12. Using colours in plots
# 12.1 Setting the colours of objects
ggplot(birthwt, aes(bwt)) + geom_histogram(fill = "red", color = "black")
ggplot(mtcars, aes(wt, mpg)) + geom_point(color = "red")

# 12.2 Representing variables with colours
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_col(color = "black", position = "dodge")

# 12.3 Using a colourblind-friendly palette
# 12.4 Using a different palette for a discrete variable
# 12.5 Using a manually defined palette for a discrete variable
# 12.6 Colouring a shaded region based on value

# 13. Miscellaneous graphs
# 14. Output formats (PDF, etc.)
# 15. Data manipulation (dplyr)
# Appendix