条形图
Part 1
- 简单条形图
- 簇状条形图
- 频数条形图
- 条形图着色
- 正负条形图分别着色
- 调整宽度和条形间距
Part 2
- 绘制堆积条形图
- 绘制百分比堆积条形图
- 添加数据标签
- 绘制Cleveland点图
绘制堆积条形图
library(gcookbook)
library(ggplot2)
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat="identity")
# 有时候会遇到一个问题,就是堆积的顺序和图例的顺序是相反的
# 可以使用guides()函数对图例顺序进行调整
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat="identity") +
guides(fill=guide_legend(reverse=TRUE))
# 若想要调整堆积顺序,可以使用desc()函数进行调整
library(plyr) #desc()函数调用包
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar, order=desc(Cultivar))) +
geom_bar(stat="identity")
# 使用新的调色板和蓝色边框线
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat="identity", colour="blue") +
guides(fill=guide_legend(reverse=TRUE)) +
scale_fill_brewer(palette = "Pastell")
绘制百分比堆积条形图
# 首先使用plyr包中的ddply()函数和transform()函数将每组条形对应的数据标准化为100%格式后,
# 再对计算结果绘制对即条形图即可
library(gcookbook) #为了使用示例数据cabbage_exp
library(plyr)
cabbage_exp
# Cultivar Date Weight sd n se
# 1 c39 d16 3.18 0.9566144 10 0.30250803
# 2 c39 d20 2.80 0.2788867 10 0.08819171
# 3 c39 d21 2.74 0.9834181 10 0.31098410
# 4 c52 d16 2.26 0.4452215 10 0.14079141
# 5 c52 d20 3.11 0.7908505 10 0.25008887
# 6 c52 d21 1.47 0.2110819 10 0.06674995
# 以Date为切割变量,对每组数据进行transform()
ce = ddply(cabbage_exp, "Date", transform,
percent_weight=Weight/sum(Weight) * 100)
ce
# Cultivar Date Weight sd n se percent_weight
# 1 c39 d16 3.18 0.9566144 10 0.30250803 58.45588
# 2 c52 d16 2.26 0.4452215 10 0.14079141 41.54412
# 3 c39 d20 2.80 0.2788867 10 0.08819171 47.37733
# 4 c52 d20 3.11 0.7908505 10 0.25008887 52.62267
# 5 c39 d21 2.74 0.9834181 10 0.31098410 65.08314
# 6 c52 d21 1.47 0.2110819 10 0.06674995 34.91686
ggplot(ce, aes(x=Date, y=percent_weight, fill=Cultivar)) +
geom_bar(stat = "identity")
# 计算百分比之后,可以按照绘制常规对即条形图的方法来绘制百分比堆积条形图
# 可以自行调整图例顺序、更换调色板及添加边框线等等
添加数据标签
# 绘图命令中加上geom_text()即可为条形图添加数据标签(其他图也可)
# 使用时,需要指定一个变量映射给x、y和标签本身,通过设定vjust可将标签位置移动至条形图的上方或者下方
library(gcookbook)
# 标签在图形顶端上方
ggplot(cabbage_exp, aes(x=interaction(Date, Cultivar), y=Weight)) +
geom_bar(stat="identity") +
geom_text(aes(label=Weight), vjust=1.5, colour="white")
# 标签在图形底端上方
ggplot(cabbage_exp, aes(x=interaction(Date, Cultivar), y=Weight)) +
geom_bar(stat="identity") +
geom_text(aes(label=Weight), vjust=-0.2)
# 有时候标签在图形顶端上面时,可能会发生数字溢出绘图区
# 解决这个问题可以 1>调整y轴范围 2>调整标签的y轴坐标
# 第二种方法的缺陷是竖直方向的调整的幅度依赖于y轴的数据范围,而更
# 改vjust时,数据标签离条形顶端的距离会根据条形图的高度自动进行调整
# 将y轴上限变大
ggplot(cabbage_exp, aes(x=interaction(Date, Cultivar), y=Weight)) +
geom_bar(stat="identity") +
geom_text(aes(label=Weight), vjust=-0.2) +
ylim(0, max(cabbage_exp$Weight)*1.05)
# 将y轴上限变大
ggplot(cabbage_exp, aes(x=interaction(Date, Cultivar), y=Weight)) +
geom_bar(stat="identity") +
geom_text(aes(y=Weight + 0.1), label=Weight)
# 设定标签的y轴位置使其略高于条形图顶端--y轴范围会自动调整
ggplot(cabbage_exp, aes(x=interaction(Date, Cultivar), y=Weight)) +
geom_bar(stat="identity") +
geom_text(aes(y=Weight+0.1, label=Weight))
# 如果是簇状条形图,需要设定position_dodge()并给一个参数来设定分类
# 间距,分类间距默认值是0.9,标签字体大小可使用size来调整。默认为5
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar), y=Weight) +
geom_bar(stat="identity", position="dodge") +
geom_text(aes(label=Weight), vjust=1.5, colour="white",
position = position_dodge(.9), size=3)
# 堆积条形图增加数据标签
# 在堆积条形图添加数据标签之前,要先对每组条形对应的数据进行累积求和,这
# 个过程必须保证数据的合理安排,否则可能出现错误的累积和
# 可以使用plyr包的arrange()函数完成上述操作,plyr随ggplot2加载
library(plyr)
ce = arrange(cabbage_exp, Date, Cultivar)
#
# Cultivar Date Weight sd n se
# 1 c39 d16 3.18 0.9566144 10 0.30250803
# 2 c52 d16 2.26 0.4452215 10 0.14079141
# 3 c39 d20 2.80 0.2788867 10 0.08819171
# 4 c52 d20 3.11 0.7908505 10 0.25008887
# 5 c39 d21 2.74 0.9834181 10 0.31098410
# 6 c52 d21 1.47 0.2110819 10 0.06674995
#
# 在数据确认安排合理后,可以借助ddply函数以Date为分组变量对
# 数据进行分组,并分别计算每组数据对应的变量Weight的累积和
# 计算累积和
ce = ddply(ce, 'Date', transform, label_y=cumsum(Weight))
ce
# Cultivar Date Weight sd n se label_y
# 1 c39 d16 3.18 0.9566144 10 0.30250803 3.18
# 2 c52 d16 2.26 0.4452215 10 0.14079141 5.44
# 3 c39 d20 2.80 0.2788867 10 0.08819171 2.80
# 4 c52 d20 3.11 0.7908505 10 0.25008887 5.91
# 5 c39 d21 2.74 0.9834181 10 0.31098410 2.74
# 6 c52 d21 1.47 0.2110819 10 0.06674995 4.21
ggplot(ce, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat = "identity") +
geom_text(aes(y=label_y, label=Weight), vjust=1.5, colour="White")
# 如果想把数据标签置于条形中部
ce = arrange(cabbage_exp, Date, Cultivar)
ce = ddply(ce, "Date", transform, label_y=cumsum(Weight)-0.5*Weight)
ggplot(ce, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
geom_text(aes(y=label_y, label=Weight), colour="White")
# 修改颜色样式等
ggplot(ce, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat = "identity", position = position_stack(reverse = TRUE),
colour="black") +
geom_text(aes(y=label_y, label=paste(format(Weight, nsamll=2),"kg")),
size=4) +
guides(fill=guide_legend(reverse = TRUE)) +
scale_fill_brewer(palette = "Pastell")
绘制Cleveland点图
# 使用Cleveland点图来替代条形图以减少图形造成的视觉混乱并使图形更具可读性
library(gcookbook)
# 取出tophitters数据集中的前25个数据
tophit = tophitters2001[1:25,]
ggplot(tophit, aes(x=avg, y=name)) + geom_point()
# tophitters2001数据集包含很多列,观察其中三列
tophit[,c("name","lg","avg")]
# name lg avg
# 1 Larry Walker NL 0.3501
# 2 Ichiro Suzuki AL 0.3497
# 3 Jason Giambi AL 0.3423
# 4 Roberto Alomar AL 0.3357
# 5 Todd Helton NL 0.3356
# 6 Moises Alou NL 0.3314
# 7 Lance Berkman NL 0.3310
# 8 Bret Boone AL 0.3307
# 9 Frank Catalanotto AL 0.3305
# 10 Chipper Jones NL 0.3304
ggplot(tophit, aes(x=avg, y=reorder(name, avg))) +
geom_point(size=3) +
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(colour="grey60", linetype ="dashed"))
# 也可以互换x和y轴,x轴对应于姓名,y轴对应于数值
ggplot(tophit, aes(x=reorder(name, avg), y=avg)) +
geom_point(size=3) +
theme_bw() +
theme(axis.text.x = element_text(angle = 60, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(colour="grey60", linetype ="dashed"))
# 有时候根据其他变量对样本进行分组很哟用,根据因子lg对样本进行分组,因子lg对应有NL
# 和AL两个水平,分别表示国家队和美国队,依次根据lg和avg排序,reorder参数只能对
# 一个变量对因子水平进行排序,只能手动实现上述过程
# 提取出name变量,依次根据变量lg和avg对其进行排序
nameorder = tophit$name[order(tophit$lg, tophit$avg)]
tophit$name = factor(tophit$name, levels = nameorder)
# 绘图时,将lg变量映射到点的颜色上,使用geom_segment()函数,用“以数据点为
# 端点”代替贯通全图的网格线,
ggplot(tophit, aes(x=avg, y=name)) +
geom_segment(aes(yend=name), xend=0, colour="grey50") +
geom_point(size=3, aes(colour=lg)) +
scale_color_brewer(palette = "Set1", limits=c("NL", "AL")) +
theme_bw() +
theme(panel.grid.major.y = element_blank(),
legend.position = c(1, 0.55),
legend.justification = c(1, 0.5))
# 分面
ggplot(tophit, aes(x=avg, y=name)) +
geom_segment(aes(yend=name), xend=0, colour="grey50") +
geom_point(size=3, aes(colour=lg)) +
scale_color_brewer(palette = "Set1", limits=c("NL", "AL"), guide=FALSE) +
theme_bw() +
theme(panel.grid.major.y = element_blank())+
facet_grid(lg ~ .,scales = "free_y", space = "free_y")
end