条形图 Part2

条形图

Part 1

  • 简单条形图
  • 簇状条形图
  • 频数条形图
  • 条形图着色
  • 正负条形图分别着色
  • 调整宽度和条形间距

Part 2

  • 绘制堆积条形图
  • 绘制百分比堆积条形图
  • 添加数据标签
  • 绘制Cleveland点图

绘制堆积条形图

library(gcookbook)  # provides the cabbage_exp example data
library(ggplot2)

# Basic stacked bar chart: one bar per Date, segments filled by Cultivar.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col()

# The stacking order and the legend order can come out opposite to each
# other. guides() flips the legend so that it reads in the same order as
# the bar segments.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col() +
  guides(fill = guide_legend(reverse = TRUE))

# To flip the stacking order itself, reverse the stack position.
# The `order = desc(Cultivar)` aesthetic used previously is not honoured by
# ggplot2 2.0.0 and later, so it had no effect on the plot.
library(plyr)  # load kept as in the original script; desc() is no longer used
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = position_stack(reverse = TRUE))

# Use a different palette and draw a blue outline around every segment.
# "Pastel1" (ending in the digit one) is the ColorBrewer palette name; the
# previous spelling "Pastell" is not a ColorBrewer palette, so the intended
# colours were never applied.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(colour = "blue") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")


绘制百分比堆积条形图

# Percent-stacked bars: rescale the weights within each Date so that they
# sum to 100, then draw an ordinary stacked bar chart of the rescaled
# values. ddply() splits the data by Date and transform() adds the new
# column to each piece.

library(gcookbook)  # example data: cabbage_exp
library(plyr)       # ddply()

cabbage_exp
# Cultivar Date Weight        sd  n         se
# 1      c39  d16   3.18 0.9566144 10 0.30250803
# 2      c39  d20   2.80 0.2788867 10 0.08819171
# 3      c39  d21   2.74 0.9834181 10 0.31098410
# 4      c52  d16   2.26 0.4452215 10 0.14079141
# 5      c52  d20   3.11 0.7908505 10 0.25008887
# 6      c52  d21   1.47 0.2110819 10 0.06674995

# Split by Date and apply transform() to every group.
ce <- ddply(
  cabbage_exp,
  "Date",
  transform,
  percent_weight = Weight / sum(Weight) * 100
)

ce
# Cultivar Date Weight        sd  n         se percent_weight
# 1      c39  d16   3.18 0.9566144 10 0.30250803       58.45588
# 2      c52  d16   2.26 0.4452215 10 0.14079141       41.54412
# 3      c39  d20   2.80 0.2788867 10 0.08819171       47.37733
# 4      c52  d20   3.11 0.7908505 10 0.25008887       52.62267
# 5      c39  d21   2.74 0.9834181 10 0.31098410       65.08314
# 6      c52  d21   1.47 0.2110819 10 0.06674995       34.91686

ggplot(
  data = ce,
  mapping = aes(x = Date, y = percent_weight, fill = Cultivar)
) +
  geom_col()

# Once the percentages exist, the chart is drawn exactly like a regular
# stacked bar chart; legend order, palette and outlines can be adjusted in
# the same way.

添加数据标签

# geom_text() adds data labels to a bar chart (it works for other plot
# types as well). It needs mappings for x, y and the label itself; vjust
# then shifts the label vertically relative to that y position.

library(gcookbook)

# vjust = 1.5 places the label just below the top edge, inside the bar.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), colour = "white", vjust = 1.5)

# vjust = -0.2 places the label just above the top edge of the bar.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), vjust = -0.2)


# Labels placed above the bars can spill out of the plotting area.
# Two ways to deal with that:
#   1. widen the y-axis range, or
#   2. map the label's y position to a value slightly above the bar top.
# With option 2 the offset is expressed in data units, so how far the label
# sits from the bar depends on the y-axis range; vjust, in contrast, is
# relative to the label's own height.

# Option 1: raise the upper y limit by 5%.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_col() +
  geom_text(aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)

# Option 2: put the label slightly above the bar top; the y range then
# expands automatically to include it.
# Both y and label must be mapped inside aes(). The script previously also
# contained a variant with `label = Weight` outside aes(), which fails
# because Weight is a column of the data, not a variable in the calling
# environment; it drew the same plot and has been folded into this one.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_col() +
  geom_text(aes(y = Weight + 0.1, label = Weight))
# For a grouped (dodged) bar chart the labels have to be dodged as well:
# give geom_text() a position_dodge() whose width matches the dodge width
# of the bars (0.9 by default). size sets the label font size.
# The stray `y = Weight` argument that was passed to ggplot() outside aes()
# has been removed; y is already mapped inside aes().
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = Weight),
    vjust = 1.5,
    colour = "white",
    position = position_dodge(0.9),
    size = 3
  )
# Data labels on a stacked bar chart.
# Each label's y position is the running total of Weight within its bar, so
# the rows must be sorted consistently before the cumulative sum is taken;
# otherwise the totals are accumulated in the wrong order.
# arrange() and ddply() come from plyr, which has to be loaded explicitly.

library(plyr)
ce <- arrange(cabbage_exp, Date, Cultivar)
# 
# Cultivar Date Weight        sd  n         se
# 1      c39  d16   3.18 0.9566144 10 0.30250803
# 2      c52  d16   2.26 0.4452215 10 0.14079141
# 3      c39  d20   2.80 0.2788867 10 0.08819171
# 4      c52  d20   3.11 0.7908505 10 0.25008887
# 5      c39  d21   2.74 0.9834181 10 0.31098410
# 6      c52  d21   1.47 0.2110819 10 0.06674995
# 
# With the rows in a known order, ddply() groups the data by Date and
# computes the cumulative sum of Weight inside each group.

# Cumulative sum per Date.
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ce
# Cultivar Date Weight        sd  n         se label_y
# 1      c39  d16   3.18 0.9566144 10 0.30250803    3.18
# 2      c52  d16   2.26 0.4452215 10 0.14079141    5.44
# 3      c39  d20   2.80 0.2788867 10 0.08819171    2.80
# 4      c52  d20   3.11 0.7908505 10 0.25008887    5.91
# 5      c39  d21   2.74 0.9834181 10 0.31098410    2.74
# 6      c52  d21   1.47 0.2110819 10 0.06674995    4.21

# label_y accumulates c39 first, i.e. it assumes c39 is the bottom segment.
# ggplot2 (2.2.0 and later) stacks the first fill level on top by default,
# so the stack is reversed here to make the segments line up with the
# precomputed label positions.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "White")

# To centre each label inside its segment, subtract half of the segment's
# weight from the cumulative sum.

ce <- arrange(cabbage_exp, Date, Cultivar)
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)
# The stack is reversed so that segment order matches the order in which
# label_y was accumulated.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  geom_text(aes(y = label_y, label = Weight), colour = "White")

# Polished version: black outlines, pastel palette, and labels formatted
# with two decimals plus a "kg" suffix.
# Fixes: `nsmall` was misspelled `nsamll` (silently ignored by format()),
# and the palette name "Pastell" should be "Pastel1".
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = position_stack(reverse = TRUE), colour = "black") +
  geom_text(
    aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

绘制Cleveland点图

# A Cleveland dot plot can stand in for a bar chart to reduce visual
# clutter and make the values easier to read.

library(gcookbook)  # example data: tophitters2001

# Keep the first 25 rows of tophitters2001.
tophit <- tophitters2001[seq_len(25), ]
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()

# The data set has many columns; look at three of them.
tophit[, c("name", "lg", "avg")]

# name lg    avg
# 1       Larry Walker NL 0.3501
# 2      Ichiro Suzuki AL 0.3497
# 3       Jason Giambi AL 0.3423
# 4     Roberto Alomar AL 0.3357
# 5        Todd Helton NL 0.3356
# 6        Moises Alou NL 0.3314
# 7      Lance Berkman NL 0.3310
# 8         Bret Boone AL 0.3307
# 9  Frank Catalanotto AL 0.3305
# 10     Chipper Jones NL 0.3304

# Order the names by avg, drop the vertical grid lines and draw the
# horizontal ones dashed.
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
    
# The axes can also be swapped: names on the x axis, values on the y axis.
# The x tick labels are rotated by 60 degrees and right-aligned.
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
# It is often useful to group the items by another variable. Here the
# players are grouped by lg, which has the two values NL and AL (National
# League and American League), and sorted by lg first and avg second.
# reorder() can only order factor levels by a single variable, so the
# ordering is built by hand.

# Take the names sorted by lg, then avg, and use them as the factor levels.
nameorder <- with(tophit, name[order(lg, avg)])
tophit$name <- factor(tophit$name, levels = nameorder)

# Map lg to the point colour, and use geom_segment() to draw lines that
# stop at each data point in place of grid lines running across the whole
# panel.
# NOTE(review): ggplot2 3.5.0 deprecates a numeric legend.position in
# favour of legend.position = "inside" plus legend.position.inside; left
# as is so the script keeps working on older ggplot2 versions.
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_segment(mapping = aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(mapping = aes(colour = lg), size = 3) +
  scale_color_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = c(1, 0.55),
    legend.justification = c(1, 0.5)
  )

# Faceted version: one panel per league. The legend is suppressed because
# the facet strips already identify the league.
# guide = "none" replaces guide = FALSE, which is deprecated since
# ggplot2 3.3.4.
# scales/space = "free_y" let each panel show only its own players and
# size the panel height accordingly.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_color_brewer(
    palette = "Set1",
    limits = c("NL", "AL"),
    guide = "none"
  ) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")

end

你可能感兴趣的:(条形图 Part2)