ggplot2是我见过的最human friendly的画图软件,这得益于Leland Wilkinson在其著作《The Grammar of Graphics》中提出的一套图形语法:它把图形元素抽象成可以自由组合的成分,而Hadley Wickham在R中实现了这套想法。
ggplot2基本要素
下面以一份钻石的数据为例,这份数据非常大,随机取一个子集来画图。
# Load ggplot2 with library() so that a missing package stops the script
# immediately; require() would only return FALSE and let later calls fail.
library(ggplot2)
data(diamonds)
# Fix the RNG seed so the same 1000 rows are drawn on every run.
set.seed(42)
small <- diamonds[sample(nrow(diamonds), 1000), ]
head(small)
> head(small)
carat cut color clarity depth table price x y z
49345 0.71 Very Good H SI1 62.5 60 2096 5.68 5.75 3.57
50545 0.79 Premium H SI1 61.8 59 2275 5.97 5.91 3.67
15434 1.03 Ideal F SI1 62.4 57 6178 6.48 6.44 4.03
44792 0.50 Ideal E VS2 62.2 54 1624 5.08 5.11 3.17
34614 0.27 Ideal E VS1 61.6 56 470 4.14 4.17 2.56
27998 0.30 Premium E VS2 61.7 58 658 4.32 4.34 2.67
画图实际上是把数据中的变量映射到图形属性上。以克拉(carat)数为X轴变量,价格(price)为Y轴变量。
# Load ggplot2; library() errors right away if the package is not installed,
# whereas require() only returns FALSE.
library(ggplot2)
data(diamonds)
set.seed(42)
# Randomly draw 1000 rows as the working sample.
small <- diamonds[sample(nrow(diamonds), 1000), ]
head(small)

# Plotting means mapping variables in the data to aesthetic attributes.
# Here carat goes on the x axis and price on the y axis.
p <- ggplot(data = small, mapping = aes(x = carat, y = price))
# The line above only maps the data to the x/y axes; ggplot2 still has to be
# told which geometric object to draw. Points (a scatter plot) as an example:
p + geom_point()

# Map the cut quality (cut) to the point shape as well.
p <- ggplot(data = small, mapping = aes(x = carat, y = price, shape = cut))
p + geom_point()

# Additionally map the diamond colour grade (color) to the colour aesthetic.
p <- ggplot(
  data = small,
  mapping = aes(x = carat, y = price, shape = cut, colour = color)
)
p + geom_point()
最后一幅图也可以用下面的写法来完成
# Alternative spelling of the carat/price scatter plot (shape = cut,
# colour = color): the aesthetics are given to geom_point(), not ggplot().
ggplot(data = small) +
  geom_point(aes(x = carat, y = price, shape = cut, colour = color))
# Base scatter plot of the mpg data: displ on the x axis, cty on the y axis.
p <- ggplot(mpg, aes(displ, cty)) +
  geom_point()
# Show the first and last rows of mpg.
head(mpg)
tail(mpg)
# Split by cyl into panels placed side by side.
p + facet_grid(. ~ cyl)
# Split by cyl into panels stacked top to bottom.
p + facet_grid(cyl ~ .)
# Split by drv into panels stacked top to bottom.
p + facet_grid(drv ~ .)
# Split by drv (rows) and cyl (columns) into a two-dimensional panel grid.
p + facet_grid(drv ~ cyl)
> head(mpg)
manufacturer model displ year cyl trans drv cty hwy fl class
1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
> tail(mpg)
manufacturer model displ year cyl trans drv cty hwy fl class
229 volkswagen passat 1.8 1999 4 auto(l5) f 18 29 p midsize
230 volkswagen passat 2.0 2008 4 auto(s6) f 19 28 p midsize
231 volkswagen passat 2.0 2008 4 manual(m6) f 21 29 p midsize
232 volkswagen passat 2.8 1999 6 auto(l5) f 16 26 p midsize
233 volkswagen passat 2.8 1999 6 manual(m5) f 18 26 p midsize
234 volkswagen passat 3.6 2008 6 auto(s6) f 17 26 p midsize
# Split by cyl into panels placed side by side.
p + facet_grid(. ~ cyl)
# Split by cyl into panels stacked top to bottom.
p + facet_grid(cyl ~ .)
# Split by drv into panels stacked top to bottom.
p + facet_grid(drv ~ .)
# Split by drv (rows) and cyl (columns) into a two-dimensional panel grid.
p + facet_grid(drv ~ cyl)
# Build a demo series: one date every 15 days starting at 2005-01-01, and a
# "Gas" reading that starts from 4000 and accumulates the absolute values of
# normal draws (mean 100, sd 30).
dts <- as.Date("20050101", format = "%Y%m%d") + seq(0, 1000, 15)
A <- data.frame(
  Dates = dts,
  Gas = 4000 + cumsum(abs(rnorm(length(dts), 100, 30)))
)
head(A)
# Derive the calendar year, the day of the year, and the increase to the
# next reading (the last row has no successor, hence the trailing NA).
A <- transform(
  A,
  Year = format(Dates, "%Y"),
  DayOfYear = as.numeric(format(Dates, "%j")),
  GasDiff = c(diff(Gas), NA)
)
head(A)
# library() fails loudly if ggplot2 is missing; require() would only
# return FALSE and let the plotting calls below fail instead.
library(ggplot2)
# GasDiff over calendar time, line coloured by year.
ggplot(A, aes(Dates, GasDiff)) +
  geom_line(aes(colour = Year))
# GasDiff by day of year, one panel per year stacked top to bottom.
ggplot(A, aes(DayOfYear, GasDiff)) +
  geom_line() +
  facet_grid(Year ~ .)
# GasDiff by day of year in a single panel, one coloured line per year.
ggplot(A, aes(DayOfYear, GasDiff)) +
  geom_line(aes(colour = Year))
# Same single-panel plot, plus points whose shape also encodes the year.
ggplot(A, aes(DayOfYear, GasDiff)) +
  geom_line(aes(colour = Year)) +
  geom_point(aes(shape = Year))
> head(A)
Dates Gas
1 2005-01-01 4149.130
2 2005-01-16 4285.929
3 2005-01-31 4371.898
4 2005-02-15 4451.551
5 2005-03-02 4602.443
6 2005-03-17 4748.729
> A <- transform( A,
+ Year = format(Dates, '%Y'),
+ DayOfYear = as.numeric( format(Dates, '%j')),
+ .... [TRUNCATED]
> head(A)
Dates Gas Year DayOfYear GasDiff
1 2005-01-01 4149.130 2005 1 136.79945
2 2005-01-16 4285.929 2005 16 85.96883
3 2005-01-31 4371.898 2005 31 79.65307
4 2005-02-15 4451.551 2005 46 150.89207
5 2005-03-02 4602.443 2005 61 146.28541
6 2005-03-17 4748.729 2005 76 106.43583
# GasDiff over calendar time, line coloured by year.
ggplot(A, aes(Dates, GasDiff)) +
  geom_line(aes(colour = Year))
参考:https://www.plob.org/article/7264.html