注:R语言的再复习之路
## 拟合平滑曲线
# Attach ggplot2 so that ggplot(), the geoms, labs(), etc. are available.
library(ggplot2)

# Scatter plot of mpg against weight (blue triangles) with a fitted
# linear-regression line drawn as a dashed red line.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  geom_point(pch = 17, color = "blue", size = 2) +
  geom_smooth(method = "lm", color = "red", linetype = 2) +
  labs(title = "Automobile Data", x = "Weight", y = "Miles Per Gallon")
## 将变量转化为因子
# Recode the 0/1 indicator columns with descriptive labels and turn the
# cylinder count into a factor; the plots below use them as discrete variables.
mtcars[["am"]] <- factor(
  mtcars[["am"]],
  levels = c(0, 1),
  labels = c("Automatic", "Manual")
)
mtcars[["vs"]] <- factor(
  mtcars[["vs"]],
  levels = c(0, 1),
  labels = c("V-Engine", "Straight Engine")
)
mtcars[["cyl"]] <- factor(mtcars[["cyl"]])
## 分面绘图
# Horsepower vs. mpg, with cylinder count shown by both point shape and colour,
# split into a grid of panels: `am` defines the rows and `vs` the columns.
ggplot(mtcars, aes(hp, mpg, colour = cyl, shape = cyl)) +
  geom_point(size = 3) +
  facet_grid(am ~ vs) +
  labs(
    x = "Horsepower",
    y = "Miles Per Gallon",
    title = "Automobile Data by Engine Type"
  )
函数 | 添加 | 选项 |
---|---|---|
geom_bar() | 条形图 | |
geom_boxplot() | 箱线图 | |
geom_density() | 密度图 | |
geom_histogram() | 直方图 | |
geom_point() | 散点图 | |
geom_line() | 线图 | |
geom_hline() | 水平线 | |
geom_vline() | 垂线 | |
geom_jitter() | 抖动点 | |
geom_rug() | 地毯图 | |
geom_smooth() | 拟合曲线 | |
geom_text() | 文字注释 | |
geom_violin() | 小提琴图 | |
## 直方图
# `singer` ships with the lattice package; load it before its first use.
data(singer, package = "lattice")
# Histogram of singer heights. Only x is mapped; the bar heights are counts.
ggplot(data = singer, aes(x = height)) +
  geom_histogram()
## 箱线图
# Box plot of height for each voice part (both x and y must be mapped).
ggplot(singer, aes(voice.part, height)) +
  geom_boxplot()
注:创建直方图时只需指定变量x,而创建箱线图时变量x和y都需要指定。geom_histogram()函数在没有指定y变量时,默认对x的取值分组计数,并把计数作为y轴。
选项 | 详述 |
---|---|
color | 对点、线和填充区域的边界进行着色 |
fill | 对填充区域着色,如条形和密度区域 |
alpha | 颜色的透明度,从0(完全透明)到1(完全不透明) |
linetype | 图案的线条(1=实线,2=虚线,3=点,4=点破折号,5=长破折号,6=双破折号) |
size | 点的尺寸和线的宽度 |
shape | 点的形状 |
position | 绘制诸如条形图和点等对象的位置。对条形图来说,‘dodge’将条形图分组并排,‘stack’将条形图分组堆叠,‘fill’垂直地堆叠分组条形图并规范其高度相等。对于点来说,‘jitter’减少点重叠 |
binwidth | 直方图中每个箱体(组距)的宽度 |
width | 箱线图的宽度 |
notch | 表示箱线图是否带缺口(TRUE/FALSE) |
sides | 地毯图的放置位置(b = 底部,l = 左侧,t = 顶部,r = 右侧,可组合,如bl = 左侧和底部) |
# The Salaries data set lives in carData (the data were moved out of car in
# car 3.0), so load it from there.
data(Salaries, package = "carData")

# Notched box plots of salary by rank, overlaid with jittered, semi-transparent
# points and a rug of the salary values along the left-hand axis.
ggplot(Salaries, aes(x = rank, y = salary)) +
  geom_boxplot(fill = "cornflowerblue", color = "black", notch = TRUE) +
  geom_point(position = "jitter", color = "blue", alpha = 0.5) +
  # The argument is `sides` and the value is the letter "l" (left), not "1".
  geom_rug(sides = "l", color = "black")
# Load the singer heights data set from lattice.
data(singer, package = "lattice")

# Violin plot of height per voice part with a narrow box plot drawn on top.
ggplot(singer, aes(voice.part, height)) +
  geom_violin(fill = "lightblue") +
  geom_boxplot(width = 0.2, fill = "lightgreen")
## 分组核密度图
# Overlaid kernel density curves of salary, one semi-transparent fill per rank.
ggplot(Salaries, aes(salary, fill = rank)) +
  geom_density(alpha = 0.3)
## 分组散点图
# Salary against years since PhD; colour encodes rank and point shape encodes sex.
ggplot(
  Salaries,
  aes(yrs.since.phd, salary, shape = sex, colour = rank)
) +
  geom_point()
## 分组条形图
# Counts per rank, with one bar per sex placed side by side.
ggplot(Salaries, aes(rank, fill = sex)) +
  geom_bar(position = "dodge") +
  labs(title = 'position = "dodge"')
注:映射到变量的图形属性应该写在aes()函数内,赋为常数的图形属性应该写在aes()函数外。
语法 | 结果 |
---|---|
facet_wrap(~ var, ncol = n) | 将每个var水平排列成n列的独立图 |
facet_wrap(~ var, nrow = n) | 将每个var水平排列成n行的独立图 |
facet_grid(rowvar ~ colvar) | rowvar和colvar组合的独立图,其中rowvar表示行,colvar表示列 |
facet_grid(rowvar ~ .) | 每个rowvar水平的独立图,配置成一个单列 |
facet_grid(. ~ colvar) | 每个colvar水平的独立图,配置成一个单行 |
# One height histogram per voice part, with the panels laid out in four rows.
ggplot(singer, aes(height)) +
  facet_wrap(~ voice.part, nrow = 4) +
  geom_histogram()
# Salary against years since PhD, rank shown by colour and shape, with one
# panel per sex arranged in a single row.
ggplot(
  Salaries,
  aes(yrs.since.phd, salary, shape = rank, colour = rank)
) +
  geom_point() +
  facet_grid(. ~ sex)
# Height density for each voice part, filled by voice part and stacked as one
# panel per row.
ggplot(singer, aes(height, fill = voice.part)) +
  facet_grid(voice.part ~ .) +
  geom_density()
选项 | 描述 |
---|---|
method = | 使用的平滑函数。允许的值包括lm、glm、loess、rlm和gam,分别对应线性、广义线性、局部加权回归(loess)、稳健线性和广义相加模型。默认自动选择:观测数少于1000时使用loess,否则使用gam |
formula = | 在平滑函数中使用的公式。例子包括y ~ x(默认)、y ~ log(x)、y ~ poly(x, n)(表示n次多项式拟合)以及y ~ ns(x, n)(表示一个具有n个自由度的样条拟合) |
se | 绘制置信区间(TRUE/FALSE)。默认为TRUE |
level | 使用的置信区间水平(默认为95%) |
fullrange | 指定拟合应涵盖全图(TRUE)或仅仅是数据(FALSE)。默认为FALSE |
# Salary against years since PhD by sex: a quadratic fit per sex without the
# confidence band, plus the raw points. Sex sets colour, shape and line type.
ggplot(
  Salaries,
  aes(yrs.since.phd, salary, colour = sex, shape = sex, linetype = sex)
) +
  geom_smooth(formula = y ~ poly(x, 2), method = lm, size = 1, se = FALSE) +
  geom_point(size = 2)
函数 | 选项 |
---|---|
scale_x_continuous()和scale_y_continuous() | breaks = 指定刻度标记,labels = 指定刻度标记标签,limits = 控制要展示的值的范围 |
scale_x_discrete()和scale_y_discrete() | breaks = 对因子的水平进行放置和排序,labels = 指定这些水平的标签,limits = 表示哪些水平应该展示 |
coord_flip() | 颠倒x轴和y轴 |
# Salary box plots by rank and sex with relabelled axes: two-line rank names on
# the x axis and dollar-formatted tick labels on the y axis.
ggplot(Salaries, aes(rank, salary, fill = sex)) +
  geom_boxplot() +
  scale_x_discrete(
    breaks = c("AsstProf", "AssoProf", "Prof"),
    labels = c(
      "Assistant\nProfessor",
      "Associate\nProfessor",
      "Full\nProfessor"
    )
  ) +
  scale_y_continuous(
    breaks = c(50000, 100000, 150000, 200000),
    labels = c("$50K", "$100K", "$150K", "$200K")
  ) +
  labs(x = "", y = "", title = "Faculty Salary by Rank and Sex")
图例标题:默认取自aes()函数中fill/shape/color等参数所映射的变量;把fill = 'mytitle'加到labs()函数中可以改变标题。图例位置:由theme()函数中的legend.position选项控制,可能的值包括left/top/right/bottom
# Salary box plots by rank and sex with a custom legend title ("Gender") and
# the legend positioned via a pair of relative coordinates.
# NOTE(review): ggplot2 >= 3.5.0 deprecates a numeric legend.position in favour
# of legend.position = "inside" plus legend.position.inside; the numeric form
# is kept here so the snippet also runs on older ggplot2 versions.
ggplot(data = Salaries, aes(x = rank, y = salary, fill = sex)) +
  geom_boxplot() +
  scale_x_discrete(
    breaks = c("AsstProf", "AssoProf", "Prof"),
    labels = c(
      "Assistant\nProfessor",
      "Associate\nProfessor",
      "Full\nProfessor"
    )
  ) +
  scale_y_continuous(
    breaks = c(50000, 100000, 150000, 200000),
    labels = c("$50K", "$100K", "$150K", "$200K")
  ) +
  labs(
    title = "Faculty Salary by Rank and Sex",
    x = "",
    y = "",
    fill = "Gender"
  ) +
  theme(legend.position = c(.1, .8))
# Bubble chart: weight vs. mpg with the point size mapped to displacement.
# Shape 21 is a circle with separate outline (color) and interior (fill).
ggplot(mtcars, aes(wt, mpg, size = disp)) +
  geom_point(fill = "cornsilk", color = "black", shape = 21) +
  labs(
    title = "Bubble Chart",
    x = "Weight",
    y = "Miles Per Gallon",
    size = "Engine\nDisplacement"
  )
# Salary against years since PhD with hand-picked colours for the three ranks.
ggplot(Salaries, aes(yrs.since.phd, salary, colour = rank)) +
  geom_point(size = 2) +
  scale_color_manual(values = c("orange", "olivedrab", "navy"))
# Reusable theme: brown bold-italic titles, dark-blue bold axis text, a white
# panel with a dark-blue border, grey horizontal major grid lines, no vertical
# minor grid lines, and the legend placed above the plot.
mytheme <- theme(
  # `face` must be one of "plain", "italic", "bold" or "bold.italic", and
  # `size` is a number of points, not a string.
  plot.title = element_text(face = "bold.italic", size = 14, color = "brown"),
  axis.title = element_text(face = "bold.italic", size = 10, color = "brown"),
  axis.text = element_text(face = "bold", size = 9, color = "darkblue"),
  panel.background = element_rect(fill = "white", color = "darkblue"),
  panel.grid.major.y = element_line(color = "grey", linetype = 1),
  panel.grid.minor.x = element_blank(),
  legend.position = "top"
)
# Salary box plots by rank and sex, styled with the custom `mytheme` object.
ggplot(data = Salaries, mapping = aes(rank, salary, fill = sex)) +
  geom_boxplot() +
  labs(x = "Rank", y = "Salary", title = "Salary by Rank and Sex") +
  mytheme
# gridExtra provides grid.arrange() for placing several plots on one page.
library(gridExtra)

# Build three independent plots: two bar charts and one scatter plot.
p1 <- ggplot(data = Salaries, mapping = aes(rank)) +
  geom_bar()
p2 <- ggplot(data = Salaries, mapping = aes(sex)) +
  geom_bar()
p3 <- ggplot(data = Salaries, mapping = aes(yrs.since.phd, salary)) +
  geom_point()

# Draw them side by side in a single row of three columns.
grid.arrange(p1, p2, p3, ncol = 3)