首先整理数据
# Load the plotting package used by the figures in this script.
library(ggplot2)

# Read the tab-separated input without a header row, so read.table() names
# the columns V1, V2, ...; the second column is renamed to `value`.
data <- read.table("D://swRJ/R/practice/数据统计区间分布图以及累计分布函数图/714.txt", header = FALSE, sep = "\t")
data2 <- as.data.frame(data)
colnames(data2)[2] <- "value"
summary(data2$value)

# Drop every row that contains a missing value.
data2 <- na.omit(data2)

# Overwrite the first column with a class label derived from `value`:
#   "A": value <= 0,  "B": 0 < value <= 0.58,  "C": value > 0.58
# cut() uses right-closed intervals by default, which matches these bounds.
# A single vectorised cut() also works when one of the classes has no rows,
# whereas assigning into an empty row subset of a data frame raises an error.
data2[[1]] <- as.character(
  cut(
    data2$value,
    breaks = c(-Inf, 0, 0.58, Inf),
    labels = c("A", "B", "C")
  )
)
ggplot2绘制直方图
# Stacked histogram of `value` in 0.01-wide bins, filled by the class in V1.
ggplot(data = data2, mapping = aes(x = value, fill = V1)) +
  geom_histogram(binwidth = 0.01, position = "stack") +
  scale_x_continuous(breaks = c(-1, 0, 0.5, 1.5, 2, 2.5, 3)) +
  labs(x = "") +
  theme_bw() +
  # Remove all grid lines and draw black axis lines instead.
  theme(
    panel.grid = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggplot2绘制密度图
# Density curves of `value`, one semi-transparent curve per class in V1.
# The x aesthetic refers to the column by name: using `data2$value` inside
# aes() bypasses the data passed to ggplot() and is discouraged by ggplot2.
ggplot(data2, aes(x = value, fill = V1)) +
  # Disabled alternative kept from the original script:
  # geom_histogram(breaks = c(-1,0,0.5,1.5,2,2.5,3), alpha=.5, position="identity") +
  scale_x_continuous(breaks = c(-1, 0, 0.5, 1.5, 2, 2.5, 3)) +
  geom_density(alpha = .3) +
  theme_bw() +
  xlab("") +
  # Remove all grid lines and draw black axis lines instead.
  theme(panel.grid = element_blank()) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggpubr绘制密度图
# Density plot of `value` drawn with ggpubr, with outline and fill both taken
# from the class in V1; `add = "mean"` and `rug = TRUE` are passed through to
# ggdensity() to request the mean marker and the rug.
library(ggpubr)
ggdensity(
  data2,
  x = "value",
  color = "V1",
  fill = "V1",
  add = "mean",
  rug = TRUE
)
自定义主题 (来源于基迪奥某一个帖子,具体忘了,实在抱歉)
# Shared theme for the plots below: theme_bw() with 12 pt text, white (i.e.
# invisible) grid lines, black axis tick labels, no legend title and a
# 0.5 cm margin on every side of the plot.
mytheme <- theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(colour = "white"),
    panel.grid.minor = element_line(colour = "white"),
    axis.text.x = element_text(
      size = 12, angle = 0, vjust = 0.6, hjust = 0.8, colour = "black"
    ),
    axis.text.y = element_text(size = 12, colour = "black"),
    legend.text = element_text(size = 12),
    legend.title = element_blank(),
    plot.margin = unit(rep(0.5, 4), "cm")
  )
区间分布图
# Count how many observations of `value` fall into each interval.
# - include.lowest = TRUE keeps the minimum observation in the first interval;
#   without it the right-closed first interval excludes its own lower bound,
#   so the minimum becomes NA and is missing from the counts.
# - unique() avoids the "'breaks' are not unique" error when the data minimum
#   or maximum coincides with one of the fixed break points.
dlast <- table(
  cut(
    data2$value,
    breaks = unique(
      c(min(data2$value), -1, 0, 0.58, 1.5, 2, 2.5, 3, max(data2$value))
    ),
    include.lowest = TRUE
  )
)
dlast1 <- as.vector(dlast)                         # counts per interval
dlast2 <- round(dlast1 / nrow(data2), digits = 4)  # share of all rows
dlast3 <- paste(dlast2 * 100, "%")                 # share as a text label

# One row per interval. `class` is a factor whose levels follow the interval
# order, so the bars are not re-sorted alphabetically by the label text.
dl <- data.frame(
  class = factor(names(dlast), levels = names(dlast)),
  number = dlast1,
  percent = dlast3
)

# Bar chart of the counts per interval.
# NOTE(review): the y axis is fixed to 0-1500; a count above 1500 would fall
# outside the scale limits -- confirm this suits the data.
ggplot(dl, aes(class, y = number)) +
  geom_bar(aes(fill = class), stat = "identity") +
  scale_y_continuous(limits = c(0, 1500), breaks = seq(0, 1500, 300)) +
  mytheme
填充百分比柱状图
# One row per interval with its count and its share of all rows. `class` is a
# factor whose levels follow the interval order of `dlast`, so the stacked
# segments and the legend are not re-sorted alphabetically by the label text.
dta <- data.frame(
  class = factor(names(dlast), levels = names(dlast)),
  number = dlast1,
  percent = dlast2
)
# Single dummy x category so that all intervals stack into one bar.
dta$ltype <- "A"

# Single bar filled to 100 %, split by interval.
# Note the difference between putting `fill` inside aes() (mapped to a
# column) and outside it (a fixed colour).
ggplot(dta, aes(ltype, number, fill = class)) +
  geom_bar(position = position_fill(0.2), stat = "identity", width = 0.2) +
  scale_fill_brewer(palette = "Set3") +
  xlab("class_region") +
  ylab("percent") +
  scale_y_continuous(labels = function(x) paste0(x * 100, "%")) +
  mytheme
累计分布图
什么是累计分布图?百度百科的定义
- 对于所有实数 x,随机变量 X 的累积分布函数定义如下:$F_X(x) = P(X \le x)$
即累积分布函数表示:对离散变量而言,所有小于等于a的值出现概率的和,即 $F_X(a) = \sum_{x_i \le a} P(X = x_i)$ [1] 。
# Empirical cumulative distribution of `value`, drawn as a step line, with a
# dashed vertical marker at 0.58 and a gray reference segment from (0, 0)
# to (1, 1).
ggplot(data2, aes(value)) +
  stat_ecdf(geom = "step", size = 1) +
  scale_x_continuous(breaks = c(-1, 0, 0.58, 1.5, 2, 2.5, 3)) +
  geom_vline(xintercept = 0.58, linetype = "dashed") +
  scale_y_continuous(limits = c(0, 1), labels = waiver(), expand = c(0, 0)) +
  # annotate() draws the reference segment once; geom_segment() with constant
  # aesthetics would draw one identical segment for every row of data2.
  annotate(
    "segment",
    x = 0, y = 0, xend = 1, yend = 1,
    colour = "gray", linetype = "longdash", size = 1
  ) +
  ylab("Cumulative") +
  mytheme