ggplot2画图工作中的代码总结

工作中经常要用ggplot2画各种图形,下面是这几个月用ggplot2画图时积累的一些代码。最常碰到的需求有:统计各个类别下的频数,在柱状图的每个分类上显示百分比,让各个类别对应的坐标轴文字倾斜,以及调整图形的颜色等等。下面就是ggplot2包中针对这些技巧的代码,数据用的是ggplot2包自带的mpg数据集,以字段class为例(class是汽车的类型),具体见代码:

library(ggplot2)
library(reshape)
# Frequency table of the car classes in the mpg data set, with the columns
# renamed to `cars` (class name) and `freq` (number of cars in that class).
test <- data.frame(table(mpg$class))
test <- rename(test, c(Var1 = "cars", Freq = "freq"))

# Percentage label of every class, stored in the same row as its count so that
# each label is drawn on its own bar. A separately sorted label vector is
# matched to the data rows by position, not to the reordered bars, and would
# put the percentages on the wrong bars.
test$label <- paste0(round(test$freq / sum(test$freq) * 100, 2), "%")

# reorder() sorts the bars by frequency, `fill` sets the bar colour, `width`
# sets the bar width and geom_text() draws the percentage above each bar.
p <- ggplot(test, aes(x = reorder(cars, freq), y = freq)) +
  geom_bar(stat = "identity", fill = "green", width = 0.5) +
  geom_text(aes(label = label), colour = "blue", vjust = -1)

# theme(axis.text.x = element_text(...)) rotates and recolours the x-axis tick
# labels. No fill aesthetic is mapped, so no manual fill scale is needed.
p + theme(axis.text.x = element_text(angle = 30, colour = "red"))

# coord_flip() swaps the horizontal and vertical axes.
p + labs(x = "车型号", y = "频率", title = "生产、各流通级别、终端流通量和纯销比\n") +
  coord_flip()

# Nightingale rose chart: the same bars drawn in polar coordinates.
ggplot(test, aes(cars, freq)) +
  geom_bar(stat = "identity", fill = "green") +
  coord_polar() +
  theme(legend.position = "none")

众筹用户画像
library(RMySQL)
library(ggplot2)
library(reshape)
# 文件操作
# list.files(pattern=("\\.pdf$"))
# file.remove( list.files(pattern=("\\.pdf$")))
# file.exists("uv走势图total.pdf")

# Open a connection to the `pms` MySQL database.
# NOTE(review): the user name, password and host are hard-coded in the source;
# consider loading them from environment variables or a config file that is
# kept out of version control, and rotating this password.
conn <- dbConnect(MySQL(), dbname = "pms", username="gaoyang922", password="gaoyang922@123456!",host="10.10.109.62",port=1333)

# Daily UV (distinct `label`) and PV (distinct label+sessionid+visit_time) from
# tracker.hbase_visit, excluding page URLs that contain 'zhongchouban'.
# Three sub-queries are unioned: site 'pc', site 'm', and all sites together
# (reported as site 't'). One row per insert_date and site.
query<-dbSendQuery(conn,"select * from ((select insert_date,site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from( 
                                                                                                             SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex  FROM tracker.hbase_visit
                                                                                                             where page_url not like '%zhongchouban%' and insert_time is NOT NULL and site ='pc') pc group by insert_date,site order by insert_date,site 
                   ) union (select insert_date,site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from( 
                                                                                                                    SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex  FROM tracker.hbase_visit 
                                                                                                                    where page_url not like '%zhongchouban%' and insert_time is NOT NULL and site ='m' 
                   ) m group by insert_date,site order by insert_date,site) union ( 
                                                                                   select insert_date,'t' as site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from( 
                                                                                                                                                                                  SELECT date_format(left(insert_time,8),'%Y-%m-%d')  as insert_date,site, label, concat(label,sessionid,visit_time) as pindex  FROM tracker.hbase_visit 
                                                                                                                                                                                  where page_url not like '%zhongchouban%' and insert_time is NOT NULL ) t group by insert_date order by insert_date)) c 
                   order by insert_date , site  desc;")
# n = -1 fetches every remaining row of the result set.
result <- fetch(query,n=-1)
# Release the result set before closing the connection, so the disconnect does
# not have to clean up a pending result.
dbClearResult(query)
dbDisconnect(conn)
head(result)

 # pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
# Daily UV trend, one facet row per site; points are joined per site.
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_uv +
  labs(x = "日期", y = "uv量", title = "8月18至9月20日每日uv走势图") +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    axis.text.x = element_text(angle = 30, color = "black")
  )
# dev.off()


# Daily UV trend with all sites overlaid in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_uv +
  labs(x = "日期", y = "uv量", title = "8月18至9月20日每日uv走势图") +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    axis.text.x = element_text(angle = 30)
  )
# dev.off()

# Daily PV trend, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_pv +
  labs(x = "日期", y = "pv量", title = "8月18至9月20日每日pv走势图") +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    axis.text.x = element_text(angle = 30)
  )
# dev.off()

# Daily PV trend with all sites overlaid in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_pv +
  labs(x = "日期", y = "pv量", title = "8月18至9月20日每日pv走势图") +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    axis.text.x = element_text(angle = 30)
  )


网站pv uv分析走势图

library(RMySQL)
library(ggplot2)
library(reshape)
# 文件操作
# list.files(pattern=("\\.pdf$"))
# file.remove( list.files(pattern=("\\.pdf$")))
# file.exists("uv走势图total.pdf")

# Open a connection to the `pms` MySQL database.
# NOTE(review): the user name, password and host are hard-coded in the source;
# consider loading them from environment variables or a config file that is
# kept out of version control, and rotating this password.
conn <- dbConnect(MySQL(), dbname = "pms", username="gaoyang922", password="gaoyang922@123456!",host="10.10.109.62",port=1333)

# Daily UV (distinct `label`) and PV (distinct label+sessionid+visit_time) from
# tracker.hbase_visit, excluding page URLs that contain 'zhongchouban'.
# Three sub-queries are unioned: site 'pc', site 'm', and all sites together
# (reported as site 't'). One row per insert_date and site.
query<-dbSendQuery(conn,"select * from ((select insert_date,site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from( 
                                                                                                             SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex  FROM tracker.hbase_visit
                                                                                                             where page_url not like '%zhongchouban%' and insert_time is NOT NULL and site ='pc') pc group by insert_date,site order by insert_date,site 
                   ) union (select insert_date,site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from( 
                                                                                                                    SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex  FROM tracker.hbase_visit 
                                                                                                                    where page_url not like '%zhongchouban%' and insert_time is NOT NULL and site ='m' 
                   ) m group by insert_date,site order by insert_date,site) union ( 
                                                                                   select insert_date,'t' as site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from( 
                                                                                                                                                                                  SELECT date_format(left(insert_time,8),'%Y-%m-%d')  as insert_date,site, label, concat(label,sessionid,visit_time) as pindex  FROM tracker.hbase_visit 
                                                                                                                                                                                  where page_url not like '%zhongchouban%' and insert_time is NOT NULL ) t group by insert_date order by insert_date)) c 
                   order by insert_date , site  desc;")
# n = -1 fetches every remaining row of the result set.
result <- fetch(query,n=-1)
# Release the result set before closing the connection, so the disconnect does
# not have to clean up a pending result.
dbClearResult(query)
dbDisconnect(conn)
head(result)

 # pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
# Daily UV trend, one facet row per site; points are joined per site.
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_uv +
  labs(x = "日期", y = "uv量", title = "8月18至9月20日每日uv走势图") +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    axis.text.x = element_text(angle = 30, color = "black")
  )
# dev.off()


# Daily UV trend with all sites overlaid in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_uv +
  labs(x = "日期", y = "uv量", title = "8月18至9月20日每日uv走势图") +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    axis.text.x = element_text(angle = 30)
  )
# dev.off()

# Daily PV trend, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_pv +
  labs(x = "日期", y = "pv量", title = "8月18至9月20日每日pv走势图") +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    axis.text.x = element_text(angle = 30)
  )
# dev.off()

# Daily PV trend with all sites overlaid in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_pv +
  labs(x = "日期", y = "pv量", title = "8月18至9月20日每日pv走势图") +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    axis.text.x = element_text(angle = 30)
  )





R机器学习实战书本ggplot图汇总

library(reshape)
library(ggplot2)
# Load the heights/weights/genders data set; the columns used below are
# Gender, Height and Weight.
data.file <- read.csv(
  "D:\\ML_for_Hackers-master\\02-Exploration\\data\\01_heights_weights_genders.csv",
  stringsAsFactors = FALSE, sep = ",", header = TRUE
)
heigths <- with(data.file, Height)

# Mean height per gender, then mean of columns 2 and 3 per gender.
tapply(data.file$Height, data.file$Gender, mean)
aggregate(data.file[, 2:3], list(data.file$Gender), mean)

# Same per-gender means via reshape: melt to long format, then cast back with
# mean() as the aggregate; margins = "grand_row" adds a row for all genders.
data.file.melt <- melt(data.file, id = "Gender")
result <- cast(data.file.melt, Gender ~ variable, mean, margins = "grand_row")
result <- cast(data.file.melt, Gender ~ variable, mean,
               subset = variable %in% c("Height"), margins = "grand_row")

# Number of rows per gender. ddply() lives in plyr, which library(reshape)
# does not necessarily attach, so it is called through its namespace.
result <- plyr::ddply(data.file, "Gender", nrow)

# Quintiles of height (two equivalent spellings), then the bounds of the
# central 95% range. The lower bound is 0.025; the previous 0.225 was a typo.
quantile(heigths, probs = seq(0, 1, 0.2))
quantile(data.file$Height, probs = seq(0, 1, 0.2))
quantile(data.file$Height, probs = c(0.025, 0.975))
# Histograms of height with 1-unit and 5-unit bins.
ggplot(data.file, aes(x = Height)) + geom_histogram(binwidth = 1)
ggplot(data.file, aes(x = Height)) + geom_histogram(binwidth = 5)

# Scatter plot of weight against height, one facet row per gender. Points are
# coloured through `colour`: the default point shape ignores `fill`.
ggplot(data.file, aes(x = Height, y = Weight, colour = Gender)) +
  geom_point() +
  facet_grid(Gender ~ .)

# Add a smoothed curve; only the first 800 rows are used, keeping the data
# below 1000 observations.
ggplot(data.file[1:800, ], aes(x = Height, y = Weight)) +
  geom_point() +
  geom_smooth()

# Colour by gender with semi-transparent points. The transparency is a fixed
# setting outside aes(); a bare `alpha` inside aes() would be taken as the x
# mapping and fail.
ggplot(data.file, aes(x = Height, y = Weight)) +
  geom_point(aes(color = Gender), alpha = 0.25) +
  geom_smooth()

# Colour the points by gender.
ggplot(data.file, aes(x = Height, y = Weight, color = factor(Gender))) +
  geom_point()

# Add a 0/1 indicator column `Male` for the logistic regression.
heights.weights <- transform(data.file,
                             Male = ifelse(Gender == "Male", 1, 0))

# Logistic regression of gender on weight and height.
logit.model <- glm(Male ~ Weight + Height,
                   data = heights.weights,
                   family = binomial(link = "logit"))

# Decision boundary b0 + b1 * Weight + b2 * Height = 0, rewritten for a plot
# with Height on x and Weight on y:
#   Weight = -b0 / b1 - (b2 / b1) * Height
boundary.intercept <- -coef(logit.model)[1] / coef(logit.model)[2]
boundary.slope <- -coef(logit.model)[3] / coef(logit.model)[2]

# Scatter plot of both genders with the boundary drawn on top. geom_abline()
# replaces stat_abline(), which is no longer available in current ggplot2.
ggplot(data.file, aes(x = Height, y = Weight)) +
  geom_point(aes(color = Gender, alpha = 0.25)) +
  scale_alpha(guide = "none") +
  scale_color_manual(values = c("Male" = "black", "Female" = "gray")) +
  theme_bw() +
  geom_abline(intercept = boundary.intercept, slope = boundary.slope,
              color = "black")


# Scatter plots
head(mpg)

# cty against hwy, coloured by class and sized by displ, with a loess curve.
# loess is requested explicitly here; it is intended for smaller data sets
# such as this one.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(color = class, size = displ)) +
  stat_smooth(method = "loess")

# Add a title and axis labels.
p + labs(x = "城市", y = "耗油量", title = "汽车油耗与型号")

# One panel per year, laid out in two columns. labs() takes `x` and `y`;
# `xlab`/`ylab` are not argument names and left the axis titles unchanged.
p + facet_wrap(~year, ncol = 2) +
  labs(x = "城市", y = "耗油量", title = "汽车油耗与型号")


# Bar charts
# Order the class levels by the number of cars in each class, so the bars are
# drawn from the least to the most frequent class.
class2 <- reorder(mpg$class, mpg$class, length)
mpg$class2 <- class2

# `fill` colours each bar by its class.
ggplot(mpg, aes(x = class2)) + geom_bar(aes(fill = class2))

# Split each bar by year: stacked vertically ...
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar(position = "stack")
# ... or placed side by side.
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar(position = "dodge")

# One panel per year. This plot used to repeat the stacked chart above.
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar() + facet_wrap(~year)

# Pie chart: a single stacked bar of class counts wrapped into polar
# coordinates. (Original author's note: the base-graphics pie chart looks
# nicer.)
ggplot(mpg, aes(x = factor(1), fill = factor(class))) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")



# Box plot of hwy for each class, filled by class.
ggplot(mpg, aes(x = class, y = hwy, fill = class)) +
  geom_boxplot()

# Scatter plot with the points joined by a line.
# NOTE(review): `guss.accuracy` (columns Guss and Error) is not defined in the
# visible part of this file; it must exist before this line runs.
ggplot(guss.accuracy, aes(x = Guss, y = Error)) +
  geom_point() +
  geom_line()

# ggsave(plot=, filename=)   `plot` is the plot object to save, `filename` is the output path


# Lines for grouped data in a single figure: simulate a noisy sine wave and
# split it in half into training and test sets.
set.seed(1)
x <- seq(from = 0, to = 1, by = 0.01)
n <- length(x)
y <- sin(2 * pi * x) + rnorm(n, mean = 0, sd = 1)

# Randomly pick half of the positions (kept in ascending order) for training;
# the remaining positions form the test set.
indices <- sort(sample(seq_len(n), size = round(0.5 * n)))

training.df <- data.frame(X = x[indices], Y = y[indices])
test.df <- data.frame(X = x[-indices], Y = y[-indices])

training.x <- training.df$X
training.y <- training.df$Y
test.x <- test.df$X
test.y <- test.df$Y
# Root-mean-square error between the observed values `y` and the predicted
# values `h`.
rmse <- function(y, h) {
  squared_error <- (y - h)^2
  sqrt(mean(squared_error))
}
# Fit polynomial regressions of degree 1 to 12 on the training data and record
# the training and test RMSE of each fit. The per-degree rows are built in a
# list and bound once, instead of growing `performance` with rbind() on every
# iteration; the row order (Training, then Test, for each degree) is unchanged.
performance <- do.call(rbind, lapply(seq_len(12), function(d) {
  poly.fit <- lm(Y ~ poly(X, degree = d), data = training.df)
  rbind(
    data.frame(Degree = d, Data = "Training",
               RMSE = rmse(training.y, predict(poly.fit))),
    data.frame(Degree = d, Data = "Test",
               RMSE = rmse(test.y, predict(poly.fit, newdata = test.df)))
  )
}))

# RMSE against polynomial degree: one line type per data set, points coloured
# by data set.
ggplot(performance, aes(x = Degree, y = RMSE, linetype = Data)) +
  geom_point(aes(color = Data)) +
  geom_line()

# Line chart of bidpx1 over trade time, one line (and colour) per securityid.
# Fixes: aes() is a function call, not `aes=(...)`; the aesthetic is `colour`;
# seconds are parsed with %S (not %s); and `+` must end a line, because a line
# that starts with `+` is not attached to the plot.
# NOTE(review): the column was spelled `tradaetime`; `tradetime` is assumed
# from the axis label. Confirm against the `bidpx1` data frame, which is not
# defined in the visible part of this file.
g <- ggplot(data = bidpx1,
            aes(x = as.POSIXct(tradetime, format = "%Y%m%d%H%M%S"), y = bidpx1)) +
  geom_line(aes(group = securityid, colour = securityid)) +
  xlab("tradetime") +
  ylab("bidpx1")

# p=ggplot(data.frame(x,y),aes(x,y,label = colnames(watervoles)))
# p+geom_point(shape=16,size=3,colour='red')+
# geom_text(hjust=-0.1,vjust=0.5,alpha=0.5)


你可能感兴趣的:(R语言)