工作中经常要用ggplot2画各种图形,下面整理了这几个月用ggplot2画图时积累的一些代码。最常碰到的需求有:统计各个类别的频数、在柱状图的每个柱子上显示百分比、把坐标轴上的类别标签倾斜显示,以及调整图形的颜色等等。下面就是ggplot2包中针对这些技巧的代码,数据用的是ggplot2包自带的mpg数据集,以字段class(汽车的类型)为例,具体见代码:
library(ggplot2)
library(reshape)
# Frequency of each car class in mpg: one row per class, columns cars / freq.
test <- setNames(as.data.frame(table(mpg$class)), c("cars", "freq"))
# Percentage labels are computed in the data's own row order and stored as a
# column. geom_text() pairs labels with rows, not with the reordered axis, so
# sorting the frequencies before labelling would attach percentages to the
# wrong bars.
test$label <- paste0(round(test$freq / sum(test$freq) * 100, 2), "%")
labels <- test$label
# reorder() sorts the bars by frequency, fill sets the bar colour, width sets
# the bar width, and geom_text() prints the percentage above each bar.
p <- ggplot(test, aes(x = reorder(cars, freq), y = freq)) +
  geom_bar(stat = "identity", fill = "green", width = 0.5) +
  geom_text(aes(label = label), colour = "blue", vjust = -1)
# theme(axis.text.x = ...) rotates the x-axis tick labels by 30 degrees and
# colours them red. No manual fill scale is added: fill is a constant here,
# not a mapped aesthetic, so such a scale would have nothing to act on.
p + theme(axis.text.x = element_text(angle = 30, colour = "red"))
# coord_flip() swaps the horizontal and vertical axes.
p + labs(x = "车型号", y = "频率", title = "生产、各流通级别、终端流通量和纯销比\n") +
  coord_flip()
# Nightingale rose chart: the same bars drawn in polar coordinates.
ggplot(test, aes(cars, freq)) +
  geom_bar(stat = "identity", fill = "green") +
  coord_polar() +
  theme(legend.position = "none")
众筹用户画像
library(RMySQL)
library(ggplot2)
library(reshape)
# File housekeeping helpers (kept for reference)
# list.files(pattern=("\\.pdf$"))
# file.remove( list.files(pattern=("\\.pdf$")))
# file.exists("uv走势图total.pdf")
# NOTE(review): the credentials below are hard-coded in source. They should be
# rotated and supplied from outside the script (e.g. via Sys.getenv()).
conn <- dbConnect(MySQL(), dbname = "pms", username="gaoyang922", password="gaoyang922@123456!",host="10.10.109.62",port=1333)
# Daily traffic per site from tracker.hbase_visit, excluding page URLs that
# contain 'zhongchouban' and rows without insert_time. UV counts distinct
# labels; PV counts distinct label+sessionid+visit_time combinations. Three
# result sets are unioned: site 'pc', site 'm', and all sites together
# reported under the synthetic site value 't'.
traffic_sql <- "select * from ((select insert_date,site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from(
SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex FROM tracker.hbase_visit
where page_url not like '%zhongchouban%' and insert_time is NOT NULL and site ='pc') pc group by insert_date,site order by insert_date,site
) union (select insert_date,site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from(
SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex FROM tracker.hbase_visit
where page_url not like '%zhongchouban%' and insert_time is NOT NULL and site ='m'
) m group by insert_date,site order by insert_date,site) union (
select insert_date,'t' as site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from(
SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex FROM tracker.hbase_visit
where page_url not like '%zhongchouban%' and insert_time is NOT NULL ) t group by insert_date order by insert_date)) c
order by insert_date , site desc;"
# dbGetQuery() sends the statement, fetches every row and releases the result
# set. The finally clause closes the connection even when the query fails.
result <- tryCatch(dbGetQuery(conn, traffic_sql), finally = dbDisconnect(conn))
head(result)

# Components shared by all four trend charts: a framed transparent panel, a
# legend titled "渠道", and x-axis tick labels rotated by 30 degrees. The
# legend labels are positional, so they rely on the site values sorting as
# m, pc, t.
trend_panel <- theme(panel.background = element_rect(fill = "transparent", color = "gray"))
trend_legend <- scale_colour_hue("渠道", labels = c("m站", "pc", "全部"))
trend_axis <- theme(axis.text.x = element_text(angle = 30))

# Daily UV, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_uv + xlab("日期") + ylab("uv量") + ggtitle("8月18至9月20日每日uv走势图") +
  trend_panel + trend_legend +
  theme(axis.text.x = element_text(angle = 30, color = 'black'))
# dev.off()

# Daily UV, all sites overlaid in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_uv + xlab("日期") + ylab("uv量") + ggtitle("8月18至9月20日每日uv走势图") +
  trend_panel + trend_legend + trend_axis
# dev.off()

# Daily PV, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_pv + xlab("日期") + ylab("pv量") + ggtitle("8月18至9月20日每日pv走势图") +
  trend_panel + trend_legend + trend_axis
# dev.off()

# Daily PV, all sites overlaid in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_pv + xlab("日期") + ylab("pv量") + ggtitle("8月18至9月20日每日pv走势图") +
  trend_panel + trend_legend + trend_axis
网站pv uv分析走势图
library(RMySQL)
library(ggplot2)
library(reshape)
# File housekeeping helpers (kept for reference)
# list.files(pattern=("\\.pdf$"))
# file.remove( list.files(pattern=("\\.pdf$")))
# file.exists("uv走势图total.pdf")
# NOTE(review): the credentials below are hard-coded in source. They should be
# rotated and supplied from outside the script (e.g. via Sys.getenv()).
conn <- dbConnect(MySQL(), dbname = "pms", username="gaoyang922", password="gaoyang922@123456!",host="10.10.109.62",port=1333)
# Daily traffic per site from tracker.hbase_visit, excluding page URLs that
# contain 'zhongchouban' and rows without insert_time. UV counts distinct
# labels; PV counts distinct label+sessionid+visit_time combinations. Three
# result sets are unioned: site 'pc', site 'm', and all sites together
# reported under the synthetic site value 't'.
traffic_sql <- "select * from ((select insert_date,site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from(
SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex FROM tracker.hbase_visit
where page_url not like '%zhongchouban%' and insert_time is NOT NULL and site ='pc') pc group by insert_date,site order by insert_date,site
) union (select insert_date,site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from(
SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex FROM tracker.hbase_visit
where page_url not like '%zhongchouban%' and insert_time is NOT NULL and site ='m'
) m group by insert_date,site order by insert_date,site) union (
select insert_date,'t' as site,count(distinct(label)) as UV,count(distinct(pindex)) as PV from(
SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, label, concat(label,sessionid,visit_time) as pindex FROM tracker.hbase_visit
where page_url not like '%zhongchouban%' and insert_time is NOT NULL ) t group by insert_date order by insert_date)) c
order by insert_date , site desc;"
# dbGetQuery() sends the statement, fetches every row and releases the result
# set. The finally clause closes the connection even when the query fails.
result <- tryCatch(dbGetQuery(conn, traffic_sql), finally = dbDisconnect(conn))
head(result)

# Components shared by all four trend charts: a framed transparent panel, a
# legend titled "渠道", and x-axis tick labels rotated by 30 degrees. The
# legend labels are positional, so they rely on the site values sorting as
# m, pc, t.
trend_panel <- theme(panel.background = element_rect(fill = "transparent", color = "gray"))
trend_legend <- scale_colour_hue("渠道", labels = c("m站", "pc", "全部"))
trend_axis <- theme(axis.text.x = element_text(angle = 30))

# Daily UV, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_uv + xlab("日期") + ylab("uv量") + ggtitle("8月18至9月20日每日uv走势图") +
  trend_panel + trend_legend +
  theme(axis.text.x = element_text(angle = 30, color = 'black'))
# dev.off()

# Daily UV, all sites overlaid in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_uv + xlab("日期") + ylab("uv量") + ggtitle("8月18至9月20日每日uv走势图") +
  trend_panel + trend_legend + trend_axis
# dev.off()

# Daily PV, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_pv + xlab("日期") + ylab("pv量") + ggtitle("8月18至9月20日每日pv走势图") +
  trend_panel + trend_legend + trend_axis
# dev.off()

# Daily PV, all sites overlaid in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_pv + xlab("日期") + ylab("pv量") + ggtitle("8月18至9月20日每日pv走势图") +
  trend_panel + trend_legend + trend_axis
R机器学习实战书本ggplot图汇总
library(reshape)
library(ggplot2)
# Load the heights / weights / genders sample; the code below reads its
# Gender, Height and Weight columns.
data.file <- read.csv(
  "D:\\ML_for_Hackers-master\\02-Exploration\\data\\01_heights_weights_genders.csv",
  stringsAsFactors = FALSE, sep = ",", header = TRUE
)
heigths <- with(data.file, Height)
# Mean height per gender.
tapply(data.file$Height, data.file$Gender, mean)
# Per-gender mean of columns 2 and 3 (presumably Height and Weight; verify
# against the CSV column order).
aggregate(data.file[, 2:3], list(data.file$Gender), mean)
# Same summaries through reshape: melt to long form, then cast back with mean
# as the aggregate and an extra grand-total row.
data.file.melt <- melt(data.file, id = "Gender")
result <- cast(Gender ~ variable, data = data.file.melt, mean, margins = "grand_row")
result <- cast(Gender ~ variable, data = data.file.melt, mean,
               subset = variable %in% c("Height"), margins = "grand_row")
# Row count per gender (columns Gender, V1). Done in base R because no
# library(plyr) call is visible in this file, so ddply() and .() would not be
# found.
result <- as.data.frame(table(Gender = data.file$Gender),
                        responseName = "V1", stringsAsFactors = FALSE)
# Quintiles of height, via the extracted vector and via the column.
quantile(heigths, probs = seq(0, 1, 0.2))
quantile(data.file$Height, probs = seq(0, 1, 0.2))
# NOTE(review): 0.225 looks like a typo for 0.025 (a 95% range) - confirm.
quantile(data.file$Height, probs = c(0.975, 0.225))
# Height histograms with 1-unit and 5-unit bins.
ggplot(data.file, aes(x = Height)) + geom_histogram(binwidth = 1)
ggplot(data.file, aes(x = Height)) + geom_histogram(binwidth = 5)
# Scatter plot with one panel per gender. Colour is mapped, not fill: the
# default point shape has no fill, so a fill mapping would not be visible.
ggplot(data.file, aes(x = Height, y = Weight, color = Gender)) +
  geom_point() +
  facet_grid(Gender ~ .)
# Add a smoothing curve, using only the first 800 rows (fewer than 1000).
ggplot(data.file[1:800, ], aes(x = Height, y = Weight)) +
  geom_point() +
  geom_smooth()
# Colour by gender with translucent points. alpha is a fixed setting, so it
# goes outside aes(); a bare `alpha` inside aes() is taken as the x mapping.
ggplot(data.file, aes(x = Height, y = Weight)) +
  geom_point(aes(color = Gender), alpha = 0.25) +
  geom_smooth()
# One colour per gender.
ggplot(data.file, aes(x = Height, y = Weight, color = factor(Gender))) +
  geom_point()

# Logistic regression of gender (Male = 1, otherwise 0) on weight and height.
heights.weights <- transform(data.file,
                             Male = ifelse(Gender == 'Male', 1, 0))
logit.model <- glm(Male ~ Weight + Height,
                   data = heights.weights,
                   family = binomial(link = 'logit'))
# Decision boundary where the linear predictor is zero:
#   b0 + bW * Weight + bH * Height = 0
#   => Weight = -b0 / bW - (bH / bW) * Height
logit.coef <- coef(logit.model)
boundary.intercept <- -logit.coef[["(Intercept)"]] / logit.coef[["Weight"]]
boundary.slope <- -logit.coef[["Height"]] / logit.coef[["Weight"]]
# geom_abline() draws the boundary (stat_abline() is no longer part of
# ggplot2). alpha = 0.25 is set as a constant so it is used literally.
ggplot(data.file, aes(x = Height, y = Weight)) +
  geom_point(aes(color = Gender), alpha = 0.25) +
  scale_color_manual(values = c("Male" = "black", "Female" = "gray")) +
  theme_bw() +
  geom_abline(intercept = boundary.intercept, slope = boundary.slope,
              color = 'black')
# Scatter plot
head(mpg)
# cty against hwy, coloured by class and sized by displ, with a loess
# smoother. loess is requested explicitly; ggplot2's automatic choice also
# uses loess below 1,000 observations and switches to gam above that.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(color = class, size = displ)) +
  stat_smooth(method = 'loess')
# Add a title and axis labels.
p + labs(x = "城市", y = "耗油量", title = "汽车油耗与型号")
# One panel per year, laid out in two columns. labs() takes x= / y=; the
# names xlab= / ylab= would not set the axis titles.
p + facet_wrap(~year, ncol = 2) +
  labs(x = "城市", y = "耗油量", title = "汽车油耗与型号")
# Bar charts
# Order the class levels by how many cars fall in each class, so the bars are
# drawn from the rarest class to the most common one.
class2 <- mpg$class
class2 <- reorder(class2, class2, length)
mpg$class2 <- class2
# fill = class2 gives every bar its own colour.
ggplot(mpg, aes(x = class2)) + geom_bar(aes(fill = class2))
# Split each bar by year, stacked vertically.
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar(position = 'stack')
# Split each bar by year, side by side.
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar(position = 'dodge')
# One panel per year. The original note described separate panels, but the
# code repeated the stacked chart above.
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar() + facet_wrap(~year)
# Pie chart: a single stacked bar of class counts wrapped around the y axis.
# (Author's note: base graphics arguably produce nicer-looking pies.)
ggplot(data = mpg, mapping = aes(x = factor(1), fill = factor(class))) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")
# Box plot of hwy for each class, filled by class.
ggplot(data = mpg, mapping = aes(x = class, y = hwy, fill = class)) +
  geom_boxplot()
# Scatter plot whose points are joined by a line.
ggplot(data = guss.accuracy, mapping = aes(x = Guss, y = Error)) +
  geom_point() +
  geom_line()
# ggsave(plot = , filename = ): plot is the plot object to save, filename is
# the path of the image file to write.
# Point-and-line plot of several categories in one chart: data preparation.
# Simulate a noisy sine wave on [0, 1] and split it at random into two halves,
# one for training and one for testing.
set.seed(1)
x <- seq(from = 0, to = 1, by = 0.01)
y <- sin(2 * pi * x) + rnorm(n = length(x), mean = 0, sd = 1)
n <- length(x)
# Sorted positions of the observations that go into the training half.
indices <- sort(sample(seq_len(n), size = round(n / 2)))
training.x <- x[indices]
test.x <- x[-indices]
training.y <- y[indices]
test.y <- y[-indices]
test.df <- data.frame(X = test.x, Y = test.y)
training.df <- data.frame(X = training.x, Y = training.y)
# Root-mean-square error between observed values y and predictions h.
rmse <- function(y, h) {
  squared_error <- (y - h)^2
  sqrt(mean(squared_error))
}
# Fit polynomials of degree 1 to 12 on the training half and record the RMSE
# on both halves. Each degree yields a two-row data frame (Training, then
# Test); the pieces are bound together once, not by growing `performance`
# with rbind() on every iteration.
performance <- do.call(rbind, lapply(1:12, function(d) {
  poly.fit <- lm(Y ~ poly(X, degree = d), data = training.df)
  data.frame(
    Degree = d,
    Data = c('Training', 'Test'),
    RMSE = c(
      rmse(training.y, predict(poly.fit)),
      rmse(test.y, predict(poly.fit, newdata = test.df))
    )
  )
}))
# RMSE against polynomial degree: one line per data set (told apart by line
# type), with the points coloured by data set.
ggplot(performance, aes(x = Degree, y = RMSE, linetype = Data)) +
  geom_point(aes(color = Data)) +
  geom_line()
# Line chart of bidpx1 over trade time, one line (and colour) per securityid,
# all drawn in the same panel.
# Fixes: aes(...) is a call, not `aes=(...)`; each `+` ends its line so the
# axis labels stay part of the plot expression; the aesthetic is `colour`;
# the seconds field is parsed with %S.
# NOTE(review): the column name "tradaetime" is kept as written; it may be a
# typo for "tradetime" - confirm against the bidpx1 data frame.
g <- ggplot(data = bidpx1,
            aes(x = as.POSIXct(tradaetime, format = "%Y%m%d%H%M%S"), y = bidpx1)) +
  geom_line(aes(group = securityid, colour = securityid)) +
  xlab('tradetime') +
  ylab('bidpx1')
# p=ggplot(data.frame(x,y),aes(x,y,label = colnames(watervoles)))
# p+geom_point(shape=16,size=3,colour='red')+
# geom_text(hjust=-0.1,vjust=0.5,alpha=0.5)