上午用 R 写了一些数据处理的代码,代码本身很快就写完了,但最后把生成的目标表写入 MySQL 数据库时出了点问题,折腾了好一会儿。写库部分的代码如下:
# Replace the contents of the total_staytime_bouncerate table (database
# "analyse") with the rows of total_stayTime_bounceRate.
# NOTE(review): the credentials are hard-coded and the account is root;
# consider loading them from environment variables or a config file.
conn <- dbConnect(MySQL(), dbname = "analyse", username="root", password="Pa123456!",host="202.69.27.239",port=8443)
tryCatch(
  {
    # dbSendQuery(conn, 'SET NAMES utf8')  # enable if text comes back garbled
    # dbExecute() runs the statement and releases its result set; a bare
    # dbSendQuery() would leave the result open on the connection.
    dbExecute(conn, "delete from total_staytime_bouncerate")
    # Append the new rows; the table name is kept all lower-case on purpose.
    dbWriteTable(conn, value = total_stayTime_bounceRate, name = "total_staytime_bouncerate", append = TRUE, row.names = FALSE)
  },
  # Close the connection even when the delete or the write fails.
  finally = dbDisconnect(conn)
)
使用 dbWriteTable 时要注意:name 参数指定的表名不能出现大写字母,否则会报错,只有全部小写才不会出错(这应与 MySQL 对表名大小写的处理方式有关,例如服务器的 lower_case_table_names 设置)。另外,这里连接数据库没有通过 ODBC;如果出现中文乱码,可以在连接后执行 dbSendQuery(conn,'SET NAMES utf8') 来解决。最后附上今天上午花了一点时间写的完整 R 代码:
# library(readxl)  # for reading Excel files
# read_excel("C:\\Users\\Administrator\\Desktop\\shuju.xlsx", sheet = 1)
library(RMySQL)
# library(ggplot2)
# library(reshape)
# Load the raw page-visit records from MySQL into `result`.
# Warnings are intentionally left enabled: a global options(warn = -1) hides
# every warning for the rest of the session, including any emitted by the
# database driver.
# NOTE(review): the credentials are hard-coded; consider loading them from
# environment variables or a config file.
conn <- dbConnect(MySQL(), dbname = "pms", username="gaoyang922", password="gaoyang922@123456!",host="10.10.109.62",port=1333)
# dbGetQuery() sends the query, fetches all rows and releases the result set
# in one call (dbSendQuery() + fetch() left the result set open). The
# connection is closed even when the query fails.
result <- tryCatch(
  dbGetQuery(conn, "select insert_time,stay_time,page_url ,leave_time from tracker.hbase_visit ;"),
  finally = dbDisconnect(conn)
)
# Handy helpers while exploring a connection `con`:
#   summary(con), dbGetInfo(con)   connection information
#   dbListTables(con)              list the tables in the database
#   dbRemoveTable(con, "test")     drop the table "test"
# head(result)
# str(result)
# Use the first 8 characters of insert_time as the per-day key
# (presumably yyyymmdd -- TODO confirm against the column format),
# then drop the raw timestamp column.
result$insert_date <- substr(result$insert_time, 1, 8)
result$insert_time <- NULL
# head(result)
# ---- Home page: mean stay time per day ----
# Keep rows whose URL matches the www.zhong.com pattern and whose date key
# is non-empty (rows where the comparison is NA are dropped as well).
is_home_url <- grepl(".?www\\.zhong\\.com.?", result$page_url)
result_page_stat_time <- result[which(is_home_url & result$insert_date != ""), ]
# head(result_page_stat_time)
# Cap stay_time at 1800 before averaging.
home_stay <- result_page_stat_time$stay_time
result_page_stat_time$stay_time <- ifelse(home_stay > 1800, 1800, home_stay)
pagehome_stat_time <- setNames(
  aggregate(
    result_page_stat_time$stay_time,
    by = list(result_page_stat_time$insert_date),
    FUN = mean
  ),
  c("date", "hp_stay_time")
)
# ---- Home page: bounce rate per day ----
result_page_jump <- result_page_stat_time
# head(result_page_jump)
# A visit is flagged as a bounce when both stay_time and leave_time are 0.
result_page_jump$bounce <- with(
  result_page_jump,
  ifelse(stay_time == 0 & leave_time == 0, 1, 0)
)
hp_dates <- result_page_jump$insert_date
# Page views per day (row count per date key).
hp_pv_by_date <- setNames(as.data.frame(table(hp_dates)), c("date", "hp_pv"))
# Bounces per day.
hp_bounce_by_date <- setNames(
  aggregate(result_page_jump$bounce, by = list(hp_dates), FUN = sum),
  c("date", "hp_bounce")
)
page_bounce <- merge(hp_pv_by_date, hp_bounce_by_date, by = "date")
page_bounce$hp_bounceRate <- with(page_bounce, hp_bounce / hp_pv)
# ---- Detail page: mean stay time per day ----
# Keep rows whose URL matches the item detail pattern and whose date key is
# non-empty (rows where the comparison is NA are dropped as well).
is_detail_url <- grepl(".?item\\.zhong\\.com\\/item\\/detail\\.do\\?productId.?", result$page_url)
result_detail_stat_time <- result[which(is_detail_url & result$insert_date != ""), ]
# head(result_detail_stat_time)
# Cap stay_time at 1800 before averaging.
detail_stay <- result_detail_stat_time$stay_time
result_detail_stat_time$stay_time <- ifelse(detail_stay > 1800, 1800, detail_stay)
detail_stat_time <- setNames(
  aggregate(
    result_detail_stat_time$stay_time,
    by = list(result_detail_stat_time$insert_date),
    FUN = mean
  ),
  c("date", "detail_stay_time")
)
# ---- Detail page: bounce rate per day ----
result_detail_jump <- result_detail_stat_time
# A visit is flagged as a bounce when both stay_time and leave_time are 0.
result_detail_jump$bounce <- with(
  result_detail_jump,
  ifelse(stay_time == 0 & leave_time == 0, 1, 0)
)
# head(result_detail_jump)
detail_dates <- result_detail_jump$insert_date
# Page views per day (row count per date key).
detail_pv_by_date <- setNames(
  as.data.frame(table(detail_dates)),
  c("date", "detail_pv")
)
# Bounces per day.
detail_bounce_by_date <- setNames(
  aggregate(result_detail_jump$bounce, by = list(detail_dates), FUN = sum),
  c("date", "detail_bounce")
)
detail_bounce <- merge(detail_pv_by_date, detail_bounce_by_date, by = "date")
detail_bounce$detail_bounceRate <- with(detail_bounce, detail_bounce / detail_pv)
# Combine the home-page and detail-page summaries into one table keyed by
# date. merge() keeps only the dates present on both sides, so a date that is
# missing from any of the four inputs does not appear in the result.
home_summary <- merge(pagehome_stat_time, page_bounce, by = "date")
detail_summary <- merge(detail_stat_time, detail_bounce, by = "date")
total_stayTime_bounceRate <- merge(home_summary, detail_summary, by = "date")
# head(total_stayTime_bounceRate)

# Replace the contents of the total_staytime_bouncerate table (database
# "analyse") with the freshly computed summary.
# NOTE(review): the credentials are hard-coded and the account is root;
# consider loading them from environment variables or a config file.
conn <- dbConnect(MySQL(), dbname = "analyse", username="root", password="Pa123456!",host="202.69.27.239",port=8443)
tryCatch(
  {
    # dbSendQuery(conn, 'SET NAMES utf8')  # enable if text comes back garbled
    # dbExecute() runs the statement and releases its result set; a bare
    # dbSendQuery() would leave the result open on the connection.
    dbExecute(conn, "delete from total_staytime_bouncerate")
    # Append the new rows; the table name is kept all lower-case on purpose.
    dbWriteTable(conn, value = total_stayTime_bounceRate, name = "total_staytime_bouncerate", append = TRUE, row.names = FALSE)
  },
  # Close the connection even when the delete or the write fails.
  finally = dbDisconnect(conn)
)
# To overwrite a table instead of appending, and to read it back:
#   dbWriteTable(con, "test", testB, overwrite = TRUE, row.names = FALSE)
#   dbReadTable(con, "test")