1:计算两个时间相差多少天:hive内置函数datediff
例子Demo:datediff('2009-07-28','2009-07-30')=2
2:也可以写自定义函数
package cdel.edu.hive.udf.demo04;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* 计算一个时间到今天的相差多少天
* @author dell
*
*/
public class MyDateDiff extends UDF {
public String evaluate(String str) {
try {
String today = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date().getTime());
int days = daysBetween(str,today);
return String.valueOf(days);
} catch (Exception e) {
return null;
}
}
/**
* 计算两个日期之间相差的天数
*
* @param smdate
* 较小的时间
* @param bdate
* 较大的时间
* @return 相差天数
* @throws ParseException
*/
public static int daysBetween(Date smdate, Date bdate)
throws ParseException {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
smdate = sdf.parse(sdf.format(smdate));
bdate = sdf.parse(sdf.format(bdate));
Calendar cal = Calendar.getInstance();
cal.setTime(smdate);
long time1 = cal.getTimeInMillis();
cal.setTime(bdate);
long time2 = cal.getTimeInMillis();
long between_days = (time2 - time1) / (1000 * 3600 * 24);
return Integer.parseInt(String.valueOf(between_days));
}
/**
* 字符串的日期格式的计算
*/
public static int daysBetween(String smdate, String bdate)
throws ParseException {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
Calendar cal = Calendar.getInstance();
cal.setTime(sdf.parse(smdate));
long time1 = cal.getTimeInMillis();
cal.setTime(sdf.parse(bdate));
long time2 = cal.getTimeInMillis();
long between_days = (time2 - time1) / (1000 * 3600 * 24);
return Integer.parseInt(String.valueOf(between_days));
}
public static void main(String[] args) throws Exception {
String str="2015-04-27";
System.out.println(new MyDateDiff().evaluate(str));
}
}
-创建自定义函数
add jar /home/admin/xxg/MyDateDiff.jar;
create temporary function myDateDiff as 'cdel.edu.hive.udf.demo04.MyDateDiff';
--hive 统计
-- 最近30 天内学员的做题量统计
--分网统计
create table qz_count as
SELECT mps.userID,mem.memberid,COUNT(distinct qpq.questionID) as doQuesCount,mem.domain,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) as daytime
from tmp_qz_member_paper_score mps
left join tmp_qz_member_paper_question qpq on qpq.paperScoreID = mps.paperScoreID and mps.domain = qpq.domain
left join order.tmp_member_info_new mem on mem.uid = mps.userID and mem.domain = mps.domain
where cast(myDateDiff(concat_ws(' ',substr(mps.createTime,1,10),'')) as int) <=30
group by mps.userID,mem.memberid ,mem.domain,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))
having COUNT(distinct qpq.questionID) >0
order by doQuesCount desc ;
SELECT mps.userID,mem.memberid,COUNT(distinct qpq.questionID) as doQuesCount,mem.domain,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) as daytime
from tmp_qz_member_paper_score mps
left join tmp_qz_member_paper_question qpq on qpq.paperScoreID = mps.paperScoreID and mps.domain = qpq.domain
left join order.tmp_member_info_new mem on mem.uid = mps.userID and mem.domain = mps.domain
where mps.userID='48888922' and cast(myDateDiff(concat_ws(' ',substr(mps.createTime,1,10),'')) as int) <=31
group by mps.userID,mem.memberid ,mem.domain,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))
having COUNT(distinct qpq.questionID) >0
order by doQuesCount desc ;
SELECT qpq.*
from tmp_qz_member_paper_score mps
left join tmp_qz_member_paper_question qpq on qpq.paperScoreID = mps.paperScoreID and mps.domain = qpq.domain
where mps.userID='48888922' limit 100;
经验验证:没有医学网做题题目信息
select distinct logdata from tmp_qz_member_paper_question where domain='med' limit 10
验证数据
select * from qz_count where userid=48888922 order by doQuesCount desc limit 100;
select * from qz_count where doQuesCount>=500 order by doQuesCount desc ;
-- 不分网统计
create table qz_count as
SELECT mps.userID,mem.memberid,COUNT(distinct qpq.questionID) as doQuesCount,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) as daytime
from tmp_qz_member_paper_score mps
left join tmp_qz_member_paper_question qpq on qpq.paperScoreID = mps.paperScoreID and mps.domain = qpq.domain
left join order.tmp_member_info_new mem on mem.uid = mps.userID and mem.domain = mps.domain
where cast(myDateDiff(concat_ws(' ',substr(mps.createTime,1,10),'')) as int) <=30
group by mps.userID,mem.memberid ,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))
having COUNT(distinct qpq.questionID) >0
order by doQuesCount desc ;
-- 统计学员每天做多少套试卷
-- 分网统计
create table paper_count as
select mps.userID,mem.Memberid,mem.domain, COUNT(distinct mps.paperViewID) AS doPaperCount ,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) as daytime
from tmp_qz_member_paper_score mps
left join order.tmp_member_info_new mem on mem.uID = mps.userID and mem.domain = mps.domain
where cast(myDateDiff(concat_ws(' ',substr(mps.createTime,1,10),'')) as int) <=31
group by mps.userID,mem.Memberid, mem.domain,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))
having COUNT(distinct mps.paperViewID) >0
order by doPaperCount desc;
-- 不分网统计
select mps.userID,mem.Memberid,COUNT(distinct mps.paperViewID) AS doPaperCount ,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime)))) as daytime
from tmp_qz_member_paper_score mps
left join order.tmp_member_info_new mem on mem.uID = mps.userID and mem.domain = mps.domain
where mps.userID=48888922
group by mps.userID,mem.Memberid,
to_date(concat_ws(' ',substr(mps.createTime,1,10),substr(mps.createTime,11,length(mps.createTime))))
having COUNT(distinct mps.paperViewID) >0
order by doPaperCount desc;
测试:
select * from paper_count order by doPaperCount desc limit 100;
select mps.userID, mps.paperViewID from tmp_qz_member_paper_score mps where mps.userID=48888922;
-- 导出本地文件中
insert overwrite local directory '/home/admin/xxg/qz/med/'
row format delimited
fields terminated by '\t'
select * from qz_count;
-- 最近30天内学员的提问数量
-- 针对主题的提问统计
SELECT ft.userID,ft.userName , COUNT(ft.userID) as num FROM FAQ_TOPIC ft
group by FT.userid ,ft.userName
SELECT ft.userID,ft.userName , COUNT(af.USERID) as num
FROM FAQ_TOPIC ft
inner JOIN faq af on ft.topicID = af.topicID and af.istopic = 1
group by ft.userID,ft.userName
create table faq_count as
SELECT ft.userID, mem.memberid, COUNT(distinct ft.topicID) as num, mem.domain,
to_date(concat_ws(' ',substr(ft.createTime,1,10),substr(ft.createTime,11,length(ft.createTime)))) as daytime
FROM tmp_faq_topic ft
left join order.tmp_member_info_new mem on mem.uid = ft.userID and mem.domain = ft.domain
where cast(myDateDiff(concat_ws(' ',substr(ft.createTime,1,10),'')) as int) <=30
group by ft.userID, mem.memberid,mem.domain,
to_date(concat_ws(' ',substr(ft.createTime,1,10),substr(ft.createTime,11,length(ft.createTime))))
having COUNT(distinct ft.topicID) >0
order by num desc ;
--针对主题和继续提问的 --- 就是如果一个主题,两个继续的话 你统计的sum =3
create table faq_count2 as
SELECT ft.userID, mem.memberid, COUNT(distinct af.faqid) as num, mem.domain,
to_date(concat_ws(' ',substr(af.createTime,1,10),substr(af.createTime,11,length(af.createTime)))) as daytime
FROM tmp_faq af
left join tmp_faq_topic ft on ft.topicID = af.topicID and af.domain = ft.domain
left join order.tmp_member_info_new mem on mem.uid = ft.userID and mem.domain = af.domain
where cast(myDateDiff(concat_ws(' ',substr(af.createTime,1,10),'')) as int) <=30
group by ft.userID, mem.memberid, mem.domain,
to_date(concat_ws(' ',substr(af.createTime,1,10),substr(af.createTime,11,length(af.createTime))))
having COUNT(distinct af.faqid) >0
order by num desc limit 10;
-- 导出本地文件中
insert overwrite local directory '/home/admin/xxg/faq/'
row format delimited
fields terminated by '\t'
select * from faq_count;
-- 删除表信息
drop table qz_count;
drop table faq_count;
drop table paper_count;
-- 统计
每天做题 >=3000道以上学员的个数
每天做题 >=2500 and <3000 道以上的学员个数
每天做题 >=2000 and <2500 道以上的学员个数
每天做题 >=1500 and <2000 道以上的学员个数
每天做题 >=1000 and <1500 道以上的学员个数
每天做题 >=500 and <1000 道以上的学员个数
每天做题 >=100 and <500 道以上的学员个数
每天做题 >=50 and <100 道以上的学员个数
每天做题 >0 and <50 道以上的学员个数
-- 根据学员分组统计,学员做题数量统计
select userid,memberid,domain,
SUM(case when doquescount=0 then 1 else 0 end) as n0,
SUM(case when doquescount>0 and doquescount<50 then 1 else 0 end) as n1,
SUM(case when doquescount>=50 and doquescount<100 then 1 else 0 end) as n2,
SUM(case when doquescount>=100 and doquescount<500 then 1 else 0 end) as n3,
SUM(case when doquescount>=500 and doquescount<1000 then 1 else 0 end) as n4,
SUM(case when doquescount>=1000 and doquescount<1500 then 1 else 0 end) as n5,
SUM(case when doquescount>=1500 and doquescount<2000 then 1 else 0 end) as n6,
SUM(case when doquescount>=2000 and doquescount<2500 then 1 else 0 end) as n7,
SUM(case when doquescount>=2500 and doquescount<3000 then 1 else 0 end) as n8,
SUM(case when doquescount>=3000 then 1 else 0 end) as n9
from qz_count
group by userid,memberid, domain limit 100;
-- 根据日期分组统计,学员做题量统计
select daytime,
SUM(case when doquescount=0 then 1 else 0 end) as n0,
SUM(case when doquescount>=1 and doquescount<50 then 1 else 0 end) as n1,
SUM(case when doquescount>=50 and doquescount<100 then 1 else 0 end) as n2,
SUM(case when doquescount>=100 and doquescount<500 then 1 else 0 end) as n3,
SUM(case when doquescount>=500 and doquescount<1000 then 1 else 0 end) as n4,
SUM(case when doquescount>=1000 and doquescount<1500 then 1 else 0 end) as n5,
SUM(case when doquescount>=1500 and doquescount<2000 then 1 else 0 end) as n6,
SUM(case when doquescount>=2000 and doquescount<2500 then 1 else 0 end) as n7,
SUM(case when doquescount>=2500 and doquescount<3000 then 1 else 0 end) as n8,
SUM(case when doquescount>=3000 then 1 else 0 end) as n9
from qz_count
group by daytime;
测试
select count(distinct userid)
from qz_count
where doquescount>=2500 and doquescount<3000;
select *
from qz_count
where doquescount>=2500 and doquescount<3000;
select count(distinct userid)
from qz_count
where doquescount>=3000 ;
--
-- 根据日期统计每天学员的所做试卷量
select daytime,
SUM(case when dopapercount=0 then 1 else 0 end) as n0,
SUM(case when dopapercount>=1 and dopapercount<5 then 1 else 0 end) as n1,
SUM(case when dopapercount>=5 and dopapercount<10 then 1 else 0 end) as n2,
SUM(case when dopapercount>=10 and dopapercount<50 then 1 else 0 end) as n3,
SUM(case when dopapercount>=50 and dopapercount<100 then 1 else 0 end) as n4,
SUM(case when dopapercount>=100 and dopapercount<150 then 1 else 0 end) as n5,
SUM(case when dopapercount>=150 and dopapercount<200 then 1 else 0 end) as n6,
SUM(case when dopapercount>=200 and dopapercount<250 then 1 else 0 end) as n7,
SUM(case when dopapercount>=250 and dopapercount<300 then 1 else 0 end) as n8,
SUM(case when dopapercount>=300 then 1 else 0 end) as n9
from paper_count
group by daytime limit 100;
测试检验:
select * from paper_count order by dopapercount desc limit 100;
select count(*) from paper_count where dopapercount>=10 and dopapercount<50 and daytime='2015-04-26' limit 100;
select count(distinct userid) from paper_count where dopapercount>=10 and dopapercount<50 and daytime='2015-04-26' limit 100;
-- 根据日期统计每天学员在不同区间内答疑数量
每天答疑提问主题 >=60 道以上学员的个数
每天答疑提问主题 >=55 and < 60道以上学员的个数
每天答疑提问主题 >=50 and < 55道以上学员的个数
每天答疑提问主题 >=45 and <50 道以上的学员个数
每天答疑提问主题 >=40 and <45 道以上的学员个数
每天答疑提问主题 >=35 and <40 道以上的学员个数
每天答疑提问主题 >=30 and <35 道以上的学员个数
每天答疑提问主题 >=25 and <30 道以上的学员个数
每天答疑提问主题 >=20 and <25 道以上的学员个数
每天答疑提问主题 >=15 and <20 道以上的学员个数
每天答疑提问主题 >=10 and <15 道以上的学员个数
每天答疑提问主题 >=5 and <10 道以上的学员个数
每天答疑提问主题 >=1 and <5 道以上的学员个数
每天答疑提问主题 =0 道以上的学员个数
select daytime,
SUM(case when num=0 then 1 else 0 end) as n0,
SUM(case when num>=1 and num<5 then 1 else 0 end) as n1,
SUM(case when num>=5 and num<10 then 1 else 0 end) as n2,
SUM(case when num>=10 and num<15 then 1 else 0 end) as n3,
SUM(case when num>=15 and num<20 then 1 else 0 end) as n4,
SUM(case when num>=20 and num<25 then 1 else 0 end) as n5,
SUM(case when num>=25 and num<30 then 1 else 0 end) as n6,
SUM(case when num>=30 and num<35 then 1 else 0 end) as n7,
SUM(case when num>=35 and num<40 then 1 else 0 end) as n8,
SUM(case when num>=40 and num<45 then 1 else 0 end) as n9,
SUM(case when num>=45 and num<50 then 1 else 0 end) as n10,
SUM(case when num>=50 and num<55 then 1 else 0 end) as n11,
SUM(case when num>=55 and num<60 then 1 else 0 end) as n12,
SUM(case when num>=60 then 1 else 0 end) as n13
from faq_count
group by daytime limit 100;