目标
需求
数据来源
数据集及所需文件
-- Create the database that holds all chat-message tables.
-- "if not exists" keeps the script re-runnable instead of failing on a second run.
create database if not exists db_msg;
-- Switch to db_msg so that unqualified table names below resolve to it
use db_msg;
-- Raw chat-message source table: one row per message, every field kept as string
create table db_msg.tb_msg_source (
    msg_time           string comment "消息发送时间",
    sender_name        string comment "发送人昵称",
    sender_account     string comment "发送人账号",
    sender_sex         string comment "发送人性别",
    sender_ip          string comment "发送人IP",
    sender_os          string comment "发送人操作系统",
    sender_phonetype   string comment "发送人手机型号",
    sender_network     string comment "发送人网络类型",
    sender_gps         string comment "发送人GPS定位",
    receiver_name      string comment "接收人昵称",
    receiver_ip        string comment "接收人IP",
    receiver_account   string comment "接收人账号",
    receiver_os        string comment "接收人操作系统",
    receiver_phonetype string comment "接收人手机型号",
    receiver_network   string comment "接收人网络类型",
    receiver_gps       string comment "接收人GPS定位",
    receiver_sex       string comment "接收人性别",
    msg_type           string comment "消息类型",
    distance           string comment "双方距离",
    message            string comment "消息内容"
)
    -- fields in the data files are separated by a tab character
    row format delimited
    fields terminated by '\t';
#上传数据到node1服务器本地文件系统(HS2服务所在机器)
[root@node1 hivedata]# pwd
/root/hivedata
[root@node1 hivedata]# ll
total 54104
-rw-r--r-- 1 root root 28237023 Jun 13 20:24 data1.tsv
-rw-r--r-- 1 root root 27161148 Jun 13 20:24 data2.tsv
-- Append both local TSV files to the source table
load data local inpath '/root/hivedata/data1.tsv'
    into table db_msg.tb_msg_source;
load data local inpath '/root/hivedata/data2.tsv'
    into table db_msg.tb_msg_source;

-- Preview a few rows to confirm the load
select *
from tb_msg_source
limit 5;
数据问题
ETL需求
-- ETL: build the cleaned table from the raw source, keeping every source column
-- and adding derived day / hour and longitude / latitude columns.
-- Assumes msg_time looks like 'yyyy-MM-dd HH:mm:ss' and sender_gps like 'lng,lat' (TODO confirm against the data).
create table db_msg.tb_msg_etl as
select
    *,
    substr(msg_time, 1, 10) as dayinfo, -- day: first 10 characters (substr positions are 1-based)
    substr(msg_time, 12, 2) as hourinfo, -- hour: 2 characters starting at position 12
    split(sender_gps, ",")[0] as sender_lng, -- longitude: part before the comma
    split(sender_gps, ",")[1] as sender_lat -- latitude: part after the comma
from db_msg.tb_msg_source
-- keep only rows with a non-empty sender_gps (NULL values fail this comparison too)
where length(sender_gps) > 0;
-- Spot-check the derived columns next to the fields they were computed from
select
    msg_time,
    dayinfo,
    hourinfo,
    sender_gps,
    sender_lng,
    sender_lat
from db_msg.tb_msg_etl
limit 5;
--查询数据
from
group by
聚合
过滤 排序
统计今日消息总量
-- Requirement: total number of messages per day
create table if not exists tb_rs_total_msg_cnt
    comment "今日消息总量"
as
select
    dayinfo,
    count(*) as total_msg_cnt
from db_msg.tb_msg_etl
group by dayinfo;

-- Inspect the result
select *
from tb_rs_total_msg_cnt;
+------------------------------+------------------------------------+
| tb_rs_total_msg_cnt.dayinfo | tb_rs_total_msg_cnt.total_msg_cnt |
+------------------------------+------------------------------------+
| 2021-11-01 | 139062 |
+------------------------------+------------------------------------+
统计今日每小时消息量,发送/接收用户数
-- Per day and hour: message count plus distinct sender and receiver accounts
create table tb_rs_hour_msg_cnt
    comment "每小时消息量趋势"
as
select
    dayinfo,
    hourinfo,
    count(*)                         as total_msg_cnt,
    count(distinct sender_account)   as sender_usr_cnt,
    count(distinct receiver_account) as receiver_usr_cnt
from db_msg.tb_msg_etl
group by
    dayinfo,
    hourinfo;

-- Inspect a few rows of the result
select *
from tb_rs_hour_msg_cnt
limit 5;
+-----------------------------+------------------------------+-----------------------------------+------------------------------------+--------------------------------------+
| tb_rs_hour_msg_cnt.dayinfo | tb_rs_hour_msg_cnt.hourinfo | tb_rs_hour_msg_cnt.total_msg_cnt | tb_rs_hour_msg_cnt.sender_usr_cnt | tb_rs_hour_msg_cnt.receiver_usr_cnt |
+-----------------------------+------------------------------+-----------------------------------+------------------------------------+--------------------------------------+
| 2021-11-01 | 00 | 4349 | 3520 | 3558 |
| 2021-11-01 | 01 | 2892 | 2524 | 2537 |
| 2021-11-01 | 02 | 882 | 842 | 838 |
| 2021-11-01 | 03 | 471 | 463 | 460 |
| 2021-11-01 | 04 | 206 | 202 | 205 |
+-----------------------------+------------------------------+-----------------------------------+------------------------------------+--------------------------------------+
统计今日各地区发送消息数据量
-- Per day and sender location: number of messages sent.
-- Longitude / latitude are cast from the string columns to double for the output.
-- The source table is qualified with db_msg, matching the other result queries,
-- so the statement does not depend on the session's current database to find it.
create table tb_rs_loc_cnt
    comment "今日各地区发送总消息量"
as
select
    dayinfo,
    sender_gps,
    cast(sender_lng as double) as longitude,
    cast(sender_lat as double) as latitude,
    count(*) as total_msg_cnt
from db_msg.tb_msg_etl
group by dayinfo, sender_gps, sender_lng, sender_lat;

-- Inspect a few rows of the result
select * from tb_rs_loc_cnt limit 5;
+------------------------+---------------------------+--------------------------+-------------------------+------------------------------+
| tb_rs_loc_cnt.dayinfo | tb_rs_loc_cnt.sender_gps | tb_rs_loc_cnt.longitude | tb_rs_loc_cnt.latitude | tb_rs_loc_cnt.total_msg_cnt |
+------------------------+---------------------------+--------------------------+-------------------------+------------------------------+
| 2021-11-01 | 100.297355,24.206808 | 100.297355 | 24.206808 | 1397 |
| 2021-11-01 | 100.591712,24.004148 | 100.591712 | 24.004148 | 1406 |
| 2021-11-01 | 101.62196,36.782187 | 101.62196 | 36.782187 | 1439 |
| 2021-11-01 | 102.357852,23.801165 | 102.357852 | 23.801165 | 1399 |
| 2021-11-01 | 102.357852,25.682909 | 102.357852 | 25.682909 | 1431 |
+------------------------+---------------------------+--------------------------+-------------------------+------------------------------+
统计今日发送消息和接收消息的用户数
-- Per day: number of distinct sender accounts and distinct receiver accounts
create table tb_rs_usr_cnt
    comment "今日发送消息人数、接受消息人数"
as
select
    dayinfo,
    count(distinct sender_account)   as sender_usr_cnt,
    count(distinct receiver_account) as receiver_usr_cnt
from db_msg.tb_msg_etl
group by dayinfo;

-- Inspect the result
select *
from tb_rs_usr_cnt;
+------------------------+-------------------------------+---------------------------------+
| tb_rs_usr_cnt.dayinfo | tb_rs_usr_cnt.sender_usr_cnt | tb_rs_usr_cnt.receiver_usr_cnt |
+------------------------+-------------------------------+---------------------------------+
| 2021-11-01 | 10008 | 10005 |
+------------------------+-------------------------------+---------------------------------+
统计今日发送消息最多的Top10用户
-- Top 10 senders per day by number of messages sent.
-- Users are identified by sender_name (nickname), so accounts that share a
-- nickname are counted together.
-- The ranking is done per dayinfo with row_number(): a plain "order by ... limit 10"
-- would keep only 10 rows across all days once more than one day is loaded.
-- username is used as a tie-breaker so the chosen rows are deterministic.
create table tb_rs_susr_top10
    comment "发送消息条数最多的Top10用户"
as
select
    dayinfo,
    username,
    sender_msg_cnt
from (
    select
        dayinfo,
        username,
        sender_msg_cnt,
        row_number() over (
            partition by dayinfo
            order by sender_msg_cnt desc, username
        ) as rn
    from (
        -- messages sent per day and nickname
        select
            dayinfo,
            sender_name as username,
            count(*) as sender_msg_cnt
        from db_msg.tb_msg_etl
        group by dayinfo, sender_name
    ) per_user
) ranked
where rn <= 10
order by dayinfo, sender_msg_cnt desc, username;

-- Inspect the result
select * from tb_rs_susr_top10;
+---------------------------+----------------------------+----------------------------------+
| tb_rs_susr_top10.dayinfo | tb_rs_susr_top10.username | tb_rs_susr_top10.sender_msg_cnt |
+---------------------------+----------------------------+----------------------------------+
| 2021-11-01 | 茹鸿晖 | 1466 |
| 2021-11-01 | 卢高达 | 1464 |
| 2021-11-01 | 犁彭祖 | 1460 |
| 2021-11-01 | 沐范 | 1459 |
| 2021-11-01 | 夫潍 | 1452 |
| 2021-11-01 | 烟心思 | 1449 |
| 2021-11-01 | 称子瑜 | 1447 |
| 2021-11-01 | 麻宏放 | 1442 |
| 2021-11-01 | 邴时 | 1439 |
| 2021-11-01 | 养昆颉 | 1431 |
+---------------------------+----------------------------+----------------------------------+
统计今日接收消息最多的Top10用户
-- Top 10 receivers per day by number of messages received.
-- Users are identified by receiver_name (nickname), so accounts that share a
-- nickname are counted together.
-- The ranking is done per dayinfo with row_number(): a plain "order by ... limit 10"
-- would keep only 10 rows across all days once more than one day is loaded.
-- username is used as a tie-breaker so the chosen rows are deterministic.
create table tb_rs_rusr_top10
    comment "接受消息条数最多的Top10用户"
as
select
    dayinfo,
    username,
    receiver_msg_cnt
from (
    select
        dayinfo,
        username,
        receiver_msg_cnt,
        row_number() over (
            partition by dayinfo
            order by receiver_msg_cnt desc, username
        ) as rn
    from (
        -- messages received per day and nickname
        select
            dayinfo,
            receiver_name as username,
            count(*) as receiver_msg_cnt
        from db_msg.tb_msg_etl
        group by dayinfo, receiver_name
    ) per_user
) ranked
where rn <= 10
order by dayinfo, receiver_msg_cnt desc, username;

-- Inspect a few rows of the result
select * from tb_rs_rusr_top10 limit 3;
+---------------------------+----------------------------+------------------------------------+
| tb_rs_rusr_top10.dayinfo | tb_rs_rusr_top10.username | tb_rs_rusr_top10.receiver_msg_cnt |
+---------------------------+----------------------------+------------------------------------+
| 2021-11-01 | 畅雅柏 | 1539 |
| 2021-11-01 | 春纯 | 1491 |
| 2021-11-01 | 邝琨瑶 | 1469 |
+---------------------------+----------------------------+------------------------------------+
统计发送人手机型号分布情况
-- Per day and sender phone model: number of distinct sender accounts.
-- The source table is qualified with db_msg, matching the other result queries,
-- so the statement does not depend on the session's current database to find it.
create table if not exists tb_rs_sender_phone
    comment "发送人的手机型号分布"
as
select
    dayinfo,
    sender_phonetype,
    count(distinct sender_account) as cnt
from db_msg.tb_msg_etl
group by dayinfo, sender_phonetype;

-- Inspect a few rows of the result
select * from tb_rs_sender_phone limit 3;
+-----------------------------+--------------------------------------+-------------------------+
| tb_rs_sender_phone.dayinfo | tb_rs_sender_phone.sender_phonetype | tb_rs_sender_phone.cnt |
+-----------------------------+--------------------------------------+-------------------------+
| 2021-11-01 | Apple iPhone 10 | 6749 |
| 2021-11-01 | Apple iPhone 11 | 3441 |
| 2021-11-01 | Apple iPhone 7 | 2424 |
+-----------------------------+--------------------------------------+-------------------------+
统计发送人设备操作系统分布情况
-- Per day and sender operating system: number of distinct sender accounts.
-- The source table is qualified with db_msg, matching the other result queries,
-- so the statement does not depend on the session's current database to find it.
create table tb_rs_sender_os
    comment "发送人的OS分布"
as
select
    dayinfo,
    sender_os,
    count(distinct sender_account) as cnt
from db_msg.tb_msg_etl
group by dayinfo, sender_os;

-- Inspect the result
select * from tb_rs_sender_os;
+--------------------------+----------------------------+----------------------+
| tb_rs_sender_os.dayinfo | tb_rs_sender_os.sender_os | tb_rs_sender_os.cnt |
+--------------------------+----------------------------+----------------------+
| 2021-11-01 | Android 5.1 | 5750 |
| 2021-11-01 | Android 6 | 8514 |
| 2021-11-01 | Android 6.0 | 9398 |
| 2021-11-01 | Android 7.0 | 9181 |
| 2021-11-01 | Android 8.0 | 8594 |
| 2021-11-01 | IOS 10.0 | 1289 |
| 2021-11-01 | IOS 12.0 | 8102 |
| 2021-11-01 | IOS 9.0 | 8760 |
+--------------------------+----------------------------+----------------------+
官网
https://www.finebi.com/
官方文档
https://help.fanruan.com/finebi/doc-view-301.html
使用FineBI连接Hive,读取Hive数据表,需要在FineBI中添加Hive驱动jar包
将Hive驱动jar包放入FineBI的lib目录下
在提供的文件中找到HiveConnectDrive
webapps\webroot\WEB-INF\lib
插件安装
隔离插件:fr-plugin-hive-driver-loader-3.0.zip