目录
1,目的
2,hive中的操作
2.1创建数据库
2.2,建表并导入数据
2.3,提取需要的数据
2.4,创建新的表并导入上一步所得数据
3,开发web项目
3.1,创建maven的web项目,并导入依赖
3.2,Util工具包
3.3,pojo层
3.4,dao层
3.5,service层
3.6,control层
3.7,配置web.xml
3.8,html页面
4,页面显示
将数据导入到hive中,通过数据分析后将结果存到新的表中,然后读取hive中的数据进行数据可视化。需要项目所需jar包,json文件和js文件以及数据可以关注我的微信公众号大太阳花花公主,在后台回复hive即可。
首先将数据上传到HDFS,然后启动hive,如果因为namenode处于安全模式而无法启动hive可参考我的另一篇博文:
“Name node is in safe mode”的解决方法_大太阳花花公主的博客-CSDN博客
-- Create the database that holds every table used in this walkthrough.
CREATE DATABASE nybikedb;
注意在创建表之前需要先使用数据库,即use nybikedb;
创建表:
-- Trip table backed by comma-delimited text; one column per CSV field.
CREATE TABLE tb_trip_06 (
    tripduration            INT,
    starttime               STRING,
    stoptime                STRING,
    start_station_id        INT,
    start_station_name      STRING,
    start_station_latitude  DOUBLE,
    start_station_longitude DOUBLE,
    stop_station_id         INT,
    stop_station_name       STRING,
    stop_station_latitude   DOUBLE,
    stop_station_longitude  DOUBLE,
    bikeid                  INT,
    usertype                STRING,
    birth_year              INT,
    gender                  INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';
导入数据:
-- Load 201906.csv from HDFS into tb_trip_06, replacing whatever the table held.
LOAD DATA INPATH 'hdfs://hadoop:9000/201906.csv'
OVERWRITE INTO TABLE tb_trip_06;
由于项目需求是分析6月份的30天的骑行数据,并对比每天每小时的骑行数量,因此只需要从源数据中提取日期,小时和骑行数量。
-- Count trips per (day of month, hour of day), sorted by day and then hour.
-- The select list has no aliases, so the sort keys are the generated names `_c0` and `_c1`.
SELECT day(starttime),
       hour(starttime),
       count(*)
FROM tb_trip_06
GROUP BY day(starttime), hour(starttime)
ORDER BY `_c0`, `_c1`;
`_c0`、`_c1`、`_c2` 是 Hive 为没有起别名的查询列自动生成的列名,分别对应 day、hour、count(日期、小时、数量)。需要注意的是,以下划线 `_` 开头的标识符不能直接书写,引用时必须用反引号(`)括起来,例如 `_c0`。
创建新表:
-- Result table for the per-day, per-hour trip counts.
-- The terminating semicolon is required: without it the Hive CLI keeps waiting for more input.
create table tb_trip_day_hour_count(day int,hour int,counts int);
导入数据:
-- Fill tb_trip_day_hour_count with the trip count for every (day, hour) pair.
-- Fixed: "selectday(" and "group byday(" were missing the space after the keyword.
insert into tb_trip_day_hour_count
select day(starttime),hour(starttime),count(*)
from tb_trip_06
group by day(starttime),hour(starttime)
order by `_c0`,`_c1`;
创建maven项目后在webapp目录下创建 data (存放china.json和world.json)和 js 文件夹(存放echarts.js、echarts-gl.js和jquery-1.11.0.min.js)
在pom.xml中导入相关依赖,内容如下:
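<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>2.3.6</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>2.3.6</version>
    </dependency>
    <dependency>
        <groupId>jdk.tools</groupId>
        <artifactId>jdk.tools</artifactId>
        <version>1.8</version>
        <scope>system</scope>
        <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.58</version>
    </dependency>
</dependencies>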
在util包中新建一个工具类用于与hive数据库连接。
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
/**
 * Static helper that opens JDBC connections to the {@code nybikedb} Hive database.
 *
 * <p>The Hive JDBC driver class is loaded once when this class is initialised. A failure to
 * load it is only printed, so it surfaces later as an {@link SQLException} from
 * {@link #getHiveConn()}.
 */
public class HiveDBUtil {
    private static final String DRIVER_CLASS = "org.apache.hive.jdbc.HiveDriver";
    private static final String JDBC_URL = "jdbc:hive2://192.168.59.100:10000/nybikedb";
    private static final String JDBC_USER = "root";
    private static final String JDBC_PASSWORD = "root";

    static {
        loadDriver();
    }

    /** Loads the Hive JDBC driver class; problems are printed, not rethrown. */
    private static void loadDriver() {
        try {
            Class.forName(DRIVER_CLASS);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new connection to Hive using the configured URL and credentials.
     *
     * @return a fresh connection; the caller is responsible for closing it
     * @throws SQLException if {@link DriverManager} cannot establish the connection
     */
    public static Connection getHiveConn() throws SQLException {
        Connection connection = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
        return connection;
    }
}
DayCount与hive中tb_trip_day_hour_count表对应,用于对应后端数据的封装
/**
 * One row of the {@code tb_trip_day_hour_count} table: the number of trips that started in a
 * given hour of a given day.
 */
public class DayCount {
    private Integer day;
    private Integer hour;
    private Integer counts;

    /** Creates an instance with all fields {@code null}. */
    public DayCount() {
    }

    /**
     * @param day    day value of the row
     * @param hour   hour value of the row
     * @param counts trip count of the row
     */
    public DayCount(Integer day, Integer hour, Integer counts) {
        this.day = day;
        this.hour = hour;
        this.counts = counts;
    }

    public Integer getDay() {
        return this.day;
    }

    public Integer getHour() {
        return this.hour;
    }

    public Integer getCounts() {
        return this.counts;
    }

    public void setDay(Integer day) {
        this.day = day;
    }

    public void setHour(Integer hour) {
        this.hour = hour;
    }

    public void setCounts(Integer counts) {
        this.counts = counts;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("DayCount [day=");
        text.append(day);
        text.append(", hour=").append(hour);
        text.append(", counts=").append(counts);
        return text.append("]").toString();
    }
}
HourCountVO类对应前端数据的封装
import java.util.List;
/**
 * View object handed to the front end: an x-axis list plus a list of data series.
 *
 * <p>NOTE(review): both lists are raw. {@code TripServiceImpl} fills {@code xData} with hour
 * values and {@code yData} with {@code DayItem} objects, so the intended types are presumably
 * {@code List<Integer>} and {@code List<DayItem>} — confirm before tightening them.
 */
public class HourCountVO {
    private List xData;
    private List yData;

    /** Creates an instance with both lists {@code null}. */
    public HourCountVO() {
    }

    /**
     * @param xData x-axis values
     * @param yData data series
     */
    public HourCountVO(List xData, List yData) {
        this.xData = xData;
        this.yData = yData;
    }

    public List getxData() {
        return this.xData;
    }

    public List getyData() {
        return this.yData;
    }

    public void setxData(List xData) {
        this.xData = xData;
    }

    public void setyData(List yData) {
        this.yData = yData;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("HourCount [xData=");
        text.append(xData);
        text.append(", yData=").append(yData);
        return text.append("]").toString();
    }
}
DayItem用于封装每天的数据,其中日期day用于图例。
import java.util.List;
/**
 * One data series for the chart: the per-hour trip counts of a single day plus the name
 * shown for that day in the legend.
 */
public class DayItem {
    /** Trip counts of the day, one entry per hour. */
    private List<Integer> hourData;
    /** Series name used for the legend. */
    private String dataName;

    /** Creates an instance with both fields {@code null}. */
    public DayItem() {
    }

    /**
     * @param hourData trip counts of the day, one entry per hour
     * @param dataName series name used for the legend
     */
    public DayItem(List<Integer> hourData, String dataName) {
        this.hourData = hourData;
        this.dataName = dataName;
    }

    /** @return the stored list itself (not a copy), or {@code null} if none was set */
    public List<Integer> getHourData() {
        return hourData;
    }

    public void setHourData(List<Integer> hourData) {
        this.hourData = hourData;
    }

    public String getDataName() {
        return dataName;
    }

    public void setDataName(String dataName) {
        this.dataName = dataName;
    }

    @Override
    public String toString() {
        return "DayItem [hourData=" + hourData + ", dataName=" + dataName + "]";
    }
}
在dao包中新建一个TripDao接口
import java.util.List;
import pojo.DayCount;
/**
 * Data-access contract for the per-day, per-hour trip counts.
 */
public interface TripDao {

    /**
     * Lists the stored trip-count rows.
     *
     * <p>NOTE(review): the return type is a raw {@code List}. The otherwise unused
     * {@code pojo.DayCount} import suggests it was meant to be {@code List<DayCount>} — confirm.
     *
     * @return the rows; element type presumably {@code DayCount}
     */
    List listCountPreHourOfDay();
}
在dao.impl包中新建一个类用于实现接口TripDao
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;
import dao.TripDao;
import pojo.DayCount;
import utils.HiveDBUtil;
public class TripDaoHiveTmpl implements TripDao{
public List listCountPreHourOfDay(){
List list = new ArrayList(720);
String sql="select * from tb_trip_day_hour_count";
try {
//获取连接
Connection conn = HiveDBUtil.getHiveConn();
//获取预编译的sql执行对象
PreparedStatement ps = conn.prepareStatement(sql);
//执行sql,获取结果
ResultSet rs = ps.executeQuery();
while(rs.next()) {
int day = rs.getInt("day");
int hour = rs.getInt("hour");
int counts = rs.getInt("counts");
DayCount DC = new DayCount(day,hour,counts);
list.add(DC);
}
} catch (Exception e) {
e.printStackTrace();
}
return list;
}
}
在service包中新建一个TripService接口
import pojo.HourCountVO;
/**
 * Service contract that prepares the trip-count data for display.
 */
public interface TripService {

    /**
     * Builds the view object holding the x-axis values and the data series.
     *
     * @return the populated view object
     */
    HourCountVO findCountPreHourOfDay();
}
在service.impl包中新建一个类用于实现接口TripService
import Service.TripService;
import dao.TripDao;
import dao.impl.TripDaoHiveTmpl;
import pojo.DayCount;
import pojo.DayItem;
import pojo.HourCountVO;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
public class TripServiceImpl implements TripService{
private TripDao dao=new TripDaoHiveTmpl();
public HourCountVO findCountPreHourOfDay() {
HourCountVO vo=new HourCountVO();
List list=dao.listCountPreHourOfDay();
Set xDataSet = new TreeSet();
//声明数组,保存每天的DayItem,数组下标=日期-1
DayItem[] itemArr = new DayItem[31];
for (DayCount dc : list) {
xDataSet.add(dc.getHour());
DayItem item=itemArr[dc.getDay()-1];
if(item==null) {
item = new DayItem();
item.setDataName("6-"+dc.getDay());
item.setHourData(new ArrayList(24));
itemArr[dc.getDay()-1]=item;
}
item.getHourData().add(dc.getCounts());
}
List yDataList = new ArrayList();
for (DayItem dayItem : itemArr) {
if(dayItem !=null) {
yDataList.add(dayItem);
}
}
List xData = new ArrayList(xDataSet);
vo.setxData(xData);
vo.setyData(yDataList);
return vo;
}
}
import java.io.IOException;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.alibaba.fastjson.JSON;
import Service.TripService;
import Service.impl.TripServiceImpl;
import pojo.HourCountVO;
/**
 * Servlet that returns the trip-count chart data as JSON.
 */
public class TripServlet extends HttpServlet {
    // Serialization only recognises a field with exactly this name; the former
    // "ServiceVersionUID" was an ordinary constant and declared no version.
    private static final long serialVersionUID = 1L;

    private TripService service = new TripServiceImpl();

    /**
     * Serialises the view object from the service to JSON and writes it to the response.
     *
     * @param req  the incoming request (not inspected)
     * @param resp the response that receives the JSON body
     * @throws ServletException declared by the servlet contract
     * @throws IOException if writing the response fails
     */
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse resp)
            throws ServletException, IOException {
        HourCountVO vo = service.findCountPreHourOfDay();
        String jsonStr = JSON.toJSONString(vo);
        // Set the content type before getWriter() so the charset applies to the writer.
        resp.setContentType("application/json;charset=utf-8");
        resp.getWriter().write(jsonStr);
    }

    /** Handles POST exactly like GET. */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp)
            throws ServletException, IOException {
        doGet(req, resp);
    }
}
在web.xml中添加如下内容:
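<servlet>
    <servlet-name>TripServlet</servlet-name>
    <servlet-class>control.TripServlet</servlet-class>
</servlet>
<servlet-mapping>
    <servlet-name>TripServlet</servlet-name>
    <url-pattern>/trip</url-pattern>
</servlet-mapping>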
骑行数据展示