1、脚本
./hive -e 'select * from t_student';
hive -e "hql"
编写hive脚本
vi weblog.sh
#!/bin/bash
# Daily weblog ETL: for one partition day, derive active users, new users,
# and append new users to the history table. The three steps are dependent,
# so abort on the first hive failure instead of running later steps on
# partial data.
set -e

# Partition day to process; fixed test date here, switch to the commented
# line for the current day in production.
day_str="2017-09-17"
#day_str=`date +"%Y-%m-%d"`

hive_exec=/opt/apache-hive-1.2.2-bin/bin/hive

# Active users: first access per uname within the day (row_number = 1).
HQL_user_active_day="insert into weblog.t_activity_user partition(day='$day_str') select tmp.ip ip,tmp.uname uname,tmp.access_time access_time,tmp.url url from (select ip,uname,access_time,url,row_number() over(partition by uname order by access_time) rk from weblog.t_web_log where day='$day_str') tmp where rk = 1"
# New users: active users not yet present in the history table (anti-join).
HQL_user_new_day="insert into weblog.t_new_user partition(day='$day_str') select tau.ip,tau.uname,tau.access_time,tau.url from weblog.t_activity_user tau left join weblog.t_history_user thu on tau.uname = thu.uname where tau.day = '$day_str' and thu.uname is null"
# Append today's new users to the (uname-only) history table.
HQL_user_history_day="insert into weblog.t_history_user select uname from weblog.t_new_user where day = '$day_str'"

"$hive_exec" -e "$HQL_user_active_day"
"$hive_exec" -e "$HQL_user_new_day"
"$hive_exec" -e "$HQL_user_history_day"
还可以把 HQL 语句写在一个 .sql 脚本文件里，
然后用 hive -f sqlscript.sql 执行该脚本。
2、JAVA-API
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>1.2.2</version>
    <scope>provided</scope>
</dependency>
package com.bawei.hive;
import java.sql.*;

/**
 * JDBC client that runs the daily weblog ETL against HiveServer2: loads a raw
 * log partition, derives active users and new users, appends new users to the
 * history table, then prints all three result tables.
 *
 * <p>Connects to {@code jdbc:hive2://hdp1:10000} as {@code root}.
 */
public class ClientDemo {
    public static void main(String[] args) throws ClassNotFoundException, SQLException {
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        // try-with-resources: the original leaked Connection/Statement (and all
        // ResultSets) whenever any statement failed; now closed on every path.
        try (Connection conn = DriverManager.getConnection("jdbc:hive2://hdp1:10000", "root", "");
             Statement stat = conn.createStatement()) {
            stat.execute("load data local inpath '/root/09-17.log' into table weblog.t_web_log partition(day='09-17')");
            stat.execute("use weblog");
            // Active users: first access per uname within the day (row_number = 1).
            stat.execute("insert into t_activity_user partition(day='09-17')\n" +
                    "select\n" +
                    "tmp.ip ip,tmp.uname uname,tmp.access_time access_time,tmp.url url\n" +
                    "from\n" +
                    "(select\n" +
                    "ip,uname,access_time,url,row_number() over(partition by uname order by access_time) rk\n" +
                    "from\n" +
                    "t_web_log\n" +
                    "where day='09-17') tmp\n" +
                    "where rk = 1");
            // New users: active users not yet present in the history table (anti-join).
            stat.execute("insert into t_new_user partition(day='09-17')\n" +
                    "select\n" +
                    "tau.ip,tau.uname,tau.access_time,tau.url\n" +
                    "from\n" +
                    "t_activity_user tau left join t_history_user thu on tau.uname = thu.uname\n" +
                    "where tau.day = '09-17' and thu.uname is null");
            // Append today's new users to the history table.
            stat.execute("insert into t_history_user\n" +
                    "select uname from t_new_user where day = '09-17'");

            printUserRows(stat, "select * from t_activity_user where day = '09-17'");
            printSeparator();
            printUserRows(stat, "select * from t_new_user where day = '09-17'");
            printSeparator();
            // History table has a single uname column.
            try (ResultSet rs = stat.executeQuery("select * from t_history_user")) {
                System.out.println("uname");
                while (rs.next()) {
                    System.out.println(rs.getString(1));
                }
            }
        }
    }

    /** Runs {@code sql} and prints its four columns (ip, uname, access_time, url) pipe-separated. */
    private static void printUserRows(Statement stat, String sql) throws SQLException {
        try (ResultSet rs = stat.executeQuery(sql)) {
            System.out.println("ip | uname | access_time | url");
            while (rs.next()) {
                String ip = rs.getString(1);
                String uname = rs.getString(2);
                String accessTime = rs.getString(3);
                String url = rs.getString(4);
                System.out.println(ip + " | " + uname + " | " + accessTime + " | " + url);
            }
        }
    }

    /** Prints the blank-line/ruler separator emitted between result sets. */
    private static void printSeparator() {
        System.out.println("");
        System.out.println("=============================================================");
        System.out.println("");
    }
}
3、Hive自定义函数
package com.bawei.hive;
import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF {@code getjson(json, key)}: extracts the string value of a key
 * from a JSON-object column.
 */
public class JsonToString extends UDF {
    /**
     * @param json  a JSON object as text; Hive passes null for NULL column values
     * @param param the key whose value to extract
     * @return the string value for {@code param}, or null when either argument
     *         is null, the JSON text is empty, or the key is absent — returning
     *         null keeps one bad row from aborting the whole query
     */
    public String evaluate(String json, String param) {
        // The original dereferenced the arguments unconditionally, so a single
        // NULL column value threw NullPointerException and failed the query.
        if (json == null || param == null) {
            return null;
        }
        JSONObject jsonObject = JSONObject.parseObject(json);
        return jsonObject == null ? null : jsonObject.getString(param);
    }
}
pom.xml
<modelVersion>4.0.0</modelVersion>
<groupId>com.bawei</groupId>
<artifactId>hive</artifactId>
<version>1.0-SNAPSHOT</version>

<dependencies>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>1.2.2</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.4.3</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
1. test 范围：仅在测试时有效，编译和打包时都不会使用这个依赖。
2. compile 范围：在编译范围内有效，编译和打包时都会将依赖打包进去。
3. provided 范围：在编译和测试过程中有效，最后生成 war 包时不会加入。例如：
servlet-api，因为 Tomcat 服务器已经自带 servlet-api，如果再打包进去会冲突。
4.runtime在运行时候依赖,在编译时候不依赖
默认依赖范围是compile
创建永久函数
方法一:
add jar /opt/apache-hive-1.2.2-bin/lib/hive-udf2.jar;
create function getjson AS 'com.bawei.hive.JsonToString';
方法二:
在hive-site.xml里添加jar包
<property>
    <name>hive.aux.jars.path</name>
    <value>file:///opt/apache-hive-1.2.2-bin/lib/hive-udf2.jar</value>
</property>
create function getjson AS 'com.bawei.hive.JsonToString';
方法三:
create function getjson as 'com.bawei.hive.JsonToString' using jar 'hdfs://hiveudf/hive-udf2.jar';
drop function getjson;
创建临时函数:temporary
create temporary function ……