First, let's look at the dependencies:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.ta.hive</groupId>
<artifactId>tg_hive</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>TG_hadoop_hive</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<junit.version>4.12</junit.version>
<hbase.version>1.1.2</hbase.version>
<hadoop.version>2.6.2</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>runtime</scope>
<exclusions>
<exclusion>
<artifactId>jdk.tools</artifactId>
<groupId>jdk.tools</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>1.1.1</version>
<exclusions>
<exclusion>
<artifactId>eigenbase-properties</artifactId>
<groupId>eigenbase</groupId>
</exclusion>
<exclusion>
<artifactId>pentaho-aggdesigner-algorithm</artifactId>
<groupId>org.pentaho</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
If your Maven build fails after updating, you have a dependency conflict; exclude the conflicting artifacts as shown above.
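If it is not obvious which transitive dependency is colliding, printing the dependency tree (a standard Maven command, not specific to this project) shows every artifact that hadoop-common, hive-jdbc and hive-service pull in; anything that appears more than once with different versions is a candidate for an exclusion like the ones above:
mvn dependency:tree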
Now let's look at the API wrapper:
package com.tg.hadoop.hive;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * @author 汤高
 */
public class HiveAPI {
//older articles use org.apache.hadoop.hive.jdbc.HiveDriver; that class name no longer works with newer Hive versions
private static String driverName = "org.apache.hive.jdbc.HiveDriver";
//note the "hive2" scheme here; many older examples use "jdbc:hive://", which fails against newer HiveServer2 versions
private static String url = "jdbc:hive2://master:10000/default";
private static String user = "hive";
private static String password = "hive";
private static String sql = "";
public static ResultSet countData(Statement stmt, String tableName) {
sql = "select count(1) from " + tableName;
System.out.println("Running:" + sql);
ResultSet res=null;
try {
res = stmt.executeQuery(sql);
System.out.println("执行“regular hive query”运行结果:");
while (res.next()) {
System.out.println("count ------>" + res.getString(1));
}
} catch (SQLException e) {
e.printStackTrace();
}
return res;
}
public static ResultSet selectData(Statement stmt, String tableName) {
sql = "select * from " + tableName;
System.out.println("Running:" + sql);
ResultSet res=null;
try {
res = stmt.executeQuery(sql);
System.out.println("执行 select * query 运行结果:");
while (res.next()) {
System.out.println(res.getInt(1) + "\t" + res.getString(2));
}
} catch (SQLException e) {
e.printStackTrace();
}
return res;
}
public static boolean loadData(Statement stmt, String tableName,String filepath) {
// local path on the machine where Hive is installed; mine is under the home directory of that VM
sql = "load data local inpath '" + filepath + "' into table " + tableName;
System.out.println("Running:" + sql);
boolean result=false;
try {
result=stmt.execute(sql);
} catch (SQLException e) {
e.printStackTrace();
}
return result;
}
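// The demo class further below calls HiveAPI.loadDataFromHDFS(...), which is missing from this listing.
// A minimal sketch, assuming it simply mirrors loadData() without the LOCAL keyword, so Hive moves the
// file from HDFS instead of uploading it from the local file system (this matches the
// "load data inpath '/hive/user.txt' into table testhadoophive" statement visible in the log output below).
public static boolean loadDataFromHDFS(Statement stmt, String tableName, String filepath) {
sql = "load data inpath '" + filepath + "' into table " + tableName;
System.out.println("Running:" + sql);
boolean result = false;
try {
result = stmt.execute(sql);
} catch (SQLException e) {
e.printStackTrace();
}
return result;
}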
public static ResultSet describeTables(Statement stmt, String tableName) {
sql = "describe " + tableName;
System.out.println("Running:" + sql);
ResultSet res=null;
try {
res = stmt.executeQuery(sql);
System.out.println("执行 describe table 运行结果:");
while (res.next()) {
System.out.println(res.getString(1) + "\t" + res.getString(2));
}
} catch (SQLException e) {
e.printStackTrace();
}
return res;
}
public static ResultSet showTables(Statement stmt, String tableName) {
if (tableName == null || tableName.trim().isEmpty()) {
sql = "show tables";
}else{
sql = "show tables '" + tableName + "'";
}
ResultSet res=null;
try {
res = stmt.executeQuery(sql);
System.out.println("执行 show tables 运行结果:");
while (res.next()) {
System.out.println(res.getString(1));
}
} catch (SQLException e) {
e.printStackTrace();
}
return res;
}
public static boolean createTable(Statement stmt, String tableName) {
sql = "create table " + tableName + " (key int, value string) row format delimited fields terminated by '\t'";
boolean result=false;
try {
result=stmt.execute(sql);
} catch (SQLException e) {
e.printStackTrace();
}
return result;
}
public static boolean dropTable(Statement stmt,String tableName) {
// name of the table that was created earlier
//String tableName = "testHive";
sql = "drop table " + tableName;
boolean result=false;
try {
result = stmt.execute(sql);
} catch (SQLException e) {
e.printStackTrace();
}
return result;
}
public static Connection getConn() {
Connection conn = null;
try {
Class.forName(driverName);
conn = DriverManager.getConnection(url, user, password);
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
}
return conn;
}
public static void close(Connection conn,Statement stmt){
try {
if (stmt != null) {
stmt.close();
stmt = null;
}
if (conn != null) {
conn.close();
conn = null;
}
} catch (SQLException e) {
e.printStackTrace();
}
}
}
Now the JUnit tests:
package com.tg.hive.test;
import static org.junit.Assert.*;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.junit.Before;
import org.junit.Test;
import com.tg.hadoop.hive.HiveAPI;
/**
 * @author 汤高
 */
public class TestHive {
private Statement stmt = null;
private Connection conn=null;
@Before
public void setConAndStatement (){
conn = HiveAPI.getConn();
try {
stmt = conn.createStatement();
} catch (SQLException e) {
e.printStackTrace();
}
assertNotNull(conn);
}
@Test
public void testDropTable() {
String tableName="testhive";
assertNotNull(HiveAPI.dropTable(stmt, tableName));
}
@Test
public void testCreateTable() {
boolean result=HiveAPI.createTable(stmt,"testhive");
assertNotNull(result);
}
@Test
public void testdescribeTables(){
ResultSet res=HiveAPI.describeTables(stmt, "testhive");
assertNotNull(res);
}
@Test
public void testshowTables(){
//ResultSet res=HiveAPI.showTables(stmt, "testhive");
ResultSet res=HiveAPI.showTables(stmt, null);
assertNotNull(res);
}
@Test
public void testloadData(){
boolean result=HiveAPI.loadData( stmt, "testhive","user.txt");
assertNotNull(result);
}
@Test
public void testclose(){
HiveAPI.close(conn,stmt);
}
@Test
public void testSelectData(){
ResultSet res=HiveAPI.selectData(stmt, "testhive");
assertNotNull(res);
}
@Test
public void testCountData(){
ResultSet res=HiveAPI.countData(stmt, "testhive");
assertNotNull(res);
}
}
The above loads a file from the local file system into Hive.
When data is loaded into a Hive table from the local file system, the file is in fact first copied to a temporary directory on HDFS (typically under the HDFS home directory of the user doing the upload, e.g. /home/hive/), and then moved (note: moved, not copied!) from that temporary directory into the Hive table's data directory. Given that, Hive naturally also supports moving data directly from an HDFS directory into the corresponding Hive table's data directory. Suppose the file /hive/user.txt already exists on HDFS; the concrete steps are as follows.
First, upload a user.txt file to HDFS (here under the /hive directory):
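For reference, the upload can be done with the standard HDFS shell; the target directory /hive matches the path used in the code below (adjust to your own cluster):
hdfs dfs -mkdir -p /hive
hdfs dfs -put user.txt /hive/user.txt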
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import com.tg.hadoop.hive.HiveAPI;
public class Hive {
public static void main(String[] args) {
Connection conn = null;
Statement stmt = null;
try {
conn = HiveAPI.getConn();
stmt = conn.createStatement();
//HiveAPI.dropTable(stmt, "testhadoophive");
//HiveAPI.createTable(stmt,"testhadoophive");
HiveAPI.describeTables(stmt, "testhadoophive");
HiveAPI.showTables(stmt, null);
//note: the full path is actually "hdfs://192.168.52.140:9000/hive/user.txt",
//but only the HDFS-relative part "/hive/user.txt" should be passed here
String path = "/hive/user.txt";
HiveAPI.loadDataFromHDFS( stmt, "testhadoophive",path);
HiveAPI.selectData(stmt, "testhadoophive");
} catch (SQLException e) {
e.printStackTrace();
}finally {
//HiveAPI.close(conn,stmt);
}
}
}
Result: the file was loaded from HDFS into Hive successfully.
[27 21:39:29,613 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
Running:describe testhadoophive
[27 21:39:29,636 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 130
[27 21:39:29,679 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,695 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:29,712 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:29,730 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:29,733 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 273
Result of describe table:
[27 21:39:29,781 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,787 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 179
[27 21:39:29,812 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
key int
[27 21:39:29,813 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
value string
[27 21:39:29,813 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,814 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 138
[27 21:39:29,819 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:29,823 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:29,826 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 118
[27 21:39:29,833 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,833 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:29,849 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:29,849 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:29,850 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 131
Result of show tables:
[27 21:39:29,850 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,853 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 171
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
tanggao
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
test
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
testhadoophive
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
testhive
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
testhive1
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
testhive3
[27 21:39:29,854 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,855 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 96
Running:load data inpath '/hive/user.txt' into table testhadoophive
[27 21:39:29,855 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:29,858 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:29,858 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 166
[27 21:39:29,888 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,888 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:30,233 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
Running:select * from testhadoophive
[27 21:39:30,233 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:30,235 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:30,235 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 135
[27 21:39:30,305 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:30,305 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:30,308 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:30,308 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:30,309 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 176
Result of the select * query:
[27 21:39:30,309 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:30,326 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 150
[27 21:39:30,331 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
1 tanggao
[27 21:39:30,331 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string:
2 zhousiyuan
[27 21:39:30,331 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:30,333 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 117
The user.txt you had just uploaded to the /hive directory on HDFS has now been moved to the HDFS location that Hive manages for the table; in my setup that is /user/hive/warehouse/.
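To confirm the move, you can list both directories with the HDFS shell (the warehouse path below is the default location from my setup; yours may differ): after the load, user.txt should appear under the table's directory and no longer under /hive:
hdfs dfs -ls /hive
hdfs dfs -ls /user/hive/warehouse/testhadoophive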
Please credit the source when reposting: http://blog.csdn.net/tanggao1314/article/details/51519503