Integrating Hadoop with Hive: Wrapping and Using the API

First, the dependencies:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.ta.hive</groupId>
    <artifactId>tg_hive</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>TG_hadoop_hive</name>
    <url>http://maven.apache.org</url>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <junit.version>4.12</junit.version>
        <hbase.version>1.1.2</hbase.version>
        <hadoop.version>2.6.2</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
            <scope>runtime</scope>
            <exclusions>
                <exclusion>
                    <artifactId>jdk.tools</artifactId>
                    <groupId>jdk.tools</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-service</artifactId>
            <version>1.1.1</version>
            <exclusions>
                <exclusion>
                    <artifactId>eigenbase-properties</artifactId>
                    <groupId>eigenbase</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>pentaho-aggdesigner-algorithm</artifactId>
                    <groupId>org.pentaho</groupId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>
</project>

If your Maven build breaks after updating, you most likely have a dependency conflict; exclude the conflicting artifacts (running mvn dependency:tree helps locate them), as done with the exclusions above.

Now let's look at the API wrapper. It talks to HiveServer2 over JDBC (note the jdbc:hive2:// URL below), so HiveServer2 must be running on the master host.

package com.tg.hadoop.hive;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * @author 汤高
 */
public class HiveAPI {
    // Older articles use org.apache.hadoop.hive.jdbc.HiveDriver; newer Hive versions no longer accept that class name
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";  

    // Note the hive2 scheme; many examples online use jdbc:hive://, which fails against newer HiveServer2 versions
    private static String url = "jdbc:hive2://master:10000/default"; 
    private static String user = "hive";  
    private static String password = "hive";  
    private static String sql = "";  

    public static ResultSet countData(Statement stmt, String tableName)  {
        sql = "select count(1) from " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res=null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("执行“regular hive query”运行结果:");
            while (res.next()) {
                System.out.println("count ------>" + res.getString(1));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static ResultSet selectData(Statement stmt, String tableName)  {
        sql = "select * from " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res=null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("执行 select * query 运行结果:");
            while (res.next()) {
                System.out.println(res.getInt(1) + "\t" + res.getString(2));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static boolean loadData(Statement stmt, String tableName,String filepath) {
        // Local file path; in my setup the file sits in the home directory of the VM where Hive is installed
        sql = "load data local inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running:" + sql);
        boolean result=false;
        try {
            result=stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static ResultSet describeTables(Statement stmt, String tableName)   {
        sql = "describe " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res=null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("执行 describe table 运行结果:");
            while (res.next()) {
                System.out.println(res.getString(1) + "\t" + res.getString(2));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static ResultSet showTables(Statement stmt, String tableName) {
        // With no table name, list all tables; otherwise filter by the given pattern
        if (tableName == null || tableName.isEmpty()) {
            sql = "show tables";
        } else {
            sql = "show tables '" + tableName + "'";
        }
        ResultSet res=null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("执行 show tables 运行结果:");
            while (res.next()) {
                System.out.println(res.getString(1));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static boolean createTable(Statement stmt, String tableName)  {
        sql = "create table " + tableName + " (key int, value string) row format delimited fields terminated by '\t'";
        boolean result=false;
        try {
            result=stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static boolean dropTable(Statement stmt, String tableName) {
        sql = "drop table " + tableName;
        boolean result = false;
        try {
            // Capture the return value of execute() so the method reports what happened
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static Connection getConn()  {
        Connection conn = null;
        try {
            Class.forName(driverName);
            conn = DriverManager.getConnection(url, user, password);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return conn;
    }

    public static void close(Connection conn, Statement stmt) {
        try {
            // Close the Statement before the Connection that created it
            if (stmt != null) {
                stmt.close();
                stmt = null;
            }
            if (conn != null) {
                conn.close();
                conn = null;
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
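
The standalone example further down calls HiveAPI.loadDataFromHDFS, which is missing from the listing above. Judging from the SQL visible in the log output (load data inpath ... without the LOCAL keyword), a minimal sketch of that method, meant to sit inside HiveAPI next to loadData, could look like this:

    public static boolean loadDataFromHDFS(Statement stmt, String tableName, String filepath) {
        // Without LOCAL, Hive moves the file from its current HDFS location
        // into the table's warehouse directory instead of copying a local file
        sql = "load data inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running:" + sql);
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }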

Now let's look at the JUnit tests:

package com.tg.hive.test;

import static org.junit.Assert.*;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.junit.Before;
import org.junit.Test;

import com.tg.hadoop.hive.HiveAPI;
/**
 * @author 汤高
 */
public class TestHive {
    private  Statement stmt = null; 
    private  Connection conn=null;
    @Before
    public void setConAndStatement() {
        conn = HiveAPI.getConn();
        // Make sure the connection was established before creating a Statement
        assertNotNull(conn);
        try {
            stmt = conn.createStatement();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    @Test
    public void testDropTable() {
        String tableName="testhive";
        assertNotNull(HiveAPI.dropTable(stmt, tableName));
    }

    @Test
    public void testCreateTable() {
        boolean result=HiveAPI.createTable(stmt,"testhive");
        assertNotNull(result);
    }

    @Test
    public void testdescribeTables(){
        ResultSet res=HiveAPI.describeTables(stmt, "testhive");
        assertNotNull(res);
    }

    @Test
    public void testshowTables(){
        //ResultSet res=HiveAPI.showTables(stmt, "testhive");
        ResultSet res=HiveAPI.showTables(stmt, null);
        assertNotNull(res);
    }

    @Test
    public void testloadData(){
        boolean result=HiveAPI.loadData( stmt, "testhive","user.txt");
        assertNotNull(result);
    }


    @Test
    public  void testclose(){
        HiveAPI.close(conn,stmt);
    }

    @Test
    public  void testSelectData(){
        ResultSet res=HiveAPI.selectData(stmt, "testhive");
        assertNotNull(res);
    }

    @Test
    public  void testCountData(){
        ResultSet res=HiveAPI.countData(stmt, "testhive");
        assertNotNull(res);
    }


}
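
The load in testloadData expects a user.txt on the Hive host whose lines match the table definition (key int, value string, tab-separated). Purely as an illustration (this helper is not part of the original post), a few lines of Java that produce such a file, with contents matching the query results shown later, might be:

import java.io.FileWriter;
import java.io.IOException;

public class WriteSampleData {
    public static void main(String[] args) throws IOException {
        try (FileWriter w = new FileWriter("user.txt")) {
            // One record per line: key <TAB> value, matching
            // "row format delimited fields terminated by '\t'"
            w.write("1\ttanggao\n");
            w.write("2\tzhousiyuan\n");
        }
    }
}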

The example above loads a file from the local filesystem into Hive.

When data is loaded into a Hive table from the local filesystem, Hive first copies it into a temporary directory on HDFS (typically the uploading user's HDFS home directory, e.g. /home/hive/) and then moves it (note: moves, not copies!) from that temporary directory into the table's data directory. Given that, Hive naturally also supports moving data straight from a directory on HDFS into the corresponding table's data directory. Suppose we have the file /hive/user.txt on HDFS; the concrete steps are as follows:

First, upload a user.txt file to HDFS, for example to /hive/user.txt (from the shell: hdfs dfs -put user.txt /hive/).
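
If you prefer to do the upload from Java, a minimal sketch using the Hadoop FileSystem API could look like the following. This is not part of the original code; the NameNode address is taken from the comment in the program below, and hadoop-common must be available at compile time (the POM above declares it with runtime scope):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class UploadUserFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // NameNode address as given in the comment of the program below; adjust to your cluster
        FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.52.140:9000"), conf);
        // Copy the local user.txt to /hive/user.txt on HDFS
        fs.copyFromLocalFile(new Path("user.txt"), new Path("/hive/user.txt"));
        fs.close();
    }
}

With the file in place on HDFS, the following program loads it into the table: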

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

import com.tg.hadoop.hive.HiveAPI;

public class Hive {

    public static void main(String[] args) {
        Connection conn = null;
        Statement stmt = null;
        try {
            conn = HiveAPI.getConn();
            stmt = conn.createStatement();

            //HiveAPI.dropTable(stmt, "testhadoophive");
            //HiveAPI.createTable(stmt, "testhadoophive");

            HiveAPI.describeTables(stmt, "testhadoophive");
            HiveAPI.showTables(stmt, null);

            // Note: the file actually lives at "hdfs://192.168.52.140:9000/hive/user.txt",
            // but the path must be written simply as "/hive/user.txt"
            String path = "/hive/user.txt";
            HiveAPI.loadDataFromHDFS(stmt, "testhadoophive", path);

            HiveAPI.selectData(stmt, "testhadoophive");
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            //HiveAPI.close(conn, stmt);
        }
    }
}

Result: the HDFS file was loaded into Hive successfully.

[27 21:39:29,613 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
Running:describe testhadoophive
[27 21:39:29,636 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 130
[27 21:39:29,679 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,695 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:29,712 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:29,730 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:29,733 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 273
Result of describe table:
[27 21:39:29,781 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,787 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 179
[27 21:39:29,812 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
key int
[27 21:39:29,813 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
value   string
[27 21:39:29,813 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,814 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 138
[27 21:39:29,819 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:29,823 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:29,826 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 118
[27 21:39:29,833 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,833 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:29,849 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:29,849 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:29,850 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 131
Result of show tables:
[27 21:39:29,850 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,853 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 171
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
tanggao
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
test
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
testhadoophive
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
testhive
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
testhive1
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
testhive3
[27 21:39:29,854 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,855 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 96
Running:load data inpath '/hive/user.txt' into table testhadoophive
[27 21:39:29,855 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:29,858 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:29,858 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 166
[27 21:39:29,888 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,888 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:30,233 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
Running:select * from testhadoophive
[27 21:39:30,233 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:30,235 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:30,235 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 135
[27 21:39:30,305 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:30,305 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:30,308 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:30,308 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:30,309 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 176
Result of select * query:
[27 21:39:30,309 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:30,326 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 150
[27 21:39:30,331 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
1   tanggao
[27 21:39:30,331 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 
2   zhousiyuan
[27 21:39:30,331 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:30,333 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 117

The user.txt that was uploaded to /hive on HDFS has now been moved to the HDFS path that Hive manages; in my setup that is /user/hive/warehouse/.

Please credit the source when reposting: http://blog.csdn.net/tanggao1314/article/details/51519503
