JDBC Connection to Hive: A Simple Example (with Kerberos Enabled)

  • On an HDP cluster built with Ambari, Kerberos is enabled, so Hive data is provided to external consumers uniformly through JDBC. The simple example below was written as a reference for third-party data integration.

The code is as follows:

package com.bmsoft.hive.impl;

import org.apache.hadoop.security.UserGroupInformation;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * A simple JDBC-to-Hive example (with the Kerberos service enabled)
 */

public class HiveSimple2 {
    /**
     * Parameters needed to connect to Hive. driverName is the JDBC driver
     * class used to connect to Hive. When connecting to HiveServer2 with
     * Kerberos authentication, the URL format is:
     * jdbc:hive2://<host>:<port>/<db>;principal=<Server_Principal_of_HiveServer2>
     */
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";
    // Note: the principal here is fixed; it is the principal of the Hive service, not the principal of the connecting user
    private static String url = "jdbc:hive2://bigdata40:10000/admin;principal=hive/[email protected]";
    private static String sql = "";
    private static ResultSet res;

    public static Connection get_conn() throws SQLException, ClassNotFoundException {
        /** Log in with Hadoop security (Kerberos) **/
        org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
        conf.set("hadoop.security.authentication", "Kerberos");

        if (System.getProperty("os.name").toLowerCase().startsWith("win")) {
            // If this is not set, Windows looks for krb5.ini under C:\ by default
            System.setProperty("java.security.krb5.conf", "C:/Windows/krbconf/bms/krb5.ini");
        } // On Linux, /etc/krb5.conf is read by default; that file has already been placed under /etc/, so nothing needs to be set here
        try {
            UserGroupInformation.setConfiguration(conf);
            UserGroupInformation.loginUserFromKeytab("test2/[email protected]", "./conf/test2.keytab");
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        Class.forName(driverName);
        Connection conn = DriverManager.getConnection(url);
        return conn;
    }

    /**
     * List all tables in the database
     *
     * @param statement
     * @return
     */
    public static boolean show_tables(Statement statement) {
        sql = "SHOW TABLES";
        System.out.println("Running:" + sql);
        try {
            ResultSet res = statement.executeQuery(sql);
            System.out.println("执行“+sql+运行结果:");
            while (res.next()) {
                System.out.println(res.getString(1));
            }
            return true;
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Get the table's description
     *
     * @param statement
     * @param tableName
     * @return
     */
    public static boolean describ_table(Statement statement, String tableName) {
        sql = "DESCRIBE " + tableName;
        try {
            res = statement.executeQuery(sql);
            System.out.print(tableName + " description: ");
            while (res.next()) {
                System.out.println(res.getString(1) + "\t" + res.getString(2));
            }
            return true;
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Drop a table
     *
     * @param statement
     * @param tableName
     * @return
     */
    public static boolean drop_table(Statement statement, String tableName) {
        sql = "DROP TABLE IF EXISTS " + tableName;
        System.out.println("Running:" + sql);
        try {
            statement.execute(sql);
            System.out.println(tableName + "删除成功");
            return true;
        } catch (SQLException e) {
            System.out.println(tableName + "删除失败");
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Query table data
     *
     * @param statement
     * @return
     */
    public static boolean queryData(Statement statement, String tableName) {
        sql = "SELECT * FROM " + tableName + " LIMIT 20";
        System.out.println("Running:" + sql);
        try {
            res = statement.executeQuery(sql);
            System.out.println("执行“+sql+运行结果:");
            while (res.next()) {
                System.out.println(res.getString(1) + "," + res.getString(2) + "," + res.getString(3));
            }
            return true;
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Create a table
     *
     * @param statement
     * @return
     */
    public static boolean createTable(Statement statement, String tableName) {
        sql = "CREATE TABLE test_1m_test2 AS SELECT * FROM test_1m_test"; //  为了方便直接复制另一张表数据来创建表
        System.out.println("Running:" + sql);
        try {
            boolean execute = statement.execute(sql);
            System.out.println("执行结果 :" + execute);
            return true;
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return false;
    }

    public static void main(String[] args) {

        try {
            Connection conn = get_conn();
            Statement stmt = conn.createStatement();
            // Name of the table to create
            String tableName = "test_100m";
            show_tables(stmt);
            // describ_table(stmt, tableName);
            /** Drop the table **/
            // drop_table(stmt, tableName);
            // show_tables(stmt);
            // queryData(stmt, tableName);
            createTable(stmt, tableName);
            conn.close();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            System.out.println("!!!!!!END!!!!!!!!");
        }
    }
}
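
As a side note, the Connection, Statement, and ResultSet above are never closed on the error paths. A minimal sketch of the same login-and-query flow using try-with-resources (assuming the same cluster-specific URL, principal, and keytab path as in the example above) could look like this:

package com.bmsoft.hive.impl;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

/** Minimal sketch: Kerberos login as above, with automatic resource cleanup. */
public class HiveSimpleTryWithResources {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "Kerberos");
        UserGroupInformation.setConfiguration(conf);
        // Cluster-specific principal and keytab path, as in the example above
        UserGroupInformation.loginUserFromKeytab("test2/[email protected]", "./conf/test2.keytab");

        Class.forName("org.apache.hive.jdbc.HiveDriver");
        String url = "jdbc:hive2://bigdata40:10000/admin;principal=hive/[email protected]";
        // All three resources are closed automatically, even if a query throws
        try (Connection conn = DriverManager.getConnection(url);
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SHOW TABLES")) {
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
        }
    }
}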

The pom.xml is as follows:

<dependencies>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-common</artifactId>
            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-service</artifactId>
            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
            <type>jar</type>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
        </dependency>
</dependencies>

Reference:
https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients

One point in the documentation that deserves particular attention:

JDBC Client Setup for a Secure Cluster
When connecting to HiveServer2 with Kerberos authentication, the URL format is:
jdbc:hive2://<host>:<port>/<db>;principal=<Server_Principal_of_HiveServer2>
  • The principal here is fixed: it refers to the principal of the Hive service, not the principal of the connecting user. The <db> part may even name a database that does not exist, but in that case queries must qualify each table with the database it lives in (e.g. db.tablename); otherwise Hive looks for the table in <db> by default, as illustrated in the sketch below.
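
A minimal sketch of such a qualified query (the database name mydb is hypothetical; get_conn() is the method from the example above):

// With the connection from get_conn() above, a table outside the URL's <db>
// must be qualified as db.tablename ("mydb" here is a hypothetical database).
try (Connection conn = get_conn();
     Statement stmt = conn.createStatement();
     ResultSet rs = stmt.executeQuery("SELECT * FROM mydb.test_1m_test LIMIT 10")) {
    while (rs.next()) {
        System.out.println(rs.getString(1));
    }
}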
