【BigData】Impala-Jdbc数据导入(2)集成kerberos认证

文章目录

  • 前言
  • Project
    • Application.yml
  • Core-code
    • 配置类
    • Kerberos认证工具
    • 数据导入
  • 爬坑日志
    • configuration配置
    • IMPALA的URL
    • IMPALA执行操作需要使用LoginUser
    • 确认好了你的生产环境Kerberos没问题!!
  • Author

前言

生产环境肯定需要开启kerberos的。这个时候坑就多了

CDH6 + IMPALA + Kerberos

本地数据上传到 -> HDFS -> 导入 Impala

Project

Application.yml


# 导入配置
import-config:
  csv-tmp-path: /home/huweihui/back-end/data/import
  #  hive-url: jdbc:hive2://x.x.x.x:10000/default
  impala-url: jdbc:impala://xxx:21050/default;AuthMech=1;KrbRealm=EXAMPLE.COM;KrbHostFQDN=xxx;KrbServiceName=impala;
  impala-user: huweihui
  impala-password: Hantele@1234!
  hdfs-uri: hdfs://xxxx:8020
  hdfs-user: huweihui
  hdfs-tmp-path: /user/huweihui/web_data

#kerberos配置
kerberos:
  krb5-file-path: /etc/krb5.conf
  keytab-file-path: /home/huweihui/back-end/config/huweihui-bi-be/huweihui.keytab
  ker-user: [email protected]

Core-code

配置类


/**
 * ImportConfig
 *
 * Description: configuration properties for the data-import flow
 * (local CSV staging directory, Impala JDBC endpoint, HDFS staging area),
 * bound from the "import-config" section of application.yml.
 *
 * Creation Time: 2019/6/12 16:58.
 *
 * @author Hu-Weihui
 */
@Component
@Data
@ConfigurationProperties(prefix = "import-config")
public class ImportConfig {
    /** Local directory where uploaded CSV files are staged. */
    private String csvTmpPath;
    /** Impala JDBC URL (carries Kerberos AuthMech/KrbRealm/KrbHostFQDN/KrbServiceName parameters). */
    private String impalaUrl;
    /** Impala user name. */
    private String impalaUser;
    /** Impala password. */
    private String impalaPassword;
    /** HDFS user name. */
    private String hdfsUser;
    /** HDFS namenode URI, e.g. hdfs://host:8020. */
    private String hdfsUri;
    /** HDFS directory used as a staging area before the Impala LOAD. */
    private String hdfsTmpPath;
}

/**
 * KerberosConfig
 *
 * Description: Kerberos authentication settings, bound from the
 * "kerberos" section of application.yml.
 *
 * Creation Time: 2019/7/5 11:12.
 *
 * @author Hu-Weihui
 * @since ${PROJECT_VERSION}
 */
@Component
@Data
@ConfigurationProperties(prefix = "kerberos")
public class KerberosConfig {
    /** Kerberos principal, e.g. [email protected]. */
    private String kerUser;
    /**
     * Path to the local krb5.conf used for java.security.krb5.conf; it can be
     * copied from /etc/krb5.conf on any node of a kerberized cluster.
     */
    private String krb5FilePath;
    /** Path to the keytab file matching the principal, fetched from the server. */
    private String keytabFilePath;
}

Kerberos认证工具

/**
 * KerberosUtil
 * 

* Description kerberos 认证工具类 *

* Creation Time: 2019/7/5 11:06. * * @author Hu-Weihui * @since ${PROJECT_VERSION} */ @Slf4j public class KerberosUtil { /** * kerberos认证。 * @param configuration * @param krb5FilePath * @param kerUser * @param keytabFilePath * @return 返回kerberos登录对象,可使用此对象进一步操作 */ public static UserGroupInformation kerberosAuth(Configuration configuration, String krb5FilePath, String kerUser, String keytabFilePath) { // krb5.conf配置路径 System.setProperty("java.security.krb5.conf", krb5FilePath); //开启kerberos configuration.set("hadoop.security.authentication", "kerberos"); //鉴权 UserGroupInformation.setConfiguration(configuration); try { UserGroupInformation.loginUserFromKeytab(kerUser, keytabFilePath); UserGroupInformation loginUser = UserGroupInformation.getLoginUser(); return loginUser; } catch (IOException e) { log.error("kerberos auth fail : {}", e); } return null; } }

数据导入

关键点:

1.configuration设置增加了远端访问的配置

2.进行Kerberos认证

3.IMPALA操作需要使用认证后的用户(loginUser,通过UserGroupInformation登录后返回)

4.kerberos配置好


/**
     * IMPALA数据导入 — imports an uploaded CSV file into an Impala table.
     *
     * Flow: save the upload to a local temp file, copy it to the HDFS staging
     * directory, then run an Impala "LOAD DATA INPATH" (OVERWRITE when the
     * update method says so) followed by a REFRESH, all executed as the
     * Kerberos-authenticated login user.
     *
     * @param tableName     target Impala table; NOTE(review): concatenated into
     *                      SQL — must be a trusted/internal value, never raw user input
     * @param updateMethod  update-method code; UpdateMethod.OVERRIDE selects LOAD ... OVERWRITE
     * @param multipartFile uploaded CSV file
     */
    @Override
    public void importImpalaData(String tableName, String updateMethod, MultipartFile multipartFile) {
        // 1. Save the uploaded CSV to local disk.
        File localFile = saveToLocal(multipartFile);
        String localFilePath = localFile.getPath();
        String hdfsDstPath = importConfig.getHdfsTmpPath() + "/" + localFile.getName();
        // 2. Upload the local file to HDFS.
        Path srcPath = new Path(localFilePath);
        Path dstPath = new Path(hdfsDstPath);
        Path hdfsPath = new Path(importConfig.getHdfsTmpPath());
        try {
            // Remote access needs these settings on an explicit Configuration.
            Configuration configuration = new Configuration();
            configuration.set("fs.defaultFS", importConfig.getHdfsUri());
            configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
            // BUGFIX: the key had a leading space (" dfs.namenode...") so the
            // property was silently ignored and the namenode principal stayed unset.
            configuration.set("dfs.namenode.kerberos.principal", "[email protected]");
            configuration.set("dfs.namenode.kerberos.principal.pattern", "*@EXAMPLE.COM");
            // Read the Kerberos settings from application.yml.
            String krb5FilePath = kerberosConfig.getKrb5FilePath();
            String kerUser = kerberosConfig.getKerUser();
            String keytabFilePath = kerberosConfig.getKeytabFilePath();

            // Kerberos auth; fail fast if no login user was produced, instead of
            // hitting an NPE on loginUser.doAs(...) below.
            UserGroupInformation loginUser = KerberosUtil.kerberosAuth(configuration, krb5FilePath, kerUser, keytabFilePath);
            if (loginUser == null) {
                log.error("kerberos auth returned no login user for principal {}", kerUser);
                throw new DataManagementException("导入数据失败");
            }

            FileSystem fileSystem = FileSystem.get(configuration);
            if (!fileSystem.exists(hdfsPath)) {
                fileSystem.mkdirs(hdfsPath);
            }
            fileSystem.copyFromLocalFile(srcPath, dstPath);

            // 3. LOAD the staged HDFS file into Impala as the authenticated user.
            loginUser.doAs((PrivilegedAction<Void>) () -> {
                String url = importConfig.getImpalaUrl();
                String user = importConfig.getImpalaUser();
                String password = importConfig.getImpalaPassword();

                try {
                    // BUGFIX: the driver must be loaded BEFORE asking DriverManager
                    // for a connection, not after the connection was already opened.
                    Class.forName("com.cloudera.impala.jdbc41.Driver");
                } catch (ClassNotFoundException e) {
                    log.error("load impala driver class fail :", e);
                    throw new DataManagementException("导入数据失败");
                }

                try (Connection connection = DriverManager.getConnection(url, user, password);
                     Statement statement = connection.createStatement()) {

                    // LOAD DATA moves the staged file into the table's directory.
                    String loadSql = "LOAD DATA INPATH '" + hdfsDstPath + "' INTO TABLE " + tableName;
                    // Constant-first equals avoids an NPE when updateMethod is null.
                    if (UpdateMethod.OVERRIDE.getCode().equals(updateMethod)) {
                        // BUGFIX: a space is required before OVERWRITE; the original
                        // produced malformed SQL like '...path'OVERWRITE INTO TABLE.
                        loadSql = "LOAD DATA INPATH '" + hdfsDstPath + "' OVERWRITE INTO TABLE " + tableName;
                    }

                    statement.execute(loadSql);

                    // Make the newly loaded data visible to Impala.
                    String refreshSql = String.format("REFRESH  %s", tableName);

                    statement.execute(refreshSql);

                } catch (SQLException e) {
                    log.error("can not load hdfs data into impala :", e);
                    throw new DataManagementException("导入数据失败");
                }
                return null;
            });
        } catch (IOException e) {
            log.error("can not get FileSystem :", e);
            throw new DataManagementException("上传数据失败");
        }
    }

爬坑日志

configuration配置

            Configuration configuration = new Configuration();
            configuration.set("fs.defaultFS", importConfig.getHdfsUri());
            configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
            configuration.set(" dfs.namenode.kerberos.principal", "[email protected]");
            configuration.set("dfs.namenode.kerberos.principal.pattern", "*@EXAMPLE.COM");

IMPALA的URL

  impala-url: jdbc:impala://xxx:21050/default;AuthMech=1;KrbRealm=EXAMPLE.COM;KrbHostFQDN=xxx;KrbServiceName=impala;

IMPALA执行操作需要使用LoginUser

 loginUser.doAs((PrivilegedAction<Void>) () -> {
              //....todo 
            });

确认好了你的生产环境Kerberos没问题!!

  1. 确保你的用户是能认证kerberos并且访问HDFS&IMPALA
  2. 奉劝生产环境别用IMPALA和HDFS这两个默认用户
  3. 新增一个能使用双方的用户。举例huweihui,把该用户加到和hdfs和impala同一个supergroup里面。(不懂的找专业安装CDH的人)
  4. 安装完kerberos要给对应用户生成 keytab文件并且copy到项目
  5. 上面第3点需要产生一个[email protected]的账户。root登录Kerberos服务器,执行kadmin.local -> listprincs(查看用户列表必须有一个 [email protected])
  6. 网上坑货太多,建议直接看CDH impala-jdbc.dirver包的使用文档。附上链接:https://www.cloudera.com/documentation/other/connectors/impala-jdbc/latest.html
    Installation Guide (在线PDF)->CONFIGURING AUTHENTICATION -> USING KERBEROS (11页)
  7. 推荐一个Kerberos常用命令的BLOG :https://www.jianshu.com/p/69e6a2e7c648
  8. 怎么安装Kerberos和配置账号请自己查询一下,但是上述都是要注意的关键点

Author

 作者:HuHui
 转载:欢迎一起讨论web和大数据问题,转载请注明作者和原文链接,感谢

你可能感兴趣的:(BigData)