生产环境肯定需要开启kerberos的。这个时候坑就多了
CDH6 + IMPALA + Kerberos
本地数据上传 -> HDFS -> 导入 Impala
# Import configuration (bound to ImportConfig via @ConfigurationProperties)
import-config:
csv-tmp-path: /home/huweihui/back-end/data/import
# hive-url: jdbc:hive2://x.x.x.x:10000/default
impala-url: jdbc:impala://xxx:21050/default;AuthMech=1;KrbRealm=EXAMPLE.COM;KrbHostFQDN=xxx;KrbServiceName=impala;
impala-user: huweihui
# NOTE(review): plain-text credential committed to config — move to a secret store or environment variable
impala-password: Hantele@1234!
hdfs-uri: hdfs://xxxx:8020
hdfs-user: huweihui
# HDFS staging directory the uploaded CSV is copied into before LOAD DATA
hdfs-tmp-path: /user/huweihui/web_data
# Kerberos configuration (bound to KerberosConfig)
kerberos:
krb5-file-path: /etc/krb5.conf
keytab-file-path: /home/huweihui/back-end/config/huweihui-bi-be/huweihui.keytab
ker-user: [email protected]
/**
 * ImportConfig
 *
 * Description: configuration properties for the CSV -> HDFS -> Impala import
 * pipeline, bound from the "import-config" section of the application yaml.
 *
 * Creation Time: 2019/6/12 16:58.
 *
 * @author Hu-Weihui
 */
@Component
@Data
@ConfigurationProperties(prefix = "import-config")
public class ImportConfig {
// Local directory where uploaded CSV files are staged before the HDFS upload.
private String csvTmpPath;
// Impala JDBC URL; carries the Kerberos parameters (AuthMech, KrbRealm, KrbHostFQDN, KrbServiceName).
private String impalaUrl;
private String impalaUser;
private String impalaPassword;
// User and URI used to reach HDFS (e.g. hdfs://host:8020).
private String hdfsUser;
private String hdfsUri;
// HDFS directory used as the staging area for the Impala LOAD DATA statement.
private String hdfsTmpPath;
}
/**
 * KerberosConfig
 *
 * Description: Kerberos settings bound from the "kerberos" section of the
 * application yaml; consumed by KerberosUtil for the keytab login.
 *
 * Creation Time: 2019/7/5 11:12.
 *
 * @author Hu-Weihui
 * @since ${PROJECT_VERSION}
 */
@Component
@Data
@ConfigurationProperties(prefix = "kerberos")
public class KerberosConfig {
/** Kerberos principal (e.g. user@REALM). */
private String kerUser;
/** Path to krb5.conf, used for the JVM's java.security.krb5.conf property; it can be
 * copied locally from /etc/krb5.conf on any node of a Kerberos-enabled cluster. */
private String krb5FilePath;
/** Keytab file matching the principal above, fetched from the server and kept locally. */
private String keytabFilePath;
}
/**
* KerberosUtil
*
* Description kerberos 认证工具类
*
* Creation Time: 2019/7/5 11:06.
*
* @author Hu-Weihui
* @since ${PROJECT_VERSION}
*/
@Slf4j
public class KerberosUtil {
/**
* kerberos认证。
* @param configuration
* @param krb5FilePath
* @param kerUser
* @param keytabFilePath
* @return 返回kerberos登录对象,可使用此对象进一步操作
*/
public static UserGroupInformation kerberosAuth(Configuration configuration, String krb5FilePath, String kerUser, String keytabFilePath) {
// krb5.conf配置路径
System.setProperty("java.security.krb5.conf", krb5FilePath);
//开启kerberos
configuration.set("hadoop.security.authentication", "kerberos");
//鉴权
UserGroupInformation.setConfiguration(configuration);
try {
UserGroupInformation.loginUserFromKeytab(kerUser, keytabFilePath);
UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
return loginUser;
} catch (IOException e) {
log.error("kerberos auth fail : {}", e);
}
return null;
}
}
关键点:
1.configuration设置增加了远端访问的配置
2.进行Kerberos认证
3.IMPALA操作需要使用认证后的用户(loginUser,通过UserGroupInformation登录后返回)
4.kerberos相关配置(krb5.conf、keytab、principal)需正确无误
/**
 * Imports a CSV file into an Impala table: saves the upload to local disk,
 * copies it to HDFS, then runs an Impala LOAD DATA statement as the
 * Kerberos-authenticated user.
 *
 * @param tableName     target Impala table; NOTE(review): concatenated into SQL —
 *                      must be validated/whitelisted upstream, it cannot be a JDBC parameter
 * @param updateMethod  UpdateMethod code; OVERRIDE replaces the table's existing data
 * @param multipartFile uploaded CSV content
 */
@Override
public void importImpalaData(String tableName, String updateMethod, MultipartFile multipartFile) {
    // 1. Persist the upload to local disk.
    File localFile = saveToLocal(multipartFile);
    String localFilePath = localFile.getPath();
    String hdfsDstPath = importConfig.getHdfsTmpPath() + "/" + localFile.getName();
    // 2. Upload the local file to HDFS.
    Path srcPath = new Path(localFilePath);
    Path dstPath = new Path(hdfsDstPath);
    Path hdfsPath = new Path(importConfig.getHdfsTmpPath());
    try {
        // Remote access needs explicit cluster + Kerberos settings.
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", importConfig.getHdfsUri());
        configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        // FIX: original key had a stray leading space (" dfs.namenode...") and was silently ignored.
        configuration.set("dfs.namenode.kerberos.principal", "[email protected]");
        configuration.set("dfs.namenode.kerberos.principal.pattern", "*@EXAMPLE.COM");
        // Kerberos settings come from the application yaml (KerberosConfig).
        String krb5FilePath = kerberosConfig.getKrb5FilePath();
        String kerUser = kerberosConfig.getKerUser();
        String keytabFilePath = kerberosConfig.getKeytabFilePath();
        UserGroupInformation loginUser = KerberosUtil.kerberosAuth(configuration, krb5FilePath, kerUser, keytabFilePath);
        // FIX: kerberosAuth returns null on failure; the original dereferenced it blindly (NPE).
        if (loginUser == null) {
            log.error("kerberos auth returned null, aborting import for table {}", tableName);
            throw new DataManagementException("导入数据失败");
        }
        FileSystem fileSystem = FileSystem.get(configuration);
        if (!fileSystem.exists(hdfsPath)) {
            fileSystem.mkdirs(hdfsPath);
        }
        fileSystem.copyFromLocalFile(srcPath, dstPath);
        // 3. LOAD the HDFS file into Impala as the authenticated user.
        loginUser.doAs((PrivilegedAction<Void>) () -> {
            String url = importConfig.getImpalaUrl();
            String user = importConfig.getImpalaUser();
            String password = importConfig.getImpalaPassword();
            try {
                // FIX: register the driver BEFORE opening the connection (original did it after).
                Class.forName("com.cloudera.impala.jdbc41.Driver");
            } catch (ClassNotFoundException e) {
                log.error("load impala driver class fail :", e);
                throw new DataManagementException("导入数据失败");
            }
            try (Connection connection = DriverManager.getConnection(url, user, password);
                 Statement statement = connection.createStatement()) {
                // FIX: constant-first equals avoids an NPE when updateMethod is null.
                // FIX: the original OVERWRITE variant was missing the space after the
                // closing quote ("...'OVERWRITE INTO TABLE"), producing invalid SQL.
                String loadSql;
                if (UpdateMethod.OVERRIDE.getCode().equals(updateMethod)) {
                    loadSql = "LOAD DATA INPATH '" + hdfsDstPath + "' OVERWRITE INTO TABLE " + tableName;
                } else {
                    loadSql = "LOAD DATA INPATH '" + hdfsDstPath + "' INTO TABLE " + tableName;
                }
                statement.execute(loadSql);
                // Make the newly loaded data visible to Impala.
                statement.execute(String.format("REFRESH %s", tableName));
            } catch (SQLException e) {
                log.error("can not to load hdfs data into impala :", e);
                throw new DataManagementException("导入数据失败");
            }
            return null;
        });
    } catch (IOException e) {
        // FIX: corrected "con not" typo in the log message.
        log.error("can not get FileSystem :", e);
        throw new DataManagementException("上传到数据失败");
    }
}
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS", importConfig.getHdfsUri());
configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
configuration.set("dfs.namenode.kerberos.principal", "[email protected]");
configuration.set("dfs.namenode.kerberos.principal.pattern", "*@EXAMPLE.COM");
impala-url: jdbc:impala://xxx:21050/default;AuthMech=1;KrbRealm=EXAMPLE.COM;KrbHostFQDN=xxx;KrbServiceName=impala;
loginUser.doAs((PrivilegedAction<Void>) () -> {
//....todo
});
作者:HuHui
转载:欢迎一起讨论web和大数据问题,转载请注明作者和原文链接,感谢