1,因业务需求,需要把mysql的数据同步到clickhouse,但是clickhouse的建表语法又比较繁琐,故写了一个udf来自动生成clickhouse的建表语句和导入语句
1.0 相关pom.xml
4.0.0
hdp-udf
hdp-udf
4.1
jar
hdp-udf
http://maven.apache.org
UTF-8
3.1.0
3.1.0
org.apache.hadoop
hadoop-client
${hadoop.version}
log4j
log4j
org.slf4j
slf4j-log4j12
provided
org.apache.hadoop
hadoop-common
${hadoop.version}
provided
org.apache.hive
hive-exec
${hive.version}
provided
org.apache.hive
hive-jdbc
2.1.0.2.6.4.76-1
provided
org.apache.hive
hive-service
2.1.0
provided
com.crgecent
crgt-util
1.1.0
redis.clients
jedis
2.9.0
junit
junit
4.12
test
org.apache.kafka
kafka-clients
1.1.1
org.apache.kafka
kafka_2.11
1.1.1
mysql
mysql-connector-java
5.1.25
ru.yandex.clickhouse
clickhouse-jdbc
0.2
com.alibaba
fastjson
1.2.62
hiveudf_${version}
src/main/java
net.alchim31.maven
scala-maven-plugin
3.3.1
scala-compile-first
process-resources
add-source
compile
compile
compile
testCompile
maven-assembly-plugin
3.0.0
jar-with-dependencies
make-assembly
package
single
org.apache.maven.plugins
maven-compiler-plugin
8
ownaliyunmaven
own aliyun maven repository
http://10.3.1.29:8081/repository/aliyun/
ownmaven
own maven repository
http://10.3.1.29:8081/repository/maven-central/
Hortonworks Repository
http://repo.hortonworks.com/content/repositories/releases/
true
false
true
true
hortonworks.other
Hortonworks Other Dependencies
http://repo.hortonworks.com/content/groups/public
package com.crgt;
import com.crgecent.common.util.DateUtil;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;
import java.sql.*;
import java.util.Date;
/**
 * Hive {@link GenericUDF} that inspects a MySQL table over JDBC and returns, as one text
 * value, the ClickHouse DDL/DML needed to mirror it: a local MergeTree table, a
 * Distributed {@code _cls} table and an {@code INSERT ... SELECT ... FROM mysql(...)}.
 *
 * <p>The table name is expected to be the intermediate table; a trailing {@code _ck}
 * suffix is stripped to obtain the ClickHouse table name.
 *
 * <p>Reference used while writing it:
 * https://blog.csdn.net/xiao_jun_0820/article/details/76638198
 *
 * <p>Originally written by wpp.
 */
public class Mysql2CKGenericUDF extends GenericUDF {
    /** JDBC driver class loaded before the MySQL connection is opened. */
    private static final String DRIVER_NAME = "com.mysql.jdbc.Driver";
    /** ClickHouse database the generated tables are created in. */
    private static final String CK_DATABASE = "rt_dwd";
    /** ClickHouse cluster named in ON CLUSTER and in the Distributed engine. */
    private static final String CK_CLUSTER = "crm_4shards_1replicas";
    /** Prefix every accepted JDBC URL must start with. */
    private static final String MYSQL_URL_PREFIX = "jdbc:mysql://";

    StringObjectInspector keyElementOI;
    StringObjectInspector tableElementOI;

    /**
     * Generates the ClickHouse statements for one (jdbcUrl, tableName) pair.
     *
     * @param arg0 two deferred string arguments: the MySQL JDBC URL and the table name
     * @return a {@link Text} holding the generated statements; when generation fails the
     *         text contains the header followed by the failure message
     * @throws HiveException if a deferred argument cannot be resolved
     */
    @Override
    public Object evaluate(DeferredObject[] arg0) throws HiveException {
        String jdbcUrl = keyElementOI.getPrimitiveJavaObject(arg0[0].get());
        String tableName = tableElementOI.getPrimitiveJavaObject(arg0[1].get());
        if (jdbcUrl == null || tableName == null) {
            return new Text("获取参数错误,需要 jdbcUrl 和 tableName");
        }
        // Never write the password to the task log.
        System.out.println("输入值 : " + maskPassword(jdbcUrl));

        StringBuilder res = new StringBuilder();
        res.append("\n========================mysql转成ck的建表语句 ")
                .append(tableName)
                .append(" ==================\n");
        try {
            res.append(getCKCreateTableInfo(jdbcUrl, tableName));
        } catch (Exception e) {
            // Best effort: keep the query alive, but surface the reason in the result
            // instead of returning a bare header that hides the failure.
            String reason = maskPassword(String.valueOf(e.getMessage()));
            e.printStackTrace();
            System.out.println("获取结果异常:" + reason);
            res.append("获取结果异常:").append(reason).append('\n');
        }
        return new Text(res.toString());
    }

    /**
     * Connects to MySQL, reads the table definition and builds the ClickHouse statements.
     *
     * @param cconn_str MySQL JDBC URL of the form
     *                  {@code jdbc:mysql://host:port/database?user=U&password=P}
     * @param tableName MySQL table to describe
     * @return the original MySQL DDL followed by the generated ClickHouse statements
     * @throws SQLException if an argument is malformed, the driver is missing, or a
     *                      database call fails
     */
    public static String getCKCreateTableInfo(String cconn_str ,String tableName) throws SQLException {
        if (cconn_str == null || tableName == null) {
            throw new SQLException("jdbcUrl and tableName must not be null");
        }
        // Identifiers cannot be bound as JDBC parameters, so restrict the name to
        // identifier characters before it is concatenated into SQL.
        if (!tableName.matches("[\\p{L}\\p{N}_$.`-]+")) {
            throw new SQLException("Illegal table name: " + tableName);
        }
        // Parsed up front so that a malformed URL fails before any connection is opened.
        String[] jdbcUrlArr = parseJdbcUrl(cconn_str);

        try {
            Class.forName(DRIVER_NAME);
        } catch (ClassNotFoundException e) {
            // Report to the caller; terminating the JVM here would take down the Hive process.
            throw new SQLException("MySQL JDBC driver not found: " + DRIVER_NAME, e);
        }

        String separator = cconn_str.contains("?") ? "&" : "?";
        String connStr = cconn_str + separator + "useUnicode=true&characterEncoding=UTF8";

        String ckTable = tableName.replaceAll("_ck$", "");
        String ckDbTableName = CK_DATABASE + "." + ckTable;

        StringBuilder result = new StringBuilder();
        StringBuilder resultCls = new StringBuilder();
        StringBuilder insertSql = new StringBuilder();

        try (Connection con = DriverManager.getConnection(connStr);
             Statement stmt = con.createStatement()) {

            result.append("==============mysql原表的格式=============\n");
            try (ResultSet createRs = stmt.executeQuery("show create table " + tableName)) {
                while (createRs.next()) {
                    result.append(createRs.getString(2)).append('\n');
                }
            }

            String describeSql = "describe " + tableName;
            System.out.println("Running: " + describeSql);
            try (ResultSet columns = stmt.executeQuery(describeSql)) {
                result.append("==============请确定主键和date类型的列,并把Nullable去掉=============\n");
                result.append("CREATE TABLE " + ckDbTableName + " on cluster " + CK_CLUSTER + " (\n");
                resultCls.append("CREATE TABLE " + ckDbTableName + "_cls on cluster " + CK_CLUSTER + " (\n");
                insertSql.append("INSERT INTO " + ckDbTableName + "_cls \nselect \n");

                while (columns.next()) {
                    String dataKey = columns.getString(1);
                    String dataType = columns.getString(2);
                    String ckDataType = Utils.getMysqlParseType(dataType.toLowerCase());
                    // Only the column literally named "id" is emitted as non-nullable.
                    String columnDef = "id".equals(dataKey)
                            ? " `" + dataKey + "` " + ckDataType + ",\n"
                            : " `" + dataKey + "` Nullable(" + ckDataType + "),\n";
                    result.append(columnDef);
                    resultCls.append(columnDef);
                    insertSql.append(dataKey).append(",\n");
                }
            }
        }

        // Last column of both tables; written without a trailing comma.
        String partitionColumn = " `ck_update_date` Date";
        result.append(partitionColumn);
        resultCls.append(partitionColumn);
        insertSql.append("if(update_time is null ,toDate(create_time),toDate(update_time)) \n");

        result.append("\n)ENGINE = MergeTree(ck_update_date, id, 8192);\n\n");
        resultCls.append("\n)ENGINE = Distributed(" + CK_CLUSTER + "," + CK_DATABASE + " ," + ckTable + ",rand());\n");
        insertSql.append("FROM mysql(" + quoteCk(jdbcUrlArr[0]) + ", " + quoteCk(jdbcUrlArr[1]) + ", "
                + quoteCk(tableName) + ", " + quoteCk(jdbcUrlArr[2]) + ", " + quoteCk(jdbcUrlArr[3]) + ") ;\n");

        result.append(resultCls);
        result.append("=============insert into ================\n");
        result.append(insertSql);
        return result.toString();
    }

    /**
     * Splits a MySQL JDBC URL into {host:port, database, user, password}.
     * Parameter order and additional URL parameters do not matter.
     */
    private static String[] parseJdbcUrl(String jdbcUrl) throws SQLException {
        String expected = "expecting " + MYSQL_URL_PREFIX + "host:port/database?user=...&password=...";
        if (!jdbcUrl.startsWith(MYSQL_URL_PREFIX)) {
            throw new SQLException("Unsupported JDBC URL, " + expected);
        }
        String rest = jdbcUrl.substring(MYSQL_URL_PREFIX.length());
        int queryStart = rest.indexOf('?');
        String location = queryStart < 0 ? rest : rest.substring(0, queryStart);
        String query = queryStart < 0 ? "" : rest.substring(queryStart + 1);

        int slash = location.indexOf('/');
        String hostPort = slash < 0 ? location : location.substring(0, slash);
        String database = slash < 0 ? "" : location.substring(slash + 1);

        String user = null;
        String password = null;
        for (String param : query.split("&")) {
            if (param.startsWith("user=")) {
                user = param.substring("user=".length());
            } else if (param.startsWith("password=")) {
                password = param.substring("password=".length());
            }
        }
        if (hostPort.isEmpty() || database.isEmpty() || user == null || password == null) {
            // The URL itself is deliberately left out of the message: it carries the password.
            throw new SQLException("Incomplete JDBC URL, " + expected);
        }
        return new String[] {hostPort, database, user, password};
    }

    /** Wraps a value in single quotes, backslash-escaping backslashes and quotes inside it. */
    private static String quoteCk(String value) {
        return "'" + value.replace("\\", "\\\\").replace("'", "\\'") + "'";
    }

    /** Replaces the value of every {@code password=} parameter in the text with asterisks. */
    private static String maskPassword(String text) {
        return text.replaceAll("(?i)(password=)[^&\\s]*", "$1******");
    }

    /** Returns the fixed text Hive shows for this function in plans and error messages. */
    @Override
    public String getDisplayString(String[] arg0) {
        return "Mysql2ckTableCreateGenericUDF(jdbcUrl,tableName)";
    }

    /**
     * Validates that exactly two string arguments were supplied and stores their inspectors.
     *
     * @param arg0 inspectors of the call arguments
     * @return the writable string inspector describing the UDF result
     * @throws UDFArgumentException if the argument count or an argument type is wrong
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException {
        if (arg0.length != 2) {
            throw new UDFArgumentException(" Expecting two arguments: jdbcUrl ,tableName");
        }
        // 1. The JDBC URL must be a string.
        ObjectInspector key = arg0[0];
        if (!(key instanceof StringObjectInspector)) {
            throw new UDFArgumentException("first argument (jdbcUrl) must be a string");
        }
        this.keyElementOI = (StringObjectInspector) key;
        // 2. The table name must be a string.
        ObjectInspector table = arg0[1];
        if (!(table instanceof StringObjectInspector)) {
            throw new UDFArgumentException("second argument (tableName) must be a string");
        }
        this.tableElementOI = (StringObjectInspector) table;
        return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    }
}
1.2 utils
package com.crgt;
import java.util.regex.Pattern;
/**
 * Helpers that map source column types to ClickHouse column types.
 *
 * <p>Author: wpp. Created: 2019/10/21 19:41.
 */
public class Utils {
    /** JDBC URL of the HiveServer2 instance (the host is a placeholder). */
    public static String HIVE_JDBC_URL= "jdbc:hive2://xx.xx.xx.xx:10000/default";

    /**
     * Maps a bare type name (no length or precision) to a ClickHouse type.
     * Matching is case-insensitive.
     *
     * @param oriDataType type name such as {@code int}, {@code TINYINT} or {@code string}
     * @return {@code Int64}, {@code Float64} or {@code String}; the marker
     *         {@code 999999999} for a type that is not recognised
     */
    public static String getParseType(String oriDataType){
        // Locale.ROOT keeps the folding stable under locales such as Turkish, where "I"
        // would otherwise not lower-case to "i". The labels must be lower case to match
        // the folded value.
        switch (oriDataType.toLowerCase(java.util.Locale.ROOT)) {
            case "boolean":
            case "tinyint":
            case "smallint":
            case "int":
            case "bigint":
                return "Int64";
            case "float":
            case "double":
            case "decimal":
                return "Float64";
            case "string":
            case "datetime":
            case "timestamp":
                return "String";
            default:
                return "999999999";
        }
    }

    /**
     * Maps a MySQL column type as reported by {@code DESCRIBE} (for example
     * {@code varchar(25)} or {@code bigint(20) unsigned}) to a ClickHouse type.
     * Matching is by substring and case-insensitive.
     *
     * @param oriDataType MySQL column type
     * @return {@code String}, {@code UInt64}, {@code Int64} or {@code Float64}; the marker
     *         {@code 9999999999999} for a type that is not recognised
     */
    public static String getMysqlParseType(String oriDataType){
        String type = oriDataType.toLowerCase(java.util.Locale.ROOT);

        // "time" also covers datetime and timestamp; "text" covers tinytext/longtext etc.
        // Checked first so that e.g. "tinytext" is not picked up by the "int" test below.
        if (type.contains("varchar") || type.contains("time") || type.contains("text")) {
            return "String";
        }

        // "int" covers tinyint/smallint/mediumint/bigint; the type is unsigned only when
        // the keyword follows the integer type name.
        int intPos = type.indexOf("int");
        if (intPos >= 0) {
            boolean unsigned = type.indexOf("unsigned", intPos + "int".length()) >= 0;
            return unsigned ? "UInt64" : "Int64";
        }

        if (type.contains("float") || type.contains("double") || type.contains("decimal")) {
            return "Float64";
        }
        return "9999999999999";
    }
}
2,创建hive的udf函数
create function mysqlinfo as 'com.crgt.Mysql2CKGenericUDF' using jar 'hdfs:///hiveudf/hiveudf_3.8-jar-with-dependencies.jar';
3,在hive上执行语句
select mysqlinfo("jdbc:mysql://10.xx.xx.xx:3306/order_center?user=user123&password=passwd123",'mysql_table_name');
4,执行结果如下:
========================mysql转成ck的建表语句 mysql_table_name ==================
==============mysql原表的格式=============
CREATE TABLE `mysql_table_name` (
`id` varchar(25) NOT NULL COMMENT '自定义主键',
`order_id` varchar(25) NOT NULL COMMENT '',
`p_order_id` varchar(25) NOT NULL COMMENT '',
`user_no` varchar(25) NOT NULL COMMENT '用户id',
`wx_openid` varchar(100) DEFAULT NULL COMMENT '',
`machine_id` varchar(50) DEFAULT NULL COMMENT '',
`scene_type` tinyint(2) DEFAULT NULL COMMENT '',
`order_type` tinyint(2) DEFAULT NULL COMMENT '',
`merchant_shop_id` varchar(50) DEFAULT NULL COMMENT '',
`shop_name` varchar(50) DEFAULT NULL COMMENT '',
`site_name` varchar(50) DEFAULT NULL COMMENT '车站',
`run_no` varchar(10) DEFAULT NULL COMMENT '',
`section_no` varchar(5) DEFAULT NULL COMMENT '',
`seat` varchar(10) DEFAULT NULL COMMENT '',
`opening_time` datetime DEFAULT NULL COMMENT '',
`riding_time` datetime DEFAULT NULL COMMENT '',
`delivery_type` tinyint(2) DEFAULT NULL COMMENT ',0:自取,1:送达',
`user_name` varchar(50) DEFAULT NULL COMMENT '用户名',
`brands_name` varchar(200) DEFAULT NULL COMMENT '品牌名',
`mobile` varchar(50) DEFAULT NULL COMMENT '',
`invoice_type` tinyint(2) DEFAULT NULL COMMENT '0:不开发票,1:开票',
`delivery_range` varchar(50) DEFAULT NULL COMMENT '',
`refund_expire_time` datetime DEFAULT NULL COMMENT '',
`box_count` int(5) DEFAULT NULL COMMENT '',
`ware_count` int(5) DEFAULT NULL COMMENT '',
`remark` varchar(150) DEFAULT NULL COMMENT '备注',
`delivery_start_time` datetime DEFAULT NULL COMMENT '',
`delivery_end_time` datetime DEFAULT NULL COMMENT '',
`create_time` datetime DEFAULT NULL COMMENT '创建时间',
`update_time` datetime DEFAULT NULL COMMENT '修改时间',
PRIMARY KEY (`id`,`user_no`) USING BTREE,
KEY `idx_order_id` (`order_id`,`user_no`) USING BTREE,
KEY `idx_order_pid` (`p_order_id`,`user_no`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='点餐表' shardkey=user_no
==============请确定主键和date类型的列,并把Nullable去掉=============
CREATE TABLE rt_dwd.mysql_table_name on cluster crm_4shards_1replicas (
`id` String,
`order_id` Nullable(String),
`p_order_id` Nullable(String),
`user_no` Nullable(String),
`wx_openid` Nullable(String),
`machine_id` Nullable(String),
`scene_type` Nullable(Int64),
`order_type` Nullable(Int64),
`merchant_shop_id` Nullable(String),
`shop_name` Nullable(String),
`site_name` Nullable(String),
`run_no` Nullable(String),
`section_no` Nullable(String),
`seat` Nullable(String),
`opening_time` Nullable(String),
`riding_time` Nullable(String),
`delivery_type` Nullable(Int64),
`user_name` Nullable(String),
`brands_name` Nullable(String),
`mobile` Nullable(String),
`invoice_type` Nullable(Int64),
`delivery_range` Nullable(String),
`refund_expire_time` Nullable(String),
`box_count` Nullable(Int64),
`ware_count` Nullable(Int64),
`remark` Nullable(String),
`delivery_start_time` Nullable(String),
`delivery_end_time` Nullable(String),
`create_time` Nullable(String),
`update_time` Nullable(String),
`ck_update_date` Date
)ENGINE = MergeTree(ck_update_date, id, 8192);
CREATE TABLE rt_dwd.mysql_table_name_cls on cluster crm_4shards_1replicas (
`id` String,
`order_id` Nullable(String),
`p_order_id` Nullable(String),
`user_no` Nullable(String),
`wx_openid` Nullable(String),
`machine_id` Nullable(String),
`scene_type` Nullable(Int64),
`order_type` Nullable(Int64),
`merchant_shop_id` Nullable(String),
`shop_name` Nullable(String),
`site_name` Nullable(String),
`run_no` Nullable(String),
`section_no` Nullable(String),
`seat` Nullable(String),
`opening_time` Nullable(String),
`riding_time` Nullable(String),
`delivery_type` Nullable(Int64),
`user_name` Nullable(String),
`brands_name` Nullable(String),
`mobile` Nullable(String),
`invoice_type` Nullable(Int64),
`delivery_range` Nullable(String),
`refund_expire_time` Nullable(String),
`box_count` Nullable(Int64),
`ware_count` Nullable(Int64),
`remark` Nullable(String),
`delivery_start_time` Nullable(String),
`delivery_end_time` Nullable(String),
`create_time` Nullable(String),
`update_time` Nullable(String),
`ck_update_date` Date
)ENGINE = Distributed(crm_4shards_1replicas,rt_dwd ,mysql_table_name,rand());
=============insert into ================
INSERT INTO rt_dwd.mysql_table_name_cls
select
id,
order_id,
p_order_id,
user_no,
wx_openid,
machine_id,
scene_type,
order_type,
merchant_shop_id,
shop_name,
site_name,
run_no,
section_no,
seat,
opening_time,
riding_time,
delivery_type,
user_name,
brands_name,
mobile,
invoice_type,
delivery_range,
refund_expire_time,
box_count,
ware_count,
remark,
delivery_start_time,
delivery_end_time,
create_time,
update_time,
if(update_time is null ,toDate(create_time),toDate(update_time))
FROM mysql('xxxx:3306', 'order_center', 'mysql_table_name', 'user123', 'passwd123') ;