MyBatis对ClickHouse的支持不是很好,JPA又没有对应的方言。做数仓需要大批量解析JSON数据,希望能根据JSONSchema定义自动建表,节省工作量,并且不用为了复杂JSON建对象,还要服务发版。这个工具的应用场景可能不是那么多,希望能帮到你,毕竟自己写也挺麻烦的。如果想要获取根据JSON数据生成Insert语句的同学请查看下一篇文章。
1、表数量与JsonSchema的properties节点数保持一致
2、数据类型转换:
| schemaType | clickHouseType |
| --- | --- |
| string | String |
| integer | Int64 |
| boolean | String |
| number | Decimal64(8) |
3、关联
子父节点通过自定义ID关联,每个节点会自动生成 表名 + jzx_id 字段,并且作为主键
子节点中会保存父节点的主键用来做关联映射
如表:crm_custom_list_fields_field_fieldScope
该表中会存在自己的主键:crm_custom_list_fields_field_fieldScope_jzx_id
还会存在父级节点的主键:crm_custom_list_fields_field_jzx_id
4、表名定义
表名 = 数据源 + _ + 数据场景 + _ + json节点key通过下划线拼接
如: crm_custom_list_fields_field_fieldScope
其中 crm为数据源, custom为数据源中的一个数据场景,list_fields_field_fieldScope为JSON数据的节点
5、clickhouse对大小写敏感,对于json中的key不做任何处理,原key值直接用于表名或字段名拼接
6、jsonSchema中的required定义会将数据库建表字段设置为不可为空
7、每张表都会自动创建jzx_create_date字段,保存数据插入的时间,精确到秒,并且根据该时间通过toYYYYMM()函数进行数据分区
如果你用mybatis的话建议最后生成的SQL这么执行:
// Runs a generated statement as-is. MyBatis substitutes ${sql} into the statement text
// verbatim (it is not a bind parameter), so pass only SQL produced by this tool and never
// user-supplied text.
@Select(" ${sql} ")
void execSql(@Param("sql") String sql);
// JSON Schema keyword names and type names used by the converter.
private static final String SCHEMA_TYPE = "type";
private static final String SCHEMA_TYPE_OBJECT = "object";
private static final String SCHEMA_TYPE_ARRAY = "array";
private static final String SCHEMA_TYPE_NULL = "null";
private static final String SCHEMA_TYPE_INTEGER = "integer";
private static final String SCHEMA_TYPE_NUMBER = "number";
private static final String SCHEMA_PROPERTIES = "properties";
private static final String SCHEMA_ITEMS = "items";
private static final String SCHEMA_REQUIRED = "required";

/**
 * Maps a JSON Schema primitive type name to the ClickHouse column type written into the DDL.
 * The values are padded with spaces so they can be appended to the statement directly.
 * Note that {@code boolean} is mapped to {@code String}.
 */
private static final Map<String, String> typeConversion = new HashMap<>(16);

static {
    typeConversion.put("string", " String ");
    typeConversion.put(SCHEMA_TYPE_INTEGER, " Int64 ");
    typeConversion.put("boolean", " String ");
    typeConversion.put(SCHEMA_TYPE_NUMBER, " Decimal64(8) ");
}
/**
 * Converts a JSON Schema document into ClickHouse {@code CREATE TABLE} statements.
 *
 * <p>One schema can produce several statements. The root table is named
 * {@code source + "_" + scene}; the root node must be of type {@code object} or {@code array}.
 *
 * @param source    data source name, used as the first part of every table name
 * @param scene     scenario within the data source, used as the second part of every table name
 * @param schema    the JSON Schema document as text
 * @param mustTable names of the tables to keep; when empty, every generated table is returned
 * @return the generated {@code CREATE TABLE} statements
 * @throws BusinessErrException if the schema text yields no JSON object or its root type is
 *                              neither {@code object} nor {@code array}
 */
public static List<String> jsonSchema2CreateTableSql(String source, String scene, String schema, List<String> mustTable) {
    JSONObject schemaNode = JSON.parseObject(schema);
    if (schemaNode == null) {
        // The parser may hand back null (e.g. for null input); report it like any other
        // unusable schema instead of failing with a NullPointerException below.
        throw new BusinessErrException("Unable to resolve JSONSchema!");
    }
    String nodeType = schemaNode.getString(SCHEMA_TYPE);
    String rootTableName = source + "_" + scene;
    if (SCHEMA_TYPE_OBJECT.equals(nodeType)) {
        return jsonObj2CreateTableSql(rootTableName, null, schemaNode.getJSONObject(SCHEMA_PROPERTIES), schemaNode.getJSONArray(SCHEMA_REQUIRED), mustTable);
    } else if (SCHEMA_TYPE_ARRAY.equals(nodeType)) {
        return jsonArray2CreateTableSql(rootTableName, null, schemaNode.getJSONObject(SCHEMA_ITEMS), mustTable);
    }
    throw new BusinessErrException("Unable to resolve JSONSchema!");
}
/**
 * Converts one JSON Schema {@code object} node into ClickHouse {@code CREATE TABLE} statements.
 *
 * <p>The node itself becomes one table whose columns are its primitive properties. Each nested
 * {@code object} or {@code array} property is converted recursively into its own table named
 * {@code tableName + "_" + key}, and that child table receives this table's id column as a link
 * back to its parent. Properties that have no {@code type}, or whose type is {@code null}, are
 * skipped.
 *
 * <p>Every table starts with a {@code <tableName>_jzx_id String} column and ends with a
 * {@code jzx_create_date DateTime} column, which the statement uses for partitioning.
 *
 * @param tableName    name of the table created for this node
 * @param parentIdName id column name of the parent table, or {@code null} for the root node
 * @param properties   the node's {@code properties} object; {@code null} yields an empty result
 * @param required     the node's {@code required} array; may be {@code null}, in which case
 *                     every column is emitted as {@code Nullable}
 * @param mustTable    names of the tables to keep; when empty, every table is returned
 * @return the generated statements: tables of nested nodes first, this node's table last
 * @throws BusinessErrException if a property's type has no ClickHouse mapping
 */
private static List<String> jsonObj2CreateTableSql(String tableName, String parentIdName, JSONObject properties, JSONArray required, final List<String> mustTable) {
    if (properties == null) {
        return new ArrayList<>();
    }
    List<String> result = new ArrayList<>();
    String tableKey = tableName + "_jzx_id";
    StringBuilder sql = new StringBuilder("CREATE TABLE ")
            .append(tableName)
            .append(" ON CLUSTER default ( ")
            .append(tableKey)
            .append(" String, ");
    if (parentIdName != null) {
        sql.append(parentIdName)
                .append(" String, ");
    }
    for (String key : properties.keySet()) {
        JSONObject typeObj = properties.getJSONObject(key);
        if (typeObj == null) {
            // A property declared as JSON null carries no type information; skip it.
            continue;
        }
        String type = typeObj.getString(SCHEMA_TYPE);
        if (StringUtils.isEmpty(type)) {
            continue;
        }
        switch (type) {
            case SCHEMA_TYPE_OBJECT:
                // Nested object: it gets its own table, linked back through tableKey.
                result.addAll(jsonObj2CreateTableSql(tableName + "_" + key, tableKey, typeObj.getJSONObject(SCHEMA_PROPERTIES), typeObj.getJSONArray(SCHEMA_REQUIRED), mustTable));
                continue;
            case SCHEMA_TYPE_ARRAY:
                // Nested array: its item type decides what table (if any) is created.
                result.addAll(jsonArray2CreateTableSql(tableName + "_" + key, tableKey, typeObj.getJSONObject(SCHEMA_ITEMS), mustTable));
                continue;
            case SCHEMA_TYPE_NULL:
                continue;
            default:
                break;
        }
        String columnType = (String) typeConversion.get(type);
        if (StringUtils.isEmpty(columnType)) {
            throw new BusinessErrException("Unrecognized type:" + type);
        }
        // "required" is optional in JSON Schema; when it is absent no property is mandatory.
        boolean nullable = required == null || !required.contains(key);
        sql.append(key);
        if (nullable) {
            sql.append(" Nullable(");
        }
        sql.append(columnType);
        if (nullable) {
            sql.append(")");
        }
        sql.append(", ");
    }
    sql.append(" jzx_create_date DateTime ");
    sql.append(") ENGINE = ReplicatedMergeTree( " +
            " '/clickhouse/tables/" + tableName + "/{shard}', " +
            " '{replica}') " +
            " PARTITION BY toYYYYMM(jzx_create_date) " +
            " ORDER BY (intHash32(jzx_create_date), " + tableKey + ") " +
            " SETTINGS index_granularity = 8192;");
    // The filter applies to this table only; tables of nested nodes were filtered by their own calls.
    if (CollectionUtils.isEmpty(mustTable) || mustTable.contains(tableName)) {
        result.add(sql.toString());
    }
    return result;
}
/**
 * Converts one JSON Schema {@code array} node into ClickHouse {@code CREATE TABLE} statements,
 * based on the type of its {@code items} schema.
 *
 * <p>Items of type {@code object} are converted as an object node under the same table name.
 * Items of type {@code array} are unwrapped recursively, still under the same table name.
 * A missing {@code items} schema, a missing item type, or item type {@code null} produces no
 * table. Any other item type is rejected.
 *
 * @param tableName    name of the table to create for this node
 * @param parentIdName id column name of the parent table, or {@code null} for the root node
 * @param items        the array's {@code items} schema; may be {@code null}
 * @param mustTable    names of the tables to keep; when empty, every table is returned
 * @return the generated statements, possibly empty
 * @throws BusinessErrException if the item type is not {@code object}, {@code array} or {@code null}
 */
private static List<String> jsonArray2CreateTableSql(String tableName, String parentIdName, JSONObject items, List<String> mustTable) {
    if (items == null) {
        return new ArrayList<>();
    }
    String type = items.getString(SCHEMA_TYPE);
    if (StringUtils.isEmpty(type)) {
        return new ArrayList<>();
    }
    switch (type) {
        case SCHEMA_TYPE_ARRAY:
            // Array of arrays: keep unwrapping until a non-array item schema is reached.
            return jsonArray2CreateTableSql(tableName, parentIdName, items.getJSONObject(SCHEMA_ITEMS), mustTable);
        case SCHEMA_TYPE_OBJECT:
            return jsonObj2CreateTableSql(tableName, parentIdName, items.getJSONObject(SCHEMA_PROPERTIES), items.getJSONArray(SCHEMA_REQUIRED), mustTable);
        case SCHEMA_TYPE_NULL:
            return new ArrayList<>();
        default:
            throw new BusinessErrException("Unrecognized type:" + type);
    }
}