根据JsonSchema生成阿里云ClickHouse建表语句

 

MyBatis 对 ClickHouse 的支持不是很好,JPA 又没有对应的方言。做数仓需要大批量解析 JSON 数据,希望能根据 JSONSchema 定义自动建表,节省工作量,并且不用为了复杂 JSON 建对象,也不用为此服务发版。这个工具的应用场景可能不是那么多,希望能帮到你,毕竟自己写也挺麻烦的。如果想要获取根据 JSON 数据生成 Insert 语句的同学,请查看下一篇文章。

 

生成规则

1、表数量与JsonSchema的properties节点数保持一致

2、数据类型转换:

| schemaType | clickHouseType |
| --- | --- |
| string | String |
| integer | Int64 |
| boolean | String |
| number | Decimal64(8) |

3、关联

子父节点通过自定义ID关联,每个节点会自动生成 表名 + _jzx_id 字段,并且作为主键

子节点中会保存父节点的主键用来做关联映射

如表:crm_custom_list_fields_field_fieldScope

该表中会存在自己的主键:crm_custom_list_fields_field_fieldScope_jzx_id

还会存在父级节点的主键:crm_custom_list_fields_field_jzx_id

4、表名定义

表名 = 数据源 + _ + 数据场景 + _ + json节点key通过下划线拼接

如: crm_custom_list_fields_field_fieldScope

其中 crm为数据源, custom为数据源中的一个数据场景,list_fields_field_fieldScope为JSON数据的节点

5、clickhouse对大小写敏感,对于json中的key不做任何处理,原key值直接用于表名或字段名拼接

6、jsonSchema中的required定义会将数据库建表字段设置为不可为空

7、每张表都会自动创建jzx_create_date字段,保存数据插入的时间,精确到秒,并且根据该时间通过toYYYYMM()函数进行数据分区

 

如果你用mybatis的话建议最后生成的SQL这么执行:

    /**
     * Executes the given SQL text as-is.
     * <p>
     * The statement is spliced in through {@code ${sql}}, which is MyBatis text substitution rather
     * than a bound parameter, so only trusted, internally generated SQL (such as the generated
     * CREATE TABLE statements) should ever be passed here.
     *
     * @param sql the complete SQL statement to run
     */
    @Select(" ${sql} ")
    void execSql(@Param("sql") String sql);

 

代码

    // JSON Schema keyword names read from each schema node.
    private static final String SCHEMA_TYPE = "type";
    private static final String SCHEMA_PROPERTIES = "properties";
    private static final String SCHEMA_ITEMS = "items";
    private static final String SCHEMA_REQUIRED = "required";

    // JSON Schema "type" values the converter branches on.
    private static final String SCHEMA_TYPE_OBJECT = "object";
    private static final String SCHEMA_TYPE_ARRAY = "array";
    private static final String SCHEMA_TYPE_NULL = "null";
    private static final String SCHEMA_TYPE_INTEGER = "integer";
    private static final String SCHEMA_TYPE_NUMBER = "number";

    /**
     * Maps a scalar JSON Schema type to the ClickHouse column type text appended to the DDL.
     * The values carry surrounding spaces because they are concatenated directly into the statement.
     */
    private static final Map<String, String> typeConversion = new HashMap<>(16);
    static {
        typeConversion.put("string", " String ");
        typeConversion.put(SCHEMA_TYPE_INTEGER, " Int64 ");
        // Booleans are deliberately stored as String columns.
        typeConversion.put("boolean", " String ");
        typeConversion.put(SCHEMA_TYPE_NUMBER, " Decimal64(8) ");
    }


/**
     * Converts a JSON Schema document into ClickHouse CREATE TABLE statements.
     * <p>
     * One schema can yield several statements: the root table is named {@code source + "_" + scene},
     * and nested object/array nodes are handled by the object and array converters this method
     * delegates to.
     *
     * @param source    data source name, first part of every table name
     * @param scene     data scene within the source, second part of every table name
     * @param schema    the JSON Schema as text; its root {@code type} must be {@code object} or {@code array}
     * @param mustTable names of the tables to return; when null or empty, all tables are returned
     * @return the generated CREATE TABLE statements
     * @throws BusinessErrException if the schema parses to nothing or its root type is neither
     *                              {@code object} nor {@code array}
     */
    public static List<String> jsonSchema2CreateTableSql(String source, String scene, String schema, List<String> mustTable) {
        JSONObject schemaNode = JSON.parseObject(schema);
        if (schemaNode == null) {
            // A null/blank schema would otherwise surface as a bare NullPointerException below.
            throw new BusinessErrException("Unable to resolve JSONSchema!");
        }
        String nodeType = schemaNode.getString(SCHEMA_TYPE);
        String rootTableName = source + "_" + scene;
        if (SCHEMA_TYPE_OBJECT.equals(nodeType)) {
            return jsonObj2CreateTableSql(rootTableName, null, schemaNode.getJSONObject(SCHEMA_PROPERTIES), schemaNode.getJSONArray(SCHEMA_REQUIRED), mustTable);
        } else if (SCHEMA_TYPE_ARRAY.equals(nodeType)) {
            return jsonArray2CreateTableSql(rootTableName, null, schemaNode.getJSONObject(SCHEMA_ITEMS), mustTable);
        }
        throw new BusinessErrException("Unable to resolve JSONSchema!");
    }

    /**
     * Builds the CREATE TABLE statement for a JSON Schema {@code object} node and, recursively,
     * for every nested object/array property.
     * <p>
     * The table always gets a {@code <tableName>_jzx_id} String column, the parent's id column when
     * {@code parentIdName} is given, one column per scalar property, and a trailing
     * {@code jzx_create_date DateTime} column used for monthly partitioning. Properties of type
     * {@code object} or {@code array} become child tables named {@code tableName + "_" + key}
     * instead of columns; properties without a type, or of type {@code null}, are skipped.
     * Statements for child tables are added to the result before this table's own statement.
     *
     * @param tableName    name of the table created for this node
     * @param parentIdName id column name of the parent table, or null for the root node
     * @param properties   the node's {@code properties} object; null yields an empty result
     * @param required     the node's {@code required} array; may be null, in which case every
     *                     column is emitted as {@code Nullable(...)}
     * @param mustTable    names of the tables to return; when null or empty, all tables are returned
     * @return the generated statements for this node and its descendants, filtered by {@code mustTable}
     * @throws BusinessErrException if a scalar property has a type with no ClickHouse mapping
     */
    private static List<String> jsonObj2CreateTableSql(String tableName, String parentIdName, JSONObject properties, JSONArray required, final List<String> mustTable) {
        if (properties == null) {
            return new ArrayList<>();
        }
        List<String> result = new ArrayList<>();
        String tableKey = tableName + "_jzx_id";
        StringBuilder sql = new StringBuilder("CREATE TABLE ")
                .append(tableName)
                .append(" ON CLUSTER default ( ")
                .append(tableKey)
                .append(" String, ");
        if (parentIdName != null) {
            sql.append(parentIdName)
                    .append(" String, ");
        }
        for (String key : properties.keySet()) {
            JSONObject typeObj = properties.getJSONObject(key);
            if (typeObj == null) {
                // A property declared as JSON null carries no type information; skip it.
                continue;
            }
            String type = typeObj.getString(SCHEMA_TYPE);
            if (StringUtils.isEmpty(type)) {
                continue;
            }
            switch (type) {
                case SCHEMA_TYPE_OBJECT:
                    // Nested object -> child table that references this table's id column.
                    result.addAll(jsonObj2CreateTableSql(tableName + "_" + key, tableKey, typeObj.getJSONObject(SCHEMA_PROPERTIES), typeObj.getJSONArray(SCHEMA_REQUIRED), mustTable));
                    continue;
                case SCHEMA_TYPE_ARRAY:
                    // Nested array -> child table built from the array's item schema.
                    result.addAll(jsonArray2CreateTableSql(tableName + "_" + key, tableKey, typeObj.getJSONObject(SCHEMA_ITEMS), mustTable));
                    continue;
                case SCHEMA_TYPE_NULL:
                    continue;
                default:
                    break;
            }

            // Resolve the column type before touching the statement so an unknown type fails fast.
            String columnType = (String) typeConversion.get(type);
            if (StringUtils.isEmpty(columnType)) {
                throw new BusinessErrException("Unrecognized type:" + type);
            }
            // "required" is optional in JSON Schema; without it no column is mandatory.
            boolean nullable = required == null || !required.contains(key);

            sql.append(key);
            if (nullable) {
                sql.append(" Nullable(");
            }
            sql.append(columnType);
            if (nullable) {
                sql.append(")");
            }
            sql.append(", ");
        }
        sql.append(" jzx_create_date DateTime ");
        sql.append(") ENGINE = ReplicatedMergeTree( " +
                "           '/clickhouse/tables/"+ tableName +"/{shard}', " +
                "           '{replica}') " +
                "      PARTITION BY toYYYYMM(jzx_create_date) " +
                "      ORDER BY (intHash32(jzx_create_date), "+ tableKey +") " +
                "      SETTINGS index_granularity = 8192;");
        // The filter only decides whether this table's statement is returned; children were
        // already visited above and are filtered by their own names.
        if (CollectionUtils.isEmpty(mustTable) || mustTable.contains(tableName)) {
            result.add(sql.toString());
        }
        return result;
    }

    /**
     * Builds the CREATE TABLE statements for a JSON Schema {@code array} node.
     * <p>
     * The array itself produces no table; its {@code items} schema decides the outcome:
     * nested arrays are unwrapped recursively under the same table name, object items are
     * converted as an object node, and missing/untyped/{@code null} items produce nothing.
     *
     * @param tableName    name of the table to create for this node
     * @param parentIdName id column name of the parent table, or null for the root node
     * @param items        the array's {@code items} schema; null yields an empty result
     * @param mustTable    names of the tables to return; when null or empty, all tables are returned
     * @return the generated statements, possibly empty
     * @throws BusinessErrException if the item type is anything other than array, object or null
     *                              (arrays of scalar values are not supported)
     */
    private static List<String> jsonArray2CreateTableSql(String tableName, String parentIdName, JSONObject items, List<String> mustTable) {
        if (items == null) {
            return new ArrayList<>();
        }
        String type = items.getString(SCHEMA_TYPE);
        if (StringUtils.isEmpty(type)) {
            return new ArrayList<>();
        }
        switch (type) {
            case SCHEMA_TYPE_ARRAY:
                // Array of arrays: keep the same table name and descend into the inner items.
                return jsonArray2CreateTableSql(tableName, parentIdName, items.getJSONObject(SCHEMA_ITEMS), mustTable);
            case SCHEMA_TYPE_OBJECT:
                return jsonObj2CreateTableSql(tableName, parentIdName, items.getJSONObject(SCHEMA_PROPERTIES), items.getJSONArray(SCHEMA_REQUIRED), mustTable);
            case SCHEMA_TYPE_NULL:
                return new ArrayList<>();
            default:
                throw new BusinessErrException("Unrecognized type:" + type);
        }
    }

 

你可能感兴趣的:(JAVA,数据库,java,数据库,clickHouse,数仓)