flink接入http数据写到hive

首先说明:这个需求很奇怪,但是无所谓了。

```xml
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>1.14.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_2.11</artifactId>
        <version>1.14.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner_2.11</artifactId>
        <version>1.14.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-hive_2.11</artifactId>
        <version>1.14.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>1.14.4</version>
    </dependency>
    <dependency>
        <groupId>cn.hutool</groupId>
        <artifactId>hutool-all</artifactId>
        <version>5.3.7</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>2.1.1-cdh6.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.0.0-cdh6.3.2</version>
    </dependency>
</dependencies>
```
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.SqlDialect;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.catalog.hive.HiveCatalog;

import java.util.List;

import static org.apache.flink.table.api.Expressions.$;

public class GetPayCardInfoJob {

    /**
     * One-shot batch-style job: pulls card-payment records from an HTTP endpoint,
     * turns them into a Flink table, and appends them into a Hive table
     * ({@code test.card_payment}) through a registered {@link HiveCatalog}.
     *
     * @param args unused
     * @throws Exception propagated from Kerberos auth, the HTTP call, or Flink job submission
     */
    public static void main(String[] args) throws Exception {
        // Kerberos authentication must run before any Hive/HDFS access.
        KerberosUtils.do_auth_hive();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Fetch the HTTP payload and convert it to a bounded DataStream.
        // NOTE(review): doCall presumably returns a List of card-payment POJOs whose
        // fields match the names listed below — confirm against HttpUtil2.
        List cardPayMents = new HttpUtil2()
                .doCall("http://xxxxxx/api/GetShuakaListByTime?startTime=2022-08-30&endTime=2022-08-31");
        DataStreamSource dataStreamSource = env.fromCollection(cardPayMents);

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Hive catalog settings. hiveConfDir/hadoopConfDir point at the directory holding
        // hive-site.xml / core-site.xml etc. downloaded from the CDH cluster. Without the
        // Hadoop conf dir, SHOW DATABASES/TABLES works but SELECT fails with "unknown host".
        String name            = "s2cluster";
        String defaultDatabase = "odsiadata";
        String hiveConfDir     = "D:\\install\\code\\github\\dw_kpi2\\pay_card\\src\\main\\resources\\hive";
        String hadoopConfDir   = "D:\\install\\code\\github\\dw_kpi2\\pay_card\\src\\main\\resources\\hive";
        String version         = "2.1.1";
        HiveCatalog hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, hadoopConfDir, version);
        tableEnv.registerCatalog(name, hive);
        tableEnv.useCatalog(name);
        tableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);

        // Field names must be listed explicitly; without them the stream is mapped to a
        // single RAW column with no usable fields.
        Table table = tableEnv.fromDataStream(dataStreamSource,
                $("staffName"),
                $("routeName"),
                $("city"),
                $("bancheType"),
                $("plateNumber"),
                $("shuakaType"),
                $("shuakaTime"),
                $("tradeType"),
                $("vendorName"));

        // Register the view FIRST, then query it by name. The original concatenated the
        // Table object into the SQL string, which only worked via the side effect of
        // Table.toString() registering an anonymous view.
        tableEnv.createTemporaryView("httpTable", table);
        tableEnv.executeSql("select * from httpTable limit 10").print();

        // Consume the result: the original submitted this query but never read it,
        // leaving a dead statement.
        tableEnv.executeSql("select count(1) from test.card_payment").print();

        // Append the HTTP rows into the Hive table; executeInsert submits the job itself,
        // so no explicit env.execute() is needed.
        table.executeInsert("test.card_payment");
    }
}

这种做法不是特别好,因为 HTTP 接口返回的数据量一般很少。我这里读取的是班车刷卡数据,一天差不多有 3 万多条。

你可能感兴趣的:(flink,hive,http)