Flink 1.13 Table API: querying Hive data stored on Tencent Cloud COS

Preface

While debugging the Flink Table API against Hive data I hit a lot of pitfalls, especially because the Hive data lives on Tencent Cloud COS and I was querying across clusters, so all sorts of dependency and environment problems had to be solved. The code and pom.xml below have been debugged successfully and run both locally and on the cluster on YARN. When running locally, add dev as a program argument in IDEA; when running on YARN on the cluster, no argument is needed.

Code

package com.bigdata.etl

import org.apache.flink.table.api.{EnvironmentSettings, TableEnvironment}
import org.apache.flink.table.catalog.hive.HiveCatalog

object FlinkTableTest extends App {

  // In the local IDEA environment the user must be set to hadoop
  System.setProperty("HADOOP_USER_NAME", "hadoop")
  val settings = EnvironmentSettings.newInstance().useBlinkPlanner().build()
  println(settings.isStreamingMode)
  val stenv = TableEnvironment.create(settings)
  // Query the metadata of the default catalog
  stenv.executeSql("show catalogs").print()
  stenv.useCatalog("default_catalog")
  stenv.executeSql("show databases").print()
  stenv.executeSql("select 1").print()
  println("-----separator--------------")

  val name = "hive"
  val defaultDatabase = "odl"
  var hiveConfDir = ""
  var hadoopConfDir = ""
  val hiveVersion = "2.3.6"
  // Any program argument (e.g. "dev") selects the local config paths;
  // with no arguments we are running on the cluster (on YARN)
  if (args.length > 0) {
    hiveConfDir = "/Users/duzhixin/Documents/flink-hive-conf"
    hadoopConfDir = "/Users/duzhixin/Documents/flink-hive-conf"
  } else {
    hiveConfDir = "/usr/local/service/hive/conf"
  }

  // Pass in the Hive and Hadoop config directories here. The local IDEA environment
  // needs both the Hive and the Hadoop config files to be specified; on the cluster
  // the Hadoop configuration is picked up from the environment and can be left out.
  val hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, hadoopConfDir, hiveVersion)

  hive.getHiveConf.set("streaming-source.enable", "true")
  stenv.getConfig.getConfiguration.setString("streaming-source.enable", "true")
  stenv.getConfig.getConfiguration.setString("table.exec.hive.infer-source-parallelism.max", "10000")
  stenv.getConfig.getConfiguration.setString("table.exec.hive.infer-source-parallelism", "true")

  stenv.registerCatalog("hive", hive)

  // Set the HiveCatalog as the current catalog of the session
  stenv.useCatalog("hive")

  // Run SQL against the Hive metadata
  stenv.executeSql("show databases").print()

  stenv.executeSql("show tables").print()

  stenv.executeSql("select 1 from test.app limit 1").print()

  stenv.executeSql("select * from odl.tb_book where dt='2021-06-05' limit 10").print()

  stenv.executeSql("select * from odl.dadian where dt='2021-06-05' limit 10").print()

}
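
As an aside, instead of constructing the HiveCatalog in code, Flink 1.13 can also register it through SQL DDL. A minimal sketch, assuming the same local hive-conf-dir as above (the catalog name myhive is arbitrary):

  // Alternative: register the Hive catalog via SQL DDL instead of new HiveCatalog(...)
  stenv.executeSql(
    """
      |CREATE CATALOG myhive WITH (
      |  'type' = 'hive',
      |  'default-database' = 'odl',
      |  'hive-conf-dir' = '/Users/duzhixin/Documents/flink-hive-conf',
      |  'hive-version' = '2.3.6'
      |)
      |""".stripMargin)
  stenv.useCatalog("myhive")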

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
        <repository>
            <id>spring-plugin</id>
            <url>https://repo.spring.io/plugins-release/</url>
        </repository>
    </repositories>

    <modelVersion>4.0.0</modelVersion>

    <groupId>org.jiashu</groupId>
    <artifactId>flink-dw</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <spark.version>2.4.3</spark.version>
        <scala.version>2.12</scala.version>
        <!-- version values whose property names were lost in extraction:
             2.8.2, 1.2.14, 9.2.5.v20141112, 2.17, 1.8, 1.2.0 -->
        <hive.version>2.3.6</hive.version>
        <flink.version>1.13.0</flink.version>
    </properties>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <finalName>flink-dw</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.12</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <classesDirectory>target/classes/</classesDirectory>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <classpathPrefix>.</classpathPrefix>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <type>jar</type>
                            <includeTypes>jar</includeTypes>
                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.codehaus.jackson</groupId>
            <artifactId>jackson-core-asl</artifactId>
            <version>1.9.13</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_2.12</artifactId>
            <version>1.13.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.12</artifactId>
            <version>1.13.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>2.8.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.8.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.8.5</version>
        </dependency>
        <dependency>
            <groupId>hadoop-util</groupId>
            <artifactId>hadoop-util</artifactId>
            <version>0.3.0</version>
        </dependency>
        <dependency>
            <groupId>com.qcloud</groupId>
            <artifactId>cos_api</artifactId>
            <version>5.6.42</version>
        </dependency>
        <dependency>
            <groupId>com.qcloud</groupId>
            <artifactId>qcloud-java-sdk</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.11</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-compress</artifactId>
            <version>1.19</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.11</version>
        </dependency>
        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-resources-plugin</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-dependency-plugin</artifactId>
            <version>2.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.4</version>
        </dependency>
    </dependencies>
</project>

If it still doesn't work, try adding the hadoop-cos-2.8.5-5.9.22.jar dependency as well.
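
The Maven coordinates below are an assumption inferred from that jar's file name (com.qcloud is the group Tencent publishes hadoop-cos under); check the exact version available in your repository:

    <dependency>
        <groupId>com.qcloud</groupId>
        <artifactId>hadoop-cos</artifactId>
        <version>2.8.5-5.9.22</version>
    </dependency>

A COS-backed warehouse also needs the CosN file system wired into the Hadoop configuration. A minimal core-site.xml sketch, with placeholder credentials and an example region:

    <property>
        <name>fs.cosn.impl</name>
        <value>org.apache.hadoop.fs.CosFileSystem</value>
    </property>
    <property>
        <name>fs.AbstractFileSystem.cosn.impl</name>
        <value>org.apache.hadoop.fs.CosN</value>
    </property>
    <property>
        <name>fs.cosn.userinfo.secretId</name>
        <value>YOUR_SECRET_ID</value>
    </property>
    <property>
        <name>fs.cosn.userinfo.secretKey</name>
        <value>YOUR_SECRET_KEY</value>
    </property>
    <property>
        <name>fs.cosn.bucket.region</name>
        <value>ap-guangzhou</value>
    </property>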
