sparksql 从oracle读取数据然后整合到elasticsearch

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    4.0.0

    com.ftms.spark
    ftms
    1.0-SNAPSHOT
    
        1.7
        1.7
        UTF-8
        2.11.8
        2.0.0
        2.7.3
    
    
        
            org.scala-lang
            scala-library
            ${scala.version}
        
    
    
    
    
    
        
            org.scala-lang
            scala-xml
            2.11.0-M4
        
        
            org.slf4j
            slf4j-api
            1.6.6
        
        
            org.slf4j
            slf4j-log4j12
            1.6.6
        
        
            log4j
            log4j
            1.2.16
        

        
            org.apache.spark
            spark-core_2.11
            ${spark.version}
        

        
            org.apache.hadoop
            hadoop-client
            ${hadoop.version}
        

        
            org.apache.spark
            spark-sql_2.11
            ${spark.version}
        

        

        
            org.apache.hbase
            hbase-server
            1.2.4
        
        
        
        
            mysql
            mysql-connector-java
            5.1.38
        
        

        
            
            
            
            
                
                    
                    
                
                
                    
                    
                
                
                    
                    
                
                
                    
                    
                
            
        
        
            com.oracle
            ojdbc6
            11.2.0.3
        
        
            org.elasticsearch
            elasticsearch-spark-20_2.11
            5.5.1
        
    
    
    
        
            
                net.alchim31.maven
                scala-maven-plugin
                3.2.2
            
            
                org.apache.maven.plugins
                maven-compiler-plugin
                3.5.1
            
        
    
    
        
            net.alchim31.maven
            scala-maven-plugin
            
                
                    scala-compile-first
                    process-resources
                    
                        add-source
                        compile
                    
                
                
                    scala-test-compile
                    process-test-resources
                    
                        testCompile
                    
                
            
        

        
            org.apache.maven.plugins
            maven-compiler-plugin
            
                
                    compile
                    
                        compile
                    
                
            
        

        
            org.apache.maven.plugins
            maven-shade-plugin
            2.4.3
            
                
                    package
                    
                        shade
                    
                    
                        
                            
                                *:*
                                
                                    META-INF/*.SF
                                    META-INF/*.DSA
                                    META-INF/*.RSA
                                
                            
                         
                    
                
            
        
    
    

package com.ftms.scala.result

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark.sql.EsSparkSQL
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

/**
  * Created by othc on 2017/10/10.
  */
object Test {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.ERROR)
    val conf = new SparkConf().setAppName("import").setMaster("local[2]")
      .set("spark.sql.warehouse.dir","file:///C:/WorkSpace/IdeaProjects/FTMS_MAVEN/spark-warehouse")
//      .set("es.index.auto.create", "true")
      .set("es.nodes", "192.168.1.212:9200")
      .set("es.mapping.id", "VINNO")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    //车辆
    val jdbcMapv = Map("url" -> "jdbc:oracle:thin:@//192.168.1.212:1521/ftms",
      "user" -> "ftms",
      "password" -> "ftms",
      "dbtable" -> "MTD_VEHICLE",
      "driver" -> "oracle.jdbc.driver.OracleDriver")
    val jdbcDFv = sqlContext.read.options(jdbcMapv).format("jdbc").load
    jdbcDFv.registerTempTable("vehicle")
    val data: DataFrame = sqlContext.sql("select * from vehicle")
    println(data.count())

    //联系人信息
    val jdbcMapm = Map("url" -> "jdbc:oracle:thin:@//192.168.1.212:1521/ftms",
      "user" -> "ftms",
      "password" -> "ftms",
      "dbtable" -> "MTD_CONTACTINFORMATION",
      "driver" -> "oracle.jdbc.driver.OracleDriver")
    val jdbcDFm = sqlContext.read.options(jdbcMapm).format("jdbc").load
    jdbcDFm.registerTempTable("information")
    val info: DataFrame = sqlContext.sql("select * from information")

    //join
    val join: DataFrame = sqlContext.sql(
      """
        |select v.*,concat_ws(',', collect_set(n.CONTACTDETAIL)) as CONTACTDETAIL from  vehicle v
        |left join (
        |select m.* from information m where m.CONTACTTYPE in ('1','2','3','4')
        |) n on v.customerid=n.customerid
        |group by v.VEHICLEID,v.VINNO,v.CUSTOMERID,v.VEHICLETYPE,v.VEHICLEUSETYPE,v.VEHICLENAMECODE,
        |  v.VEHICLENAME,v.VEHICLEGRADE,v.MISSION,v.MODEL,v.MODELYEAR,v.SFX,v.COLORCODE,v.COLORNAME,
        |  v.ENGINENO,v.EXHAUST,v.REGISTERNO,v.BUYDATE,v.SALESDEALERCODE,v.DEALERCODE,v.BILLTIME,v.BILLPRICE,
        |  v.BILLNUMBER,v.DELIVERYENDDATE,v.BODYPRICE,v.BODYCUT,v.BODYPAY,v.TAXRATE,v.TAX,v.REMARK,v.LOANSTATE,
        |  v.LOANGIVEOUTTIME,v.LOANORGRAN,v.PRODUCTYPE,v.YEARRATE,v.DOWNPAYMENT,v.LOANAMOUNT,v.INVALIDDATA,
        |  v.CREDATE,v.CREATEUSER,v.UPDATETIME,v.UPDATEUSER,v.SALESDATE,v.CANCELDATE,v.BUYERID,
        |  v.LOANPERIED,v.ISTTIME,v.UPTTIME,v.DELTIME
      """.stripMargin
    )

//    join.rdd.repartition(1).saveAsTextFile("C:\\data\\ttttt")

    val schema: StructType = join.schema
    println(schema)

    val res = join.select(join.col("VEHICLEID").cast(StringType).as("vehicleid"),
      join.col("VINNO").as("vinno"),join.col("CUSTOMERID").as("customerid"),join.col("VEHICLETYPE").as("vehicletype"),
      join.col("VEHICLEUSETYPE").as("vehicleusetype"),join.col("VEHICLENAMECODE").as("vehiclenamecode"),join.col("VEHICLENAME").as("vehiclename"),
      join.col("VEHICLEGRADE").as("vehicleusetype"),join.col("MISSION").as("mission"),join.col("MODEL").as("model"),
      join.col("MODELYEAR").as("modelyear"),join.col("SFX").as("sfx"),join.col("COLORCODE").as("colorcode"),join.col("COLORNAME").as("colorname"),
      join.col("ENGINENO").as("engineno"),join.col("EXHAUST").as("exhaust"),join.col("REGISTERNO").as("registerno"),join.col("BUYDATE").as("buydate"),
      join.col("SALESDEALERCODE").as("salesdealercode"),join.col("DEALERCODE").as("dealercode"),join.col("BILLTIME").as("billtime"),join.col("BILLPRICE").cast(StringType).as("billprice"),
      join.col("BILLNUMBER").as("billnumber"),join.col("DELIVERYENDDATE").as("deliveryendddate"),join.col("BODYPRICE").cast(IntegerType).as("bodyprice"),join.col("BODYCUT").cast(IntegerType).as("bodycut"),
      join.col("BODYPAY").cast(IntegerType).as("bodypay"),join.col("TAXRATE").cast(IntegerType).as("taxrate"),join.col("TAX").cast(IntegerType).as("tax"),join.col("REMARK").as("remark"),
      join.col("LOANSTATE"),join.col("LOANGIVEOUTTIME"),join.col("LOANORGRAN"),join.col("PRODUCTYPE"),
      join.col("YEARRATE").cast(IntegerType),join.col("DOWNPAYMENT").cast(IntegerType),join.col("LOANAMOUNT").cast(IntegerType),join.col("INVALIDDATA"),
      join.col("CREDATE"),join.col("CREATEUSER"),join.col("UPDATETIME"),join.col("UPDATEUSER"),
      join.col("SALESDATE"),join.col("CANCELDATE"),join.col("BUYERID"),join.col("LOANPERIED"),
      join.col("ISTTIME"),join.col("UPTTIME"),join.col("DELTIME"),join.col("CONTACTDETAIL")
    )

    EsSparkSQL.saveToEs(res,"mtd_vehicle_order_history/mtd_vehicle_type")
    sc.stop()







  }
}

你可能感兴趣的:(spark)