Spark + Phoenix: reading table data via JDBC

No preamble: straight to the code, to address the immediate need.

Add the Maven dependencies

<dependency>
    <groupId>org.apache.phoenix</groupId>
    <artifactId>phoenix-core</artifactId>
    <version>${phoenix.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.phoenix</groupId>
    <artifactId>phoenix-spark</artifactId>
    <version>${phoenix.version}</version>
</dependency>
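The ${phoenix.version} property is not shown above and must be defined in your pom. A minimal sketch; the exact version string is a placeholder and should match the Phoenix release built for your HBase version:

<properties>
    <!-- placeholder: use the Phoenix release matching your HBase version -->
    <phoenix.version>4.14.0-HBase-1.4</phoenix.version>
</properties>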
Reading Phoenix data with Spark
package com.cctsoft.spark.offline;

import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * Created with IntelliJ IDEA.
 * User: Kevin Liu
 * CreateDate: 2018/6/8 10:32
 * Description: read data from a Phoenix table
 */
public class FaceCrashImsiJob {
    public static void main(String[] args) throws AnalysisException {
        SparkSession spark = SparkSession
                .builder()
                .appName("Spark Phoenix JDBC read example")
                .master("local")
                .getOrCreate();

        runBasicDataFrameExample(spark);


        spark.stop();
    }


    private static void runBasicDataFrameExample(SparkSession spark) throws AnalysisException {

        // Time window and ZooKeeper quorum; placeholder values, adjust to your environment.
        String imsiDataStartTime = "2018-06-08 00:00:00";
        String imsiDataEndTime = "2018-06-09 00:00:00";
        String zookeeper = "localhost";

        // Fetch the 4G (LTE) records inside the time window. Phoenix accepts a
        // subquery as the JDBC dbtable as long as it is aliased.
        String imsiTableName = "(select * from LTE_DATA where to_char(cap_time) >= '" + imsiDataStartTime
                + "' and to_char(cap_time) < '" + imsiDataEndTime + "') as LTE_DATA_FILTER";
        System.out.println("imsiTableName:" + imsiTableName);

        Dataset<Row> df = spark.read().format("jdbc")
                .option("driver", "org.apache.phoenix.jdbc.PhoenixDriver")
                .option("url", "jdbc:phoenix:" + zookeeper + ":2181")
                .option("dbtable", imsiTableName)
                .load();

        // createOrReplaceTempView replaces the deprecated registerTempTable.
        df.createOrReplaceTempView("lte_data_tmp");
        Dataset<Row> lteDataAll = spark.sql(
                "select lte_dev_code, cap_time, imsi from lte_data_tmp order by cap_time desc");
        lteDataAll.show();
    }
}
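As an alternative to plain JDBC, the phoenix-spark connector added above can read a table directly and push filters down to Phoenix. A minimal Scala sketch, assuming the same LTE_DATA table and a ZooKeeper quorum at localhost:2181 (both placeholders):

import org.apache.spark.sql.SparkSession

object PhoenixConnectorReadExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("Phoenix connector read example")
      .master("local")
      .getOrCreate()

    // "org.apache.phoenix.spark" is the data source registered by phoenix-spark (Phoenix 4.x).
    val df = spark.read
      .format("org.apache.phoenix.spark")
      .option("table", "LTE_DATA")       // placeholder table name
      .option("zkUrl", "localhost:2181") // placeholder ZooKeeper quorum
      .load()

    // Predicates on the DataFrame are pushed down to Phoenix where possible.
    df.filter("IMSI is not null").show()

    spark.stop()
  }
}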
Writing data to Phoenix with Spark
package com.cctsoft.spark.offline
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.sql.Row

/**
  * Created with IntelliJ IDEA.
  * User: Kevin Liu
  * CreateDate: 2018/6/15 12:32
  * Description: write data into a Phoenix table
  */
object TestMain {
  def main(args: Array[String]): Unit = {
    // Build a real JavaRDD[Row] before calling batchSaveFaceImsi; the original
    // passed null here, which throws a NullPointerException. See the driver
    // sketch after this listing for one way to construct the input.
  }

  def batchSaveFaceImsi(imsiRdd: JavaRDD[Row]): Unit = {
    import org.apache.phoenix.spark._

    // Key each record as "mac:imsi" and drop rows whose IMSI is null or empty.
    // The null check must run before toString, otherwise the map itself can NPE.
    val rdd = imsiRdd.rdd
      .filter(x => x.get(1) != null && x.get(1).toString.nonEmpty)
      .map(x => (x.get(0).toString + ":" + x.get(1).toString,
                 x.get(0).toString,
                 x.get(1).toString))

    rdd.foreach(println)

    // saveToPhoenix takes the bare ZooKeeper quorum as zkUrl,
    // not a full jdbc:phoenix: connection URL.
    rdd.saveToPhoenix(
      "RESIDENT_TMP",
      Seq("ID", "DEVICE_MAC", "IMSI"),
      zkUrl = Some("abigdataclient1:2181")
    )
  }

}
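A hypothetical driver for the helper above, assuming a local SparkSession and that RESIDENT_TMP was created with a DDL along the lines of CREATE TABLE RESIDENT_TMP (ID VARCHAR PRIMARY KEY, DEVICE_MAC VARCHAR, IMSI VARCHAR); the sample rows are made up:

import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.{Row, SparkSession}

object BatchSaveDriver {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("Phoenix write example")
      .master("local")
      .getOrCreate()

    // Made-up sample input: (device_mac, imsi) pairs.
    val rows = java.util.Arrays.asList(
      Row("AA:BB:CC:01", "460001234567890"),
      Row("AA:BB:CC:02", "460009876543210"))
    val imsiRdd = new JavaSparkContext(spark.sparkContext).parallelize(rows)

    TestMain.batchSaveFaceImsi(imsiRdd)
    spark.stop()
  }
}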
