sparkSQL实际应用

 提交代码包

/usr/local/spark/bin$ spark-submit --class "getkv" /data/chun/sparktes.jar

 

1、查询KV

 

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
object kv{
  def main(args: Array[String]) {

    val conf = new SparkConf()
    val sc = new SparkContext(conf)
    val log=sc.textFile("hdfs://10.0.58.21:9000/falcon/2016/*/*/*.log")
    val rowRDD=log.map(line=>(line.split("\"message\":\"").last.split(" ").head.trim(),line.split("account: ").last.split(", args:").head))
    val k=rowRDD.filter({case(k,v) =>k.length==10 && !k.contains("TypeError:")}).filter({case(k,v)=>v.length==8})
    k.repartition(1).saveAsTextFile("file:////data/kv")
  }
}

 

2、关联MySQL

 #  spark-shell --driver-class-path /usr/local/spark/mysql/mysql.jar
val log=sc.textFile("hdfs://10.0.58.21:9000/falcon/2016/*/*/*.log") val rowRDD=log.map(line=>(line.split("\"message\":\"").last.split(" ").head.trim(),line.split("account: ").last.split(", args:").head)) val k=rowRDD.filter({case(k,v) =>k.length==10 && !k.contains("TypeError:")}).filter({case(k,v)=>v.length==7}) val s=k.toDF("date","No").registerTempTable("kv") val role=sqlContext.jdbc("jdbc:mysql://rdssw603u1t68figaia7.mysql.rds.aliyuncs.com:3306/falcon?user=wy_app&password=V0tkEIve2","role") val job=sqlContext.jdbc("jdbc:mysql://rdssw603u1t68figaia7.mysql.rds.aliyuncs.com:3306/falcon?user=wy_app&password=V0tkEIve2","job") val staff_and_job=sqlContext.jdbc("jdbc:mysql://rdssw603u1t68figaia7.mysql.rds.aliyuncs.com:3306/falcon?user=wy_app&password=V0tkEIve2","staff_and_job") val project=sqlContext.jdbc("jdbc:mysql://rdssw603u1t68figaia7.mysql.rds.aliyuncs.com:3306/falcon?user=wy_app&password=V0tkEIve2","project") val ro=role.toDF().registerTempTable("role") val jo=job.toDF().registerTempTable("job") val s=staff_and_job.toDF().registerTempTable("staff_and_job") val p=project.toDF().registerTempTable("project") val q=sqlContext.sql("SELECT project.`name`,project.`code`,staff_and_job .`staff_id` FROM project LEFT JOIN job ON project.`code`=job.`project_code` LEFT JOIN role ON job.`role_code`=role.`code` LEFT JOIN staff_and_job ON job.`id`=staff_and_job .`job_id` WHERE project.`is_spread`='1' AND role.`name`='人事专员' AND staff_and_job .`staff_id` IS NOT NULL") val q1=q.toDF("name","code","No").registerTempTable("p") val ed=sqlContext.sql("select p.name,count(distinct kv.No) from p join kv on p.No=kv.No group By p.name")

 

你可能感兴趣的:(sparkSQL实际应用)