MyJdbcRDDDemo
package day0613

import java.sql.DriverManager

import org.apache.spark.rdd.JdbcRDD
import org.apache.spark.{SparkConf, SparkContext}

// Read data from Oracle with JdbcRDD. The two ? placeholders in the SQL are
// bound to the lower and upper bounds (2000 and 10 here), and numPartitions (1)
// controls how that range is split across partitions.
object MyJdbcRDDDemo {

  // Connection factory: JdbcRDD calls this on the executor to open a connection.
  val connection = () => {
    Class.forName("oracle.jdbc.OracleDriver").newInstance()
    DriverManager.getConnection("jdbc:oracle:thin:@192.168.157.101:1521/orcl.example.com", "scott", "tiger")
  }

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("MyJdbcRDDDemo").setMaster("local")
    val sc = new SparkContext(conf)

    // Map each ResultSet row to (ename, sal): columns 2 (ENAME) and 6 (SAL)
    // in the standard SCOTT.EMP layout.
    val oracleRDD = new JdbcRDD(sc, connection, "select * from emp where sal>? and deptno=?", 2000, 10, 1, r => {
      val ename = r.getString(2)
      val sal = r.getInt(6)
      (ename, sal)
    })

    val result = oracleRDD.collect()
    println(result.toBuffer)
    sc.stop()
  }
}
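JdbcRDD always substitutes lowerBound and upperBound into the two ? placeholders; with numPartitions = 1 the demo above simply reuses them as the sal and deptno filter values. Below is a minimal sketch (not from the original) of the more typical usage, where a numeric key range is split across partitions. It reuses sc and connection from the demo and assumes the standard SCOTT.EMP sample data, whose EMPNO values run from 7369 to 7934; partitionedRDD is a hypothetical name.

    // Sketch: split EMPNO 7369..7934 into 3 sub-ranges, one query per partition.
    val partitionedRDD = new JdbcRDD(
      sc,
      connection,
      "select ename, sal from emp where empno >= ? and empno <= ?",
      7369, 7934, 3,
      r => (r.getString(1), r.getInt(2))   // columns 1 and 2 of the select list
    )
    println(partitionedRDD.collect().toBuffer)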
MyOracleDemo
package day0613
import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.spark.{SparkConf, SparkContext}

// Parse a Tomcat access log, extract the requested JSP name from each line,
// and insert the (jspName, 1) records into Oracle, one connection per partition.
object MyOracleDemo {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("MyWebLogDemo").setMaster("local")
    val sc = new SparkContext(conf)

    val rdd1 = sc.textFile("D:\\temp\\localhost_access_log.2017-07-30.txt").map { line =>
      // The request is quoted, e.g. "GET /some/page.jsp HTTP/1.1":
      // take the text between the quotes, then the middle token (the URI),
      // then the part after the last "/" as the JSP name.
      val index1 = line.indexOf("\"")
      val index2 = line.lastIndexOf("\"")
      val str1 = line.substring(index1 + 1, index2)
      val index3 = str1.indexOf(" ")
      val index4 = str1.lastIndexOf(" ")
      val str2 = str1.substring(index3 + 1, index4)
      val jspName = str2.substring(str2.lastIndexOf("/") + 1)
      (jspName, 1)
    }

    // Open one JDBC connection per partition instead of one per record.
    rdd1.foreachPartition(saveToOracle)
  }

  def saveToOracle(it: Iterator[(String, Int)]): Unit = {
    var conn: Connection = null
    var pst: PreparedStatement = null
    try {
      conn = DriverManager.getConnection("jdbc:oracle:thin:@192.168.157.101:1521/orcl.example.com", "scott", "tiger")
      pst = conn.prepareStatement("insert into result values(?,?)")
      it.foreach { data =>
        pst.setString(1, data._1)
        pst.setInt(2, data._2)
        pst.executeUpdate()
      }
    } catch {
      case e1: Exception => e1.printStackTrace()
    } finally {
      if (pst != null) pst.close()
      if (conn != null) conn.close()
    }
  }
}
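saveToOracle issues one executeUpdate per record, which costs a database round trip each time. A hedged variant (not in the original) using the standard JDBC batch API is sketched below; saveToOracleBatched is a hypothetical name, and it relies on the same imports as MyOracleDemo. It could be passed to foreachPartition in place of saveToOracle.

  // Sketch: same logic as saveToOracle, but queue inserts and send them in one batch.
  def saveToOracleBatched(it: Iterator[(String, Int)]): Unit = {
    var conn: Connection = null
    var pst: PreparedStatement = null
    try {
      conn = DriverManager.getConnection("jdbc:oracle:thin:@192.168.157.101:1521/orcl.example.com", "scott", "tiger")
      pst = conn.prepareStatement("insert into result values(?,?)")
      it.foreach { data =>
        pst.setString(1, data._1)
        pst.setInt(2, data._2)
        pst.addBatch()           // queue the insert locally
      }
      pst.executeBatch()         // send all queued inserts in one round trip
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      if (pst != null) pst.close()
      if (conn != null) conn.close()
    }
  }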
Demo1
package day0615

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Create a DataFrame by specifying the schema explicitly with StructType.
object SparkSQLDemo1 {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Create the SparkContext through a SparkSession instead of constructing it directly.
    val spark = SparkSession.builder().master("local").appName("SparkSQLDemo1").getOrCreate()

    // Read the raw data through the SparkContext owned by the SparkSession.
    val studentRDD = spark.sparkContext.textFile("d:\\temp\\students.txt").map(_.split(" "))

    // Map each record in the RDD to a Row.
    val rowRDD = studentRDD.map(student => Row(student(0).toInt, student(1), student(2).toInt))

    // Define the table schema with StructType.
    val schema = StructType(List(
      StructField("id", IntegerType, true),
      StructField("name", StringType, true),
      StructField("age", IntegerType, true)
    ))

    // Create the DataFrame from the Row RDD and the schema.
    val studentDF = spark.createDataFrame(rowRDD, schema)

    // Register the DataFrame as a temporary view.
    studentDF.createOrReplaceTempView("student")

    // Run SQL against the view.
    spark.sql("select * from student order by age").show

    // Stop the SparkSession.
    spark.stop()
  }
}
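The registered view is only one way to query: the same DataFrame also accepts the untyped DataFrame API directly. A minimal sketch, reusing studentDF from the demo above (it would need to run before spark.stop()):

    // Sketch: the same ordering expressed with the DataFrame API instead of SQL.
    import org.apache.spark.sql.functions.col
    studentDF.orderBy(col("age")).show()
    // Inspect the schema that was attached to the raw rows.
    studentDF.printSchema()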
Demo2
package day0615

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession

// Define a case class that describes the table schema.
case class Student(stuID: Int, stuName: String, stuAge: Int)

object SparkSQLDemo2 {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Create the SparkContext through a SparkSession.
    val spark = SparkSession.builder().master("local").appName("SparkSQLDemo2").getOrCreate()

    // Read the raw data through the SparkContext owned by the SparkSession.
    val studentRDD = spark.sparkContext.textFile("d:\\temp\\students.txt").map(_.split(" "))

    // Bind each record to the case class, which supplies the schema.
    val dataRDD = studentRDD.map(data => Student(data(0).toInt, data(1), data(2).toInt))

    // Converting the RDD to a DataFrame requires the implicit conversions.
    import spark.implicits._
    val studentDF = dataRDD.toDF()

    // Register the DataFrame as a temporary view.
    studentDF.createOrReplaceTempView("student")

    // Query the students older than 24.
    //spark.sql("select * from student order by stuage").show
    spark.sql("select * from student where stuage > 24").show

    spark.stop()
  }
}
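Because the schema comes from a case class, the same RDD can also be turned into a typed Dataset instead of an untyped DataFrame. A minimal sketch, reusing spark and dataRDD from Demo2; studentDS is a hypothetical name.

    // Sketch: the same case-class RDD as a typed Dataset[Student].
    import spark.implicits._
    val studentDS = dataRDD.toDS()
    studentDS.filter(_.stuAge > 24).show()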
Demo3
package day0615

import java.util.Properties

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession

// Define a case class that describes the table schema.
case class Student1(stuID: Int, stuName: String, stuAge: Int)

object SparkSQLDemo3 {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Create the SparkContext through a SparkSession.
    val spark = SparkSession.builder().master("local").appName("SparkSQLDemo3").getOrCreate()

    // Read the input path from the command-line arguments instead of hard-coding it.
    //val studentRDD = spark.sparkContext.textFile("d:\\temp\\students.txt").map(_.split(" "))
    val studentRDD = spark.sparkContext.textFile(args(0)).map(_.split(" "))

    // Bind each record to the case class, which supplies the schema.
    val dataRDD = studentRDD.map(data => Student1(data(0).toInt, data(1), data(2).toInt))

    // Converting the RDD to a DataFrame requires the implicit conversions.
    import spark.implicits._
    val studentDF = dataRDD.toDF()

    // Register the DataFrame as a temporary view.
    studentDF.createOrReplaceTempView("student")

    // Query the students older than 24.
    val result = spark.sql("select * from student where stuage > 24")

    // Save the result into Oracle via JDBC.
    val props = new Properties
    props.setProperty("user", "scott")
    props.setProperty("password", "tiger")
    result.write.jdbc("jdbc:oracle:thin:@192.168.157.101:1521/orcl.example.com",
      "scott.mystudent", props)

    spark.stop()
  }
}
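To check the write, the table can be read back with spark.read.jdbc using the same URL and connection properties. A minimal sketch reusing spark and props from Demo3 (it would have to run before spark.stop()); saved is a hypothetical name.

    // Sketch: read the saved table back from Oracle to verify the write.
    val saved = spark.read.jdbc("jdbc:oracle:thin:@192.168.157.101:1521/orcl.example.com",
      "scott.mystudent", props)
    saved.show()

Note that write.jdbc with the default save mode fails if scott.mystudent already exists; result.write.mode("append").jdbc(...) appends to an existing table instead.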