测试代码:
package make.zhangsheniMain
/**
* Hello world!
*
*/
import make.bean.CaseClass.{people, people_id}
import make.service.EsService
import make.tools.{DateUtils, PropertiesTool}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
/**
 * Manual smoke tests for [[EsService]]: reading documents from Elasticsearch as
 * RDDs / DataFrames, running query-DSL searches, and writing results back.
 *
 * Each `testN` method is a stand-alone scenario; `main` selects which one runs.
 */
object es_test {

  /** Prints the `username` and `password` entries read from `jdbc.properties`. */
  def properties_test(): Unit = {
    val username = PropertiesTool.getproperties("username", "jdbc.properties")
    val password = PropertiesTool.getproperties("password", "jdbc.properties")
    // Explicit tuple: identical output, without relying on argument auto-tupling.
    println((username, password))
  }

  /**
   * Reads a document field as a string.
   *
   * @param fields field map of one Elasticsearch document
   * @param key    field name
   * @return the field's `toString`, or `""` when the field is missing or null
   */
  private def stringField(fields: collection.Map[String, AnyRef], key: String): String =
    fields.get(key).flatMap(Option(_)).fold("")(_.toString)

  /**
   * Reads a document field as an `Int`.
   *
   * A blind `asInstanceOf[Int]` fails with `ClassCastException` when the value
   * arrives as a `java.lang.Long` or as a string, so any `Number` is accepted
   * and anything else is parsed from its string form.
   *
   * @param fields field map of one Elasticsearch document
   * @param key    field name
   * @return the numeric value, or `0` when the field is missing, null or not numeric
   */
  private def intField(fields: collection.Map[String, AnyRef], key: String): Int =
    fields.get(key).flatMap(Option(_)) match {
      case Some(n: Number) => n.intValue()
      case Some(other)     => scala.util.Try(other.toString.trim.toInt).getOrElse(0)
      case None            => 0
    }

  /**
   * Reads `person/man` both as an RDD (mapped to `people`) and as a DataFrame,
   * shows the DataFrame and its schema, then re-formats the `date` column via
   * Spark SQL.
   *
   * @param spark active Spark session
   */
  def test1(spark: SparkSession): Unit = {
    // Lazy: nothing is fetched for this RDD unless one of the actions below is enabled.
    val resrdd: RDD[people] = EsService.read4es(spark, "person/man").map { doc =>
      val fields = doc._2
      val name = stringField(fields, "name")
      val age = intField(fields, "age")
      val date = DateUtils.format_date(stringField(fields, "date"))
      people(name, age, date)
    }
    // resrdd.foreach(println)
    // EsService.save2es(resrdd, "people/man")
    val df = EsService.read4es_df(spark, "person/man")
    df.show()
    println(df.schema)
    df.createOrReplaceTempView("tmp")
    val sql =
      """
        |select name, age, from_unixtime(unix_timestamp(date,'EEE MMM dd HH:mm:ss zzz yyyy')) as date from tmp
      """.stripMargin
    spark.sql(sql).show()
  }

  /**
   * Runs a `match` query on `name` = "make" against `people/man` and prints
   * every hit.
   *
   * @param spark active Spark session
   */
  def test2(spark: SparkSession): Unit = {
    val query =
      """
        |{
        | "query":{
        | "match": {
        | "name": "make"
        | }
        | }
        |}
      """.stripMargin
    val resrdd = EsService.query4es(spark, query, "people/man")
    resrdd.foreach(println)
  }

  /**
   * Queries `person/man` for `name` = "瓦力", converts each hit into a
   * `people_id` carrying the document id, prints the rows and writes them to
   * `people/man`.
   *
   * @param spark active Spark session
   */
  def test3(spark: SparkSession): Unit = {
    val query =
      """
        |{
        | "query":{
        | "match": {
        | "name": "瓦力"
        | }
        | }
        |}
      """.stripMargin
    // Query Elasticsearch and write the hits into another index.
    val resrdd: RDD[people_id] = EsService.query4es(spark, query, "person/man").map { line =>
      val peopleid = line._1
      val fields = line._2
      val name = stringField(fields, "name")
      val age = intField(fields, "age")
      val date = DateUtils.format_date(stringField(fields, "date"))
      people_id(peopleid, name, age, date)
    }
    resrdd.foreach(println)
    EsService.save2es(resrdd, "people/man")
  }

  /**
   * Entry point: builds a local Spark session, runs the selected scenario and
   * always stops the session afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("IndicServiceApp")
      .config("spark.network.timeout", "1200s")
      .getOrCreate()
    try {
      // test1(spark)
      // test2(spark)
      test3(spark)
    } finally {
      // Release the session even when the scenario throws.
      spark.stop()
    }
  }

  // NOTE(review): not referenced anywhere in this object; kept for compatibility.
  case class person(id: Int, name: String, age: Int, date: String)
}
es服务配置代码:
package make.service
/**
* @Author: maketubu
* @Date: 2019/10/31 14:54
*/
import make.tools.PropertiesTool
import org.elasticsearch.spark.rdd.EsSpark
import make.bean.CaseClass._
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.elasticsearch.spark.sql.EsSparkSQL
/**
 * Thin wrapper around the elasticsearch-spark connector: holds the connector
 * settings loaded from `es.properties` and exposes helpers to save an RDD and
 * to read an index as an RDD or a DataFrame.
 */
object EsService {
  // Connection settings loaded from es.properties.
  val nodes = PropertiesTool.getproperties("es.nodes", "es.properties")
  val port = PropertiesTool.getproperties("es.port", "es.properties")
  // NOTE(review): loaded but not used below; both configs hard-code "my_app".
  val cluter_name = PropertiesTool.getproperties("cluster.name", "es.properties")

  // Base settings, used as-is by the read helpers.
  val esconfig = Map(
    "es.nodes" -> nodes,
    "es.port" -> port,
    "es.write.operation" -> "upsert",
    "cluster.name" -> "my_app"
  )

  // Write settings: the base settings plus the id mapping. With "upsert",
  // a document whose id already exists is updated instead of duplicated.
  val essaveconfig = esconfig ++ Map(
    "es.mapping.id" -> "peopleid", // name of the field that supplies the document id
    // NOTE(review): "spark."-prefixed key inside a write config; confirm the connector honours it here.
    "spark.es.mapping.date.rich" -> "false"
  )

  /**
   * Saves an RDD of `people_id` to the given resource using `essaveconfig`.
   *
   * The case-class structure is expected to correspond to the index structure.
   * Original author's note: when an id column is specified on insert, the
   * auto-generated id column may end up empty.
   *
   * An equivalent DataFrame route would be `rdd.toDF()` followed by
   * `EsSparkSQL.saveToEs(df, path, essaveconfig)`.
   *
   * @param rdd  rows to write
   * @param path target resource, e.g. "people/man"
   */
  def save2es(rdd: RDD[people_id], path: String): Unit =
    EsSpark.saveToEs(rdd, path, essaveconfig)

  /**
   * Loads a resource as a DataFrame through the `org.elasticsearch.spark.sql`
   * data source, using `esconfig`.
   *
   * @param spark active Spark session
   * @param path  resource to read, e.g. "person/man"
   * @return the loaded DataFrame
   */
  def read4es_df(spark: SparkSession, path: String): DataFrame = {
    val reader = spark.sqlContext.read
      .format("org.elasticsearch.spark.sql")
      .options(esconfig)
    reader.load(path)
  }

  /**
   * Reads a whole resource as an RDD of (document id, field map) pairs.
   *
   * @param spark active Spark session
   * @param path  resource to read
   * @return one (id, fields) pair per document
   */
  def read4es(spark: SparkSession, path: String): RDD[(String, collection.Map[String, AnyRef])] =
    EsSpark.esRDD(spark.sparkContext, path, esconfig)

  /**
   * Reads the documents of a resource that match a query, as an RDD of
   * (document id, field map) pairs.
   *
   * @param spark active Spark session
   * @param query query string handed to the connector unchanged
   * @param path  resource to read
   * @return one (id, fields) pair per matching document
   */
  def query4es(spark: SparkSession, query: String, path: String): RDD[(String, collection.Map[String, AnyRef])] =
    EsSpark.esRDD(spark.sparkContext, path, query, esconfig)
}
这里的写入都是以 case class 的形式写入的,还可以以 Map、JSON 的形式写入,详细请参考这个博客:
https://www.iteblog.com/archives/1728.html#MapElasticSearch