Latest version, feel free to use it. Thanks.

import java.io.{File, FileWriter}

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable
import scala.util.parsing.json.JSON

/**
  * @author shkstart
  * @create 2020-08-11 22:34
  */
object demo {
  def main(args: Array[String]): Unit = {
    fun() // append stub records for unseen devices to data.json first
    // The driver distributes the tasks to the executors, and the final
    // collect() pulls the executors' results back to the driver.
    val conf: SparkConf = new SparkConf().setMaster("local[2]").setAppName("CreateRDD")
    val sc = new SparkContext(conf)
    val rdd1: RDD[String] = sc.textFile("data.json")
    val rdd2: RDD[Option[Any]] = rdd1.map(JSON.parseFull)
    // Keep only the lines that parsed; a bare `case Some(x)` inside map()
    // would throw a MatchError on any line that fails to parse.
    var rdd3: RDD[Map[String, Any]] = rdd2.flatMap {
      case Some(x) => Some(x.asInstanceOf[Map[String, Any]])
      case None    => None
    }
    import scala.io.Source
    val lines: List[String] = Source.fromFile("next.txt").getLines().toList
    // Running score per deviceId: 100 the first time it appears, 10 less on each repeat.
    val score: mutable.HashMap[String, Double] = new mutable.HashMap[String, Double]()
    for (line <- lines) {
      var max: Double = Double.MinValue
      var m: String = null
      val words: Array[String] = line.split(",") // uid,deviceId,ts
      if (words(0).length != 0) {
        val wBuffer: mutable.Buffer[String] = words.toBuffer
        if (!score.contains(wBuffer(1))) {
          score.put(wBuffer(1), 100)
        } else {
          score.put(wBuffer(1), score(wBuffer(1)) - 10.0)
        }
        wBuffer.append(score(wBuffer(1)).toString) // wBuffer(3) = current score

        rdd3 = rdd3.map {
          // The record for the device on the current line: rebuild it with updated fields.
          case map: Map[String, Any] if map("deviceId").asInstanceOf[String] == wBuffer(1) =>
            val temp: mutable.LinkedHashMap[String, Any] = mutable.LinkedHashMap[String, Any]()
            temp.put("deviceId", wBuffer(1))
            // The device's existing user list.
            val ll: List[Map[String, Any]] = map("lst").asInstanceOf[List[Map[String, Any]]]
            val tempMap: mutable.Buffer[Map[String, Any]] = ll.toBuffer
            val curr: mutable.Buffer[Map[String, Any]] = new mutable.ListBuffer[Map[String, Any]]()
            if (tempMap.isEmpty) {
              // First user seen on this device.
              temp.put("lst", List(Map("uid" -> wBuffer(0), "ts" -> wBuffer(2).toDouble, "score" -> wBuffer(3).toDouble)))
              map.get("guid") match {
                case Some(g: String) if g == "  " => temp.put("guid", wBuffer(0)) // "  " is the placeholder fun() writes
                case Some(g)                      => temp.put("guid", g)
                case None                         =>
              }
            } else {
              var flag: Boolean = false // does lst already contain the current uid?
              for (i <- 0 until tempMap.length) { // each user on the device
                if (tempMap(i)("uid") == wBuffer(0)) {
                  val map1: mutable.Map[String, Any] = mutable.Map(tempMap(i).toSeq: _*)
                  map1.put("ts", wBuffer(2).toDouble)
                  // "score" may have been stored as a String or as a Double.
                  val value: Double = tempMap(i)("score") match {
                    case s: String => s.toDouble
                    case d: Double => d
                  }
                  map1.put("score", wBuffer(3).toDouble + value)
                  curr.append(map1.toMap)
                  flag = true
                } else {
                  curr.append(tempMap(i))
                }
                // Track the highest-scoring uid as the fallback guid.
                val s: Double = curr(i)("score") match {
                  case str: String => str.toDouble
                  case d: Double   => d
                }
                if (max < s) {
                  max = s
                  m = curr(i)("uid").asInstanceOf[String]
                }
              }
              if (!flag) {
                curr.append(Map("uid" -> wBuffer(0), "ts" -> wBuffer(2).toDouble, "score" -> wBuffer(3).toDouble))
              }
              temp.put("lst", curr.toList)
              // Keep a guid that already starts with 'd'; otherwise fall back to
              // the highest-scoring uid found above.
              map.get("guid") match {
                case Some(g: String) if g.startsWith("d") => temp.put("guid", g)
                case _                                    =>
              }
              if (!temp.contains("guid")) {
                temp.put("guid", m)
              }
            }
            temp.toMap
          // Records for other devices pass through unchanged.
          case map: Map[String, Any] => map
        }
      }
    }
    // collect() runs the whole chained lineage and pulls the results to the driver.
    rdd3.collect().foreach(println)
  }


  def fun(): Unit = {
    import scala.io.Source
    val source = Source.fromFile("data.json")
    val strings: Iterator[String] = source.getLines()
    val so: mutable.HashSet[String] = mutable.HashSet[String]() // deviceIds already present in data.json
    while (strings.hasNext) {
      JSON.parseFull(strings.next()) match {
        case Some(label: Map[String, Any]) =>
          so.add(label("deviceId").asInstanceOf[String])
        case _ => // skip lines that fail to parse
      }
    }
    source.close() // finish reading before re-opening the file for appending
    // Append a stub record for every deviceId in next.txt that data.json does not have yet.
    val writer: FileWriter = new FileWriter(new File("data.json"), true) // append mode
    val strings1: Iterator[String] = Source.fromFile("next.txt").getLines()
    while (strings1.hasNext) {
      val str: Array[String] = strings1.next().split(",")
      if (!so.contains(str(1))) {
        // "  " (two spaces) is the guid placeholder that main() later overwrites.
        val s: String = "{\"deviceId\":\"" + str(1) + "\",\"lst\":[], \"guid\":\"  \"}"
        println(s)
        writer.write("\n" + s)
        so.add(str(1))
      }
    }
    writer.close()
  }
}
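
A note on the loop in main(): each line of next.txt wraps rdd3 in one more map(), so the transformations only queue up and run when collect() fires at the end, and each closure keeps that iteration's own copies of wBuffer, max and m. Below is a minimal sketch of the same lazy-chaining pattern in a hypothetical standalone object (names and values are made up):

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object ChainingSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("ChainingSketch"))
    var r: RDD[Int] = sc.parallelize(Seq(1, 2, 3))
    for (d <- Seq(10, 20)) {
      r = r.map(_ + d) // each iteration adds one map() to the lineage; d is captured per iteration
    }
    r.collect().foreach(println) // prints 31, 32, 33; nothing executes before this action
    sc.stop()
  }
}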

pom dependencies

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.1.1</version>
    </dependency>
</dependencies>
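
For reference, these are the input shapes the code assumes, inferred from the field accesses above. The concrete values are illustrative, not from the original data:

data.json (one JSON object per line, with deviceId, lst and guid fields):

{"deviceId":"d001","lst":[{"uid":"u01","ts":1.597158E9,"score":100.0}],"guid":"d001"}

next.txt (one comma-separated record per line, in the order uid,deviceId,ts):

u01,d001,1597158000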
