数据清洗

import org.apache.spark.sql.SparkSession
/**
 * Minimal data-cleaning job: loads an access log as plain text, echoes every
 * raw line, and then prints the first space-separated field of each line
 * (treated here as the client IP address).
 */
object DataCleaning {

  /** Log file that is read when no path is passed on the command line. */
  private val DefaultInputPath = "file:///f:/access.txt"

  /**
   * Extracts the first space-delimited field of a log line.
   *
   * The split uses a positive limit so the result array is never empty: a
   * line made up solely of spaces yields "" instead of raising
   * ArrayIndexOutOfBoundsException, and the remainder of the line is not
   * tokenised needlessly.
   *
   * @param line one raw line of the access log
   * @return the text before the first space, or the whole line if it contains none
   */
  private def firstField(line: String): String = line.split(" ", 2)(0)

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input path, otherwise
   *             [[DefaultInputPath]] is used
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    val spark = SparkSession.builder().appName("DataCleaning")
      .master("local[2]").getOrCreate()

    try {
      val access = spark.sparkContext.textFile(inputPath)

      // Dump the raw lines first so they can be compared with the cleaned output.
      access.foreach(println)

      // Extract the IP address (first field) from every line and print it.
      access.map(firstField).foreach(println)
    } finally {
      // Always release the session, even when reading or printing fails.
      spark.stop()
    }
  }
}

你可能感兴趣的:(数据清洗)