Spark: reading a CSV and writing a CSV

Read a CSV with Spark, process each row, and write the result back out as a CSV.
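The reader below goes through the com.databricks.spark.csv data source. On Spark 2.x and later the built-in CSV source covers the same options, so the extra package is only required on Spark 1.x. A minimal sketch of the sbt dependencies, assuming Spark 2.x on Scala 2.11 (version numbers are illustrative):

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "2.3.0",
  "org.apache.spark" %% "spark-sql"  % "2.3.0"
  // only needed on Spark 1.x, where CSV support is not built in:
  // "com.databricks" %% "spark-csv" % "1.5.0"
)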

package daily

import handler.Transfrom // user-defined helper that decodes the Data string into an Array[Int]
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Row, SaveMode, SparkSession}


object Data {

    def main(args: Array[String]): Unit = {
        // command-line arguments: input path and output path
        val Array(input, output) = args
        //val Array(input) = args
        // Spark configuration
        val conf = new SparkConf().setAppName("SparkSqlHello").setMaster("local[*]")
        // create the SparkSession
        val sc = SparkSession.builder().config(conf).getOrCreate()
        // read the input CSV (on Spark 2.x the built-in "csv" format works as well)
        val data = sc.sqlContext.read.format("com.databricks.spark.csv")
          .option("header", "true") // "true" if the first line of the CSV is a header, otherwise "false"
          //.option("inferSchema", true.toString) // infer the data type of each column automatically
          //.option("multiLine", true)
          .load(input)
        import sc.implicits._
        val frame = data.rdd.map(x => {
            val str = x.getAs[String]("Data")
            val ChV0 = x.getAs[String]("ChV0").toFloat
            val ChV1 = x.getAs[String]("ChV1").toFloat
            val ChAD0 = x.getAs[String]("ChAD0").toFloat
            val ChAD1 = x.getAs[String]("ChAD1").toFloat
            val ZeroV = x.getAs[String]("ZeroV").toFloat
            val Sensitivity = x.getAs[String]("Sensitivity").toFloat
            val straaa = str.substring(2, str.length) // drop the first two characters of the Data string
            val ints = Transfrom.sixJieXi(straaa) // decode the remaining string into raw sample values
            // split the 8000 decoded samples into 8 consecutive blocks of 1000
            val tuple = ints.splitAt(1000)
            val tuple2 = tuple._2.splitAt(1000)
            val tuple3 = tuple2._2.splitAt(1000)
            val tuple4 = tuple3._2.splitAt(1000)
            val tuple5 = tuple4._2.splitAt(1000)
            val tuple6 = tuple5._2.splitAt(1000)
            val tuple7 = tuple6._2.splitAt(1000)

            // convert each block of raw samples to a comma-separated string of engineering values
            val str1 = jisuan(tuple._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str2 = jisuan(tuple2._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str3 = jisuan(tuple3._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str4 = jisuan(tuple4._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str5 = jisuan(tuple5._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str6 = jisuan(tuple6._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str7 = jisuan(tuple7._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str8 = jisuan(tuple7._2, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)

            new DataLogs(
                x.getAs("DataID"),
                x.getAs("DeviceNo"),
                x.getAs("Sensitivity"),
                x.getAs("Factor"),
                x.getAs("ChV0"),
                x.getAs("ChV1"),
                x.getAs("ChAD0"),
                x.getAs("ChAD1"),
                x.getAs("ZeroV"),
                x.getAs("TrigTime"),
                x.getAs("TrigTimeMs"),
                x.getAs("CollTime"),
                x.getAs("CollTimeMs"),
                x.getAs("LastCollTime"),
                x.getAs("Speed"),
                x.getAs("Overall"),
                x.getAs("SampleNumber"),
                x.getAs("UnitID"),
                x.getAs("DataLen"),
                x.getAs("Data"),
                x.getAs("DefBufLen"),
                x.getAs("DefBuf"),
                str1,
                str2,
                str3,
                str4,
                str5,
                str6,
                str7,
                str8
            )
        }).toDF(
            "DataID", "DeviceNo", "Sensitivity", "Factor", "ChV0", "ChV1", "ChAD0", "ChAD1", "ZeroV",
            "TrigTime", "TrigTimeMs", "CollTime", "CollTimeMs", "LastCollTime", "Speed", "Overall",
            "SampleNumber", "UnitID", "DataLen", "Data", "DefBufLen", "DefBuf",
            "engineering1", "engineering2", "engineering3", "engineering4",
            "engineering5", "engineering6", "engineering7", "engineering8")
        // coalesce to a single partition so the result is written as one part file
        frame.coalesce(1).write.mode(SaveMode.Append).format("com.databricks.spark.csv")
          .option("header", "true") // write the column names as the first line of the CSV
          // .option("delimiter", ",") // the default delimiter is ","
          .csv(output) // .csv() uses the built-in csv source, overriding the format() above

        sc.stop()
    }

    /** Convert raw ADC counts to engineering values: linearly map each count from the
      * ADC range [ChAD0, ChAD1] into the voltage range [ChV0, ChV1], subtract the zero
      * offset and divide by the sensitivity, then format with 4 decimal places and join
      * the block into a comma-separated string.
      */
    def jisuan(array: Array[Int], ChV1: Float, ChV0: Float, ChAD1: Float, ChAD0: Float, ZeroV: Float, Sensitivity: Float): String = {
        array.map(xx => {
            val n = ((ChV1 - ChV0) * 1.0 / (ChAD1 - ChAD0) * (xx - ChAD0) + ChV0 - ZeroV) / Sensitivity
            n.formatted("%.4f")
        }).mkString(",")
    }
}
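The chain of splitAt calls above does nothing more than cut the decoded array into 8 consecutive blocks of 1000 samples. A minimal sketch of an equivalent written with grouped (toEngineeringBlocks is a hypothetical helper meant to sit inside object Data next to jisuan, assuming the array always holds 8000 samples):

    // sketch: cut the decoded samples into blocks of 1000 and convert each block
    // to a comma-separated string of engineering values; elements 0 .. 7
    // correspond to engineering1 .. engineering8 above
    def toEngineeringBlocks(ints: Array[Int], ChV1: Float, ChV0: Float,
                            ChAD1: Float, ChAD0: Float, ZeroV: Float,
                            Sensitivity: Float): Array[String] =
        ints.grouped(1000)
          .map(block => jisuan(block, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity))
          .toArray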

My goal is to open the output in Excel: each row's Data cell holds 8000 samples, and I want to split them into 8 cells of 1000 values each (the engineering1 … engineering8 columns above).
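One thing to watch out for when opening the result in Excel: even with coalesce(1), Spark writes output as a directory containing a single part-* file, not as a plain CSV file with that name. A minimal sketch of renaming that part file afterwards with the Hadoop FileSystem API, meant to run inside main right after the write (the target name output + ".csv" is illustrative):

import org.apache.hadoop.fs.{FileSystem, Path}

// move the single part file written under the output directory to <output>.csv
val fs = FileSystem.get(sc.sparkContext.hadoopConfiguration)
val partFile = fs.globStatus(new Path(output + "/part-*"))(0).getPath
fs.rename(partFile, new Path(output + ".csv"))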
