<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.12</artifactId>
    <version>3.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.12</artifactId>
    <version>3.1.2</version>
    <scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-hive -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.12</artifactId>
    <version>3.1.2</version>
    <scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-graphx -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-graphx_2.12</artifactId>
    <version>3.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>8.0.29</version>
</dependency>
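These dependency blocks sit inside the <dependencies> element of the module's pom.xml. A minimal sketch of the surrounding structure, using the project coordinates implied by the spark-submit example further down (org.example / sparkstu-1.0-SNAPSHOT); the exact coordinates are an assumption:

<project xmlns="http://maven.apache.org/POM/4.0.0">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>sparkstu</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- the spark-core, spark-sql, spark-hive, spark-graphx and mysql-connector-java entries above go here -->
    </dependencies>
</project>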
import org.apache.spark.{SparkConf, SparkContext}

object SparkDemo {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("sparkDemo")
    val sc: SparkContext = SparkContext.getOrCreate(conf)
    println(sc)
  }
}
D:\environment\server\apache-maven-3.6.1\repository\org\apache\spark\spark-core_2.12\3.1.2\spark-core_2.12-3.1.2.jar!\org\apache\spark\log4j-defaults.properties
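The path above locates Spark's bundled log4j-defaults.properties inside the spark-core jar in the local Maven repository. To reduce console noise when running locally, that file is commonly copied to src/main/resources/log4j.properties with the root level lowered; a minimal sketch, assuming the log4j 1.x configuration bundled with Spark 3.1.x:

# src/main/resources/log4j.properties -- local override of Spark's defaults (sketch)
# root level lowered from INFO to WARN to quiet the console
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n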
val spark: SparkSession = SparkSession.builder().master("local[*]").appName("SparkSessionDemo").getOrCreate()
println(spark)
val sc: SparkContext = spark.sparkContext // the RDD operations below need a SparkContext
val rdd: RDD[Int] = sc.parallelize(1 to 10)
rdd.collect().foreach(println)
val pNum: Int = rdd.getNumPartitions
println("Number of partitions: " + pNum)
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/sql/SparkSession$
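This error typically shows up when the program is launched directly from the IDE while spark-sql is declared with <scope>provided</scope>: provided dependencies are left off the runtime classpath, so SparkSession cannot be loaded. One option is to drop the provided scope while developing locally (or configure the IDE run configuration to include provided-scope dependencies); a sketch of the adjusted dependency:

<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.12</artifactId>
    <version>3.1.2</version>
    <!-- scope "provided" removed so the classes are on the classpath when running from the IDE -->
</dependency>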
val dataString: RDD[String] = sc.makeRDD(Array("hello world", "hello java", "hello scala", "hello spark", "hello hello"))
dataString.collect().foreach(println)
dataString.flatMap(x => x.split(" ")).map(x => (x, 1)).reduceByKey(_ + _).collect().foreach(println)
[root@cp145 opt]# spark-submit --class org.example.WordCount --master local[*] ./sparkstu-1.0-SNAPSHOT.jar
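The command above expects a class named org.example.WordCount inside sparkstu-1.0-SNAPSHOT.jar. Its source is not shown here; a minimal sketch consistent with the word-count snippet above (the master URL is left to spark-submit rather than hard-coded):

package org.example

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    // Master is supplied via --master on the spark-submit command line
    val conf: SparkConf = new SparkConf().setAppName("wordCount")
    val sc: SparkContext = SparkContext.getOrCreate(conf)
    val data: RDD[String] = sc.makeRDD(Array("hello world", "hello java", "hello scala", "hello spark", "hello hello"))
    data.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).collect().foreach(println)
    sc.stop()
  }
}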
def main(args: Array[String]): Unit = {
  val conf: SparkConf = new SparkConf().setAppName("partitions").setMaster("local[5]")
  val sc: SparkContext = SparkContext.getOrCreate(conf)
  val rdd: RDD[Int] = sc.parallelize(1 to 20)
  rdd.glom().collect().foreach(x => println(x.toList))
  println("--------- fake repartition: coalesce without shuffle ----------")
  val rdd2: RDD[Int] = rdd.coalesce(3, shuffle = false)
  rdd2.glom().collect().foreach(x => println(x.toList))
  println("--------- real repartition: coalesce with shuffle ----------")
  val rdd3: RDD[Int] = rdd.coalesce(3, shuffle = true)
  rdd3.glom().collect().foreach(x => println(x.toList))
  println("--------- real repartition: repartition ----------")
  val rdd4: RDD[Int] = rdd.repartition(3)
  rdd4.glom().collect().foreach(x => println(x.toList))
}
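For reference, RDD.repartition(n) is implemented as coalesce(n, shuffle = true), so rdd3 and rdd4 above are produced the same way; adding the following two lines inside main confirms that both end up with 3 partitions:

println(rdd3.getNumPartitions) // 3
println(rdd4.getNumPartitions) // 3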