from pyspark import SparkConf, SparkContext
import os
# Point PySpark at a local Python interpreter (adjust this path for your machine)
os.environ['PYSPARK_PYTHON'] = "D:/dev/python/python3.10.4/python.exe"

# Run Spark locally using all available cores
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)
# Prepare an RDD of sample strings
rdd_1 = sc.parallelize(["java python c", "云纹 术学 武理", "With great power comes great responsibility"])
# Extract the individual words from each string in the RDD

# a. Using map: split() returns one list per element, so the result stays nested
rdd_2 = rdd_1.map(lambda x: x.split(" "))
print(rdd_2.collect())
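# Expected output: one nested list per input string:
# [['java', 'python', 'c'], ['云纹', '术学', '武理'],
#  ['With', 'great', 'power', 'comes', 'great', 'responsibility']]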
# b. Using flatMap: split each element, then flatten the lists into one sequence
# (note: the SparkContext must stay alive here, so sc.stop() moves to the end)
rdd_2 = rdd_1.flatMap(lambda x: x.split(" "))
print(rdd_2.collect())
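# Expected output: a single flat list of words:
# ['java', 'python', 'c', '云纹', '术学', '武理',
#  'With', 'great', 'power', 'comes', 'great', 'responsibility']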
# Shut down the SparkContext
sc.stop()
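
# In short: map is one-to-one (each string becomes one list), while flatMap
# applies the same function and then flattens the results, which is why it is
# the usual choice for word extraction.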