如何使用PyCharm编写Spark程序(pyspark)

import os
import sys

# Root of the local Spark distribution. The SPARK_HOME environment variable
# and both sys.path entries below are derived from this single value, so the
# install location only has to be edited in one place.
SPARK_HOME = "/Users/dustinchen/Documents/APP/spark-1.6.1-bin-hadoop2.6"
os.environ['SPARK_HOME'] = SPARK_HOME

# You might need to enter your local IP
# os.environ['SPARK_LOCAL_IP']="192.168.2.138"

# Make the pyspark package and the py4j source archive found under the Spark
# distribution importable from this interpreter.
sys.path.append(os.path.join(SPARK_HOME, "python"))
sys.path.append(os.path.join(SPARK_HOME, "python", "lib", "py4j-0.9-src.zip"))

# Keep the try body down to the imports themselves so that only a genuine
# import failure is reported as one.
try:
    from pyspark import SparkContext
    from pyspark import SparkConf
except ImportError as e:
    print ("Can not import Spark Modules", e)
    sys.exit(1)
else:
    print ("Successfully imported Spark Modules")

# Smoke test: create a context with the 'local' master, distribute a small
# list, and print how many elements it holds.
sc = SparkContext('local')
try:
    words = sc.parallelize(["scala", "java", "hadoop", "spark", "akka"])
    print(words.count())
finally:
    # Always shut the context down, even if the job above raises; otherwise
    # it stays alive and prevents another SparkContext from being created in
    # the same interpreter session.
    sc.stop()

你可能感兴趣的:(Spark)