Spark Python 例子:单词计数(word count)

# -*- coding: utf-8 -*-

from __future__ import print_function
import sys
from operator import add

from pyspark import SparkContext

# Example invocation (script path followed by the input text file):
#   ./pyspark /home/yunshouhu/PycharmProjects/untitled/word.py /home/yunshouhu/PycharmProjects/untitled/data.txt
if __name__ == "__main__":
    # Word-count driver: reads the text file named by the single command-line
    # argument, counts how often each space-separated token occurs, and prints
    # one line per distinct token.
    if len(sys.argv) != 2:
        print("usage: word <file>", file=sys.stderr)
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module and is not available in every interpreter setup.
        sys.exit(-1)

    sc = SparkContext(appName="myword")
    try:
        # The second argument (1) is passed as textFile's partition-count
        # hint, exactly as in the original script.
        lines = sc.textFile(sys.argv[1], 1)

        # NOTE: split(' ') splits on single spaces only, so consecutive spaces
        # or blank lines produce empty-string tokens that are counted too.
        counts = (
            lines.flatMap(lambda line: line.split(' '))
            .map(lambda token: (token, 1))
            .reduceByKey(add)
        )

        # collect() returns the (word, count) pairs as a local list.
        for (word, count) in counts.collect():
            print("来自中国: %s %i" % (word, count))
    finally:
        # Always release the SparkContext, even when the job above fails
        # (for example because the input path does not exist).
        sc.stop()

你可能感兴趣的:(python)