Using Word2Vec in Spark (Python)

from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.ml.feature import Word2Vec

conf = SparkConf().setAppName("yjs_rec")
sc = SparkContext(conf=conf)
sqlContext=SQLContext(sc)

# Input data: each row is one document, represented as a list of words
spark_df = sqlContext.createDataFrame([
    ("Hi I heard about Spark".split(" "),),
    ("I wish Java could use case classes".split(" "),),
    ("Logistic regression models are neat".split(" "),)
], ["words"])
# Word2Vec: map each document to a 100-dimensional vector by averaging its word vectors
word2Vec = Word2Vec(vectorSize=100, minCount=0, inputCol="words", outputCol="result")
model = word2Vec.fit(spark_df)
result = model.transform(spark_df)
result.select("result").show()
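
After fitting, the trained Word2VecModel can also be queried directly. A minimal sketch, reusing the model variable from above:

# Inspect the learned per-word vectors (one row per vocabulary word)
model.getVectors().show(truncate=False)

# Find the 2 words whose vectors are closest to "Spark";
# the result is a DataFrame with "word" and "similarity" columns
model.findSynonyms("Spark", 2).show()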
