# Module bootstrap: Python 2 / PySpark 1.x environment setup.
# NOTE(review): this is legacy Python 2 code (reload(sys), setdefaultencoding,
# print statements below) — it will not run under Python 3 without porting.
import sys
# Make the project's own modules importable from a hard-coded path.
sys.path.append("/home/mysql1/anqu/python/code")
# Python 2 hack: reload(sys) restores setdefaultencoding (removed at startup)
# so the next call can force utf8 for implicit str/unicode conversions.
reload(sys)
import config
sys.setdefaultencoding('utf8')
from pyspark import SparkContext, SparkConf
# Application name shown in the Spark UI (contains Chinese text; runtime string, kept as-is).
SparkAppName = "Anqu_serchAPP数据处理"
# Local-mode Spark context; module-level side effect — creating a second
# SparkContext in the same process would fail.
conf = SparkConf().setAppName(SparkAppName).setMaster("local")
sc = SparkContext(conf=conf)
from pyspark.sql import HiveContext
# Hive-enabled SQL context shared by the class below via the module scope.
sqlContext = HiveContext(sc)
from pyspark.sql.functions import *
class sparkDF_searchAPP():
    """Query helper over a Hive search-app table.

    Selects rows from the configured Hive table by exploding either the
    ``genre`` or the ``searchapp`` array column and filtering on the given
    integer values, union-ing the per-value results into one DataFrame.
    """

    def __init__(self, database="mysql_anqu_chi"):
        """Bind the module-level HiveContext and switch to *database*."""
        self.sqlContext = sqlContext
        self.sqlContext.sql("use " + database)

    def in_genre_select_app(self, genres, table="ansearchapp"):
        """Return a DataFrame of rows whose exploded ``genre`` matches any id in *genres*.

        genres -- iterable of integer genre ids; falsy entries are skipped.
        table  -- Hive table to query (previously ignored; now honored).
        Returns None when *genres* yields no usable id.
        """
        result = None
        for g in genres:
            if not g:
                continue  # skip None/0 entries rather than issuing a bad query
            res = self.sqlContext.sql(
                "select genres, word, priority, searchapp, searchcount, genre, type "
                "from %s LATERAL VIEW OUTER explode(genre) s AS genres "
                "where genres = %d " % (table, g))
            # Explicit None check: DataFrame truthiness is not a reliable signal.
            if result is None:
                result = res
            else:
                result = result.unionAll(res)
        return result

    def in_searchapp_select_app(self, searchapp, table="ansearchapp"):
        """Return a DataFrame of rows whose exploded ``searchapp`` matches any id given.

        searchapp -- iterable of integer app ids; falsy entries are skipped.
        table     -- Hive table to query (previously ignored; now honored).
        Returns None when *searchapp* yields no usable id.
        """
        result = None
        for g in searchapp:
            if not g:
                continue
            res = self.sqlContext.sql(
                "select app, word, priority, searchapp, searchcount, genre, type "
                "from %s LATERAL VIEW OUTER explode(searchapp) s AS app "
                "where app = %d " % (table, g))
            if result is None:
                result = res
            else:
                result = result.unionAll(res)
        return result
def main():
ss = sparkDF_searchAPP()
ss.__init__()
gen = [6017, 6002]
result_gen = ss.in_genre_select_app(gen)
print result_gen.show()
app = [1124330238,1128622678]
result_app = ss.in_searchapp_select_app(app)
aa = result_app.select(result_app['searchapp'])
print "aa---",aa.show()
if __name__ == '__main__':
main()