IMF SparkSQLwithJoin: testing SparkSQL



IMF SparkSQLwithJoin: a SparkSQL join test for lesson 114 of the course. Reference:

http://blog.csdn.net/duan_zhihua/article/details/51590390


people1.json

{"name":"Michael", "score":90}
{"name":"Andy", "score":80}
{"name":"Justin", "score":99}
{"name":"zhangsan", "score":100}
{"name":"zhangsan", "score":96}


SparkSQLwithJoin.java
import java.util.ArrayList;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.hive.HiveContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

import scala.Tuple2;

public class SparkSQLwithJoin {
	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setMaster("local").setAppName("SparkSQLwithJoin");
		JavaSparkContext sc = new JavaSparkContext(conf);
		// A plain SQLContext would work here too; HiveContext is a superset of it.
		HiveContext sqlContext = new HiveContext(sc);
		// Create a DataFrame from the JSON data source.
		DataFrame peoplesDF = sqlContext.read().json("/usr/local/IMF_testdata/people1.json");
			
		// Register the JSON-based DataFrame as a temporary table.
		peoplesDF.registerTempTable("peopleScores");

		// Select everyone with a score greater than 90.
		DataFrame excellentScoresDF = sqlContext.sql("select name, score from peopleScores where score > 90");

		// Convert the DataFrame to an RDD and map it to collect the names of
		// everyone who scored above 90.
		List<String> excellentScoresNameList = excellentScoresDF.javaRDD().map(new Function<Row, String>() {

			@Override
			public String call(Row row) throws Exception {
				return row.getAs("name");
			}
		}).collect();
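		// Note: collect() pulls the filtered names back to the driver; that is
		// safe here because the list is tiny, and it lets the names be spliced
		// into the SQL string built below.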
			
			
		// Dynamically assemble JSON records for a second data set.
		List<String> peopleInformations = new ArrayList<String>();
		peopleInformations.add("{\"name\":\"Michael\", \"age\":20}");
		peopleInformations.add("{\"name\":\"Andy\", \"age\":17}");
		peopleInformations.add("{\"name\":\"Justin\", \"age\":19}");
		peopleInformations.add("{\"name\":\"zhangsan\", \"age\":25}");

		System.out.println(peopleInformations);
		// Build a DataFrame from an RDD whose elements are JSON strings.
		JavaRDD<String> peopleInformationsRDD = sc.parallelize(peopleInformations);
		DataFrame peopleInformationsDF = sqlContext.read().json(peopleInformationsRDD);
			
		// Register it as a temporary table as well.
		peopleInformationsDF.registerTempTable("peopleInformations");
			
		// Build an IN clause from the names collected above.
		String sqlText = "select name, age from peopleInformations where name in (";
		for (int i = 0; i < excellentScoresNameList.size(); i++) {
			sqlText += "'" + excellentScoresNameList.get(i) + "'";
			if (i < excellentScoresNameList.size() - 1) {
				sqlText += ",";
			}
		}
		sqlText += ")";
			
			System.out.println("=====================the sqlText is : " + sqlText + "=====================");
			
		DataFrame excellentNameAgeDF = sqlContext.sql(sqlText);

		// Key both RDDs by name and join them: the left side carries the score,
		// the right side carries the age. JSON numbers are inferred as LongType,
		// hence getLong.
		JavaPairRDD<String, Tuple2<Integer, Integer>> resultRDD = excellentScoresDF.javaRDD()
				.mapToPair(new PairFunction<Row, String, Integer>() {

			private static final long serialVersionUID = 1L;

			@Override
			public Tuple2<String, Integer> call(Row row) throws Exception {
				return new Tuple2<String, Integer>((String) row.getAs("name"), (int) row.getLong(1));
			}
		}).join(excellentNameAgeDF.javaRDD().mapToPair(new PairFunction<Row, String, Integer>() {

			private static final long serialVersionUID = 1L;

			@Override
			public Tuple2<String, Integer> call(Row row) throws Exception {
				return new Tuple2<String, Integer>((String) row.getAs("name"), (int) row.getLong(1));
			}
		}));
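		// join() pairs records by key, yielding (name, (score, age)): the first
		// element of the inner tuple comes from the left (score) RDD, the second
		// from the right (age) RDD.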
			
		JavaRDD<Row> resultRowRDD = resultRDD.map(new Function<Tuple2<String, Tuple2<Integer, Integer>>, Row>() {

			@Override
			public Row call(Tuple2<String, Tuple2<Integer, Integer>> tuple) throws Exception {
				// Rebuild each Row as (name, age, score).
				return RowFactory.create(tuple._1, tuple._2._2, tuple._2._1);
			}
		});
			
		// Build the StructType that describes the metadata of the final DataFrame.
		List<StructField> structFields = new ArrayList<StructField>();
		structFields.add(DataTypes.createStructField("name", DataTypes.StringType, true));
		structFields.add(DataTypes.createStructField("age", DataTypes.IntegerType, true));
		structFields.add(DataTypes.createStructField("score", DataTypes.IntegerType, true));
		StructType structType = DataTypes.createStructType(structFields);
			
			
		DataFrame personsDF = sqlContext.createDataFrame(resultRowRDD, structType);

		personsDF.show();

		// Collect the joined rows to the driver and print them.
		List<Row> imfLists = personsDF.toJavaRDD().collect();
		for (Row imfList : imfLists) {
			System.out.println("imfList====" + imfList);
		}
		// Optionally persist the result as JSON:
		// personsDF.write().format("json").save("G://IMFBigDataSpark2016//tesdata//people_result73");

		sc.stop();
	}
}
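
To reproduce the run, package the class into a jar and launch it on the machine that holds /usr/local/IMF_testdata/people1.json. Something along these lines should work (the jar name here is an assumption):

spark-submit --class SparkSQLwithJoin --master local SparkSQLwithJoin.jar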




The test ran successfully.
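
For reference, the expected result can be worked out from the sample data: score > 90 keeps Justin (99) and the two zhangsan rows (100 and 96); the IN clause then matches Justin (age 19) and zhangsan (age 25), while Michael and Andy drop out. personsDF.show() should therefore print something like this (row order may vary):

+--------+---+-----+
|    name|age|score|
+--------+---+-----+
|  Justin| 19|   99|
|zhangsan| 25|  100|
|zhangsan| 25|   96|
+--------+---+-----+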

