spark2.1.0-在文本文件中统计出现关键字的次数

package com.wangzs;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

/**
 * Counts how many lines of a text file contain a given keyword, using the
 * Spark Java API with a local master.
 *
 * <p>The program reads a text file as an RDD of lines, counts the lines that
 * contain {@code "a"} and the lines that contain {@code "b"}, and prints both
 * counts on a single line to standard output.
 *
 * <p>Written by wangzs, 2018-04-26.
 *
 * @author wangzs
 */
public class SimpleAppTest {

    /** Input file that is read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "E:\\pss_2018\\spark-2.5-learn\\src\\main\\resources\\README.md";

    /**
     * Program entry point.
     *
     * @param args optional; if present, {@code args[0]} is used as the path of
     *             the text file to scan instead of {@link #DEFAULT_INPUT_PATH}
     */
    public static void main(String[] args) {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        SparkConf conf = new SparkConf().setMaster("local").setAppName("SimpleAppTest");

        // JavaSparkContext is Closeable; try-with-resources guarantees the
        // context is stopped even when one of the Spark jobs throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> logData = sc.textFile(inputPath);

            long countA = countLinesContaining(logData, "a");
            long countB = countLinesContaining(logData, "b");

            System.out.println("countA:" + countA + " \tcountB:" + countB);
        }
    }

    /**
     * Counts the lines of {@code lines} that contain {@code keyword}.
     *
     * @param lines   RDD holding one element per line of text
     * @param keyword substring to look for in each line
     * @return number of lines for which {@code line.contains(keyword)} is true
     */
    private static long countLinesContaining(JavaRDD<String> lines, final String keyword) {
        // Explicit type arguments are required: with a raw Function the
        // call(String) method below would not override call(Object).
        JavaRDD<String> matching = lines.filter(new Function<String, Boolean>() {
            @Override
            public Boolean call(String line) throws Exception {
                return line.contains(keyword);
            }
        });
        return matching.count();
    }
}

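<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.pss</groupId>
	<artifactId>spark-2.5-learn</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>spark-2.5-learn</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>org.apache.spark</groupId>
			<artifactId>spark-core_2.11</artifactId>
			<version>2.1.0</version>
		</dependency>

	</dependencies>

	<build>
		<pluginManagement>
			<plugins>
				<plugin>
					<groupId>org.apache.maven.plugins</groupId>
					<artifactId>maven-compiler-plugin</artifactId>
					<configuration>
						<source>1.7</source>
						<target>1.7</target>
					</configuration>
				</plugin>
				<!-- The original markup was lost; "skipTests" is the presumed element name for the "true" flag below. -->
				<plugin>
					<groupId>org.apache.maven.plugins</groupId>
					<artifactId>maven-surefire-plugin</artifactId>
					<version>2.18.1</version>
					<configuration>
						<skipTests>true</skipTests>
					</configuration>
				</plugin>
			</plugins>
		</pluginManagement>
	</build>

</project>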

统计结果:

countA:61 	countB:30


你可能感兴趣的:(spark,spark)