bin/spark-submit \
  --class cn.spark.sparktest.core.WordCountCluster \
  --driver-memory 100m \
  --num-executors 3 \
  --executor-memory 100m \
  --executor-cores 3 \
  /usr/local/SparkTest-0.0.1-SNAPSHOT-jar-with-dependencies.jar

--driver-memory sets the driver's memory (usually has little impact); --num-executors sets the number of executors; --executor-memory sets the memory of each executor; --executor-cores sets the number of CPU cores of each executor.
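With the resources requested above (3 executors x 3 CPU cores = 9 cores total), parallelism should be set so every core stays busy: the official Spark tuning guide recommends roughly 2-3 tasks per CPU core. spark.default.parallelism sets that default task count (the value 500 below assumes a much larger cluster than this small example):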
SparkConf conf = new SparkConf().set("spark.default.parallelism", "500");
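A large read-only lookup structure like dateHourExtractMap should be wrapped in a broadcast variable rather than captured directly in task closures: a closure-captured map is serialized and shipped once per task, while a broadcast variable is pulled to each executor only once.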
// Driver side: register the map as a broadcast variable.
final Broadcast<Map<String, Map<String, List<Integer>>>> dateHourExtractMapBroadcast =
        sc.broadcast(dateHourExtractMap);
// Executor side (inside a task closure): read the broadcast value back out.
Map<String, Map<String, List<Integer>>> dateHourExtractMap = dateHourExtractMapBroadcast.value();
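For context, a minimal sketch of where that value() call typically lives (sessionRDD and the filtering logic are hypothetical stand-ins for the job's real RDD and predicate):

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;

JavaRDD<String> extractedSessions = sessionRDD.filter(new Function<String, Boolean>() {
    @Override
    public Boolean call(String session) throws Exception {
        // The first access on an executor fetches the broadcast blocks into the
        // local BlockManager; later tasks on the same executor reuse that copy.
        Map<String, Map<String, List<Integer>>> dateHourExtractMap =
                dateHourExtractMapBroadcast.value();
        // ... use dateHourExtractMap to decide whether to keep this session ...
        return true;
    }
});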
set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Convert the nested JDK map into one backed by fastutil's primitive IntList,
// which stores raw ints instead of boxed Integer objects.
Map<String, Map<String, IntList>> fastutilDateHourExtractMap =
        new HashMap<String, Map<String, IntList>>();

for (Map.Entry<String, Map<String, List<Integer>>> dateHourExtractEntry
        : dateHourExtractMap.entrySet()) {
    String date = dateHourExtractEntry.getKey();
    Map<String, List<Integer>> hourExtractMap = dateHourExtractEntry.getValue();

    Map<String, IntList> fastutilHourExtractMap = new HashMap<String, IntList>();

    for (Map.Entry<String, List<Integer>> hourExtractEntry : hourExtractMap.entrySet()) {
        String hour = hourExtractEntry.getKey();
        List<Integer> extractList = hourExtractEntry.getValue();

        // Copy each boxed Integer into the primitive-backed list.
        IntList fastutilExtractList = new IntArrayList();
        for (int i = 0; i < extractList.size(); i++) {
            fastutilExtractList.add(extractList.get(i));
        }

        fastutilHourExtractMap.put(hour, fastutilExtractList);
    }

    fastutilDateHourExtractMap.put(date, fastutilHourExtractMap);
}
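The fastutil version is then what actually gets broadcast, so the more compact representation is what crosses the network and sits in each executor's memory (a sketch, reusing the sc handle from above):

final Broadcast<Map<String, Map<String, IntList>>> fastutilDateHourExtractMapBroadcast =
        sc.broadcast(fastutilDateHourExtractMap);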
// Putting it together: one SparkConf with parallelism, locality wait, and Kryo all set.
SparkConf conf = new SparkConf()
        .setAppName(Constants.SPARK_APP_NAME_SESSION)
        .setMaster("local")
        .set("spark.default.parallelism", "500")
        .set("spark.locality.wait", "10")  // how long to wait for the preferred locality level before downgrading (default 3s)
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");