sparkStreaming连接kafka(Direct)

// Create the JavaStreamingContext for this job.
// NOTE(review): helper is defined elsewhere in the file; the first argument is
// passed as null — presumably a master URL or checkpoint dir, confirm at the
// helper's definition. batchDurationWithSeconds sets the micro-batch interval.
JavaStreamingContext jsc = createJavaStreamingContext(null, appName, batchDurationWithSeconds);

// kafkaParams: Kafka consumer settings copied from the job's input config.
// Keys are normalized to String; values are kept as-is (not toString'd),
// matching the original behavior. Typed maps replace the decompiled
// raw-Object/cast style of the original.
Map<String, Object> kafkaParams = new HashMap<>();
for (Map.Entry<?, ?> entry : config.getInputConfigs().entrySet()) {
    kafkaParams.put(entry.getKey().toString(), entry.getValue());
}
System.out.println("inputParam: " + kafkaParams);

// outputConfig: output-side (Redis) settings; both keys AND values are
// stored as strings here, unlike kafkaParams above.
HashMap<String, String> outputConfig = new HashMap<>();
for (Map.Entry<?, ?> entry : config.getOutputConfigs().entrySet()) {
    outputConfig.put(entry.getKey().toString(), entry.getValue().toString());
}
System.out.println("outputConfig: " + outputConfig);
// fromOffsets: starting offset for each TopicPartition, restored from Redis.
Map<TopicPartition, Long> fromOffsets = new HashMap<>();
// Read previously persisted offsets from Redis. close() is in a finally so
// the pooled connection is returned even if hgetAll throws (the original
// leaked it on failure).
Map<String, String> kafkaOffsets;
ShardedJedis redis = RedisSingleton.instance((HashMap) outputConfig).getResource();
try {
    kafkaOffsets = redis.hgetAll("POSITION_KAFKA_OFFSETS");
} finally {
    redis.close();
}
// Each hash field is "partition:topic" -> offset, e.g. "0:SGN_DATA" -> "12541".
Pattern keyPattern = Pattern.compile(Pattern.quote(":"));
for (Map.Entry<String, String> entry : kafkaOffsets.entrySet()) {
    // limit=2 so a topic name containing ':' stays intact in info[1]
    String[] info = keyPattern.split(entry.getKey(), 2);
    if (info.length == 2) {
        // Warm start: resume this partition from the persisted offset.
        fromOffsets.put(new TopicPartition(info[1], Integer.parseInt(info[0])),
                Long.valueOf(Long.parseLong(entry.getValue())));
    }
}
//如果起始redis中的偏移量为0,则插入0
if (fromOffsets.isEmpty()) {
    for (Map.Entry entry : topics.entrySet()) {
        if (((Integer)entry.getValue()).intValue() > 0) {
            for (int i = 0; i < ((Integer)entry.getValue()).intValue(); i++) {
                fromOffsets.put(new TopicPartition((String)entry.getKey(), i), Long.valueOf(0L));
            }
        }
    }
}
System.out.println("fromOffsets: " + fromOffsets);
// Create the direct (receiver-less) Kafka input stream, explicitly assigning
// the exact partitions and starting offsets recovered above rather than
// relying on Kafka's committed offsets.
// NOTE(review): generic type parameters were stripped by the page's HTML
// rendering; this would normally be JavaInputDStream<ConsumerRecord<K,V>>.
Object stream = KafkaUtils
        .createDirectStream(jsc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.Assign(fromOffsets.keySet(), (Map)kafkaParams, fromOffsets));
// Wrap the stream in a transform() that snapshots each batch's Kafka offset
// ranges into Redis (hash POSITION_KAFKA_OFFSETS), then maps every record to
// a key/value pair via PhonePairFunction2.
// NOTE(review): generic type parameters were stripped by the page's HTML
// rendering (e.g. "JavaRDD>", raw Function) — restore them before compiling.
JavaPairDStream mapedDatas = ((JavaInputDStream)stream).transform(new Function() {
    private static final long serialVersionUID = -6012908518603112999L;
    // NOTE(review): msg is only ever reset via setLength(0) and never read or
    // appended to — looks like dead code left over from a removed log message.
    private StringBuilder msg = new StringBuilder();
    public Object call(Object rdd1) throws Exception {
        JavaRDD> rdd = (JavaRDD>) rdd1;
        // One OffsetRange per Kafka partition contained in this micro-batch.
        OffsetRange[] offsetRanges = ((HasOffsetRanges)rdd.rdd()).offsetRanges();
        ShardedJedis redis = RedisSingleton.instance((HashMap) outputConfig).getResource();
        for (OffsetRange offset : offsetRanges)
        {
            this.msg.setLength(0);
            // Persist "partition:topic" -> offset (format matches the parser
            // in the warm-start restore code above).
            // NOTE(review): this stores fromOffset(), so after a restart the
            // current batch is re-read (at-least-once). Storing untilOffset()
            // after a successful batch would avoid that replay — confirm which
            // semantics are intended.
            redis.hset("POSITION_KAFKA_OFFSETS", offset.partition() + ":" + offset.topic(), offset.fromOffset() + "");
        }
        // NOTE(review): close() is skipped if hset throws — the pooled
        // connection leaks on failure; consider try/finally.
        redis.close();
        return (Object)rdd;
    }
}).mapToPair(new PhonePairFunction2());
// Drop pairs whose tuple, key, or value is null, then group the surviving
// records by key. (Generic parameters stripped by the page rendering.)
JavaPairDStream> groupedData = mapedDatas.filter(new Function() {
    private static final long serialVersionUID = 6634203502402639467L;
    public Boolean call(Object v1)
            throws Exception
    {
        Tuple2 record = (Tuple2) v1;
        // Keep only fully-populated pairs.
        return Boolean.valueOf((record != null) && (record._1 != null) && (record._2 != null));
    }
}).groupByKey();
// Hand each grouped micro-batch to RddProcessor, which gets the Redis/output
// settings so it can write results downstream.
groupedData.foreachRDD(new RddProcessor((HashMap)outputConfig));

你可能感兴趣的:(spark,kafka)