Maven dependency:

<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.10</artifactId>
    <version>0.9.0-kafka-2.0.2</version>
</dependency>
// kafka_conf is assumed to be a java.util.Properties returned by KafkaPool.getInstance().getConfig()
private Properties kafka_conf = null;
private SparkKafka kafka = null;
private static final String TOPIC_SOURCE = "TP_LABEL";

public SparkStoredKuduApp(String[] args) {
    kafka_conf = KafkaPool.getInstance().getConfig();
    kafka_conf.setProperty("zookeeper_connect", "personas1:2181,personas2:2181,personas4:2181");
    kafka_conf.setProperty("groupid_tdx", "tpsc01");
    kafka_conf.setProperty("bootstrap.servers", "personas1:9092,personas2:9092,personas4:9092");
    kafka = new SparkKafka(kafkaParams());
    kafka.setTopics(new HashSet<>(Arrays.asList(TOPIC_SOURCE)));
}

private Map<String, String> kafkaParams() {
    Map<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, kafka_conf.getProperty("groupid_tdx"));
    kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,
            kafka_conf.getProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
    // zookeeper.connect is required by KafkaCluster to read and write offsets in ZooKeeper
    kafkaParams.put("zookeeper.connect", kafka_conf.getProperty("zookeeper_connect"));
    return kafkaParams;
}
// Fetch the offsets at which Kafka consumption should start
Map<TopicAndPartition, Long> fromOffsets = kafka.getOffset();
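With the starting offsets in hand, the direct stream can be created from them. The following is a minimal sketch of how the pieces fit together, assuming a JavaStreamingContext named jssc and String keys with byte[] values; the variable names and decoder choices are assumptions, not taken from the original code:

import kafka.message.MessageAndMetadata;
import kafka.serializer.DefaultDecoder;
import kafka.serializer.StringDecoder;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.kafka.KafkaUtils;

// Start the direct stream exactly at the offsets read back from ZooKeeper
JavaInputDStream<MessageAndMetadata<String, byte[]>> stream = KafkaUtils.createDirectStream(
        jssc,
        String.class,              // key type
        byte[].class,              // value type
        StringDecoder.class,       // key decoder
        DefaultDecoder.class,      // value decoder
        SparkKafka.getMsgClass(),  // record class: MessageAndMetadata<String, byte[]>
        kafka.getKafkaParams(),
        fromOffsets,
        new Function<MessageAndMetadata<String, byte[]>, MessageAndMetadata<String, byte[]>>() {
            @Override
            public MessageAndMetadata<String, byte[]> call(MessageAndMetadata<String, byte[]> md) {
                return md; // keep the whole record so downstream code can still see topic/partition
            }
        });

This createDirectStream overload is the one that accepts explicit fromOffsets, which is what lets the job resume from the offsets stored in ZooKeeper instead of relying on auto.offset.reset.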
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.spark.streaming.kafka.HasOffsetRanges;
import org.apache.spark.streaming.kafka.KafkaCluster;
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset;
import org.apache.spark.streaming.kafka.OffsetRange;

import kafka.common.TopicAndPartition;
import kafka.message.MessageAndMetadata;
import scala.Predef;
import scala.Tuple2;
import scala.collection.JavaConversions;

public class SparkKafka implements Serializable {

    private static final long serialVersionUID = -7633373735487600970L;

    private Map<String, String> kafkaParams = null;
    private Set<String> topics = null;
    private KafkaCluster kafkaCluster = null;

    public SparkKafka(Map<String, String> kafkaParams) {
        this.kafkaParams = kafkaParams;
        init();
    }

    private void init() {
        // KafkaCluster expects an immutable Scala Map, so convert the Java map first
        scala.collection.mutable.Map<String, String> mutableKafkaParam = JavaConversions
                .mapAsScalaMap(kafkaParams);
        scala.collection.immutable.Map<String, String> immutableKafkaParam = mutableKafkaParam
                .toMap(new Predef.$less$colon$less<Tuple2<String, String>, Tuple2<String, String>>() {
                    @Override
                    public Tuple2<String, String> apply(Tuple2<String, String> v1) {
                        return v1;
                    }
                });
        kafkaCluster = new KafkaCluster(immutableKafkaParam);
    }

    /**
     * Get the Kafka offsets at which consumption should start.
     */
    public Map<TopicAndPartition, Long> getOffset() {
        Map<TopicAndPartition, Long> fromOffsets = new HashMap<TopicAndPartition, Long>();
        scala.collection.mutable.Set<String> mutableTopics = JavaConversions.asScalaSet(this.topics);
        scala.collection.immutable.Set<String> immutableTopics = mutableTopics.toSet();
        scala.collection.immutable.Set<TopicAndPartition> scalaTopicAndPartitionSet = kafkaCluster
                .getPartitions(immutableTopics).right().get();

        if (kafkaCluster.getConsumerOffsets(kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG),
                scalaTopicAndPartitionSet).isLeft()) {
            // First consumption for this group: no stored offsets, start from the earliest leader offsets
            scala.collection.immutable.Map<TopicAndPartition, LeaderOffset> earliestOffsetsTemp = kafkaCluster
                    .getEarliestLeaderOffsets(scalaTopicAndPartitionSet).right().get();
            Set<TopicAndPartition> javaTopicAndPartitionSet = JavaConversions
                    .setAsJavaSet(scalaTopicAndPartitionSet);
            Map<TopicAndPartition, LeaderOffset> earliestOffsets = JavaConversions
                    .mapAsJavaMap(earliestOffsetsTemp);
            for (TopicAndPartition topicAndPartition : javaTopicAndPartitionSet) {
                LeaderOffset earliestOffset = earliestOffsets.get(topicAndPartition);
                fromOffsets.put(topicAndPartition, earliestOffset.offset());
            }
        } else {
            scala.collection.immutable.Map<TopicAndPartition, LeaderOffset> earliestOffsetsTemp = kafkaCluster
                    .getEarliestLeaderOffsets(scalaTopicAndPartitionSet).right().get();
            scala.collection.immutable.Map<TopicAndPartition, Object> consumerOffsetsTemp = kafkaCluster
                    .getConsumerOffsets(kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG),
                            scalaTopicAndPartitionSet)
                    .right().get();
            Map<TopicAndPartition, LeaderOffset> earliestOffsets = JavaConversions
                    .mapAsJavaMap(earliestOffsetsTemp);
            Map<TopicAndPartition, Object> consumerOffsets = JavaConversions.mapAsJavaMap(consumerOffsetsTemp);
            Set<TopicAndPartition> javaTopicAndPartitionSet = JavaConversions
                    .setAsJavaSet(scalaTopicAndPartitionSet);
            for (TopicAndPartition topicAndPartition : javaTopicAndPartitionSet) {
                LeaderOffset earliestOffset = earliestOffsets.get(topicAndPartition);
                Long offset = (Long) consumerOffsets.get(topicAndPartition);
                // If the stored offset is smaller than the leader's earliest offset, Kafka's log
                // retention has probably deleted those segments already. Resume from the earliest
                // leader offset instead to avoid an OffsetOutOfRange exception.
                if (offset < earliestOffset.offset()) {
                    offset = earliestOffset.offset();
                }
                fromOffsets.put(topicAndPartition, offset);
            }
        }
        return fromOffsets;
    }

    /**
     * Commit Kafka offsets after a batch has been processed.
     */
    public void setOffset(HasOffsetRanges range) {
        OffsetRange[] offsets = range.offsetRanges();
        for (OffsetRange o : offsets) {
            // Wrap the topic/partition -> offset mapping in a Java Map
            TopicAndPartition topicAndPartition = new TopicAndPartition(o.topic(), o.partition());
            Map<TopicAndPartition, Object> topicAndPartitionObjectMap = new HashMap<TopicAndPartition, Object>();
            topicAndPartitionObjectMap.put(topicAndPartition, o.untilOffset());

            // Convert the Java map to a Scala immutable.Map
            scala.collection.mutable.Map<TopicAndPartition, Object> map = JavaConversions
                    .mapAsScalaMap(topicAndPartitionObjectMap);
            scala.collection.immutable.Map<TopicAndPartition, Object> scalaTopicAndPartitionObjectMap = map
                    .toMap(new Predef.$less$colon$less<Tuple2<TopicAndPartition, Object>, Tuple2<TopicAndPartition, Object>>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public Tuple2<TopicAndPartition, Object> apply(Tuple2<TopicAndPartition, Object> v1) {
                            return v1;
                        }
                    });
            // Write the updated offsets back through kafkaCluster (stored in ZooKeeper)
            kafkaCluster.setConsumerOffsets(kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG),
                    scalaTopicAndPartitionObjectMap);
        }
    }

    @SuppressWarnings("unchecked")
    public static Class<MessageAndMetadata<String, byte[]>> getMsgClass() {
        return (Class<MessageAndMetadata<String, byte[]>>) (Class<?>) MessageAndMetadata.class;
    }

    public Map<String, String> getKafkaParams() {
        return kafkaParams;
    }

    public void setKafkaParams(Map<String, String> kafkaParams) {
        this.kafkaParams = kafkaParams;
    }

    public Set<String> getTopics() {
        return topics;
    }

    public void setTopics(Set<String> topics) {
        this.topics = topics;
    }

    public KafkaCluster getKafkaCluster() {
        return kafkaCluster;
    }

    public void setKafkaCluster(KafkaCluster kafkaCluster) {
        this.kafkaCluster = kafkaCluster;
    }
}
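For the stored offsets to advance, setOffset has to be called after each successfully processed batch. A minimal usage sketch, continuing from the stream created above (the processing step is a placeholder):

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.kafka.HasOffsetRanges;

stream.foreachRDD(new VoidFunction<JavaRDD<MessageAndMetadata<String, byte[]>>>() {
    @Override
    public void call(JavaRDD<MessageAndMetadata<String, byte[]>> rdd) {
        // ... process the batch here (e.g. write it to Kudu) ...

        // The underlying RDD of a direct Kafka stream implements HasOffsetRanges;
        // this callback runs on the driver, so the cast and the ZooKeeper write are safe here.
        kafka.setOffset((HasOffsetRanges) rdd.rdd());
    }
});

Committing only after the batch has been processed gives at-least-once semantics: if the job dies between processing and the commit, the batch is simply replayed on restart.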