kafka_11_消费&生产协同事务

producer before 上游

package com.soul.kafka.level11;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;
import java.util.UUID;

/**
 *
Kafka的幂等性,只能保证一条记录的在分区发送的原子性,
但是如果要保证多条记录(多分区)之间的完整性,这个时候就需要开启kafk的事务操作。

在Kafka0.11.0.0除了引入的幂等性的概念,同时也引入了事务的概念。
通常Kafka的事务分为 生产者事务Only、消费者&生产者事务。
一般来说默认消费者消费的消息的级别是read_uncommited数据,这有可能读取到事务失败的数据,
所有在开启生产者事务之后,需要用户设置消费者的事务隔离级别。

isolation.level	=  read_uncommitted 默认
该选项有两个值read_committed|read_uncommitted,如果开始事务控制,
消费端必须将事务的隔离级别设置为read_committed

开启的生产者事务的时候,只需要指定transactional.id属性即可,
一旦开启了事务,默认生产者就已经开启了幂等性。
但是要求"transactional.id"的取值必须是唯一的,
同一时刻只能有一个"transactional.id"存储在,其他的将会被关闭。

 *
 */
public class KafkaProducerBefore {
    public static void main(String[] args) {

        //构建生产者
        KafkaProducer<String, String> producer = buildKafkaProducer();

        //初始化事务
        producer.initTransactions();

        try {
            //开启事务控制
            producer.beginTransaction();
            for (int i = 0; i < 10; i++) {
                //创建Record
                ProducerRecord<String, String> producerRecord = new ProducerRecord<>("topic02",
                        "K" + i, "V" + i);
                producer.send(producerRecord);
                //为了测试使用, 每条都刷, 避免没有数据写入到kafka
                producer.flush();
            }
            //避免本地缓存数据, 将消息刷到kafka集群
            producer.flush();
            //提交事务
            producer.commitTransaction();
        } catch (Exception e) {
            System.out.println("error: " + e.getMessage());
            //终止事务
            producer.abortTransaction();
        } finally {
            producer.close();
        }
    }

    public static KafkaProducer<String, String> buildKafkaProducer() {
        //构建生产者配置信息
        Properties props = new Properties();
        //集群hostname:port
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka01:9092,kafka02:9092,kafka03:9092");
        //序列化 反序列化
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        //配置生产者端事务ID
        props.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG,
                //避免重复, 拼接 UUID 来保证唯一
                "transaction-id" + ":" + UUID.randomUUID().toString());
        //配置批处理数量, 默认为16384字节, 测试时设置较小(多条数据缓冲到设定字节后, 打包一次发送)
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 1024);
        //为避免长时间未达到发送批次上线, 导致数据不发送, 设置必须发送的时间间隔, 毫秒, 5ms
        props.put(ProducerConfig.LINGER_MS_CONFIG, 5);

        //配置重试机制(all-至少一个副本写入数据, 20S未接收到应答则重复发送)
        props.put(ProducerConfig.ACKS_CONFIG, "all");
        props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 20000);
        //开启幂等机制
        props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);

        return new KafkaProducer<>(props);
    }

}

consumer middle 中游

package com.soul.kafka.level11;

import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.ProducerFencedException;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;

import java.time.Duration;
import java.util.*;

/**
 *
Kafka的幂等性,只能保证一条记录的在分区发送的原子性,
但是如果要保证多条记录(多分区)之间的完整性,这个时候就需要开启kafk的事务操作。

在Kafka0.11.0.0除了引入的幂等性的概念,同时也引入了事务的概念。
通常Kafka的事务分为 生产者事务Only、消费者&生产者事务。
一般来说默认消费者消费的消息的级别是read_uncommited数据,这有可能读取到事务失败的数据,
所有在开启生产者事务之后,需要用户设置消费者的事务隔离级别。

isolation.level	=  read_uncommitted 默认
该选项有两个值read_committed|read_uncommitted,如果开始事务控制,
消费端必须将事务的隔离级别设置为read_committed

开启的生产者事务的时候,只需要指定transactional.id属性即可,
一旦开启了事务,默认生产者就已经开启了幂等性。
但是要求"transactional.id"的取值必须是唯一的,
同一时刻只能有一个"transactional.id"存储在,其他的将会被关闭。
 *
 */
public class KafkaProducerAndConsumerMiddle {
    // 消费者&生产者, 场景:
    // 当前消费端订阅上游(topic02)的消息(record),
    // 消费成功且成功将加工后的消息发送给下游(topic03)后,
    // 标记为上游消息成功消费(提交offset)

    //测试:
    // 创建 topic03:
    // kafka-topics.sh --bootstrap-server kafka01:9092,kafka02:9092,kafka03:9092 --create --partitions 2 --replication-factor 2 --topic topic03
    // 查看创建的topic列表
    // kafka-topics.sh --bootstrap-server kafka01:9092,kafka02:9092,kafka03:9092 --list
    // 运行 KafkaConsumerAfter 下游消费者服务器
    // 运行 KafkaProducerAndConsumerMiddle 消费者&生产者事务服务器
    // 运行 KafkaProducerBefore 生产发送上游消息
    // 发现 消息从middle(topic02)加工处理后, 流转到了下游after(topic03)
    public static void main(String[] args) {

        //构建生产者
        KafkaProducer<String, String> producer = buildKafkaProducer();
        //构建消费者
        KafkaConsumer<String, String> consumer = buildKafkaConsumer("group01");

        //初始化事务
        producer.initTransactions();

        //消费者订阅上游生产的数据 topic2
        consumer.subscribe(Arrays.asList("topic02"));

        try {
            while (true) {
                //consumer拉取数据
                ConsumerRecords<String, String> consumerRecords = consumer.poll(Duration.ofSeconds(1));
                if (!consumerRecords.isEmpty()) {
                    //获取record数据迭代器
                    Iterator<ConsumerRecord<String, String>> consumerRecordIterator = consumerRecords.iterator();
                    //开启事务控制
                    producer.beginTransaction();
                    try {
                        //记录record元数据, 及消费端消费的消息的offset
                        Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
                        while (consumerRecordIterator.hasNext()) {
                            ConsumerRecord<String, String> record = consumerRecordIterator.next();
                            //记录元数据
                            offsets.put(new TopicPartition(record.topic(), record.partition()),
                                    //返回下次需要消费的消息的offset, 注意+1  !!!, 返回当前消息offset会重复消费当前消息
                                    new OffsetAndMetadata(record.offset() + 1));
                            System.out.println("middle flow key:" + record.key() + ", val:" + record.value()
                                    + ", partition:" + record.partition() + ", offset:" + record.offset());
                            //加工消费的上游消息, 创建新的Record交给下游处理
                            ProducerRecord<String, String> producerRecord = new ProducerRecord<>("topic03",
                                    //加工消息value
                                    record.key(), record.value() + "_after_gin_soul_deal");
                            producer.send(producerRecord);

                        }
                        //提交消费端偏移量
                        producer.sendOffsetsToTransaction(offsets, "group01");
                        //提交事务
                        producer.commitTransaction();
                    } catch (ProducerFencedException e) {
                        System.out.println("error: " + e.getMessage());
                        //终止事务
                        producer.abortTransaction();
                    }
                }
            }
        } catch (Exception e) {
            //终止事务
            producer.abortTransaction();
        } finally {
            producer.close();
        }
    }

    public static KafkaProducer<String, String> buildKafkaProducer() {
        //构建生产者配置信息
        Properties props = new Properties();
        //集群hostname:port
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka01:9092,kafka02:9092,kafka03:9092");
        //序列化 反序列化
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        //配置生产者端事务ID
        props.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG,
                //避免重复, 拼接 UUID 来保证唯一
                "transaction-id" + ":" + UUID.randomUUID().toString());
        //配置批处理数量, 默认为16384字节, 测试时设置较小(多条数据缓冲到设定字节后, 打包一次发送)
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 1024);
        //为避免长时间未达到发送批次上线, 导致数据不发送, 设置必须发送的时间间隔, 毫秒, 5ms
        props.put(ProducerConfig.LINGER_MS_CONFIG, 5);

        //配置重试机制(all-至少一个副本写入数据, 20S未接收到应答则重复发送)
        props.put(ProducerConfig.ACKS_CONFIG, "all");
        props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 20000);
        //开启幂等机制
        props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);

        return new KafkaProducer<>(props);
    }

    public static KafkaConsumer<String, String> buildKafkaConsumer(String group) {
        //构建消费者配置信息
        Properties props = new Properties();
        //集群hostname:port
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka01:9092,kafka02:9092,kafka03:9092");
        //序列化 反序列化
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        //与 KafkaConsumerReadUnCommitted 需不同组来验证
        props.put(ConsumerConfig.GROUP_ID_CONFIG, group);
        //设置事务隔离级别, 读已提交(仅仅消费有提交标记的消息)
        props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
        // 消费者&生产者事务, 必须关闭消费端的自动提交
        // 应避免消费失败同时更新了offset, 导致无法重新消费"消费端失败"的消息
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);

        return new KafkaConsumer<>(props);
    }
}

consumer after 下游

package com.soul.kafka.level11;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.time.Duration;
import java.util.Iterator;
import java.util.Properties;
import java.util.regex.Pattern;

/**
 * Downstream stage: a {@code read_committed} consumer that prints every record arriving on
 * {@code topic03}.
 */
public class KafkaConsumerAfter {

    /**
     * Polls {@code topic03} forever and prints key, value, partition and offset of each record.
     * The consumer is closed if the loop terminates with an exception.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // 1. Connection and deserialization settings
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka01:9092,kafka02:9092,kafka03:9092");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        // Per the original note: use a group different from KafkaConsumerReadUnCommitted
        // (a class not shown here) when comparing isolation levels.
        // NOTE(review): this is the same group id the middle stage uses for topic02; the two
        // stages then share one consumer group with different subscriptions - confirm intended.
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "group01");
        // Only deliver records of committed transactions
        props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");

        // 2. Create the consumer; try-with-resources closes it when poll() throws
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            // 3. Pattern subscription; this regex matches exactly the topic name "topic03"
            consumer.subscribe(Pattern.compile("topic03"));

            while (true) {
                ConsumerRecords<String, String> consumerRecords = consumer.poll(Duration.ofSeconds(1));
                for (ConsumerRecord<String, String> record : consumerRecords) {
                    System.out.println("key:" + record.key() + ", val:" + record.value()
                            + ", partition:" + record.partition() + ", offset:" + record.offset());
                }
            }
        }
    }
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build descriptor for the Kafka example module (com.soul:kafka:0.0.1). -->
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.soul</groupId>
    <artifactId>kafka</artifactId>
    <version>0.0.1</version>
    <name>kafka</name>

    <properties>
        <!-- NOTE(review): this property is not referenced anywhere else in this POM; the
             compiler level is set explicitly on maven-compiler-plugin below. -->
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <!-- kafka begin -->

        <!-- Kafka client library (producer / consumer API used by the example classes) -->
        <!--https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients-->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.2.0</version>
        </dependency>

        <!-- Logging: SLF4J API bound to log4j 1.2 through slf4j-log4j12 -->
        <!-- https://mvnrepository.com/artifact/log4j/log4j -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>

        <!-- NOTE(review): no commons-lang3 usage is visible in the example classes shown with
             this POM; confirm the dependency is needed. -->
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.9</version>
        </dependency>

        <!-- kafka end -->
        
    </dependencies>

    <build>
        <plugins>
            <!-- Compile for Java 8 source and bytecode level.
                 NOTE(review): no plugin <version> is declared here; consider pinning one for
                 reproducible builds. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

log4j.properties

# Root logger: level INFO, output routed to the appender named "console".
log4j.rootLogger = info,console

# "console" appender: writes to standard output using a pattern layout.
log4j.appender.console = org.apache.log4j.ConsoleAppender
log4j.appender.console.Target = System.out
log4j.appender.console.layout = org.apache.log4j.PatternLayout
# Line format: level, timestamp (yyyy-MM-dd HH:mm:ss), logger name, " - ", message, newline.
log4j.appender.console.layout.ConversionPattern =  %p %d{yyyy-MM-dd HH:mm:ss} %c - %m%n

你可能感兴趣的:(kafka)