Kafka by itself cannot guarantee global ordering across multiple partitions: partitioning exists precisely so that messages can be processed in parallel for throughput, and ordering is only guaranteed within a single partition. To achieve multi-partition ordering, the following approaches can be applied at the system or business-logic level:
1. Single-partition routing (send all order-sensitive messages to the same partition)
2. External sorting on the consumer side
3. Kafka Streams or custom processing
4. Topic-level ordering control
5. Transactions with a custom partitioner (consumers read with isolation.level=read_committed)
Each approach below includes a fairly detailed design rationale, operational steps, and a simple code implementation in Java, with production scalability and stability in mind.
Approach 1: Single-partition routing
Design: produce every message that must stay ordered with the same message key; the default partitioner hashes identical keys to the same partition, and within a partition Kafka guarantees strict order.
Steps: create the topic (see the Admin API sketch after the code example), send all related messages with one fixed key, and consume that partition sequentially.
Code example:
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class SinglePartitionProducer {
    public static void main(String[] args) {
        // Producer configuration
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.ACKS_CONFIG, "all"); // wait for all in-sync replicas
        props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true"); // retries cannot reorder messages

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        String topic = "ordered-topic";

        // All messages use the same key and therefore hash to the same partition
        String fixedKey = "order-group-1";
        for (int i = 0; i < 100; i++) {
            String message = "Message-" + i;
            ProducerRecord<String, String> record = new ProducerRecord<>(topic, fixedKey, message);
            producer.send(record, (metadata, exception) -> {
                if (exception == null) {
                    System.out.printf("Sent to partition %d, offset %d%n", metadata.partition(), metadata.offset());
                } else {
                    exception.printStackTrace();
                }
            });
        }
        producer.close(); // flushes outstanding sends before closing
    }
}
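The steps above assume a topic already exists whose order-sensitive traffic lands on one partition. As a complement, here is a minimal sketch using the Kafka Admin API to create a dedicated single-partition topic; the topic name and the replication factor of 3 are assumptions for illustration:

import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;

import java.util.Collections;
import java.util.Properties;

public class OrderedTopicSetup {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        try (Admin admin = Admin.create(props)) {
            // One partition yields a total order; replication factor 3 (assumed) is for durability
            NewTopic topic = new NewTopic("ordered-topic", 1, (short) 3);
            admin.createTopics(Collections.singleton(topic)).all().get();
        }
    }
}

With a single local test broker, drop the replication factor to 1.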
Production notes: one partition caps throughput at what a single partition can sustain and creates a hot partition on one broker; to keep retries from reordering messages, leave idempotence enabled (or set max.in.flight.requests.per.connection=1), and monitor consumer lag on that partition closely.
Approach 2: External sorting on the consumer side
Design: let producers write to all partitions for throughput, embed an event timestamp (or sequence number) in each message, and have a consumer buffer records from all partitions and re-emit them in timestamp order once a watermark has passed.
Steps: produce values in the form "payload|timestamp" (a producer sketch follows the code example), consume the multi-partition topic, push records into a priority queue ordered by timestamp, and only process records older than the watermark.
Code example:
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.time.Duration;
import java.util.Collections;
import java.util.PriorityQueue;
import java.util.Properties;

public class ExternalSortingConsumer {
    static class Message implements Comparable<Message> {
        final String value;
        final long timestamp;

        Message(String value, long timestamp) {
            this.value = value;
            this.timestamp = timestamp;
        }

        @Override
        public int compareTo(Message other) {
            return Long.compare(this.timestamp, other.timestamp);
        }
    }

    public static void main(String[] args) {
        // Consumer configuration
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "sorting-consumer-group");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // offsets are committed manually below

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Collections.singletonList("multi-partition-topic"));

        // Buffer records from all partitions in a PriorityQueue ordered by timestamp
        PriorityQueue<Message> buffer = new PriorityQueue<>();
        long maxSeenTimestamp = 0; // newest timestamp observed so far; basis of the watermark

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, String> record : records) {
                // Message format is assumed to be "payload|timestamp"
                String[] parts = record.value().split("\\|");
                String message = parts[0];
                long timestamp = Long.parseLong(parts[1]);
                buffer.offer(new Message(message, timestamp));
                maxSeenTimestamp = Math.max(maxSeenTimestamp, timestamp);
            }
            // Emit only messages at least 1 s older than the newest one seen:
            // anything younger might still be overtaken by a slower partition
            while (!buffer.isEmpty() && buffer.peek().timestamp <= maxSeenTimestamp - 1000) {
                Message msg = buffer.poll();
                System.out.println("Message: " + msg.value + " timestamp: " + msg.timestamp);
            }
            // Commit offsets manually
            consumer.commitSync();
        }
    }
}
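For completeness, a minimal sketch of the producing side this consumer assumes; the "payload|timestamp" encoding is the only real contract, while the topic and key names are placeholders:

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class TimestampedProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 100; i++) {
                // Embed the event time so the consumer can re-sort across partitions
                String value = "Message-" + i + "|" + System.currentTimeMillis();
                producer.send(new ProducerRecord<>("multi-partition-topic", "key-" + i, value));
            }
        }
    }
}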
Production notes: bound the buffer (its memory footprint grows with the out-of-order depth across partitions); committing offsets while records are still buffered means a crash loses them, so in practice commit only up to the emitted watermark or persist the buffer; the scheme also assumes producer clocks are reasonably synchronized.
Approach 3: Kafka Streams or custom processing
Design: run a Kafka Streams topology that reads the multi-partition topic, buffers records inside a Transformer sorted by the timestamp embedded in each value, and forwards them in order to a downstream output topic.
Steps: declare a state store, attach a Transformer that accumulates records in timestamp order, and emit the oldest record once the buffer reaches a threshold (a time-based variant follows the code example).
Code example:
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Transformer;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
import org.apache.kafka.streams.state.Stores;

import java.util.Properties;
import java.util.TreeSet;

public class KafkaStreamsSorter {
    public static void main(String[] args) {
        // Streams configuration
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-sorter");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();

        // Declare a persistent state store (available to the transformer for
        // fault-tolerant buffering; the sketch below keeps its working set in memory)
        StoreBuilder<KeyValueStore<String, String>> storeBuilder = Stores.keyValueStoreBuilder(
                Stores.persistentKeyValueStore("sorting-store"),
                Serdes.String(), Serdes.String());
        builder.addStateStore(storeBuilder);

        // Read the input topic
        KStream<String, String> input = builder.stream("multi-partition-topic");

        // Sort by timestamp and forward to the output topic
        input.transform(SortingTransformer::new, "sorting-store")
                .to("ordered-output-topic");

        KafkaStreams streams = new KafkaStreams(builder.build(), props);
        streams.start();
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }

    static class SortingTransformer implements Transformer<String, String, KeyValue<String, String>> {
        private KeyValueStore<String, String> store;
        private TreeSet<String> sortedMessages;

        @Override
        @SuppressWarnings("unchecked")
        public void init(ProcessorContext context) {
            this.store = (KeyValueStore<String, String>) context.getStateStore("sorting-store");
            // Order values of the form "payload|timestamp" by their embedded timestamp
            this.sortedMessages = new TreeSet<>((a, b) -> {
                long t1 = Long.parseLong(a.split("\\|")[1]);
                long t2 = Long.parseLong(b.split("\\|")[1]);
                return Long.compare(t1, t2);
            });
        }

        @Override
        public KeyValue<String, String> transform(String key, String value) {
            sortedMessages.add(value);
            if (sortedMessages.size() >= 100) { // emit once 100 records are buffered
                String oldest = sortedMessages.pollFirst();
                return KeyValue.pair(key, oldest);
            }
            return null; // keep buffering until the threshold is reached
        }

        @Override
        public void close() {}
    }
}
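One limitation of the count-based flush above is that fewer than 100 buffered messages wait forever. A hedged variant, under the same "payload|timestamp" assumption, uses a wall-clock punctuator to drain the buffer on a schedule instead:

import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.kstream.Transformer;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;

import java.time.Duration;
import java.util.TreeSet;

// Drop-in replacement for SortingTransformer: flushes on a schedule, not on a count
public class PunctuatedSortingTransformer
        implements Transformer<String, String, KeyValue<String, String>> {
    private ProcessorContext context;
    private TreeSet<String> buffer;

    @Override
    public void init(ProcessorContext context) {
        this.context = context;
        this.buffer = new TreeSet<>((a, b) -> Long.compare(
                Long.parseLong(a.split("\\|")[1]),
                Long.parseLong(b.split("\\|")[1])));
        // Every second, forward everything buffered so far in timestamp order
        context.schedule(Duration.ofSeconds(1), PunctuationType.WALL_CLOCK_TIME, ts -> {
            String oldest;
            while ((oldest = buffer.pollFirst()) != null) {
                context.forward(null, oldest);
            }
        });
    }

    @Override
    public KeyValue<String, String> transform(String key, String value) {
        buffer.add(value);
        return null; // emission happens in the punctuator
    }

    @Override
    public void close() {}
}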
Production notes: the in-memory TreeSet is lost on restart, so for fault tolerance the buffered records should be kept in the declared state store, which Kafka Streams backs with a changelog topic; a comparator that compares only timestamps silently drops values with equal timestamps, so add a tiebreaker; consider processing.guarantee=exactly_once_v2 to avoid duplicates after failures.
Approach 4: Topic-level ordering control
Design: funnel the multi-partition topic through a relay: one consumer reads all partitions and republishes every record to a topic created with exactly one partition, where Kafka's per-partition guarantee yields a total order for downstream consumers.
Steps: create a single-partition output topic, run the relay below, and point downstream consumers at the single-partition topic.
Code example:
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.clients.producer.*;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class TopicLevelOrdering {
    public static void main(String[] args) {
        // Producer configuration
        Properties producerProps = new Properties();
        producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        KafkaProducer<String, String> producer = new KafkaProducer<>(producerProps);

        // Consumer configuration
        Properties consumerProps = new Properties();
        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "ordering-group");
        consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // commit manually after relaying

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(consumerProps);
        consumer.subscribe(Collections.singletonList("multi-partition-topic"));

        // The output topic must be created with exactly one partition
        String outputTopic = "single-partition-topic";

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, String> record : records) {
                // Relay as-is; if records carry sequence numbers, sort here before republishing
                ProducerRecord<String, String> newRecord =
                        new ProducerRecord<>(outputTopic, null, record.value());
                producer.send(newRecord, (metadata, exception) -> {
                    if (exception != null) {
                        exception.printStackTrace();
                    }
                });
            }
            producer.flush(); // ensure relayed records are acknowledged before committing offsets
            consumer.commitSync();
        }
    }
}
Production notes: this reintroduces the single-partition throughput cap at the output topic; the resulting order is simply the relay's arrival order unless records carry sequence numbers and are sorted before republishing; and because sends are asynchronous, offsets must not be committed before the producer has flushed (hence the flush() before commitSync() above); for exactly-once relaying, wrap consume-and-produce in a transaction.
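Approach 5: Transactions with a custom partitioner
Design: a custom partitioner pins each business group to one partition (preserving order within the group), while transactions make a batch of writes visible atomically; consumers set isolation.level=read_committed so they never observe a partially written or aborted batch.
Steps: implement org.apache.kafka.clients.producer.Partitioner, configure transactional.id and idempotence on the producer, wrap the sends in beginTransaction/commitTransaction, and enable read_committed on the consumers.
Code example (a minimal sketch: the "group-N" key format, topic name, and transactional.id below are assumptions for illustration):

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Map;
import java.util.Properties;

public class TransactionalOrderedProducer {
    // Custom partitioner: all keys of one business group map to the same partition,
    // so the group's messages keep their relative order
    public static class GroupPartitioner implements Partitioner {
        @Override
        public int partition(String topic, Object key, byte[] keyBytes,
                             Object value, byte[] valueBytes, Cluster cluster) {
            int numPartitions = cluster.partitionCountForTopic(topic);
            // Hypothetical key format "group-N": strip the sequence suffix to get the group id
            String group = key.toString().substring(0, key.toString().lastIndexOf('-'));
            return (group.hashCode() & Integer.MAX_VALUE) % numPartitions;
        }

        @Override
        public void close() {}

        @Override
        public void configure(Map<String, ?> configs) {}
    }

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true");
        props.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "ordered-tx-producer-1");
        props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, GroupPartitioner.class.getName());

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        producer.initTransactions();
        try {
            producer.beginTransaction();
            for (int i = 0; i < 100; i++) {
                // Same group => same partition => ordered; the whole batch commits atomically
                producer.send(new ProducerRecord<>("ordered-topic", "orders-" + i, "Message-" + i));
            }
            producer.commitTransaction();
        } catch (Exception e) {
            producer.abortTransaction();
        } finally {
            producer.close();
        }
    }
}

On the consumer side the only required change is consumerProps.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"), which hides records belonging to aborted or still-open transactions.
Production notes: transactions provide atomic visibility, not cross-partition ordering by themselves; ordering still comes from pinning a group to one partition, and each producer instance needs a stable transactional.id so that zombie producers are fenced after restarts.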