Kafka Tumbling Time Window

Tumbling time windows are fixed-size, non-overlapping, gap-less windows. A tumbling window is defined by a single property: its size. A tumbling window is effectively a hopping window whose size equals its advance interval. Because tumbling windows never overlap, each data record belongs to exactly one window.
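
This relationship is visible directly in the Kafka Streams DSL: a tumbling window is created from a size alone, while a hopping window additionally specifies an advance interval smaller than the size. A minimal sketch (the class name is only for illustration):

import java.time.Duration;

import org.apache.kafka.streams.kstream.TimeWindows;

public class WindowDefinitions {

    // Tumbling window: defined only by its size; consecutive windows never overlap
    static final TimeWindows TUMBLING = TimeWindows.of(Duration.ofMinutes(1));

    // Hopping window: same size, but an advance interval smaller than the size makes windows overlap
    static final TimeWindows HOPPING =
            TimeWindows.of(Duration.ofMinutes(1)).advanceBy(Duration.ofSeconds(30));
}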

Example:
Messages in the "value" topic have the format: {"uid": 1, "value": 19}
For each uid, compute the maximum, minimum and average of value within each one-minute window, and write the results to the "result" topic.
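
The demo below imports com.example.demo.vo.WindowResult, which is not shown in the original post. Judging from how it is used, it is a plain value object holding uid, min, max, count, sum and avg; a minimal sketch (field types and constructor argument order are assumptions):

// Assumed shape of com.example.demo.vo.WindowResult; the real class is not shown in the post
public class WindowResult {
    private int uid;
    private long min;
    private long max;
    private long count;
    private long sum;
    private long avg;

    public WindowResult() {
    }

    public WindowResult(int uid, long min, long max, long count, long sum, long avg) {
        this.uid = uid;
        this.min = min;
        this.max = max;
        this.count = count;
        this.sum = sum;
        this.avg = avg;
    }

    // Getters and setters used by the aggregation below
    public int getUid() { return uid; }
    public void setUid(int uid) { this.uid = uid; }
    public long getMin() { return min; }
    public void setMin(long min) { this.min = min; }
    public long getMax() { return max; }
    public void setMax(long max) { this.max = max; }
    public long getCount() { return count; }
    public void setCount(long count) { this.count = count; }
    public long getSum() { return sum; }
    public void setSum(long sum) { this.sum = sum; }
    public long getAvg() { return avg; }
    public void setAvg(long avg) { this.avg = avg; }
}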

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.example.demo.vo.WindowResult;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.springframework.stereotype.Component;

import java.time.Duration;
import java.util.Properties;

@Component
public class TumblingTimeWindowDemo {

    // Size of the tumbling window, in seconds
    private static final int TIME_WINDOW_SECONDS = 60;

    public static void main(String[] args) {

        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "tumbling-time-window");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // Disable record caching so every aggregate update is forwarded downstream immediately
        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);

        StreamsBuilder builder = new StreamsBuilder();

        // Read the raw JSON messages from the "value" topic
        KStream<String, String> valueStream = builder.stream("value");

        // Re-key each record by its uid so that values with the same uid
        // are aggregated together within a window
        valueStream.selectKey((key, value) -> {
            String newKey;
            try {
                JSONObject json = JSON.parseObject(value);
                newKey = json.getString("uid");
            } catch (Exception ex) {
                return key;
            }
            return newKey;
        }).groupByKey()
                // One-minute tumbling window: defined by size only, so windows never overlap
                .windowedBy(TimeWindows.of(Duration.ofSeconds(TIME_WINDOW_SECONDS)))
                .aggregate(() -> {
                    // Initializer: an empty aggregate, serialized as JSON
                    WindowResult result = new WindowResult(0, 0, 0, 0, 0, 0);
                    return JSONObject.toJSONString(result);
                }, (aggKey, newValue, aggValue) -> {
                    // Messages in the topic have the format {"uid": 1, "value": 19}
                    WindowResult result = JSONObject.parseObject(aggValue, WindowResult.class);
                    Long newValueLong = null;
                    try {
                        JSONObject json = JSON.parseObject(newValue);
                        newValueLong = json.getLong("value");
                    } catch (Exception ex) {
                        // Ignore records that are not valid JSON
                    }
                    if (newValueLong == null) {
                        // Nothing usable to aggregate; keep the current aggregate unchanged
                        return aggValue;
                    }
                    // Before the first record the count is 0, so min/max are taken from the
                    // first value; afterwards they are updated incrementally
                    if (result.getCount() == 0 || result.getMin() > newValueLong) {
                        result.setMin(newValueLong);
                    }
                    if (result.getCount() == 0 || result.getMax() < newValueLong) {
                        result.setMax(newValueLong);
                    }
                    result.setUid(Integer.valueOf(aggKey));
                    result.setCount(result.getCount() + 1);
                    result.setSum(result.getSum() + newValueLong);
                    // Integer average; use a floating-point field if fractional precision is needed
                    result.setAvg(result.getSum() / result.getCount());
                    return JSONObject.toJSONString(result);
                }, Materialized.with(Serdes.String(), Serdes.String()))
                        // The aggregate's key is Windowed<String>; map it back to the plain uid
                        // so the default String key serde can serialize it, then write to "result"
                        .toStream((windowedKey, value) -> windowedKey.key())
                        .to("result");


        final KafkaStreams streams = new KafkaStreams(builder.build(), props);

        streams.start();

        // Close the Streams application cleanly when the JVM shuts down
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }


}
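
Because record caching is disabled, the aggregation above forwards an updated result for every incoming record, so the "result" topic also contains intermediate values for each window. If only one final record per uid and window is wanted, the windowed aggregate can be suppressed until the window closes. A sketch of that variant, using a simple count instead of the full max/min/avg aggregate to keep it short; the grace period, the topic name "final-counts" and the class name are illustrative assumptions:

import java.time.Duration;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.Suppressed;
import org.apache.kafka.streams.kstream.TimeWindows;

public class SuppressedWindowSketch {

    public static void main(String[] args) {
        // Properties and KafkaStreams startup omitted; same as in the demo above
        StreamsBuilder builder = new StreamsBuilder();
        KStream<String, String> valueStream = builder.stream("value");

        valueStream
                .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
                // The grace period must be bounded for untilWindowCloses to ever emit
                .windowedBy(TimeWindows.of(Duration.ofSeconds(60)).grace(Duration.ofSeconds(10)))
                .count(Materialized.with(Serdes.String(), Serdes.Long()))
                // Hold back updates until the window closes, then emit one result per key and window
                .suppress(Suppressed.untilWindowCloses(Suppressed.BufferConfig.unbounded()))
                .toStream((windowedKey, count) -> windowedKey.key())
                .to("final-counts", Produced.with(Serdes.String(), Serdes.Long()));
    }
}

Note that untilWindowCloses only emits after the grace period has passed, so results appear with a corresponding delay.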
