Flink consuming Kafka data, including custom serialization and deserialization

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.youfan.kafka;

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010;

import javax.annotation.Nullable;

/**
 * A simple example that shows how to read from and write to Kafka. This will read String messages
 * from the input topic, parse them into a POJO type {@link KafkaEvent}, group by some key, and finally
 * perform a rolling addition on each key for which the results are written back to another topic.
 *
 * <p>This example also demonstrates using a watermark assigner to generate per-partition
 * watermarks directly in the Flink Kafka consumer. For demonstration purposes, it is assumed that
 * the String messages are formatted as a (word,frequency,timestamp) tuple.
 *
 * <p>Example usage:
 * 	--input-topic test-input --output-topic test-output --bootstrap.servers localhost:9092 --zookeeper.connect localhost:2181 --group.id myconsumer
 */
public class Kafka010Example {

	public static void main(String[] args) throws Exception {
		// parse input arguments
		args = new String[]{"--input-topic", "test1", "--output-topic", "test2",
				"--bootstrap.servers", "192.168.80.134:9092",
				"--zookeeper.connect", "192.168.80.134:2181",
				"--group.id", "myconsumer"};
		final ParameterTool parameterTool = ParameterTool.fromArgs(args);

		if (parameterTool.getNumberOfParameters() < 5) {
			System.out.println("Missing parameters!\n" +
					"Usage: Kafka --input-topic <topic> --output-topic <topic> " +
					"--bootstrap.servers <kafka brokers> " +
					"--zookeeper.connect <zk quorum> --group.id <some id>");
			return;
		}

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().disableSysoutLogging();
		env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
		env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
		env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		DataStream<KafkaEvent> input = env
				.addSource(
					new FlinkKafkaConsumer010<>(
						parameterTool.getRequired("input-topic"),
						new KafkaEventSchema(),
						parameterTool.getProperties())
					.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
				.keyBy("word")
				.map(new RollingAdditionMapper());

		input.addSink(
				new FlinkKafkaProducer010<>(
					parameterTool.getRequired("output-topic"),
					new KafkaEventSchema(),
					parameterTool.getProperties()));

		env.execute("Kafka 0.10 Example");
	}

	/**
	 * A {@link RichMapFunction} that continuously outputs the current total frequency count of a key.
	 * The current total count is keyed state managed by Flink.
	 */
	private static class RollingAdditionMapper extends RichMapFunction<KafkaEvent, KafkaEvent> {

		private static final long serialVersionUID = 1180234853172462378L;

		private transient ValueState<Integer> currentTotalCount;

		@Override
		public KafkaEvent map(KafkaEvent event) throws Exception {
			Integer totalCount = currentTotalCount.value();
			if (totalCount == null) {
				totalCount = 0;
			}
			totalCount += event.getFrequency();

			currentTotalCount.update(totalCount);

			return new KafkaEvent(event.getWord(), totalCount, event.getTimestamp());
		}

		@Override
		public void open(Configuration parameters) throws Exception {
			currentTotalCount = getRuntimeContext().getState(
					new ValueStateDescriptor<>("currentTotalCount", Integer.class));
		}
	}

	/**
	 * A custom {@link AssignerWithPeriodicWatermarks}, that simply assumes that the input stream
	 * records are strictly ascending.
	 *
	 * <p>Flink also ships some built-in convenience assigners, such as the
	 * {@link BoundedOutOfOrdernessTimestampExtractor} and {@link AscendingTimestampExtractor}
	 */
	private static class CustomWatermarkExtractor implements AssignerWithPeriodicWatermarks<KafkaEvent> {

		private static final long serialVersionUID = -742759155861320823L;

		private long currentTimestamp = Long.MIN_VALUE;

		@Override
		public long extractTimestamp(KafkaEvent event, long previousElementTimestamp) {
			// the inputs are assumed to be of format (message,timestamp)
			this.currentTimestamp = event.getTimestamp();
			return event.getTimestamp();
		}

		@Nullable
		@Override
		public Watermark getCurrentWatermark() {
			return new Watermark(currentTimestamp == Long.MIN_VALUE ? Long.MIN_VALUE : currentTimestamp - 1);
		}
	}
}
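To give the job something to read, the input topic can be filled with records in the word##frequency##timestamp format that KafkaEventSchema expects. Below is a minimal producer sketch, assuming the plain kafka-clients library is on the classpath; the broker address and topic name are the ones hard-coded in the example above, and the class name TestDataProducer is made up for illustration:

package com.youfan.kafka;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class TestDataProducer {

	public static void main(String[] args) {
		Properties props = new Properties();
		// broker address taken from the example above; adjust to your environment
		props.put("bootstrap.servers", "192.168.80.134:9092");
		props.put("key.serializer", StringSerializer.class.getName());
		props.put("value.serializer", StringSerializer.class.getName());

		try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
			String[] words = {"flink", "kafka", "flink"};
			for (String word : words) {
				// each record matches the word##frequency##timestamp format
				// that KafkaEvent.fromString(...) expects
				String value = word + "##1##" + System.currentTimeMillis();
				producer.send(new ProducerRecord<>("test1", value));
			}
			producer.flush();
		}
	}
}

The zookeeper.connect and group.id parameters from the example are only needed by the Flink consumer, not by this producer.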

The custom POJO class

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.youfan.kafka;

/**
 * The event type used in the {@link Kafka010Example}.
 *
 * <p>This is a Java POJO, which Flink recognizes and will allow "by-name" field referencing
 * when keying a {@link org.apache.flink.streaming.api.datastream.DataStream} of such a type.
 * For a demonstration of this, see the code in {@link Kafka010Example}.
 */
public class KafkaEvent {

	private static final String splitword = "##";

	private String word;
	private int frequency;
	private long timestamp;

	public KafkaEvent() {}

	public KafkaEvent(String word, int frequency, long timestamp) {
		this.word = word;
		this.frequency = frequency;
		this.timestamp = timestamp;
	}

	public String getWord() {
		return word;
	}

	public void setWord(String word) {
		this.word = word;
	}

	public int getFrequency() {
		return frequency;
	}

	public void setFrequency(int frequency) {
		this.frequency = frequency;
	}

	public long getTimestamp() {
		return timestamp;
	}

	public void setTimestamp(long timestamp) {
		this.timestamp = timestamp;
	}

	public static KafkaEvent fromString(String eventStr) {
		String[] split = eventStr.split(splitword);
		return new KafkaEvent(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
	}

	@Override
	public String toString() {
		return word + splitword + frequency + splitword + timestamp;
	}
}
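The ##-delimited wire format implemented by fromString/toString can be sanity-checked on its own. A small sketch with made-up sample values (KafkaEventFormatDemo is a hypothetical class name):

package com.youfan.kafka;

public class KafkaEventFormatDemo {

	public static void main(String[] args) {
		// parse a sample record in the word##frequency##timestamp format
		KafkaEvent event = KafkaEvent.fromString("flink##3##1546300800000");

		System.out.println(event.getWord());      // flink
		System.out.println(event.getFrequency()); // 3

		// toString() reproduces the original wire format
		System.out.println(event);                // flink##3##1546300800000
	}
}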

Custom serialization: implementing the serialization interfaces

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.youfan.kafka;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;

import java.io.IOException;

/**
 * The serialization schema for the {@link KafkaEvent} type. This class defines how to transform a
 * Kafka record's bytes to a {@link KafkaEvent}, and vice-versa.
 */
public class KafkaEventSchema implements DeserializationSchema<KafkaEvent>, SerializationSchema<KafkaEvent> {

	private static final long serialVersionUID = 6154188370181669758L;

	@Override
	public byte[] serialize(KafkaEvent event) {
		return event.toString().getBytes();
	}

	@Override
	public KafkaEvent deserialize(byte[] message) throws IOException {
		return KafkaEvent.fromString(new String(message));
	}

	@Override
	public boolean isEndOfStream(KafkaEvent nextElement) {
		return false;
	}

	@Override
	public TypeInformation<KafkaEvent> getProducedType() {
		return TypeInformation.of(KafkaEvent.class);
	}
}
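A serialize/deserialize round trip through the schema is easy to verify. A minimal sketch with made-up values (KafkaEventSchemaDemo is a hypothetical class name; note that serialize() and deserialize() rely on the platform default charset via String.getBytes() and new String(byte[]), so the producing and consuming JVMs should agree on it):

package com.youfan.kafka;

import java.io.IOException;

public class KafkaEventSchemaDemo {

	public static void main(String[] args) throws IOException {
		KafkaEventSchema schema = new KafkaEventSchema();

		// serialize an event into the bytes that would be written to Kafka...
		byte[] bytes = schema.serialize(new KafkaEvent("flink", 3, 1546300800000L));

		// ...and turn the same bytes back into a POJO, as the consumer would
		KafkaEvent event = schema.deserialize(bytes);

		System.out.println(event); // flink##3##1546300800000
	}
}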

