package flinkjava.Connecter.Kafka;

import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class KafkaProducer {
    public static void main(String[] args) {
        String brokerList = "114.116.219.197:5008,114.116.199.154:5008,114.116.220.98:5008";
        Properties properties = new Properties();
        // properties.setProperty("broker.list", brokerList);
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);
        properties.put(ProducerConfig.ACKS_CONFIG, "all");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        // Fully-qualified name because this class shadows Kafka's own KafkaProducer.
        org.apache.kafka.clients.producer.KafkaProducer<String, String> kafkaProducer =
                new org.apache.kafka.clients.producer.KafkaProducer<>(properties);
        // Key = i, value = "<i>id,<i>name,<i>" (e.g. "7id,7name,7"); the Flink jobs below split the value on ','.
        for (int i = 0; i < 1000; i++) {
            kafkaProducer.send(new ProducerRecord<String, String>("flinktestkafka", Integer.toString(i), i + "id," + i + "name," + i));
            try {
                Thread.sleep(3000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        kafkaProducer.close();
    }
}
package flinkjava.Connecter.Kafka;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Properties;

public class KafkaSource {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);
        // Kafka source
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "114.116.219.197:5008");
        properties.setProperty("group.id", "test");
        FlinkKafkaConsumer<String> flinkKafkaConsumer =
                new FlinkKafkaConsumer<>("flinktestkafka", new SimpleStringSchema(), properties);
        DataStreamSource<String> streamFromKafka = env.addSource(flinkKafkaConsumer);
        SingleOutputStreamOperator<KafkaElement> mapresult = streamFromKafka.map(new MapFunction<String, KafkaElement>() {
            @Override
            public KafkaElement map(String value) throws Exception {
                String[] sp = value.split(",");
                System.out.println(sp[0] + " " + sp[1] + " " + sp[2]);
                return new KafkaElement(sp[0], sp[1], Integer.valueOf(sp[2]));
            }
        });
        try {
            env.execute("this is kafkaflink job");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
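The KafkaElement and KafkaOut classes used throughout these jobs are not shown in the post. The sketch below is an assumed shape reconstructed from how they are used (String id/name, a numeric age that later serves as the event-time timestamp, and an id plus concatenated names for the window output). Flink's keyBy("id") and POJO serialization need a public class, public getters/setters, and a no-argument constructor, so the sketch follows those rules; in practice the two classes would live in separate files in the same package.

package flinkjava.Connecter.Kafka;

// Assumed shape of KafkaElement, reconstructed from usage; the original class is not shown in the post.
public class KafkaElement {
    private String id;
    private String name;
    private long age; // also used as the event-time timestamp in the windowed job

    public KafkaElement() {} // no-arg constructor required for Flink's POJO serializer

    public KafkaElement(String id, String name, long age) {
        this.id = id;
        this.name = name;
        this.age = age;
    }

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    public long getAge() { return age; }
    public void setAge(long age) { this.age = age; }

    @Override
    public String toString() {
        return "KafkaElement{id=" + id + ", name=" + name + ", age=" + age + "}";
    }
}

// Assumed shape of the window output type used in the event-time example further below (separate file).
public class KafkaOut {
    private Integer id;
    private String names; // concatenation of the names seen in one window

    public KafkaOut() {}

    public KafkaOut(Integer id, String names) {
        this.id = id;
        this.names = names;
    }

    public Integer getId() { return id; }
    public void setId(Integer id) { this.id = id; }
    public String getNames() { return names; }
    public void setNames(String names) { this.names = names; }

    @Override
    public String toString() {
        return "KafkaOut{id=" + id + ", names=" + names + "}";
    }
}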
package flinkjava.Connecter.Kafka;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class KafkaFlinkTest1 {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);
        // Kafka source
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "114.116.219.197:5008");
        properties.setProperty("group.id", "KafkaFlinkTest1a");
        FlinkKafkaConsumer<String> flinkKafkaConsumer =
                new FlinkKafkaConsumer<>("flinktestkafka", new SimpleStringSchema(), properties);
        // Start reading partition 0 of the topic at offset 10; partitions without an entry fall back to the default behaviour.
        Map<KafkaTopicPartition, Long> specificStartOffsets = new HashMap<>();
        specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka", 0), 10L);
        // specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka", 1), 0L);
        // specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka", 2), 0L);
        flinkKafkaConsumer.setStartFromSpecificOffsets(specificStartOffsets);
        DataStreamSource<String> streamFromKafka = env.addSource(flinkKafkaConsumer);
        streamFromKafka.map(new MapFunction<String, Object>() {
            @Override
            public Object map(String value) throws Exception {
                String[] sp = value.split(",");
                System.out.println(sp[0] + " " + sp[1] + " " + sp[2]);
                return null;
            }
        });
        try {
            env.execute("this is kafkaflink job");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
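setStartFromSpecificOffsets is only one of the start-position controls on FlinkKafkaConsumer. For reference, here is a short sketch of the other common options; treat it as illustrative, since the exact set depends on the connector version, and offsets restored from a checkpoint or savepoint take precedence over all of them.

// Other start-position options on FlinkKafkaConsumer (illustrative; availability varies by connector version):
flinkKafkaConsumer.setStartFromGroupOffsets();   // default: resume from the committed offsets of group.id
flinkKafkaConsumer.setStartFromEarliest();       // read each partition from the beginning
flinkKafkaConsumer.setStartFromLatest();         // only read records produced after the job starts
flinkKafkaConsumer.setStartFromTimestamp(1600000000000L); // first record at or after this epoch-millis timestamp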
The run above, where I pinned consumption to a specific partition offset, shows that each message written to Kafka may end up in a different partition. The assignment is mainly based on the key, I suppose, although in my case it looks more or less random. That is not the main point here, though.
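With a String key and no explicit partition, the producer's default partitioner derives the partition from a hash of the serialized key, which is why records with different keys spread across partitions. If a record has to land in one particular partition, the ProducerRecord constructor that takes a partition number can be used instead; a minimal sketch against the producer at the top of the post (partition 2 is just an example value):

// Pinning a record to partition 2 explicitly instead of letting the key-based default partitioner decide.
kafkaProducer.send(new ProducerRecord<String, String>("flinktestkafka", 2, Integer.toString(i), i + "id," + i + "name," + i));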
package flinkjava.Connecter.Kafka;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import javax.annotation.Nullable;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;

public class KafkaFlinkTest1 {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // env.enableCheckpointing(5000);
        // Kafka source
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "114.116.219.197:5008");
        properties.setProperty("group.id", "KafkaFlinkTest1a");
        FlinkKafkaConsumer<String> flinkKafkaConsumer =
                new FlinkKafkaConsumer<>("flinktestkafka8", new SimpleStringSchema(), properties);
        // Map specificStartOffsets = new HashMap<>();
        // specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka4", 0), 0L);
        // specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka4", 1), 0L);
        // specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka4", 2), 0L);
        // flinkKafkaConsumer.setStartFromSpecificOffsets(specificStartOffsets);
        DataStreamSource<String> streamFromKafka = env.addSource(flinkKafkaConsumer);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        SingleOutputStreamOperator<KafkaElement> mapresult = streamFromKafka.map(new MapFunction<String, KafkaElement>() {
            @Override
            public KafkaElement map(String value) throws Exception {
                String[] splits = value.split(",");
                System.out.println(Thread.currentThread() + "===haha" + value);
                return new KafkaElement(splits[0], splits[1], Long.valueOf(splits[2]));
            }
        });
        // Periodic watermark generator with a 5-second bounded out-of-orderness;
        // the event-time timestamp is taken from the element's age field.
        SingleOutputStreamOperator<KafkaElement> watermarksstream = mapresult.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<KafkaElement>() {
            Long currentMaxTimestamp = 0L;
            Long maxOutOfOrderness = 5000L;

            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
            }

            @Override
            public long extractTimestamp(KafkaElement element, long previousElementTimestamp) {
                long timestamp = element.getAge();
                currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
                return timestamp;
            }
        });
        /** Tag for late data. */
        OutputTag<KafkaElement> outputTag = new OutputTag<KafkaElement>("late-data"){};
        SingleOutputStreamOperator<KafkaOut> winstream = watermarksstream.keyBy("id")
                .window(TumblingEventTimeWindows.of(Time.seconds(3)))
                .sideOutputLateData(outputTag) // route late records to the side output (an allowedLateness alternative is sketched after this class)
                .apply(new WindowFunctionTest());
        /**
         * Late-data handling.
         * The late records printed here are the original elements; they have not been
         * through the window/apply processing.
         */
        DataStream<KafkaElement> sideOutput = winstream.getSideOutput(outputTag);
        sideOutput.print();
        /**
         * Normal (non-late) window output.
         */
        winstream.map(new MapFunction<KafkaOut, Object>() {
            @Override
            public Object map(KafkaOut value) throws Exception {
                System.out.println(value);
                return null;
            }
        }).print();
        try {
            env.execute("this is kafkaflink job");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // A custom WindowFunction.
    static class WindowFunctionTest implements WindowFunction<KafkaElement, KafkaOut, Tuple, TimeWindow> {
        /**
         * Called when the window fires; input iterates over all elements of this window.
         */
        @Override
        public void apply(Tuple tuple, TimeWindow window, Iterable<KafkaElement> input, Collector<KafkaOut> out) throws Exception {
            System.out.println("key tuple is: " + tuple.toString());
            Iterator<KafkaElement> iterators = input.iterator();
            StringBuilder stringBuilder = new StringBuilder();
            while (iterators.hasNext()) {
                KafkaElement tmpkafkaElement = iterators.next();
                stringBuilder.append(tmpkafkaElement.getName());
            }
            // tuple.getField(0) is the keyed id field; Integer.valueOf assumes it is a numeric string.
            out.collect(new KafkaOut(Integer.valueOf(tuple.getField(0)), stringBuilder.toString()));
        }
    }
}
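The side output above only catches records that arrive after their window has already fired and been discarded. A complementary option is WindowedStream.allowedLateness, which keeps the window state around for some extra time and fires an updated result when late-but-not-too-late records arrive; anything later than that still goes to the late-data side output. A minimal sketch of how it would slot into the pipeline above (the two-second value is just an example):

// Sketch: keep windows open a little longer for late records, in addition to the side output.
SingleOutputStreamOperator<KafkaOut> winstreamWithLateness = watermarksstream.keyBy("id")
        .window(TumblingEventTimeWindows.of(Time.seconds(3)))
        .allowedLateness(Time.seconds(2))   // example value; tune to the expected lateness
        .sideOutputLateData(outputTag)
        .apply(new WindowFunctionTest());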