Processing Kafka Real-Time Streams with Flink

Processing Kafka Data with Flink: Splitting the Stream

1. Create the Flink environment

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // number of restart attempts and the delay between them
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 3 * 1000));
        env.disableOperatorChaining();
        // checkpoint interval, in ms
        env.enableCheckpointing(10000);
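
The job only sets the checkpoint interval; every other checkpoint setting stays at its default. If you need stronger guarantees, Flink's CheckpointConfig exposes a few more knobs. A minimal sketch, with illustrative values that are not from the original job:

// optional checkpoint tuning (illustrative values, not part of the original job)
env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
// minimum pause between the end of one checkpoint and the start of the next, in ms
env.getCheckpointConfig().setMinPauseBetweenCheckpoints(5000);
// abort any checkpoint that takes longer than 60 s
env.getCheckpointConfig().setCheckpointTimeout(60000);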

2. Create the source stream

// create the source stream from Kafka
SingleOutputStreamOperator<String> input = env.addSource(KafkaSourceMaker.make("kafka-source.yml", new SimpleStringSchema()))
                .name("kafkaSource")
                .setParallelism(1);
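
The kafka-source.yml file itself is not included in the post, but KafkaSourceMaker (shown in the second part) validates exactly three keys: topic, bootstrap.servers, and group.id. A plausible minimal file, with placeholder values:

# assumed layout of kafka-source.yml; all values are placeholders
topic: dataclean_friend_company
bootstrap.servers: broker1:9092,broker2:9092
group.id: friend-company-step1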

3. Split the stream

// friend-follow branch
DataStream<String> dataStreamFlow = input.process(
                                    new ProcessFilterUser(RelationTypeEnum.FRIEND_FLOW)     // keep only friend-follow changes
                                    ).setParallelism(1);
// address-book branch
DataStream<String> dataStreamPhonebook = input.process(
                                          new ProcessFilterUser(RelationTypeEnum.FRIEND_PHONEBOOK)  // keep only address-book changes
                                          ).setParallelism(1);

4. Write each stream to its own topic

// friend-follow stream (prod)
dataStreamFlow.addSink(setKafkaSinkNext("dataclean_friend_company_split_flowcompany")).name("kafka_sink_flow").setParallelism(1);
// address-book stream (prod)
dataStreamPhonebook.addSink(setKafkaSinkNext("dataclean_friend_company_split_phonebook")).name("kafka_sink_phonebook").setParallelism(1);
// the split results are written back to Kafka
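
setKafkaSinkNext is not shown in the post. Assuming it wraps a FlinkKafkaProducer010 the same way KafkaSourceMaker wraps the consumer, a minimal sketch might look like this (the helper body and the broker list are assumptions):

// hypothetical sketch of setKafkaSinkNext; the real helper is not shown in the post
private static FlinkKafkaProducer010<String> setKafkaSinkNext(String topic) {
    // broker list is a placeholder; in practice it would come from configuration
    return new FlinkKafkaProducer010<>("broker1:9092,broker2:9092", topic, new SimpleStringSchema());
}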

5. Launch the job

env.execute("FriendCompanyStep1");

----------------------------------

ProcessFilterUser

/**
 * Splits the stream by inspecting each record's operation type.
 */
@Slf4j
public class ProcessFilterUser extends ProcessFunction<String, String> {
    // address-book operation codes
    private final Set<Integer> phoneCompany = Sets.newHashSet(TypeEnum.ADD.getCode(), TypeEnum.DELETE.getCode(), TypeEnum.UPDATE.getCode(), TypeEnum.DELETE_ALL.getCode());
    // friend-follow operation codes
    private final Set<Integer> flowCompany = Sets.newHashSet(TypeEnum.FRIEND_FLOW.getCode(), TypeEnum.FRIEND_UN_FLOW.getCode(), TypeEnum.FRIEND_FLOW_UPDATE.getCode(), TypeEnum.FRIEND_FLOW_CHANGE.getCode());

    private final RelationTypeEnum relationTypeEnum;

    public ProcessFilterUser(RelationTypeEnum relationTypeEnum) {
        this.relationTypeEnum = relationTypeEnum;
    }

    @Override
    public void processElement(String str, Context context, Collector<String> collector) {
        UserCompanyKafka userCompanyKafka = JSON.parseObject(str, UserCompanyKafka.class);
        if (userCompanyKafka != null) {
            Integer type = null;
            // address-book change
            if (phoneCompany.contains(userCompanyKafka.getType())) {
                type = RelationTypeEnum.FRIEND_PHONEBOOK.getCode();
            }
            // friend-follow change
            if (flowCompany.contains(userCompanyKafka.getType())) {
                type = RelationTypeEnum.FRIEND_FLOW.getCode();
            }
            // emit only the records that match this instance's target branch
            if (relationTypeEnum.getCode().equals(type)) {
                log.info("send to: {}, msg: {}", relationTypeEnum.getDesc(), str);
                collector.collect(str);
            }
        }
    }
}
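
The class references several types that the post does not show. A minimal sketch of what they might look like, inferred purely from how they are used here (the field names and numeric codes are assumptions):

// hypothetical sketches, inferred from usage; not from the original post
enum RelationTypeEnum {
    FRIEND_FLOW(1, "friend follow"), FRIEND_PHONEBOOK(2, "address book");  // codes are assumed
    private final Integer code;
    private final String desc;
    RelationTypeEnum(Integer code, String desc) { this.code = code; this.desc = desc; }
    public Integer getCode() { return code; }
    public String getDesc() { return desc; }
}

// the Kafka message carries at least an operation-type code (a TypeEnum code)
public class UserCompanyKafka {
    private Integer type;
    public Integer getType() { return type; }
    public void setType(Integer type) { this.type = type; }
}

TypeEnum would follow the same code/desc pattern, with one constant per operation (ADD, DELETE, FRIEND_FLOW, and so on).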

1. First, receive the operation records from Kafka.
2. Inspect each record's operation type: address-book changes vs. friend-follow changes.
3. Route each type into its own new stream: one carrying address-book operations, the other carrying friend-follow operations (a single-pass alternative is sketched below).
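
Note that this design runs the full parse-and-filter logic over input twice, once per process() call. Flink's side outputs offer a single-pass alternative; a sketch under that assumption (the tag name is mine, and phoneCompany/flowCompany are the same code sets defined in ProcessFilterUser):

// single-pass alternative using a side output (sketch; the tag name is an assumption)
final OutputTag<String> phonebookTag = new OutputTag<String>("phonebook") {};

SingleOutputStreamOperator<String> dataStreamFlow = input.process(new ProcessFunction<String, String>() {
    @Override
    public void processElement(String str, Context ctx, Collector<String> out) {
        UserCompanyKafka msg = JSON.parseObject(str, UserCompanyKafka.class);
        if (msg == null) {
            return;
        }
        if (phoneCompany.contains(msg.getType())) {
            ctx.output(phonebookTag, str);   // address-book changes go to the side output
        } else if (flowCompany.contains(msg.getType())) {
            out.collect(str);                // friend-follow changes stay on the main output
        }
    }
});
DataStream<String> dataStreamPhonebook = dataStreamFlow.getSideOutput(phonebookTag);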

-----------------------------------------------------------------------------

Processing Kafka Data with Flink: Data Processing

1. Define the Flink execution environment

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// number of restart attempts and the delay between them
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 3 * 1000));
env.disableOperatorChaining();
// checkpoint interval, in ms
env.enableCheckpointing(120000);

2. Create the input stream

// create the source stream from Kafka
SingleOutputStreamOperator<String> input = env.addSource(KafkaSourceMaker.make("kafka-source.yml", new SimpleStringSchema()))
                .name("kafkaSource")
                .setParallelism(10);

3. Process the stream

DataStream<String> dataStream = input.process(new ProcessFilterUser()).setParallelism(10);

4. Save the results

dataStream.addSink(setKafkaSinkOld("Company_Dynamics")).name("kafka_sink").setParallelism(1); // write the results to Kafka

5. Launch the job

env.execute("JobKafkaToEs_FriendCompany");

KafkaSourceMaker

/**
 * Factory for the Flink Kafka source.
 */
public class KafkaSourceMaker {
    private static final String TOPIC = "topic";
    private static final String BOOTSTRAP_SERVERS = "bootstrap.servers";
    private static final String GROUP_ID = "group.id";
    private static final Logger logger = LoggerFactory.getLogger(KafkaSourceMaker.class);
    private static final PropertyConfigMaker kafkaConfigMaker = new PropertyConfigMaker();

    public static <T> FlinkKafkaConsumer010<T> make(String path, DeserializationSchema<T> valueDeserializer) throws IOException {
        Properties properties = kafkaConfigMaker.generate(path);
        properties.put("enable.auto.commit", "true");
        properties.put("retries", 3);
        checkNecessaryProperties(properties, TOPIC, BOOTSTRAP_SERVERS, GROUP_ID);
        // read the topic name from the properties
        String topicName = properties.getProperty(TOPIC);
        return new FlinkKafkaConsumer010<>(topicName, valueDeserializer, properties);
    }

    /**
     * Check that a required property is present.
     */
    private static void checkNecessaryProperty(String key, Properties properties) {
        Preconditions.checkArgument(properties.containsKey(key), "%s not found in properties", key);
    }

    private static void checkNecessaryProperties(Properties properties, String... keys) {
        for (String key : keys) {
            checkNecessaryProperty(key, properties);
        }
    }
}
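
PropertyConfigMaker is not shown either. Given that make() passes it a classpath file name and expects java.util.Properties back, a minimal sketch could load the file directly (the flat "key: value" layout of kafka-source.yml is an assumption; java.util.Properties accepts both "=" and ":" as separators):

// hypothetical sketch of PropertyConfigMaker; the real class is not shown in the post
public class PropertyConfigMaker {
    public Properties generate(String path) throws IOException {
        Properties properties = new Properties();
        try (InputStream in = PropertyConfigMaker.class.getClassLoader().getResourceAsStream(path)) {
            Preconditions.checkArgument(in != null, "%s not found on classpath", path);
            properties.load(in);
        }
        return properties;
    }
}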

ProcessFilterUser

@Slf4j
public class ProcessFilterUser extends ProcessFunction<String, String> {
    // address-book operation codes
    private final Set<Integer> phoneCompany = Sets.newHashSet(TypeEnum.ADD.getCode(), TypeEnum.DELETE.getCode(), TypeEnum.UPDATE.getCode(), TypeEnum.DELETE_ALL.getCode());
    // friend-follow operation codes
    private final Set<Integer> flowCompany = Sets.newHashSet(TypeEnum.FRIEND_FLOW.getCode(), TypeEnum.FRIEND_UN_FLOW.getCode(), TypeEnum.FRIEND_FLOW_UPDATE.getCode(), TypeEnum.FRIEND_FLOW_CHANGE.getCode());

    private final CompanyService flowCompanyService = new FlowCompanyService();
    private final CompanyService phoneCompanyService = new PhoneCompanyService();

    @Override
    public void processElement(String str, Context context, Collector<String> collector) throws Exception {
        log.info("friend follow: {}", str);
        long beginTime = System.currentTimeMillis();
        UserCompanyKafka userCompanyKafka = JSON.parseObject(str, UserCompanyKafka.class);
        // TODO: fuller pre-validation
        if (userCompanyKafka == null) {
            log.error("unparseable message, {}", str);
            return;
        }
        CompanyService companyService = getCompanyService(userCompanyKafka.getType());
        if (companyService == null) {
            log.error("invalid type, {}", str);
            return;
        }
        List<String> nextData = companyService.process(userCompanyKafka);
        log.info("ProcessFilterUser took {} ms", System.currentTimeMillis() - beginTime);
        log.info("nextData size: {}", nextData.size());
        // forward the results to the next stage
        if (!CollectionUtils.isEmpty(nextData)) {
            for (String next : nextData) {
                if (!Strings.isNullOrEmpty(next)) {
                    collector.collect(next);
                }
            }
        }
    }

    private CompanyService getCompanyService(Integer type) {
        if (phoneCompany.contains(type)) {
            return phoneCompanyService;
        }
        if (flowCompany.contains(type)) {
            return flowCompanyService;
        }
        return null;
    }
}
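
CompanyService and its two implementations are not shown. From the call site, the contract is small: take the parsed message, update MySQL/Elasticsearch as a side effect, and return the JSON strings to forward. A sketch of the assumed interface:

// hypothetical sketch of the CompanyService contract, inferred from the call site above
public interface CompanyService {
    /**
     * Process one Kafka message: update MySQL/Elasticsearch as needed and
     * return the JSON strings to forward to the next topic.
     */
    List<String> process(UserCompanyKafka userCompanyKafka);
}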

1. Receive the operation records from Kafka (friend follow/unfollow, address-book add/delete/update).

2. Parse each record, determine its type, process every entry it contains (a single record may change several), and update the corresponding data in MySQL and in ES.

3. Return the processed entries as a list of JSON strings.

4. Push the returned values to a new topic (a skeleton of one such service is sketched below).
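
Putting steps 2-4 together, a heavily simplified skeleton of what one such service might do (all persistence calls are placeholders; the real FlowCompanyService/PhoneCompanyService are not shown in the post):

// hypothetical skeleton of one implementation; persistence calls are placeholders
public class PhoneCompanyService implements CompanyService {
    @Override
    public List<String> process(UserCompanyKafka msg) {
        List<String> nextData = new ArrayList<>();
        // a single message may change several entries; for each one:
        //   1) update the corresponding rows in MySQL      (placeholder)
        //   2) update the corresponding documents in ES    (placeholder)
        //   3) re-serialize the processed entry and queue it for the next topic
        nextData.add(JSON.toJSONString(msg));
        return nextData;
    }
}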
