Implementing a custom Flink format to parse custom-format CDC data from Kafka

The business requires consuming CDC data from Kafka, but the data is in a company-internal, non-JSON format. Since Flink's Kafka source already supports pluggable formats through the format interface, all we need to do is develop our own format.
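For illustration, assume each Kafka record is a single delimited line whose first field is the operation flag (I/U/D) and whose remaining fields are the column values. The real in-house format will differ; these sample rows merely match what the parsing code below expects (default delimiter ','):

I,1,Alice
U,1,Alice2
D,1,Alice2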

OggDeserializationFactory

package cn.com.xx.cdc.connectors.deserialize;

import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.format.DecodingFormat;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.factories.DeserializationFormatFactory;
import org.apache.flink.table.factories.DynamicTableFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.types.DataType;
import org.apache.flink.types.RowKind;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public class OggDeserializationFactory implements DeserializationFormatFactory {

	private static final ConfigOption<String> DELIMITER = ConfigOptions.key("delimiter")
			.stringType()
			.defaultValue(",")
			.withDescription("Delimiter of the ogg data");

	@Override
	public String factoryIdentifier() {
		return "ogg";
	}

	@Override
	public Set<ConfigOption<?>> requiredOptions() {
		return Collections.emptySet();
	}

	@Override
	public Set<ConfigOption<?>> optionalOptions() {
		Set<ConfigOption<?>> options = new HashSet<>();
		options.add(DELIMITER);
		return options;
	}

	@Override
	public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(DynamicTableFactory.Context context, ReadableConfig formatOptions) {
		FactoryUtil.validateFactoryOptions(this, formatOptions);
		final String delimiter = formatOptions.get(DELIMITER);
		return new DecodingFormat<DeserializationSchema<RowData>>() {
			@SuppressWarnings("unchecked")
			@Override
			public DeserializationSchema<RowData> createRuntimeDecoder(
					DynamicTableSource.Context context, DataType producedDataType) {
				final TypeInformation<RowData> rowDataTypeInfo =
						(TypeInformation<RowData>) context.createTypeInformation(producedDataType);
				return new OggDeserialization(
						producedDataType.getLogicalType().getChildren(),
						context.createDataStructureConverter(producedDataType),
						rowDataTypeInfo,
						delimiter);
			}

			@Override
			public ChangelogMode getChangelogMode() {
				return ChangelogMode.newBuilder()
						.addContainedKind(RowKind.INSERT)
						.addContainedKind(RowKind.UPDATE_BEFORE)
						.addContainedKind(RowKind.UPDATE_AFTER)
						.addContainedKind(RowKind.DELETE)
						.build();
			}
		};
	}
}
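
One step that is easy to miss: Flink discovers the factory through Java SPI, so the jar must also ship a service file named after the Factory interface, whose content is the fully qualified name of our factory class:

# src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
cn.com.xx.cdc.connectors.deserialize.OggDeserializationFactory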

OggDeserialization

package cn.com.xx.cdc.connectors.deserialize;

import org.apache.commons.lang.StringUtils;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.table.connector.RuntimeConverter;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.connector.source.DynamicTableSource.DataStructureConverter;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

public class OggDeserialization implements DeserializationSchema<RowData> {
	private static final long serialVersionUID = 34225227829409419L;

	private final List<LogicalType> parsingTypes;

	private final DataStructureConverter converter;

	private final TypeInformation<RowData> producedTypeInfo;

	private final String delimiter;

	public OggDeserialization(List<LogicalType> parsingTypes,
							  DataStructureConverter converter,
							  TypeInformation<RowData> producedTypeInfo,
							  String delimiter) {
		this.parsingTypes = parsingTypes;
		this.converter = converter;
		this.producedTypeInfo = producedTypeInfo;
		this.delimiter = delimiter;
	}

	@Override
	public boolean isEndOfStream(RowData nextElement) {
		return false;
	}

	@Override
	public TypeInformation<RowData> getProducedType() {
		// return the type information required by Flink's core interfaces
		return producedTypeInfo;
	}

	@Override
	public void open(InitializationContext context) throws Exception {
		converter.open(RuntimeConverter.Context.create(OggDeserialization.class.getClassLoader()));
	}

	@Override
	public RowData deserialize(byte[] message) throws IOException {
		throw new RuntimeException(
				"Please invoke DeserializationSchema#deserialize(byte[], Collector) instead.");
	}

	@Override
	public void deserialize(byte[] message, Collector<RowData> out) throws IOException {
		// Write the parsing logic for your own format here; this method mainly
		// demonstrates how to turn a Row into RowData.
		final String[] columns = StringUtils.splitPreserveAllTokens(
				new String(message, StandardCharsets.UTF_8), delimiter);
		// The first column carries the operation flag; the remaining columns hold the field values.
		RowKind kind = parseRowKind(columns[0]);
		Row row = new Row(kind, parsingTypes.size());
		for (int i = 1; i < columns.length; i++) {
			row.setField(i - 1, parse(parsingTypes.get(i - 1).getTypeRoot(), columns[i]));
		}
		out.collect((RowData) converter.toInternal(row));
	}

	private static Object parse(LogicalTypeRoot root, String value) {
		switch (root) {
			case INTEGER:
				return Integer.parseInt(value);
			case VARCHAR:
				return value;
			case BIGINT:
				return Long.valueOf(value);
			default:
				throw new IllegalArgumentException("Unsupported type root for parsing: " + root);
		}
	}

	private static RowKind parseRowKind(String op) {
		switch (op) {
			case "I":
				return RowKind.INSERT;
			case "U":
				return RowKind.UPDATE_AFTER;
			case "D":
				return RowKind.DELETE;
			default:
				throw new UnsupportedOperationException("Unsupported operation '" + op + "' for row kind.");
		}
	}
}
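
With the jar and its SPI file on the classpath, the format can be referenced from SQL like any built-in one. Below is a minimal usage sketch; the topic, bootstrap servers and table schema are placeholders made up for the demo:

package cn.com.xx.cdc.connectors.deserialize;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class OggFormatDemo {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

		// Topic, servers and schema below are placeholders for illustration.
		tEnv.executeSql(
				"CREATE TABLE user_cdc (" +
				"  id INT," +
				"  name STRING" +
				") WITH (" +
				"  'connector' = 'kafka'," +
				"  'topic' = 'user-cdc'," +
				"  'properties.bootstrap.servers' = 'localhost:9092'," +
				"  'properties.group.id' = 'ogg-format-demo'," +
				"  'scan.startup.mode' = 'earliest-offset'," +
				// 'ogg' matches factoryIdentifier(); format options take the 'ogg.' prefix
				"  'format' = 'ogg'," +
				"  'ogg.delimiter' = ','" +
				")");

		// Print the changelog rows produced by the custom format.
		tEnv.executeSql("SELECT * FROM user_cdc").print();
	}
}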

The pom is shown below. It contains more dependencies than this program actually needs, so take what you need and drop the rest.

    <properties>
        <flink.version>1.11.2</flink.version>
        <debezium.version>1.3.0.Final</debezium.version>
        <scala.binary.version>2.11</scala.binary.version>
        <hadoop.version>2.7.7</hadoop.version>
    </properties>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Debezium dependencies -->
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <artifactId>flink-connector-debezium</artifactId>
            <version>1.1.0</version>
            <exclusions>
                <exclusion>
                    <artifactId>kafka-log4j-appender</artifactId>
                    <groupId>org.apache.kafka</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>io.debezium</groupId>
            <artifactId>debezium-connector-mysql</artifactId>
            <version>${debezium.version}</version>
        </dependency>
        <dependency>
            <groupId>io.debezium</groupId>
            <artifactId>debezium-connector-oracle</artifactId>
            <version>${debezium.version}</version>
            <type>pom</type>
        </dependency>

        <dependency>
            <groupId>io.debezium</groupId>
            <artifactId>debezium-core</artifactId>
            <version>${debezium.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-test-utils_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-tests</artifactId>
            <version>${flink.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <artifactId>flink-connector-test-util</artifactId>
            <version>1.1.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>commons-logging</artifactId>
                    <groupId>commons-logging</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/commons-lang/commons-lang -->
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.6</version>
        </dependency>

    </dependencies>
