DataX二次开发——(3)新增数据加密脱敏插件

一、背景

在数据同步的场景中,时常会有部分字段需要加密脱敏处理,但是DataX3.0是没有开发这个插件的,那我们可以开发一个EncryptTransformer来做数据的加密处理。目前开发的插件支持AES、RSA、SM4、MD5加密方式。

二、代码实现

1、在core模块下新增EncryptTransformer

DataX二次开发——(3)新增数据加密脱敏插件_第1张图片

package com.alibaba.datax.core.transport.transformer;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.core.util.AESUtils;
import com.alibaba.datax.core.util.MD5Utils;
import com.alibaba.datax.core.util.RSAUtils;
import com.alibaba.datax.core.util.SM4Utils;
import com.alibaba.datax.transformer.Transformer;
import org.apache.commons.lang3.StringUtils;

import java.util.Arrays;

/**
 * @Description: 加密转换器
 * @Author: chenweifeng
 * @Date: 2022年11月28日 上午9:13
 **/
/**
 * DataX transformer, registered under the name {@code dx_encrypt}, that replaces one
 * column of a record with its encrypted (or, for MD5, digested) string form.
 *
 * <p>Expected parameters, in order:
 * <ol>
 *   <li>column index ({@code Integer})</li>
 *   <li>encrypt type ({@code String}), validated by {@code EncryptTypeEnum.checkEncryptType}</li>
 *   <li>key / salt / public key ({@code String}) handed to the selected utility class</li>
 * </ol>
 */
public class EncryptTransformer extends Transformer {
    public EncryptTransformer() {
        super.setTransformerName("dx_encrypt");
    }

    /**
     * Encrypts the configured column of {@code record} in place.
     *
     * @param record the record being transformed
     * @param paras  column index, encrypt type and key (see class documentation)
     * @return the same record; it is returned untouched when the column value is blank
     * @throws DataXException with {@code TRANSFORMER_ILLEGAL_PARAMETER} when the parameters are
     *                        malformed or the encrypt type is unknown, and with
     *                        {@code TRANSFORMER_RUN_EXCEPTION} when the encryption itself fails
     */
    @Override
    public Record evaluate(Record record, Object... paras) {
        // Exactly three parameters are required.
        if (paras.length != 3) {
            throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "paras:" + Arrays.asList(paras).toString());
        }

        // Check the runtime types before casting so that a misconfigured job is reported as an
        // illegal-parameter error instead of escaping as a raw ClassCastException/NPE.
        // A null type or key is let through: the type check below and the utility classes decide.
        boolean indexOk = paras[0] instanceof Integer;
        boolean typeOk = paras[1] == null || paras[1] instanceof String;
        boolean keyOk = paras[2] == null || paras[2] instanceof String;
        if (!indexOk || !typeOk || !keyOk) {
            throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "paras:" + Arrays.asList(paras).toString());
        }

        int columnIndex = (Integer) paras[0];
        String encryptType = (String) paras[1];
        String encryptKey = (String) paras[2];

        // Reject unknown encrypt types up front.
        if (!EncryptTypeEnum.checkEncryptType(encryptType)) {
            throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "加密类型错误->paras:" + Arrays.asList(paras).toString());
        }

        Column column = record.getColumn(columnIndex);
        try {
            String oriValue = column.asString();
            // Blank values are passed through unchanged; there is nothing to encrypt.
            if (StringUtils.isBlank(oriValue)) {
                return record;
            }

            String encryptValue = encryptValue(encryptType, oriValue, encryptKey);
            record.setColumn(columnIndex, new StringColumn(encryptValue));
        } catch (DataXException e) {
            // Already classified (e.g. unsupported type); do not re-wrap as a run exception.
            throw e;
        } catch (Exception e) {
            throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e);
        }

        return record;
    }

    /**
     * Dispatches to the utility class matching {@code encryptType}.
     *
     * <p>An unmatched type raises an error rather than yielding a placeholder value, so a
     * mismatch between {@code EncryptTypeEnum} and this dispatch can never write junk data
     * into the target column.
     */
    private static String encryptValue(String encryptType, String value, String key) throws Exception {
        if (EncryptTypeEnum.AES.toString().equals(encryptType)) {
            return AESUtils.encrypt(value, key);
        }
        if (EncryptTypeEnum.SM4.toString().equals(encryptType)) {
            return SM4Utils.encrypt(value, key);
        }
        if (EncryptTypeEnum.MD5.toString().equals(encryptType)) {
            return MD5Utils.encrypt(value, key);
        }
        if (EncryptTypeEnum.RSA.toString().equals(encryptType)) {
            return RSAUtils.encrypt(value, key);
        }
        throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "加密类型错误->encryptType:" + encryptType);
    }
}

2、把EncryptTransformer注册进去

DataX二次开发——(3)新增数据加密脱敏插件_第2张图片

 3、Pom文件新增依赖

        
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.6.3</version>
        </dependency>
        

        
        
        <dependency>
            <groupId>org.bouncycastle</groupId>
            <artifactId>bcprov-jdk15on</artifactId>
            <version>1.69</version>
        </dependency>
        

4、新增加密工具类

AES

package com.alibaba.datax.core.util;

import cn.hutool.core.util.HexUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.crypto.SecureUtil;

/**
 * AES helper built on Hutool's {@code SecureUtil.aes}.
 *
 * <p>The textual key is folded into a fixed 16-byte AES key (see {@link #getKeyBytes(String)}),
 * so keys of any length are accepted. A blank key falls back to {@code DEFAULT_KEY}.
 */
public class AESUtils {

    private static final String DEFAULT_KEY = "xxxxxxxxxx";

    /** Length in bytes of the derived AES key (128 bit). */
    private static final int KEY_LENGTH = 16;

    /**
     * Encrypts {@code content} and returns the ciphertext hex-encoded
     * ({@code HexUtil.encodeHexStr} is called with {@code toLowerCase = false}).
     *
     * @param content plain text; blank input is returned unchanged
     * @param key     textual key; blank means the built-in default key
     * @return hex-encoded ciphertext, or {@code content} itself when it is blank
     */
    public static String encrypt(String content, String key) {
        if (StrUtil.isBlank(content)) {
            return content;
        }
        byte[] keyBytes = getKeyBytes(key);
        return HexUtil.encodeHexStr(SecureUtil.aes(keyBytes).encrypt(content), false);
    }

    /**
     * Derives the 16-byte AES key by XOR-folding the key's bytes: byte {@code i} of the
     * key is XOR-ed into slot {@code i % 16}.
     *
     * <p>The key is always read as UTF-8 so that the same key text yields the same AES key
     * on every host, independent of the JVM's default charset.
     */
    private static byte[] getKeyBytes(String key) {
        if (StrUtil.isBlank(key)) {
            key = DEFAULT_KEY;
        }
        byte[] keyBytes = new byte[KEY_LENGTH];
        int i = 0;
        for (byte b : key.getBytes(java.nio.charset.StandardCharsets.UTF_8)) {
            keyBytes[i++ % KEY_LENGTH] ^= b;
        }
        return keyBytes;
    }

    /**
     * Decrypts ciphertext that was produced with the default key.
     *
     * @param content ciphertext as produced by {@link #encrypt(String, String)} with a blank key
     * @return the plain text, or {@code content} itself when it is blank
     */
    public static String decrypt(String content) {
        return decrypt(content, null);
    }

    /**
     * Decrypts ciphertext that was produced by {@link #encrypt(String, String)} with the same key.
     *
     * @param content ciphertext; blank input is returned unchanged
     * @param key     textual key used for encryption; blank means the built-in default key
     * @return the plain text, or {@code content} itself when it is blank
     */
    public static String decrypt(String content, String key) {
        if (StrUtil.isBlank(content)) {
            return content;
        }
        return SecureUtil.aes(getKeyBytes(key)).decryptStr(content);
    }

}

MD5

package com.alibaba.datax.core.util;

import java.security.MessageDigest;
import java.util.Objects;

/**
 * MD5 digest helper returning lower-case hexadecimal strings.
 *
 * <p>All String/byte[] conversions use UTF-8, so the digest of a given text is the same on
 * every host regardless of the JVM's default charset.
 */
public class MD5Utils {

    /** Charset used for every String/byte[] conversion in this class. */
    private static final java.nio.charset.Charset UTF8 = java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Computes the MD5 digest of raw bytes.
     *
     * @param data bytes to digest; must not be {@code null}
     * @return 32-character lower-case hex digest
     * @throws IllegalArgumentException if {@code data} is {@code null}
     */
    public static String encrypt(byte[] data) {
        if (Objects.isNull(data)) {
            throw new IllegalArgumentException("数据不能为NULL");
        }
        return digestHex(data);
    }

    /**
     * Computes the MD5 digest of the UTF-8 bytes of a string.
     *
     * @param data text to digest; must not be {@code null}
     * @return 32-character lower-case hex digest
     * @throws IllegalArgumentException if {@code data} is {@code null}
     */
    public static String encrypt(String data) {
        if (Objects.isNull(data)) {
            throw new IllegalArgumentException("数据不能为NULL");
        }
        return digestHex(data.getBytes(UTF8));
    }

    /**
     * Computes a salted MD5 digest of raw bytes. The bytes are decoded as UTF-8 text and
     * then handled exactly like {@link #encrypt(String, Object)}.
     *
     * @param data bytes to digest; must not be {@code null}
     * @param salt salt to append; {@code null} or {@code ""} means no salt
     * @return 32-character lower-case hex digest
     * @throws IllegalArgumentException if {@code data} is {@code null}
     */
    public static String encrypt(byte[] data, Object salt) {
        if (Objects.isNull(data)) {
            throw new IllegalArgumentException("数据不能为NULL");
        }
        return encrypt(new String(data, UTF8), salt);
    }

    /**
     * Computes a salted MD5 digest of a string. The digested text is
     * {@code data + "{" + salt + "}"}; without a salt it is just {@code data}.
     *
     * @param data text to digest; {@code null} is treated as the empty string
     * @param salt salt to append; {@code null} or {@code ""} means no salt
     * @return 32-character lower-case hex digest
     */
    public static String encrypt(String data, Object salt) {
        return digestHex(mergeDataAndSalt(data, salt).getBytes(UTF8));
    }

    /**
     * Joins data and salt into the text that is actually digested.
     *
     * @param data text to digest; {@code null} is treated as the empty string
     * @param salt salt to append; {@code null} or {@code ""} means no salt
     * @return {@code data} alone, or {@code data{salt}} when a salt is given
     */
    private static String mergeDataAndSalt(String data, Object salt) {
        String text = Objects.isNull(data) ? "" : data;
        if (Objects.isNull(salt) || "".equals(salt)) {
            return text;
        }
        return text + "{" + salt + "}";
    }

    /** Runs MD5 over {@code bytes} and hex-encodes the result. */
    private static String digestHex(byte[] bytes) {
        try {
            return fromBytesToHex(MessageDigest.getInstance("MD5").digest(bytes));
        } catch (java.security.NoSuchAlgorithmException e) {
            // MD5 is a mandatory algorithm on every Java platform; keep the cause if it is missing.
            throw new IllegalStateException("MD5 algorithm is not available", e);
        }
    }

    /** Converts bytes to a lower-case hex string, two characters per byte. */
    private static String fromBytesToHex(byte[] resultBytes) {
        StringBuilder builder = new StringBuilder(resultBytes.length * 2);
        for (byte resultByte : resultBytes) {
            int unsigned = 0xFF & resultByte;
            builder.append(Character.forDigit(unsigned >>> 4, 16));
            builder.append(Character.forDigit(unsigned & 0x0F, 16));
        }
        return builder.toString();
    }

    /** Manual smoke test: prints plain and salted digests of sample inputs. */
    public static void main(String[] args) {
        String data = "test";
        String data1 = "test1";

        String s = MD5Utils.encrypt(data);
        String s1 = MD5Utils.encrypt(data);
        String s2 = MD5Utils.encrypt(data1);
        System.out.println(s);
        System.out.println(s1);
        System.out.println(s2);

        System.out.println("-------------加盐后的-------------------");
        String s3 = MD5Utils.encrypt(data, "22");
        String s4 = MD5Utils.encrypt(data, "22");
        System.out.println(s3);
        System.out.println(s4);
    }
}



RSA

package com.alibaba.datax.core.util;

import org.apache.commons.codec.binary.Base64;

import javax.crypto.BadPaddingException;
import javax.crypto.Cipher;
import javax.crypto.IllegalBlockSizeException;
import javax.crypto.NoSuchPaddingException;
import java.nio.charset.StandardCharsets;
import java.security.InvalidKeyException;
import java.security.KeyFactory;
import java.security.NoSuchAlgorithmException;
import java.security.PrivateKey;
import java.security.PublicKey;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.PKCS8EncodedKeySpec;
import java.security.spec.X509EncodedKeySpec;


/**
 * RSA helper: public-key encryption and private-key decryption of short strings.
 *
 * <p>Keys are exchanged as Base64 text: X.509 encoding for public keys, PKCS#8 encoding for
 * private keys. Plain text is always converted with UTF-8. Ciphertext is Base64 text.
 *
 * <p>Note: with PKCS#1 v1.5 padding a single RSA operation can only encrypt
 * {@code keySizeInBytes - 11} bytes (117 bytes for a 1024-bit key); longer input makes
 * {@code Cipher.doFinal} fail with {@link IllegalBlockSizeException}.
 */
public class RSAUtils {

    /**
     * Default public key (Base64, X.509). Placeholder value; replace before using the
     * overloads that do not take a key.
     */
    public static String DEFAULT_PUBLIC_KEY = "rsapublickey";

    /** Key algorithm used for {@link KeyFactory}. */
    private final static String ALGORITHM = "RSA";

    /**
     * Explicit cipher transformation. Requesting plain {@code "RSA"} leaves mode and padding to
     * whichever provider gets selected; naming them keeps encrypt and decrypt in agreement.
     */
    private final static String TRANSFORMATION = "RSA/ECB/PKCS1Padding";

    /**
     * Builds the public key from {@link #DEFAULT_PUBLIC_KEY}.
     *
     * @return the decoded public key
     * @throws NoSuchAlgorithmException if no RSA {@link KeyFactory} is available
     * @throws InvalidKeySpecException  if the default key is not a valid X.509 encoded key
     */
    public static PublicKey getPublicKey() throws NoSuchAlgorithmException, InvalidKeySpecException {
        return getPublicKey(DEFAULT_PUBLIC_KEY);
    }

    /**
     * Builds a public key from its Base64 text form.
     *
     * @param pubKey Base64 of the X.509 encoded public key
     * @return the decoded public key
     * @throws NoSuchAlgorithmException if no RSA {@link KeyFactory} is available
     * @throws InvalidKeySpecException  if {@code pubKey} is not a valid X.509 encoded key
     */
    public static PublicKey getPublicKey(String pubKey) throws NoSuchAlgorithmException, InvalidKeySpecException {
        X509EncodedKeySpec keySpec = new X509EncodedKeySpec(Base64.decodeBase64(pubKey));
        return KeyFactory.getInstance(ALGORITHM).generatePublic(keySpec);
    }


    /**
     * Encrypts {@code content} with {@link #DEFAULT_PUBLIC_KEY}.
     *
     * @param content plain text
     * @return Base64 encoded ciphertext
     * @throws NoSuchPaddingException    if the padding scheme is unavailable
     * @throws NoSuchAlgorithmException  if RSA is unavailable
     * @throws InvalidKeyException       if the key cannot be used with the cipher
     * @throws IllegalBlockSizeException if {@code content} is too long for one RSA block
     * @throws BadPaddingException       declared by {@code Cipher.doFinal}
     * @throws InvalidKeySpecException   if the default key is not a valid public key
     */
    public static String encrypt(String content) throws NoSuchPaddingException, NoSuchAlgorithmException, InvalidKeyException, IllegalBlockSizeException, BadPaddingException, InvalidKeySpecException {
        return encrypt(content, DEFAULT_PUBLIC_KEY);
    }


    /**
     * Encrypts {@code content} with the given public key.
     *
     * @param content plain text
     * @param pubKey  Base64 of the X.509 encoded public key
     * @return Base64 encoded ciphertext
     * @throws NoSuchPaddingException    if the padding scheme is unavailable
     * @throws NoSuchAlgorithmException  if RSA is unavailable
     * @throws InvalidKeyException       if the key cannot be used with the cipher
     * @throws IllegalBlockSizeException if {@code content} is too long for one RSA block
     * @throws BadPaddingException       declared by {@code Cipher.doFinal}
     * @throws InvalidKeySpecException   if {@code pubKey} is not a valid public key
     */
    public static String encrypt(String content, String pubKey) throws NoSuchPaddingException, NoSuchAlgorithmException, InvalidKeyException, IllegalBlockSizeException, BadPaddingException, InvalidKeySpecException {
        PublicKey publicKey = getPublicKey(pubKey);
        Cipher cipher = Cipher.getInstance(TRANSFORMATION);
        cipher.init(Cipher.ENCRYPT_MODE, publicKey);
        byte[] encrypted = cipher.doFinal(content.getBytes(StandardCharsets.UTF_8));
        return new String(Base64.encodeBase64(encrypted), StandardCharsets.UTF_8);
    }

    /**
     * Builds a private key from its Base64 text form.
     *
     * @param privateKey Base64 of the PKCS#8 encoded private key
     * @return the decoded private key
     * @throws NoSuchAlgorithmException if no RSA {@link KeyFactory} is available
     * @throws InvalidKeySpecException  if {@code privateKey} is not a valid PKCS#8 encoded key
     */
    public static PrivateKey getPrivateKey(String privateKey) throws NoSuchAlgorithmException, InvalidKeySpecException {
        PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(Base64.decodeBase64(privateKey));
        return KeyFactory.getInstance(ALGORITHM).generatePrivate(keySpec);
    }

    /**
     * Decrypts Base64 ciphertext produced by {@link #encrypt(String, String)}.
     *
     * @param content Base64 encoded ciphertext
     * @param priKey  Base64 of the PKCS#8 encoded private key
     * @return the plain text, decoded as UTF-8 to mirror {@code encrypt}
     * @throws NoSuchPaddingException    if the padding scheme is unavailable
     * @throws NoSuchAlgorithmException  if RSA is unavailable
     * @throws InvalidKeyException       if the key cannot be used with the cipher
     * @throws IllegalBlockSizeException if the ciphertext length does not fit the key
     * @throws BadPaddingException       if the ciphertext was not produced with the matching key
     * @throws InvalidKeySpecException   if {@code priKey} is not a valid private key
     */
    public static String decrypt(String content, String priKey) throws NoSuchPaddingException, NoSuchAlgorithmException, InvalidKeyException, IllegalBlockSizeException, BadPaddingException, InvalidKeySpecException {
        PrivateKey privateKey = getPrivateKey(priKey);
        Cipher cipher = Cipher.getInstance(TRANSFORMATION);
        cipher.init(Cipher.DECRYPT_MODE, privateKey);
        // Decode with the same charset encrypt() used, not the platform default.
        return new String(cipher.doFinal(Base64.decodeBase64(content)), StandardCharsets.UTF_8);
    }

    /**
     * Manual round-trip check with a demo key pair.
     * NOTE(review): the key pair below is embedded in source and must never protect real data.
     */
    public static void main(String[] args) throws NoSuchPaddingException, NoSuchAlgorithmException, IllegalBlockSizeException, BadPaddingException, InvalidKeyException, InvalidKeySpecException {
        String enc = encrypt("1", "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCyXV5eYVYxJfFS78AdVGgUX80nrRXId2OpUJ5pFeN12Bon0Zq2sjPyWViwNWRjVyPoKru9xmKzGPLS4UO7J3mBfKzHSqoGLgYML2C7aDc/EXPDp/6pErLLgl180cjhNTR14Xl3hQW5Dr8V5M6BukcuztdtOUQ/CMOmVkFeDjDwewIDAQAB");
        System.out.println(enc);
        String dec = decrypt(enc, "MIICdQIBADANBgkqhkiG9w0BAQEFAASCAl8wggJbAgEAAoGBALJdXl5hVjEl8VLvwB1UaBRfzSetFch3Y6lQnmkV43XYGifRmrayM/JZWLA1ZGNXI+gqu73GYrMY8tLhQ7sneYF8rMdKqgYuBgwvYLtoNz8Rc8On/qkSssuCXXzRyOE1NHXheXeFBbkOvxXkzoG6Ry7O1205RD8Iw6ZWQV4OMPB7AgMBAAECgYBlgzQ5POakNDcDf8X0K1zM87U8NosmO6Sx8Vsm7bk2EZbZQLbv8b4U2Prb1pda3nZmJSQSnvz/d1yD3ocA1jeVIVUUEPhVPVVNFkF3Cq0I5Y/USAJFJAQjJRO71pwEO116PwDLmAVKwUFnhq7QKvvFMAq9kYSCrVQLnV3oDF+Z+QJBAOoOFU5ocGVSRCk+D3ub7vTm9nkIvIGjvQIzJV8l8Iz9S5PX1kVh7VF4pUJjVUsdB9gmmsJm//5QyvhbujULvL0CQQDDFpOUdtuYMAqTN4HJ3QUMdsVYs0yscu66bhrlYziJEjVNVD9H6Twm8Z20xqvbDT3955oe1UF/wP2pH7AzVWGXAkAlrmTBvdCyjz8IAMTPWrhq1zBBJiXrHShVzVQr8Fi86h6by7jkbLKfVjQM7x8RO7hClVx/BcQa9njm6SWeWqhlAkBQT2PzkzZ1IRnJXulUE2N3rREyoaUnvKygHKF/2ue0LcW5jrv6O36ivYZhnAO1Sm9Bre0ZUksDb4w86imcCEMDAkAdC7dQQPCElFsVIcIaw639LqPshy0J98t67z5NleTOnx1KvhpoG+7O92+PLNUWH+LmkiLuVRe0LNRY2vVVOk93");
        System.out.println(dec);
    }
}

SM4

package com.alibaba.datax.core.util;

import org.bouncycastle.jce.provider.BouncyCastleProvider;
import org.bouncycastle.pqc.math.linearalgebra.ByteUtils;

import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.spec.SecretKeySpec;
import java.security.Key;
import java.security.NoSuchAlgorithmException;
import java.security.NoSuchProviderException;
import java.security.SecureRandom;
import java.security.Security;
import java.util.Arrays;



/**
 * SM4 helper backed by the BouncyCastle provider, which is registered when the class loads.
 *
 * <p>Keys and ciphertext are exchanged as hexadecimal strings; plain text is converted
 * with UTF-8. Every cipher operation uses the {@code SM4/ECB/PKCS5Padding} transformation.
 */
public class SM4Utils {
    static {
        Security.addProvider(new BouncyCastleProvider());
    }

    /** Charset name used for plain-text conversion. */
    private static final String ENCODING = "UTF-8";
    /** Algorithm name used for key material and key generation. */
    public static final String ALGORITHM_NAME = "SM4";
    /** Transformation string in the form algorithm/mode/padding. */
    public static final String ALGORITHM_NAME_ECB_PADDING = "SM4/ECB/PKCS5Padding";
    /** Default key size in bits (128 bits corresponds to 32 hex characters). */
    public static final int DEFAULT_KEY_SIZE = 128;

    /**
     * Creates and initialises a cipher from the BouncyCastle provider.
     *
     * @param algorithmName transformation string to request
     * @param mode          {@code Cipher.ENCRYPT_MODE} or {@code Cipher.DECRYPT_MODE}
     * @param key           raw key bytes
     * @return the initialised cipher
     * @throws Exception if the cipher cannot be obtained or initialised
     */
    private static Cipher generateEcbCipher(String algorithmName, int mode, byte[] key) throws Exception {
        Cipher ecb = Cipher.getInstance(algorithmName, BouncyCastleProvider.PROVIDER_NAME);
        Key secret = new SecretKeySpec(key, ALGORITHM_NAME);
        ecb.init(mode, secret);
        return ecb;
    }

    /**
     * Generates a random key of {@link #DEFAULT_KEY_SIZE} bits.
     *
     * @return the encoded key bytes
     * @throws Exception if the key generator is unavailable
     */
    public static byte[] generateKey() throws Exception {
        return generateKey(DEFAULT_KEY_SIZE);
    }


    /* ---------------------------- encryption ---------------------------- */

    /**
     * Generates a random key of the requested size.
     *
     * @param keySize key size in bits
     * @return the encoded key bytes
     * @throws Exception if the key generator is unavailable or rejects the size
     */
    public static byte[] generateKey(int keySize) throws Exception {
        KeyGenerator generator = KeyGenerator.getInstance(ALGORITHM_NAME, BouncyCastleProvider.PROVIDER_NAME);
        generator.init(keySize, new SecureRandom());
        return generator.generateKey().getEncoded();
    }

    /**
     * Encrypts a string with SM4 in ECB mode.
     *
     * @param content plain text to encrypt
     * @param hexKey  key as a hexadecimal string
     * @return ciphertext as a hexadecimal string; its length grows with the input length
     * @throws Exception if the key is invalid or the cipher operation fails
     */
    public static String encrypt(String content, String hexKey) throws Exception {
        // Hex key text -> raw key bytes.
        byte[] rawKey = ByteUtils.fromHexString(hexKey);
        // Plain text -> bytes.
        byte[] plainBytes = content.getBytes(ENCODING);
        // Encrypted bytes -> hex text.
        return ByteUtils.toHexString(encryptEcbPadding(rawKey, plainBytes));
    }

    /**
     * Encrypts raw bytes with SM4 in ECB mode.
     *
     * @param key  raw key bytes
     * @param data bytes to encrypt
     * @return the encrypted bytes
     * @throws Exception if the key is invalid or the cipher operation fails
     */
    public static byte[] encryptEcbPadding(byte[] key, byte[] data) throws Exception {
        return generateEcbCipher(ALGORITHM_NAME_ECB_PADDING, Cipher.ENCRYPT_MODE, key).doFinal(data);
    }


    /* ---------------------------- decryption ---------------------------- */

    /**
     * Decrypts a hexadecimal ciphertext with SM4 in ECB mode.
     *
     * @param hexKey     key as a hexadecimal string
     * @param cipherText ciphertext as a hexadecimal string
     * @return the decrypted plain text
     * @throws Exception if the key or ciphertext is invalid or the cipher operation fails
     */
    public static String decryptEcb(String hexKey, String cipherText) throws Exception {
        byte[] rawKey = ByteUtils.fromHexString(hexKey);
        byte[] encrypted = ByteUtils.fromHexString(cipherText);
        byte[] plainBytes = decryptEcbPadding(rawKey, encrypted);
        return new String(plainBytes, ENCODING);
    }

    /**
     * Decrypts raw bytes with SM4 in ECB mode.
     *
     * @param key        raw key bytes
     * @param cipherText bytes to decrypt
     * @return the decrypted bytes
     * @throws Exception if the key is invalid or the cipher operation fails
     */
    public static byte[] decryptEcbPadding(byte[] key, byte[] cipherText) throws Exception {
        return generateEcbCipher(ALGORITHM_NAME_ECB_PADDING, Cipher.DECRYPT_MODE, key).doFinal(cipherText);
    }

    /**
     * Checks whether a ciphertext decrypts to the given plain text.
     *
     * @param hexKey     key as a hexadecimal string
     * @param cipherText ciphertext as a hexadecimal string
     * @param paramStr   the expected plain text
     * @return {@code true} when the decrypted bytes equal the bytes of {@code paramStr}
     * @throws Exception if the key or ciphertext is invalid or the cipher operation fails
     */
    public static boolean verifyEcb(String hexKey, String cipherText, String paramStr) throws Exception {
        byte[] rawKey = ByteUtils.fromHexString(hexKey);
        byte[] encrypted = ByteUtils.fromHexString(cipherText);
        byte[] decrypted = decryptEcbPadding(rawKey, encrypted);
        byte[] expected = paramStr.getBytes(ENCODING);
        return Arrays.equals(decrypted, expected);
    }

}

5、打包后运行,Job模板如下

{
	"job": {
		"setting": {
			"speed": {
				"channel": 3
			},
			"errorLimit": {
				"record": 0,
				"percentage": 0.02
			}
		},
		"content": [
			{
				"reader": {
					"name": "mysqlreader",
					"parameter": {
						"username": "root",
						"password": "xxxx",
						"column": [
							"姓名",
							"年龄",
							"专业",
							"部门"
						],
						"connection": [
							{
								"table": [
									"人员"
								],
								"jdbcUrl": [
									"jdbc:mysql://100.xxx.xxx.xxx:3306/test?useSSL=false&allowPublicKeyRetrieval=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai"
								]
							}
						]
					}
				},
				"transformer":[
					{
						"name":"dx_encrypt",
						"parameter":{
							"columnIndex":0,
							"paras":[
								"AES",
								"key"
							]
						}
					}
				],
				"writer": {
					"name": "postgresqlwriter",
					"parameter": {
						"print":true,
						"column": [
							"姓名",
							"年龄",
							"专业",
							"部门"
						],
						"connection": [
							{
								"jdbcUrl": "jdbc:postgresql://100.xxx.xxx.xxx:5432/人力数据0727",
								"table": ["人员"]
							}
						],
						"password": "xxxx",
						"username": "postgres"
					}
				}
			}
		]
	}
}

6、执行结果

DataX二次开发——(3)新增数据加密脱敏插件_第3张图片

你可能感兴趣的:(大数据,datax,数据同步,ETL,大数据)