When a MySQL decimal column is imported into HDFS via Sqoop, Spark cannot parse the result if the decimal precision is less than or equal to 18.
The Avro fixed type needs an explicit byte count, which is determined by the decimal's precision.
public static final int[] PRECISION_TO_BYTE_COUNT = new int[38];
static {
    for (int prec = 1; prec <= 38; ++prec) {
        // For example, with precision 4, Math.log(Math.pow(10.0D, (double) prec) - 1.0D) / Math.log(2.0D)
        // equals log2(9999); + 1.0D adds a sign bit; dividing the total bit count by 8
        // (rounded up) gives the byte count. Math really does matter.
        PRECISION_TO_BYTE_COUNT[prec - 1] =
            (int) Math.ceil((Math.log(Math.pow(10.0D, (double) prec) - 1.0D) / Math.log(2.0D) + 1.0D) / 8.0D);
    }
}
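A quick sanity check of the table (a hypothetical main method in the same class; the expected values follow from the formula above):

public static void main(String[] args) {
    // Expected from the formula: precision 4 -> 2 bytes, 9 -> 4, 18 -> 8, 38 -> 16
    for (int prec : new int[] {4, 9, 18, 38}) {
        System.out.println(prec + " -> " + PRECISION_TO_BYTE_COUNT[prec - 1]);
    }
}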
So that Spark can parse the data, use the fixed type when the decimal precision is 18 or less:
public Schema toAvroSchema(int sqlType, String columnName, Integer precision, Integer scale) {
    List<Schema> childSchemas = new ArrayList<>();
    childSchemas.add(Schema.create(Schema.Type.NULL));
    if (this.options.getConf().getBoolean("sqoop.avro.logical_types.decimal.enable", false) || isLogicalType(sqlType)) {
        if (precision > 18) {
            childSchemas.add(this.toAvroLogicalType(columnName, sqlType, precision, scale)
                .addToSchema(Schema.create(Schema.Type.BYTES)));
        } else {
            // Added so that Spark can parse the value: back the decimal with a fixed
            // schema whose size comes from the precision-to-byte-count table above.
            childSchemas.add(this.toAvroLogicalType(columnName, sqlType, precision, scale)
                .addToSchema(Schema.createFixed(columnName, null, null,
                    AvroSchemaGeneratorOverride.PRECISION_TO_BYTE_COUNT[precision - 1])));
        }
    } else {
        childSchemas.add(Schema.create(this.toAvroType(columnName, sqlType)));
    }
    return Schema.createUnion(childSchemas);
}
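For example, for a MySQL DECIMAL(4,2) column this branch yields a union like the following (a standalone sketch against Avro's public API; the column name "salary" is made up):

import java.util.Arrays;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;

public class DecimalSchemaDemo {
    public static void main(String[] args) {
        // size 2 = PRECISION_TO_BYTE_COUNT[4 - 1]
        Schema fixed = LogicalTypes.decimal(4, 2)
            .addToSchema(Schema.createFixed("salary", null, null, 2));
        Schema union = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), fixed));
        System.out.println(union);
        // prints something like:
        // ["null",{"type":"fixed","name":"salary","size":2,"logicalType":"decimal","precision":4,"scale":2}]
    }
}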
The decimal conversion methods (these @Override methods match the signatures of Avro's Conversions.DecimalConversion):

@Override
public ByteBuffer toBytes(BigDecimal value, Schema schema, LogicalType type) {
    int scale = ((LogicalTypes.Decimal) type).getScale();
    if (scale != value.scale()) {
        throw new AvroTypeException("Cannot encode decimal with scale " +
            value.scale() + " as scale " + scale);
    }
    // The bytes encoding is simply the two's-complement unscaled value.
    return ByteBuffer.wrap(value.unscaledValue().toByteArray());
}
@Override
public BigDecimal fromFixed(GenericFixed value, Schema schema, LogicalType type) {
    int scale = ((LogicalTypes.Decimal) type).getScale();
    // The fixed bytes hold the two's-complement unscaled value; reapply the scale.
    return new BigDecimal(new BigInteger(value.bytes()), scale);
}
@Override
public GenericFixed toFixed(BigDecimal value, Schema schema, LogicalType type) {
    int scale = ((LogicalTypes.Decimal) type).getScale();
    if (scale != value.scale()) {
        throw new AvroTypeException("Cannot encode decimal with scale " +
            value.scale() + " as scale " + scale);
    }
    // byte is signed, range -128 to 127; (byte) 0xFF == -1
    byte fillByte = (byte) (value.signum() < 0 ? 0xFF : 0x00);
    // toByteArray() uses two's complement: 127 -> {127}; 128 -> {0, -128};
    // 255 -> {0, -1}; 256 -> {1, 0}
    byte[] unscaled = value.unscaledValue().toByteArray();
    byte[] bytes = new byte[schema.getFixedSize()];
    int offset = bytes.length - unscaled.length;
    for (int i = 0; i < bytes.length; i += 1) {
        if (i < offset) {
            // Sign-extend: pad high-order bytes with 0x00 (positive) or 0xFF (negative).
            bytes[i] = fillByte;
        } else {
            bytes[i] = unscaled[i - offset];
        }
    }
    return new GenericData.Fixed(schema, bytes);
}
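Avro's built-in Conversions.DecimalConversion provides equivalent conversions; a minimal round-trip sketch (the schema name "salary" is made up):

import java.math.BigDecimal;
import org.apache.avro.Conversions;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericFixed;

public class DecimalRoundTrip {
    public static void main(String[] args) {
        Schema schema = LogicalTypes.decimal(4, 2)
            .addToSchema(Schema.createFixed("salary", null, null, 2));
        Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
        BigDecimal original = new BigDecimal("-12.34"); // unscaled value -1234 fits in 2 bytes
        GenericFixed fixed = conversion.toFixed(original, schema, schema.getLogicalType());
        System.out.println(conversion.fromFixed(fixed, schema, schema.getLogicalType())); // -12.34
    }
}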
3.1 Avro record schema
{
  "namespace": "com.wqb.hdfs.avro",
  "type": "record",
  "name": "User",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "id", "type": "int"},
    {"name": "salary", "type": "int"},
    {"name": "age", "type": "int"},
    {"name": "address", "type": "string"}
  ]
}
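A sketch of loading this schema and filling in a record (assumes the JSON above is saved as user.avsc; the field values are made up):

import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class BuildUser {
    public static void main(String[] args) throws IOException {
        Schema schema = new Schema.Parser().parse(new File("user.avsc"));
        GenericRecord user = new GenericData.Record(schema);
        user.put("name", "alice");
        user.put("id", 1);
        user.put("salary", 10000);
        user.put("age", 30);
        user.put("address", "Beijing");
        System.out.println(user);
    }
}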
3.2 Avro decimal logical type schema
{
  "type": "bytes",
  "logicalType": "decimal",
  "precision": 4,
  "scale": 2
}
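Parsing this schema with Avro attaches the logical type automatically; a small check:

import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;

public class InspectDecimal {
    public static void main(String[] args) {
        Schema schema = new Schema.Parser().parse(
            "{\"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2}");
        LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) schema.getLogicalType();
        System.out.println(decimal.getPrecision() + ", " + decimal.getScale()); // 4, 2
    }
}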
3.3 Avro fixed type (the RPC handshake schemas from the Avro specification, which use fixed for MD5 hashes)
{
  "type": "record",
  "name": "HandshakeRequest",
  "namespace": "org.apache.avro.ipc",
  "fields": [
    {"name": "clientHash",
     "type": {"type": "fixed", "name": "MD5", "size": 16}},
    {"name": "clientProtocol", "type": ["null", "string"]},
    {"name": "serverHash", "type": "MD5"},
    {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
  ]
}
{
  "type": "record",
  "name": "HandshakeResponse",
  "namespace": "org.apache.avro.ipc",
  "fields": [
    {"name": "match",
     "type": {"type": "enum", "name": "HandshakeMatch",
              "symbols": ["BOTH", "CLIENT", "NONE"]}},
    {"name": "serverProtocol", "type": ["null", "string"]},
    {"name": "serverHash", "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
    {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
  ]
}
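A fixed value must be exactly "size" bytes; a minimal sketch creating an MD5 value for the schema above (the digest bytes are a placeholder):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;

public class FixedDemo {
    public static void main(String[] args) {
        // The MD5 fixed schema from the handshake above: exactly 16 bytes.
        Schema md5 = Schema.createFixed("MD5", null, "org.apache.avro.ipc", 16);
        byte[] digest = new byte[16]; // placeholder; a real MD5 digest is also 16 bytes
        GenericData.Fixed hash = new GenericData.Fixed(md5, digest);
        System.out.println(hash.getSchema().getFixedSize()); // 16
    }
}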