public ParquetRecordWriter(
    ...) {
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, pageSize, compressor, dictionaryPageSize, enableDictionary, validating, writerVersion);
}
public InternalParquetRecordWriter(
    ...) {
  ...
  initStore();
}
public void write(Void key, T value) throws IOException, InterruptedException {
internalWriter.write(value);
}
public void write(T value) throws IOException, InterruptedException {
writeSupport.write(value);
++ recordCount;
checkBlockSizeReached();
}
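writeSupport.write(value) hands the record to the user-supplied WriteSupport (Spark SQL's RowWriteSupport, shown further down, is one such implementation). As a minimal sketch of that contract — the class name and one-column schema are invented for illustration, and the package names assume the pre-Apache parquet.* artifacts quoted in this walkthrough — a WriteSupport looks roughly like this:

import java.nio.charset.StandardCharsets;
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;

import parquet.hadoop.api.WriteSupport;     // org.apache.parquet.hadoop.api in newer releases
import parquet.io.api.Binary;
import parquet.io.api.RecordConsumer;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

// Sketch of a WriteSupport (class name and schema are made up): init() hands the
// schema and extra metadata to the framework, prepareForWrite() receives the
// RecordConsumer built from MessageColumnIO, and write() drives that consumer
// once per record.
public class StringWriteSupport extends WriteSupport<String> {
  private final MessageType schema =
      MessageTypeParser.parseMessageType("message example { required binary name; }");
  private RecordConsumer consumer;

  @Override
  public WriteContext init(Configuration configuration) {
    return new WriteContext(schema, new HashMap<String, String>());
  }

  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    this.consumer = recordConsumer;   // the (possibly wrapped) MessageColumnIORecordConsumer
  }

  @Override
  public void write(String record) {
    consumer.startMessage();
    consumer.startField("name", 0);
    consumer.addBinary(Binary.fromByteArray(record.getBytes(StandardCharsets.UTF_8)));
    consumer.endField("name", 0);
    consumer.endMessage();
  }
}

Back in the constructor, initStore sets up the in-memory stores: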
int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, rowGroupSize / schema.getColumns().size() / 5);
int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize));
The block (row group) buffer size and page buffer size are initialized from the row group size, the page size, and the number of columns in the schema.
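A quick worked example of these two formulas, with assumed values (128 MB row group, 1 MB pages, a 10-column schema, and a 64 KB MINIMUM_BUFFER_SIZE):

// Worked example of the two formulas above (all values are illustrative assumptions,
// not taken from the source: 128 MB row group, 1 MB pages, 10 columns, 64 KB minimum).
public class BufferSizeExample {
  public static void main(String[] args) {
    int MINIMUM_BUFFER_SIZE = 64 * 1024;
    int rowGroupSize = 128 * 1024 * 1024;
    int pageSize = 1 * 1024 * 1024;
    int columns = 10;

    int initialBlockBufferSize =
        Math.max(MINIMUM_BUFFER_SIZE, rowGroupSize / columns / 5);            // 2,684,354 (~2.6 MB)
    int initialPageBufferSize =
        Math.max(MINIMUM_BUFFER_SIZE,
                 Math.min(pageSize + pageSize / 10, initialBlockBufferSize)); // 1,153,433 (~1.1 MB)

    System.out.println(initialBlockBufferSize + " " + initialPageBufferSize);
  }
}

Each column chunk buffer therefore starts at roughly a fifth of its expected share of the row group, and each page buffer at slightly more than one page, capped by the chunk buffer.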
pageStore = new ColumnChunkPageWriteStore(compressor, schema, initialBlockBufferSize);
columnStore = new ColumnWriteStoreImpl(pageStore, pageSize, initialPageBufferSize, dictionaryPageSize, enableDictionary, writerVersion);
The pageStore (ColumnChunkPageWriteStore) and columnStore (ColumnWriteStoreImpl) are then created; they hold the PageWriter and ColumnWriter inner classes, respectively, which perform the actual data writes.
MessageColumnIO columnIO = new ColumnIOFactory(validating).getColumnIO(schema);
writeSupport.prepareForWrite(columnIO.getRecordWriter(columnStore));
Finally, a MessageColumnIO is built from the schema, and writeSupport.prepareForWrite is called with its RecordConsumer to get ready for writing data.
The validating flag is fixed when the ParquetRecordWriter is constructed; it defaults to false, as the following code shows:
public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, CompressionCodecName codec)
throws IOException, InterruptedException {
...
boolean validating = getValidation(conf);
if (INFO) LOG.info("Validation is " + (validating ? "on" : "off"));
...
return new ParquetRecordWriter<T>(
w,
writeSupport,
init.getSchema(),
init.getExtraMetaData(),
blockSize, pageSize,
codecFactory.getCompressor(codec, pageSize),
dictionaryPageSize,
enableDictionary,
validating,
writerVersion);
}
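getValidation(conf) reads a boolean from the job configuration, so validation can be switched on per job. A small sketch, assuming the parquet.validation key that ParquetOutputFormat exposes as its VALIDATION constant:

import org.apache.hadoop.conf.Configuration;

public class EnableValidation {
  public static void main(String[] args) {
    // "parquet.validation" is the key getValidation(conf) reads; it defaults to
    // false, so the ValidatingRecordConsumer wrapper is normally not installed.
    Configuration conf = new Configuration();
    conf.setBoolean("parquet.validation", true);
  }
}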
columnIO.getRecordWriter(columnStore) returns the RecordConsumer (the data consumer) backed by the ColumnWriteStore:
public RecordConsumer getRecordWriter(ColumnWriteStore columns) {
RecordConsumer recordWriter = new MessageColumnIORecordConsumer(columns);
if (DEBUG) recordWriter = new RecordConsumerLoggingWrapper(recordWriter);
return validating ? new ValidatingRecordConsumer(recordWriter, getType()) : recordWriter;
}
def write(record: Row): Unit = {
val attributesSize = attributes.size
if (attributesSize > record.size) {
throw new IndexOutOfBoundsException(
s"Trying to write more fields than contained in row (${attributesSize}>${record.size})")
}
var index = 0
writer.startMessage()
while(index < attributesSize) {
// null values indicate optional fields but we do not check currently
if (record(index) != null && record(index) != Nil) {
writer.startField(attributes(index).name, index)
consumeType(attributes(index).dataType, record, index)
writer.endField(attributes(index).name, index)
}
index = index + 1
}
writer.endMessage()
}
public void startField(String field, int index) {
try {
if (DEBUG) log("startField(" + field + ", " + index + ")");
currentColumnIO = ((GroupColumnIO)currentColumnIO).getChild(index);
emptyField = true;
if (DEBUG) printState();
} catch (RuntimeException e) {
throw new ParquetEncodingException("error starting field " + field + " at " + index, e);
}
}
consumeType is defined as follows. Strings are handled as binary because Parquet has no separate string primitive type; a string value is written as its UTF-8 encoded bytes wrapped in a Binary:
private def consumeType(
ctype: DataType,
record: Row,
index: Int): Unit = {
ctype match {
case StringType => writer.addBinary(
Binary.fromByteArray(
record(index).asInstanceOf[String].getBytes("utf-8")
)
)
case BinaryType => writer.addBinary(
Binary.fromByteArray(record(index).asInstanceOf[Array[Byte]]))
case IntegerType => writer.addInteger(record.getInt(index))
case ShortType => writer.addInteger(record.getShort(index))
case LongType => writer.addLong(record.getLong(index))
case ByteType => writer.addInteger(record.getByte(index))
case DoubleType => writer.addDouble(record.getDouble(index))
case FloatType => writer.addFloat(record.getFloat(index))
case BooleanType => writer.addBoolean(record.getBoolean(index))
case d: DecimalType =>
if (d.precisionInfo == None || d.precisionInfo.get.precision > 18) {
sys.error(s"Unsupported datatype $d, cannot write to consumer")
}
writeDecimal(record(index).asInstanceOf[Decimal], d.precisionInfo.get.precision)
case _ => sys.error(s"Unsupported datatype $ctype, cannot write to consumer")
}
}
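The precision > 18 rejection is presumably tied to the unscaled decimal value having to fit in a signed 64-bit long (writeDecimal itself is not shown here, so treat the link as an assumption): 10^18 − 1 is below Long.MAX_VALUE ≈ 9.22 × 10^18, while a 19-digit value can already overflow.

// Arithmetic behind the precision <= 18 cut-off (the connection to writeDecimal is
// an assumption; the bound itself is plain arithmetic).
public class DecimalBound {
  public static void main(String[] args) {
    long max18Digits = 999_999_999_999_999_999L;           // 10^18 - 1, largest 18-digit unscaled value
    System.out.println(max18Digits < Long.MAX_VALUE);      // true: fits in a long (~9.22e18)
    System.out.println((double) Long.MAX_VALUE < 1e19);    // true: a 19-digit value may not fit
  }
}

Each writer.addXxx call ends up in MessageColumnIORecordConsumer; addInteger, for instance: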
@Override
public void addInteger(int value) {
if (DEBUG) log("addInt(" + value + ")");
emptyField = false;
getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel());
setRepetitionLevel();
if (DEBUG) printState();
}
ColumnWriterImpl.write is defined as follows: the repetition and definition levels are written to their own level columns, and dataColumn writes the actual value:
public void write(int value, int repetitionLevel, int definitionLevel) {
if (DEBUG) log(value, repetitionLevel, definitionLevel);
repetitionLevelColumn.writeInteger(repetitionLevel);
definitionLevelColumn.writeInteger(definitionLevel);
dataColumn.writeInteger(value);
updateStatistics(value);
accountForValueWritten();
}
public void endField(String field, int index) {
if (DEBUG) log("endField(" + field + ", " + index + ")");
currentColumnIO = currentColumnIO.getParent();
if (emptyField) {
throw new ParquetEncodingException("empty fields are illegal, the field should be ommited completely instead");
}
fieldsWritten[currentLevel].markWritten(index);
r[currentLevel] = currentLevel == 0 ? 0 : r[currentLevel - 1];
if (DEBUG) printState();
}
For example, the following sequence of RecordConsumer calls:
startField("A", 0)
addValue(1)
addValue(2)
endField("A", 0)
startField("B", 1)
startGroup()
startField("C", 0)
addValue(3)
endField("C", 0)
endGroup()
endField("B", 1)
produces this record:
{
A:[1,2]
B:{C:3}
}
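One schema that admits this record — an assumption for illustration, not given in the text — is a repeated int32 A plus an optional group B containing a required int32 C. The maximum repetition and definition levels that the r[currentLevel] and getDefinitionLevel() calls above operate against can be checked directly (package names again assume the parquet.* artifacts):

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

// Hypothetical schema matching the record {A:[1,2], B:{C:3}} above, used to show
// where the repetition/definition levels passed to ColumnWriter.write come from.
public class LevelExample {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example {\n" +
        "  repeated int32 A;\n" +
        "  optional group B {\n" +
        "    required int32 C;\n" +
        "  }\n" +
        "}");

    System.out.println(schema.getMaxRepetitionLevel("A"));       // 1: A can repeat
    System.out.println(schema.getMaxDefinitionLevel("A"));       // 1: a repeated field also adds a definition level
    System.out.println(schema.getMaxRepetitionLevel("B", "C"));  // 0: nothing on the path repeats
    System.out.println(schema.getMaxDefinitionLevel("B", "C"));  // 1: only the optional B can be absent
  }
}

Back in InternalParquetRecordWriter, every write also calls checkBlockSizeReached to decide when the buffered row group should be flushed: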
private void checkBlockSizeReached() throws IOException {
if (recordCount >= recordCountForNextMemCheck) { // checking the memory size is relatively expensive, so let's not do it for every record.
long memSize = columnStore.memSize();
if (memSize > rowGroupSize) {
LOG.info(format("mem size %,d > %,d: flushing %,d records to disk.", memSize, rowGroupSize, recordCount));
flushRowGroupToStore();
initStore();
recordCountForNextMemCheck = min(max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCount / 2), MAXIMUM_RECORD_COUNT_FOR_CHECK);
} else {
float recordSize = (float) memSize / recordCount;
recordCountForNextMemCheck = min(
max(MINIMUM_RECORD_COUNT_FOR_CHECK, (recordCount + (long)(rowGroupSize / recordSize)) / 2), // will check halfway
recordCount + MAXIMUM_RECORD_COUNT_FOR_CHECK // will not look more than max records ahead
);
if (DEBUG) LOG.debug(format("Checked mem at %,d will check again at: %,d ", recordCount, recordCountForNextMemCheck));
}
}
}
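A worked pass through the scheduling branch, with assumed numbers (128 MB row group; the check fires at 100 records with columnStore.memSize() reporting 10 MB):

// Worked example of the "check halfway" heuristic (all values are assumptions,
// not taken from the source): 128 MB row group, 100 records occupying 10 MB.
public class MemCheckExample {
  public static void main(String[] args) {
    long rowGroupSize = 128L * 1024 * 1024;
    long recordCount = 100;
    long memSize = 10L * 1024 * 1024;

    float recordSize = (float) memSize / recordCount;                   // ~104,858 bytes per record
    long next = (recordCount + (long) (rowGroupSize / recordSize)) / 2;
    System.out.println(next);   // 690: about 1280 records would fill the row group, so check halfway there
  }
}

The clamping against MINIMUM_RECORD_COUNT_FOR_CHECK and MAXIMUM_RECORD_COUNT_FOR_CHECK in the real code is omitted here.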
private void flushRowGroupToStore()
throws IOException {
LOG.info(format("Flushing mem columnStore to file. allocated memory: %,d", columnStore.allocatedSize()));
if (columnStore.allocatedSize() > 3 * (long)rowGroupSize) {
LOG.warn("Too much memory used: " + columnStore.memUsageString());
}
if (recordCount > 0) {
parquetFileWriter.startBlock(recordCount);
columnStore.flush();
pageStore.flushToFileWriter(parquetFileWriter);
recordCount = 0;
parquetFileWriter.endBlock();
}
columnStore = null;
pageStore = null;
}
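Since the flush is driven by rowGroupSize (blockSize), and one level down by the page and dictionary settings, those are the knobs to tune from the job side. A sketch, assuming the standard parquet.* configuration keys read by ParquetOutputFormat (the concrete values are illustrative):

import org.apache.hadoop.conf.Configuration;

public class ParquetWriteTuning {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setInt("parquet.block.size", 256 * 1024 * 1024);      // row group size: the flush threshold above
    conf.setInt("parquet.page.size", 1024 * 1024);             // page size used when ColumnWriterImpl writes pages
    conf.setInt("parquet.dictionary.page.size", 1024 * 1024);  // cap on the per-chunk dictionary page
    conf.setBoolean("parquet.enable.dictionary", true);        // controls whether flush() emits a dictionary page
  }
}

Back in flushRowGroupToStore, columnStore.flush() is implemented as: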
public void flush() {
Collection<ColumnWriterImpl> values = columns.values();
for (ColumnWriterImpl memColumn : values) {
memColumn.flush();
}
}
The implementation of ColumnWriterImpl.flush:
public void flush() {
if (valueCount > 0) {
writePage();
}
final DictionaryPage dictionaryPage = dataColumn.createDictionaryPage();
if (dictionaryPage != null) {
if (DEBUG) LOG.debug("write dictionary");
try {
pageWriter.writeDictionaryPage(dictionaryPage);
} catch (IOException e) {
throw new ParquetEncodingException("could not write dictionary page for " + path, e);
}
dataColumn.resetDictionary();
}
}
private void writePage() {
if (DEBUG) LOG.debug("write page");
try {
pageWriter.writePage(
concat(repetitionLevelColumn.getBytes(), definitionLevelColumn.getBytes(), dataColumn.getBytes()),
valueCount,
statistics,
repetitionLevelColumn.getEncoding(),
definitionLevelColumn.getEncoding(),
dataColumn.getEncoding());
} catch (IOException e) {
throw new ParquetEncodingException("could not write page for " + path, e);
}
repetitionLevelColumn.reset();
definitionLevelColumn.reset();
dataColumn.reset();
valueCount = 0;
resetStatistics();
}
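Putting the pieces together, here is a small end-to-end sketch using Parquet's bundled example API. Package names and the long ParquetWriter constructor assume the pre-Apache parquet.* releases quoted in this walkthrough (newer versions live under org.apache.parquet.* and prefer builder APIs), so treat it as an illustration of the flow rather than a definitive recipe. Writing a single record exercises every stage above: WriteSupport → RecordConsumer → ColumnWriter → page → row group → file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import parquet.column.ParquetProperties.WriterVersion;
import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class WritePathDemo {
  public static void main(String[] args) throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message demo { required int32 id; required binary name; }");

    Configuration conf = new Configuration();
    GroupWriteSupport.setSchema(schema, conf);      // GroupWriteSupport.init reads it back from conf

    ParquetWriter<Group> writer = new ParquetWriter<Group>(
        new Path("/tmp/demo.parquet"),              // output path (illustrative)
        new GroupWriteSupport(),
        CompressionCodecName.UNCOMPRESSED,
        128 * 1024 * 1024,                          // block (row group) size
        1024 * 1024,                                // page size
        1024 * 1024,                                // dictionary page size
        true,                                       // enable dictionary
        false,                                      // validating (off by default, as above)
        WriterVersion.PARQUET_1_0,
        conf);

    Group record = new SimpleGroupFactory(schema).newGroup()
        .append("id", 1)
        .append("name", "parquet");
    writer.write(record);                           // InternalParquetRecordWriter.write -> checkBlockSizeReached
    writer.close();                                 // flushes the last row group and writes the footer
  }
}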