Flink消费kafka写入hive,报错GC

报错信息

Flink 消费 Kafka 写入 Hive 时报 GC 错误。将 TaskManager 内存加到 16GB 后问题依旧,不起作用。

java.lang.OutOfMemoryError: GC overhead limit exceeded
	at java.nio.HeapByteBuffer.<init>(HeapByteBuffer.java:57) ~[?:1.8.0_181]
	at java.nio.ByteBuffer.allocate(ByteBuffer.java:335) ~[?:1.8.0_181]
	at org.apache.hive.orc.impl.OutStream.getNewInputBuffer(OutStream.java:109) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.OutStream.write(OutStream.java:142) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at com.google.protobuf.CodedOutputStream.refreshBuffer(CodedOutputStream.java:833) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at com.google.protobuf.CodedOutputStream.flush(CodedOutputStream.java:843) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at com.google.protobuf.AbstractMessageLite.writeTo(AbstractMessageLite.java:80) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.PhysicalFsWriter.writeIndexStream(PhysicalFsWriter.java:512) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl$StreamFactory.writeIndex(WriterImpl.java:221) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl$TreeWriter.writeStripe(WriterImpl.java:531) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl$StringBaseTreeWriter.writeStripe(WriterImpl.java:1007) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl$StructTreeWriter.writeStripe(WriterImpl.java:1786) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl.flushStripe(WriterImpl.java:2171) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl.close(WriterImpl.java:2335) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:330) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:120) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.connectors.hive.write.HiveBulkWriterFactory$1.finish(HiveBulkWriterFactory.java:79) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.formats.hadoop.bulk.HadoopPathBasedPartFileWriter.closeForCommit(HadoopPathBasedPartFileWriter.java:71) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.Bucket.closePartFile(Bucket.java:262) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.Bucket.prepareBucketForCheckpointing(Bucket.java:304) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.Bucket.onReceptionOfCheckpoint(Bucket.java:276) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.Buckets.snapshotActiveBuckets(Buckets.java:270) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.Buckets.snapshotState(Buckets.java:261) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSinkHelper.snapshotState(StreamingFileSinkHelper.java:87) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.table.filesystem.stream.AbstractStreamingWriter.snapshotState(AbstractStreamingWriter.java:129) ~[flink-table_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.table.filesystem.stream.StreamingFileWriter.snapshotState(StreamingFileWriter.java:101) ~[flink-table_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.operators.StreamOperatorStateHandler.snapshotState(StreamOperatorStateHandler.java:219) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.operators.StreamOperatorStateHandler.snapshotState(StreamOperatorStateHandler.java:170) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.operators.AbstractStreamOperator.snapshotState(AbstractStreamOperator.java:348) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.checkpointStreamOperator(RegularOperatorChain.java:233) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.buildOperatorSnapshotFutures(RegularOperatorChain.java:206) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.snapshotState(RegularOperatorChain.java:186) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
java.lang.OutOfMemoryError: Java heap space
	at org.apache.hive.orc.impl.RunLengthIntegerWriterV2.<init>(RunLengthIntegerWriterV2.java:140) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl$TreeWriter.createIntegerWriter(WriterImpl.java:398) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl$IntegerTreeWriter.<init>(WriterImpl.java:745) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl.createTreeWriter(WriterImpl.java:2087) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl.access$1200(WriterImpl.java:88) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl$StructTreeWriter.<init>(WriterImpl.java:1720) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl.createTreeWriter(WriterImpl.java:2117) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl.<init>(WriterImpl.java:161) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hive.orc.impl.WriterImpl.<init>(WriterImpl.java:126) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hadoop.hive.ql.io.orc.WriterImpl.<init>(WriterImpl.java:94) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hadoop.hive.ql.io.orc.OrcFile.createWriter(OrcFile.java:314) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.write(OrcOutputFormat.java:101) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.connectors.hive.write.HiveBulkWriterFactory$1.addElement(HiveBulkWriterFactory.java:71) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.connectors.hive.write.HiveBulkWriterFactory$1.addElement(HiveBulkWriterFactory.java:51) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.formats.hadoop.bulk.HadoopPathBasedPartFileWriter.write(HadoopPathBasedPartFileWriter.java:59) ~[flink-sql-connector-hive-2.2.0_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.Bucket.write(Bucket.java:222) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.Buckets.onElement(Buckets.java:305) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSinkHelper.onElement(StreamingFileSinkHelper.java:103) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.table.filesystem.stream.AbstractStreamingWriter.processElement(AbstractStreamingWriter.java:140) ~[flink-table_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.pushToOperator(CopyingChainingOutput.java:82) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.collect(CopyingChainingOutput.java:57) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.collect(CopyingChainingOutput.java:29) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.operators.CountingOutput.collect(CountingOutput.java:56) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.operators.CountingOutput.collect(CountingOutput.java:29) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at StreamExecCalc$85.processElement_split6(Unknown Source) ~[?:?]
	at StreamExecCalc$85.processElement(Unknown Source) ~[?:?]
	at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.pushToOperator(CopyingChainingOutput.java:82) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.collect(CopyingChainingOutput.java:57) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.collect(CopyingChainingOutput.java:29) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.runtime.tasks.SourceOperatorStreamTask$AsyncDataOutputToOutput.emitRecord(SourceOperatorStreamTask.java:196) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.streaming.api.operators.source.SourceOutputWithWatermarks.collect(SourceOutputWithWatermarks.java:110) ~[flink-dist_2.11-1.14.2.jar:1.14.2]
	at org.apache.flink.connector.kafka.source.reader.KafkaRecordEmitter.emitRecord(KafkaRecordEmitter.java:36) ~[flink-sql-connector-kafka_2.11-1.14.2.jar:1.14.2]

解决方法

TaskManager 内存加到 16GB 后仍然报 GC 错误。根据任务的数据量判断,肯定不是内存设置不足的问题。
参考文章:

https://community.cloudera.com/t5/Support-Questions/I-am-getting-outofmemory-while-inserting-the-data-into-table/m-p/119682

原因是:写入的 Hive 表分区数过多(分区粒度为分钟级),且表的存储格式是 ORC。
ORC writer 会为每个打开的输出文件维护一个写缓冲区。因此,当向分区数很多的分区表大量写入数据时,会同时存在大量打开的 ORC writer,各自的缓冲区累计占用大量堆内存。所以即使把内存加到 16GB 也无济于事。
解决办法:将 Hive 表的存储格式由 ORC 改成 Text 格式(Text 格式写入时不需要为每个文件维护大缓冲区),修改后任务运行正常。此外,减少分区粒度(如改为小时级分区)也可以降低同时打开的文件数,从根本上缓解该问题。

你可能感兴趣的:(hive,大数据,hadoop,flink)