Hive Job Parameter Tuning Recommendations

-- Tuning recommendations

set spark.executor.memory=24g; -- executor heap allocation

set spark.driver.memory=32g; -- enlarge the driver heap
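
Both values can be checked later in the same session: in the Hive and Spark SQL CLIs, issuing set with a key and no value echoes the current setting.

set spark.executor.memory; -- prints spark.executor.memory=24g

set spark.driver.memory; -- prints spark.driver.memory=32g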

set spark.vcore.boost.ratio=1;

set spark.driver.cores=4; -- number of cores for the driver process, default 1

set spark.sql.fragPartition.maxShuffleBytes=1073741824; -- 1 GB

set spark.yarn.batch.smart.heuristic=125495624;

set spark.sql.files.maxPartitionBytes=8589934592; -- default 128 MB; lowering it increases the number of map tasks (the resulting small files then need compaction), while the 8 GB set here packs more data into each input split
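
As a rough worked example (the 1 TB input size is illustrative, and per-file open costs are ignored): scanning 1 TB of splittable files yields about 1 TB / 128 MB = 8192 read partitions at the default, but only 1 TB / 8 GB = 128 with the value above.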

set spark.sql.parquet.adaptiveFileSplit=true;

set spark.sql.fragPartition.parquet.fast.mode.enabled=true;

set spark.sql.fragPartition.compactEnabled=true;

set spark.maxRemoteBlockSizeFetchToMem=268435456; -- 256 MB; remote blocks larger than this are fetched to disk rather than memory, so a single giant request cannot consume too much memory

set spark.sql.fragPartition.skip.failure=true;

set spark.driver.memoryOverhead=4096; -- off-heap overhead allowed for the driver, in MB

set spark.sql.adaptive.maxNumPostShufflePartitions=125; -- cap post-shuffle partitions to speed up the job and avoid wasting storage and CPU on tiny tasks
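
Depending on the Spark build, this cap only applies while adaptive execution is switched on, so it is usually paired with:

set spark.sql.adaptive.enabled=true; -- required for the post-shuffle coalescing cap above to take effect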

set spark.sql.fragPartition.threshold=268435456; -- 256 MB

set spark.sql.orc.adaptiveFileSplit=true;

set spark.executor.memoryOverhead=4096; -- cap on the off-heap memory each executor may use, in MB
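
On YARN the overhead is added to the heap when the container is requested, so the settings above translate to roughly 24g + 4096 MB = 28 GB per executor container and 32g + 4096 MB = 36 GB for the driver container.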

set spark.sql.fragPartition.expectedBytes=268435456; -- 256 MB
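
Since set is session-scoped, the whole block above is normally pasted at the head of the job script, directly ahead of the query it tunes. A minimal sketch, with hypothetical table names dw.dwd_event_log and dw.ads_daily_report:

set spark.executor.memory=24g; -- ...followed by the rest of the block above

insert overwrite table dw.ads_daily_report partition (dt='2024-01-01') -- hypothetical target table
select uid, count(1) as pv
from dw.dwd_event_log -- hypothetical source table
where dt='2024-01-01'
group by uid;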
