make install 去掉 folly
armv8-a+crc
arrow NEON
https://blog.csdn.net/u011889952/article/details/118762819
这里面的方案二，我之前用的也是这个
https://blog.csdn.net/zzhongcy/article/details/105512565
参考的此博客
火焰图
https://blog.csdn.net/qq_43097201/article/details/125683217
https://blog.csdn.net/21aspnet/article/details/122256644
https://www.hikunpeng.com/document/detail/zh/kunpengbds/function/function.html
https://www.hikunpeng.com/document/detail/zh/kunpenggrf/tuningtip/kunpengtuning_12_0002.html
#OmniRuntime
https://www.hikunpeng.com/document/detail/zh/kunpengbds/appAccelFeatures/sqlqueryaccelf/kunpengbds_omniruntime_20_0002.html
https://www.hikunpeng.com/document/detail/zh/kunpengboostkithistory/2200/bds/kunpengomnidata_20_0002.html
#腾讯云 文章
https://cloud.tencent.com/developer/article/1135965
#StarRocks
https://zhuanlan.zhihu.com/p/456520574
https://blog.csdn.net/weixin_35749796/article/details/129083732
#京东 spark内核优化
https://developer.jdcloud.com/article/2344
#字节
https://zhuanlan.zhihu.com/p/157592720
# 【麻省理工学院】MIT 6.S081 操作系统工程
https://www.bilibili.com/video/BV1Dy4y1m7ZE/?vd_source=883aee57c074fdece2b1cba2e009542b
# 哈工大 操作系统 李治军
https://www.bilibili.com/video/BV19r4y1b7Aw/?spm_id_from=333.337.search-card.all.click&vd_source=883aee57c074fdece2b1cba2e009542b
https://www.lanqiao.cn/courses/115/learning/
#linux 源码
https://zhuanlan.zhihu.com/p/469193712
https://ke.qq.com/course/4032547?flowToken=1040236#term_id=104185168
https://space.bilibili.com/2459964/channel/collectiondetail?sid=1408252
深入理解Linux虚拟内存管理
#深入理解linux内核架构
https://github.com/zhiweifan/Professional-Linux-Kernel-Architecture.git
#Operating Systems Design and Implementation, 3/E
https://book.douban.com/subject/1764254/
#linux 0.12
https://zhuanlan.zhihu.com/p/344082401
https://space.bilibili.com/2459964/channel/collectiondetail?sid=1408252
https://github.com/yifengyou/linux-0.12
https://github.com/Kevin-Kevin/hit-operatingSystem
#(哈工大)操作系统原理、实现与实践
https://github.com/EliasZuo/test2.git
#xv6（unix v6 的教学版重新实现）
https://github.com/deyuhua/xv6-book-chinese
#清华镜像
https://mirrors.tuna.tsinghua.edu.cn/gnu/
irate(node_disk_io_time_seconds_total[1m])
该指标采集自 /proc/diskstats 文件中的信息
# ps -ef | grep 4226
root 4226 4217 5 14:47 ? 00:00:08 /usr/jdk64/current/bin/java -server -Xmx4096m -Djava.io.tmpdir=/data10/nm/usercache/root/appcache/application_1690958779805_0001/container_1690958779805_0001_01_000002/tmp -Dspark.driver.port=10951 -Dspark.yarn.app.container.log.dir=/data01/gluten_home/hadoop/logs/userlogs/application_1690958779805_0001/container_1690958779805_0001_01_000002 -XX:OnOutOfMemoryError=kill %p org.apache.spark.executor.YarnCoarseGrainedExecutorBackend --driver-url spark://CoarseGrainedScheduler@nma07-301-a-12-sev-tg225-02u03:10951 --executor-id 1 --hostname nma07-301-a-12-sev-tg225-02u03 --cores 24 --app-id application_1690958779805_0001 --resourceProfileId 0 --user-class-path file:/data10/nm/usercache/root/appcache/application_1690958779805_0001/container_1690958779805_0001_01_000002/__app__.jar
root 8274 51248 0 14:49 pts/4 00:00:00 grep --color=auto 4226
# ll
total 4.0K
drwxr-xr-x 59 root root 4.0K Aug 2 15:33 blockmgr-c43711a4-5720-42b6-8979-8f0c4382847a
drwxr-xr-x 2 root root 10 Aug 2 14:47 gluten-279b0605-b14b-48bb-99bf-41bf1a999500
drwx------ 3 root root 63 Aug 2 14:47 spark-4bd32145-c445-4d9e-8739-e613af5f6fd8
pscp.pssh
pssh
$ cat /etc/os-release
(ARM)Shell_HDFS.sh
bin/spark-shell --master local[30] \
--driver-memory 120g \
--jars /home/op/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar
#vanilla spark
./bin/spark-shell \
--master local[30] \
--driver-memory 30g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=90g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=60 \
--conf spark.local.dir=/data01/gluten_home/tmp/
http://10.37.90.2:18080/history/local-1687240040038
Job 2 cancelled part of cancelled job group 5f7bc794-a8e8-44fd-b1b8-cf4fc076bfcf
#vanilla
./bin/spark-shell \
--master yarn \
--driver-memory 20g \
--executor-memory 4g \
--executor-cores 8 \
--conf spark.executor.instances=4 \
--conf spark.executor.memoryOverhead=1g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=18g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=64 \
--conf spark.local.dir=/data01/gluten_home/tmp/
#result
http://10.37.90.2:18080/history/application_1687694342114_0038/SQL/
#gluten
./bin/spark-shell \
--master yarn \
--driver-memory 20g \
--executor-memory 4g \
--executor-cores 8 \
--conf spark.executor.instances=4 \
--conf spark.executor.memoryOverhead=1g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=18g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=64 \
--conf spark.local.dir=/data01/gluten_home/tmp/ \
--conf spark.plugins=io.glutenproject.GlutenPlugin \
--conf spark.gluten.sql.columnar.backend.lib=velox \
--conf spark.gluten.loadLibFromJar=false \
--conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
--conf spark.driver.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
--conf spark.executor.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
--conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
--conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
--conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
--conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
--conf spark.executorEnv.LIBHDFS3_CONF="/data01/gluten_home/hadoop/etc/hadoop/hdfs-site.xml"
// eventlog
http://10.37.90.2:18080/history/application_1687694342114_0007/SQL/
// eventlog
http://10.37.90.2:18080/history/application_1687694342114_0040/SQL/
#0801
#gluten
./bin/spark-shell \
--master yarn \
--driver-memory 20g \
--executor-memory 4g \
--executor-cores 8 \
--conf spark.executor.instances=4 \
--conf spark.executor.memoryOverhead=1g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=18g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=64 \
--conf spark.local.dir=/data01/gluten_home/tmp/,/data02/tmp/,/data03/tmp/,/data04/tmp/,/data05/tmp/,/data06/tmp/,/data07/tmp/,/data08/tmp/,/data09/tmp/,/data10/tmp/ \
--conf spark.plugins=io.glutenproject.GlutenPlugin \
--conf spark.gluten.sql.columnar.backend.lib=velox \
--conf spark.gluten.loadLibFromJar=false \
--conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
--conf spark.driver.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
--conf spark.executor.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
--conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
--conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
--conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
--conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
--conf spark.executorEnv.LIBHDFS3_CONF="/data01/gluten_home/hadoop/etc/hadoop/hdfs-site.xml"
mkdir /data02/tmp/
mkdir /data03/tmp/
mkdir /data04/tmp/
mkdir /data05/tmp/
mkdir /data06/tmp/
mkdir /data07/tmp/
mkdir /data08/tmp/
mkdir /data09/tmp/
mkdir /data10/tmp/
mkdir /data01/nm/
mkdir /data02/nm/
mkdir /data03/nm/
mkdir /data04/nm/
mkdir /data05/nm/
mkdir /data06/nm/
mkdir /data07/nm/
mkdir /data08/nm/
mkdir /data09/nm/
mkdir /data10/nm/
/data01/nm/,/data02/nm/,/data03/nm/,/data04/nm/,/data05/nm/,/data06/nm/,/data07/nm/,/data08/nm/,/data09/nm/,/data10/nm/
yarn.nodemanager.local-dirs
/data01/nm/,/data02/nm/,/data03/nm/,/data04/nm/,/data05/nm/,/data06/nm/,/data07/nm/,/data08/nm/,/data09/nm/,/data10/nm/
#0802 spark
./bin/spark-shell \
--master yarn \
--driver-memory 20g \
--executor-memory 4g \
--executor-cores 24 \
--conf spark.executor.instances=4 \
--conf spark.executor.memoryOverhead=1g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=18g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=64 \
--conf spark.local.dir=/data01/gluten_home/tmp/,/data02/tmp/,/data03/tmp/,/data04/tmp/,/data05/tmp/,/data06/tmp/,/data07/tmp/,/data08/tmp/,/data09/tmp/,/data10/tmp/
#0802 gluten
./bin/spark-shell \
--master yarn \
--driver-memory 20g \
--executor-memory 4g \
--executor-cores 24 \
--conf spark.executor.instances=4 \
--conf spark.executor.memoryOverhead=1g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=18g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=64 \
--conf spark.local.dir=/data01/gluten_home/tmp/,/data02/tmp/,/data03/tmp/,/data04/tmp/,/data05/tmp/,/data06/tmp/,/data07/tmp/,/data08/tmp/,/data09/tmp/,/data10/tmp/ \
--conf spark.plugins=io.glutenproject.GlutenPlugin \
--conf spark.gluten.sql.columnar.backend.lib=velox \
--conf spark.gluten.loadLibFromJar=false \
--conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
--conf spark.driver.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
--conf spark.executor.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
--conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
--conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
--conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
--conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
--conf spark.executorEnv.LIBHDFS3_CONF="/data01/gluten_home/hadoop/etc/hadoop/hdfs-site.xml"
export LIBHDFS3_CONF="/usr/local/hadoop-3.3.3/etc/hadoop/hdfs-site.xml"
./bin/spark-shell \
--master local[30] \
--driver-memory 50g \
--executor-memory 4g \
--executor-cores 8 \
--conf spark.executor.instances=24 \
--conf spark.executor.memoryOverhead=1g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=18g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=384 \
--conf spark.plugins=io.glutenproject.GlutenPlugin \
--conf spark.gluten.sql.columnar.backend.lib=velox \
--conf spark.gluten.loadLibFromJar=true \
--conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
--conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
--conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
--conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
--conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
--conf spark.executorEnv.LIBHDFS3_CONF="/usr/local/hadoop-3.3.3/etc/hadoop/hdfs-site.xml" \
--jars /home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar,/home/op/0719/gluten-thirdparty-lib-centos-7.jar
#yarn, 未通过
./bin/spark-shell \
--master yarn --deploy-mode client \
--driver-memory 50g \
--executor-memory 4g \
--executor-cores 8 \
--conf spark.executor.instances=24 \
--conf spark.executor.memoryOverhead=1g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=18g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=384 \
--conf spark.plugins=io.glutenproject.GlutenPlugin \
--conf spark.gluten.sql.columnar.backend.lib=velox \
--conf spark.gluten.loadLibFromJar=true \
--conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
--conf spark.driver.extraClassPath=/home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar \
--conf spark.executor.extraClassPath=/home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar \
--conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
--conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
--conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
--conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
--conf spark.executorEnv.LIBHDFS3_CONF="/usr/local/hadoop-3.3.3/etc/hadoop/hdfs-site.xml" \
--jars /home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar,/home/op/0719/gluten-thirdparty-lib-centos-7.jar
./bin/spark-shell \
--master yarn --deploy-mode client \
--driver-memory 50g \
--executor-memory 4g \
--executor-cores 8 \
--conf spark.executor.instances=24 \
--conf spark.executor.memoryOverhead=1g \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=18g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.sql.shuffle.partitions=384 \
--conf spark.plugins=io.glutenproject.GlutenPlugin \
--conf spark.gluten.sql.columnar.backend.lib=velox \
--conf spark.gluten.loadLibFromJar=true \
--conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
--conf spark.driver.extraClassPath=/home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar \
--conf spark.executor.extraClassPath=/home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar \
--conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
--conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
--conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
--conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
--conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
--conf spark.executorEnv.LIBHDFS3_CONF="/usr/local/hadoop-3.3.3/etc/hadoop/hdfs-site.xml" \
--jars /home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar,/home/op/0719/gluten-thirdparty-lib-centos-7.jar \
--conf spark.sql.files.maxPartitionBytes=2g \
--conf spark.gluten.sql.columnar.backend.velox.memoryCapRatio=0.75