2023.8

编译

make install 去掉 folly
armv8-a+crc
arrow NEON

相关链接

https://blog.csdn.net/u011889952/article/details/118762819
我之前采用的也是这篇文章里的方案二

https://blog.csdn.net/zzhongcy/article/details/105512565
参考的此博客

火焰图

https://blog.csdn.net/qq_43097201/article/details/125683217
https://blog.csdn.net/21aspnet/article/details/122256644
https://www.hikunpeng.com/document/detail/zh/kunpengbds/function/function.html
https://www.hikunpeng.com/document/detail/zh/kunpenggrf/tuningtip/kunpengtuning_12_0002.html

#OmniRuntime
https://www.hikunpeng.com/document/detail/zh/kunpengbds/appAccelFeatures/sqlqueryaccelf/kunpengbds_omniruntime_20_0002.html
https://www.hikunpeng.com/document/detail/zh/kunpengboostkithistory/2200/bds/kunpengomnidata_20_0002.html

#腾讯云 文章
https://cloud.tencent.com/developer/article/1135965
#StarRocks
https://zhuanlan.zhihu.com/p/456520574
https://blog.csdn.net/weixin_35749796/article/details/129083732


#京东 spark内核优化
https://developer.jdcloud.com/article/2344
#字节
https://zhuanlan.zhihu.com/p/157592720


# 【麻省理工学院】MIT 6.S081 操作系统工程
https://www.bilibili.com/video/BV1Dy4y1m7ZE/?vd_source=883aee57c074fdece2b1cba2e009542b
# 哈工大 操作系统 李治军
https://www.bilibili.com/video/BV19r4y1b7Aw/?spm_id_from=333.337.search-card.all.click&vd_source=883aee57c074fdece2b1cba2e009542b
https://www.lanqiao.cn/courses/115/learning/


#linux  源码
https://zhuanlan.zhihu.com/p/469193712
https://ke.qq.com/course/4032547?flowToken=1040236#term_id=104185168
https://space.bilibili.com/2459964/channel/collectiondetail?sid=1408252

深入理解Linux虚拟内存管理

#深入理解linux内核架构
https://github.com/zhiweifan/Professional-Linux-Kernel-Architecture.git

#Operating Systems Design and Implementation, 3/E
https://book.douban.com/subject/1764254/


#linux 0.12
https://zhuanlan.zhihu.com/p/344082401
https://space.bilibili.com/2459964/channel/collectiondetail?sid=1408252
https://github.com/yifengyou/linux-0.12
https://github.com/Kevin-Kevin/hit-operatingSystem

#(哈工大)操作系统原理、实现与实践
https://github.com/EliasZuo/test2.git

#xv6(Unix v6 的教学版重新实现)
https://github.com/deyuhua/xv6-book-chinese

#清华镜像
https://mirrors.tuna.tsinghua.edu.cn/gnu/






metrics 采集

irate(node_disk_io_time_seconds_total[1m])
该指标由 node_exporter 从 /proc/diskstats 文件中采集

日志

# ps -ef | grep 4226
root      4226  4217  5 14:47 ?        00:00:08 /usr/jdk64/current/bin/java -server -Xmx4096m -Djava.io.tmpdir=/data10/nm/usercache/root/appcache/application_1690958779805_0001/container_1690958779805_0001_01_000002/tmp -Dspark.driver.port=10951 -Dspark.yarn.app.container.log.dir=/data01/gluten_home/hadoop/logs/userlogs/application_1690958779805_0001/container_1690958779805_0001_01_000002 -XX:OnOutOfMemoryError=kill %p org.apache.spark.executor.YarnCoarseGrainedExecutorBackend --driver-url spark://CoarseGrainedScheduler@nma07-301-a-12-sev-tg225-02u03:10951 --executor-id 1 --hostname nma07-301-a-12-sev-tg225-02u03 --cores 24 --app-id application_1690958779805_0001 --resourceProfileId 0 --user-class-path file:/data10/nm/usercache/root/appcache/application_1690958779805_0001/container_1690958779805_0001_01_000002/__app__.jar
root      8274 51248  0 14:49 pts/4    00:00:00 grep --color=auto 4226



# ll
total 4.0K
drwxr-xr-x 59 root root 4.0K Aug  2 15:33 blockmgr-c43711a4-5720-42b6-8979-8f0c4382847a
drwxr-xr-x  2 root root   10 Aug  2 14:47 gluten-279b0605-b14b-48bb-99bf-41bf1a999500
drwx------  3 root root   63 Aug  2 14:47 spark-4bd32145-c445-4d9e-8739-e613af5f6fd8

shell 命令

pscp.pssh
pssh
$  cat /etc/os-release

(ARM)Shell_HDFS.sh

bin/spark-shell --master local[30] \
 --driver-memory 120g \
 --jars /home/op/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar
 
 
#vanilla spark
./bin/spark-shell \
 --master local[30] \
 --driver-memory 30g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=90g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=60 \
 --conf spark.local.dir=/data01/gluten_home/tmp/
 
 
 http://10.37.90.2:18080/history/local-1687240040038
 Job 2 cancelled part of cancelled job group 5f7bc794-a8e8-44fd-b1b8-cf4fc076bfcf
 



#vanilla
./bin/spark-shell \
 --master yarn \
 --driver-memory 20g \
 --executor-memory 4g \
 --executor-cores 8 \
 --conf spark.executor.instances=4 \
 --conf spark.executor.memoryOverhead=1g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=18g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=64 \
 --conf spark.local.dir=/data01/gluten_home/tmp/

#result
http://10.37.90.2:18080/history/application_1687694342114_0038/SQL/




#gluten
./bin/spark-shell \
 --master yarn \
 --driver-memory 20g \
 --executor-memory 4g \
 --executor-cores 8 \
 --conf spark.executor.instances=4 \
 --conf spark.executor.memoryOverhead=1g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=18g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=64 \
 --conf spark.local.dir=/data01/gluten_home/tmp/ \
 --conf spark.plugins=io.glutenproject.GlutenPlugin \
 --conf spark.gluten.sql.columnar.backend.lib=velox \
 --conf spark.gluten.loadLibFromJar=false \
 --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
 --conf spark.driver.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
 --conf spark.executor.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
 --conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
 --conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
 --conf spark.executorEnv.LIBHDFS3_CONF="/data01/gluten_home/hadoop/etc/hadoop/hdfs-site.xml"
 
// eventlog
http://10.37.90.2:18080/history/application_1687694342114_0007/SQL/

// eventlog
http://10.37.90.2:18080/history/application_1687694342114_0040/SQL/





#0801
#gluten
./bin/spark-shell \
 --master yarn \
 --driver-memory 20g \
 --executor-memory 4g \
 --executor-cores 8 \
 --conf spark.executor.instances=4 \
 --conf spark.executor.memoryOverhead=1g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=18g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=64 \
 --conf spark.local.dir=/data01/gluten_home/tmp/,/data02/tmp/,/data03/tmp/,/data04/tmp/,/data05/tmp/,/data06/tmp/,/data07/tmp/,/data08/tmp/,/data09/tmp/,/data10/tmp/ \
 --conf spark.plugins=io.glutenproject.GlutenPlugin \
 --conf spark.gluten.sql.columnar.backend.lib=velox \
 --conf spark.gluten.loadLibFromJar=false \
 --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
 --conf spark.driver.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
 --conf spark.executor.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
 --conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
 --conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
 --conf spark.executorEnv.LIBHDFS3_CONF="/data01/gluten_home/hadoop/etc/hadoop/hdfs-site.xml"



mkdir  /data02/tmp/
mkdir  /data03/tmp/
mkdir  /data04/tmp/
mkdir  /data05/tmp/
mkdir  /data06/tmp/
mkdir  /data07/tmp/
mkdir  /data08/tmp/
mkdir  /data09/tmp/
mkdir  /data10/tmp/


mkdir  /data01/nm/
mkdir  /data02/nm/
mkdir  /data03/nm/
mkdir  /data04/nm/
mkdir  /data05/nm/
mkdir  /data06/nm/
mkdir  /data07/nm/
mkdir  /data08/nm/
mkdir  /data09/nm/
mkdir  /data10/nm/



/data01/nm/,/data02/nm/,/data03/nm/,/data04/nm/,/data05/nm/,/data06/nm/,/data07/nm/,/data08/nm/,/data09/nm/,/data10/nm/



        <property>
                <name>yarn.nodemanager.local-dirs</name>
                <value>/data01/nm/,/data02/nm/,/data03/nm/,/data04/nm/,/data05/nm/,/data06/nm/,/data07/nm/,/data08/nm/,/data09/nm/,/data10/nm/</value>
        </property>
        
		
		
		
#0802 spark
./bin/spark-shell \
 --master yarn \
 --driver-memory 20g \
 --executor-memory 4g \
 --executor-cores 24 \
 --conf spark.executor.instances=4 \
 --conf spark.executor.memoryOverhead=1g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=18g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=64 \
 --conf spark.local.dir=/data01/gluten_home/tmp/,/data02/tmp/,/data03/tmp/,/data04/tmp/,/data05/tmp/,/data06/tmp/,/data07/tmp/,/data08/tmp/,/data09/tmp/,/data10/tmp/
 


		
#0802 gluten
./bin/spark-shell \
 --master yarn \
 --driver-memory 20g \
 --executor-memory 4g \
 --executor-cores 24 \
 --conf spark.executor.instances=4 \
 --conf spark.executor.memoryOverhead=1g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=18g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=64 \
 --conf spark.local.dir=/data01/gluten_home/tmp/,/data02/tmp/,/data03/tmp/,/data04/tmp/,/data05/tmp/,/data06/tmp/,/data07/tmp/,/data08/tmp/,/data09/tmp/,/data10/tmp/ \
 --conf spark.plugins=io.glutenproject.GlutenPlugin \
 --conf spark.gluten.sql.columnar.backend.lib=velox \
 --conf spark.gluten.loadLibFromJar=false \
 --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
 --conf spark.driver.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
 --conf spark.executor.extraClassPath=/data01/gluten_home/gluten/package/target/gluten-velox-bundle-spark3.2_2.12-ctyunos_2.0.1-0.5.0-SNAPSHOT.jar \
 --conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
 --conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
 --conf spark.executorEnv.LIBHDFS3_CONF="/data01/gluten_home/hadoop/etc/hadoop/hdfs-site.xml"

		
		
		
		
		

集群




export LIBHDFS3_CONF="/usr/local/hadoop-3.3.3/etc/hadoop/hdfs-site.xml"



 ./bin/spark-shell \
 --master local[30] \
 --driver-memory 50g \
 --executor-memory 4g \
 --executor-cores 8 \
 --conf spark.executor.instances=24 \
 --conf spark.executor.memoryOverhead=1g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=18g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=384 \
 --conf spark.plugins=io.glutenproject.GlutenPlugin \
 --conf spark.gluten.sql.columnar.backend.lib=velox \
 --conf spark.gluten.loadLibFromJar=true \
 --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
 --conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
 --conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
 --conf spark.executorEnv.LIBHDFS3_CONF="/usr/local/hadoop-3.3.3/etc/hadoop/hdfs-site.xml" \
 --jars /home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar,/home/op/0719/gluten-thirdparty-lib-centos-7.jar
 
 






 

#yarn, 未通过

 ./bin/spark-shell \
 --master yarn --deploy-mode client \
 --driver-memory 50g \
 --executor-memory 4g \
 --executor-cores 8 \
 --conf spark.executor.instances=24 \
 --conf spark.executor.memoryOverhead=1g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=18g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=384 \
 --conf spark.plugins=io.glutenproject.GlutenPlugin \
 --conf spark.gluten.sql.columnar.backend.lib=velox \
 --conf spark.gluten.loadLibFromJar=true \
 --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
 --conf spark.driver.extraClassPath=/home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar \
 --conf spark.executor.extraClassPath=/home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar \
 --conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
 --conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
 --conf spark.executorEnv.LIBHDFS3_CONF="/usr/local/hadoop-3.3.3/etc/hadoop/hdfs-site.xml" \
 --jars /home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar,/home/op/0719/gluten-thirdparty-lib-centos-7.jar










 ./bin/spark-shell \
 --master yarn --deploy-mode client \
 --driver-memory 50g \
 --executor-memory 4g \
 --executor-cores 8 \
 --conf spark.executor.instances=24 \
 --conf spark.executor.memoryOverhead=1g \
 --conf spark.memory.offHeap.enabled=true \
 --conf spark.memory.offHeap.size=18g \
 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.sql.shuffle.partitions=384 \
 --conf spark.plugins=io.glutenproject.GlutenPlugin \
 --conf spark.gluten.sql.columnar.backend.lib=velox \
 --conf spark.gluten.loadLibFromJar=true \
 --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
 --conf spark.driver.extraClassPath=/home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar \
 --conf spark.executor.extraClassPath=/home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar \
 --conf spark.gluten.memory.offHeap.size.in.bytes=19327352832 \
 --conf spark.gluten.memory.task.offHeap.size.in.bytes=2415919104 \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.logicalJoinOptimizationLevel=19 \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizeEnable=true \
 --conf spark.gluten.sql.columnar.physicalJoinOptimizationLevel=18 \
 --conf spark.executorEnv.LIBHDFS3_CONF="/usr/local/hadoop-3.3.3/etc/hadoop/hdfs-site.xml" \
 --jars /home/op/0719/gluten-velox-bundle-spark3.2_2.12-centos_7-1.1.0-SNAPSHOT.jar,/home/op/0719/gluten-thirdparty-lib-centos-7.jar \
 --conf spark.sql.files.maxPartitionBytes=2g \
 --conf spark.gluten.sql.columnar.backend.velox.memoryCapRatio=0.75






你可能感兴趣的:(运维)