- cudastream资源没释放:释放了就小了很多
- 其他资源没释放,问题未知!!!模型资源释放了!!!
- 多线程导致的没释放完成!!!!
https://blog.csdn.net/liu_feng_zi_/article/details/103421821
现象是:/dev/nvidia0
git remote -v # 查看仓库从哪里拉取的
git clone https://github.com/Syllo/nvtop.git
mkdir -p nvtop/build && cd nvtop/build
cmake .. -DNVIDIA_SUPPORT=ON -DAMDGPU_SUPPORT=ON -DINTEL_SUPPORT=ON
make
# Install globally on the system
sudo make install
watch -n 0.1 nvidia-smi # 每隔0.1s查看nvidia-smi情况
ls -lh /proc/进程号/fd
ls -l /proc/进程号/fd
ps -T -p <进程号>   # 或
top -H -p <进程号>
# 方法一:通过名字直接查看top
top -H -p $(pidof name)
top -H -p $(pgrep -f name)
# 方法二:通过程序名字查找进程号(process id == pid),将返回进程号
ps -C <name> -o pid=
# 看显存
fuser -v /dev/nvidia*
# 这个pro在当前文件夹下进行,要包含所有的库,cuda tensorrt opencv等,全部要包含,为了测试内存泄漏。测试文件在:Testlib文件夹下
cuda-memcheck --tool memcheck --leak-check full ./pro
# 结果:cuda-memcheck没有任何问题(注意:--leak-check full 只有在程序退出前销毁了CUDA上下文(例如调用 cudaDeviceReset())时才能给出准确的泄漏报告;否则"没有任何问题"并不能说明没有泄漏)
# valgrind查看CPU内存泄漏
valgrind --leak-check=full ./pro
linux /proc/[pid]/fd 中socket描述符后面的数字是什么意思?inode(information node)_/proc/[pid]/fd inode定位文件位置-CSDN博客
1、tar -xf valgrind-3.17.0.tar.bz2
2、cd valgrind-3.17.0
3、./configure // 运行配置脚本生成makefile文件,可以--help查看配置项,自行按需配置,比如修改编译工具、修改安装路径等
4、make
5、make install //安装生成的可执行文件;安装路径由参数--prefix指定,此时需要把该路径添加到环境变量PATH中;若不加参数--prefix,仅使用默认配置,则会安装到默认路径,无需再手动添加PATH
valgrind --leak-check=full ./pro
HEAP SUMMARY:
==22248== in use at exit: 1,855,624 bytes in 4,645 blocks
==22248== total heap usage: 11,181,264 allocs, 11,176,619 frees, 39,958,084,517 bytes allocated
==22248==
==22248== 72 bytes in 1 blocks are possibly lost in loss record 733 of 1,502
==22248== at 0x483C7F5: malloc (vg_replace_malloc.c:442)
==22248== by 0x36BB8690: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x369F192A: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA522C: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== 72 bytes in 1 blocks are possibly lost in loss record 734 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x369DFDCA: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x369E6A19: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x369E7293: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA49ED: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,327 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA48C4: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x53257E1: set_device (simple_yolo.cu:334)
==22248== by 0x53257E1: SimpleYolo::set_device(int) (simple_yolo.cu:330)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,328 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA4F93: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x53257E1: set_device (simple_yolo.cu:334)
==22248== by 0x53257E1: SimpleYolo::set_device(int) (simple_yolo.cu:330)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,329 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BC0DCC: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BC22F9: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BC2444: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA40B9: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,330 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BC0DCC: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BC22F9: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BC2467: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA40B9: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,331 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA4239: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x53257E1: set_device (simple_yolo.cu:334)
==22248== by 0x53257E1: SimpleYolo::set_device(int) (simple_yolo.cu:330)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,332 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x369E66B9: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x369E72AF: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA49ED: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,333 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA4AAC: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x53257E1: set_device (simple_yolo.cu:334)
==22248== by 0x53257E1: SimpleYolo::set_device(int) (simple_yolo.cu:330)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,334 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA4C4E: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x53257E1: set_device (simple_yolo.cu:334)
==22248== by 0x53257E1: SimpleYolo::set_device(int) (simple_yolo.cu:330)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,335 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x368DD177: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA4CB4: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,336 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x368DD197: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA4CB4: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== 152 bytes in 1 blocks are possibly lost in loss record 1,337 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x36A52F94: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x368DD1B7: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA4CB4: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA88BF: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FBE7FF: cudaSetDevice (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== 400 bytes in 1 blocks are possibly lost in loss record 1,462 of 1,502
==22248== at 0x4843FD3: calloc (vg_replace_malloc.c:1595)
==22248== by 0x40149CA: allocate_dtv (dl-tls.c:286)
==22248== by 0x40149CA: _dl_allocate_tls (dl-tls.c:532)
==22248== by 0x5788322: allocate_stack (allocatestack.c:622)
==22248== by 0x5788322: pthread_create@@GLIBC_2.2.5 (pthread_create.c:660)
==22248== by 0x36A119B5: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A51086: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36BA4F5D: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x36A0788F: ??? (in /usr/lib/x86_64-linux-gnu/libcuda.so.470.223.02)
==22248== by 0x5FA2D6E: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5FA4E70: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x579047E: __pthread_once_slow (pthread_once.c:116)
==22248== by 0x5FE5AF8: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248== by 0x5F9F58F: ??? (in /datav/lean/cuda-11.2/targets/x86_64-linux/lib/libcudart.so.11.2.152)
==22248==
==22248== LEAK SUMMARY:
==22248== definitely lost: 0 bytes in 0 blocks
==22248== indirectly lost: 0 bytes in 0 blocks
==22248== possibly lost: 2,216 bytes in 14 blocks
==22248== still reachable: 1,853,376 bytes in 4,630 blocks
==22248== of which reachable via heuristic:
==22248== stdstring : 77,859 bytes in 655 blocks
==22248== suppressed: 32 bytes in 1 blocks
==22248== Reachable blocks (those to which a pointer was found) are not shown.
==22248== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==22248==
==22248== For lists of detected and suppressed errors, rerun with: -s
==22248== ERROR SUMMARY: 14 errors from 14 contexts (suppressed: 0 from 0)
c++中能否判断一个指针指向栈还是堆? - 知乎
- valgrind检测的5种内存泄漏情况详解 valgrind检测的5种内存泄漏情况详解_still reachable-CSDN博客
debug可以发现:栈指针指向的是堆内存。
// CUDA运行时头文件
#include <cuda_runtime.h>
#include <stdio.h>
#include <string.h>
// checkRuntime(op): evaluates the CUDA runtime call `op` and forwards its status,
// together with the call's source text and location, to __check_cuda_runtime.
// The expression yields true when the call returned cudaSuccess, false otherwise.
#define checkRuntime(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__)

// Reports a failed CUDA runtime call.
//   code - status returned by the call
//   op   - source text of the call (stringified by checkRuntime)
//   file - source file of the call site
//   line - source line of the call site
// Returns true when `code` is cudaSuccess; otherwise prints the error name and
// message to stdout and returns false. The process is NOT terminated, so the
// caller decides how to react to a failure.
bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line){
    // Success path first: nothing to report.
    if(code == cudaSuccess){
        return true;
    }

    printf("runtime error %s:%d %s failed. \n code = %s, message = %s\n",
           file, line, op, cudaGetErrorName(code), cudaGetErrorString(code));
    return false;
}
int main(){
char count = 0;
do{
int device_id = 0;
checkRuntime(cudaSetDevice(device_id)); // CPU:1M->9M
cudaStream_t stream = nullptr;
// 这行语句执行后:显存开始有线程,并占用79M;内存从9M上升到100M
checkRuntime(cudaStreamCreate(&stream)); // 创建一个stream
// 在GPU上开辟空间
float* memory_device = nullptr;
checkRuntime(cudaMalloc(&memory_device, 100 * sizeof(float)));
// 在CPU上开辟空间并且放数据进去,将数据复制到GPU
float* memory_host = new float[100];
memory_host[2] = 520.25; // Async就是异步操作,之前的都是 同步操作;异步复制时,发出指令立即返回,并不等待复制结束
checkRuntime(cudaMemcpyAsync(memory_device, memory_host, sizeof(float) * 100, cudaMemcpyHostToDevice, stream)); // 异步复制操作,主线程不需要等待复制结束才继续
// 在CPU上开辟pin memory,并将GPU上的数据复制回来
float* memory_page_locked = nullptr;
checkRuntime(cudaMallocHost(&memory_page_locked, 100 * sizeof(float))); // 同样不等待复制完成,但在流中排队
checkRuntime(cudaMemcpyAsync(memory_page_locked, memory_device, sizeof(float) * 100, cudaMemcpyDeviceToHost, stream)); // 异步复制操作,主线程不需要等待复制结束才继续
printf("%f\n", memory_page_locked[2]); // 可能是错的,因为复制还没结束
checkRuntime(cudaStreamSynchronize(stream)); // 统一等待流队列中的所有操作结束
printf("%f\n", memory_page_locked[2]);
// 释放内存
checkRuntime(cudaFreeHost(memory_page_locked));
checkRuntime(cudaFree(memory_device));
checkRuntime(cudaStreamDestroy(stream));
delete [] memory_host;
count += 1;
}while(count < 2);
// 显存79M,内存103M;回不到原来去,这是没办法的
return 0;
}
关于线程: