Python 查看 GPU 显存占用情况

python查看GPU显存占用情况

from pynvml import nvmlDeviceGetHandleByIndex, nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetMemoryInfo, nvmlDeviceGetName, \
    nvmlDeviceGetTemperature, nvmlShutdown

# Print a per-GPU report (name, memory totals, usage ratio, temperature)
# followed by a one-line summary over all GPUs.

# Initialise NVML before making any other pynvml call.
nvmlInit()
try:
    # Number of GPUs visible to NVML.
    deviceCount = nvmlDeviceGetCount()
    # Running totals over all GPUs, in the same "G" unit printed per device.
    total_memory = 0
    total_free = 0
    total_used = 0
    # Stays None when no GPU is found, so the summary can be skipped instead
    # of failing on an undefined name and a division by zero.
    gpu_name = None

    # Report each GPU in turn.
    for i in range(deviceCount):
        handle = nvmlDeviceGetHandleByIndex(i)
        info = nvmlDeviceGetMemoryInfo(handle)
        gpu_name = nvmlDeviceGetName(handle)
        # Older pynvml releases return bytes here; decode so the name does not
        # print as b'...'.
        if isinstance(gpu_name, bytes):
            gpu_name = gpu_name.decode("utf-8", "replace")

        # Bytes -> whole MiB (floor division) -> GiB, computed once and reused
        # for both the per-device line and the totals.
        total_g = (info.total // 1048576) / 1024
        free_g = (info.free // 1048576) / 1024
        used_g = (info.used // 1048576) / 1024

        # Print model, memory figures and temperature on a single line.
        print("[ GPU{}: {}".format(i, gpu_name), end="    ")
        print("总共显存: {}G".format(total_g), end="    ")
        print("空余显存: {}G".format(free_g), end="    ")
        print("已用显存: {}G".format(used_g), end="    ")
        print("显存占用率: {:.2%}".format(info.used / info.total), end="    ")
        # Sensor type 0 is NVML_TEMPERATURE_GPU in the NVML API.
        print("运行温度: {}摄氏度 ]".format(nvmlDeviceGetTemperature(handle, 0)))

        total_memory += total_g
        total_free += free_g
        total_used += used_g

    if deviceCount > 0:
        # Summary over all GPUs. Note that gpu_name is the name of the last
        # GPU enumerated, not a list of every model present.
        print(
            "显卡名称:[{}],显卡数量:[{}],总共显存;[{}G],空余显存:[{}G],已用显存:[{}G],显存占用率:[{:.2%}]。".format(
                gpu_name, deviceCount, total_memory, total_free, total_used,
                total_used / total_memory,
            )
        )
    else:
        print("未检测到GPU。")
finally:
    # Always release NVML, even if one of the queries above raised.
    nvmlShutdown()

python查看GPU显存占用情况_第1张图片

打印为字典列表格式（每块 GPU 对应一个字典）

from pynvml import nvmlDeviceGetHandleByIndex, nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetMemoryInfo, nvmlDeviceGetName, \
    nvmlDeviceGetTemperature, nvmlShutdown

# Collect one {'index', 'info'} dict per GPU into a list and print the list.

# Initialise NVML before making any other pynvml call.
nvmlInit()
try:
    # Number of GPUs visible to NVML.
    deviceCount = nvmlDeviceGetCount()

    all_info = []
    # Build one summary entry per GPU.
    for i in range(deviceCount):
        handle = nvmlDeviceGetHandleByIndex(i)
        info = nvmlDeviceGetMemoryInfo(handle)
        gpu_name = nvmlDeviceGetName(handle)
        # Older pynvml releases return bytes here; splitting bytes on a str
        # separator raises TypeError, so decode first.
        if isinstance(gpu_name, bytes):
            gpu_name = gpu_name.decode("utf-8", "replace")
        # Keep only the last space-separated word of the device name.
        gpu_name_series = gpu_name.split(" ")[-1]

        # Bytes -> whole MiB (floor division) -> GiB.
        free_g = (info.free // 1048576) / 1024
        used_g = (info.used // 1048576) / 1024

        # Free/used memory, usage ratio and temperature in one string.
        # Sensor type 0 is NVML_TEMPERATURE_GPU in the NVML API.
        single_gpu_info = (
            "[ GPU-{}: {}  空余显存: {:.1f}  已用显存: {:.1f}  显存占用率: {:.2%}  温度: {}度 ]".format(
                i,
                gpu_name_series,
                free_g,
                used_g,
                info.used / info.total,
                nvmlDeviceGetTemperature(handle, 0),
            )
        )
        single_gpu_dict = {'index': i, 'info': single_gpu_info}
        all_info.append(single_gpu_dict)

    print(all_info)
finally:
    # Always release NVML, even if one of the queries above raised.
    nvmlShutdown()

你可能感兴趣的:(python,开发语言)