import time
import pynvml
import sys
import os
def getNVMUsed(id=0):
    """Return the memory currently in use on one NVIDIA GPU, in MiB.

    Args:
        id: Zero-based NVML device index to query. Defaults to 0, which is
            the device this function always queried before the parameter
            existed, so existing ``getNVMUsed()`` calls behave the same.
            (The name shadows the ``id`` builtin inside this function.)

    Returns:
        The ``used`` field of the device's NVML memory info divided by
        1024 twice (NVML reports the field in bytes, so the result is MiB).

    Raises:
        Whatever pynvml raises when initialisation or the device lookup
        fails -- presumably ``pynvml.NVMLError``; confirm against the
        installed pynvml version.
    """
    # NOTE: NVML is initialised on every call and never shut down here.
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(id)
    meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
    return meminfo.used / 1024 / 1024
class Logger(object):
    """File-like tee: everything written goes to stdout and to a log file.

    Meant to be assigned to ``sys.stdout`` so that ``print`` output is shown
    on the terminal and appended to ``filename`` at the same time.
    """

    def __init__(self, filename="Default.log"):
        """Remember the current ``sys.stdout`` and open ``filename`` for appending.

        Args:
            filename: Path of the log file; created if missing, appended to
                otherwise.
        """
        self.terminal = sys.stdout
        self.log = open(filename, "a")

    def write(self, message):
        """Write ``message`` to the terminal and to the log file.

        The log file is flushed after every write so the on-disk log stays
        current even if the process is killed.
        """
        self.terminal.write(message)
        self.log.write(message)
        self.log.flush()

    def flush(self):
        """Flush both underlying streams.

        Needed so that ``print(..., flush=True)`` and ``sys.stdout.flush()``
        actually push data out instead of silently doing nothing.
        """
        self.terminal.flush()
        self.log.flush()
# From here on, everything printed is also appended to 'a.txt'.
sys.stdout = Logger('a.txt')

# Sample the GPU's used memory once every 4 seconds, forever
# (stop the script with Ctrl-C). Usage between two samples is not observed.
while True:
    nvm_current = getNVMUsed()
    print(nvm_current)
    time.sleep(4)
我们只关心算法在某一时刻的最大显存占用,看它会不会超过4G(因为现场电脑配置是4G的),一旦超出程序就会崩溃.上面的代码每次采样后会sleep 4秒,这4秒内不会检测显存,可能漏掉峰值;如果去掉sleep,就是每时每刻都在打印,导致文本很大,一天下来估计能写几十G,打开缓慢.其实我们只关心最大值:每时每刻监控显存,只保留最大值,并且每隔固定时长把最大值输出到本地就好!
import time
import pynvml
import sys
import os
def getNVMUsed(id=0):
    """Return the memory currently in use on GPU ``id``, in MiB.

    Args:
        id: Zero-based NVML device index (defaults to the first device).

    Returns:
        The device's NVML ``used`` memory figure divided by 1024 twice
        (NVML reports the figure in bytes, so the result is MiB).
    """
    # NVML is initialised on every call; nothing here shuts it down.
    pynvml.nvmlInit()
    device = pynvml.nvmlDeviceGetHandleByIndex(id)
    used_bytes = pynvml.nvmlDeviceGetMemoryInfo(device).used
    used_kib = used_bytes / 1024
    return used_kib / 1024
class Logger(object):
    """Tee stream that duplicates writes to stdout and to a log file.

    Assign an instance to ``sys.stdout`` to make every ``print`` appear on
    the terminal and be appended to ``filename``.
    """

    def __init__(self, filename="Default.log"):
        """Capture the current ``sys.stdout`` and open the log in append mode.

        Args:
            filename: Path of the log file to append to.
        """
        self.terminal = sys.stdout
        # Line-buffered (buffering=1): each completed line reaches the file
        # immediately instead of waiting for the buffer to fill or the
        # process to exit.
        self.log = open(filename, "a", buffering=1)

    def write(self, message):
        """Send ``message`` to both the terminal and the log file."""
        for stream in (self.terminal, self.log):
            stream.write(message)

    def flush(self):
        """Flush both streams so explicit flush requests are honoured."""
        for stream in (self.terminal, self.log):
            stream.flush()
# From here on, everything printed is also appended to 'watch_nvidia.txt'.
sys.stdout = Logger('watch_nvidia.txt')

gpu_id = 0                  # NVML index of the GPU being watched
max_val = -1                # highest usage (MiB) seen since start-up
last_reported_minute = -1   # minute-of-hour of the most recent report

# Sample continuously (deliberately no sleep, so short peaks are not missed)
# but only print the running maximum once per reporting slot, keeping the
# log small.
while True:
    now = time.localtime()
    minute = now.tm_min

    nvm_current = getNVMUsed(gpu_id)
    if nvm_current > max_val:
        max_val = nvm_current

    # Alternative trigger -- report once a minute, at second 59:
    #     if now.tm_sec == 59 and last_reported_minute != minute:
    # Active trigger -- report once at every even minute. The second test
    # stops the loop from printing again during the same minute.
    if minute % 2 == 0 and last_reported_minute != minute:
        last_reported_minute = minute
        # Print the timestamp that triggered the report.
        print(time.strftime('%H:%M:%S', now))
        print('gpu_id=%d,memory=%dM' % (gpu_id, max_val))
在每分钟的第59秒输出一次时的结果
14:10:59
gpu_id=0,memory=708M
14:11:59
gpu_id=0,memory=708M
14:12:59
gpu_id=0,memory=3870M
14:13:59
gpu_id=0,memory=5769M
14:14:59
gpu_id=0,memory=5769M
14:15:59
gpu_id=0,memory=5769M
每隔2min的输出
14:18:33
gpu_id=0,memory=724M
14:20:00
gpu_id=0,memory=749M
14:22:00
gpu_id=0,memory=749M
14:24:00
gpu_id=0,memory=749M
14:26:00
gpu_id=0,memory=2560M
14:28:00
gpu_id=0,memory=2560M
14:30:00
gpu_id=0,memory=5865M
14:32:00
gpu_id=0,memory=5865M
本次修改:每隔T分钟打印一次这T分钟内的最大显存,并且实时地写入txt
#pip install nvidia-ml-py
import time
import pynvml
import sys
import os
def getNVMUsed(id=0):
    """Query NVML for the used memory of GPU ``id`` and return it in MiB.

    Args:
        id: Zero-based NVML device index; 0 selects the first device.

    Returns:
        ``used`` from the device's NVML memory info, divided by 1024 twice
        (the field is reported in bytes, so this yields MiB).
    """
    # Initialisation happens on each call; there is no matching shutdown here.
    pynvml.nvmlInit()
    memory = pynvml.nvmlDeviceGetMemoryInfo(
        pynvml.nvmlDeviceGetHandleByIndex(id)
    )
    return (memory.used / 1024) / 1024
class Logger(object):
    """Tee stream: writes go to stdout and to a log file, flushed at once.

    Assign an instance to ``sys.stdout`` so that ``print`` output appears on
    the terminal and is appended to ``filename`` in real time.
    """

    def __init__(self, filename="Default.log"):
        """Capture the current ``sys.stdout`` and open ``filename`` for appending.

        Args:
            filename: Path of the log file; created if it does not exist.
        """
        self.terminal = sys.stdout
        # "a" (append) only: a combined mode such as "aw" is rejected by
        # Python 3's open() with ValueError.
        self.log = open(filename, "a")

    def write(self, message):
        """Write ``message`` to both streams and flush them immediately."""
        self.terminal.write(message)
        self.log.write(message)
        self.flush()

    def flush(self):
        """Flush the terminal stream and the log file."""
        self.terminal.flush()
        self.log.flush()
# From here on, everything printed is also appended to 'watch_nvidia.txt'.
sys.stdout = Logger('watch_nvidia.txt')

# Reporting period in minutes. A report fires in every minute whose
# minute-of-hour is a multiple of this value, so values that divide 60
# give evenly spaced reports.
T_time_gap_min = 1
gpu_id = 0            # NVML index of the GPU being watched
max_val = -1          # peak usage (MiB) within the current period
last_report = None    # (hour, minute) of the most recent report

# Sample continuously (no sleep, so short peaks are not missed); once per
# period print the peak seen during that period and start a new one.
while True:
    now = time.localtime()

    nvm_current = getNVMUsed(gpu_id)
    if nvm_current > max_val:
        max_val = nvm_current

    # Keyed on (hour, minute) rather than the minute alone, so the same
    # minute-of-hour in a later hour still triggers a report. This matters
    # when only one minute per hour passes the modulo test, e.g.
    # T_time_gap_min = 60.
    slot = (now.tm_hour, now.tm_min)
    if now.tm_min % T_time_gap_min == 0 and slot != last_report:
        last_report = slot
        # Print the timestamp that triggered the report.
        print(time.strftime('%H:%M:%S', now))
        print('gpu_id=%d,memory=%dM' % (gpu_id, max_val))
        max_val = -1  # start tracking the next period's peak