【深度学习】性能监控

性能监控

先判断当前操作系统(Windows 或 Linux),然后监控程序运行期间机器的性能(CPU、内存和 GPU)。

import psutil
import matplotlib.pyplot as plt
import time
import matplotlib
import subprocess
import platform
import os 

# GPUtil and pynvml are imported inside a guard: if either import fails, only a
# message is printed and the script keeps loading. The names that failed to
# import stay undefined, so any later use of them raises NameError. If the
# GPUtil import fails, the pynvml import is not attempted.
try:
    import GPUtil
    import pynvml
except ImportError as e:
    print(f"导入GPU模块失败,请先安装GPU驱动:{e}")



def is_windows():
    """Return True when the interpreter reports a Windows host."""
    current_os = platform.system()
    return current_os == "Windows"

def is_linux():
    """Return True when the interpreter reports a Linux host."""
    current_os = platform.system()
    return current_os == "Linux"


class Linux:
    """Performance probes for Linux hosts.

    Every method is static; the class only groups the probes together.
    """

    @staticmethod
    def get_cpu_info_linux():
        """Return ``(cpu_usage, cpu_freq)`` for the whole machine.

        ``cpu_usage`` is the value of ``psutil.cpu_percent(interval=1)``, so
        the call blocks for one second. ``cpu_freq`` is the ``current`` field
        of ``psutil.cpu_freq()``; it is reported as ``0.0`` when psutil cannot
        determine the frequency and returns ``None``.
        """
        cpu_usage = psutil.cpu_percent(interval=1)
        freq = psutil.cpu_freq()
        # cpu_freq() may return None (frequency not determinable); fall back
        # to 0.0 instead of raising AttributeError on `.current`.
        cpu_freq = freq.current if freq is not None else 0.0
        return cpu_usage, cpu_freq

    @staticmethod
    def get_memory_info_linux():
        """Return the amount of used memory in GiB (``used / 1024**3``)."""
        memory = psutil.virtual_memory()
        return memory.used / (1024 ** 3)

    @staticmethod
    def get_gpu_info_linux():
        """Return ``(gpu_usage, gpu_memory_used)``.

        Placeholder: always returns ``(0, 0)`` until a real implementation
        is written.
        """
        return 0, 0  # placeholder values, real implementation still missing

    @staticmethod
    def get_cpu_model_unix():
        """Return the CPU model string on Linux or macOS.

        On Linux the first ``model name`` entry of ``/proc/cpuinfo`` is used;
        on macOS (``Darwin``) the output of
        ``sysctl -n machdep.cpu.brand_string``.

        Returns:
            The model string, or ``None`` when the platform is neither Linux
            nor Darwin, when no ``model name`` entry exists, or when reading
            the information fails (the error is printed).
        """
        system = platform.system()
        try:
            if system == "Linux":
                with open('/proc/cpuinfo') as f:
                    for line in f:
                        key, sep, value = line.partition(':')
                        # Split on the first colon only so a colon inside the
                        # model string cannot truncate it.
                        if sep and key.strip().startswith('model name'):
                            return value.strip()
            elif system == "Darwin":
                return subprocess.check_output(
                    ["sysctl", "-n", "machdep.cpu.brand_string"],
                    universal_newlines=True
                ).strip()
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            print(f"Error: {e}")
            return None
        # Unsupported platform, or no 'model name' line was found.
        return None


class run:
    """Collect system metrics for a period of time and plot them.

    Each sample is a 6-tuple
    ``(cpu_usage, cpu_freq, memory_used_gb, gpu_usage, gpu_memory_used, gpu_temp)``
    and ``plot_system_data`` draws one subplot per field.
    """

    def __init__(self):
        """Create the Windows/Linux probe objects and configure matplotlib."""
        self.win = windows()
        self.linux = Linux()

        # Configure matplotlib so Chinese text renders in titles and labels.
        matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # SimHei: a common Chinese sans-serif font
        matplotlib.rcParams['axes.unicode_minus'] = False    # render the minus sign correctly
        matplotlib.rcParams['font.size'] = 12                # base font size

    def collect_system_data(self, total_duration, interval):
        """Sample metrics with the collector matching the current OS.

        Args:
            total_duration: How long to keep sampling, in seconds.
            interval: Pause between two samples, in seconds.

        Returns:
            A list of 6-tuples (see the class docstring).

        Raises:
            NotImplementedError: The OS is neither Windows nor Linux.
        """
        if is_windows():
            return self.collect_system_data_windows(total_duration, interval)
        elif is_linux():
            return self.collect_system_data_linux(total_duration, interval)
        else:
            raise NotImplementedError("Unsupported operating system.")

    def collect_system_data_windows(self, total_duration, interval):
        """Sample CPU, memory and GPU metrics through the Windows probes.

        Returns a list of 6-tuples; see ``collect_system_data`` for the
        parameters.
        """
        start_time = time.time()
        data = []
        gpu, gpu_handle = self.win.initialize_gpu_info()

        while time.time() - start_time < total_duration:
            cpu_usage, cpu_freq = self.win.get_cpu_info()
            memory_used_gb = self.win.get_memory_info()
            gpu_usage, gpu_memory_used = self.win.get_gpu_info(gpu)
            gpu_temp = self.win.get_gpu_temperature(gpu_handle)

            data.append((cpu_usage, cpu_freq, memory_used_gb, gpu_usage, gpu_memory_used, gpu_temp))
            time.sleep(interval)

        return data

    def collect_system_data_linux(self, total_duration, interval):
        """Sample CPU, memory and GPU metrics through the Linux probes.

        Returns a list of 6-tuples; see ``collect_system_data`` for the
        parameters. The Linux probes expose no GPU temperature, so that
        field is always ``0``.
        """
        start_time = time.time()
        data = []
        gpu_temp = 0  # keeps the tuple the same width as the Windows samples

        while time.time() - start_time < total_duration:
            cpu_usage, cpu_freq = self.linux.get_cpu_info_linux()
            memory_used_gb = self.linux.get_memory_info_linux()
            gpu_usage, gpu_memory_used = self.linux.get_gpu_info_linux()

            data.append((cpu_usage, cpu_freq, memory_used_gb, gpu_usage, gpu_memory_used, gpu_temp))
            time.sleep(interval)

        # The samples must be returned; without this every caller got None.
        return data

    def plot_system_data(self, data, cpu_model, gpu_list):
        """Plot the samples in a 3x2 grid, save it as a PNG and show it.

        Args:
            data: List of 6-tuples as produced by ``collect_system_data``.
            cpu_model: CPU model string shown in the figure title.
            gpu_list: Flat list ``[index, model, memory_mb, ...]``; only the
                first three items are shown. Missing items are displayed as
                ``N/A`` so that a machine without a detectable GPU can still
                be plotted.
        """
        plt.figure(figsize=(14, 12))
        plt.subplots_adjust(hspace=0.5)

        titles = [
            'CPU 使用率 (%)', 'CPU 频率 (MHz)',
            '内存使用量 (GB)', 'GPU 使用率 (%)',
            'GPU 内存使用量 (MB)', 'GPU 温度 (°C)'
        ]
        colors = ['blue', 'green', 'red', 'purple', 'orange', 'cyan']

        for i, (title, color) in enumerate(zip(titles, colors)):
            plt.subplot(3, 2, i + 1)
            plt.plot([entry[i] for entry in data], label=title, color=color)
            plt.title(f'{title}随时间变化')
            plt.xlabel('时间 (秒)')
            plt.ylabel(title, rotation=0, labelpad=45)
            plt.legend()

        # Pad to three fields so an empty GPU list does not raise IndexError.
        gpu_fields = list(gpu_list[:3]) if gpu_list else []
        gpu_fields += ['N/A'] * (3 - len(gpu_fields))
        gpu_index, gpu_model, gpu_memory = gpu_fields

        plt.suptitle(f'系统性能监控 \n\n CPU 型号:{cpu_model} \n GPU:{gpu_index} \n 型号:{gpu_model} \n 显存大小:{gpu_memory}(MB) \n', fontsize=16)
        plt.tight_layout()

        # Save before show(): the file is written even if the window is closed.
        plt.savefig('间段性能图.png')
        plt.show()

        current_path = os.getcwd()
        print("当前工作目录是:", current_path)
        script_path = os.path.dirname(os.path.abspath(__file__))
        print("脚本所在目录是:", script_path)

        print("\n监控图表已保存为 '间段性能图.png'")

    def _get_cpu_model(self):
        """Return the CPU model using the probe that matches the current OS."""
        if is_windows():
            return self.win.get_cpu_model_windows()
        return self.linux.get_cpu_model_unix()

    def _get_gpu_list(self):
        """Return nvidia-smi GPU info flattened to ``[index, model, memory_mb, ...]``.

        Prints a hint and returns an empty list when no GPU info is available.
        """
        gpu_list = []
        gpu_info = self.win.get_gpu_xh_info()
        if gpu_info:
            for gpu in gpu_info:
                gpu_list.extend((gpu['GPU'], gpu['型号'], gpu['显存大小 (MB)']))
        else:
            print("无法获取GPU信息。请确保已安装NVIDIA驱动和nvidia-smi工具。")
        return gpu_list

    # Marked by the original author as not fully debugged yet.
    def run_monitor(self, total_duration, interval):
        """Monitor the system for ``total_duration`` seconds and plot the result.

        Args:
            total_duration: How long to sample, in seconds.
            interval: Pause between two samples, in seconds.
        """
        cpu_model = self._get_cpu_model()
        print(cpu_model)

        gpu_list = self._get_gpu_list()

        data = self.collect_system_data(total_duration, interval)
        # cpu_model must be passed too; plot_system_data takes three arguments.
        self.plot_system_data(data, cpu_model, gpu_list)

    def run_and_monitor(self, script_path, interval=1):
        """Run a script as a child process and monitor the machine until it exits.

        Args:
            script_path: Path to a ``.py`` (run with ``python``) or ``.ps1``
                (run with ``powershell.exe``) script.
            interval: Pause between two samples, in seconds.

        Raises:
            ValueError: ``script_path`` has an unsupported extension.
        """
        extension = os.path.splitext(script_path)[1].lower()
        if extension == '.py':  # Python script
            process = subprocess.Popen(['python', script_path])
        elif extension == '.ps1':  # PowerShell script
            # The output is never read here, so discard it rather than use a
            # PIPE: an unread pipe can fill up and block the child forever.
            process = subprocess.Popen(["powershell.exe", script_path], stdout=subprocess.DEVNULL)
        else:
            raise ValueError(
                f"Unsupported script type: {script_path!r} (expected .py or .ps1)"
            )

        data = []

        # Keep sampling in one-second windows until the child has exited.
        while process.poll() is None:
            current_data = self.collect_system_data(1, interval)
            if current_data:
                data.extend(current_data)

            time.sleep(interval)

        cpu_model = self._get_cpu_model()
        gpu_list = self._get_gpu_list()

        self.plot_system_data(data, cpu_model, gpu_list)


class windows:
    """Performance probes used on Windows hosts.

    GPU data comes from GPUtil, pynvml and the ``nvidia-smi`` command; CPU and
    memory data come from psutil. Every GPU probe degrades to "no GPU"
    (``None`` / ``0``) instead of raising when the GPU stack is unavailable.
    """

    def initialize_gpu_info(self):
        """Initialise NVML and return ``(gpu, handle)`` for the first GPU.

        Returns:
            ``(GPUtil GPU object, pynvml device handle)`` for GPU index 0, or
            ``(None, None)`` when no GPU is found, the GPU modules could not
            be imported, or NVML initialisation fails.
        """
        try:
            pynvml.nvmlInit()
            gpus = GPUtil.getGPUs()
            if gpus:
                return gpus[0], pynvml.nvmlDeviceGetHandleByIndex(0)
        except NameError:
            # GPUtil/pynvml failed to import; the import guard at the top of
            # the file already reported it, so stay quiet here.
            return None, None
        except Exception as e:
            # Best effort: a missing driver or NVML failure must not abort CPU
            # and memory monitoring.
            print(f"Error: {e}")
            return None, None
        return None, None

    def get_cpu_info(self):
        """Return ``(cpu_usage, cpu_freq)`` for the whole machine.

        ``cpu_usage`` is the value of ``psutil.cpu_percent(interval=1)``, so
        the call blocks for one second. ``cpu_freq`` is the ``current`` field
        of ``psutil.cpu_freq()``, or ``0.0`` when psutil returns ``None``.
        """
        cpu_usage = psutil.cpu_percent(interval=1)
        freq = psutil.cpu_freq()
        # cpu_freq() may return None; avoid AttributeError on `.current`.
        cpu_freq = freq.current if freq is not None else 0.0
        return cpu_usage, cpu_freq

    def get_memory_info(self):
        """Return the amount of used memory in GiB (``used / 1024**3``)."""
        memory = psutil.virtual_memory()
        return memory.used / (1024 ** 3)

    def get_gpu_info(self, gpu):
        """Return ``(load * 100, memoryUsed)`` of ``gpu``, or ``(0, 0)`` if falsy.

        NOTE(review): the values are read from attributes of the object passed
        in; if that object is a snapshot taken by ``GPUtil.getGPUs()`` they do
        not change between calls -- confirm against GPUtil.
        """
        if gpu:
            return gpu.load * 100, gpu.memoryUsed
        return 0, 0

    def get_gpu_temperature(self, handle):
        """Return the GPU temperature for an NVML ``handle``, or ``0`` if falsy."""
        if handle:
            return pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
        return 0

    def get_cpu_model_windows(self):
        """Return the CPU model name on Windows.

        The name is taken from the first data row of ``wmic cpu get name``.
        When ``wmic`` is missing, fails, or prints no data row, the value of
        ``platform.processor()`` is used instead.

        Returns:
            The model string, or ``None`` if neither source yields one.
        """
        try:
            output = subprocess.check_output(
                ["wmic", "cpu", "get", "name"],
                universal_newlines=True
            )
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            print(f"Error: {e}")
            return platform.processor() or None

        # Drop blank lines instead of relying on a fixed line index; the first
        # remaining line is the "Name" column header.
        rows = [line.strip() for line in output.splitlines() if line.strip()]
        if len(rows) > 1:
            return rows[1]
        return platform.processor() or None

    def get_gpu_xh_info(self):
        """Query ``nvidia-smi`` for the name and total memory of each GPU.

        Returns:
            A list of dicts with keys ``'GPU'`` (1-based position in the
            output), ``'型号'`` (model name) and ``'显存大小 (MB)'`` (int), or
            ``None`` when ``nvidia-smi`` is missing or exits with an error.
            Lines that cannot be parsed are skipped, so the list may be empty.
        """
        try:
            output = subprocess.check_output(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader,nounits'])
        except (subprocess.CalledProcessError, FileNotFoundError):
            return None

        lines = [line for line in output.decode('utf-8').splitlines() if line.strip()]
        gpu_data = []
        for idx, line in enumerate(lines, start=1):
            # Split on the last comma: memory is always the final field.
            model, sep, memory = line.rpartition(',')
            if not sep:
                continue
            try:
                memory_mb = int(memory)
            except ValueError:
                # Non-numeric memory field; skip this GPU rather than crash.
                continue
            gpu_data.append({
                'GPU': idx,
                '型号': model.strip(),
                '显存大小 (MB)': memory_mb
            })
        return gpu_data


if __name__ == "__main__":
    # Interactive entry point: ask for a script, run it while monitoring the
    # machine, then plot the collected samples.
    print('监控程序运行时的机器性能状态...\n 支持 py 和 ps1')
    # Strip surrounding whitespace and quotes: a path pasted as "a.py" or with
    # a trailing space would otherwise fail the extension check.
    script_path = input('输入程序路径:').strip().strip('"\'')

    jk_start = run()
    print('start...\n')
    jk_start.run_and_monitor(script_path)
    print('end\n')

    # Pause before exiting (presumably so the console output stays readable).
    print('5秒之后退出')
    time.sleep(5)


还有一个问题尚未解决:

pyinstaller --onefile .\monitor_performance.py

用上面的命令打包成 exe 并执行之后,保存的图片中没有画出曲线。

你可能感兴趣的:(深度学习,人工智能)