Java 获取 GPU 信息(英伟达)

方案一

获取已用显存、总显存、GPU 名称、显存使用率等信息

通过命令行(nvidia-smi)获取 GPU 信息

/**
 * Runs the {@code nvidia-smi} command-line tool and returns its raw text output.
 *
 * <p>The two injected fields and {@code processHashMap} are not used by the code
 * visible in this class; they are kept unchanged.
 */
public class ShellService {

    @Value("${micronaut.server.port}")
    private Integer myServerPort;

    @Inject
    private TrainModelRepository trainModelRepository;

    private static HashMap processHashMap = new HashMap<>();

    /**
     * Executes {@code nvidia-smi} and returns everything it wrote to standard output.
     *
     * @return the command output, one {@code '\n'}-terminated line per output line
     * @throws IOException if reading the command output fails
     * @throws IndaiException if the platform is neither Windows nor Linux, or the
     *         command cannot be started
     */
    public String getGPU() throws IOException {
        Process process = startNvidiaSmi();
        // try-with-resources closes the reader (and the underlying process stream)
        // even when readLine() fails part-way through.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            StringBuilder output = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                output.append(line).append('\n');
            }
            return output.toString();
        } finally {
            // Release the native process handle and its remaining streams.
            process.destroy();
        }
    }

    /**
     * Starts {@code nvidia-smi} with the platform-specific command line and closes
     * the child's standard input, which is never written to.
     */
    private Process startNvidiaSmi() throws IOException {
        String[] command;
        if (Platform.isWindows()) {
            command = new String[] {"nvidia-smi.exe"};
        } else if (Platform.isLinux()) {
            command = new String[] {"/bin/bash", "-c", "nvidia-smi"};
        } else {
            // Previously this case fell through with a null process and crashed
            // with a NullPointerException.
            throw new IndaiException("显卡不存在或获取显卡信息失败");
        }

        Process process = null;
        try {
            process = Runtime.getRuntime().exec(command);
            process.getOutputStream().close();
            return process;
        } catch (IOException e) {
            if (process != null) {
                process.destroy();
            }
            // NOTE(review): only the String constructor of IndaiException is visible
            // here, so the cause is printed rather than chained.
            e.printStackTrace();
            throw new IndaiException("显卡不存在或获取显卡信息失败");
        }
    }
}

封装获取的信息

/**
 * HTTP endpoint that reports per-GPU memory information parsed from the text
 * output of {@code nvidia-smi}.
 *
 * <p>The parser expects the tabular layout printed by {@code nvidia-smi}, for example:
 * <pre>
 * |===============================+======================+======================|
 * |   0  TITAN V             Off  | 00000000:2D:00.0  On |                  N/A |
 * | 29%   43C    P8    27W / 250W |   1123MiB / 12035MiB |      0%      Default |
 * +-------------------------------+----------------------+----------------------+
 * |   1  GeForce RTX 208...  Off  | 00000000:99:00.0 Off |                  N/A |
 * |  0%   29C    P8    20W / 260W |     11MiB / 10989MiB |      0%      Default |
 * +-------------------------------+----------------------+----------------------+
 * </pre>
 * Rule lines are matched by shape rather than by exact width, so tables with
 * different column widths are accepted as well.
 *
 * @author fuchanghai
 */
@Controller("/api/GPU")
public class GPUController {

    /** Unit suffix that nvidia-smi appends to memory figures, e.g. {@code 1123MiB}. */
    private static final String MEMORY_UNIT = "MiB";

    /** Shape of the {@code |====+====+====|} rule that ends the table header. */
    private static final String HEADER_RULE = "\\|=+\\+=+\\+=+\\|";

    /** Shape of the {@code +----+----+----+} rule that closes each GPU entry. */
    private static final String ENTRY_RULE = "\\+-+\\+-+\\+-+\\+";

    @Inject
    ShellService shellService;

    /**
     * Returns the information of every GPU wrapped in a success response.
     *
     * @throws IOException if nvidia-smi cannot be read or its output cannot be parsed
     */
    @Get(value = "/get/gpu")
    public ApiResponse getGPU() throws IOException {
        List<GPUInfo> gpuInfoList = getGpuInfos();
        return ApiResponse.responseSuccess(gpuInfoList);
    }

    /**
     * Runs nvidia-smi through {@link ShellService} and parses one {@link GPUInfo}
     * per GPU entry of the device table.
     *
     * @return the parsed GPUs in the order nvidia-smi lists them
     * @throws IOException if nvidia-smi cannot be read or its output does not
     *         contain a parsable device table
     */
    public List<GPUInfo> getGpuInfos() throws IOException {
        return parseGpuInfos(shellService.getGPU());
    }

    /** Extracts every completed GPU entry from the device table of the given nvidia-smi output. */
    private static List<GPUInfo> parseGpuInfos(String smiOutput) throws IOException {
        List<GPUInfo> gpuInfoList = new ArrayList<>();
        List<String> entryRows = new ArrayList<>();
        boolean inDeviceTable = false;

        for (String rawLine : smiOutput.split("\n")) {
            String line = rawLine.trim();
            if (!inDeviceTable) {
                // Everything up to and including the header rule is banner/header text.
                inDeviceTable = line.matches(HEADER_RULE);
                continue;
            }
            if (line.isEmpty()) {
                // A blank line separates the device table from the process table.
                break;
            }
            if (line.matches(ENTRY_RULE)) {
                if (!entryRows.isEmpty()) {
                    gpuInfoList.add(toGpuInfo(entryRows));
                    entryRows = new ArrayList<>();
                }
                continue;
            }
            entryRows.add(line);
        }

        if (!inDeviceTable) {
            throw new IOException("Unexpected nvidia-smi output: device table header not found");
        }
        return gpuInfoList;
    }

    /**
     * Builds a {@link GPUInfo} from the rows of one GPU entry: the first row holds
     * the index and name, the second row holds the memory usage in its second cell.
     */
    private static GPUInfo toGpuInfo(List<String> entryRows) throws IOException {
        if (entryRows.size() < 2) {
            throw new IOException("Unexpected nvidia-smi GPU entry: " + entryRows);
        }
        String[] nameCells = entryRows.get(0).split("\\|");
        String[] usageCells = entryRows.get(1).split("\\|");
        if (nameCells.length < 2 || usageCells.length < 3) {
            throw new IOException("Unexpected nvidia-smi GPU entry: " + entryRows);
        }

        // e.g. "0  TITAN V  Off" -> [0, TITAN, V, Off]
        String[] nameTokens = nameCells[1].trim().split("\\s+");
        // e.g. "1123MiB / 12035MiB" -> [1123MiB, /, 12035MiB]
        String[] memoryTokens = usageCells[2].trim().split("\\s+");
        if (memoryTokens.length < 3) {
            throw new IOException("Unexpected nvidia-smi memory column: " + usageCells[2].trim());
        }

        try {
            GPUInfo gpuInfo = new GPUInfo();
            gpuInfo.setNumber(Integer.parseInt(nameTokens[0]));
            gpuInfo.setName(joinNameTokens(nameTokens));

            String usedMemory = memoryTokens[0];
            String totalMemory = memoryTokens[2];
            int usedMiB = parseMiB(usedMemory);
            int totalMiB = parseMiB(totalMemory);

            gpuInfo.setUsedMemory(usedMemory);
            gpuInfo.setTotalMemory(totalMemory);
            gpuInfo.setUseableMemory((totalMiB - usedMiB) + MEMORY_UNIT);
            // Guard against 0 total so the rate never becomes NaN or Infinity.
            gpuInfo.setUsageRate(totalMiB == 0 ? 0.0 : usedMiB * 100.00 / totalMiB);
            return gpuInfo;
        } catch (NumberFormatException e) {
            throw new IOException("Unexpected nvidia-smi GPU entry: " + entryRows, e);
        }
    }

    /**
     * Joins the name tokens with single spaces, skipping the leading index token
     * and the trailing mode token (e.g. {@code Off}).
     */
    private static String joinNameTokens(String[] nameTokens) {
        StringBuilder name = new StringBuilder();
        for (int j = 1; j < nameTokens.length - 1; j++) {
            if (name.length() > 0) {
                name.append(' ');
            }
            name.append(nameTokens[j]);
        }
        return name.toString();
    }

    /** Parses a memory figure such as {@code 1123MiB} into its integer MiB value. */
    private static int parseMiB(String memory) {
        String digits = memory.endsWith(MEMORY_UNIT)
                ? memory.substring(0, memory.length() - MEMORY_UNIT.length())
                : memory;
        return Integer.parseInt(digits);
    }
}

GPU 实体类

/**
 * Memory snapshot of a single GPU as parsed from the nvidia-smi device table.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class GPUInfo {
    /** GPU index taken from the first column of the nvidia-smi table. */
    private Integer number;

    /** GPU name as printed by nvidia-smi (may be truncated, e.g. "GeForce RTX 208..."). */
    private String name;

    /** Total memory including the unit suffix, e.g. "12035MiB". */
    private String totalMemory;

    /** Used memory including the unit suffix, e.g. "1123MiB". */
    private String usedMemory;

    /** Total minus used memory, with the "MiB" suffix appended. */
    private String useableMemory;
    /** Used memory as a percentage of total memory (used * 100 / total). */
    private Double usageRate;
}

方案二

英伟达开源工具

https://github.com/NVIDIA/dcgm-exporter

你可能感兴趣的:(工具类,运维)