记录一个StyleGANv2、Diffusion-GAN等中遇到的:ninja: build stopped: subcommand failed.

记录一个StyleGANv2、Diffusion-GAN等中遇到的:ninja: build stopped: subcommand failed.

在跑StyleGANv2和Diffusion-GAN时,因为用的是3090+py3.10+CUDA11.5+torch1.12.0,遇到下面的问题:

Traceback (most recent call last):
  File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1808, in _run_ninja_build
    subprocess.run(
  File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/subprocess.py", line 524, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/yuyzh/code/eyes/cycle_diffusion_2/main_celeba3.py", line 3, in <module>
    from solver3 import Solver
  File "/data/yuyzh/code/eyes/cycle_diffusion_2/solver3.py", line 1, in <module>
    from models.model import Generator
  File "/data/yuyzh/code/eyes/cycle_diffusion_2/models/model.py", line 7, in <module>
    from . import up_or_down_sampling
  File "/data/yuyzh/code/eyes/cycle_diffusion_2/models/up_or_down_sampling.py", line 15, in <module>
    from op import upfirdn2d
  File "/data/yuyzh/code/eyes/cycle_diffusion_2/op/__init__.py", line 1, in <module>
    from .fused_act import FusedLeakyReLU, fused_leaky_relu
  File "/data/yuyzh/code/eyes/cycle_diffusion_2/op/fused_act.py", line 19, in <module>
    fused = load(
  File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1202, in load
    return _jit_compile(
  File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1425, in _jit_compile
    _write_ninja_file_and_build_library(
  File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1537, in _write_ninja_file_and_build_library
    _run_ninja_build(
  File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1824, in _run_ninja_build
    raise RuntimeError(message) from e
RuntimeError: Error building extension 'fused': [1/2] /usr/bin/nvcc  -DTORCH_EXTENSION_NAME=fused -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/TH -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/THC -isystem /home/yuyz/anaconda3/envs/eye_center_origin/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -std=c++14 -c /data/yuyzh/code/eyes/cycle_diffusion_2/op/fused_bias_act_kernel.cu -o fused_bias_act_kernel.cuda.o 
FAILED: fused_bias_act_kernel.cuda.o 
/usr/bin/nvcc  -DTORCH_EXTENSION_NAME=fused -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/TH -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/THC -isystem /home/yuyz/anaconda3/envs/eye_center_origin/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -std=c++14 -c /data/yuyzh/code/eyes/cycle_diffusion_2/op/fused_bias_act_kernel.cu -o fused_bias_act_kernel.cuda.o 
/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with ‘...’:
  435 |         function(_Functor&& __f)
      |                                                                                                                                                 ^ 
/usr/include/c++/11/bits/std_function.h:435:145: note:         ‘_ArgTypes’
/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with ‘...’:
  530 |         operator=(_Functor&& __f)
      |                                                                                                                                                  ^ 
/usr/include/c++/11/bits/std_function.h:530:146: note:         ‘_ArgTypes’
ninja: build stopped: subcommand failed.

搜索了相关资料,发现有可能是CUDA与gcc版本不匹配的问题,但是检查了一遍所有的版本,发现是完全匹配的。

后来参考 https://blog.csdn.net/c2a2o2/article/details/120596305 ,用纯PyTorch实现替换掉原有的需要编译的C++/CUDA扩展(如upfirdn2d.cpp),编写下面的文件upfirdn2d.py:

import torch
from torch import nn
import torch.nn.functional as F

class FusedLeakyReLU(nn.Module):
    """Module wrapper around ``fused_leaky_relu`` with a learnable bias.

    The module owns a bias vector of length ``channel`` (initialised to
    zeros) and remembers the leaky-ReLU slope and the output gain, then
    forwards everything to the functional ``fused_leaky_relu``.
    """

    def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5):
        """Create the bias parameter and store the activation settings.

        Args:
            channel: Size passed to ``torch.zeros`` for the bias parameter.
            negative_slope: Slope used by the leaky ReLU for negative inputs.
            scale: Multiplier applied to the activated output.
        """
        super().__init__()
        # Plain hyper-parameters first; they are not registered as buffers.
        self.scale = scale
        self.negative_slope = negative_slope
        # Learnable bias, starts at zero.
        initial_bias = torch.zeros(channel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, input):
        """Apply bias, leaky ReLU and gain to ``input``."""
        result = fused_leaky_relu(
            input,
            self.bias,
            self.negative_slope,
            self.scale,
        )
        return result

def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5):
    """Add a per-channel bias, apply leaky ReLU, then multiply by ``scale``.

    Args:
        input: Tensor whose axis 1 lines up with ``bias``.
        bias: Tensor that is viewed as ``(1, -1, 1, ..., 1)`` so it
            broadcasts along axis 1 of ``input``.
        negative_slope: Slope of the leaky ReLU for negative values.
        scale: Gain applied to the activated result.

    Returns:
        ``scale * leaky_relu(input + bias)`` with the bias broadcast as
        described above.
    """
    # One singleton axis for every dimension of ``input`` after the second.
    trailing_axes = input.ndim - 2
    broadcast_shape = (1, -1) + (1,) * trailing_axes
    shifted = input + bias.view(broadcast_shape)
    activated = F.leaky_relu(shifted, negative_slope=negative_slope)
    return scale * activated

def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1):
    """Upsample, pad/crop, FIR-filter and downsample 2-D maps in pure PyTorch.

    Args:
        input: 4-D tensor; axis 1 is treated as the "minor" (channel-like)
            axis and axes 2 and 3 as height and width.
        kernel: 2-D tensor of shape ``(kernel_h, kernel_w)``; the same
            kernel is applied to every channel.
        up_x, up_y: Integer upsampling factors; ``factor - 1`` zeros are
            inserted after every sample along the respective axis.
        down_x, down_y: Integer strides used to subsample the result.
        pad_x0, pad_x1, pad_y0, pad_y1: Padding before/after along width
            and height. Positive values add zeros, negative values crop.

    Returns:
        Tensor laid out as ``(batch, minor, out_h, out_w)``.
    """
    # Channels-last layout lets zero insertion be expressed as padding
    # of freshly inserted singleton axes.
    channels_last = input.permute(0, 2, 3, 1)
    _, in_h, in_w, minor = channels_last.shape
    kernel_h, kernel_w = kernel.shape

    upsampled_h = in_h * up_y
    upsampled_w = in_w * up_x
    padded_h = upsampled_h + pad_y0 + pad_y1
    padded_w = upsampled_w + pad_x0 + pad_x1

    # Zero-insertion upsampling: pad the dummy axes that follow H and W,
    # then fold them back into H and W.
    out = channels_last.view(-1, in_h, 1, in_w, 1, minor)
    out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
    out = out.view(-1, upsampled_h, upsampled_w, minor)

    # Positive pads grow the map with zeros ...
    grow = [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]
    out = F.pad(out, grow)
    # ... and negative pads crop it.
    top = max(-pad_y0, 0)
    bottom = out.shape[1] - max(-pad_y1, 0)
    left = max(-pad_x0, 0)
    right = out.shape[2] - max(-pad_x1, 0)
    out = out[:, top:bottom, left:right, :]

    # Fold the minor axis into the batch so that a single-channel conv
    # filters every channel independently.
    out = out.permute(0, 3, 1, 2)
    out = out.reshape([-1, 1, padded_h, padded_w])

    # conv2d computes a cross-correlation; flipping the kernel on both
    # axes turns it into a true convolution.
    weight = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
    out = F.conv2d(out, weight)

    filtered_h = padded_h - kernel_h + 1
    filtered_w = padded_w - kernel_w + 1
    out = out.reshape(-1, minor, filtered_h, filtered_w)

    # Downsample by striding.
    return out[:, :, ::down_y, ::down_x]

def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
    """Isotropic convenience wrapper around ``upfirdn2d_native``.

    The same ``up`` and ``down`` factor is used for both spatial axes, and
    ``pad[0]`` / ``pad[1]`` are used as the before / after padding on both
    width and height.

    Args:
        input: Tensor forwarded unchanged to ``upfirdn2d_native``.
        kernel: 2-D FIR kernel forwarded unchanged.
        up: Upsampling factor for both axes.
        down: Downsampling factor for both axes.
        pad: Indexable whose first two items are the before/after padding.

    Returns:
        Whatever ``upfirdn2d_native`` returns for these arguments.
    """
    pad_before = pad[0]
    pad_after = pad[1]
    return upfirdn2d_native(
        input, kernel,
        up, up,
        down, down,
        pad_before, pad_after,
        pad_before, pad_after,
    )

然后在对应的import语句中,改为引用该文件即可。

你可能感兴趣的:(python,pytorch,深度学习)