在跑StyleGANv2和Diffusion-GAN时,因为用的是3090+py3.10+CUDA11.5+torch1.12.0,遇到下面的问题:
Traceback (most recent call last):
File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1808, in _run_ninja_build
subprocess.run(
File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/subprocess.py", line 524, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data/yuyzh/code/eyes/cycle_diffusion_2/main_celeba3.py", line 3, in <module>
from solver3 import Solver
File "/data/yuyzh/code/eyes/cycle_diffusion_2/solver3.py", line 1, in <module>
from models.model import Generator
File "/data/yuyzh/code/eyes/cycle_diffusion_2/models/model.py", line 7, in <module>
from . import up_or_down_sampling
File "/data/yuyzh/code/eyes/cycle_diffusion_2/models/up_or_down_sampling.py", line 15, in <module>
from op import upfirdn2d
File "/data/yuyzh/code/eyes/cycle_diffusion_2/op/__init__.py", line 1, in <module>
from .fused_act import FusedLeakyReLU, fused_leaky_relu
File "/data/yuyzh/code/eyes/cycle_diffusion_2/op/fused_act.py", line 19, in <module>
fused = load(
File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1202, in load
return _jit_compile(
File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1425, in _jit_compile
_write_ninja_file_and_build_library(
File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1537, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1824, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'fused': [1/2] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/TH -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/THC -isystem /home/yuyz/anaconda3/envs/eye_center_origin/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -std=c++14 -c /data/yuyzh/code/eyes/cycle_diffusion_2/op/fused_bias_act_kernel.cu -o fused_bias_act_kernel.cuda.o
FAILED: fused_bias_act_kernel.cuda.o
/usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/TH -isystem /home/yuyz/anaconda3/envs/eye_center_origin/lib/python3.10/site-packages/torch/include/THC -isystem /home/yuyz/anaconda3/envs/eye_center_origin/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -std=c++14 -c /data/yuyzh/code/eyes/cycle_diffusion_2/op/fused_bias_act_kernel.cu -o fused_bias_act_kernel.cuda.o
/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with ‘...’:
435 | function(_Functor&& __f)
| ^
/usr/include/c++/11/bits/std_function.h:435:145: note: ‘_ArgTypes’
/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with ‘...’:
530 | operator=(_Functor&& __f)
| ^
/usr/include/c++/11/bits/std_function.h:530:146: note: ‘_ArgTypes’
ninja: build stopped: subcommand failed.
搜索了相关资料,发现有可能是CUDA与gcc版本不匹配导致的问题,但是检查了一遍所有的版本号,发现是完全匹配的。
后来参考https://blog.csdn.net/c2a2o2/article/details/120596305,用纯PyTorch实现替换掉原有需要编译的C++/CUDA扩展(upfirdn2d.cpp以及fused_act对应的算子),编写下面的文件upfirdn2d.py:
import torch
from torch import nn
import torch.nn.functional as F
class FusedLeakyReLU(nn.Module):
    """Leaky ReLU with a learnable per-channel bias and a constant output gain.

    Pure-PyTorch module that needs no compiled extension; the computation
    itself is delegated to ``fused_leaky_relu``.

    Args:
        channel: Number of channels; the bias holds one value per channel.
        negative_slope: Slope applied to negative pre-activations.
        scale: Constant the activation output is multiplied by.
    """

    def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5):
        super().__init__()
        # Zero-initialised bias, registered as a trainable parameter.
        self.bias = nn.Parameter(torch.zeros(channel))
        self.negative_slope = negative_slope
        self.scale = scale

    def forward(self, input):
        """Return ``scale * leaky_relu(input + bias)`` for ``input``."""
        return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)
def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2 ** 0.5):
    """Add a per-channel bias, apply leaky ReLU, then multiply by ``scale``.

    Args:
        input: Tensor whose dimension 1 is the channel dimension.
        bias: 1D tensor with one value per channel, or ``None`` to skip the
            bias addition.
        negative_slope: Slope applied to negative values.
        scale: Constant multiplier applied to the activation output.

    Returns:
        Tensor with the same shape as ``input``.
    """
    if bias is not None:
        # View the bias as (1, C, 1, ..., 1) so it broadcasts along dim 1
        # for inputs of any rank.
        broadcast_shape = (1, -1) + (1,) * (input.ndim - 2)
        input = input + bias.view(broadcast_shape)
    return scale * F.leaky_relu(input, negative_slope=negative_slope)
def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1):
    """Upsample, pad, FIR-filter and downsample a batch of 2D feature maps.

    Pure-PyTorch implementation that needs no compiled extension. Every
    channel is filtered independently with the same 2D kernel.

    Args:
        input: 4D tensor indexed as (batch, channel, height, width).
        kernel: 2D filter tensor of shape (kernel_h, kernel_w).
        up_x, up_y: Integer upsampling factors; ``factor - 1`` zeros are
            inserted after every sample along the respective axis.
        down_x, down_y: Integer downsampling strides applied to the result.
        pad_x0, pad_x1: Padding before/after the width axis. Positive values
            add zeros, negative values crop.
        pad_y0, pad_y1: Same as above for the height axis.

    Returns:
        4D tensor (batch, channel, out_h, out_w) holding the filtered maps.
    """
    # Move channels last so the zero-insertion reshapes below only touch the
    # two spatial axes.
    input = input.permute(0, 2, 3, 1)
    _, in_h, in_w, minor = input.shape
    kernel_h, kernel_w = kernel.shape

    # Upsample by zero insertion: give each sample its own trailing unit
    # axis, pad that axis with zeros, then fold it back into the spatial axis.
    out = input.view(-1, in_h, 1, in_w, 1, minor)
    out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
    out = out.view(-1, in_h * up_y, in_w * up_x, minor)

    # Positive pads are applied by F.pad; negative pads are applied as crops
    # by the slice that follows.
    out = F.pad(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)])
    out = out[
        :,
        max(-pad_y0, 0): out.shape[1] - max(-pad_y1, 0),
        max(-pad_x0, 0): out.shape[2] - max(-pad_x1, 0),
        :,
    ]

    padded_h = in_h * up_y + pad_y0 + pad_y1
    padded_w = in_w * up_x + pad_x0 + pad_x1

    # Fold channels into the batch axis so one single-channel convolution
    # filters every feature map.
    out = out.permute(0, 3, 1, 2)
    out = out.reshape(-1, 1, padded_h, padded_w)

    # conv2d computes a cross-correlation, so the kernel is flipped to get a
    # true convolution. The filter is cast to the data's device and dtype so
    # a mismatched kernel does not make conv2d raise.
    w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
    w = w.to(device=out.device, dtype=out.dtype)
    out = F.conv2d(out, w)

    out = out.reshape(-1, minor, padded_h - kernel_h + 1, padded_w - kernel_w + 1)
    return out[:, :, ::down_y, ::down_x]
def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
    """Upsample, FIR-filter and downsample ``input`` with a 2D ``kernel``.

    Convenience wrapper around ``upfirdn2d_native``.

    Args:
        input: 4D tensor indexed as (batch, channel, height, width).
        kernel: 2D filter tensor.
        up: Upsampling factor; a scalar is used for both axes, a tuple/list
            is read as ``(up_x, up_y)``.
        down: Downsampling factor; a scalar is used for both axes, a
            tuple/list is read as ``(down_x, down_y)``.
        pad: ``(pad0, pad1)`` applied to both axes, or a 4-element
            ``(pad_x0, pad_x1, pad_y0, pad_y1)``.

    Returns:
        The tensor returned by ``upfirdn2d_native``.
    """
    up_x, up_y = up if isinstance(up, (tuple, list)) else (up, up)
    down_x, down_y = down if isinstance(down, (tuple, list)) else (down, down)
    if len(pad) == 4:
        pad_x0, pad_x1, pad_y0, pad_y1 = pad
    else:
        # Two-element form: the same before/after padding on both axes.
        pad_x0, pad_x1 = pad[0], pad[1]
        pad_y0, pad_y1 = pad[0], pad[1]
    return upfirdn2d_native(
        input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
    )
然后在原先导入这些算子的地方(例如 op/__init__.py 中的 import 语句),改为引用该文件即可。