RuntimeError: DataLoader worker (pid(s) ***********) exited unexpectedly

报错信息:

RuntimeError: DataLoader worker (pid(s) 76756, 74568, 76080, 75256) exited unexpectedly 

 

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\spawn.py", line 125, in _main
    prepare(preparation_data)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\spawn.py", line 236, in prepare
    _fixup_main_from_path(data['init_main_from_path'])
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
    main_content = runpy.run_path(main_path,
  File "C:\Anaconda3\envs\Pytorch1-1\lib\runpy.py", line 268, in run_path
    return _run_module_code(code, init_globals, run_name,
  File "C:\Anaconda3\envs\Pytorch1-1\lib\runpy.py", line 97, in _run_module_code
    _run_code(code, mod_globals, init_globals,
  File "C:\Anaconda3\envs\Pytorch1-1\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "D:\Pycharm-WorkSpace\d2l-zh-pytorch\5 卷积神经网络\5.6 深度卷积神经网络(AlexNet).py", line 74, in <module>
    d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\d2lzh_pytorch\utils.py", line 235, in train_ch5
    for X, y in train_iter:
  File "C:\Anaconda3\envs\Pytorch1-1\lib\site-packages\torch\utils\data\dataloader.py", line 444, in __iter__
    return self._get_iterator()
  File "C:\Anaconda3\envs\Pytorch1-1\lib\site-packages\torch\utils\data\dataloader.py", line 390, in _get_iterator
    return _MultiProcessingDataLoaderIter(self)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\site-packages\torch\utils\data\dataloader.py", line 1077, in __init__
    w.start()
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\process.py", line 121, in start
    self._popen = self._Popen(self)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\context.py", line 327, in _Popen
    return Popen(process_obj)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\popen_spawn_win32.py", line 45, in __init__
    prep_data = spawn.get_preparation_data(process_obj._name)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\spawn.py", line 154, in get_preparation_data
    _check_not_importing_main()
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\spawn.py", line 134, in _check_not_importing_main
    raise RuntimeError('''
RuntimeError: 
        An attempt has been made to start a new process before the
        current process has finished its bootstrapping phase.

        This probably means that you are not using fork to start your
        child processes and you have forgotten to use the proper idiom
        in the main module:

            if __name__ == '__main__':
                freeze_support()
                ...

        The "freeze_support()" line can be omitted if the program
        is not going to be frozen to produce an executable.
Traceback (most recent call last):
  File "C:\Anaconda3\envs\Pytorch1-1\lib\site-packages\torch\utils\data\dataloader.py", line 1163, in _try_get_data
    data = self._data_queue.get(timeout=timeout)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\multiprocessing\queues.py", line 114, in get
    raise Empty
_queue.Empty

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "D:\Pycharm-WorkSpace\d2l-zh-pytorch\5 卷积神经网络\5.6 深度卷积神经网络(AlexNet).py", line 74, in <module>
    d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
  File "C:\Anaconda3\envs\Pytorch1-1\lib\d2lzh_pytorch\utils.py", line 235, in train_ch5
    for X, y in train_iter:
  File "C:\Anaconda3\envs\Pytorch1-1\lib\site-packages\torch\utils\data\dataloader.py", line 681, in __next__
    data = self._next_data()
  File "C:\Anaconda3\envs\Pytorch1-1\lib\site-packages\torch\utils\data\dataloader.py", line 1359, in _next_data
    idx, data = self._get_data()
  File "C:\Anaconda3\envs\Pytorch1-1\lib\site-packages\torch\utils\data\dataloader.py", line 1325, in _get_data
    success, data = self._try_get_data()
  File "C:\Anaconda3\envs\Pytorch1-1\lib\site-packages\torch\utils\data\dataloader.py", line 1176, in _try_get_data
    raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 76756, 74568, 76080, 75256) exited unexpectedly

Process finished with exit code 1
 

运行文件全部代码 :

import time
import torch
from torch import nn, optim
import torchvision

import sys
# Make the locally installed d2lzh_pytorch package importable.  A raw string
# keeps the Windows backslashes literal; in a normal string "\A", "\e", "\P"
# and "\L" are invalid escape sequences that newer Python versions warn about.
sys.path.append(r"C:\Anaconda3\envs\Pytorch1-1\Lib")
import d2lzh_pytorch as d2l
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AlexNet(nn.Module):
    """AlexNet-style network for single-channel images with 10 output classes.

    ``conv`` holds the convolutional feature extractor and ``fc`` the fully
    connected classifier.  The first linear layer expects 256 * 5 * 5
    flattened features per sample.
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            nn.Conv2d(1, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Smaller convolution window; padding of 2 keeps the height and
            # width unchanged while the number of output channels grows.
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Three consecutive convolutions with an even smaller window.
            # All but the last one further increase the channel count, and
            # no pooling follows the first two, so height/width are kept.
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # The fully connected layers are several times wider than in LeNet;
        # dropout layers are used to mitigate overfitting.
        classifier_layers = [
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            # Output layer: Fashion-MNIST is used here, so there are 10
            # classes rather than the 1000 of the original paper.
            nn.Linear(4096, 10),
        ]
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, img):
        """Run ``img`` through ``conv``, flatten per sample, and apply ``fc``."""
        batch = img.shape[0]
        flattened = self.conv(img).view(batch, -1)
        return self.fc(flattened)
# Instantiate the network and print its layer structure.
net = AlexNet()
print(net)
# This function is also saved in the d2lzh_pytorch package for later reuse.
def load_data_fashion_mnist(batch_size, resize=None, root='./FashionMNIST', num_workers=4):
    """Download Fashion-MNIST (if needed) and wrap both splits in DataLoaders.

    Args:
        batch_size: Number of samples per batch for both loaders.
        resize: Optional size passed to ``torchvision.transforms.Resize``;
            when falsy, the images are not resized.
        root: Directory where the dataset is stored or downloaded to.
        num_workers: Number of DataLoader worker processes.  Defaults to 4,
            the value that used to be hard-coded.  Pass 0 to load data in
            the main process, e.g. on Windows when the calling script has no
            ``if __name__ == '__main__':`` guard.

    Returns:
        A ``(train_iter, test_iter)`` tuple of DataLoaders; only the
        training loader shuffles its samples.
    """
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, download=True, transform=transform)

    train_iter = torch.utils.data.DataLoader(
        mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(
        mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

batch_size = 128
# If an "out of memory" error occurs, reduce batch_size or resize.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
# NOTE: the guard below is commented out, so the training call runs at module
# top level; this is the line the traceback above points at.
# if __name__ == '__main__':
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

根据上方报错信息(Traceback)里的提示,可以知道出错的是最后一句代码,即 d2l.train_ch5(...) 这一行。

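后经过查询,报错信息里提到了“if __name__ == '__main__':”这一惯用写法:在 Windows 上,多进程是以 spawn 方式启动子进程的,子进程会重新导入主模块;而 DataLoader 设置了 num_workers=4,如果训练代码直接写在模块顶层,子进程导入主模块时会再次执行它并试图继续创建新进程,于是报错。因此需要把启动训练的代码放到“if __name__ == '__main__':”这个条件判断之下(它是一条条件语句,而不是函数)。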

解决办法:

# Run training only when this file is executed as the main script, so that
# spawned DataLoader worker processes re-importing it do not start training again.
if __name__ == '__main__':
    d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

注意:添加“if __name__ == '__main__':”这一行之后,其下方属于该分支的代码必须缩进一级(通常为 4 个空格)。

你可能感兴趣的:(杂,深度学习)