def construct(self, x):
    """Combine a projected feature map, the input, and an attention term.

    The visible code computes ``out = w_1(g(x)) + x + reshape(bmm(att, g))``
    where ``att = self.DotKernel(x)``.

    NOTE(review): the excerpt ends after ``out`` is accumulated; no
    ``return`` statement is visible, so the rest of the method (if any)
    is not shown here -- confirm against the full source.

    Args:
        x: Input feature map. Presumably 4-D ``(b, c, h, w)`` with the same
            shape as ``self.g(x)``, since it is added to a tensor of that
            shape -- TODO confirm.
    """
    att = self.DotKernel(x)
    residual = x
    g = self.g(x)
    b, c, h, w = g.shape
    # Flatten the spatial dims and move channels last:
    # (b, c, h, w) -> (b, c, h*w) -> (b, h*w, c).
    g = self.transpose(g.view(b, c, -1), (0, 2, 1))
    # Transpose back to (b, c, h*w), flatten, and restore the 4-D layout
    # before the w_1 projection.
    x_1 = msnp.ravel(self.transpose(g, (0, 2, 1))).view(b, c, h, w)
    x_1 = self.w_1(x_1)
    out = x_1 + residual
    # Presumably att is (b, h*w, h*w), giving x_2 of shape (b, h*w, c)
    # -- TODO confirm against DotKernel.
    x_2 = self.bmm(att, g)
    x_2 = self.transpose(x_2, (0, 2, 1))
    # This is the line reported as failing; it originally read
    # `x_2.resize(b, c, h, w)`. The element count does not change here, so
    # a plain reshape is the direct operation (resize is meant for changing
    # the number of elements).
    x_2 = x_2.reshape(b, c, h, w)
    out = out + x_2
将以上代码中 x_2 的 resize 操作换成如下操作:
# Alternative tried in place of the resize call: transpose, flatten, then
# view as (b, c, h, w).
# NOTE(review): if this replaces only the resize line, x_2 has already been
# transposed once just before it, so this second transpose restores the
# pre-transpose layout before flattening -- confirm that is intended.
flat = msnp.ravel(self.transpose(x_2, (0, 2, 1)))
x_2 = flat.view(b, c, h, w)
也一样报错,具体报错信息如下:
{'enable_modelarts': 'Whether training on modelarts default: False', 'data_url': 'Url for modelarts', 'train_url': 'Url for modelarts', 'data_path': 'The location of input data', 'output_pah': 'The location of the output file', 'device_target': 'device id of GPU or Ascend. (Default: None)', 'enable_profiling': 'Whether enable profiling while training default: False', 'is_distributed': 'distributed training', 'resume': 'resume training with existed checkpoint', 'model_size': 'shuffleNetV1 model size choices 2.0x, 1.5x, 1.0x, 0.5x', 'device_id': 'device id', 'file_name': 'output file name', 'file_format': 'file format choices [AIR MINDIR ONNX]'}
{'amp_level': 'O0',
'batch_size': 128,
'checkpoint_url': '',
'ckpt_path': '',
'config_path': '/root/shufflenetv1/src/model_utils/../../gpu_default_config.yaml',
'data_path': '/cache/data',
'data_url': '',
'decay_method': 'cosine',
'device_id': 0,
'device_target': 'GPU',
'enable_modelarts': False,
'enable_profiling': False,
'epoch_size': 1,
'eval_dataset_path': '',
'f': 'extra argument on jupyter',
'file_format': 'ONNX',
'file_name': 'shufflenetv1',
'is_distributed': False,
'is_transfer': True,
'keep_checkpoint_max': 5,
'label_smooth_factor': 0.2,
'load_path': '/cache/checkpoint_path',
'loss_scale': 1024,
'lr_end': 0.0,
'lr_init': 0.0,
'lr_max': 0.5,
'model_size': '2.0x',
'momentum': 0.9,
'num_classes': 20,
'onnx_dataset_path': '/root/shufflenetv1/Arch28_split/val/',
'onnx_path': 'shufflenetv1.onnx',
'output_path': '/cache/train',
'resume': '/root/shufflenetv1/ckpt/shufflenetv1.ckpt',
'save_checkpoint': True,
'save_checkpoint_epochs': 5,
'save_ckpt_path': '/root/autodl-tmp/ckpt/',
'train_dataset_path': '/root/shufflenetv1/Arch28_split/train/',
'train_url': '',
'warmup_epochs': 4,
'weight_decay': 4e-05}
Please check the above information for the configurations
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.238 [mindspore/train/serialization.py:712] For 'load_param_into_net', 11 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.423 [mindspore/train/serialization.py:714] sln.t.weight is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.484 [mindspore/train/serialization.py:714] sln.p.weight is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.536 [mindspore/train/serialization.py:714] sln.g.weight is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.585 [mindspore/train/serialization.py:714] sln.bn.moving_mean is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.644 [mindspore/train/serialization.py:714] sln.bn.moving_variance is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.692 [mindspore/train/serialization.py:714] sln.bn.gamma is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.739 [mindspore/train/serialization.py:714] sln.bn.beta is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.785 [mindspore/train/serialization.py:714] sln.w_1.weight is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.832 [mindspore/train/serialization.py:714] sln.w_2.weight is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.879 [mindspore/train/serialization.py:714] classifier.weight is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.861.925 [mindspore/train/serialization.py:714] classifier.bias is not loaded.
[WARNING] ME(97662:140323767718080,MainProcess):2022-09-24-16:45:36.996.728 [mindspore/train/model.py:1077] For ValAccMonitor callback, {'end', 'epoch_end'} methods may not be supported in later version, Use methods prefixed with 'on_train' or 'on_eval' instead when using customized callbacks.
Traceback (most recent call last):
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/_extends/remote/kernel_build_server_akg.py", line 55, in
messager.run()
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/_extends/remote/kernel_build_server.py", line 106, in run
self.loop()
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/_extends/remote/kernel_build_server.py", line 103, in loop
self.handle()
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/_extends/remote/kernel_build_server_akg.py", line 39, in handle
self.akg_builder.handle(self, arg)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/_extends/remote/kernel_build_server.py", line 168, in handle
res = self.compile()
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/_extends/remote/kernel_build_server.py", line 145, in compile
return self.akg_processor.compile(self.attrs)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py", line 133, in compile
res.get(timeout=self.wait_time)
File "/root/miniconda3/envs/shuffle/lib/python3.7/multiprocessing/pool.py", line 653, in get
raise TimeoutError
multiprocessing.context.TimeoutError
model size is 2.0x
============== Starting Training ==============
Traceback (most recent call last):
File "train.py", line 149, in
train()
File "/root/shufflenetv1/src/model_utils/moxing_adapter.py", line 113, in wrapped_func
run_func(*args, **kwargs)
File "train.py", line 137, in train
model.train(config.epoch_size, train_dataset, callbacks=cb, dataset_sink_mode=False)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/train/model.py", line 1049, in train
initial_epoch=initial_epoch)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/train/model.py", line 98, in wrapper
func(self, *args, **kwargs)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/train/model.py", line 616, in _train
self._train_process(epoch, train_dataset, list_callback, cb_params, initial_epoch, valid_infos)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/train/model.py", line 907, in _train_process
outputs = self._train_network(*next_element)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/nn/cell.py", line 578, in __call__
out = self.compile_and_run(*args)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/nn/cell.py", line 965, in compile_and_run
self.compile(*inputs)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/nn/cell.py", line 938, in compile
jit_config_dict=self._jit_config_dict)
File "/root/miniconda3/envs/shuffle/lib/python3.7/site-packages/mindspore/common/api.py", line 1137, in compile
result = self._graph_executor.compile(obj, args_list, phase, self._use_vm_mode())
RuntimeError: Response is empty
----------------------------------------------------
- C++ Call Stack: (For framework developers)
----------------------------------------------------
mindspore/ccsrc/backend/common/session/kernel_build_client.h:110 Response
不知道是不是和算子融合有关系,但关闭算子融合之后也试过,还是不行;又或者是数据太大了?
****************************************************解答*****************************************************
这个 ValueError 指明了是 tensor 的 size 过大造成的。您可能需要降低 batch_size(如果它是 x_2 的某个维度的话),或者尝试使用 ops.InplaceAdd,可能会有用。