问题描述:
【功能模块】
使用mindspore自带的mindrecord自定义数据集后训练时出现问题。
训练集描述——是一个照片(jpg)二分类问题,1~4000的照片是1,4001~6000是0
环境:Windows 11,Mindspore-CPU 1.6.0,Python 3.9.0
【操作步骤&问题现象】
1、使用代码将datasets文件夹里的自定义图片(jpg)添加到数据集内。代码见下:
from mindspore.mindrecord import FileWriter
import cv2 # 引入opencv
from PIL import Image
import os
# Schema of one MindRecord sample: an int32 class label and the raw JPEG bytes.
schema_json = {"label": {"type": "int32"},
               "data": {"type": "bytes"}}
indexes = ["label"]
data = []

# The resized copies are saved into ./datasets, so make sure that folder exists.
os.makedirs("./datasets", exist_ok=True)

for i in range(1, 6001):  # iterate over img1.jpg .. img6000.jpg
    # Resize the source photo to 224x224; the context manager releases the
    # source file handle as soon as the resized copy has been produced.
    with Image.open("./dataset/img" + str(i) + ".jpg") as img:
        out = img.resize((224, 224))
    out.save("./datasets/img" + str(i) + ".jpg")

    # Read the resized JPEG back as raw bytes. Read-only binary mode is
    # enough, and `with` closes the file even if the read fails.
    with open("./datasets/img" + str(i) + ".jpg", "rb") as f:
        rbdata = f.read()

    print(i)
    # Photos 1..4000 are class 1, photos 4001..6000 are class 0.
    label = 1 if i <= 4000 else 0
    data.append({"label": label, "data": rbdata})  # collect the sample

# Write all collected samples into a single-shard MindRecord file.
writer = FileWriter(file_name="test.mindrecord", shard_num=1)
writer.add_schema(schema_json, "test_schema")
writer.add_index(indexes)
writer.write_raw_data(data)
writer.commit()
2、然后用以下代码进行训练:
from mindspore import context
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as CV
import mindspore.nn as nn
from mindspore.dataset.vision import Inter
from mindspore import dtype as mstype
from mindspore import Model
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
def create_dataset(data_path, batch_size=32, repeat_size=1,
                   num_parallel_workers=1):
    """Load a MindRecord file and decode its image column.

    Args:
        data_path: Path of the MindRecord file to read.
        batch_size: Accepted but not used by this version of the function.
        repeat_size: Accepted but not used by this version of the function.
        num_parallel_workers: Accepted but not used by this version of the
            function.

    Returns:
        A dataset with the columns 'data' (decoded image) and 'label'.
        NOTE: no batch() is applied here, so samples are yielded one at a
        time without a batch dimension.
    """
    # Define the dataset
    my_ds = ds.MindDataset(data_path, columns_list=['data', 'label'])
    # Decode the stored JPEG bytes of the 'data' column into image arrays.
    decode = CV.Decode()
    my_ds = my_ds.map(input_columns=["data"], operations=decode)
    return my_ds
class MyNet(nn.Cell):
    """Convolutional classifier: five conv layers followed by four dense layers.

    NOTE(review): construct() returns ``x + self.y``, but ``self.y`` is never
    assigned in ``__init__``.
    NOTE(review): conv1 is declared with 1 input channel -- confirm this
    matches the channel count of the decoded images.
    """

    def __init__(self):
        super(MyNet, self).__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 96, 11, stride = 4, pad_mode = "valid")
        self.conv2 = nn.Conv2d(96, 256, 5, stride = 1, pad_mode = "same")
        self.conv3 = nn.Conv2d(256, 384, 3, stride = 1, pad_mode = "same")
        self.conv4 = nn.Conv2d(384, 384, 3, stride = 1, pad_mode = "same")
        self.conv5 = nn.Conv2d(384, 256, 3, stride = 1, pad_mode = "same")
        # Activation, pooling and flatten layers shared by all stages.
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size = 3, stride = 2)
        self.flatten = nn.Flatten()
        # Dense head; fc1 expects 6 * 6 * 256 flattened features and the
        # last layer has a single output unit.
        self.fc1 = nn.Dense(6 * 6 * 256, 4096)
        self.fc2 = nn.Dense(4096, 4096)
        self.fc3 = nn.Dense(4096, 1000)
        self.fc4 = nn.Dense(1000, 1)
        self.softmax = nn.Softmax()

    def construct(self, x):
        """Run the forward pass on input ``x`` and return the network output."""
        # Five stages of conv -> ReLU -> max-pool.
        x = self.conv1(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv3(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv4(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv5(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        # Flatten, then four dense layers, each followed by ReLU.
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        x = self.relu(x)
        x = self.softmax(x)
        # self.y is not defined anywhere in this class (see class note).
        return x+self.y
# Training hyper-parameters.
epoch_size = 2
lr = 0.1
momentum = 0.9
# Build the input pipeline from the MindRecord file and create the network.
dataset = create_dataset("test.mindrecord")
network = MyNet()
# Loss with sparse labels and a Momentum optimizer over all trainable
# parameters of the network.
loss = nn.SoftmaxCrossEntropyWithLogits(sparse = True)
opt = nn.Momentum(network.trainable_params(), lr, momentum)
model = Model(network, loss_fn=loss, optimizer=opt, metrics={"Accuracy": nn.Accuracy()})
# Train without dataset sink mode; this is the call at which the reported
# traceback is raised.
model.train(epoch_size, dataset, dataset_sink_mode = False)
出现错误:
Traceback (most recent call last):
File "D:\训练模型.py", line 73, in <module>
model.train(epoch_size, dataset, dataset_sink_mode = False)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\mindspore\train\model.py", line 784, in train
self._train(epoch,
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\mindspore\train\model.py", line 88, in wrapper
func(self, *args, **kwargs)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\mindspore\train\model.py", line 548, in _train
self._train_process(epoch, train_dataset, list_callback, cb_params)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\mindspore\train\model.py", line 681, in _train_process
outputs = self._train_network(*next_element)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\mindspore\nn\cell.py", line 477, in __call__
out = self.compile_and_run(*args)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\mindspore\nn\cell.py", line 803, in compile_and_run
self.compile(*inputs)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\mindspore\nn\cell.py", line 790, in compile
_cell_graph_executor.compile(self, *inputs, phase=self.phase, auto_parallel_mode=self._auto_parallel_mode)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\mindspore\common\api.py", line 632, in compile
result = self._graph_executor.compile(obj, args_list, phase, self._use_vm_mode())
TypeError
按照sunny的方法2,加上self.y后依旧出现了如上报错
解答:
数据集返回值的数量与网络所需输入值的数量不匹配。可以这样修改create_dataset()函数:
def create_dataset(data_path, batch_size=2, repeat_size=1,
                   num_parallel_workers=1):
    """Load a MindRecord file, decode its images and group them into batches.

    Args:
        data_path: Path of the MindRecord file to read.
        batch_size: Number of samples per batch; an incomplete last batch is
            dropped.
        repeat_size: Number of times the batched dataset is repeated.
        num_parallel_workers: Number of workers used by the map operation.

    Returns:
        A batched dataset with the columns 'data' and 'label'.
    """
    # Define the dataset
    dataset = ds.MindDataset(data_path, columns_list=['data', 'label'])
    # Decode lives in the vision transforms module (imported as CV), not in
    # mindspore.dataset.transforms.c_transforms (imported as C).
    decode = CV.Decode()
    dataset = dataset.map(input_columns=["data"], operations=decode,
                          num_parallel_workers=num_parallel_workers)
    # NOTE(review): Decode yields HWC images; the network presumably also
    # needs CHW float32 input (e.g. CV.HWC2CHW plus a type cast) -- confirm.
    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.repeat(repeat_size)
    return dataset
MindDataset使用见链接:
mindspore.dataset.MindDataset — MindSpore master documentation
from mindspore import nn


class Net(nn.Cell):
    """Minimal cell that reproduces the error caused by an undefined ``self.y``."""

    def __init__(self):
        super(Net, self).__init__()
        # Deliberately no ``self.y = ...`` here.

    def construct(self, x):
        # self.y was never assigned in __init__, so this addition fails.
        return x + self.y


net = Net()
# Calling the cell triggers the "'add' operation does not support the type
# [Int64, kMetaTypeNone]" error quoted below.
net(1)
mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.cc:161 GenerateFromTypes] The 'add' operation does not support the type [Int64, kMetaTypeNone]
因为construct中用到的self.y并没有在网络中定义(例如在__init__里赋值),这样会把self.y当作None处理。