ValueError可知,我们的数据格式可能有问题,这里需要打印每张图像的shape,检查各图像的尺寸是否一致。
# Shuffle the dataset and split it into training and test subsets.
# 40% of the samples are held out for testing; the seed is drawn anew on
# every run, so the split differs between runs.
split_seed = random.randint(0, 100)
x_train, x_test, y_train, y_test = train_test_split(
    imgs,
    labels,
    test_size=0.4,
    random_state=split_seed,
)

# Print the shape of every training image to see whether they all match.
for image in x_train:
    print(image.shape)
结果发现,数据中有些图像的大小不一致,因此需要先把这些图像统一到相同的尺寸。
# Inspired by: DuckSoft
# GitHub: https://github.com/DuckSoft
import os
import cv2
DATASET_ROOT = r"F:\datas\BreastCancerData\gray_breast_raw(902)"
DATASET_TARGET_ROOT = r'C:\Users\tianr\Desktop\Experiment _Data\Sampling_Images'
DATASET_TYPES = ['benign', 'malignant']
# Expected image shape as reported by cv2 (img.shape): (height, width).
IMAGE_SIZE = (580, 775)
# Size handed to cv2.resize, which expects (width, height) -- the reverse of
# img.shape. Images of any other size (e.g. the smaller 630x500 ones) are
# resampled to this common size.
TARGET_SIZE = (775, 580)

# Walk every class folder, find the images whose shape differs from
# IMAGE_SIZE, and write a resized copy into the matching target folder.
for dataset_type in DATASET_TYPES:
    source_dir = os.path.join(DATASET_ROOT, dataset_type)
    target_dir = os.path.join(DATASET_TARGET_ROOT, dataset_type)
    # cv2.imwrite does not create missing directories; it just returns False.
    # Make sure the output folder exists before writing anything into it.
    os.makedirs(target_dir, exist_ok=True)

    for filename in os.listdir(source_dir):
        path = os.path.join(source_dir, filename)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising
        # (sub-directories, non-image files, unreadable paths); skip those
        # entries rather than crashing on img.shape.
        if img is None:
            print(f"Skipping unreadable file: {path}")
            continue

        print(img.shape)
        if img.shape == IMAGE_SIZE:
            # Already the expected size: nothing to do.
            continue

        target_path = os.path.join(target_dir, filename)
        # Only report success when the file was actually written.
        if cv2.imwrite(target_path, cv2.resize(img, TARGET_SIZE)):
            print(f"Up-sampling from {img.shape} to {TARGET_SIZE}: {path}")
        else:
            print(f"Failed to write resized image: {target_path}")
将上面挑选出并已统一尺寸的图像复制回原数据集(替换同名的原图像),数据集更新完成。