Reading VOC Data in PyTorch with VOCDetection, Part 2

import torch
import torchvision
import torchvision.models as models

# Build Faster R-CNN with a ResNet-50 FPN backbone and load the COCO
# weights from a local .pth file instead of downloading them.
net = models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
pthfile = r'G:\Anaconda3\envs\pytorch\Lib\site-packages\torchvision\models\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth'
net.load_state_dict(torch.load(pthfile))
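
Equivalently, torchvision can fetch and cache these weights itself; a minimal sketch of the same model setup without the hard-coded path:

# pretrained=True downloads the COCO checkpoint into torchvision's
# cache on first use, so no local .pth path is needed.
net = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)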


# A dummy batch: 4 images of shape 3x600x1200 and 11 random boxes per image.
images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
images

# Random class labels in [1, 90]; label 0 is reserved for the background class.
labels = torch.randint(1, 91, (4, 11))
labels

tensor([[38, 88, 79, 26, 31, 18, 33,  7, 57, 78, 18],
        [21, 53, 85, 68, 34, 23, 10, 11, 87, 50, 53],
        [69, 51, 38, 28, 13, 36, 42, 66, 23, 11, 25],
        [39, 36, 74, 14, 62, 43, 19, 36, 32, 78, 79]])

# The detection models take a list of 3-D image tensors rather than a
# stacked 4-D batch, so unpack the batch into a list.
images = list(image for image in images)
images
[tensor([[[0.1409, 0.3319, 0.2713, …, 0.2323, 0.3766, 0.5869],
          [0.5968, 0.8838, 0.8885, …, 0.8908, 0.5393, 0.6148],
          [0.0938, 0.1763, 0.8471, …, 0.0492, 0.9120, 0.2938],
          …,
          [0.7555, 0.6243, 0.2426, …, 0.1187, 0.1963, 0.5246],
          [0.6922, 0.5805, 0.1448, …, 0.5581, 0.8128, 0.7880],
          [0.5088, 0.7254, 0.9366, …, 0.3525, 0.2873, 0.2515]],
 …]

# Each image needs a dict with 'boxes' and 'labels'; collect one per image.
targets = []
for i in range(len(images)):
    d = {}
    d['boxes'] = boxes[i]
    d['labels'] = labels[i]
    targets.append(d)
targets

[{'boxes': tensor([[0.3797, 0.5451, 0.2937, 0.1000],
          [0.4544, 0.9748, 0.2257, 0.7201],
          [0.4218, 0.7900, 0.3847, 0.9659],
          [0.6272, 0.9855, 0.7534, 0.6685],
          [0.5671, 0.8893, 0.8936, 0.0085],
          [0.5613, 0.2616, 0.0405, 0.4540],
          [0.8088, 0.5188, 0.1928, 0.1219],
          [0.7729, 0.8459, 0.3770, 0.7681],
          [0.5068, 0.6987, 0.7738, 0.8730],
          [0.5561, 0.4926, 0.7568, 0.5941],
          [0.1353, 0.5855, 0.1306, 0.6851]]),
  'labels': tensor([38, 88, 79, 26, 31, 18, 33,  7, 57, 78, 18])},
 {'boxes': tensor([[0.5877, 0.9051, 0.7449, 0.2136],
          [0.3387, 0.8206, 0.8216, 0.6426],
          [0.3647, 0.3594, 0.9830, 0.5258],
          [0.2084, 0.5330, 0.9197, 0.7441],
          …

# In training mode the model consumes (images, targets) and returns a
# dict of losses rather than detections.
output = net(images, targets)
output

{'loss_classifier': tensor(0.1046, grad_fn=<…>),
 'loss_box_reg': tensor(0.0039, grad_fn=<…>),
 'loss_objectness': tensor(13.7724, grad_fn=<…>),
 'loss_rpn_box_reg': tensor(nan, grad_fn=<…>)}
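
The nan in loss_rpn_box_reg is most likely a consequence of the dummy data: torch.rand produces boxes that violate the [x1, y1, x2, y2] convention the model expects (x2 > x1, y2 > y1, in pixel coordinates). A minimal sketch that builds well-formed dummy boxes instead, and also shows eval-mode inference; the shapes and value ranges here are illustrative assumptions:

# Well-formed dummy boxes: x1 < x2 and y1 < y2, in pixel coordinates.
xy1 = torch.rand(4, 11, 2) * 300           # random top-left corners
wh = torch.rand(4, 11, 2) * 200 + 1        # strictly positive widths/heights
boxes = torch.cat([xy1, xy1 + wh], dim=2)  # [x1, y1, x2, y2] per box

targets = [{'boxes': boxes[i], 'labels': labels[i]} for i in range(4)]
output = net(images, targets)              # training mode -> loss dict

# In eval mode the model takes only the images and returns, per image,
# a dict with 'boxes', 'labels' and 'scores'.
net.eval()
predictions = net(images)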

import torchvision.datasets as datasets
import numpy as np
import cv2

# VOCDetection reads the Pascal VOC detection data; here the VOC2007
# 'train' split is already on disk under demo_voc, so download=False.
voc_trainset = datasets.VOCDetection('G:/jupyter/demo_voc', year='2007', image_set='train', download=False)
print('-' * 40)
print('VOC2007-trainval')
print(len(voc_trainset))

----------------------------------------
VOC2007-trainval
2501
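
For training you would normally wrap this dataset in a DataLoader. Because detection samples differ in size, the default collate (which stacks tensors) fails; a pass-through collate_fn that keeps each batch as plain lists is a common workaround. A minimal sketch, assuming the voc_trainset defined above:

from torch.utils.data import DataLoader

# zip(*batch) regroups [(img, anno), …] into (imgs, annos) tuples,
# leaving each sample at its own size.
loader = DataLoader(voc_trainset, batch_size=4, shuffle=True,
                    collate_fn=lambda batch: tuple(zip(*batch)))
imgs, annos = next(iter(loader))  # 4 PIL images and 4 annotation dicts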

# Each sample is a (PIL.Image, annotation dict) pair.
o1 = next(iter(voc_trainset))
o1
image, annotation = voc_trainset[0]
print(annotation)

{'annotation': {'folder': 'VOC2007', 'filename': '000012.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '207539885'}, 'owner': {'flickrid': 'KevBow', 'name': '?'}, 'size': {'width': '500', 'height': '333', 'depth': '3'}, 'segmented': '0', 'object': {'name': 'car', 'pose': 'Rear', 'truncated': '0', 'difficult': '0', 'bndbox': {'xmin': '156', 'ymin': '97', 'xmax': '351', 'ymax': '270'}}}}
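
Note that every value in this dict is a string, and 'object' is a single dict here because the image contains only one instance (with several instances it becomes a list). To feed these samples to the Faster R-CNN model from the first half of this post, the annotation must be converted into the {'boxes', 'labels'} format used above. A sketch of one way to do this; the voc_to_target helper and the class-name-to-index mapping are my own additions, not part of torchvision:

import torch
from torchvision import transforms

VOC_CLASSES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
               'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
               'train', 'tvmonitor']

def voc_to_target(annotation):
    # Convert a VOCDetection annotation dict into the {'boxes', 'labels'}
    # dict that torchvision detection models expect.
    objs = annotation['annotation']['object']
    if isinstance(objs, dict):  # a single object is not wrapped in a list
        objs = [objs]
    boxes, labels = [], []
    for obj in objs:
        bb = obj['bndbox']
        boxes.append([float(bb['xmin']), float(bb['ymin']),
                      float(bb['xmax']), float(bb['ymax'])])
        labels.append(VOC_CLASSES.index(obj['name']) + 1)  # 0 = background
    return {'boxes': torch.tensor(boxes, dtype=torch.float32),
            'labels': torch.tensor(labels, dtype=torch.int64)}

img_tensor = transforms.ToTensor()(image)  # PIL.Image -> CxHxW float tensor
target = voc_to_target(annotation)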
