The key code is convert_to_example. The most important pieces of data are:
1) the eight coordinates of the ground-truth oriented box
2) the four coordinates of the ground-truth axis-aligned box
3) the image itself
import numpy as np
import tensorflow as tf

def convert_to_example(image_data, filename, labels, ignored, labels_text, bboxes, oriented_bboxes, shape):
    """Build an Example proto for an image example.
    Args:
        image_data: string, JPEG encoding of RGB image;
        filename: string, name of the image file;
        labels: list of integers, identifier for the ground truth;
        ignored: list of integers, 1 if the corresponding box is marked
            as ignored ("do not care") for training;
        labels_text: list of strings, human-readable labels;
        bboxes: list of axis-aligned bounding boxes; each box is
            [xmin, ymin, xmax, ymax];
        oriented_bboxes: list of oriented bounding boxes; each box is a list
            of floats in [0, 1], specifying [x1, y1, x2, y2, x3, y3, x4, y4];
        shape: shape of the image as [height, width, channels].
    Returns:
        Example proto
    """
    image_format = b'JPEG'
    oriented_bboxes = np.asarray(oriented_bboxes)
    bboxes = np.asarray(bboxes)
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/shape': int64_feature(list(shape)),
        # The four coordinates of the axis-aligned GT box.
        'image/object/bbox/xmin': float_feature(list(bboxes[:, 0])),
        'image/object/bbox/ymin': float_feature(list(bboxes[:, 1])),
        'image/object/bbox/xmax': float_feature(list(bboxes[:, 2])),
        'image/object/bbox/ymax': float_feature(list(bboxes[:, 3])),
        # The eight coordinates of the oriented GT box.
        'image/object/bbox/x1': float_feature(list(oriented_bboxes[:, 0])),
        'image/object/bbox/y1': float_feature(list(oriented_bboxes[:, 1])),
        'image/object/bbox/x2': float_feature(list(oriented_bboxes[:, 2])),
        'image/object/bbox/y2': float_feature(list(oriented_bboxes[:, 3])),
        'image/object/bbox/x3': float_feature(list(oriented_bboxes[:, 4])),
        'image/object/bbox/y3': float_feature(list(oriented_bboxes[:, 5])),
        'image/object/bbox/x4': float_feature(list(oriented_bboxes[:, 6])),
        'image/object/bbox/y4': float_feature(list(oriented_bboxes[:, 7])),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/ignored': int64_feature(ignored),
        'image/format': bytes_feature(image_format),
        'image/filename': bytes_feature(filename),
        'image/encoded': bytes_feature(image_data)}))
    return example
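convert_to_example relies on three small helpers, int64_feature, float_feature, and bytes_feature, that are not shown here. A minimal sketch of what they typically look like, modeled on the standard TensorFlow TFRecord wrappers (the repository's own versions may differ in detail):

def int64_feature(value):
    """Wrap an int or a list of ints in a tf.train.Feature."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def float_feature(value):
    """Wrap a float or a list of floats in a tf.train.Feature."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def bytes_feature(value):
    """Wrap a bytes string or a list of bytes strings in a tf.train.Feature."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

With those in place, writing one example to a TFRecord file looks roughly like the following sketch; the file names, box values, and image shape are made up for illustration only:

# Hypothetical write-out; all paths and values are placeholders.
image_data = tf.gfile.FastGFile('img_1.jpg', 'rb').read()  # raw JPEG bytes
example = convert_to_example(
    image_data=image_data,
    filename=b'img_1.jpg',
    labels=[1],                      # 1 = text (see labels_to_names below)
    ignored=[0],                     # 0 = use this box for training
    labels_text=[b'text'],
    bboxes=[[0.1, 0.2, 0.5, 0.4]],   # [xmin, ymin, xmax, ymax], normalized
    oriented_bboxes=[[0.1, 0.2, 0.5, 0.2, 0.5, 0.4, 0.1, 0.4]],  # x1..y4
    shape=[720, 1280, 3])
with tf.python_io.TFRecordWriter('icdar_train.tfrecord') as writer:
    writer.write(example.SerializeToString())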
slim = tf.contrib.slim  # TF 1.x; get_split builds a slim Dataset

def get_split(split_name, dataset_dir, file_pattern, num_samples, reader=None):
    # util is the project's own utility package (util.io, util.str), not a pip module.
    dataset_dir = util.io.get_absolute_path(dataset_dir)

    if util.str.contains(file_pattern, '%'):
        file_pattern = util.io.join_path(dataset_dir, file_pattern % split_name)
    else:
        file_pattern = util.io.join_path(dataset_dir, file_pattern)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ignored': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'filename': slim.tfexample_decoder.Tensor('image/filename'),
        # Note the [ymin, xmin, ymax, xmax] order expected by the BoundingBox handler.
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/oriented_bbox/x1': slim.tfexample_decoder.Tensor('image/object/bbox/x1'),
        'object/oriented_bbox/x2': slim.tfexample_decoder.Tensor('image/object/bbox/x2'),
        'object/oriented_bbox/x3': slim.tfexample_decoder.Tensor('image/object/bbox/x3'),
        'object/oriented_bbox/x4': slim.tfexample_decoder.Tensor('image/object/bbox/x4'),
        'object/oriented_bbox/y1': slim.tfexample_decoder.Tensor('image/object/bbox/y1'),
        'object/oriented_bbox/y2': slim.tfexample_decoder.Tensor('image/object/bbox/y2'),
        'object/oriented_bbox/y3': slim.tfexample_decoder.Tensor('image/object/bbox/y3'),
        'object/oriented_bbox/y4': slim.tfexample_decoder.Tensor('image/object/bbox/y4'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/ignored': slim.tfexample_decoder.Tensor('image/object/bbox/ignored')
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)

    labels_to_names = {0: 'background', 1: 'text'}
    items_to_descriptions = {
        'image': 'A color image of varying height and width.',
        'shape': 'Shape of the image.',
        'object/bbox': 'A list of bounding boxes, one per object.',
        'object/label': 'A list of labels, one per object.',
    }

    # Build the slim Dataset; a DatasetDataProvider reads from it at training time.
    return slim.dataset.Dataset(
        data_sources=file_pattern,  # path or glob pattern of the TFRecord file(s)
        reader=reader,
        decoder=decoder,
        num_samples=num_samples,
        items_to_descriptions=items_to_descriptions,
        num_classes=2,
        labels_to_names=labels_to_names)
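For completeness, here is a sketch of how such a Dataset is typically consumed with slim's DatasetDataProvider; the split name, directory, file pattern, and num_samples below are placeholder values, not the ones used by this project:

# Hypothetical usage; paths, pattern, and num_samples are placeholders.
dataset = get_split('train', 'datasets/icdar2015',
                    'icdar2015_%s.tfrecord', num_samples=1000)
provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset,
    num_readers=2,
    shuffle=True)
# Each item name matches a key of items_to_handlers above.
image, shape, bboxes, x1, y1 = provider.get(
    ['image', 'shape', 'object/bbox',
     'object/oriented_bbox/x1', 'object/oriented_bbox/y1'])

provider.get returns one tensor per requested item; downstream preprocessing would typically fetch all eight oriented coordinates and stack them back into an [N, 8] tensor before batching.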