# Source code for torchsig.image_datasets.datasets.yolo_datasets

import numpy as np
import torch
from torch.utils.data import Dataset
import os
from torchsig.image_datasets.datasets.file_loading_datasets import load_image_grey
from torchsig.image_datasets.transforms.denoising import normalize_image, isolate_foreground_signal

class YOLODatum():
    """
    A class for wrapping YOLO data; contains a single datum for a YOLO dataset, with image and label data together.

    This class can be treated as a tuple of (image_data, labels): it has length 2, supports indexing
    with 0 (image) and 1 (labels), and can therefore be unpacked as ``img, labels = datum``.

    Labels are always stored as a list. An int class_id passed in place of labels is expanded to the
    single full-image label ``[(class_id, 0.5, 0.5, 1.0, 1.0)]``; a single 5-tuple is wrapped in a list.
    """

    def __init__(self, img=None, labels=None):
        """
        Inputs:
            img: the image data; the box/composition helpers on this class call ``img.size(1)`` and
                ``img.size(2)``, i.e. they treat it as a torch tensor indexed [n_channels, height, width]
            labels: an int class_id, a tuple of (class_id, cx, cy, width, height), a list of such
                labels, or None for no labels
        Raises:
            TypeError: if labels is none of the accepted forms
        """
        self.img = img
        self._labels = self._normalize_labels(labels)

    @staticmethod
    def _normalize_labels(labels):
        """Return a fresh list of labels built from any of the accepted label forms."""
        if labels is None:
            return []
        if isinstance(labels, int):
            return [(labels, 0.5, 0.5, 1.0, 1.0)]
        if isinstance(labels, tuple) and len(labels) == 5:
            return [labels]
        if isinstance(labels, list):
            # copy, so that append_labels never mutates a list owned by the caller
            # (or a list shared between several datums)
            return list(labels)
        raise TypeError("YOLODatum label must be an int class_id, a tuple of (class_id, cx, cy, width, height), or a list")

    def has_labels(self):
        """Return True unless the labels have been explicitly set to None (e.g. through ``datum[1] = None``)."""
        return self._labels is not None

    def __len__(self):
        return 2

    @property
    def labels(self):
        return self._labels

    @labels.setter
    def labels(self, new_labels):
        # accept the same forms as the constructor
        self._labels = self._normalize_labels(new_labels)

    def __getitem__(self, idx):
        # indexing a 2-tuple raises IndexError for idx >= 2, which is what ends tuple-unpacking
        return (self.img, self._labels)[idx]

    def __setitem__(self, idx, new_value):
        if idx == 0:
            self.img = new_value
        elif idx == 1:
            # stored as given, without normalization
            self._labels = new_value
        else:
            raise IndexError("Cannot index past 0: img or 1: labels for YOLODatum object")

    def size(self, ind):
        """Return ``self.img.size(ind)``."""
        return self.img.size(ind)

    @property
    def shape(self):
        return self.img.shape

    def append_labels(self, new_labels):
        """
        adds new labels to the list of labels;
        Inputs:
            new_labels: either a list of tuples to add, a single tuple of (class_id, cx, cy, width, height),
                or an int class_id, in which case (class_id, 0.5, 0.5, 1.0, 1.0) will be added;
                any other type is ignored
        """
        if isinstance(new_labels, int):
            self._labels.append((new_labels, 0.5, 0.5, 1.0, 1.0))
        elif isinstance(new_labels, list):
            self._labels.extend(new_labels)
        elif isinstance(new_labels, tuple):
            self._labels.append(new_labels)

    def transpose_yolo_labels(self, yolo_datum, top_left):
        """
        A function for transposing YOLO labels for boxes in one image to the appropriate labels for the
        same boxes in a larger composite image containing the smaller image;
        Inputs:
            yolo_datum: the pair (img1, old_labels), where img1 is the smaller image on which old_labels
                are accurate, as a torch [n_channels, height, width] tensor
            top_left: the coordinates of the top left corner of img1 within self.img, as (x,y),
                such that self.img[:,y,x] is the top left corner of img1
        Outputs:
            new_labels: the new YOLO labels which describe the boxes from old_labels in self.img;
                boxes are clipped to the bounds of self.img
        """
        img1, old_labels = yolo_datum
        img2 = self.img
        img1_width, img1_height = img1.size(2), img1.size(1)
        img2_width, img2_height = img2.size(2), img2.size(1)
        offset_x, offset_y = top_left[0], top_left[1]

        new_labels = []
        for class_id, old_cx, old_cy, old_width, old_height in old_labels:
            # box size and center in pixels of the small image
            px_width = old_width * img1_width
            px_height = old_height * img1_height
            # box center in pixels of the composite image
            new_x = old_cx * img1_width + offset_x
            new_y = old_cy * img1_height + offset_y

            # box edges in the composite image, clipped to its bounds
            # NOTE(review): the floor division truncates the half-extent, so a box with an odd pixel
            # size loses up to one pixel; kept as-is in case this is intentional - confirm
            sx = max(0, new_x - px_width // 2)
            sy = max(0, new_y - px_height // 2)
            ex = min(img2_width, new_x + px_width // 2)
            ey = min(img2_height, new_y + px_height // 2)

            # back to normalized YOLO coordinates, relative to the composite image
            new_width = (ex - sx) / img2_width
            new_height = (ey - sy) / img2_height
            new_cx = ((ex + sx) / 2) / img2_width
            new_cy = ((ey + sy) / 2) / img2_height
            new_labels.append((class_id, new_cx, new_cy, new_width, new_height))
        return new_labels

    def append_yolo_labels(self, yolo_datum, top_left):
        """
        A function for adding YOLO labels for boxes in one image to the appropriate labels for the same
        boxes in a larger composite image containing the smaller image; boxes are clipped to the bounds
        of the larger image. This object will be modified to contain the labels from yolo_datum,
        transposed appropriately.
        Inputs:
            yolo_datum: the pair (img1, old_labels), where img1 is the smaller image on which old_labels
                are accurate, as a torch [n_channels, height, width] tensor
            top_left: the coordinates of the top left corner of img1 within self.img, as (x,y),
                such that self.img[:,y,x] is the top left corner of img1
        """
        self.append_labels(self.transpose_yolo_labels(yolo_datum, top_left))

    def compose_yolo_data(self, yolo_datum, top_left, image_composition_mode = "add"):
        """
        A function for composing this YOLODatum with another YOLODatum, such that the resulting image
        composes the two images with yolo_datum.img starting at top_left in self.img, and the resulting
        labels contain labels from both YOLODatum objects. This object is modified in place.
        Inputs:
            yolo_datum: the datum to compose into this datum
            top_left: the top left corner as (x,y) in which to append yolo_datum.img
            image_composition_mode: a string denoting the mode in which to compose the image data from
                the two images; either 'replace', 'max', or 'add'; 'add' by default;
        Raises:
            Exception: if image_composition_mode is not one of the supported modes
        """
        # validate first so that an invalid mode leaves this datum untouched
        if image_composition_mode not in ('replace', 'max', 'add'):
            raise Exception("invalid image composition mode; must be 'max', 'add', or 'replace'")

        self.append_yolo_labels(yolo_datum, top_left)

        start_x, start_y = top_left
        # only the part of the incoming image that fits inside self.img, measured from top_left, is used
        width = min(self.img.size(2) - start_x, yolo_datum.img.size(2))
        height = min(self.img.size(1) - start_y, yolo_datum.img.size(1))
        if width <= 0 or height <= 0:
            return  # the incoming image lies entirely outside of self.img

        region = (slice(None), slice(start_y, start_y + height), slice(start_x, start_x + width))
        patch = yolo_datum.img[:, :height, :width]
        if image_composition_mode == 'replace':
            self.img[region] = patch
        elif image_composition_mode == 'max':
            # two-tensor torch.max is the element-wise maximum and returns a plain tensor
            self.img[region] = torch.max(self.img[region], patch)
        else:
            self.img[region] = self.img[region] + patch
class YOLODatasetAdapter(Dataset):
    """
    A class for adapting generic image datasets to YOLO image datasets.

    Expects a dataset which returns only image tensors, and a class label to apply to the dataset.
    All returned data will be of the form (image_data, [(class_id, 0.5, 0.5, 1.0, 1.0)]),
    or (image_data, []) if class_id = None
    """

    def __init__(self, dataset: Dataset, class_id: int = None):
        """
        Inputs:
            dataset: the wrapped dataset; each item is used as the image of the returned YOLODatum
            class_id: the class id to attach to every item, or None to return items without labels
        """
        self.dataset = dataset
        self.class_id = class_id

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        # a fresh empty list per item when there is no class id
        labels = [] if self.class_id is None else self.class_id
        return YOLODatum(self.dataset[idx], labels)
class YOLOImageCompositeDatasetComponent(Dataset):
    """
    Defines a component of a composite dataset; this will contain any information the composites should
    use to place instances of this component in the composites, such as how many instances should be placed
    Inputs:
        component_dataset: a Dataset object which contains instances of this component
        min_to_add: the fewest instances of this component type to be placed in each composite
        max_to_add: the most instances of this type to be placed in each composite; the number of
            instances will be selected uniformly from min_to_add to max_to_add (inclusive)
        class_id: the int id to use for labeling data; if provided, component_dataset is wrapped in a
            YOLODatasetAdapter with this class_id, representing a single box taking up the full image
            component of class class_id
        use_source_yolo_labels: if true, use the items of component_dataset as they are (they are
            expected to already carry YOLO labels); otherwise component_dataset is assumed to return
            only image tensors;
        If neither class_id nor use_source_yolo_labels is provided, all data will be assumed to have no
        labels, and component_dataset is wrapped in a YOLODatasetAdapter without a class id
    """

    def __init__(self, component_dataset, min_to_add=0, max_to_add=1, class_id=None, use_source_yolo_labels=False):
        self.min_to_add = min_to_add
        self.max_to_add = max_to_add
        self.class_id = class_id
        self.use_source_yolo_labels = use_source_yolo_labels
        if class_id is not None:
            # a class id takes precedence over use_source_yolo_labels
            self.component_dataset = YOLODatasetAdapter(component_dataset, class_id)
        elif not use_source_yolo_labels:
            # no class id: the adapter then produces items with an empty label list
            self.component_dataset = YOLODatasetAdapter(component_dataset, None)
        else:
            self.component_dataset = component_dataset

    def __len__(self):
        return len(self.component_dataset)

    def __getitem__(self, idx):
        return self.component_dataset[idx]

    def next(self):
        """Return a uniformly random item of the component dataset."""
        return self.component_dataset[np.random.randint(0, len(self.component_dataset))]

    def get_components_to_add(self):
        """Return a list of between min_to_add and max_to_add (inclusive) randomly drawn items."""
        num_to_add = np.random.randint(self.min_to_add, self.max_to_add + 1)
        return [self.next() for _ in range(num_to_add)]
class YOLOImageCompositeDataset(Dataset):
    """
    A Dataset class generating synthetic composite images in yolo format from other image datasets
    Inputs:
        composite_scale: the shape of the image composites to be generated; it is passed to
            torch.zeros, and the resulting image is indexed as [num_channels, height, width]
        transforms: either a single function or list of functions from images to images to be applied
            to each generated composite; used for adding noise and impairments to data; defaults to None
        components: currently ignored; components must be added through add_component
        dataset_size: the value reported by len(); every access generates a new random composite
        max_add: if true, components are combined into the composite with an element-wise maximum;
            otherwise they are added to it
    <NOTE>: The dataset will not have any components to add to the composite at initialization;
    these must be added by calling my_instance.add_component(image_dataset_to_add)
    """

    def __init__(self, composite_scale, transforms=None, components=None, dataset_size=10, max_add=False):
        self.composite_scale = composite_scale
        self.transforms = transforms
        # list of YOLOImageCompositeDatasetComponent objects; the `components` argument is not used
        self.components = []
        self.dataset_size = dataset_size
        self.max_add = max_add

    def __len__(self):
        # placeholder value; this will generate new images, so there is in practice no fixed length of the dataset
        return self.dataset_size

    def add_component(self, component_dataset, min_to_add=0, max_to_add=1, class_id=None, use_source_yolo_labels=False):
        """Register a new component; the arguments are forwarded to YOLOImageCompositeDatasetComponent."""
        self.components.append(
            YOLOImageCompositeDatasetComponent(
                component_dataset,
                min_to_add=min_to_add,
                max_to_add=max_to_add,
                class_id=class_id,
                use_source_yolo_labels=use_source_yolo_labels,
            )
        )

    def get_components_to_add(self):
        """Return the concatenation of the items drawn from every registered component."""
        to_add = []
        for component in self.components:
            to_add.extend(component.get_components_to_add())
        return to_add

    def add_component_to_image_and_labels(self, datum, component):
        """
        Place component at a random position inside datum, updating both the image and the labels of
        datum in place.
        Inputs:
            datum: the composite being built
            component: the datum to place into the composite
        """
        img_w = datum.img.shape[-1]
        img_h = datum.img.shape[-2]
        c_w = component.img.shape[-1]
        c_h = component.img.shape[-2]
        # a component larger than the composite is anchored at 0 along that axis
        max_x = max(img_w - c_w, 0)
        max_y = max(img_h - c_h, 0)
        new_x = np.random.randint(0, max_x + 1)
        new_y = np.random.randint(0, max_y + 1)

        if not self.max_add:
            datum.compose_yolo_data(component, (new_x, new_y))
            return

        # max_add: labels are transposed as usual, and the pixels are combined with an element-wise maximum
        datum.append_yolo_labels(component, (new_x, new_y))
        width = min(img_w - new_x, c_w)
        height = min(img_h - new_y, c_h)
        current = datum.img[:, new_y:new_y + height, new_x:new_x + width]
        datum.img[:, new_y:new_y + height, new_x:new_x + width] = torch.max(current, component.img[:, :height, :width])

    def __getitem__(self, idx):
        # idx is not used: every access builds a new random composite on an all-zero image
        full_datum = YOLODatum(torch.zeros(self.composite_scale), [])
        for component in self.get_components_to_add():
            self.add_component_to_image_and_labels(full_datum, component)
        if self.transforms:
            if isinstance(self.transforms, list):
                for transform in self.transforms:
                    full_datum.img = transform(full_datum.img)
            else:
                full_datum.img = self.transforms(full_datum.img)
        return full_datum
def read_yolo_datum(root_dir, fname):
    """
    loads a YOLODatum from a root directory and file name that point to a dataset in yolo format
    Inputs:
        root_dir: the dataset root; the image is read from <root_dir>/images/<fname>.png and the
            labels from <root_dir>/labels/<fname>.txt
        fname: the file name without extension
    Outputs:
        a YOLODatum whose image has a leading channel axis of size 1 and whose labels are the rows of
        the label file (an empty label file yields no labels)
    """
    # os.path.join works with or without a trailing separator on root_dir
    image_path = os.path.join(root_dir, "images", fname + ".png")
    labels_path = os.path.join(root_dir, "labels", fname + ".txt")

    img = torch.Tensor(load_image_grey(image_path)[None, :, :])

    labels = []
    labels_in_file = np.loadtxt(labels_path, delimiter=" ")
    if labels_in_file.size == 0:
        # an empty label file is a 1-d array of length 0; it describes no boxes at all
        labels = []
    elif labels_in_file.ndim == 2:
        labels = list(labels_in_file)
    elif labels_in_file.ndim == 1:
        # a file with a single row is loaded as a 1-d array
        labels = [list(labels_in_file)]
    return YOLODatum(img, labels)
def yolo_to_pixels_on_image(img, box):
    """
    returns the (x_start, y_start, x_end, y_end) pixels of an input box in the yolo format
    (cx, cy, width, height) on img
    Inputs:
        img: an image whose last two shape entries are (height, width), e.g. [n_channels, height, width]
        box: the box as (cx, cy, width, height), each as a fraction of the image size
    Outputs:
        (x_start, y_start, x_end, y_end) in pixels, truncated to ints; values are not clamped to the
        bounds of the image
    """
    cx, cy, width, height = box
    # height comes before width in the shape; x coordinates scale with width, y with height
    img_height, img_width = img.shape[-2:]
    x_start = int((cx - width / 2.0) * img_width)
    x_end = int((cx + width / 2.0) * img_width)
    y_start = int((cy - height / 2.0) * img_height)
    y_end = int((cy + height / 2.0) * img_height)
    return (x_start, y_start, x_end, y_end)
def yolo_box_on_image(img, box):
    """
    returns an image tensor containing the portion of img that falls within box, where box is a tuple
    (cx, cy, width, height) in yolo format
    Inputs:
        img: the image, indexed as [n_channels, height, width]
        box: the box as (cx, cy, width, height)
    Outputs:
        the slice img[:, y_start:y_end, x_start:x_end]; a box reaching outside of the image is cropped
        to the image
    """
    x_start, y_start, x_end, y_end = yolo_to_pixels_on_image(img, box)
    # a negative index would be interpreted as counting from the end of the axis, so clamp at 0;
    # indices past the end need no clamping because slicing already stops at the end of the axis
    x_start, y_start = max(x_start, 0), max(y_start, 0)
    x_end, y_end = max(x_end, 0), max(y_end, 0)
    return img[:, y_start:y_end, x_start:x_end]
def extract_yolo_boxes(yolo_datum):
    """
    returns a list of new YOLODatum objects which each contain a single box from the input object;
    each new datum holds the crop of the image inside that box, and is labeled with the int class id
    of the box (the first entry of the label)
    """
    source_img, source_labels = yolo_datum
    return [
        YOLODatum(yolo_box_on_image(source_img, entry[1:]), int(entry[0]))
        for entry in source_labels
    ]
class YOLOFileDataset(Dataset):
    """
    A Dataset class for loading image and label files in YOLO format from a root directory
    Inputs:
        filepath: a string file path to a folder containing the yolo dataset; the folder is expected
            to contain an "images" sub-folder with .png files
        transforms: either a single function or list of functions from images to images to be applied
            to each loaded image; used for adding noise and impairments to data; defaults to None
    """

    def __init__(self, filepath: str, transforms = None):
        self.root_filepath = filepath
        self.transforms = transforms
        # os.listdir returns entries in arbitrary order; sort so that an index always maps to the same file
        self.fnames = sorted(
            f[:-4]
            for f in os.listdir(os.path.join(self.root_filepath, "images"))
            if f.endswith(".png")
        )

    def __len__(self):
        return len(self.fnames)

    def __getitem__(self, idx):
        image, labels = read_yolo_datum(self.root_filepath, self.fnames[idx])
        if self.transforms:
            if isinstance(self.transforms, list):
                for transform in self.transforms:
                    image = transform(image)
            else:
                image = self.transforms(image)
        # transforms only touch the image; the labels are passed through unchanged
        return YOLODatum(image, labels)

    def next(self):
        """Return a uniformly random item of the dataset."""
        return self[np.random.randint(len(self))]
class YOLOSOIExtractorDataset(Dataset):
    """
    A Dataset class for loading marked signals of interest (SOIs) from a yolo format dataset
    Inputs:
        filepath: a string file path to a folder containing the yolo dataset; the folder is expected
            to contain an "images" sub-folder with .png files
        transforms: either a single function or list of functions from images to images to be applied
            to each SOI; used for adding noise and impairments to data; defaults to None
        read_black_hot: whether or not to read loaded images as black-hot; when False the loaded SOIs
            are negated before being normalized
        soi_classes: which classes from the yolo dataset are to be considered signals of interest;
            None for all classes; defaults to None
        filter_strength: passed to isolate_foreground_signal for every extracted SOI; defaults to 1
    All SOIs are extracted and preprocessed once, at construction time.
    """

    def __init__(self, filepath: str, transforms = None, read_black_hot = False, soi_classes : list = None, filter_strength=1):
        self.root_filepath = filepath
        self.transforms = transforms
        self.soi_classes = soi_classes
        self.filter_strength = filter_strength
        self.sois = []

        # os.listdir returns entries in arbitrary order; sort so that the order of the SOIs is reproducible
        fnames = sorted(
            f[:-4]
            for f in os.listdir(os.path.join(self.root_filepath, "images"))
            if f.endswith(".png")
        )

        for fname in fnames:
            datum = read_yolo_datum(self.root_filepath, fname)
            # take only the image part of the boxes whose class is wanted
            # (soi[1][0][0] is the class id of the single label of an extracted box)
            new_sois = [
                soi[0]
                for soi in extract_yolo_boxes(datum)
                if not self.soi_classes or int(soi[1][0][0]) in self.soi_classes
            ]
            # dont allow sois for boxes of null dimensions
            new_sois = [soi for soi in new_sois if np.prod(soi.shape) > 0]
            # NOTE(review): the images are negated when read_black_hot is False, not when it is True -
            # confirm that this is the intended polarity
            if read_black_hot:
                new_sois = [normalize_image(soi) for soi in new_sois]
            else:
                new_sois = [normalize_image(-soi) for soi in new_sois]
            self.sois.extend(isolate_foreground_signal(soi, self.filter_strength) for soi in new_sois)

    def __len__(self):
        return len(self.sois)

    def __getitem__(self, idx):
        soi = torch.Tensor(self.sois[idx])
        if self.transforms:
            if isinstance(self.transforms, list):
                for transform in self.transforms:
                    soi = transform(soi)
            else:
                soi = self.transforms(soi)
        return soi

    def next(self):
        """Return a uniformly random SOI of the dataset."""
        return self[np.random.randint(len(self))]