# Source code for torchsig.image_datasets.dataset_generation

import os
import cv2
import time
from torch import stack as stack

def _yolo_field(value):
    """Return *value* as text, unwrapping scalar tensors/arrays first."""
    # str() on a 0-d torch tensor gives "tensor(0.5000)", which is not a valid
    # field in a YOLO label file; .item() yields the plain Python number.
    item = getattr(value, "item", None)
    return str(item() if callable(item) else value)


def save_yolo_data(images, labels, path="./", file_prefix="1_", black_hot=True):
    """
    Saves a list of images and a list of labels as png image files and txt label files in yolo format.

    Image i is written to <path>/images/<file_prefix><i>.png and its labels to
    <path>/labels/<file_prefix><i>.txt, one label per line as
    "class_id center_x center_y width height".

    Inputs:
        images: list of images to be saved as a list of torch tensors
            (assumes channel-first tensors with values in [0, 1] -- TODO confirm against callers)
        labels: list with one entry per image; each entry is an iterable of labels, and each label
            is a torch tensor or tuple of the form (class_id, center_x, center_y, width, height)
        path: a string filepath to the root directory for the dataset; it will contain subdirectories
            at <path>/images and <path>/labels; if these folders do not exist they will be created
        file_prefix: a string to prepend to the name of all generated files; used for batching or
            formatting file names
        black_hot: whether the output images are black-hot [if true, signals will appear black
            against a white background in the image files]
    """
    # os.path.join keeps the layout correct whether or not `path` ends in a separator.
    images_path = os.path.join(path, "images")
    labels_path = os.path.join(path, "labels")
    os.makedirs(images_path, exist_ok=True)
    os.makedirs(labels_path, exist_ok=True)

    # Nothing to write; stack() would raise on an empty list.
    if len(images) == 0:
        return

    pixels = stack(images) * 255
    if black_hot:
        pixels = 255 - pixels
    # Move the channel axis last: cv2.imwrite expects (height, width, channels).
    np_images = pixels.cpu().numpy().transpose(0, 2, 3, 1)

    for index, np_image in enumerate(np_images):
        stem = file_prefix + str(index)
        cv2.imwrite(os.path.join(images_path, stem + ".png"), np_image)
        with open(os.path.join(labels_path, stem + ".txt"), "w") as labels_file:
            labels_file.writelines(
                " ".join(_yolo_field(label[k]) for k in range(5)) + "\n"
                for label in labels[index]
            )
def batched_write_yolo_synthetic_dataset(dataset, dataset_size, output_path, batch_size=1000, verbose=False, black_hot=True, batch_num=-1):
    """
    Batch-by-batch generates and saves in yolo format a dataset of specified size from a torch
    Dataset object (this will only work on synthetic datasets which output in yolo format).

    Every sample is drawn with dataset[0]; this presumably relies on the synthetic dataset
    producing a fresh random sample on each access -- verify for the dataset in use.
    Files of batch k are written with the prefix "<k>_".

    Inputs:
        dataset: the source dataset to use for generation; indexing it must return (image, label_set)
        dataset_size: the desired size of the saved dataset
        output_path: the desired root directory of the saved dataset; it will contain subdirectories
            at <output_path>/images and <output_path>/labels; if these folders do not exist they
            will be created
        batch_size: the number of images to generate at once before saving them; this will be
            useful if generation is interrupted
        verbose: whether or not to print progress updates and total time taken to console
        black_hot: whether the output images are black-hot [if true, signals will appear black
            against a white background in the image files]
        batch_num: the number of the last batch completed before this function was called; used to
            restart generation if interrupted; defaults to -1, which will generate the whole dataset

    Raises:
        ValueError: if batch_size is smaller than 1
    """
    if batch_size < 1:
        # A non-positive batch size would never advance the generation loop.
        raise ValueError("batch_size must be a positive integer")

    stime = time.time()

    # Guarantee a trailing separator so the root passed to save_yolo_data
    # resolves to the same images/ and labels/ folders created here.
    root = os.path.join(output_path, "")
    os.makedirs(root, exist_ok=True)
    os.makedirs(os.path.join(root, "images"), exist_ok=True)
    os.makedirs(os.path.join(root, "labels"), exist_ok=True)

    def write_batch(count, index):
        # Draw `count` samples and persist them as batch number `index`.
        images = []
        labels = []
        for _ in range(count):
            image, label_set = dataset[0]
            images.append(image)
            labels.append(label_set)
        save_yolo_data(images, labels, root, str(index) + "_", black_hot=black_hot)

    num_generated = (batch_num + 1) * batch_size

    # `<=` (not `<`): when dataset_size is an exact multiple of batch_size the
    # last full batch must still be written.
    while num_generated + batch_size <= dataset_size:
        batch_num += 1
        write_batch(batch_size, batch_num)
        num_generated += batch_size
        if verbose:
            print("...batch #"+str(batch_num)+" complete...")

    # Final partial batch: whatever is still missing after the full batches.
    remaining = dataset_size - num_generated
    if remaining > 0:
        write_batch(remaining, batch_num + 1)

    if verbose:
        print("...done!")
        etime = time.time()
        print("total time: ",str(etime-stime),"seconds")