Source code for torchsig.image_datasets.dataset_generation

import os
import cv2
import time
from torch import stack as stack

"""
saves a list of images and a list of labels as a png image file and a txt label file in yolo format
Inputs:
    images: list of images to be saved as a list of torch tensors
    labels: list of labels to be saved as a list of torch tensors or tuples of the form (class_id, center_x, center_y, width, height)
    path: a string filepath to the root directory for the dataset; it will contain subdirectories at <path>/images and <path>/labels; if these folders do not exist they will be created
    file_prefix: a string to prepend to the name of all generated files; used for batching or formatting file names
    black_hot: whether the output images are black-hot [if true, signals will appear black against a white background in the image files]
"""

[docs]
def save_yolo_data(images, labels, path="./", file_prefix="1_", black_hot = True):
    """
    Save a batch of images as png files and their labels as yolo-format txt files.

    Image i is written to <path>/images/<file_prefix><i>.png and its labels to
    <path>/labels/<file_prefix><i>.txt, one "class_id center_x center_y width height"
    line per label.

    Inputs:
        images: list of images as torch tensors; they are stacked into one batch,
            so all must share a shape (presumably channels-first with values in
            [0, 1], since the batch is scaled by 255 and axis 1 is moved last
            before writing - confirm against the dataset)
        labels: one entry per image, each an iterable of labels indexable at
            positions 0..4 as (class_id, center_x, center_y, width, height)
        path: root directory of the dataset, with or without a trailing
            separator; <path>/images and <path>/labels are created if missing
        file_prefix: string prepended to every generated file name
        black_hot: if true, pixel values are inverted (255 - value) before saving

    Raises:
        OSError: if OpenCV reports that an image file could not be written
    """
    # Joining with a trailing "" normalises the directory to end in exactly one
    # separator, whether or not the caller supplied one on `path`.
    images_dir = os.path.join(path, "images", "")
    labels_dir = os.path.join(path, "labels", "")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    batch = stack(images) * 255
    if black_hot:
        batch = 255 - batch
    # Move axis 1 to the end so each per-image array has it as the last axis.
    np_images = batch.cpu().numpy().transpose(0, 2, 3, 1)

    for i, np_image in enumerate(np_images):
        stem = file_prefix + str(i)
        image_fname = images_dir + stem + ".png"
        labels_fname = labels_dir + stem + ".txt"
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(image_fname, np_image):
            raise OSError("failed to write image file: " + image_fname)
        with open(labels_fname, 'w') as labels_file:
            labels_file.writelines(
                " ".join(str(label[k]) for k in range(5)) + "\n"
                for label in labels[i]
            )


"""
batch-by-batch generates and saves in yolo format a dataset of specified size from a torch Dataset object (this will only work on synthetic datasets which output in yolo format)
Inputs:
    dataset: the source dataset to use for generation
    dataset_size: the desired size of the saved dataset
    output_path: the desired root directory of the saved dataset; it will contain subdirectories at <output_path>/images and <output_path>/labels; if these folders do not exist they will be created
    batch_size: the number of images to generate at once before saving them; this will be useful if generation is interrupted
    black_hot: whether the output images are black-hot [if true, signals will appear black against a white background in the image files]
    verbose: whether or not to print progress updates and total time taken to console
    batch_num: the number of the last batch completed before this function was called; used to restart generation if interrupted; defaults to -1, which will generate the whole dataset
"""

[docs]
def batched_write_yolo_synthetic_dataset(dataset, dataset_size, output_path, batch_size=1000, verbose=False, black_hot=True, batch_num=-1):
    """
    Generate a dataset batch by batch and save each batch in yolo format.

    Full batches of batch_size samples are written first, each with the file
    prefix "<batch index>_"; any remaining samples are written as one final,
    smaller batch.

    Inputs:
        dataset: source dataset; every sample is read as dataset[0] and must
            unpack to (image, label_set)
        dataset_size: total number of samples the saved dataset should contain
        output_path: root directory of the saved dataset, with or without a
            trailing separator; <output_path>/images and <output_path>/labels
            are created if missing
        batch_size: number of samples generated before each save; must be >= 1
        verbose: whether to print progress updates and total time taken
        black_hot: passed through to save_yolo_data
        batch_num: index of the last batch completed before this call, used to
            resume an interrupted run; -1 generates the whole dataset

    Raises:
        ValueError: if batch_size is smaller than 1
    """
    if batch_size < 1:
        # A non-positive batch size would never advance the generation loop.
        raise ValueError("batch_size must be a positive integer")

    stime = time.time()

    # Normalise the root to end with a separator so that callers may pass the
    # directory with or without one, and so that save_yolo_data resolves its
    # images/ and labels/ folders inside the root either way.
    root = os.path.join(output_path, "")
    os.makedirs(os.path.join(root, "images"), exist_ok=True)
    os.makedirs(os.path.join(root, "labels"), exist_ok=True)

    num_generated = (batch_num + 1)*batch_size

    # "<=" so that the last full batch is still written when dataset_size is an
    # exact multiple of batch_size.
    while num_generated + batch_size <= dataset_size:
        batch_num += 1
        images, labels = _generate_yolo_batch(dataset, batch_size)
        save_yolo_data(images, labels, root, str(batch_num)+"_", black_hot=black_hot)
        num_generated += batch_size
        if verbose:
            print("...batch #"+str(batch_num)+" complete...")

    # Whatever is left is smaller than one batch; it is non-positive when the
    # size divides evenly or when resuming from a point past the end.
    remaining = dataset_size - num_generated
    if remaining > 0:
        batch_num += 1
        images, labels = _generate_yolo_batch(dataset, remaining)
        save_yolo_data(images, labels, root, str(batch_num)+"_", black_hot=black_hot)

    if verbose:
        print("...done!")
        etime = time.time()
        print("total time: ",str(etime-stime),"seconds")


def _generate_yolo_batch(dataset, count):
    """Draw count (image, label_set) samples from dataset and return them as two parallel lists."""
    images = []
    labels = []
    for _ in range(count):
        # Index 0 is requested every time; presumably a synthetic dataset
        # returns a freshly generated sample on each access - confirm.
        image, label_set = dataset[0]
        images.append(image)
        labels.append(label_set)
    return images, labels