Source code for torchsig.utils.file_handlers.base_handler

"""File Handler Base and Utility Classes for reading and writing datasets to/from disk."""

import pathlib
import shutil
from typing import Any

from torchsig.utils.printing import generate_repr_str


def reset_folder(path: str) -> None:
    """Delete the folder at ``path`` (if present) and create it again, empty.

    Args:
        path (str): Location of the folder to reset.

    Raises:
        ValueError: If ``path`` exists but is not a directory.
    """
    target = pathlib.Path(path)

    if target.exists():
        # Guard: refuse to touch anything that is not a directory.
        if not target.is_dir():
            raise ValueError(f"Path is not a directory: {path}")
        # rmtree removes the directory together with all of its contents.
        shutil.rmtree(target)
        print(f"Deleted folder: {target}")

    # Create the folder anew, including any missing intermediate directories.
    target.mkdir(parents=True, exist_ok=True)
class FileWriter:
    """Common base for objects that write a dataset to disk.

    Subclasses are expected to implement `write` and `__len__`, and may
    override the `_setup` and `teardown` hooks.

    Attributes:
        root (pathlib.Path): Directory on disk the dataset is written to.
    """

    def __init__(self, root: str, **kwargs):
        """Create a writer rooted at ``root``.

        Args:
            root (str): Directory on disk the dataset is written to.
            **kwargs: Accepted but not used by this base class.
        """
        self.root: pathlib.Path = pathlib.Path(root)

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def _setup(self) -> None:
        """Subclass hook run by `setup` once the root folder has been reset."""

    def setup(self) -> None:
        """Get ready for writing.

        The root folder is reset first (deleted if present, then recreated),
        after which the subclass hook `_setup` is invoked.
        """
        reset_folder(self.root)
        self._setup()

    def teardown(self) -> None:
        """Subclass hook for releasing resources once writing has finished."""

    def __enter__(self):
        """Run `setup` and hand back the writer for use in a ``with`` block."""
        self.setup()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Run `teardown` on leaving the ``with`` block.

        Returns False, so an exception raised inside the block propagates.
        """
        self.teardown()
        return False

    def __del__(self):
        """Attempt `teardown` when the object is finalized."""
        try:
            self.teardown()
        except Exception:
            # Cleanup here is best-effort only: errors are deliberately dropped.
            pass

    # ------------------------------------------------------------------
    # Writing interface
    # ------------------------------------------------------------------
    def write(self, batch_idx: int, data: Any) -> None:
        """Write one batch to disk.

        Args:
            batch_idx (int): Index of the batch being written.
            data (Any): Payload to write.

        Raises:
            NotImplementedError: Always, in this base class; subclasses
                must provide the implementation.
        """
        raise NotImplementedError

    def exists(self) -> bool:
        """Report whether the dataset directory is present on disk.

        Returns:
            bool: True when `self.root` exists, False otherwise.
        """
        root_path = self.root
        return root_path.exists()

    # ------------------------------------------------------------------
    # Dunder helpers
    # ------------------------------------------------------------------
    def __len__(self) -> int:
        """Not implemented in the base class; subclasses must override."""
        raise NotImplementedError

    def __str__(self) -> str:
        """Return the name of the concrete class."""
        return f"{self.__class__.__name__}"

    def __repr__(self) -> str:
        """Return the representation built by `generate_repr_str`."""
        return generate_repr_str(self)
class FileReader:
    """Common base for objects that read a dataset from disk.

    Subclasses are expected to implement `read` and `__len__`.

    Attributes:
        root (pathlib.Path): Directory on disk holding the dataset.
        dataset_info_filepath (pathlib.Path): Path of ``dataset_info.yaml``
            directly inside ``root``.
    """

    def __init__(self, root: str, **kwargs):
        """Create a reader rooted at ``root``.

        Args:
            root (str): Directory on disk holding the dataset.
            **kwargs: Accepted but not used by this base class.
        """
        dataset_root = pathlib.Path(root)
        self.root = dataset_root
        # The dataset info file sits directly under the root directory.
        self.dataset_info_filepath = dataset_root / "dataset_info.yaml"

    def read(self, idx: int) -> Any:
        """Load one item from disk.

        Args:
            idx (int): Index of the item to load.

        Raises:
            NotImplementedError: Always, in this base class; subclasses
                must provide the implementation.

        Returns:
            Any: Data and targets.
        """
        raise NotImplementedError

    def __len__(self) -> int:
        """Not implemented in the base class; subclasses must override."""
        raise NotImplementedError

    def __str__(self) -> str:
        """Return the name of the concrete class."""
        return f"{self.__class__.__name__}"

    def __repr__(self) -> str:
        """Return the representation built by `generate_repr_str`."""
        return generate_repr_str(self)
class BaseFileHandler:
    """File handler base class. Not meant to be instantiated.

    A handler pairs a reader class with a writer class. Subclasses override
    `reader_class` and `writer_class`; `create_handler` then builds an
    instance of whichever one matches the requested mode.

    Usage:
        >>> BaseFileHandler.create_handler(mode="r", root="./")  # create a reader
        >>> BaseFileHandler.create_handler(mode="w", root="./")  # create a writer
    """

    # These hold the classes themselves (not instances), hence ``type[...]``.
    reader_class: type[FileReader] = FileReader
    writer_class: type[FileWriter] = FileWriter

    @classmethod
    def create_handler(cls, mode: str, root: str, **kwargs) -> FileWriter | FileReader:
        """Create the reader or writer associated with this handler class.

        This is a classmethod so that the lookup goes through ``cls``: a
        subclass that overrides `reader_class` / `writer_class` gets its own
        reader/writer without having to re-implement this factory.

        Args:
            mode (str): ``"r"`` to create a reader, ``"w"`` to create a writer.
            root (str): Dataset location on disk, passed to the reader/writer.
            **kwargs: Extra keyword arguments forwarded to the reader/writer
                constructor.

        Raises:
            ValueError: If ``mode`` is neither ``"r"`` nor ``"w"``.

        Returns:
            FileWriter | FileReader: The handler's reader or writer instance.
        """
        if mode == "r":
            return cls.reader_class(root, **kwargs)
        if mode == "w":
            return cls.writer_class(root, **kwargs)
        raise ValueError(f"Invalid File Handler mode: {mode}")

    def __str__(self) -> str:
        """Return the name of the concrete class."""
        return f"{self.__class__.__name__}"

    def __repr__(self) -> str:
        """Return the representation built by `generate_repr_str`."""
        return generate_repr_str(self)