Source code for helios.core.utils

import importlib
import os
import pathlib
import platform
import re
import sys
import time
import types
import typing

import torch
import torchvision

from .._version import __version__

# Generic type variable for helpers whose return type mirrors the type of their
# argument (used by ``get_from_optional`` and ``convert_to_list``).
T = typing.TypeVar("T")
# Type variable bound to ``typing.Any``; ``Registry.register`` uses it so that
# the registered object is returned with the same static type it came in with.
T_Any = typing.TypeVar("T_Any", bound=typing.Any)


def get_env_info_str() -> str:
    """
    Return a string with the Helios header and the environment information.

    The message starts with the Helios banner and is followed by the versions of
    Helios, PyTorch and TorchVision, the platform string reported by
    ``platform.platform()`` and the Python version as ``major.minor.micro``. The
    CUDA version is appended only when ``torch.cuda.is_available()`` is true.

    Returns:
        The message string.
    """
    msg = r"""
#===========================================================================#
          _______  _       _________ _______  _______
|\     /|(  ____ \( \      \__   __/(  ___  )(  ____ \
| )   ( || (    \/| (         ) (   | (   ) || (    \/
| (___) || (__    | |         | |   | |   | || (_____
|  ___  ||  __)   | |         | |   | |   | |(_____  )
| (   ) || (      | |         | |   | |   | |      ) |
| )   ( || (____/\| (____/\___) (___| (___) |/\____) |
|/     \|(_______/(_______/\_______/(_______)\_______)
"""
    # Use the named fields of ``sys.version_info``: positional index 3 is the
    # release level (e.g. "final"), not the micro/patch number.
    py = sys.version_info
    msg += (
        "\nEnvironment info: "
        f"\n\tHelios: {__version__}"
        f"\n\tPyTorch: {torch.__version__}"
        f"\n\tTorchVision: {torchvision.__version__}"
        f"\n\tOS: {platform.platform()}"
        f"\n\tPython: {py.major}.{py.minor}.{py.micro}"
    )
    if torch.cuda.is_available():
        msg += f"\n\tCUDA version: {torch.version.cuda}"

    msg += "\n"
    msg += "#===========================================================================#"
    msg += "\n\n"
    return msg
def get_from_optional(opt_var: T | None, raise_on_empty: bool = False) -> T:
    """
    Ensure the given variable is not :code:`None` and return it.

    This is useful when dealing with variables that can be ``None`` at declaration
    but are set elsewhere. In those instances, mypy is unable to determine that the
    variable was set, so it will issue a warning. The workaround is to add asserts,
    but that can get tedious very quickly. This function can be used as an
    alternative.

    Example:
        .. code-block:: python

            var: int | None = None

            # ... Set var to a valid value some place else.

            assert var is not None
            v = var

            # Alternatively:
            v = core.get_from_optional(var)

    Args:
        opt_var: the optional variable.
        raise_on_empty: if True, an exception is raised when the optional is
            ``None``. If False, the check is done through an ``assert`` instead.

    Returns:
        The variable without the optional.

    Raises:
        RuntimeError: if the ``opt_var`` is ``None`` and ``raise_on_empty`` is true.
    """
    # Only raise when the value is actually missing; a valid value must be
    # returned regardless of ``raise_on_empty``.
    if raise_on_empty and opt_var is None:
        raise RuntimeError("error: optional cannot be empty")

    # Narrows the type for static checkers and guards the non-raising mode.
    assert opt_var is not None
    return opt_var
def convert_to_list(var: T | list[T] | tuple[T, ...]) -> list[T]:
    """
    Wrap the input in a list unless it already is one.

    Example:
        .. code-block:: python

            def some_fun(x: int | list[int]) -> None:
                if not isinstance(x, list):
                    x = [x]
                for elem in x:
                    ...

            # The above code can be replaced with this:
            for elem in convert_to_list(x):
                ...

    Args:
        var: either a single object, a list of objects, or a tuple of objects.

    Returns:
        The same list object if the input was a list, a new list holding the
        elements of the tuple if the input was a tuple, and a one-element list
        containing the input otherwise.
    """
    if isinstance(var, tuple):
        return list(var)
    return var if isinstance(var, list) else [var]
[docs] class ChdirContext: """ Allow switching between the current working directory and another within a scope. The intention is to facilitate temporary switches of the current working directory (such as when attempting to resolve relative paths) by creating a context in which the working directory is automatically switched to a new one. Upon exiting of the context, the original working directory is restored. Example: .. code-block:: python os.chdir(".") # <- Starting working directory with ChdirContext("/new/path") as prev_cwd: # prev_cwd is the starting working directory Path.cwd() # <- This is /new/path now ... Path.cwd() # <- Back to the starting working directory. Args: target_path: the path to switch to. """ def __init__(self, target_path: pathlib.Path): """ Create the context manager with the given path. Args: target_path: the path to switch to. """ self.start_path = pathlib.Path.cwd() self.target_path = target_path
[docs] def __enter__(self) -> pathlib.Path: """ Perform the switch from the current working directory to the new one. Returns: The previous working directory. """ os.chdir(self.target_path) return self.start_path
[docs] def __exit__( self, exc_type: type[Exception] | None, exc_value: Exception | None, exc_traceback: types.TracebackType | None, ) -> None: """Restores the previous working directory.""" os.chdir(self.start_path)
class AverageTimer:
    """
    Compute the average elapsed time between consecutive recorded steps.

    Elapsed times are accumulated and averaged over the steps recorded so far.
    Once the number of accumulated steps exceeds ``sliding_window``, the
    accumulators are reset and the average starts building up again from the next
    recorded step. The last computed average remains available until then.

    Args:
        sliding_window: number of steps after which the accumulated average is
            restarted.
    """

    def __init__(self, sliding_window: int = 200):
        """
        Create the timer with the given sliding window and start it.

        Args:
            sliding_window: number of steps after which the accumulated average is
                restarted.
        """
        self._sliding_window = sliding_window
        self._time_sum: float = 0
        self._step_count: int = 0
        self._current_time: float = 0
        self._avg_time: float = 0

        self.start()

    def start(self) -> None:
        """Start the timer."""
        # A monotonic clock is used so that system clock adjustments can never
        # produce negative or inflated intervals.
        self._current_time = time.perf_counter()

    def record(self) -> None:
        """Record a new step in the timer."""
        now = time.perf_counter()

        self._step_count += 1
        self._time_sum += now - self._current_time
        self._avg_time = self._time_sum / self._step_count

        # Restart the accumulation once the window has been exceeded.
        if self._step_count > self._sliding_window:
            self._step_count = 0
            self._time_sum = 0

        # The next interval starts after the bookkeeping above.
        self._current_time = time.perf_counter()

    def get_average_time(self) -> float:
        """Return the average computed at the last recorded step."""
        return self._avg_time
class Registry:
    """
    Provides a name to object mapping to allow users to create custom types.

    Example:
        .. code-block:: python

            # Create a registry:
            TEST_REGISTRY = Registry("test")

            # Register as a decorator:
            @TEST_REGISTRY.register
            class TestClass:
                ...

            # Register in code:
            TEST_REGISTRY.register(TestClass)
            TEST_REGISTRY.register(test_function)

    Args:
        name: the name of the registry.
    """

    def __init__(self, name: str):
        """
        Create the registry with the given name.

        Args:
            name: the name of the registry.
        """
        self._name = name
        self._obj_map: dict[str, typing.Any] = {}

    def _do_register(self, name: str, obj: typing.Any, suffix: str | None = None) -> None:
        """
        Register the function/class.

        Args:
            name: the name of the object to register.
            obj: the object to register.
            suffix: (optional) suffix to add to the name upon registration.
        """
        if isinstance(suffix, str):
            name = name + "_" + suffix

        # NOTE: this is an ``assert``, so duplicate names are only rejected when
        # Python runs without ``-O``.
        assert (
            name not in self._obj_map
        ), f"error: an object named '{name}' already exists in the " f"'{self._name}' registry"
        self._obj_map[name] = obj

    def register(self, obj: T_Any, suffix: str | None = None) -> T_Any:
        """
        Register the given object.

        The object is returned unchanged, which allows this method to be used as a
        decorator.

        Args:
            obj: the type to add. Must have a __name__ attribute.
            suffix: (optional) the suffix to add to the type name.

        Returns:
            The registered type.
        """
        name = obj.__name__
        self._do_register(name, obj, suffix)
        return obj

    def get(self, name: str, suffix: str | None = None) -> typing.Any:
        """
        Get the object that corresponds to the given name.

        If no object is registered under ``name`` and a ``suffix`` is given, the
        lookup is retried with ``name + "_" + suffix``. A warning is printed when
        that fallback succeeds.

        Args:
            name: the name of the type.
            suffix: (optional) the suffix to use if the type isn't found with the
                given name.

        Returns:
            The requested type.

        Raises:
            KeyError: if no object with the given name is found in the registry.
        """
        ret = self._obj_map.get(name)
        if ret is not None:
            return ret

        if suffix is not None:
            name_suff = name + "_" + suffix
            ret = self._obj_map.get(name_suff)
            if ret is not None:
                # Only announce the fallback when it actually found something.
                print(f"warning: found {name_suff} instead of {name}")
                return ret

        raise KeyError(
            f"No object called '{name}' found in the '{self._name}' registrar"
        )

    def __contains__(self, name: str) -> bool:
        """
        Check if the registry contains the given name.

        Args:
            name: the name to check.

        Returns:
            True if the name exists, false otherwise.
        """
        return name in self._obj_map

    def __iter__(self) -> typing.Iterator[tuple[str, typing.Any]]:
        """Get an iterator over the registry's ``(name, object)`` pairs."""
        return iter(self._obj_map.items())

    def __str__(self) -> str:
        """Get the name of the registry."""
        return self._name

    def keys(self) -> typing.KeysView[str]:
        """
        Return a set-like object providing a view into the registry's keys.

        Return:
            An iterable of the registry keys.
        """
        return self._obj_map.keys()
def update_all_registries(
    root: pathlib.Path, recurse: bool = True, import_prefix: str = ""
) -> None:
    """
    Import every module of a package that registers a type into a registry.

    Registered types only end up in their registries once the module that defines
    them has been imported. Instead of importing each of those modules by hand,
    this function walks a package, looks at the source of every module and imports
    the ones that appear to register something.

    The scan relies on the following assumptions:

    #. Each package MUST contain an ``__init__.py`` (namespace packages are not
       supported).
    #. A module is imported if and only if at least one of its lines matches the
       pattern ``@<character(s)>.register`` followed by whitespace, with the ``@``
       at the very start of the line.

    Sub-directories whose name starts with ``__`` or ``.`` are skipped. Sub-packages
    are processed as they are encountered, before the modules of the current
    package are imported.

    Example:
        Suppose we have a project with the following structure:

        .. code-block:: text

            main.py
            my_package/
            |---__init__.py
            |---some_class.py   <- This registers a type.
            |---some_funcs.py   <- Doesn't register anything.
            |---sub_package/
            |   |---__init__.py
            |   |---another_type.py   <- Registers
            |   |---another_func.py   <- Doesn't register.

        We can then do the following inside ``main.py``:

        .. code-block:: python

            import helios.core as hlc

            ...

            hlc.update_all_registries(Path.cwd() / "my_package", recurse=True)

        The function will recursively walk through ``my_package`` and import the
        following:

        * ``my_package.some_class``
        * ``my_package.sub_package.another_type``

        After the function returns, the corresponding registries will have been
        populated with the types and they can be used elsewhere in the code.

    Args:
        root: the path to the root package.
        recurse: if True, recursively search through sub-packages. Defaults to true.
        import_prefix: (optional) prefix to be added when imported. Defaults to
            empty.

    Raises:
        RuntimeError: if the given path isn't a valid directory or if the directory
            is not Python package with ``__init__.py``.
    """
    if not root.is_dir():
        raise RuntimeError(f"error: expected {str(root)} to be a valid directory")

    # Dotted name of this package, used as the prefix of everything below it.
    package_name = root.stem if import_prefix == "" else f"{import_prefix}.{root.stem}"

    # Namespace packages are not supported, so __init__.py must be present.
    if not (root / "__init__.py").exists():
        raise RuntimeError(f"error: {str(root)} is not a Python package")

    candidates: list[tuple[pathlib.Path, str]] = []
    for entry in root.iterdir():
        if entry.is_dir():
            if recurse and not entry.stem.startswith(("__", ".")):
                update_all_registries(entry, True, package_name)
        elif entry.is_file() and entry.suffix == ".py" and entry.stem != "__init__":
            candidates.append((entry, f"{package_name}.{entry.stem}"))

    # First find every module that registers something, then import them all.
    decorator = re.compile(r"@.+\.register\s")
    to_import: list[str] = []
    for source, dotted_name in candidates:
        with source.open("r", encoding="utf-8") as handle:
            if any(decorator.match(text) for text in handle.readlines()):
                to_import.append(dotted_name)

    for dotted_name in to_import:
        importlib.import_module(dotted_name)
[docs] def safe_torch_load( f: str | os.PathLike | typing.BinaryIO | typing.IO[bytes], **kwargs: typing.Any, ) -> typing.Any: """ Wrap :code:`torch.load` to handle safe loading. This function will automatically set :code:`weights_only` to true when calling ``torch.load``. You are encouraged to use this function instead of the plain :code:`torch.load` to ensure safe loading. .. warning:: :code:`weights_only` is set automatically by this function. **do not** set this value yourself when using this function. args: f: a file-like object (has to implement ``read()``, ``readline()``, ``tell()``, and ``seek()``), or a string or a ``os.pathlike`` object containing a file name. **kwargs: keyword arguments to pass to :code:`torch.load`. Returns: The result of calling :code:`torch.load`. """ return torch.load(f, **kwargs, weights_only=True)