superleaf.utils.parallel#

Functions

parmap(func, iterable[, star, mode, ...])

Apply func to every item in iterable.

Classes

PyArrowArray(path)

PyArrowData(path)

Abstract base class for PyArrow data containers.

PyArrowDataFrame(path)

SharedDataDict(data)

SharedMemoryArray(shared_mem, shape, dtype)

SharedMemoryContainer()

Abstract base class for shared memory containers.

SharedMemoryList(shared_mem)

superleaf.utils.parallel.parmap(func, iterable, star=False, mode='process', n_workers=None, nthreads_per_process=None, pbar_desc=None, verbose=False)[source]#

Apply func to every item in iterable.

The mode argument selects the executor:
  • "thread": use a ThreadPoolExecutor with n_workers worker threads.

  • "process": use a ProcessPoolExecutor; each process runs a ThreadPoolExecutor of size nthreads_per_process.

A tqdm progress bar shows overall progress.

class superleaf.utils.parallel.SharedMemoryContainer[source]#

Bases: ABC

Abstract base class for shared memory containers.

abstractmethod classmethod create(*args, **kwargs) → Self[source]#
abstractmethod load()[source]#

Load the data from shared memory.

abstractmethod close() → Self[source]#

Close the shared memory.

Unlink the shared memory.

abstract property metadata: dict#
abstractmethod classmethod from_metadata(metadata: dict) → Self[source]#
class superleaf.utils.parallel.SharedMemoryArray(shared_mem: SharedMemory, shape: tuple, dtype)[source]#

Bases: SharedMemoryContainer

classmethod create(array: ndarray, smm: SharedMemoryManager | None = None) → Self[source]#
classmethod create_empty(shape: tuple, dtype, smm: SharedMemoryManager | None = None) → Self[source]#
load() → ndarray[source]#

Load the data from shared memory.

close() → Self[source]#

Close the shared memory.

Unlink the shared memory.

property metadata: dict#
classmethod from_metadata(metadata: dict) → Self[source]#
class superleaf.utils.parallel.SharedMemoryList(shared_mem: ShareableList)[source]#

Bases: SharedMemoryContainer

classmethod create(array: list, smm: SharedMemoryManager | None = None) → Self[source]#
load() → list[source]#

Load the data from shared memory.

close() → Self[source]#

Close the shared memory.

Unlink the shared memory.

property metadata: dict#
classmethod from_metadata(metadata: dict) → Self[source]#
class superleaf.utils.parallel.PyArrowData(path: str)[source]#

Bases: SharedMemoryContainer

Abstract base class for PyArrow data containers.

abstractmethod classmethod create(data, path: str | None = None, dir: str | None = None, overwrite: bool = False) → Self[source]#
abstractmethod load()[source]#

Load the data from the file.

close() → Self[source]#

Close the shared memory.

Unlink the shared memory.

property metadata: dict#
classmethod from_metadata(metadata: dict) → Self[source]#
class superleaf.utils.parallel.PyArrowDataFrame(path: str)[source]#

Bases: PyArrowData

classmethod create(df: DataFrame, path: str | None = None, dir: str | None = None, overwrite: bool = False) → Self[source]#
load() → DataFrame[source]#

Load the data from the file.

class superleaf.utils.parallel.PyArrowArray(path: str)[source]#

Bases: PyArrowDataFrame

classmethod create(array: ndarray, path: str | None = None, dir: str | None = None, overwrite: bool = False) → Self[source]#
load() → ndarray[source]#

Load the data from the file.

class superleaf.utils.parallel.SharedDataDict(data: dict[str, SharedMemoryContainer])[source]#

Bases: SharedMemoryContainer

classmethod create(data: dict[str, SharedMemoryContainer]) → Self[source]#
load() → dict[source]#

Load the data from shared memory.

close() → Self[source]#

Close the shared memory.

Unlink the shared memory.

property metadata: dict#
classmethod from_metadata(metadata: dict) → Self[source]#