Skip to content

taps.data.file

PickleFileTransformerConfig

Bases: DataTransformerConfig

Pickle file transformer config.

add_argument_group() classmethod

add_argument_group(
    parser: ArgumentParser,
    *,
    argv: Sequence[str] | None = None,
    required: bool = True
) -> None

Add model fields as arguments of an argument group on the parser.

Parameters:

  • parser (ArgumentParser) –

    Parser to add a new argument group to.

  • argv (Sequence[str] | None, default: None ) –

    Optional sequence of string arguments.

  • required (bool, default: True ) –

    Mark arguments without defaults as required.

Source code in taps/config.py
@classmethod
def add_argument_group(
    cls,
    parser: argparse.ArgumentParser,
    *,
    argv: Sequence[str] | None = None,
    required: bool = True,
) -> None:
    """Register every model field as an option in a new argument group.

    The group is titled with the class name. Each field becomes a
    ``--flag`` whose name is the field name lowercased with underscores
    replaced by dashes; the parsed value is stored under the original
    field name.

    Args:
        parser: Parser to add a new argument group to.
        argv: Optional sequence of string arguments. Not read by this
            implementation.
        required: Mark arguments without defaults as required.
    """
    options = parser.add_argument_group(cls.__name__)
    for name, info in cls.model_fields.items():
        flag = '--' + name.replace('_', '-').lower()
        # No ``type=`` converter is passed, so values given on the command
        # line are stored as the strings argparse parsed.
        options.add_argument(
            flag,
            dest=name,
            default=info.get_default(),
            required=info.is_required() and required,
            help=info.description,
        )

get_transformer()

get_transformer() -> PickleFileTransformer

Create a transformer instance from the config.

Source code in taps/data/file.py
def get_transformer(self) -> PickleFileTransformer:
    """Build a pickle file transformer rooted at this config's directory.

    Returns:
        A new `PickleFileTransformer` constructed from `self.file_dir`.
    """
    target_dir = self.file_dir
    return PickleFileTransformer(target_dir)

Identifier

Bases: NamedTuple

Object identifier.

Attributes:

  • cache_dir (Path) –

    Object directory.

  • obj_id (UUID) –

    Object ID.

path()

path() -> Path

Get path to the object.

Source code in taps/data/file.py
def path(self) -> pathlib.Path:
    """Return the file path for this object.

    The path is the cache directory joined with the string form of the
    object ID.
    """
    filename = str(self.obj_id)
    return self.cache_dir / filename

PickleFileTransformer

PickleFileTransformer(cache_dir: Path | str)

Pickle file object transformer.

Parameters:

  • cache_dir (Path | str) –

    Directory to store pickled objects in.

Source code in taps/data/file.py
def __init__(
    self,
    cache_dir: pathlib.Path | str,
) -> None:
    """Initialize the transformer.

    Args:
        cache_dir: Directory to store pickled objects in. It is resolved
            to an absolute path before being stored on the instance.
    """
    resolved_dir = pathlib.Path(cache_dir).resolve()
    self.cache_dir = resolved_dir

close()

close() -> None

Close the transformer, deleting its cache directory and every object stored in it.

Source code in taps/data/file.py
def close(self) -> None:
    """Close the transformer by deleting its cache directory.

    The whole directory tree is removed. Errors raised during removal
    (for example, the directory not existing) are ignored.
    """
    cache_root = self.cache_dir
    shutil.rmtree(cache_root, ignore_errors=True)

is_identifier()

is_identifier(obj: Any) -> bool

Check if the object is an identifier instance.

Source code in taps/data/file.py
def is_identifier(self, obj: Any) -> bool:
    """Report whether `obj` is an `Identifier` instance.

    Args:
        obj: Object to check.

    Returns:
        `True` if `obj` is an instance of `Identifier`, otherwise `False`.
    """
    matches = isinstance(obj, Identifier)
    return matches

transform()

transform(obj: T) -> Identifier

Transform the object into an identifier.

Parameters:

  • obj (T) –

    Object to transform.

Returns:

  • Identifier

    Identifier object that can be used to resolve obj.

Source code in taps/data/file.py
def transform(self, obj: T) -> Identifier:
    """Transform the object into an identifier.

    The object is pickled to a new file inside the cache directory. The
    file is named with a freshly generated random UUID, and the cache
    directory is created first if it does not exist yet.

    Args:
        obj: Object to transform.

    Returns:
        Identifier object that can be used to resolve `obj`.

    Raises:
        pickle.PicklingError: If `obj` cannot be pickled.
        OSError: If the directory or file cannot be created or written.
    """
    identifier = Identifier(self.cache_dir, uuid.uuid4())
    filepath = identifier.path()
    filepath.parent.mkdir(parents=True, exist_ok=True)

    # Use the default buffered writer rather than ``buffering=0``. A raw,
    # unbuffered file may write fewer bytes than requested in one call, and
    # pickle does not check the count returned by ``write()``, so a short
    # write would silently leave a truncated file. The buffered writer keeps
    # writing until all bytes are on the underlying file.
    with open(filepath, 'wb') as f:
        pickle.dump(obj, f)

    return identifier

resolve()

resolve(identifier: Identifier) -> Any

Resolve an object from an identifier.

Parameters:

  • identifier (Identifier) –

    Identifier to an object.

Returns:

  • Any

    The resolved object.

Source code in taps/data/file.py
def resolve(self, identifier: Identifier) -> Any:
    """Load the object that an identifier refers to.

    The file at `identifier.path()` is opened in binary mode and its
    contents are unpickled.

    Args:
        identifier: Identifier to an object.

    Returns:
        The resolved object.
    """
    # NOTE: unpickling can execute arbitrary code, so identifiers must only
    # point at files written by a trusted source.
    with open(identifier.path(), 'rb') as stream:
        return pickle.load(stream)