docs for zanj v0.4.0
View Source on GitHub

zanj.externals

Helpers for storing and retrieving an item externally in a ZANJ archive.


 1"""for storing/retrieving an item externally in a ZANJ archive"""
 2
 3from __future__ import annotations
 4
 5import json
 6from typing import IO, Any, Callable, Literal, NamedTuple, get_args
 7
 8import numpy as np
 9from muutils.json_serialize.json_serialize import ObjectPath
10from muutils.json_serialize.util import JSONitem
11
12# this is to make type checking work -- it will later be overridden
13_ZANJ_pre = Any
14
15ZANJ_MAIN: str = "__zanj__.json"
16ZANJ_META: str = "__zanj_meta__.json"
17
18ExternalItemType = Literal["jsonl", "npy"]
19
20ExternalItemType_vals = get_args(ExternalItemType)
21
22ExternalItem = NamedTuple(
23    "ExternalItem",
24    [
25        ("item_type", ExternalItemType),
26        ("data", Any),
27        ("path", ObjectPath),
28    ],
29)
30
31
32def load_jsonl(zanj: "LoadedZANJ", fp: IO[bytes]) -> list[JSONitem]:  # type: ignore[name-defined] # noqa: F821
33    return [json.loads(line) for line in fp]
34
35
36def load_npy(zanj: "LoadedZANJ", fp: IO[bytes]) -> np.ndarray:  # type: ignore[name-defined] # noqa: F821
37    return np.load(fp)
38
39
40EXTERNAL_LOAD_FUNCS: dict[ExternalItemType, Callable[[_ZANJ_pre, IO[bytes]], Any]] = {
41    "jsonl": load_jsonl,
42    "npy": load_npy,
43}
44
45
46def GET_EXTERNAL_LOAD_FUNC(item_type: str) -> Callable[[_ZANJ_pre, IO[bytes]], Any]:
47    if item_type not in EXTERNAL_LOAD_FUNCS:
48        raise ValueError(
49            f"unknown external item type: {item_type}, needs to be one of {EXTERNAL_LOAD_FUNCS.keys()}"
50        )
51    # safe to ignore since we just checked
52    return EXTERNAL_LOAD_FUNCS[item_type]  # type: ignore[index]

ZANJ_MAIN: str = '__zanj__.json'
ZANJ_META: str = '__zanj_meta__.json'
ExternalItemType = typing.Literal['jsonl', 'npy']
ExternalItemType_vals = ('jsonl', 'npy')
class ExternalItem(typing.NamedTuple):

ExternalItem(item_type, data, path)

ExternalItem( item_type: Literal['jsonl', 'npy'], data: Any, path: tuple[typing.Union[str, int], ...])

Create new instance of ExternalItem(item_type, data, path)

item_type: Literal['jsonl', 'npy']

Alias for field number 0

data: Any

Alias for field number 1

path: tuple[typing.Union[str, int], ...]

Alias for field number 2

Inherited Members
builtins.tuple
index
count
def load_jsonl( zanj: 'LoadedZANJ', fp: IO[bytes]) -> list[typing.Union[bool, int, float, str, NoneType, typing.List[typing.Union[bool, int, float, str, NoneType, typing.List[typing.Any], typing.Dict[str, typing.Any]]], typing.Dict[str, typing.Union[bool, int, float, str, NoneType, typing.List[typing.Any], typing.Dict[str, typing.Any]]]]]:
32def load_jsonl(zanj: "LoadedZANJ", fp: IO[bytes]) -> list[JSONitem]:  # type: ignore[name-defined] # noqa: F821
33    return [json.loads(line) for line in fp]
def load_npy(zanj: 'LoadedZANJ', fp: IO[bytes]) -> numpy.ndarray:
36def load_npy(zanj: "LoadedZANJ", fp: IO[bytes]) -> np.ndarray:  # type: ignore[name-defined] # noqa: F821
37    return np.load(fp)
EXTERNAL_LOAD_FUNCS: dict[typing.Literal['jsonl', 'npy'], typing.Callable[[zanj.ZANJ, typing.IO[bytes]], typing.Any]] = {'jsonl': <function load_jsonl>, 'npy': <function load_npy>}
def GET_EXTERNAL_LOAD_FUNC(item_type: str) -> Callable[[zanj.ZANJ, IO[bytes]], Any]:
46def GET_EXTERNAL_LOAD_FUNC(item_type: str) -> Callable[[_ZANJ_pre, IO[bytes]], Any]:
47    if item_type not in EXTERNAL_LOAD_FUNCS:
48        raise ValueError(
49            f"unknown external item type: {item_type}, needs to be one of {EXTERNAL_LOAD_FUNCS.keys()}"
50        )
51    # safe to ignore since we just checked
52    return EXTERNAL_LOAD_FUNCS[item_type]  # type: ignore[index]