zanj.externals
for storing/retrieving an item externally in a ZANJ archive
"""for storing/retrieving an item externally in a ZANJ archive"""

from __future__ import annotations

import json
from typing import IO, Any, Callable, Literal, NamedTuple, get_args

import numpy as np
from muutils.json_serialize.json_serialize import ObjectPath
from muutils.json_serialize.util import JSONitem

# this is to make type checking work -- it will later be overridden
_ZANJ_pre = Any

# file names used inside an archive (NOTE(review): presumably the main payload
# and its metadata -- confirm against the ZANJ reader/writer)
ZANJ_MAIN: str = "__zanj__.json"
ZANJ_META: str = "__zanj_meta__.json"

# closed set of formats an external item can be stored in
ExternalItemType = Literal["jsonl", "npy"]

# the same set as a plain tuple of strings, for runtime checks
ExternalItemType_vals = get_args(ExternalItemType)

# record describing one external item: its format, its payload, and its path
ExternalItem = NamedTuple(
    "ExternalItem",
    [
        ("item_type", ExternalItemType),
        ("data", Any),
        ("path", ObjectPath),
    ],
)


def load_jsonl(zanj: "LoadedZANJ", fp: IO[bytes]) -> list[JSONitem]:  # type: ignore[name-defined] # noqa: F821
    """Parse every line of `fp` as a JSON document and return them as a list.

    `zanj` is not used here; it is accepted so that all loaders share the
    `(zanj, fp)` call signature stored in `EXTERNAL_LOAD_FUNCS`.
    """
    return list(map(json.loads, fp))


def load_npy(zanj: "LoadedZANJ", fp: IO[bytes]) -> np.ndarray:  # type: ignore[name-defined] # noqa: F821
    """Read `fp` with `np.load` and return the result.

    `zanj` is not used here; it is accepted so that all loaders share the
    `(zanj, fp)` call signature stored in `EXTERNAL_LOAD_FUNCS`.
    """
    return np.load(fp)


# dispatch table: external item type -> function that loads it
EXTERNAL_LOAD_FUNCS: dict[ExternalItemType, Callable[[_ZANJ_pre, IO[bytes]], Any]] = {
    "jsonl": load_jsonl,
    "npy": load_npy,
}


def GET_EXTERNAL_LOAD_FUNC(item_type: str) -> Callable[[_ZANJ_pre, IO[bytes]], Any]:
    """Look up the loader registered for `item_type` in `EXTERNAL_LOAD_FUNCS`.

    # Raises:
     - `ValueError` : if `item_type` is not a key of `EXTERNAL_LOAD_FUNCS`
    """
    if item_type in EXTERNAL_LOAD_FUNCS:
        # the membership test above makes this lookup safe; the ignore is only
        # needed because `item_type` is typed as a plain `str`
        return EXTERNAL_LOAD_FUNCS[item_type]  # type: ignore[index]

    raise ValueError(
        f"unknown external item type: {item_type}, needs to be one of {EXTERNAL_LOAD_FUNCS.keys()}"
    )
ZANJ_MAIN: str =
'__zanj__.json'
ZANJ_META: str =
'__zanj_meta__.json'
ExternalItemType =
typing.Literal['jsonl', 'npy']
ExternalItemType_vals =
('jsonl', 'npy')
class
ExternalItem(typing.NamedTuple):
ExternalItem(item_type, data, path)
ExternalItem( item_type: Literal['jsonl', 'npy'], data: Any, path: tuple[typing.Union[str, int], ...])
Create new instance of ExternalItem(item_type, data, path)
Inherited Members
- builtins.tuple
- index
- count
def
load_jsonl( zanj: "'LoadedZANJ'", fp: IO[bytes]) -> list[typing.Union[bool, int, float, str, NoneType, typing.List[typing.Union[bool, int, float, str, NoneType, typing.List[typing.Any], typing.Dict[str, typing.Any]]], typing.Dict[str, typing.Union[bool, int, float, str, NoneType, typing.List[typing.Any], typing.Dict[str, typing.Any]]]]]:
def
load_npy(zanj: "'LoadedZANJ'", fp: IO[bytes]) -> numpy.ndarray:
EXTERNAL_LOAD_FUNCS: dict[typing.Literal['jsonl', 'npy'], typing.Callable[[zanj.ZANJ, typing.IO[bytes]], typing.Any]] =
{'jsonl': <function load_jsonl>, 'npy': <function load_npy>}
def GET_EXTERNAL_LOAD_FUNC(item_type: str) -> Callable[[_ZANJ_pre, IO[bytes]], Any]:
    """Look up the loader registered for `item_type` in `EXTERNAL_LOAD_FUNCS`.

    # Raises:
     - `ValueError` : if `item_type` is not a key of `EXTERNAL_LOAD_FUNCS`
    """
    if item_type in EXTERNAL_LOAD_FUNCS:
        # the membership test above makes this lookup safe; the ignore is only
        # needed because `item_type` is typed as a plain `str`
        return EXTERNAL_LOAD_FUNCS[item_type]  # type: ignore[index]

    raise ValueError(
        f"unknown external item type: {item_type}, needs to be one of {EXTERNAL_LOAD_FUNCS.keys()}"
    )