Coverage for src/blob_dict/dict/path.py: 0%
108 statements
« prev ^ index » next coverage.py v7.8.1, created at 2025-05-23 02:51 -0700
« prev ^ index » next coverage.py v7.8.1, created at 2025-05-23 02:51 -0700
1import shutil
2from abc import abstractmethod
3from collections.abc import Iterator
4from mimetypes import guess_type
5from pathlib import Path
6from typing import Any, Literal, Protocol, override
8from extratools_core.typing import PathLike
10from ..blob import BytesBlob, StrBlob
11from ..blob.json import JsonDictBlob, YamlDictBlob
12from . import BlobDictBase
15class LocalPath(Path):
16 def rmtree(self) -> None:
17 shutil.rmtree(self)
class ExtraPathLike(PathLike, Protocol):
    """Structural type for a ``PathLike`` that additionally offers ``rmtree``."""

    @abstractmethod
    def rmtree(self) -> None:
        """Remove the tree rooted at this path (``LocalPath`` uses ``shutil.rmtree``)."""
26class PathBlobDict(BlobDictBase):
27 def __init__(
28 self,
29 path: ExtraPathLike | None = None,
30 *,
31 compression: bool = False,
32 blob_class: type[BytesBlob] = BytesBlob,
33 blob_class_args: dict[str, Any] | None = None,
34 ) -> None:
35 super().__init__()
37 if path is None:
38 path = LocalPath(".")
40 if isinstance(path, Path):
41 path = path.expanduser()
43 self.__path: ExtraPathLike = path
45 self.__compression: bool = compression
47 self.__blob_class: type[BytesBlob] = blob_class
48 self.__blob_class_args: dict[str, Any] = blob_class_args or {}
50 def create(self) -> None:
51 self.__path.mkdir(
52 parents=True,
53 exist_ok=True,
54 )
56 def delete(self) -> None:
57 self.__path.rmtree()
59 @override
60 def __contains__(self, key: object) -> bool:
61 return (self.__path / str(key)).is_file()
63 def __get_blob_class(self, key: str) -> type[BytesBlob]: # noqa: PLR0911
64 mime_type: str | None
65 mime_type, _ = guess_type(self.__path / key)
67 match mime_type:
68 case "application/json":
69 return JsonDictBlob
70 case "application/octet-stream":
71 return BytesBlob
72 case "application/yaml":
73 return YamlDictBlob
74 case "audo/mpeg":
75 # Import here as it has optional dependency
76 from ..blob.audio import AudioBlob # noqa: PLC0415
78 return AudioBlob
79 case "image/png":
80 # Import here as it has optional dependency
81 from ..blob.image import ImageBlob # noqa: PLC0415
83 return ImageBlob
84 case (
85 "text/css"
86 | "text/csv"
87 | "text/html"
88 | "text/javascript"
89 | "text/markdown"
90 | "text/plain"
91 | "text/xml"
92 ):
93 return StrBlob
94 case "video/mp4":
95 # Import here as it has optional dependency
96 from ..blob.video import VideoBlob # noqa: PLC0415
98 return VideoBlob
99 case _:
100 return self.__blob_class
102 def _get(self, key: str, blob_bytes: bytes) -> BytesBlob:
103 blob: BytesBlob = BytesBlob.from_bytes(blob_bytes, compression=self.__compression)
104 return blob.as_blob(
105 self.__get_blob_class(key),
106 self.__blob_class_args,
107 )
109 @override
110 def __getitem__(self, key: str, /) -> BytesBlob:
111 if key not in self:
112 raise KeyError
114 return self._get(key, (self.__path / key).read_bytes())
116 @override
117 def __iter__(self) -> Iterator[str]:
118 # The concept of relative path does not exist for `CloudPath`,
119 # and each walked path is always absolute for `CloudPath`.
120 # Therefore, we extract each key by removing the path prefix.
121 # In this way, the same logic works for both absolute and relative path.
122 prefix_len: int = (
123 len(str(self.__path.absolute()))
124 # Extra 1 is for separator `/` between prefix and filename
125 + 1
126 )
128 for parent, _, files in self.__path.walk():
129 for filename in files:
130 yield str((parent / filename).absolute())[prefix_len:]
132 @override
133 def clear(self) -> None:
134 for parent, dirs, files in self.__path.walk(top_down=False):
135 for filename in files:
136 (parent / filename).unlink()
137 for dirname in dirs:
138 (parent / dirname).rmdir()
140 def __cleanup(self, key: str) -> None:
141 (self.__path / key).unlink()
143 for parent in (self.__path / key).parents:
144 if parent == self.__path:
145 return
147 if parent.is_dir() and next(iter(parent.iterdir()), None) is None:
148 parent.rmdir()
150 @override
151 def pop[T: Any](
152 self,
153 key: str,
154 /,
155 default: BytesBlob | T | Literal["__DEFAULT"] = "__DEFAULT",
156 ) -> BytesBlob | T:
157 blob: BytesBlob | None = self.get(key)
158 if blob:
159 self.__cleanup(key)
161 if blob is not None:
162 return blob
164 if default == "__DEFAULT":
165 raise KeyError
167 return default
169 @override
170 def __delitem__(self, key: str, /) -> None:
171 if key not in self:
172 raise KeyError
174 self.__cleanup(key)
176 __BAD_BLOB_CLASS_ERROR_MESSAGE: str = "Must specify blob that is instance of {blob_class}"
178 @override
179 def __setitem__(self, key: str, blob: BytesBlob, /) -> None:
180 if not isinstance(blob, self.__blob_class):
181 raise TypeError(PathBlobDict.__BAD_BLOB_CLASS_ERROR_MESSAGE.format(
182 blob_class=self.__blob_class,
183 ))
185 (self.__path / key).parent.mkdir(
186 parents=True,
187 exist_ok=True,
188 )
190 blob_bytes: bytes = blob.as_bytes(compression=self.__compression)
191 (self.__path / key).write_bytes(blob_bytes)