Coverage for src/blob_dict/dict/path.py: 0%
112 statements
« prev ^ index » next coverage.py v7.8.1, created at 2025-05-29 23:07 -0700
1import shutil
2from abc import abstractmethod
3from collections.abc import Iterator
4from datetime import UTC, datetime, timedelta
5from mimetypes import guess_type
6from pathlib import Path
7from typing import Any, Literal, Protocol, cast, override
9from extratools_core.path import rm_with_empty_parents
10from extratools_core.typing import PathLike
12from ..blob import BytesBlob, StrBlob
13from ..blob.json import JsonDictBlob, YamlDictBlob
14from . import BlobDictBase
17class LocalPath(Path):
18 def rmtree(self) -> None:
19 shutil.rmtree(self)
class ExtraPathLike(PathLike, Protocol):
    """Structural path interface that additionally supports `rmtree`.

    Any `PathLike` implementation (local `Path`, cloud paths, ...) that can
    recursively remove a directory tree conforms to this protocol.
    """

    @abstractmethod
    def rmtree(self) -> None:
        """Recursively delete this directory and all of its contents."""
28class PathBlobDict(BlobDictBase):
29 def __init__(
30 self,
31 path: ExtraPathLike | None = None,
32 *,
33 compression: bool = False,
34 ttl: timedelta | None = None,
35 blob_class: type[BytesBlob] = BytesBlob,
36 blob_class_args: dict[str, Any] | None = None,
37 ) -> None:
38 super().__init__()
40 if path is None:
41 path = LocalPath(".")
43 if isinstance(path, Path):
44 path = path.expanduser()
46 self.__path: ExtraPathLike = path
48 self.__compression: bool = compression
50 # Note that we do not automatically cleanup by TTL for reasons below:
51 # - It is tricky to do so for local path without CRON job or daemon process
52 # - Multiple objects could actually use same directory with different TTLs
53 # Thus, it is best to depend on native solution for cleanup by TTL,
54 # like S3's object lifecycle management.
55 self.__ttl: timedelta | None = ttl
57 self.__blob_class: type[BytesBlob] = blob_class
58 self.__blob_class_args: dict[str, Any] = blob_class_args or {}
60 def create(self) -> None:
61 self.__path.mkdir(
62 parents=True,
63 exist_ok=True,
64 )
66 def delete(self) -> None:
67 self.__path.rmtree()
69 def __is_expired(self, key_path: PathLike) -> bool:
70 return (
71 datetime.now(UTC)
72 - datetime.fromtimestamp(key_path.stat().st_mtime, UTC)
73 > cast("timedelta", self.__ttl)
74 )
76 @override
77 def __contains__(self, key: object) -> bool:
78 key_path: PathLike = self.__path / str(key)
80 return (
81 key_path.is_file()
82 and (
83 not self.__ttl
84 or not self.__is_expired(key_path)
85 )
86 )
88 def __get_blob_class(self, key: str) -> type[BytesBlob]: # noqa: PLR0911
89 mime_type: str | None
90 mime_type, _ = guess_type(self.__path / key)
92 match mime_type:
93 case "application/json":
94 return JsonDictBlob
95 case "application/octet-stream":
96 return BytesBlob
97 case "application/yaml":
98 return YamlDictBlob
99 case "audo/mpeg":
100 # Import here as it has optional dependency
101 from ..blob.audio import AudioBlob # noqa: PLC0415
103 return AudioBlob
104 case "image/png":
105 # Import here as it has optional dependency
106 from ..blob.image import ImageBlob # noqa: PLC0415
108 return ImageBlob
109 case (
110 "text/css"
111 | "text/csv"
112 | "text/html"
113 | "text/javascript"
114 | "text/markdown"
115 | "text/plain"
116 | "text/xml"
117 ):
118 return StrBlob
119 case "video/mp4":
120 # Import here as it has optional dependency
121 from ..blob.video import VideoBlob # noqa: PLC0415
123 return VideoBlob
124 case _:
125 return self.__blob_class
127 def _get(self, key: str, blob_bytes: bytes) -> BytesBlob:
128 blob: BytesBlob = BytesBlob.from_bytes(blob_bytes, compression=self.__compression)
129 return blob.as_blob(
130 self.__get_blob_class(key),
131 self.__blob_class_args,
132 )
134 @override
135 def __getitem__(self, key: str, /) -> BytesBlob:
136 if key not in self:
137 raise KeyError
139 return self._get(key, (self.__path / key).read_bytes())
141 @override
142 def __iter__(self) -> Iterator[str]:
143 # The concept of relative path does not exist for `CloudPath`,
144 # and each walked path is always absolute for `CloudPath`.
145 # Therefore, we extract each key by removing the path prefix.
146 # In this way, the same logic works for both absolute and relative path.
147 prefix_len: int = (
148 len(str(self.__path.absolute()))
149 # Extra 1 is for separator `/` between prefix and filename
150 + 1
151 )
153 for parent, _, files in self.__path.walk():
154 for filename in files:
155 key_path: PathLike = parent / filename
156 if self.__ttl and self.__is_expired(key_path):
157 continue
159 yield str(key_path.absolute())[prefix_len:]
161 @override
162 def clear(self) -> None:
163 for parent, dirs, files in self.__path.walk(top_down=False):
164 for filename in files:
165 (parent / filename).unlink()
166 for dirname in dirs:
167 (parent / dirname).rmdir()
169 def __cleanup(self, key: str) -> None:
170 rm_with_empty_parents(self.__path / key, stop=self.__path)
172 @override
173 def pop[T: Any](
174 self,
175 key: str,
176 /,
177 default: BytesBlob | T | Literal["__DEFAULT"] = "__DEFAULT",
178 ) -> BytesBlob | T:
179 blob: BytesBlob | None = self.get(key)
180 if blob:
181 self.__cleanup(key)
183 if blob is not None:
184 return blob
186 if default == "__DEFAULT":
187 raise KeyError
189 return default
191 @override
192 def __delitem__(self, key: str, /) -> None:
193 if key not in self:
194 raise KeyError
196 self.__cleanup(key)
198 __BAD_BLOB_CLASS_ERROR_MESSAGE: str = "Must specify blob that is instance of {blob_class}"
200 @override
201 def __setitem__(self, key: str, blob: BytesBlob, /) -> None:
202 if not isinstance(blob, self.__blob_class):
203 raise TypeError(PathBlobDict.__BAD_BLOB_CLASS_ERROR_MESSAGE.format(
204 blob_class=self.__blob_class,
205 ))
207 (self.__path / key).parent.mkdir(
208 parents=True,
209 exist_ok=True,
210 )
212 blob_bytes: bytes = blob.as_bytes(compression=self.__compression)
213 (self.__path / key).write_bytes(blob_bytes)