Coverage for src/blob_dict/dict/path.py: 0%
86 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-27 22:31 -0700
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-27 22:31 -0700
1import shutil
2from collections.abc import Iterator
3from pathlib import Path
4from typing import Any, override
6from cloudpathlib import CloudPath
7from simple_zstd import compress, decompress
9from ..blob import BytesBlob, StrBlob
10from ..blob.json import JsonDictBlob
11from . import BlobDictBase
14class LocalPath(Path):
15 def rmtree(self) -> None:
16 shutil.rmtree(self)
19class PathBlobDict(BlobDictBase):
20 def __init__(
21 self,
22 path: LocalPath | CloudPath,
23 *,
24 compression: bool = False,
25 blob_class: type[BytesBlob] = BytesBlob,
26 blob_class_args: dict[str, Any] | None = None,
27 ) -> None:
28 super().__init__()
30 self.__path: LocalPath | CloudPath = path
32 self.__compression: bool = compression
34 self.__blob_class: type[BytesBlob] = blob_class
35 self.__blob_class_args: dict[str, Any] = blob_class_args or {}
37 def create(self) -> None:
38 self.__path.mkdir(
39 parents=True,
40 exist_ok=True,
41 )
43 def delete(self) -> None:
44 self.__path.rmtree()
46 @override
47 def __contains__(self, key: str) -> bool:
48 return (self.__path / key).is_file()
50 def __get_blob_class(self, key: str) -> type[BytesBlob]:
51 match (self.__path / key).suffix.lower():
52 case ".json":
53 return JsonDictBlob
54 case ".png":
55 # Import here as it has optional dependency
56 from ..blob.image import ImageBlob # noqa: PLC0415
58 return ImageBlob
59 # Common text file extensions
60 # https://en.wikipedia.org/wiki/List_of_file_formats
61 case (
62 ".asc"
63 | ".bib"
64 | ".cfg"
65 | ".cnf"
66 | ".conf"
67 | ".csv"
68 | ".diff"
69 | ".htm"
70 | ".html"
71 | ".ini"
72 | ".log"
73 | ".markdown"
74 | ".md"
75 | ".tex"
76 | ".text"
77 | ".toml"
78 | ".tsv"
79 | ".txt"
80 | ".xhtml"
81 | ".xht"
82 | ".xml"
83 | ".yaml"
84 | ".yml"
85 ):
86 return StrBlob
87 case _:
88 return self.__blob_class
90 @override
91 def get(self, key: str, default: BytesBlob | None = None) -> BytesBlob | None:
92 if key not in self:
93 return default
95 blob_bytes: bytes = (self.__path / key).read_bytes()
96 if self.__compression:
97 blob_bytes = decompress(blob_bytes)
98 return self.__get_blob_class(key)(blob_bytes, **self.__blob_class_args)
100 @override
101 def __iter__(self) -> Iterator[str]:
102 # The concept of relative path does not exist for `CloudPath`,
103 # and each walked path is always absolute for `CloudPath`.
104 # Therefore, we extract each key by removing the path prefix.
105 # In this way, the same logic works for both absolute and relative path.
106 prefix_len: int = (
107 len(str(self.__path))
108 # Extra 1 is for separator `/` between prefix and filename
109 + 1
110 )
112 for parent, _, files in self.__path.walk(top_down=False):
113 for filename in files:
114 yield str(parent / filename)[prefix_len:]
116 @override
117 def clear(self) -> None:
118 for parent, dirs, files in self.__path.walk(top_down=False):
119 for filename in files:
120 (parent / filename).unlink()
121 for dirname in dirs:
122 (parent / dirname).rmdir()
124 def __cleanup(self, key: str) -> None:
125 (self.__path / key).unlink()
127 for parent in (self.__path / key).parents:
128 if parent == self.__path:
129 return
131 if parent.is_dir() and next(parent.iterdir(), None) is None:
132 parent.rmdir()
134 @override
135 def pop(self, key: str, default: BytesBlob | None = None) -> BytesBlob | None:
136 blob: BytesBlob | None = self.get(key)
137 if blob:
138 self.__cleanup(key)
140 return blob or default
142 @override
143 def __delitem__(self, key: str) -> None:
144 if key not in self:
145 raise KeyError
147 self.__cleanup(key)
149 __BAD_BLOB_CLASS_ERROR_MESSAGE: str = "Must specify blob that is instance of {blob_class}"
151 @override
152 def __setitem__(self, key: str, blob: BytesBlob) -> None:
153 if not isinstance(blob, self.__blob_class):
154 raise TypeError(PathBlobDict.__BAD_BLOB_CLASS_ERROR_MESSAGE.format(
155 blob_class=self.__blob_class,
156 ))
158 (self.__path / key).parent.mkdir(
159 parents=True,
160 exist_ok=True,
161 )
163 blob_bytes: bytes = blob.as_bytes()
164 if self.__compression:
165 blob_bytes = compress(blob_bytes)
166 (self.__path / key).write_bytes(blob_bytes)