Coverage for src/blob_dict/dict/path.py: 0%

112 statements  

« prev     ^ index     » next       coverage.py v7.8.1, created at 2025-05-29 23:07 -0700

1import shutil 

2from abc import abstractmethod 

3from collections.abc import Iterator 

4from datetime import UTC, datetime, timedelta 

5from mimetypes import guess_type 

6from pathlib import Path 

7from typing import Any, Literal, Protocol, cast, override 

8 

9from extratools_core.path import rm_with_empty_parents 

10from extratools_core.typing import PathLike 

11 

12from ..blob import BytesBlob, StrBlob 

13from ..blob.json import JsonDictBlob, YamlDictBlob 

14from . import BlobDictBase 

15 

16 

17class LocalPath(Path): 

18 def rmtree(self) -> None: 

19 shutil.rmtree(self) 

20 

21 

class ExtraPathLike(PathLike, Protocol):
    """Path-like object that additionally knows how to remove itself recursively.

    Structural protocol: any ``PathLike`` implementation that provides
    ``rmtree`` (e.g. ``LocalPath``, or a cloud path with equivalent support)
    satisfies it.
    """

    @abstractmethod
    def rmtree(self) -> None:
        """Recursively delete the directory tree rooted at this path."""

26 

27 

28class PathBlobDict(BlobDictBase): 

29 def __init__( 

30 self, 

31 path: ExtraPathLike | None = None, 

32 *, 

33 compression: bool = False, 

34 ttl: timedelta | None = None, 

35 blob_class: type[BytesBlob] = BytesBlob, 

36 blob_class_args: dict[str, Any] | None = None, 

37 ) -> None: 

38 super().__init__() 

39 

40 if path is None: 

41 path = LocalPath(".") 

42 

43 if isinstance(path, Path): 

44 path = path.expanduser() 

45 

46 self.__path: ExtraPathLike = path 

47 

48 self.__compression: bool = compression 

49 

50 # Note that we do not automatically cleanup by TTL for reasons below: 

51 # - It is tricky to do so for local path without CRON job or daemon process 

52 # - Multiple objects could actually use same directory with different TTLs 

53 # Thus, it is best to depend on native solution for cleanup by TTL, 

54 # like S3's object lifecycle management. 

55 self.__ttl: timedelta | None = ttl 

56 

57 self.__blob_class: type[BytesBlob] = blob_class 

58 self.__blob_class_args: dict[str, Any] = blob_class_args or {} 

59 

60 def create(self) -> None: 

61 self.__path.mkdir( 

62 parents=True, 

63 exist_ok=True, 

64 ) 

65 

66 def delete(self) -> None: 

67 self.__path.rmtree() 

68 

69 def __is_expired(self, key_path: PathLike) -> bool: 

70 return ( 

71 datetime.now(UTC) 

72 - datetime.fromtimestamp(key_path.stat().st_mtime, UTC) 

73 > cast("timedelta", self.__ttl) 

74 ) 

75 

76 @override 

77 def __contains__(self, key: object) -> bool: 

78 key_path: PathLike = self.__path / str(key) 

79 

80 return ( 

81 key_path.is_file() 

82 and ( 

83 not self.__ttl 

84 or not self.__is_expired(key_path) 

85 ) 

86 ) 

87 

88 def __get_blob_class(self, key: str) -> type[BytesBlob]: # noqa: PLR0911 

89 mime_type: str | None 

90 mime_type, _ = guess_type(self.__path / key) 

91 

92 match mime_type: 

93 case "application/json": 

94 return JsonDictBlob 

95 case "application/octet-stream": 

96 return BytesBlob 

97 case "application/yaml": 

98 return YamlDictBlob 

99 case "audo/mpeg": 

100 # Import here as it has optional dependency 

101 from ..blob.audio import AudioBlob # noqa: PLC0415 

102 

103 return AudioBlob 

104 case "image/png": 

105 # Import here as it has optional dependency 

106 from ..blob.image import ImageBlob # noqa: PLC0415 

107 

108 return ImageBlob 

109 case ( 

110 "text/css" 

111 | "text/csv" 

112 | "text/html" 

113 | "text/javascript" 

114 | "text/markdown" 

115 | "text/plain" 

116 | "text/xml" 

117 ): 

118 return StrBlob 

119 case "video/mp4": 

120 # Import here as it has optional dependency 

121 from ..blob.video import VideoBlob # noqa: PLC0415 

122 

123 return VideoBlob 

124 case _: 

125 return self.__blob_class 

126 

127 def _get(self, key: str, blob_bytes: bytes) -> BytesBlob: 

128 blob: BytesBlob = BytesBlob.from_bytes(blob_bytes, compression=self.__compression) 

129 return blob.as_blob( 

130 self.__get_blob_class(key), 

131 self.__blob_class_args, 

132 ) 

133 

134 @override 

135 def __getitem__(self, key: str, /) -> BytesBlob: 

136 if key not in self: 

137 raise KeyError 

138 

139 return self._get(key, (self.__path / key).read_bytes()) 

140 

141 @override 

142 def __iter__(self) -> Iterator[str]: 

143 # The concept of relative path does not exist for `CloudPath`, 

144 # and each walked path is always absolute for `CloudPath`. 

145 # Therefore, we extract each key by removing the path prefix. 

146 # In this way, the same logic works for both absolute and relative path. 

147 prefix_len: int = ( 

148 len(str(self.__path.absolute())) 

149 # Extra 1 is for separator `/` between prefix and filename 

150 + 1 

151 ) 

152 

153 for parent, _, files in self.__path.walk(): 

154 for filename in files: 

155 key_path: PathLike = parent / filename 

156 if self.__ttl and self.__is_expired(key_path): 

157 continue 

158 

159 yield str(key_path.absolute())[prefix_len:] 

160 

161 @override 

162 def clear(self) -> None: 

163 for parent, dirs, files in self.__path.walk(top_down=False): 

164 for filename in files: 

165 (parent / filename).unlink() 

166 for dirname in dirs: 

167 (parent / dirname).rmdir() 

168 

169 def __cleanup(self, key: str) -> None: 

170 rm_with_empty_parents(self.__path / key, stop=self.__path) 

171 

172 @override 

173 def pop[T: Any]( 

174 self, 

175 key: str, 

176 /, 

177 default: BytesBlob | T | Literal["__DEFAULT"] = "__DEFAULT", 

178 ) -> BytesBlob | T: 

179 blob: BytesBlob | None = self.get(key) 

180 if blob: 

181 self.__cleanup(key) 

182 

183 if blob is not None: 

184 return blob 

185 

186 if default == "__DEFAULT": 

187 raise KeyError 

188 

189 return default 

190 

191 @override 

192 def __delitem__(self, key: str, /) -> None: 

193 if key not in self: 

194 raise KeyError 

195 

196 self.__cleanup(key) 

197 

198 __BAD_BLOB_CLASS_ERROR_MESSAGE: str = "Must specify blob that is instance of {blob_class}" 

199 

200 @override 

201 def __setitem__(self, key: str, blob: BytesBlob, /) -> None: 

202 if not isinstance(blob, self.__blob_class): 

203 raise TypeError(PathBlobDict.__BAD_BLOB_CLASS_ERROR_MESSAGE.format( 

204 blob_class=self.__blob_class, 

205 )) 

206 

207 (self.__path / key).parent.mkdir( 

208 parents=True, 

209 exist_ok=True, 

210 ) 

211 

212 blob_bytes: bytes = blob.as_bytes(compression=self.__compression) 

213 (self.__path / key).write_bytes(blob_bytes)