Coverage for src/blob_dict/dict/path.py: 0%

108 statements  

« prev     ^ index     » next       coverage.py v7.8.1, created at 2025-05-23 02:51 -0700

1import shutil 

2from abc import abstractmethod 

3from collections.abc import Iterator 

4from mimetypes import guess_type 

5from pathlib import Path 

6from typing import Any, Literal, Protocol, override 

7 

8from extratools_core.typing import PathLike 

9 

10from ..blob import BytesBlob, StrBlob 

11from ..blob.json import JsonDictBlob, YamlDictBlob 

12from . import BlobDictBase 

13 

14 

15class LocalPath(Path): 

16 def rmtree(self) -> None: 

17 shutil.rmtree(self) 

18 

19 

class ExtraPathLike(PathLike, Protocol):
    """Structural type: a `PathLike` that additionally offers `rmtree()`."""

    @abstractmethod
    def rmtree(self) -> None:
        """Recursively remove whatever this path refers to."""

24 

25 

26class PathBlobDict(BlobDictBase): 

27 def __init__( 

28 self, 

29 path: ExtraPathLike | None = None, 

30 *, 

31 compression: bool = False, 

32 blob_class: type[BytesBlob] = BytesBlob, 

33 blob_class_args: dict[str, Any] | None = None, 

34 ) -> None: 

35 super().__init__() 

36 

37 if path is None: 

38 path = LocalPath(".") 

39 

40 if isinstance(path, Path): 

41 path = path.expanduser() 

42 

43 self.__path: ExtraPathLike = path 

44 

45 self.__compression: bool = compression 

46 

47 self.__blob_class: type[BytesBlob] = blob_class 

48 self.__blob_class_args: dict[str, Any] = blob_class_args or {} 

49 

50 def create(self) -> None: 

51 self.__path.mkdir( 

52 parents=True, 

53 exist_ok=True, 

54 ) 

55 

56 def delete(self) -> None: 

57 self.__path.rmtree() 

58 

59 @override 

60 def __contains__(self, key: object) -> bool: 

61 return (self.__path / str(key)).is_file() 

62 

63 def __get_blob_class(self, key: str) -> type[BytesBlob]: # noqa: PLR0911 

64 mime_type: str | None 

65 mime_type, _ = guess_type(self.__path / key) 

66 

67 match mime_type: 

68 case "application/json": 

69 return JsonDictBlob 

70 case "application/octet-stream": 

71 return BytesBlob 

72 case "application/yaml": 

73 return YamlDictBlob 

74 case "audo/mpeg": 

75 # Import here as it has optional dependency 

76 from ..blob.audio import AudioBlob # noqa: PLC0415 

77 

78 return AudioBlob 

79 case "image/png": 

80 # Import here as it has optional dependency 

81 from ..blob.image import ImageBlob # noqa: PLC0415 

82 

83 return ImageBlob 

84 case ( 

85 "text/css" 

86 | "text/csv" 

87 | "text/html" 

88 | "text/javascript" 

89 | "text/markdown" 

90 | "text/plain" 

91 | "text/xml" 

92 ): 

93 return StrBlob 

94 case "video/mp4": 

95 # Import here as it has optional dependency 

96 from ..blob.video import VideoBlob # noqa: PLC0415 

97 

98 return VideoBlob 

99 case _: 

100 return self.__blob_class 

101 

102 def _get(self, key: str, blob_bytes: bytes) -> BytesBlob: 

103 blob: BytesBlob = BytesBlob.from_bytes(blob_bytes, compression=self.__compression) 

104 return blob.as_blob( 

105 self.__get_blob_class(key), 

106 self.__blob_class_args, 

107 ) 

108 

109 @override 

110 def __getitem__(self, key: str, /) -> BytesBlob: 

111 if key not in self: 

112 raise KeyError 

113 

114 return self._get(key, (self.__path / key).read_bytes()) 

115 

116 @override 

117 def __iter__(self) -> Iterator[str]: 

118 # The concept of relative path does not exist for `CloudPath`, 

119 # and each walked path is always absolute for `CloudPath`. 

120 # Therefore, we extract each key by removing the path prefix. 

121 # In this way, the same logic works for both absolute and relative path. 

122 prefix_len: int = ( 

123 len(str(self.__path.absolute())) 

124 # Extra 1 is for separator `/` between prefix and filename 

125 + 1 

126 ) 

127 

128 for parent, _, files in self.__path.walk(): 

129 for filename in files: 

130 yield str((parent / filename).absolute())[prefix_len:] 

131 

132 @override 

133 def clear(self) -> None: 

134 for parent, dirs, files in self.__path.walk(top_down=False): 

135 for filename in files: 

136 (parent / filename).unlink() 

137 for dirname in dirs: 

138 (parent / dirname).rmdir() 

139 

140 def __cleanup(self, key: str) -> None: 

141 (self.__path / key).unlink() 

142 

143 for parent in (self.__path / key).parents: 

144 if parent == self.__path: 

145 return 

146 

147 if parent.is_dir() and next(iter(parent.iterdir()), None) is None: 

148 parent.rmdir() 

149 

150 @override 

151 def pop[T: Any]( 

152 self, 

153 key: str, 

154 /, 

155 default: BytesBlob | T | Literal["__DEFAULT"] = "__DEFAULT", 

156 ) -> BytesBlob | T: 

157 blob: BytesBlob | None = self.get(key) 

158 if blob: 

159 self.__cleanup(key) 

160 

161 if blob is not None: 

162 return blob 

163 

164 if default == "__DEFAULT": 

165 raise KeyError 

166 

167 return default 

168 

169 @override 

170 def __delitem__(self, key: str, /) -> None: 

171 if key not in self: 

172 raise KeyError 

173 

174 self.__cleanup(key) 

175 

176 __BAD_BLOB_CLASS_ERROR_MESSAGE: str = "Must specify blob that is instance of {blob_class}" 

177 

178 @override 

179 def __setitem__(self, key: str, blob: BytesBlob, /) -> None: 

180 if not isinstance(blob, self.__blob_class): 

181 raise TypeError(PathBlobDict.__BAD_BLOB_CLASS_ERROR_MESSAGE.format( 

182 blob_class=self.__blob_class, 

183 )) 

184 

185 (self.__path / key).parent.mkdir( 

186 parents=True, 

187 exist_ok=True, 

188 ) 

189 

190 blob_bytes: bytes = blob.as_bytes(compression=self.__compression) 

191 (self.__path / key).write_bytes(blob_bytes)