Coverage for src/blob_dict/dict/path.py: 0%

94 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-03-31 18:44 -0700

1import shutil 

2from collections.abc import Iterator 

3from mimetypes import guess_type 

4from pathlib import Path 

5from typing import Any, override 

6 

7from cloudpathlib import CloudPath 

8 

9from ..blob import BytesBlob, StrBlob 

10from ..blob.json import JsonDictBlob, YamlDictBlob 

11from . import BlobDictBase 

12 

13 

14class LocalPath(Path): 

15 def rmtree(self) -> None: 

16 shutil.rmtree(self) 

17 

18 

19class PathBlobDict(BlobDictBase): 

20 def __init__( 

21 self, 

22 path: LocalPath | CloudPath, 

23 *, 

24 compression: bool = False, 

25 blob_class: type[BytesBlob] = BytesBlob, 

26 blob_class_args: dict[str, Any] | None = None, 

27 ) -> None: 

28 super().__init__() 

29 

30 self.__path: LocalPath | CloudPath = path 

31 

32 self.__compression: bool = compression 

33 

34 self.__blob_class: type[BytesBlob] = blob_class 

35 self.__blob_class_args: dict[str, Any] = blob_class_args or {} 

36 

37 def create(self) -> None: 

38 self.__path.mkdir( 

39 parents=True, 

40 exist_ok=True, 

41 ) 

42 

43 def delete(self) -> None: 

44 self.__path.rmtree() 

45 

46 @override 

47 def __contains__(self, key: str) -> bool: 

48 return (self.__path / key).is_file() 

49 

50 def __get_blob_class(self, key: str) -> type[BytesBlob]: # noqa: PLR0911 

51 mime_type: str | None 

52 mime_type, _ = guess_type(self.__path / key) 

53 

54 match mime_type: 

55 case "application/json": 

56 return JsonDictBlob 

57 case "application/octet-stream": 

58 return BytesBlob 

59 case "application/yaml": 

60 return YamlDictBlob 

61 case "audo/mpeg": 

62 # Import here as it has optional dependency 

63 from ..blob.audio import AudioBlob # noqa: PLC0415 

64 

65 return AudioBlob 

66 case "image/png": 

67 # Import here as it has optional dependency 

68 from ..blob.image import ImageBlob # noqa: PLC0415 

69 

70 return ImageBlob 

71 case ( 

72 "text/css" 

73 | "text/csv" 

74 | "text/html" 

75 | "text/javascript" 

76 | "text/markdown" 

77 | "text/plain" 

78 | "text/xml" 

79 ): 

80 return StrBlob 

81 case "video/mp4": 

82 # Import here as it has optional dependency 

83 from ..blob.video import VideoBlob # noqa: PLC0415 

84 

85 return VideoBlob 

86 case _: 

87 return self.__blob_class 

88 

89 @override 

90 def get(self, key: str, default: BytesBlob | None = None) -> BytesBlob | None: 

91 if key not in self: 

92 return default 

93 

94 blob_bytes: bytes = (self.__path / key).read_bytes() 

95 

96 blob: BytesBlob = BytesBlob.from_bytes(blob_bytes, compression=self.__compression) 

97 return blob.as_blob( 

98 self.__get_blob_class(key), 

99 self.__blob_class_args, 

100 ) 

101 

102 @override 

103 def __iter__(self) -> Iterator[str]: 

104 # The concept of relative path does not exist for `CloudPath`, 

105 # and each walked path is always absolute for `CloudPath`. 

106 # Therefore, we extract each key by removing the path prefix. 

107 # In this way, the same logic works for both absolute and relative path. 

108 prefix_len: int = ( 

109 len(str(self.__path)) 

110 # Extra 1 is for separator `/` between prefix and filename 

111 + 1 

112 ) 

113 

114 for parent, _, files in self.__path.walk(top_down=False): 

115 for filename in files: 

116 yield str(parent / filename)[prefix_len:] 

117 

118 @override 

119 def clear(self) -> None: 

120 for parent, dirs, files in self.__path.walk(top_down=False): 

121 for filename in files: 

122 (parent / filename).unlink() 

123 for dirname in dirs: 

124 (parent / dirname).rmdir() 

125 

126 def __cleanup(self, key: str) -> None: 

127 (self.__path / key).unlink() 

128 

129 for parent in (self.__path / key).parents: 

130 if parent == self.__path: 

131 return 

132 

133 if parent.is_dir() and next(parent.iterdir(), None) is None: 

134 parent.rmdir() 

135 

136 @override 

137 def pop(self, key: str, default: BytesBlob | None = None) -> BytesBlob | None: 

138 blob: BytesBlob | None = self.get(key) 

139 if blob: 

140 self.__cleanup(key) 

141 

142 return blob or default 

143 

144 @override 

145 def __delitem__(self, key: str) -> None: 

146 if key not in self: 

147 raise KeyError 

148 

149 self.__cleanup(key) 

150 

151 __BAD_BLOB_CLASS_ERROR_MESSAGE: str = "Must specify blob that is instance of {blob_class}" 

152 

153 @override 

154 def __setitem__(self, key: str, blob: BytesBlob) -> None: 

155 if not isinstance(blob, self.__blob_class): 

156 raise TypeError(PathBlobDict.__BAD_BLOB_CLASS_ERROR_MESSAGE.format( 

157 blob_class=self.__blob_class, 

158 )) 

159 

160 (self.__path / key).parent.mkdir( 

161 parents=True, 

162 exist_ok=True, 

163 ) 

164 

165 blob_bytes: bytes = blob.as_bytes(compression=self.__compression) 

166 (self.__path / key).write_bytes(blob_bytes)