Coverage for src/blob_dict/dict/path.py: 0%

86 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-03-27 22:31 -0700

1import shutil 

2from collections.abc import Iterator 

3from pathlib import Path 

4from typing import Any, override 

5 

6from cloudpathlib import CloudPath 

7from simple_zstd import compress, decompress 

8 

9from ..blob import BytesBlob, StrBlob 

10from ..blob.json import JsonDictBlob 

11from . import BlobDictBase 

12 

13 

class LocalPath(Path):
    """Local filesystem path that also offers an ``rmtree`` method.

    ``PathBlobDict.delete`` calls ``rmtree()`` on its root path; this subclass
    supplies that method for plain local paths.
    """

    def rmtree(self) -> None:
        """Delete the directory tree rooted at this path via ``shutil.rmtree``."""
        shutil.rmtree(self)

17 

18 

19class PathBlobDict(BlobDictBase): 

20 def __init__( 

21 self, 

22 path: LocalPath | CloudPath, 

23 *, 

24 compression: bool = False, 

25 blob_class: type[BytesBlob] = BytesBlob, 

26 blob_class_args: dict[str, Any] | None = None, 

27 ) -> None: 

28 super().__init__() 

29 

30 self.__path: LocalPath | CloudPath = path 

31 

32 self.__compression: bool = compression 

33 

34 self.__blob_class: type[BytesBlob] = blob_class 

35 self.__blob_class_args: dict[str, Any] = blob_class_args or {} 

36 

37 def create(self) -> None: 

38 self.__path.mkdir( 

39 parents=True, 

40 exist_ok=True, 

41 ) 

42 

43 def delete(self) -> None: 

44 self.__path.rmtree() 

45 

46 @override 

47 def __contains__(self, key: str) -> bool: 

48 return (self.__path / key).is_file() 

49 

50 def __get_blob_class(self, key: str) -> type[BytesBlob]: 

51 match (self.__path / key).suffix.lower(): 

52 case ".json": 

53 return JsonDictBlob 

54 case ".png": 

55 # Import here as it has optional dependency 

56 from ..blob.image import ImageBlob # noqa: PLC0415 

57 

58 return ImageBlob 

59 # Common text file extensions 

60 # https://en.wikipedia.org/wiki/List_of_file_formats 

61 case ( 

62 ".asc" 

63 | ".bib" 

64 | ".cfg" 

65 | ".cnf" 

66 | ".conf" 

67 | ".csv" 

68 | ".diff" 

69 | ".htm" 

70 | ".html" 

71 | ".ini" 

72 | ".log" 

73 | ".markdown" 

74 | ".md" 

75 | ".tex" 

76 | ".text" 

77 | ".toml" 

78 | ".tsv" 

79 | ".txt" 

80 | ".xhtml" 

81 | ".xht" 

82 | ".xml" 

83 | ".yaml" 

84 | ".yml" 

85 ): 

86 return StrBlob 

87 case _: 

88 return self.__blob_class 

89 

90 @override 

91 def get(self, key: str, default: BytesBlob | None = None) -> BytesBlob | None: 

92 if key not in self: 

93 return default 

94 

95 blob_bytes: bytes = (self.__path / key).read_bytes() 

96 if self.__compression: 

97 blob_bytes = decompress(blob_bytes) 

98 return self.__get_blob_class(key)(blob_bytes, **self.__blob_class_args) 

99 

100 @override 

101 def __iter__(self) -> Iterator[str]: 

102 # The concept of relative path does not exist for `CloudPath`, 

103 # and each walked path is always absolute for `CloudPath`. 

104 # Therefore, we extract each key by removing the path prefix. 

105 # In this way, the same logic works for both absolute and relative path. 

106 prefix_len: int = ( 

107 len(str(self.__path)) 

108 # Extra 1 is for separator `/` between prefix and filename 

109 + 1 

110 ) 

111 

112 for parent, _, files in self.__path.walk(top_down=False): 

113 for filename in files: 

114 yield str(parent / filename)[prefix_len:] 

115 

116 @override 

117 def clear(self) -> None: 

118 for parent, dirs, files in self.__path.walk(top_down=False): 

119 for filename in files: 

120 (parent / filename).unlink() 

121 for dirname in dirs: 

122 (parent / dirname).rmdir() 

123 

124 def __cleanup(self, key: str) -> None: 

125 (self.__path / key).unlink() 

126 

127 for parent in (self.__path / key).parents: 

128 if parent == self.__path: 

129 return 

130 

131 if parent.is_dir() and next(parent.iterdir(), None) is None: 

132 parent.rmdir() 

133 

134 @override 

135 def pop(self, key: str, default: BytesBlob | None = None) -> BytesBlob | None: 

136 blob: BytesBlob | None = self.get(key) 

137 if blob: 

138 self.__cleanup(key) 

139 

140 return blob or default 

141 

142 @override 

143 def __delitem__(self, key: str) -> None: 

144 if key not in self: 

145 raise KeyError 

146 

147 self.__cleanup(key) 

148 

149 __BAD_BLOB_CLASS_ERROR_MESSAGE: str = "Must specify blob that is instance of {blob_class}" 

150 

151 @override 

152 def __setitem__(self, key: str, blob: BytesBlob) -> None: 

153 if not isinstance(blob, self.__blob_class): 

154 raise TypeError(PathBlobDict.__BAD_BLOB_CLASS_ERROR_MESSAGE.format( 

155 blob_class=self.__blob_class, 

156 )) 

157 

158 (self.__path / key).parent.mkdir( 

159 parents=True, 

160 exist_ok=True, 

161 ) 

162 

163 blob_bytes: bytes = blob.as_bytes() 

164 if self.__compression: 

165 blob_bytes = compress(blob_bytes) 

166 (self.__path / key).write_bytes(blob_bytes)