Coverage for C:\src\imod-python\imod\util\path.py: 98%

96 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-08 14:15 +0200

1""" 

2Conventional IDF filenames can be understood and constructed using 

3:func:`imod.util.path.decompose` and :func:`imod.util.path.compose`. These are used 

4automatically in :func:`imod.idf`. 

5""" 

6 

7import datetime 

8import pathlib 

9import re 

10import tempfile 

11from typing import Any, Dict, Optional 

12 

13import cftime 

14import numpy as np 

15 

16from imod.util.time import _compose_timestring, to_datetime 

17 

# Alias for the compiled regular expression type. It is used in annotations
# and in ``isinstance`` checks to tell compiled patterns from format strings.
Pattern = re.Pattern

19 

20 

def _custom_pattern_to_regex_pattern(pattern: str):
    r"""
    Translate iMOD Python's simplified format-string pattern into a compiled
    regular expression.

    The pattern is lowercased first. Every ``{variable}`` placeholder is then
    replaced by a named group that matches one or more word characters, dots
    or hyphens. For example, ``"{name}_c{species}"`` gives the compiled form
    of::

        (?P<name>[\w.-]+)_c(?P<species>[\w.-]+)
    """
    lowered = pattern.lower()
    # Collect the variable names written between curly braces.
    placeholder_names = re.findall(r"{(.*?)}", lowered)
    named_groups = {}
    for variable in placeholder_names:
        named_groups[variable] = f"(?P<{variable}>[\\w.-]+)"
    # str.format substitutes each placeholder with its named group.
    return re.compile(lowered.format(**named_groups))

35 

36 

37def _groupdict(stem: str, pattern: Optional[str | Pattern]) -> Dict: 

38 if pattern is not None: 

39 if isinstance(pattern, Pattern): 

40 match = pattern.match(stem) 

41 if match is not None: 

42 d = match.groupdict() 

43 else: 

44 d = {} 

45 else: 

46 re_pattern = _custom_pattern_to_regex_pattern(pattern) 

47 # Use it to get the required variables 

48 d = re_pattern.match(stem).groupdict() 

49 else: # Default to "iMOD conventions": {name}_c{species}_{time}_l{layer} 

50 has_layer = bool(re.search(r"_l\d+$", stem)) 

51 has_species = bool( 

52 re.search(r"conc_c\d{1,3}_\d{8,14}", stem) 

53 ) # We are strict in recognizing species 

54 try: # try for time 

55 base_pattern = r"(?P<name>[\w-]+)" 

56 if has_species: 

57 base_pattern += r"_c(?P<species>[0-9]+)" 

58 base_pattern += r"_(?P<time>[0-9-]{6,})" 

59 if has_layer: 

60 base_pattern += r"_l(?P<layer>[0-9]+)" 

61 re_pattern = re.compile(base_pattern) 

62 d = re_pattern.match(stem).groupdict() 

63 except AttributeError: # probably no time 

64 base_pattern = r"(?P<name>[\w-]+)" 

65 if has_species: 

66 base_pattern += r"_c(?P<species>[0-9]+)" 

67 if has_layer: 

68 base_pattern += r"_l(?P<layer>[0-9]+)" 

69 re_pattern = re.compile(base_pattern) 

70 d = re_pattern.match(stem).groupdict() 

71 return d 

72 

73 

def decompose(path, pattern: Optional[str | Pattern] = None) -> Dict[str, Any]:
    r"""
    Parse a path, returning a dict of the parts, following the iMOD conventions.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the file. Upper case is ignored.
    pattern : str, regex pattern, optional
        If the path is not made up of standard paths, and the default decompose
        does not produce the right result, specify the used pattern here. See
        the examples below.

    Returns
    -------
    d : dict
        Dictionary with the name of the variable and its dimensions. Besides
        the parsed variables it contains:

        * ``"extension"``: the file suffix of ``path``
        * ``"directory"``: the parent directory of ``path``
        * ``"dims"``: list of the parsed variable names, excluding ``"name"``

        ``"layer"`` and ``"species"`` are converted to ``int``; ``"time"`` is
        converted with ``to_datetime``, or set to ``"steady-state"`` when the
        name contains that marker.

    Examples
    --------
    Decompose a path, relying on default conventions:

    >>> decompose("head_20010101_l1.idf")

    Do the same, by specifying a format string pattern, excluding extension:

    >>> decompose("head_20010101_l1.idf", pattern="{name}_{time}_l{layer}")

    This supports an arbitrary number of variables:

    >>> decompose("head_slr_20010101_l1.idf", pattern="{name}_{scenario}_{time}_l{layer}")

    The format string pattern will only work on tidy paths, where variables are
    separated by underscores. You can also pass a compiled regex pattern.
    Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered.

    >>> import re
    >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE)
    >>> decompose("headL11", pattern=pattern)

    However, this requires constructing regular expressions, which is generally
    a fiddly process. The website https://regex101.com is a nice help.
    Alternatively, the most pragmatic solution may be to just rename your files.
    """
    path = pathlib.Path(path)
    # We'll ignore upper case
    stem = path.stem.lower()
    d = _groupdict(stem, pattern)
    # Every parsed variable except the name counts as a dimension.
    dims = [key for key in d if key != "name"]
    # If name is not provided, generate one from other fields
    if "name" not in d:
        d["name"] = "_".join(d.values())

    # TODO: figure out what to with user specified variables
    # basically type inferencing via regex?
    # if purely numerical \d* -> int or float
    # if \d*\.\d* -> float
    # else: keep as string

    # String -> type conversion
    if "layer" in d:
        d["layer"] = int(d["layer"])
    if "species" in d:
        d["species"] = int(d["species"])
    if "time" in d:
        d["time"] = to_datetime(d["time"])
    if "steady-state" in d["name"]:
        # steady-state as time identifier isn't picked up by <time>[0-9] regex
        d["name"] = d["name"].replace("_steady-state", "")
        d["time"] = "steady-state"
        # Guard against listing "time" twice when a numeric time was parsed too.
        if "time" not in dims:
            dims.append("time")

    d["extension"] = path.suffix
    d["directory"] = path.parent
    d["dims"] = dims
    return d

152 

153 

def compose(d, pattern=None) -> pathlib.Path:
    """
    From a dict of parts, construct a filename, following the iMOD
    conventions.

    Parameters
    ----------
    d : dict
        Parts of the filename. Without a ``pattern`` the keys ``"name"`` and
        ``"extension"`` are required, while ``"species"``, ``"time"`` and
        ``"layer"`` are optional. An optional ``"directory"`` is prepended to
        the result. The dictionary itself is not modified.
    pattern : str, optional
        Format string that is filled in with the entries of ``d``, e.g.
        ``"{name}_l{layer}.idf"``. When given, a ``numpy.datetime64`` or
        ``cftime.datetime`` value under ``"time"`` is converted to
        ``datetime.datetime`` before formatting.

    Returns
    -------
    pathlib.Path
        The composed path.
    """
    # Work on a shallow copy so the caller's dictionary is left untouched.
    parts = dict(d)

    if pattern is None:
        if "time" in parts:
            parts["timestr"] = f"_{_compose_timestring(parts['time'])}"
        else:
            parts["timestr"] = ""
        parts["layerstr"] = f"_l{int(parts['layer'])}" if "layer" in parts else ""
        parts["speciesstr"] = (
            f"_c{int(parts['species'])}" if "species" in parts else ""
        )
        s = "{name}{speciesstr}{timestr}{layerstr}{extension}".format(**parts)
    else:
        if "time" in parts:
            time = parts["time"]
            if time != "steady-state":
                # Change time to datetime.datetime
                if isinstance(time, np.datetime64):
                    parts["time"] = time.astype("datetime64[us]").item()
                elif isinstance(time, cftime.datetime):
                    # Take first six elements of timetuple and convert to datetime
                    parts["time"] = datetime.datetime(*time.timetuple()[:6])
        s = pattern.format(**parts)

    if "directory" in parts:
        return pathlib.Path(parts["directory"]) / s
    return pathlib.Path(s)

197 

198 

def temporary_directory() -> pathlib.Path:
    """
    Return the path of a freshly created temporary directory.

    NOTE(review): the ``tempfile.TemporaryDirectory`` object is not kept
    alive, and the standard library removes the directory when that object
    is finalized. The returned path may therefore no longer exist on disk
    by the time the caller uses it -- confirm that callers create it again.
    """
    return pathlib.Path(tempfile.TemporaryDirectory().name)