Coverage for C:\src\imod-python\imod\util\path.py: 92%

93 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-08 13:27 +0200

1""" 

2Conventional IDF filenames can be understood and constructed using 

3:func:`imod.util.path.decompose` and :func:`imod.util.path.compose`. These are used 

4automatically in :func:`imod.idf`. 

5""" 

6 

7import datetime 

8import pathlib 

9import re 

10import tempfile 

11from typing import Any, Dict 

12 

13import cftime 

14import numpy as np 

15 

16from imod.util.time import _compose_timestring, to_datetime 

17 

18Pattern = re.Pattern 

19 

20 

21def _custom_pattern_to_regex_pattern(pattern: str): 

22 """ 

23 Compile iMOD Python's simplified custom pattern to regex pattern: 

24 _custom_pattern_to_regex_pattern({name}_c{species}) 

25 is the same as calling: 

26 (?P<name>[\\w.-]+)_c(?P<species>[\\w.-]+)).compile() 

27 """ 

28 pattern = pattern.lower() 

29 # Get the variables between curly braces 

30 in_curly = re.compile(r"{(.*?)}").findall(pattern) 

31 regex_parts = {key: f"(?P<{key}>[\\w.-]+)" for key in in_curly} 

32 # Format the regex string, by filling in the variables 

33 simple_regex = pattern.format(**regex_parts) 

34 return re.compile(simple_regex) 

35 

36 

37def _groupdict(stem: str, pattern: str | Pattern) -> Dict: 

38 if pattern is not None: 

39 if isinstance(pattern, Pattern): 

40 d = pattern.match(stem).groupdict() 

41 else: 

42 re_pattern = _custom_pattern_to_regex_pattern(pattern) 

43 # Use it to get the required variables 

44 d = re_pattern.match(stem).groupdict() 

45 else: # Default to "iMOD conventions": {name}_c{species}_{time}_l{layer} 

46 has_layer = bool(re.search(r"_l\d+$", stem)) 

47 has_species = bool( 

48 re.search(r"conc_c\d{1,3}_\d{8,14}", stem) 

49 ) # We are strict in recognizing species 

50 try: # try for time 

51 base_pattern = r"(?P<name>[\w-]+)" 

52 if has_species: 

53 base_pattern += r"_c(?P<species>[0-9]+)" 

54 base_pattern += r"_(?P<time>[0-9-]{6,})" 

55 if has_layer: 

56 base_pattern += r"_l(?P<layer>[0-9]+)" 

57 re_pattern = re.compile(base_pattern) 

58 d = re_pattern.match(stem).groupdict() 

59 except AttributeError: # probably no time 

60 base_pattern = r"(?P<name>[\w-]+)" 

61 if has_species: 

62 base_pattern += r"_c(?P<species>[0-9]+)" 

63 if has_layer: 

64 base_pattern += r"_l(?P<layer>[0-9]+)" 

65 re_pattern = re.compile(base_pattern) 

66 d = re_pattern.match(stem).groupdict() 

67 return d 

68 

69 

def decompose(path, pattern: str | Pattern | None = None) -> Dict[str, Any]:
    r"""
    Parse a path, returning a dict of the parts, following the iMOD conventions.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the file. Upper case is ignored.
    pattern : str, regex pattern, optional
        If the path is not made up of standard paths, and the default decompose
        does not produce the right result, specify the used pattern here. See
        the examples below.

    Returns
    -------
    d : dict
        The parts found in the file name, plus:

        * ``"name"``: the variable name; when the pattern has no ``name``
          group it is built by joining all matched parts with underscores.
        * ``"extension"``: the suffix of the path.
        * ``"directory"``: the parent of the path.
        * ``"dims"``: list of the matched parts other than ``name``.

        ``layer`` and ``species`` are converted to ``int``; ``time`` is
        converted with ``imod.util.time.to_datetime``. A name containing
        ``steady-state`` has ``_steady-state`` stripped and ``time`` set to
        the string ``"steady-state"``.

    Raises
    ------
    AttributeError
        If the (lowered) file stem does not match the pattern.

    Examples
    --------
    Decompose a path, relying on default conventions:

    >>> decompose("head_20010101_l1.idf")

    Do the same, by specifying a format string pattern, excluding extension:

    >>> decompose("head_20010101_l1.idf", pattern="{name}_{time}_l{layer}")

    This supports an arbitrary number of variables:

    >>> decompose("head_slr_20010101_l1.idf", pattern="{name}_{scenario}_{time}_l{layer}")

    The format string pattern will only work on tidy paths, where variables are
    separated by underscores. You can also pass a compiled regex pattern.
    Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered.

    >>> import re
    >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE)
    >>> decompose("headL11", pattern=pattern)

    However, this requires constructing regular expressions, which is generally
    a fiddly process. The website https://regex101.com is a nice help.
    Alternatively, the most pragmatic solution may be to just rename your files.
    """
    path = pathlib.Path(path)
    # We'll ignore upper case
    stem = path.stem.lower()

    d = _groupdict(stem, pattern)
    # Every matched part except the name counts as a dimension
    dims = [key for key in d if key != "name"]
    # If name is not provided, generate one from other fields
    if "name" not in d:
        d["name"] = "_".join(d.values())

    # TODO: figure out what to with user specified variables
    # basically type inferencing via regex?
    # if purely numerical \d* -> int or float
    # if \d*\.\d* -> float
    # else: keep as string

    # String -> type conversion
    if "layer" in d:
        d["layer"] = int(d["layer"])
    if "species" in d:
        d["species"] = int(d["species"])
    if "time" in d:
        d["time"] = to_datetime(d["time"])
    if "steady-state" in d["name"]:
        # steady-state as time identifier isn't picked up by <time>[0-9] regex
        d["name"] = d["name"].replace("_steady-state", "")
        d["time"] = "steady-state"
        # A custom pattern may already have captured a time group; do not
        # list the dimension twice.
        if "time" not in dims:
            dims.append("time")

    d["extension"] = path.suffix
    d["directory"] = path.parent
    d["dims"] = dims
    return d

149 

150 

def compose(d, pattern=None) -> pathlib.Path:
    """
    From a dict of parts, construct a filename, following the iMOD
    conventions.

    Parameters
    ----------
    d : dict
        Parts of the file name. Without a pattern, ``"name"`` and
        ``"extension"`` are required and ``"species"``, ``"time"`` and
        ``"layer"`` are optional. With a pattern, every field used by the
        pattern must be present. An optional ``"directory"`` entry is
        prepended to the result. The dict is not modified.
    pattern : str, optional
        Format string filled in with ``pattern.format(**d)``, e.g.
        ``"{name}_{time:%Y%m%d}{extension}"``. When omitted the file name is
        ``{name}_c{species}_{time}_l{layer}{extension}``, leaving out the
        parts that are absent from ``d``.

    Returns
    -------
    pathlib.Path
        The composed path.

    Raises
    ------
    KeyError
        If a required part is missing from ``d``.
    """
    if pattern is None:
        # Build the optional pieces as locals rather than storing them in
        # ``d``, so the caller's dict is left untouched.
        timestr = f"_{_compose_timestring(d['time'])}" if "time" in d else ""
        layerstr = f"_l{int(d['layer'])}" if "layer" in d else ""
        speciesstr = f"_c{int(d['species'])}" if "species" in d else ""
        s = f"{d['name']}{speciesstr}{timestr}{layerstr}{d['extension']}"
    else:
        # Shallow copy: the time entry may be replaced below and that change
        # must not leak back to the caller.
        parts = dict(d)
        if "time" in parts:
            time = parts["time"]
            # Change time to datetime.datetime (presumably so that
            # strftime-style format specs in the pattern can be applied).
            # The numpy check comes first so a numpy scalar is never
            # compared with a string.
            if isinstance(time, np.datetime64):
                parts["time"] = time.astype("datetime64[us]").item()
            elif time != "steady-state" and isinstance(time, cftime.datetime):
                # Take first six elements of timetuple and convert to datetime
                parts["time"] = datetime.datetime(*time.timetuple()[:6])
            # "steady-state" and other values are formatted as they are
        s = pattern.format(**parts)

    if "directory" in d:
        return pathlib.Path(d["directory"]) / s
    return pathlib.Path(s)

194 

195 

def temporary_directory() -> pathlib.Path:
    """
    Return a unique path inside the system's temporary location.

    The directory is created to reserve a unique name and removed again
    before returning, so the returned path does not exist; callers have to
    create it themselves. The removal is done explicitly instead of being
    left to the finalizer of the ``TemporaryDirectory`` object, which would
    emit a ``ResourceWarning`` and run at an unspecified moment on
    interpreters without reference counting.

    Returns
    -------
    pathlib.Path
        Path of the (removed) temporary directory.
    """
    with tempfile.TemporaryDirectory() as name:
        # Only the generated name is needed; leaving the block cleans up.
        pass
    return pathlib.Path(name)

198 return pathlib.Path(tempdir.name)