Coverage for C:\src\imod-python\imod\util\path.py: 98%
96 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 14:15 +0200
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 14:15 +0200
1"""
2Conventional IDF filenames can be understood and constructed using
3:func:`imod.util.path.decompose` and :func:`imod.util.path.compose`. These are used
4automatically in :func:`imod.idf`.
5"""
7import datetime
8import pathlib
9import re
10import tempfile
11from typing import Any, Dict, Optional
13import cftime
14import numpy as np
16from imod.util.time import _compose_timestring, to_datetime
18Pattern = re.Pattern
def _custom_pattern_to_regex_pattern(pattern: str):
    """
    Translate iMOD Python's simplified brace pattern into a compiled regex.

    The pattern is lowered first. Every ``{variable}`` placeholder is then
    replaced by a named group that matches one or more word characters, dots
    or dashes. For example, ``"{name}_c{species}"`` gives the same result as::

        re.compile("(?P<name>[\\w.-]+)_c(?P<species>[\\w.-]+)")

    Text outside the curly braces is passed on to the regex as is; it is not
    escaped.
    """
    lowered = pattern.lower()
    # Collect the variable names found between curly braces.
    named_groups = {}
    for variable in re.findall(r"{(.*?)}", lowered):
        named_groups[variable] = f"(?P<{variable}>[\\w.-]+)"
    # Fill in a named group for every placeholder and compile the result.
    return re.compile(lowered.format(**named_groups))
37def _groupdict(stem: str, pattern: Optional[str | Pattern]) -> Dict:
38 if pattern is not None:
39 if isinstance(pattern, Pattern):
40 match = pattern.match(stem)
41 if match is not None:
42 d = match.groupdict()
43 else:
44 d = {}
45 else:
46 re_pattern = _custom_pattern_to_regex_pattern(pattern)
47 # Use it to get the required variables
48 d = re_pattern.match(stem).groupdict()
49 else: # Default to "iMOD conventions": {name}_c{species}_{time}_l{layer}
50 has_layer = bool(re.search(r"_l\d+$", stem))
51 has_species = bool(
52 re.search(r"conc_c\d{1,3}_\d{8,14}", stem)
53 ) # We are strict in recognizing species
54 try: # try for time
55 base_pattern = r"(?P<name>[\w-]+)"
56 if has_species:
57 base_pattern += r"_c(?P<species>[0-9]+)"
58 base_pattern += r"_(?P<time>[0-9-]{6,})"
59 if has_layer:
60 base_pattern += r"_l(?P<layer>[0-9]+)"
61 re_pattern = re.compile(base_pattern)
62 d = re_pattern.match(stem).groupdict()
63 except AttributeError: # probably no time
64 base_pattern = r"(?P<name>[\w-]+)"
65 if has_species:
66 base_pattern += r"_c(?P<species>[0-9]+)"
67 if has_layer:
68 base_pattern += r"_l(?P<layer>[0-9]+)"
69 re_pattern = re.compile(base_pattern)
70 d = re_pattern.match(stem).groupdict()
71 return d
def decompose(path, pattern: Optional[str] = None) -> Dict[str, Any]:
    r"""
    Split a path into its named parts, following the iMOD conventions.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the file. The stem is lowered before parsing, so upper case
        is ignored.
    pattern : str, regex pattern, optional
        If the path is not made up of standard parts, and the default
        decompose does not produce the right result, specify the used pattern
        here. See the examples below.

    Returns
    -------
    d : dict
        Dictionary with the parsed parts. ``"layer"`` and ``"species"`` are
        converted to int and ``"time"`` is converted with ``to_datetime``.
        The keys ``"extension"``, ``"directory"`` and ``"dims"`` (the parsed
        keys other than ``"name"``) are always added.

    Examples
    --------
    Decompose a path, relying on default conventions:

    >>> decompose("head_20010101_l1.idf")

    Do the same, by specifying a format string pattern, excluding extension:

    >>> decompose("head_20010101_l1.idf", pattern="{name}_{time}_l{layer}")

    This supports an arbitrary number of variables:

    >>> decompose("head_slr_20010101_l1.idf", pattern="{name}_{scenario}_{time}_l{layer}")

    The format string pattern will only work on tidy paths, where variables are
    separated by underscores. You can also pass a compiled regex pattern.
    Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered.

    >>> import re
    >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE)
    >>> decompose("headL11", pattern=pattern)

    However, this requires constructing regular expressions, which is generally
    a fiddly process. The website https://regex101.com is a nice help.
    Alternatively, the most pragmatic solution may be to just rename your files.
    """
    path = pathlib.Path(path)
    # We'll ignore upper case: parsing is done on the lowered stem.
    parts = _groupdict(path.stem.lower(), pattern)
    # Every parsed key except "name" counts as a dimension.
    dims = [key for key in parts if key != "name"]
    if "name" not in parts:
        # If name is not provided, generate one from the other fields.
        parts["name"] = "_".join(parts.values())

    # TODO: figure out what to with user specified variables
    # basically type inferencing via regex?
    # if purely numerical \d* -> int or float
    # if \d*\.\d* -> float
    # else: keep as string

    # String -> type conversion
    for integer_key in ("layer", "species"):
        if integer_key in parts:
            parts[integer_key] = int(parts[integer_key])
    if "time" in parts:
        parts["time"] = to_datetime(parts["time"])
    if "steady-state" in parts["name"]:
        # steady-state as time identifier isn't picked up by <time>[0-9] regex
        parts["name"] = parts["name"].replace("_steady-state", "")
        parts["time"] = "steady-state"
        dims.append("time")

    parts.update(extension=path.suffix, directory=path.parent, dims=dims)
    return parts
def compose(d, pattern=None) -> pathlib.Path:
    """
    From a dict of parts, construct a filename, following the iMOD
    conventions.

    Parameters
    ----------
    d : dict
        Parts of the filename. Without ``pattern`` the keys ``"name"`` and
        ``"extension"`` are required, while ``"species"``, ``"time"`` and
        ``"layer"`` are optional. If ``"directory"`` is present, the filename
        is placed inside it. The dict itself is left unmodified.
    pattern : str, optional
        Format string that is filled in with the entries of ``d`` through
        ``str.format``. If None, the default
        ``{name}_c{species}_{time}_l{layer}{extension}`` layout is used, in
        which absent parts are left out.

    Returns
    -------
    pathlib.Path

    Raises
    ------
    KeyError
        If a key required by the (default) pattern is missing from ``d``.
    """
    # Work on a shallow copy, so that the helper entries added below and the
    # converted time never leak into the caller's dict.
    parts = dict(d)

    if pattern is None:
        if "time" in parts:
            parts["timestr"] = "_{}".format(_compose_timestring(parts["time"]))
        else:
            parts["timestr"] = ""

        if "layer" in parts:
            parts["layerstr"] = "_l{}".format(int(parts["layer"]))
        else:
            parts["layerstr"] = ""

        if "species" in parts:
            parts["speciesstr"] = "_c{}".format(int(parts["species"]))
        else:
            parts["speciesstr"] = ""

        filename = "{name}{speciesstr}{timestr}{layerstr}{extension}".format(**parts)
    else:
        if "time" in parts:
            time = parts["time"]
            if time != "steady-state":
                # Change time to datetime.datetime before it is formatted.
                if isinstance(time, np.datetime64):
                    parts["time"] = time.astype("datetime64[us]").item()
                elif isinstance(time, cftime.datetime):
                    # Take first six elements of timetuple and convert to datetime
                    parts["time"] = datetime.datetime(*time.timetuple()[:6])
        filename = pattern.format(**parts)

    if "directory" in parts:
        return pathlib.Path(parts["directory"]) / filename
    return pathlib.Path(filename)
def temporary_directory() -> pathlib.Path:
    """
    Return the path of a newly created ``tempfile.TemporaryDirectory``.

    Only the path is returned; no reference to the ``TemporaryDirectory``
    object is kept.

    NOTE(review): Python cleans up such a directory when its object is
    finalized, so the returned path presumably does not exist on disk anymore
    after this function returns. Confirm that callers create the directory
    themselves before writing to it.
    """
    return pathlib.Path(tempfile.TemporaryDirectory().name)