Coverage for C:\src\imod-python\imod\formats\prj\prj.py: 95%
498 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 14:15 +0200
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 14:15 +0200
1"""
2Utilities for parsing a project file.
3"""
5import shlex
6from collections import defaultdict
7from datetime import datetime
8from itertools import chain
9from os import PathLike
10from pathlib import Path
11from typing import Any, Dict, List, Sequence, Tuple, Union
13import numpy as np
14import pandas as pd
15import xarray as xr
17import imod
19FilePath = Union[str, "PathLike[str]"]
22KEYS = {
23 "(bnd)": ("ibound",),
24 "(top)": ("top",),
25 "(bot)": ("bottom",),
26 "(thk)": ("thickness",),
27 "(khv)": ("kh",),
28 "(kva)": ("vertical_anisotropy",),
29 "(kdw)": ("transmissivity",),
30 "(kvv)": ("kv",),
31 "(vcw)": ("resistance",),
32 "(shd)": ("head",),
33 "(sto)": ("storage_coefficient",),
34 "(spy)": ("specific_yield",),
35 "(por)": ("porosity",),
36 "(ani)": ("factor", "angle"),
37 "(hfb)": ("gen",),
38 "(ibs)": (None),
39 "(pwt)": (None),
40 "(sft)": (None),
41 "(obs)": (None),
42 "(cbi)": (None),
43 "(sco)": (None),
44 "(dsp)": (None),
45 "(ics)": (None),
46 "(fcs)": (None),
47 "(ssc)": (None),
48 "(fod)": (None),
49 "(fos)": (None),
50 "(rct)": (None),
51 "(con)": (None),
52 "(pst)": (None),
53}
55DATE_KEYS = {
56 "(uzf)": (None,),
57 "(rch)": ("rate",),
58 "(evt)": ("rate", "surface", "depth"),
59 "(drn)": ("conductance", "elevation"),
60 "(olf)": ("elevation",),
61 "(riv)": ("conductance", "stage", "bottom_elevation", "infiltration_factor"),
62 "(isg)": ("isg",),
63 "(sfr)": ("isg",),
64 "(lak)": (None,),
65 "(wel)": ("ipf",),
66 "(mnw)": (None,),
67 "(ghb)": ("conductance", "head"),
68 "(chd)": ("head",),
69 "(fhb)": (None,),
70 "(fde)": (None,),
71 "(tvc)": (None,),
72}
74METASWAP_VARS = (
75 "boundary",
76 "landuse",
77 "rootzone_thickness",
78 "soil_physical_unit",
79 "meteo_station_number",
80 "surface_elevation",
81 "artificial_recharge",
82 "artifical_recharge_layer",
83 "artificial_recharge_capacity",
84 "wetted_area",
85 "urban_area",
86 "urban_ponding_depth",
87 "rural_ponding_depth",
88 "urban_runoff_resistance",
89 "rural_runoff_resistance",
90 "urban_runon_resistance",
91 "rural_runon_resistance",
92 "urban_infiltration_capacity",
93 "rural_infiltration_capacity",
94 "perched_water_table_level",
95 "soil_moisture_fraction",
96 "conductivitiy_factor",
97 "plot_number",
98 "steering_location",
99 "plot_drainage_level",
100 "plot_drainage_resistance",
101)
104class _LineIterator:
105 """
106 Like iter(lines), but we can go back and we check if we're finished.
107 """
109 def __init__(self, lines: List[List[str]]):
110 self.lines = lines
111 self.count = -1
112 self.length = len(lines)
114 def __iter__(self):
115 return self
117 def __next__(self) -> List[str]:
118 if self.finished:
119 raise StopIteration
120 self.count += 1
121 return self.lines[self.count]
123 def back(self) -> None:
124 self.count = max(self.count - 1, -1)
126 @property
127 def finished(self) -> bool:
128 return (self.count + 1) >= self.length
131def _tokenize(line: str) -> List[str]:
132 """
133 A value separator in Fortran list-directed input is:
135 * A comma if period decimal edit mode is POINT.
136 * One or more contiguous spaces (blanks); no tabs.
138 Other remarks:
140 * Values, except for character strings, cannot contain blanks.
141 * Strings may be unquoted if they do not start with a digit and no value
142 separators.
143 * Character strings can be quoted strings, using pairs of quotes ("), pairs
144 of apostrophes (').
145 * A quote or apostrophe must be preceded by a value separator to initite a
146 quoted string.
147 * An empty entry consists of two consecutive commas (or semicolons).
149 For the use here (parsing IMOD's project files), we ignore:
151 * A semicolon value separator if period decimal edit mode is COMMA.
152 * Complex constants given as two real constants separated by a comma and
153 enclosed in parentheses.
154 * Repetition counts: 4*(3.,2.) 2*, 4*'hello'
156 Furthermore, we do not expect commas inside of the project file entries,
157 since we expect:
159 * Package names: unquoted character strings.
160 * File paths: will not contain commas, no single apostrophe, nor a single
161 quote symbol, may contain whitespace if quoted. * Integers for counts and
162 settings.
163 * Floats for addition and multiplication values.
164 * Simple character strings for period names (summer, winter). These
165 technically could contain commas if quoted, which is very unlikely.
166 * No quotes or apostrophes are escaped.
168 With these assumptions, we can limit complexity considerably (see the
169 PyLiDiRe link for a complete implementation):
171 * First we split by comma (we don't expected commas in quote strings).
172 * Next we split by whitespace, unless quoted.
174 We can expect both single and double quotes, even within a single line:
175 shlex.split() handles this. Note that additional entries are likely
176 allowed, as the Fortran implementation only reads what is necessary,
177 then stops parsing.
179 See also:
180 * https://stackoverflow.com/questions/36165050/python-equivalent-of-fortran-list-directed-input
181 * https://gitlab.com/everythingfunctional/PyLiDiRe
182 * https://docs.oracle.com/cd/E19957-01/805-4939/6j4m0vnc5/index.html
183 * The Fortran 2003 Handbook
185 Examples
186 --------
188 Raise ValueError, due to missing closing quotation. (Can be enabled
189 shlex.split(s, posix=False)):
191 >> _tokenize("That's life")
193 >> _tokenize("That 's life'")
194 >> ["That", "s life"]
196 >> _tokenize("That,'s life'")
197 >> ["That", "s life"]
198 """
199 values = [v.strip().replace("\\", "/") for v in line.split(",")]
200 tokens = list(chain.from_iterable(shlex.split(v) for v in values))
201 return tokens
204def _wrap_error_message(
205 exception: Exception, description: str, lines: _LineIterator
206) -> None:
207 lines.back()
208 content = next(lines)
209 number = lines.count + 1
210 raise type(exception)(
211 f"{exception}\n"
212 f"Failed to parse {description} for line {number} with content:\n{content}"
213 )
216def _parse_blockheader(lines: _LineIterator) -> Tuple[int, str, str]:
217 try:
218 no_result = None, None, None
219 line = next(lines)
221 # Skip if it's an empty line.
222 if len(line) == 0:
223 return no_result
225 first = line[0].lower()
226 if first in ("periods", "species"):
227 return 1, first, None
228 # The line must contain atleast nper, key, active.
229 elif len(line) >= 3:
230 n = int(first)
231 key = line[1].lower()
232 active = line[2]
233 return n, key, active
234 # It's a comment or something.
235 else:
236 return no_result
237 except Exception as e:
238 _wrap_error_message(e, "block header", lines)
241def _parse_time(lines: _LineIterator) -> str:
242 try:
243 line = next(lines)
244 date = line[0].lower()
245 if len(line) > 1:
246 time = line[1]
247 return f"{date} {time}"
248 else:
249 return date
250 except Exception as e:
251 _wrap_error_message(e, "date time", lines)
254def _parse_blockline(lines: _LineIterator, time: str = None) -> Dict[str, Any]:
255 try:
256 line = next(lines)
257 content = {
258 "active": bool(int(line[0])),
259 "is_constant": int(line[1]),
260 "layer": int(line[2]),
261 "factor": float(line[3]),
262 "addition": float(line[4]),
263 "constant": float(line[5]),
264 }
265 if content["is_constant"] == 2:
266 content["path"] = Path(line[6]).resolve()
267 if time is not None:
268 content["time"] = time
269 return content
270 except Exception as e:
271 _wrap_error_message(e, "entries", lines)
274def _parse_nsub_nsystem(lines: _LineIterator) -> Tuple[int, int]:
275 try:
276 line = next(lines)
277 n_entry = int(line[0])
278 n_system = int(line[1])
279 return n_entry, n_system
280 except Exception as e:
281 _wrap_error_message(e, "number of sub-entries and number of systems", lines)
284def _parse_notimeblock(
285 lines: _LineIterator,
286 fields: List[str],
287) -> Dict[str, Any]:
288 n_entry, n_system = _parse_nsub_nsystem(lines)
290 if len(fields) != n_entry:
291 raise ValueError(
292 f"Expected NSUB entry of {len(fields)} for {fields}, read: {n_entry}"
293 )
294 content = {
295 field: [_parse_blockline(lines) for _ in range(n_system)] for field in fields
296 }
297 content["n_system"] = n_system
298 return content
301def _parse_capblock(
302 lines: _LineIterator,
303) -> Dict[str, Any]:
304 fields = METASWAP_VARS
305 n_entry, n_system = _parse_nsub_nsystem(lines)
307 if n_entry == 21:
308 # Remove layer entry.
309 fields = list(fields[:22]).pop(8)
310 elif n_entry == 22:
311 fields = fields[:22]
312 elif n_entry == 26:
313 pass
314 else:
315 raise ValueError(
316 f"Expected NSUB entry of 21, 22, or 26 for {fields}, read: {n_entry}"
317 )
319 content = {
320 field: [_parse_blockline(lines) for _ in range(n_system)] for field in fields
321 }
322 content["n_system"] = n_system
323 return content
326def _parse_extrablock(lines: _LineIterator, n: int) -> Dict[str, List[str]]:
327 """Parse the MetaSWAP "extra files" block"""
328 return {"paths": [next(lines) for _ in range(n)]}
331def _parse_timeblock(
332 lines: List[str],
333 fields: List[str],
334 n: int,
335) -> Dict[str, Any]:
336 n_fields = len(fields)
337 content = defaultdict(list)
338 for _ in range(n):
339 time = _parse_time(lines)
340 content["time"].append(time)
341 n_entry, n_system = _parse_nsub_nsystem(lines)
343 if n_fields != n_entry:
344 raise ValueError(
345 f"Expected NSUB entry of {n_fields} for {fields}, read: {n_entry}"
346 )
348 for field in fields:
349 content[field].extend(
350 [_parse_blockline(lines, time) for _ in range(n_system)]
351 )
353 content["n_system"] = n_system
354 return content
357def _parse_pcgblock(lines: _LineIterator) -> Dict[str, Any]:
358 try:
359 line = next(lines)
361 # TODO: which are optional? How many to expect?
362 # Check for an empty line to terminate the block?
363 types = {
364 "mxiter": int,
365 "iter1": int,
366 "hclose": float,
367 "rclose": float,
368 "relax": float,
369 "npcond": int,
370 "iprpcg": int,
371 "mutpcg": int,
372 "damppcg": float,
373 "damppcgt": float,
374 "iqerror": int,
375 "qerror": float,
376 }
378 if len(line) == 12:
379 line_iterator = iter(line)
380 content = {
381 k: valuetype(next(line_iterator)) for k, valuetype in types.items()
382 }
383 elif any("=" in s for s in line):
384 pcglines = [line] + [next(lines) for _ in range(11)]
385 content = {}
386 for line in pcglines:
387 # undo separation, partition on equality sign instead.
388 line = "".join(line)
389 key, _, value = line.lower().partition("=")
390 value = types[key](value)
391 content[key] = value
392 else:
393 raise ValueError(
394 f"Expected 12 KEY = VALUE pairs, or 12 values. Found {len(line)}"
395 )
397 return content
398 except Exception as e:
399 _wrap_error_message(e, "PCG entry", lines)
402def _parse_periodsblock(lines: _LineIterator) -> Dict[str, str]:
403 try:
404 periods = {}
405 while not lines.finished:
406 line = next(lines)
407 # Stop if we encounter an empty line.
408 if len(line) == 0:
409 break
410 # Read the alias
411 alias = line[0]
412 # Now read the time associated with it.
413 start = _parse_time(lines)
414 periods[alias] = start
415 return periods
416 except Exception as e:
417 _wrap_error_message(e, "periods data block", lines)
420def _parse_speciesblock(lines: _LineIterator):
421 try:
422 species = {}
423 while not lines.finished:
424 line = next(lines)
425 # Stop if we encounter an empty line.
426 if len(line) == 0:
427 break
428 name, nr = line
429 nr = int(nr)
430 species[nr] = name
431 return species
432 except Exception as e:
433 _wrap_error_message(e, "species entry", lines)
436def _parse_block(lines: _LineIterator, content: Dict[str, Any]) -> None:
437 """
438 Mutates content dict.
439 """
440 n = key = active = None
442 # A project file may contain any number of lines outside of a "topic"
443 # block. _parse_blockheader will return triple None in that case.
444 while key is None and not lines.finished:
445 n, key, active = _parse_blockheader(lines)
447 try:
448 if key in KEYS:
449 if n != 1:
450 raise ValueError(f"Expected N=1 for {key}, read: {n}")
451 fields = KEYS[key]
452 blockcontent = _parse_notimeblock(lines, fields)
453 elif key in DATE_KEYS:
454 fields = DATE_KEYS[key]
455 blockcontent = _parse_timeblock(lines, fields, n)
456 elif key == "(cap)":
457 blockcontent = _parse_capblock(lines)
458 elif key == "(pcg)":
459 blockcontent = _parse_pcgblock(lines)
460 elif key == "periods":
461 blockcontent = _parse_periodsblock(lines)
462 elif key == "species":
463 blockcontent = _parse_speciesblock(lines)
464 elif key == "extra":
465 blockcontent = _parse_extrablock(lines, n)
466 else:
467 other = ("(pcg)", "(gcg)", "(vdf)")
468 options = tuple(KEYS.keys()) + tuple(DATE_KEYS.keys()) + other
469 lines.back()
470 line = next(lines)
471 number = lines.count + 1
472 raise ValueError(
473 f"Failed to recognize header keyword: {key}. Expected one of keywords {options}"
474 f"\nErrored in line {number} with entries:\n{line}"
475 )
477 except Exception as e:
478 raise type(e)(f"{e}\nError occurred for keyword: {key}")
480 if blockcontent is not None and active is not None:
481 blockcontent["active"] = active
483 content[key] = blockcontent
484 return
487def _process_package_entry(entry: Dict):
488 """
489 The iMOD project file supports constants in lieu of IDFs.
490 """
491 coords = {"layer": entry["layer"]}
492 dims = ("layer",)
494 if "path" not in entry:
495 path = None
496 header = {"coords": coords}
497 value = entry["constant"]
498 else:
499 path = entry["path"]
500 header = imod.idf.header(path, pattern="{name}")
501 value = None
503 header["dims"] = dims
504 return path, header, value
507def _merge_coords(headers: List[Dict[str, Any]]) -> Dict[str, np.ndarray]:
508 coords = defaultdict(list)
509 for header in headers:
510 for key, value in header["coords"].items():
511 coords[key].append(value)
512 return {k: np.unique(coords[k]) for k in coords}
515def _create_datarray_from_paths(paths: List[str], headers: List[Dict[str, Any]]):
516 da = imod.formats.array_io.reading._load(
517 paths, use_cftime=False, _read=imod.idf._read, headers=headers
518 )
519 return da
522def _create_dataarray_from_values(values: List[float], headers: List[Dict[str, Any]]):
523 coords = _merge_coords(headers)
524 firstdims = headers[0]["dims"]
525 shape = [len(coord) for coord in coords.values()]
526 da = xr.DataArray(np.reshape(values, shape), dims=firstdims, coords=coords)
527 return da
530def _create_dataarray(
531 paths: List[str], headers: List[Dict[str, Any]], values: List[float]
532) -> xr.DataArray:
533 """
534 Create a DataArray from a list of IDF paths, or from constant values.
535 """
536 values_valid = []
537 paths_valid = []
538 headers_paths = []
539 headers_values = []
540 for path, header, value in zip(paths, headers, values):
541 if path is None:
542 headers_values.append(header)
543 values_valid.append(value)
544 else:
545 headers_paths.append(header)
546 paths_valid.append(path)
548 if paths_valid and values_valid:
549 dap = _create_datarray_from_paths(paths_valid, headers_paths)
550 dav = _create_dataarray_from_values(values_valid, headers_values)
551 dap.name = "tmp"
552 dav.name = "tmp"
553 da = xr.merge((dap, dav), join="outer")["tmp"]
554 elif paths_valid:
555 da = _create_datarray_from_paths(paths_valid, headers_paths)
556 elif values_valid:
557 da = _create_dataarray_from_values(values_valid, headers_values)
559 da = apply_factor_and_addition(headers, da)
560 return da
563def apply_factor_and_addition(headers, da):
564 if not ("layer" in da.coords or "time" in da.dims):
565 factor = headers[0]["factor"]
566 addition = headers[0]["addition"]
567 da = da * factor + addition
568 elif "layer" in da.coords and "time" not in da.dims:
569 da = apply_factor_and_addition_per_layer(headers, da)
570 else:
571 header_per_time = defaultdict(list)
572 for time in da.coords["time"].values:
573 for header in headers:
574 if np.datetime64(header["time"]) == time:
575 header_per_time[time].append(header)
577 for time in da.coords["time"]:
578 da.loc[{"time": time}] = apply_factor_and_addition(
579 header_per_time[np.datetime64(time.values)],
580 da.sel(time=time, drop=True),
581 )
582 return da
585def apply_factor_and_addition_per_layer(headers, da):
586 layer = da.coords["layer"].values
587 header_per_layer = {}
588 for header in headers:
589 if header["layer"] in header_per_layer.keys():
590 raise ValueError("error in project file: layer repetition")
591 header_per_layer[header["layer"]] = header
592 addition_values = [header_per_layer[lay]["addition"] for lay in layer]
593 factor_values = [header_per_layer[lay]["factor"] for lay in layer]
594 addition = xr.DataArray(addition_values, coords={"layer": layer}, dims=("layer"))
595 factor = xr.DataArray(factor_values, coords={"layer": layer}, dims=("layer",))
596 da = da * factor + addition
597 return da
600def _open_package_idf(
601 block_content: Dict[str, Any], variables: Sequence[str]
602) -> List[xr.DataArray]:
603 das = {}
604 for variable in variables:
605 variable_content = block_content[variable]
606 paths = []
607 headers = []
608 values = []
609 for entry in variable_content:
610 path, header, value = _process_package_entry(entry)
611 header["name"] = variable
612 header["dims"] = ["layer"]
613 header["layer"] = entry["layer"]
614 header["addition"] = entry["addition"]
615 header["factor"] = entry["factor"]
616 paths.append(path)
617 headers.append(header)
618 values.append(value)
620 das[variable] = _create_dataarray(paths, headers, values)
622 return [das]
625def _process_time(time: str, yearfirst: bool = True):
626 if time == "steady-state":
627 time = None
628 else:
629 if yearfirst:
630 if len(time) == 19:
631 time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
632 elif len(time) == 10:
633 time = datetime.strptime(time, "%Y-%m-%d")
634 else:
635 raise ValueError(
636 f"time data {time} does not match format "
637 '"%Y-%m-%d %H:%M:%S" or "%Y-%m-%d"'
638 )
639 else:
640 if len(time) == 19:
641 time = datetime.strptime(time, "%d-%m-%Y %H:%M:%S")
642 elif len(time) == 10:
643 time = datetime.strptime(time, "%d-%m-%Y")
644 else:
645 raise ValueError(
646 f"time data {time} does not match format "
647 '"%d-%m-%Y %H:%M:%S" or "%d-%m-%Y"'
648 )
649 return time
652def _process_boundary_condition_entry(entry: Dict, periods: Dict[str, datetime]):
653 """
654 The iMOD project file supports constants in lieu of IDFs.
656 Also process repeated stress periods (on a yearly basis): substitute the
657 original date here.
658 """
659 coords = {}
660 timestring = entry["time"]
662 # Resolve repeating periods first:
663 time = periods.get(timestring)
664 if time is not None:
665 repeat = time
666 else:
667 # this resolves e.g. "steady-state"
668 time = _process_time(timestring)
669 repeat = None
671 if time is None:
672 dims = ()
673 else:
674 dims = ("time",)
675 coords["time"] = time
677 # 0 signifies that the layer must be determined on the basis of
678 # bottom elevation and stage.
679 layer = entry["layer"]
680 if layer <= 0:
681 layer is None
682 else:
683 coords["layer"] = layer
684 dims = dims + ("layer",)
686 if "path" not in entry:
687 path = None
688 header = {"coords": coords}
689 value = entry["constant"]
690 else:
691 path = entry["path"]
692 header = imod.idf.header(path, pattern="{name}")
693 value = None
694 header["addition"] = entry["addition"]
695 header["factor"] = entry["factor"]
696 header["dims"] = dims
697 if layer is not None:
698 header["layer"] = layer
699 if time is not None:
700 header["time"] = time
702 return path, header, value, repeat
705def _open_boundary_condition_idf(
706 block_content, variables, periods: Dict[str, datetime]
707) -> Tuple[List[Dict[str, xr.DataArray]], List[datetime]]:
708 """
709 Read the variables specified from block_content.
710 """
711 n_system = block_content["n_system"]
712 n_time = len(block_content["time"])
713 n_total = n_system * n_time
715 das = [{} for _ in range(n_system)]
716 for variable in variables:
717 variable_content = block_content[variable]
719 n = len(variable_content)
720 if n != n_total:
721 raise ValueError(
722 f"Expected n_time * n_system = {n_time} * {n_system} = "
723 f"{n_total} entries for variable {variable}. Received: {n}"
724 )
726 # Group the paths and headers by system.
727 system_paths = defaultdict(list)
728 system_headers = defaultdict(list)
729 system_values = defaultdict(list)
730 all_repeats = set()
731 for i, entry in enumerate(variable_content):
732 path, header, value, repeat = _process_boundary_condition_entry(
733 entry, periods
734 )
735 header["name"] = variable
736 key = i % n_system
737 system_paths[key].append(path)
738 system_headers[key].append(header)
739 system_values[key].append(value)
740 if repeat:
741 all_repeats.add(repeat)
743 # Concat one system at a time.
744 for i, (paths, headers, values) in enumerate(
745 zip(system_paths.values(), system_headers.values(), system_values.values())
746 ):
747 das[i][variable] = _create_dataarray(paths, headers, values)
749 repeats = sorted(all_repeats)
750 return das, repeats
753def _read_package_gen(
754 block_content: Dict[str, Any], has_topbot: bool
755) -> List[Dict[str, Any]]:
756 out = []
757 for entry in block_content["gen"]:
758 gdf = imod.gen.read(entry["path"])
759 if has_topbot:
760 gdf["resistance"] = entry["factor"] * entry["addition"]
761 else:
762 gdf["multiplier"] = entry["factor"] * entry["addition"]
763 d = {
764 "geodataframe": gdf,
765 "layer": entry["layer"],
766 }
767 out.append(d)
768 return out
771def _read_package_ipf(
772 block_content: Dict[str, Any], periods: Dict[str, datetime]
773) -> Tuple[List[Dict[str, Any]], List[datetime]]:
774 out = []
775 repeats = []
776 for entry in block_content["ipf"]:
777 timestring = entry["time"]
778 layer = entry["layer"]
779 time = periods.get(timestring)
780 factor = entry["factor"]
781 addition = entry["addition"]
782 if time is None:
783 time = _process_time(timestring)
784 else:
785 repeats.append(time)
787 # Ensure the columns are identifiable.
788 path = Path(entry["path"])
789 ipf_df, indexcol, ext = imod.ipf._read_ipf(path)
790 if indexcol == 0:
791 # No associated files
792 columns = ("x", "y", "rate")
793 if layer <= 0:
794 df = ipf_df.iloc[:, :5]
795 columns = columns + ("top", "bottom")
796 else:
797 df = ipf_df.iloc[:, :3]
798 df.columns = columns
799 else:
800 dfs = []
801 for row in ipf_df.itertuples():
802 filename = row[indexcol]
803 path_assoc = path.parent.joinpath(f"{filename}.{ext}")
804 df_assoc = imod.ipf.read_associated(path_assoc).iloc[:, :2]
805 df_assoc.columns = ["time", "rate"]
806 df_assoc["x"] = row[1]
807 df_assoc["y"] = row[2]
808 df_assoc["id"] = path_assoc.stem
809 if layer <= 0:
810 df_assoc["top"] = row[4]
811 df_assoc["bottom"] = row[5]
812 dfs.append(df_assoc)
813 df = pd.concat(dfs, ignore_index=True, sort=False)
814 df["rate"] = df["rate"] * factor + addition
816 d = {
817 "dataframe": df,
818 "layer": layer,
819 "time": time,
820 }
821 out.append(d)
822 repeats = sorted(repeats)
823 return out, repeats
826def read_projectfile(path: FilePath) -> Dict[str, Any]:
827 """
828 Read an iMOD project file into a collection of nested dictionaries.
830 The top-level keys are the "topic" entries such "bnd" or "riv" in the
831 project file. An example structure of the dictionaries is visualized below:
833 .. code-block::
835 content
836 ├── bnd
837 │ ├── active: bool
838 │ └── ibound: list of dictionaries for each layer
839 ├── riv
840 │ ├── active: bool
841 │ ├── conductance: list of dictionaries for each time and layer.
842 │ ├── stage: idem.
843 │ ├── bottom_elevation: idem.
844 │ └── infiltration_factor: idem.
845 etc.
847 Time and layer are flattened into a single list and time is included in
848 every dictionary:
850 .. code-block::
852 stage
853 ├── 0 # First entry in list
854 │ ├── active: bool
855 │ ├── is_constant: bool
856 │ ├── layer: int
857 │ ├── factor: float
858 │ ├── addition: float
859 │ ├── constant: float
860 │ ├── path: str
861 │ └── time: str
862 │
863 ├── 1 # Second entry in list
864 │ ├── etc.
865 etc.
868 Parameters
869 ----------
870 path: str or Path
872 Returns
873 -------
874 content: Dict[str, Any]
875 """
876 # Force to Path
877 path = Path(path)
879 with open(path) as f:
880 lines = f.readlines()
882 tokenized = []
883 for i, line in enumerate(lines):
884 try:
885 tokenized.append(_tokenize(line))
886 except Exception as e:
887 raise type(e)(f"{e}\nError occurred in line {i}")
889 lines = _LineIterator(tokenized)
890 content = {}
891 wdir = path.parent
892 # Change dir temporarely to projectfile dir to resolve relative paths
893 with imod.util.cd(wdir):
894 while not lines.finished:
895 _parse_block(lines, content)
897 return content
900def open_projectfile_data(path: FilePath) -> Dict[str, Any]:
901 """
902 Read the contents of an iMOD project file and read/open the data present in
903 it:
905 * IDF data is lazily loaded into xarray.DataArrays.
906 * GEN data is eagerly loaded into geopandas.GeoDataFrames
907 * IPF data is eagerly loaded into pandas.DataFrames
908 * Non-file based entries (such as the PCG settings) are kept as a dictionary.
910 When multiple systems are present, they are numbered starting from one, e.g.:
912 * drn-1
913 * drn-2
915 Xarray requires valid dates for the time coordinate. Aliases such as
916 "summer" and "winter" that are associated with dates in the project file
917 Periods block cannot be used in the time coordinate. Hence, this function
918 will instead insert the dates associated with the aliases, with the year
919 replaced by 1899; as the iMOD calendar starts at 1900, this ensures that
920 the repeats are always first and that no date collisions will occur.
922 Parameters
923 ----------
924 path: pathlib.Path or str.
926 Returns
927 -------
928 data: Dict[str, Any]
929 Keys are the iMOD project file "topics", without parentheses.
930 """
931 content = read_projectfile(path)
932 periods_block = content.pop("periods", None)
933 if periods_block is None:
934 periods = {}
935 else:
936 # Set the year of a repeat date to 1899: this ensures it falls outside
937 # of the iMOD calendar. Collisions are then always avoided.
938 periods = {
939 key: _process_time(time, yearfirst=False).replace(year=1899)
940 for key, time in periods_block.items()
941 }
943 # Pop species block, at the moment we do not do much with,
944 # since most regional models are without solute transport
945 content.pop("species", None)
947 has_topbot = "(top)" in content and "(bot)" in content
948 prj_data = {}
949 repeat_stress = {}
950 for key, block_content in content.items():
951 repeats = None
952 try:
953 if key == "(hfb)":
954 data = _read_package_gen(block_content, has_topbot)
955 elif key == "(wel)":
956 data, repeats = _read_package_ipf(block_content, periods)
957 elif key == "(cap)":
958 variables = set(METASWAP_VARS).intersection(block_content.keys())
959 data = _open_package_idf(block_content, variables)
960 elif key in ("extra", "(pcg)"):
961 data = [block_content]
962 elif key in KEYS:
963 variables = KEYS[key]
964 data = _open_package_idf(block_content, variables)
965 elif key in DATE_KEYS:
966 variables = DATE_KEYS[key]
967 data, repeats = _open_boundary_condition_idf(
968 block_content, variables, periods
969 )
970 else:
971 raise KeyError(f"Unsupported key: '{key}'")
972 except Exception as e:
973 raise type(e)(f"{e}. Errored while opening/reading data entries for: {key}")
975 strippedkey = key.strip("(").strip(")")
976 if len(data) > 1:
977 for i, da in enumerate(data):
978 numbered_key = f"{strippedkey}-{i + 1}"
979 prj_data[numbered_key] = da
980 repeat_stress[numbered_key] = repeats
981 else:
982 prj_data[strippedkey] = data[0]
983 repeat_stress[strippedkey] = repeats
985 repeat_stress = {k: v for k, v in repeat_stress.items() if v}
986 return prj_data, repeat_stress
989def read_timfile(path: FilePath) -> List[Dict]:
990 def parsetime(time: str) -> datetime:
991 # Check for steady-state:
992 if time == "00000000000000":
993 return None
994 return datetime.strptime(time, "%Y%m%d%H%M%S")
996 with open(path, "r") as f:
997 lines = f.readlines()
999 # A line contains 2, 3, or 4 values:
1000 # time, isave, nstp, tmult
1001 casters = {
1002 "time": parsetime,
1003 "save": lambda x: bool(int(x)),
1004 "n_timestep": int,
1005 "timestep_multiplier": float,
1006 }
1007 content = []
1008 for line in lines:
1009 stripped = line.strip()
1010 if stripped == "":
1011 continue
1012 parts = stripped.split(",")
1013 entry = {k: cast(s.strip()) for s, (k, cast) in zip(parts, casters.items())}
1014 content.append(entry)
1016 return content