Coverage for C:\src\imod-python\imod\wq\pkgbase.py: 14%
264 statements
coverage.py v7.4.4, created at 2024-04-08 10:26 +0200
import abc
import pathlib
import warnings

import jinja2
import numpy as np
import xarray as xr

import imod
from imod.util.time import forcing_starts_ends, to_datetime_internal


class Package(abc.ABC):
    """
    Base package for the different SEAWAT packages.
    Every package contains a ``_pkg_id`` for identification.
    Used to check for duplicate entries, or to group multiple systems together
    (riv, ghb, drn).

    The ``_template`` attribute is the template for a section of the runfile.
    This is filled in based on the metadata from the DataArrays that are within
    the Package.

    The ``_keywords`` attribute is a dictionary that is used to replace readable
    keyword arguments by the integer arguments that SEAWAT expects.
    """

    @classmethod
    def from_file(cls, path, **kwargs):
        """
        Loads an imod-wq package from a file (currently only netCDF and zarr
        are supported). Note that it is expected that this file was saved with
        imod.wq.Package.save(), as the checks upon package initialization are
        not done again!

        Parameters
        ----------
        path : str, pathlib.Path
            Path to the file.
        **kwargs : keyword arguments
            Arbitrary keyword arguments forwarded to ``xarray.open_dataset()`` or
            ``xarray.open_zarr()``.
            Refer to the examples.

        Returns
        -------
        package : imod.wq.Package
            Returns a package with data loaded from file.

        Examples
        --------

        To load a package from a file, e.g. a River package:

        >>> river = imod.wq.River.from_file("river.nc")

        For large datasets, you likely want to process them in chunks. You can
        forward keyword arguments to ``xarray.open_dataset()`` or
        ``xarray.open_zarr()``:

        >>> river = imod.wq.River.from_file("river.nc", chunks={"time": 1})

        Refer to the xarray documentation for the possible keyword arguments.
        """
        path = pathlib.Path(path)

        # See https://stackoverflow.com/a/2169191
        # We expect the data in the netCDF to have been saved as a package,
        # so the checks run by __init__ and __setitem__ do not have to be
        # called again.
        return_cls = cls.__new__(cls)

        if path.suffix in (".zip", ".zarr"):
            # TODO: seems like a bug? Remove str() call if fixed in xarray/zarr
            return_cls.dataset = xr.open_zarr(str(path), **kwargs)
        else:
            return_cls.dataset = xr.open_dataset(path, **kwargs)

        return return_cls

    def __init__(self):
        self.dataset = xr.Dataset()

    def __setitem__(self, key, value):
        if isinstance(value, xr.DataArray):
            if "z" in value.dims:
                if "layer" not in value.coords:
                    raise ValueError(
                        'Coordinate "layer" must be present in DataArrays with a "z" dimension'
                    )
                value = value.swap_dims({"z": "layer"})
            if "layer" in value.dims:
                value = value.dropna(dim="layer", how="all")
        self.dataset.__setitem__(key, value)

    def __getitem__(self, key):
        return self.dataset.__getitem__(key)
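
    # Illustrative sketch (not part of the original module; ``package`` and
    # ``stage`` are hypothetical). A DataArray with a "z" dimension must carry a
    # "layer" coordinate; the dimension is swapped to "layer" and fully-nan
    # layers are dropped before the data is stored in the dataset:
    #
    # >>> da = xr.DataArray(
    # ...     [1.0, np.nan],
    # ...     dims=["z"],
    # ...     coords={"z": [-5.0, -15.0], "layer": ("z", [1, 2])},
    # ... )
    # >>> package["stage"] = da  # stored with dims ("layer",); layer 2 is dropped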

    def isel(self):
        raise NotImplementedError(
            f"Selection on packages not yet supported. "
            f"To make a selection on the xr.Dataset, call {self._pkg_id}.dataset.isel instead. "
            f"You can create a new package with a selection by calling {__class__.__name__}(**{self._pkg_id}.dataset.isel(**selection))"
        )

    def sel(self):
        raise NotImplementedError(
            f"Selection on packages not yet supported. "
            f"To make a selection on the xr.Dataset, call {self._pkg_id}.dataset.sel instead. "
            f"You can create a new package with a selection by calling {__class__.__name__}(**{self._pkg_id}.dataset.sel(**selection))"
        )

    def _replace_keyword(self, d, key):
        """
        Method to replace a readable keyword value by the corresponding cryptic
        integer value that SEAWAT demands.

        Dict ``d`` is updated in place.

        Parameters
        ----------
        d : dict
            Updated in place.
        key : str
            Key of the value in dict ``d`` to replace.
        """
        keyword = d[key][()]  # Get value from 0d np.array
        value = self._keywords[key][keyword]
        d[key] = value
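
    # Illustrative sketch (not part of the original module; the keyword name and
    # mapping below are hypothetical). A readable value stored as a 0d array is
    # swapped in place for the integer code that SEAWAT expects:
    #
    # >>> d = {"method": np.array("upstream")}
    # >>> self._keywords["method"]
    # {'central': 0, 'upstream': 1}
    # >>> self._replace_keyword(d, "method")
    # >>> d["method"]
    # 1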

    def _render(self, *args, **kwargs):
        """
        Rendering method for simple keyword packages (vdf, pcg).

        Returns
        -------
        rendered : str
            The rendered runfile section for the package.
        """
        d = {k: v.values for k, v in self.dataset.data_vars.items()}  # pylint: disable=no-member
        if hasattr(self, "_keywords"):
            for key in self._keywords.keys():
                self._replace_keyword(d, key)
        return self._template.format(**d)
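
    # Illustrative sketch (hypothetical template and values, not part of the
    # original module). For keyword packages, ``_template`` is a plain format
    # string, so rendering is a single ``str.format`` call on the scalar data_vars:
    #
    # >>> template = "[pcg]\n    mxiter = {mxiter}\n    iter1 = {iter1}\n"
    # >>> template.format(**{"mxiter": 150, "iter1": 30})
    # '[pcg]\n    mxiter = 150\n    iter1 = 30\n'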

    def _compose_path(self, d, pattern=None):
        # d : dict
        # pattern : string or re.pattern
        return imod.util.path.compose(d, pattern).as_posix()

    def _compress_idflayers(self, values, range_path):
        """
        Compresses explicit layers into ranges.

        This saves on the number of lines, makes the runfile smaller, and makes
        imod-wq faster.
        """
        layers = np.array(list(values.keys()))
        if len(layers) == 1:
            return values

        breaks = np.argwhere(np.diff(layers) != 1)
        if breaks.size == 0:
            start = layers[0]
            end = layers[-1]
            return {f"{start}:{end}": range_path}
        starts = [0] + list(breaks[:, 0] + 1)
        ends = list(breaks[:, 0]) + [len(layers) - 1]

        compressed = {}
        for start_index, end_index in zip(starts, ends):
            start = layers[start_index]
            end = layers[end_index]
            if start == end:
                compressed[f"{start}"] = values[start]
            else:
                compressed[f"{start}:{end}"] = range_path

        return compressed
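
    # Illustrative sketch (hypothetical paths, not part of the original module).
    # Consecutive layer numbers collapse into a "start:end" entry pointing at
    # ``range_path``; isolated layers keep their own value:
    #
    # >>> values = {1: "riv/stage_l1.idf", 2: "riv/stage_l2.idf",
    # ...           3: "riv/stage_l3.idf", 7: "riv/stage_l7.idf"}
    # >>> self._compress_idflayers(values, range_path="riv/stage_l:.idf")
    # {'1:3': 'riv/stage_l:.idf', '7': 'riv/stage_l7.idf'}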

    def _compose_values_layer(
        self, varname, directory, nlayer, time=None, da=None, compress=True
    ):
        """
        Composes paths to files, or gets the appropriate scalar value for
        a single variable in a dataset.

        Parameters
        ----------
        varname : str
            Variable name of the DataArray.
        directory : str
            Path to working directory, where files will be written.
            Necessary to generate the paths for the runfile.
        nlayer : int
            Number of layers in the model. Used to decide whether the "$"
            wildcard can be used when the variable covers all layers.
        time : datetime like, optional
            Time corresponding to the value.
        da : xr.DataArray, optional
            In some cases fetching the DataArray by varname is not desired.
            It can be passed directly via this optional argument.
        compress : boolean
            Whether or not to compress the layers using the imod-wq macros.
            Should be disabled for time-dependent input.

        Returns
        -------
        values : dict
            Dictionary containing the {layer number: path to file}.
            Alternatively: {layer number: scalar value}. The layer number may be
            a wildcard (e.g. '?').
        """
        pattern = "{name}"

        values = {}
        if da is None:
            da = self.dataset[varname]

        d = {"directory": directory, "name": varname, "extension": ".idf"}

        if "species" in da.coords:
            d["species"] = da.coords["species"].values
            pattern += "_c{species}"

        if time is not None:
            d["time"] = time
            pattern += "_{time:%Y%m%d%H%M%S}"

        # Scalar value or not?
        # If it's a scalar value we can write it immediately,
        # otherwise we have to write a file path.
        if "y" not in da.coords and "x" not in da.coords:
            idf = False
        else:
            idf = True

        if "layer" not in da.coords:
            if idf:
                # Special case: concentration.
                # Using "?" results in too many sinks and sources according to imod-wq.
                pattern += "{extension}"
                if hasattr(self, "_ssm_layers"):
                    for layer in self._ssm_layers:
                        d["layer"] = layer
                        values[layer] = self._compose_path(d, pattern=pattern)
                else:
                    values["?"] = self._compose_path(d, pattern=pattern)
            else:
                # Special case: concentration.
                # Using "?" results in too many sinks and sources according to imod-wq.
                if hasattr(self, "_ssm_layers"):
                    for layer in self._ssm_layers:
                        values[layer] = da.values[()]
                    values = self._compress_values(values)
                else:
                    values["?"] = da.values[()]

        else:
            pattern += "_l{layer}{extension}"
            for layer in np.atleast_1d(da.coords["layer"].values):
                if idf:
                    d["layer"] = layer
                    values[layer] = self._compose_path(d, pattern=pattern)
                else:
                    if "layer" in da.dims:
                        values[layer] = da.sel(layer=layer).values[()]
                    else:
                        values[layer] = da.values[()]

        # Compress the runfile contents using the imod-wq macros
        if "layer" in da.dims:
            if idf and da["layer"].size == nlayer:
                # Compose does not accept non-integers, so use 0, then replace
                d["layer"] = 0
                token_path = imod.util.path.compose(d, pattern=pattern).as_posix()
                token_path = token_path.replace("_l0", "_l$")
                values = {"$": token_path}
            elif idf and compress:
                # Compose does not accept non-integers, so use 0, then replace
                d["layer"] = 0
                range_path = imod.util.path.compose(d, pattern=pattern).as_posix()
                range_path = range_path.replace("_l0", "_l:")
                values = self._compress_idflayers(values, range_path)
            elif compress:
                values = self._compress_values(values)

        return values
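
    # Illustrative sketch (hypothetical data and paths, not part of the original
    # module). A spatial DataArray whose "layer" coordinate covers all model
    # layers collapses to the "$" wildcard; a scalar without x/y coordinates is
    # written directly under the "?" wildcard:
    #
    # >>> self._compose_values_layer("stage", directory="riv", nlayer=3)
    # {'$': 'riv/stage_l$.idf'}
    # >>> self._compose_values_layer("conductance", directory="riv", nlayer=3)
    # {'?': 100.0}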

    def _compress_values(self, values):
        """
        Compress repeated values into fewer lines, aimed at the imod-wq macros.
        """
        keys = list(values.keys())
        values = np.array(list(values.values()))
        n = len(values)
        # Try fast method first
        try:
            index_ends = np.argwhere(np.diff(values) != 0)
        except np.core._exceptions.UFuncTypeError:
            # Now try a fully general method
            index_ends = []
            for i in range(n - 1):
                if values[i] != values[i + 1]:
                    index_ends.append(i)
            index_ends = np.array(index_ends)

        index_ends = np.append(index_ends, n - 1)
        index_starts = np.insert(index_ends[:-1] + 1, 0, 0)
        compressed = {}
        for start_index, end_index in zip(index_starts, index_ends):
            s = keys[start_index]
            e = keys[end_index]
            value = values[start_index]
            if s == e:
                compressed[f"{s}"] = value
            else:
                compressed[f"{s}:{e}"] = value
        return compressed
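
    # Illustrative sketch (hypothetical values, not part of the original module).
    # Runs of identical values over consecutive keys collapse into "start:end"
    # entries:
    #
    # >>> self._compress_values({1: 30.0, 2: 30.0, 3: 30.0, 4: 10.0})
    # {'1:3': 30.0, '4': 10.0}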

    def _compose_values_time(self, varname, globaltimes):
        da = self.dataset[varname]
        values = {}

        if "time" in da.coords:
            package_times = da.coords["time"].values

            starts_ends = forcing_starts_ends(package_times, globaltimes)
            for itime, start_end in enumerate(starts_ends):
                # TODO: this now fails on a non-dim time too
                # solution roughly the same as for layer above?
                values[start_end] = da.isel(time=itime).values[()]
        else:
            values["?"] = da.values[()]

        values = self._compress_values(values)
        return values

    def save(self, directory):
        for name, da in self.dataset.data_vars.items():  # pylint: disable=no-member
            if "y" in da.coords and "x" in da.coords:
                path = pathlib.Path(directory).joinpath(name)
                imod.idf.save(path, da)
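
    # Illustrative sketch (hypothetical package and filenames, not part of the
    # original module). Only spatial variables (those with "y" and "x"
    # coordinates) are written, one set of IDF files per variable; scalar
    # variables end up in the runfile instead:
    #
    # >>> river.save("riv")  # writes e.g. riv/stage_l1.idf, riv/stage_l2.idf, ...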

    def _check_positive(self, varnames):
        for var in varnames:
            # Take care with nan values
            if (self.dataset[var] < 0).any():
                raise ValueError(f"{var} in {self} must be positive")

    def _check_range(self, varname, lower, upper):
        # TODO: this isn't used anywhere so far.
        warn = False
        msg = ""
        if (self.dataset[varname] < lower).any():
            warn = True
            msg += f"{varname} in {self}: values lower than {lower} detected. "
        if (self.dataset[varname] > upper).any():
            warn = True
            msg += f"{varname} in {self}: values higher than {upper} detected."
        if warn:
            warnings.warn(msg, RuntimeWarning)

    def _check_location_consistent(self, varnames):
        dims = set(self.dataset.dims)
        is_scalar = {}
        for var in varnames:
            scalar = (self.dataset[var].shape == ()) or not any(
                dim in self.dataset[var].dims for dim in ["time", "layer", "y", "x"]
            )
            if not scalar:  # skip scalar values
                dims = dims.intersection(self.dataset[var].dims)
            is_scalar[var] = scalar

        is_nan = True
        for var in varnames:
            if not is_scalar[var]:  # skip scalar values
                # Dimensions cannot change for in-place operations, so reduce to
                # the lowest common set of dimensions (e.g. just x and y).
                var_dims = set(self.dataset[var].dims)
                reduce_dims = var_dims.difference(dims)
                # In-place boolean operator
                is_nan &= np.isnan(self.dataset[var]).all(dim=reduce_dims)

        for var in varnames:
            if not is_scalar[var]:  # skip scalar values
                if (np.isnan(self.dataset[var]) ^ is_nan).any():
                    raise ValueError(
                        f"{var} in {self} is not consistent with all variables in: "
                        f"{', '.join(varnames)}. nan values do not line up."
                    )
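
    # Illustrative sketch (hypothetical variable names, not part of the original
    # module). If "conductance" holds a value in a cell where "stage" is nan (or
    # vice versa), the nan patterns no longer line up and a ValueError is raised:
    #
    # >>> self._check_location_consistent(["stage", "conductance", "bottom_elevation"])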


class BoundaryCondition(Package, abc.ABC):
    """
    Base package for (transient) boundary conditions:

    * recharge
    * general head boundary
    * constant head
    * river
    * drainage
    """

    _template = jinja2.Template(
        " {%- for name, dictname in mapping -%}"
        " {%- for time, timedict in dicts[dictname].items() -%}"
        " {%- for layer, value in timedict.items() %}\n"
        " {{name}}_p{{time}}_s{{system_index}}_l{{layer}} = {{value}}\n"
        " {%- endfor -%}\n"
        " {%- endfor -%}"
        " {%- endfor -%}"
    )
    _ssm_template = jinja2.Template(
        "{%- for species, timedict in concentration.items() -%}"
        " {%- for time, layerdict in timedict.items() -%}"
        " {%- for layer, value in layerdict.items() %}\n"
        " c{{pkg_id}}_t{{species}}_p{{time}}_l{{layer}} = {{value}}\n"
        " {%- endfor -%}"
        " {%- endfor -%}"
        "{%- endfor -%}"
    )
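
    # Illustrative sketch (hypothetical names and values, not part of the
    # original module). Rendered, the boundary condition template yields runfile
    # lines such as:
    #
    #     stage_p1:5_s1_l$ = riv/stage_l$.idf
    #
    # and the SSM template yields concentration lines such as:
    #
    #     criv_t1_p1:5_l1 = 35.0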

    def add_timemap(self, *args, **kwargs):
        import warnings

        warnings.warn(
            "add_timemap is deprecated: use repeat_stress instead", FutureWarning
        )
        self.repeat_stress(*args, **kwargs)

    def _repeat_stress(self, varname, value, use_cftime):
        if value is not None:
            if varname not in self.dataset.data_vars:
                raise ValueError(
                    f"{varname} does not occur in {self}\n cannot repeat stress"
                )
            if "time" not in self.dataset[varname].coords:
                raise ValueError(
                    f"{varname} in {self}\n does not have dimension time, cannot repeat stress."
                )

            # Replace both key and value by the right datetime type
            d = {
                to_datetime_internal(k, use_cftime): to_datetime_internal(v, use_cftime)
                for k, v in value.items()
            }
            self.dataset[varname].attrs["stress_repeats"] = d
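
    # Illustrative sketch (hypothetical variable and dates, not part of the
    # original module). ``value`` maps times that must be simulated onto the
    # times that actually carry data, e.g. reuse the 2020-01-01 stress for the
    # start of 2021 and 2022:
    #
    # >>> self._repeat_stress(
    # ...     "rate",
    # ...     {"2021-01-01": "2020-01-01", "2022-01-01": "2020-01-01"},
    # ...     use_cftime=False,
    # ... )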

    def _compose_values_timelayer(
        self, varname, globaltimes, directory, nlayer, da=None
    ):
        """
        Composes paths to files, or gets the appropriate scalar value for
        a single variable in a dataset.

        Parameters
        ----------
        varname : str
            Variable name of the DataArray.
        globaltimes : list, np.array
            Holds the global times, i.e. the combined unique times of
            every boundary condition that are used to define the stress
            periods.
            The times of the BoundaryCondition do not have to match all
            the global times. When a globaltime is not present in the
            BoundaryCondition, the value of the first previous available time is
            filled in. The effective result is a forward fill in time.
        directory : str
            Path to working directory, where files will be written.
            Necessary to generate the paths for the runfile.
        nlayer : int
            Number of layers in the model.
        da : xr.DataArray, optional
            In some cases fetching the DataArray by varname is not desired.
            It can be passed directly via this optional argument.

        Returns
        -------
        values : dict
            Dictionary containing the {stress period number: {layer number: path
            to file}}. Alternatively: {stress period number: {layer number: scalar
            value}}.
            The stress period number and layer number may be wildcards (e.g. '?').
        """
        values = {}

        if da is None:
            da = self.dataset[varname]

        if "time" in da.coords:
            da_times = da.coords["time"].values
            if "stress_repeats" in da.attrs:
                stress_repeats_keys = np.array(list(da.attrs["stress_repeats"].keys()))
                stress_repeats_values = np.array(
                    list(da.attrs["stress_repeats"].values())
                )
                package_times, inds = np.unique(
                    np.concatenate([da_times, stress_repeats_keys]), return_index=True
                )
                # Times to write in the runfile
                runfile_times = np.concatenate([da_times, stress_repeats_values])[inds]
            else:
                runfile_times = package_times = da_times

            starts_ends = forcing_starts_ends(package_times, globaltimes)

            for time, start_end in zip(runfile_times, starts_ends):
                # Check whether any range occurs in the input.
                # If it does, compress should be False.
                compress = not (":" in start_end)
                values[start_end] = self._compose_values_layer(
                    varname,
                    directory,
                    nlayer=nlayer,
                    time=time,
                    da=da,
                    compress=compress,
                )

        else:
            values["?"] = self._compose_values_layer(
                varname, directory, nlayer=nlayer, da=da
            )

        return values
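
    # Illustrative sketch (hypothetical output, not part of the original module).
    # The result is a nested {stress period: {layer: path-or-scalar}} mapping,
    # for example:
    #
    # >>> self._compose_values_timelayer("stage", globaltimes, "riv", nlayer=3)
    # {'1:5': {'$': 'riv/stage_20200101000000_l$.idf'},
    #  '6:12': {'$': 'riv/stage_20200601000000_l$.idf'}}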

    def _max_active_n(self, varname, nlayer, nrow, ncol):
        """
        Determine the maximum number of cells that are active
        during a stress period.

        Parameters
        ----------
        varname : str
            Name of the variable to use to calculate the maximum number of
            active cells. Generally conductance.
        nlayer, nrow, ncol : int
            Size of the model grid, taken from ibound.
        """
        # First compute the number of active cells
        if "time" in self.dataset[varname].coords:
            nmax = int(self.dataset[varname].groupby("time").count(xr.ALL_DIMS).max())
        else:
            nmax = int(self[varname].count())
        if "layer" not in self.dataset.coords:  # Then it applies to every layer
            nmax *= nlayer
        self._cellcount = nmax  # Store cellcount so it can be re-used for ssm.
        self._ssm_cellcount = nmax

        # Second, compute the number of active sinks and sources.
        # Overwrite _ssm_cellcount if more specific info is available.
        if "concentration" in self.dataset.data_vars:
            da = self["concentration"]

            if "species" in da.coords:
                nspecies = da.coords["species"].size
            else:
                nspecies = 1

            if "y" not in da.coords and "x" not in da.coords:
                # It's not idf data, but scalar instead
                if "layer" in self.dataset.coords:
                    # Store layers for rendering
                    da_nlayer = self.dataset.coords["layer"].size
                    if da_nlayer == nlayer:
                        # Insert wildcard
                        self._ssm_layers = ["?"]
                    else:
                        self._ssm_layers = self.dataset.coords["layer"].values
                        nlayer = da_nlayer
                # Sinks and sources are applied everywhere,
                # in contrast to other inputs
                nmax = nlayer * nrow * ncol

            self._ssm_cellcount = nmax * nspecies

        return nmax
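
    # Illustrative sketch (hypothetical numbers, not part of the original
    # module). For a time-dependent conductance that covers at most 500 cells in
    # any stress period, in a package without a "layer" coordinate, the count is
    # multiplied by the number of layers:
    #
    # >>> self._max_active_n("conductance", nlayer=3, nrow=100, ncol=100)
    # 1500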

    def _render(self, directory, globaltimes, system_index, nlayer):
        """
        Parameters
        ----------
        directory : str
            Path to working directory, where files will be written.
            Necessary to generate the paths for the runfile.
        globaltimes : list, np.array
            Holds the global times, i.e. the combined unique times of
            every boundary condition that are used to define the stress
            periods.
        system_index : int
            Drainage, River, and GeneralHeadBoundary support multiple systems.
            This is the number that ends up in the runfile for a given
            BoundaryCondition object.
            Note that MT3DMS does not fully support multiple systems, and only
            the first system can act as a source of species.

        Returns
        -------
        rendered : str
            The rendered runfile part for a single boundary condition system.
        """
        mapping = tuple(
            [(k, v) for k, v in self._mapping if v in self.dataset.data_vars]
        )
        d = {"mapping": mapping, "system_index": system_index}
        dicts = {}

        for varname in self.dataset.data_vars.keys():  # pylint: disable=no-member
            if varname == "concentration":
                continue
            dicts[varname] = self._compose_values_timelayer(
                varname, globaltimes, directory, nlayer=nlayer
            )

        d["dicts"] = dicts

        return self._template.render(d)

    def _render_ssm(self, directory, globaltimes, nlayer):
        """
        Parameters
        ----------
        directory : str
            Path to working directory, where files will be written.
            Necessary to generate the paths for the runfile.
        globaltimes : list, np.array
            Holds the global times, i.e. the combined unique times of
            every boundary condition that are used to define the stress
            periods.

        Returns
        -------
        rendered : str
            The rendered runfile SSM part for a single boundary condition system.
        """
        if "concentration" not in self.dataset.data_vars:
            return ""

        d = {"pkg_id": self._pkg_id}
        if "species" in self.dataset["concentration"].coords:
            concentration = {}
            for species in self.dataset["concentration"]["species"].values:
                concentration[species] = self._compose_values_timelayer(
                    varname="concentration",
                    da=self.dataset["concentration"].sel(species=species),
                    globaltimes=globaltimes,
                    directory=directory,
                    nlayer=nlayer,
                )
        else:
            concentration = {
                1: self._compose_values_timelayer(
                    varname="concentration",
                    da=self.dataset["concentration"],
                    globaltimes=globaltimes,
                    directory=directory,
                    nlayer=nlayer,
                )
            }
        d["concentration"] = concentration

        return self._ssm_template.render(d)
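
    # Illustrative sketch (hypothetical values, not part of the original module).
    # For a river package with ``_pkg_id`` "riv" and a single species (keyed 1
    # when no "species" coordinate is present), the SSM part renders lines of
    # the form:
    #
    #     criv_t1_p1:5_l1:3 = 35.0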