Coverage for C:\src\imod-python\imod\formats\idf.py: 85%
225 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 14:15 +0200
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 14:15 +0200
1"""
2Functions for reading and writing iMOD Data Files (IDFs) to ``xarray`` objects.
4The primary functions to use are :func:`imod.idf.open` and
5:func:`imod.idf.save`, though lower level functions are also available.
6"""
8import glob
9import pathlib
10import struct
11import warnings
12from collections import defaultdict
13from collections.abc import Iterable
14from pathlib import Path
15from re import Pattern
16from typing import Any
18import numpy as np
19import xarray as xr
21import imod
22from imod.formats import array_io
23from imod.typing.structured import merge_partitions
25# Make sure we can still use the built-in function...
26f_open = open
29def header(path, pattern):
30 """Read the IDF header information into a dictionary"""
31 attrs = imod.util.path.decompose(path, pattern)
32 with f_open(path, "rb") as f:
33 reclen_id = struct.unpack("i", f.read(4))[0] # Lahey RecordLength Ident.
34 if reclen_id == 1271:
35 floatsize = intsize = 4
36 floatformat = "f"
37 intformat = "i"
38 dtype = "float32"
39 doubleprecision = False
40 # 2296 was a typo in the iMOD manual. Keep 2296 around in case some IDFs
41 # were written with this identifier to avoid possible incompatibility
42 # issues.
43 elif reclen_id == 2295 or reclen_id == 2296:
44 floatsize = intsize = 8
45 floatformat = "d"
46 intformat = "q"
47 dtype = "float64"
48 doubleprecision = True
49 else:
50 raise ValueError(
51 f"Not a supported IDF file: {path}\n"
52 "Record length identifier should be 1271 or 2295, "
53 f"received {reclen_id} instead."
54 )
56 # Header is fully doubled in size in case of double precision ...
57 # This means integers are also turned into 8 bytes
58 # and requires padding with some additional bytes
59 if doubleprecision:
60 f.read(4) # not used
62 ncol = struct.unpack(intformat, f.read(intsize))[0]
63 nrow = struct.unpack(intformat, f.read(intsize))[0]
64 attrs["xmin"] = struct.unpack(floatformat, f.read(floatsize))[0]
65 attrs["xmax"] = struct.unpack(floatformat, f.read(floatsize))[0]
66 attrs["ymin"] = struct.unpack(floatformat, f.read(floatsize))[0]
67 attrs["ymax"] = struct.unpack(floatformat, f.read(floatsize))[0]
68 # dmin and dmax are recomputed during writing
69 f.read(floatsize) # dmin, minimum data value present
70 f.read(floatsize) # dmax, maximum data value present
71 nodata = struct.unpack(floatformat, f.read(floatsize))[0]
72 attrs["nodata"] = nodata
73 # flip definition here such that True means equidistant
74 # equidistant IDFs
75 ieq = not struct.unpack("?", f.read(1))[0]
76 itb = struct.unpack("?", f.read(1))[0]
78 f.read(2) # not used
79 if doubleprecision:
80 f.read(4) # not used
82 if ieq:
83 # dx and dy are stored positively in the IDF
84 # dy is made negative here to be consistent with the nonequidistant case
85 attrs["dx"] = struct.unpack(floatformat, f.read(floatsize))[0]
86 attrs["dy"] = -struct.unpack(floatformat, f.read(floatsize))[0]
88 if itb:
89 attrs["top"] = struct.unpack(floatformat, f.read(floatsize))[0]
90 attrs["bot"] = struct.unpack(floatformat, f.read(floatsize))[0]
92 if not ieq:
93 # dx and dy are stored positive in the IDF, but since the difference between
94 # successive y coordinates is negative, it is made negative here
95 attrs["dx"] = np.fromfile(f, dtype, ncol)
96 attrs["dy"] = -np.fromfile(f, dtype, nrow)
98 # These are derived, remove after using them downstream
99 attrs["headersize"] = f.tell()
100 attrs["ncol"] = ncol
101 attrs["nrow"] = nrow
102 attrs["dtype"] = dtype
104 return attrs
107def _read(path, headersize, nrow, ncol, nodata, dtype):
108 """
109 Read a single IDF file to a numpy.ndarray
111 Parameters
112 ----------
113 path : str or Path
114 Path to the IDF file to be read
115 headersize : int
116 byte size of header
117 nrow : int
118 ncol : int
119 nodata : np.float
121 Returns
122 -------
123 numpy.ndarray
124 A float numpy.ndarray with shape (nrow, ncol) of the values
125 in the IDF file. On opening all nodata values are changed
126 to NaN in the numpy.ndarray.
127 """
128 with f_open(path, "rb") as f:
129 f.seek(headersize)
130 a = np.reshape(np.fromfile(f, dtype, nrow * ncol), (nrow, ncol))
131 return array_io.reading._to_nan(a, nodata)
134def read(path, pattern=None):
135 """
136 Read a single IDF file to a numpy.ndarray
138 Parameters
139 ----------
140 path : str or Path
141 Path to the IDF file to be read
142 pattern : str, regex pattern, optional
143 If the filenames do match default naming conventions of
144 {name}_{time}_l{layer}, a custom pattern can be defined here either
145 as a string, or as a compiled regular expression pattern. Please refer
146 to the examples for ``imod.idf.open``.
148 Returns
149 -------
150 numpy.ndarray
151 A float numpy.ndarray with shape (nrow, ncol) of the values
152 in the IDF file. On opening all nodata values are changed
153 to NaN in the numpy.ndarray.
154 dict
155 A dict with all metadata.
156 """
157 warnings.warn(
158 "The idf.read() function is deprecated. To get a numpy array of an IDF, "
159 "use instead: imod.idf.open(path).values",
160 FutureWarning,
161 )
162 attrs = header(path, pattern)
163 headersize = attrs.pop("headersize")
164 nrow = attrs.pop("nrow")
165 ncol = attrs.pop("ncol")
166 nodata = attrs.pop("nodata")
167 dtype = attrs.pop("dtype")
168 return _read(path, headersize, nrow, ncol, nodata, dtype), attrs
171# Open IDFs for multiple times and/or layers into one DataArray
172def open(path, use_cftime=False, pattern=None):
173 r"""
174 Open one or more IDF files as an xarray.DataArray.
176 In accordance with xarray's design, ``open`` loads the data of IDF files
177 lazily. This means the data of the IDFs are not loaded into memory until the
178 data is needed. This allows for easier handling of large datasets, and
179 more efficient computations.
181 Parameters
182 ----------
183 path : str, Path or list
184 This can be a single file, 'head_l1.idf', a glob pattern expansion,
185 'head_l*.idf', or a list of files, ['head_l1.idf', 'head_l2.idf'].
186 Note that each file needs to be of the same name (part before the
187 first underscore) but have a different layer and/or timestamp,
188 such that they can be combined in a single xarray.DataArray.
189 use_cftime : bool, optional
190 Use ``cftime.DatetimeProlepticGregorian`` instead of `np.datetime64[ns]`
191 for the time axis.
193 Dates are normally encoded as ``np.datetime64[ns]``; however, if dates
194 fall before 1678 or after 2261, they are automatically encoded as
195 ``cftime.DatetimeProlepticGregorian`` objects rather than
196 ``np.datetime64[ns]``.
197 pattern : str, regex pattern, optional
198 If the filenames do match default naming conventions of
199 {name}_{time}_l{layer}, a custom pattern can be defined here either
200 as a string, or as a compiled regular expression pattern. See the
201 examples below.
203 Returns
204 -------
205 xarray.DataArray
206 A float xarray.DataArray of the values in the IDF file(s).
207 All metadata needed for writing the file to IDF or other formats
208 using imod.rasterio are included in the xarray.DataArray.attrs.
210 Examples
211 --------
212 Open an IDF file:
214 >>> da = imod.idf.open("example.idf")
216 Open an IDF file, relying on default naming conventions to identify
217 layer:
219 >>> da = imod.idf.open("example_l1.idf")
221 Open an IDF file, relying on default naming conventions to identify layer
222 and time:
224 >>> head = imod.idf.open("head_20010101_l1.idf")
226 To ignore the naming conventions, specify ``pattern="{name}"``. This will
227 disable parsing of the filename into xarray coordinates.
229 >>> head = imod.idf.open("head_20010101_l1.idf", pattern="{name}")
231 Open multiple IDF files, in this case files for the year 2001 for all
232 layers, again relying on default conventions for naming:
234 >>> head = imod.idf.open("head_2001*_l*.idf")
236 The same, this time explicitly specifying ``name``, ``time``, and ``layer``:
238 >>> head = imod.idf.open("head_2001*_l*.idf", pattern="{name}_{time}_l{layer}")
240 The format string pattern will only work on tidy paths, where variables are
241 separated by underscores. You can also pass a compiled regex pattern.
242 Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered.
244 >>> import re
245 >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE)
246 >>> head = imod.idf.open("headL11", pattern=pattern)
248 However, this requires constructing regular expressions, which is
249 generally a fiddly process. Regex notation is also impossible to
250 remember. The website https://regex101.com is a nice help. Alternatively,
251 the most pragmatic solution may be to just rename your files.
252 """
253 return array_io.reading._open(path, use_cftime, pattern, header, _read)
256def _more_than_one_unique_value(values: Iterable[Any]):
257 """Returns if more than one unique value in list"""
258 return len(set(values)) != 1
261def open_subdomains(
262 path: str | Path, use_cftime: bool = False, pattern: str | Pattern = None
263) -> xr.DataArray:
264 """
265 Combine IDF files of multiple subdomains.
267 Parameters
268 ----------
269 path : str or Path
270 Global path.
271 use_cftime : bool, optional
272 pattern : str, regex pattern, optional
273 If no pattern is provided, the function will first try:
274 "{name}_c{species}_{time}_l{layer}_p{subdomain}"
275 and if that fails:
276 "{name}_{time}_l{layer}_p{subdomain}"
277 Following the iMOD5/iMOD-WQ filename conventions.
279 Returns
280 -------
281 xarray.DataArray
283 """
284 paths = sorted(glob.glob(str(path)))
286 if pattern is None:
287 # If no pattern provided test if
288 pattern = "{name}_c{species}_{time}_l{layer}_p{subdomain}"
289 re_pattern_species = imod.util.path._custom_pattern_to_regex_pattern(pattern)
290 has_species = re_pattern_species.search(paths[0])
291 if not has_species:
292 pattern = "{name}_{time}_l{layer}_p{subdomain}"
294 parsed = [imod.util.path.decompose(path, pattern) for path in paths]
295 grouped = defaultdict(list)
296 for match, path in zip(parsed, paths):
297 try:
298 key = match["subdomain"]
299 except KeyError as e:
300 raise KeyError(f"{e} in path: {path} with pattern: {pattern}")
301 grouped[key].append(path)
303 n_idf_per_subdomain = {
304 subdomain_id: len(path_ls) for subdomain_id, path_ls in grouped.items()
305 }
306 if _more_than_one_unique_value(n_idf_per_subdomain.values()):
307 raise ValueError(
308 f"Each subdomain must have the same number of IDF files, found: {n_idf_per_subdomain}"
309 )
311 das = []
312 for pathlist in grouped.values():
313 da = open(pathlist, use_cftime=use_cftime, pattern=pattern)
314 da = da.isel(subdomain=0, drop=True)
315 das.append(da)
317 name = das[0].name
318 return merge_partitions(das)[name] # as DataArray for backwards compatibility
321def open_dataset(globpath, use_cftime=False, pattern=None):
322 """
323 Open a set of IDFs to a dict of xarray.DataArrays.
325 Compared to imod.idf.open, this function lets you open multiple parameters
326 at once (for example kh values and starting heads of a model), which will
327 each be a separate entry in a dictionary, with as key the parameter name,
328 and as value the xarray.DataArray.
330 Parameters
331 ----------
332 globpath : str or Path
333 A glob pattern expansion such as ``'model/**/*.idf'``, which recursively
334 finds all IDF files under the model directory. Note that files with
335 the same name (part before the first underscore) wil be combined into
336 a single xarray.DataArray.
337 use_cftime : bool, optional
338 Use ``cftime.DatetimeProlepticGregorian`` instead of `np.datetime64[ns]`
339 for the time axis.
341 Dates are normally encoded as ``np.datetime64[ns]``; however, if dates
342 fall before 1679 or after 2262, they are automatically encoded as
343 ``cftime.DatetimeProlepticGregorian`` objects rather than
344 ``np.datetime64[ns]``.
345 pattern : str, regex pattern, optional
346 If the filenames do match default naming conventions of
347 {name}_{time}_l{layer}, a custom pattern can be defined here either
348 as a string, or as a compiled regular expression pattern. Please refer
349 to the examples for ``imod.idf.open``.
351 Returns
352 -------
353 dictionary
354 Dictionary of str (parameter name) to xarray.DataArray.
355 All metadata needed for writing the file to IDF or other formats
356 using imod.rasterio are included in the xarray.DataArray.attrs.
357 """
359 # convert since for Path.glob non-relative patterns are unsupported
360 if isinstance(globpath, pathlib.Path):
361 globpath = str(globpath)
363 paths = [pathlib.Path(p) for p in glob.glob(globpath, recursive=True)]
365 n = len(paths)
366 if n == 0:
367 raise FileNotFoundError("Could not find any files matching {}".format(globpath))
368 # group the DataArrays together using their name
369 # note that directory names are ignored, and in case of duplicates, the last one wins
370 names = [imod.util.path.decompose(path, pattern)["name"] for path in paths]
371 unique_names = list(np.unique(names))
372 d = {}
373 for n in unique_names:
374 d[n] = [] # prepare empty lists to append to
375 for p, n in zip(paths, names):
376 d[n].append(p)
378 # load each group into a DataArray
379 das = [
380 array_io.reading._load(v, use_cftime, pattern, _read, header)
381 for v in d.values()
382 ]
384 # store each DataArray under it's own name in a dictionary
385 dd = {da.name: da for da in das}
386 # Initially I wanted to return a xarray Dataset here,
387 # but then realised that it is not always aligned, and therefore not possible, see
388 # https://github.com/pydata/xarray/issues/1471#issuecomment-313719395
389 # It is not aligned when some parameters only have a non empty subset of a dimension,
390 # such as L2 + L3. This dict provides a similar interface anyway. If a Dataset is constructed
391 # from unaligned DataArrays it will make copies of the data, which we don't want.
392 return dd
395def write(path, a, nodata=1.0e20, dtype=np.float32):
396 """
397 Write a 2D xarray.DataArray to a IDF file
399 Parameters
400 ----------
401 path : str or Path
402 Path to the IDF file to be written
403 a : xarray.DataArray
404 DataArray to be written. It needs to have exactly a.dims == ('y', 'x').
405 nodata : float, optional
406 Nodata value in the saved IDF files. Xarray uses nan values to represent
407 nodata, but these tend to work unreliably in iMOD(FLOW).
408 Defaults to a value of 1.0e20.
409 dtype : type, ``{np.float32, np.float64}``, default is ``np.float32``.
410 Whether to write single precision (``np.float32``) or double precision
411 (``np.float64``) IDF files.
412 """
413 if not isinstance(a, xr.DataArray):
414 raise TypeError("Data to write must be an xarray.DataArray")
415 if not a.dims == ("y", "x"):
416 raise ValueError(
417 f"Dimensions must be exactly ('y', 'x'). Received {a.dims} instead."
418 )
420 flip = slice(None, None, -1)
421 if not a.indexes["x"].is_monotonic_increasing:
422 a = a.isel(x=flip)
423 if not a.indexes["y"].is_monotonic_decreasing:
424 a = a.isel(y=flip)
425 # TODO: check is_monotonic, but also for single col/row idfs...
427 # Header is fully doubled in size in case of double precision ...
428 # This means integers are also turned into 8 bytes
429 # and requires padding with some additional bytes
430 data_dtype = a.dtype
431 if dtype == np.float64:
432 if data_dtype != np.float64:
433 a = a.astype(np.float64)
434 reclenid = 2295
435 floatformat = "d"
436 intformat = "q"
437 doubleprecision = True
438 elif dtype == np.float32:
439 reclenid = 1271
440 floatformat = "f"
441 intformat = "i"
442 doubleprecision = False
443 if data_dtype != np.float32:
444 a = a.astype(np.float32)
445 else:
446 raise ValueError("Invalid dtype, IDF allows only np.float32 and np.float64")
448 # Only fillna if data can contain na values
449 if (data_dtype == np.float32) or (data_dtype == np.float64):
450 a = a.fillna(nodata)
452 with f_open(path, "wb") as f:
453 f.write(struct.pack("i", reclenid)) # Lahey RecordLength Ident.
454 if doubleprecision:
455 f.write(struct.pack("i", reclenid))
456 nrow = a.y.size
457 ncol = a.x.size
458 f.write(struct.pack(intformat, ncol))
459 f.write(struct.pack(intformat, nrow))
461 dx, xmin, xmax, dy, ymin, ymax = imod.util.spatial.spatial_reference(a)
463 f.write(struct.pack(floatformat, xmin))
464 f.write(struct.pack(floatformat, xmax))
465 f.write(struct.pack(floatformat, ymin))
466 f.write(struct.pack(floatformat, ymax))
467 f.write(struct.pack(floatformat, float(a.min()))) # dmin
468 f.write(struct.pack(floatformat, float(a.max()))) # dmax
469 f.write(struct.pack(floatformat, nodata))
471 if isinstance(dx, float) and isinstance(dy, float):
472 ieq = True # equidistant
473 else:
474 ieq = False # nonequidistant
475 f.write(struct.pack("?", not ieq)) # ieq
477 itb = False
478 if "z" in a.coords and "dz" in a.coords:
479 z = a.coords["z"]
480 dz = abs(a.coords["dz"])
481 try:
482 top = float(z + 0.5 * dz)
483 bot = float(z - 0.5 * dz)
484 itb = True
485 except TypeError: # not a scalar value
486 pass
488 f.write(struct.pack("?", itb))
489 f.write(struct.pack("xx")) # not used
490 if doubleprecision:
491 f.write(struct.pack("xxxx")) # not used
493 if ieq:
494 f.write(struct.pack(floatformat, abs(dx)))
495 f.write(struct.pack(floatformat, abs(dy)))
496 if itb:
497 f.write(struct.pack(floatformat, top))
498 f.write(struct.pack(floatformat, bot))
499 if not ieq:
500 np.abs(a.coords["dx"].values).astype(a.dtype).tofile(f)
501 np.abs(a.coords["dy"].values).astype(a.dtype).tofile(f)
502 a.values.tofile(f)
505def _as_voxeldata(a):
506 """
507 If "z" is present as a dimension, generate layer if necessary. Ensure that
508 layer is the dimension (via swap_dims). Infer "dz" if necessary, and if
509 possible.
511 Parameters
512 ----------
513 a : xr.DataArray
515 Returns
516 -------
517 a : xr.DataArray
518 copy of input a, with swapped dims and dz added, if necessary.
519 """
520 # Avoid side-effects
521 a = a.copy()
523 if "z" in a.coords:
524 if "z" in a.dims: # it's definitely 1D
525 # have to swap it with layer in this case
526 if "layer" not in a.coords:
527 a = a.assign_coords(layer=("z", np.arange(1, a["z"].size + 1)))
528 a = a.swap_dims({"z": "layer"})
530 # It'll raise an Error if it cannot infer dz
531 if "dz" not in a.coords:
532 dz, _, _ = imod.util.spatial.coord_reference(a["z"])
533 if isinstance(dz, float):
534 a = a.assign_coords(dz=dz)
535 else:
536 a = a.assign_coords(dz=("layer", dz))
538 elif len(a["z"].shape) == 1: # one dimensional
539 if "layer" in a.coords:
540 # Check if z depends only on layer
541 if tuple(a["z"].indexes.keys()) == ("layer",):
542 if "dz" not in a.coords:
543 # It'll raise an Error if it cannot infer dz
544 dz, _, _ = imod.util.spatial.coord_reference(a["z"])
545 if isinstance(dz, float):
546 a = a.assign_coords(dz=dz)
547 else:
548 a = a.assign_coords(dz=("layer", dz))
549 return a
552def save(path, a, nodata=1.0e20, pattern=None, dtype=np.float32):
553 """
554 Write a xarray.DataArray to one or more IDF files
556 If the DataArray only has ``y`` and ``x`` dimensions, a single IDF file is
557 written, like the ``imod.idf.write`` function. This function is more general
558 and also supports ``time`` and ``layer`` dimensions. It will split these up,
559 give them their own filename according to the conventions in
560 ``imod.util.path.compose``, and write them each.
562 Parameters
563 ----------
564 path : str or Path
565 Path to the IDF file to be written. This function decides on the
566 actual filename(s) using conventions.
567 a : xarray.DataArray
568 DataArray to be written. It needs to have dimensions ('y', 'x'), and
569 optionally ``layer`` and ``time``.
570 nodata : float, optional
571 Nodata value in the saved IDF files. Xarray uses nan values to represent
572 nodata, but these tend to work unreliably in iMOD(FLOW).
573 Defaults to a value of 1.0e20.
574 pattern : str
575 Format string which defines how to create the filenames. See examples.
576 dtype : type, ``{np.float32, np.float64}``, default is ``np.float32``.
577 Whether to write single precision (``np.float32``) or double precision
578 (``np.float64``) IDF files.
580 Example
581 -------
582 Consider a DataArray ``da`` that has dimensions ``('layer', 'y', 'x')``, with the
583 layer dimension consisting of layer 1 and 2:
585 >>> imod.idf.save('path/to/head', da)
587 This writes the following two IDF files: ``path/to/head_l1.idf`` and
588 ``path/to/head_l2.idf``.
590 To disable adding coordinates to the files, specify ``pattern="{name}"``:
592 >>> imod.idf.save('path/to/head', da, pattern="{name}")
594 The ".idf" extension will always be added automatically.
596 It is possible to generate custom filenames using a format string. The
597 default filenames would be generated by the following format string:
599 >>> imod.idf.save("example", pattern="{name}_l{layer}{extension}")
601 If you desire zero-padded numbers that show up neatly sorted in a
602 file manager, you may specify:
604 >>> imod.idf.save("example", pattern="{name}_l{layer:02d}{extension}")
606 In this case, a 0 will be padded for single digit numbers ('1' will become
607 '01').
609 To get a date with dashes, use the following pattern:
611 >>> pattern="{name}_{time:%Y-%m-%d}_l{layer}{extension}"
613 """
615 # Cast datatype if necessary
616 if dtype not in (np.float32, np.float64):
617 raise ValueError("Invalid dtype, IDF allows only np.float32 and np.float64")
619 # Swap coordinates if possible, add "dz" if possible.
620 a = _as_voxeldata(a)
622 # Deal with path
623 path = pathlib.Path(path)
625 if path.suffix == "":
626 path = path.with_suffix(".idf")
628 array_io.writing._save(path, a, nodata, pattern, dtype, write)