Coverage for C:\src\imod-python\imod\formats\array_io\writing.py: 96%
54 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 14:15 +0200
1import pathlib
3import xarray as xr
5import imod
def _write_chunks(a, pattern, d, nodata, dtype, write):
    """
    Write one chunk of the DataArray 'a' at a time. This is
    necessary to avoid heavily sub-optimal scheduling by xarray/dask when
    writing data to idf's. The problem appears to be caused by the fact that
    .groupby results in repeated computations for every single IDF chunk
    (time and layer). Specifically, merging several subdomains with
    open_subdomains, and then calling save ends up being extremely slow.

    This function avoids this by calling compute() on the individual chunk,
    before writing (so the chunk therefore has to fit in memory). 'x' and 'y'
    dimensions are not treated as chunks, as all values over x and y have to
    end up in a single IDF file.

    The number of chunks is not known beforehand; it may vary per dimension,
    and the number of dimensions may vary as well. The naive solution to this
    is a variable number of for loops, written explicitly beforehand. Instead,
    this function uses recursion, selecting one chunk per dimension per time.
    The base case is one where only a single chunk remains, and then the write
    occurs (ignoring chunks in x and y).

    Parameters
    ----------
    a : xarray.DataArray
        Data to write; last two dimensions must be ("y", "x").
    pattern : str
        Format string defining how filenames are composed, passed to
        ``imod.util.path.compose``.
    d : dict
        Mutable dict of filename components (directory, name, extension, and
        per-file coordinate values). NOTE: updated in place during iteration.
    nodata : float
        Nodata value, forwarded to ``write``.
    dtype : type or numpy.dtype
        Forwarded to ``write``.
    write : callable
        Function ``write(path, a_2d, nodata, dtype)`` that writes a single
        2D array to disk.
    """
    dim = a.dims[0]
    dim_is_xy = (dim == "x") or (dim == "y")
    # No chunking, or exactly one chunk per dimension: nothing left to split.
    nochunks = a.chunks is None or max(map(len, a.chunks)) == 1
    if nochunks or dim_is_xy:  # Base case
        # Chunk fits in memory by assumption: materialize it once.
        a = a.compute()
        extradims = list(filter(lambda dim: dim not in ("y", "x"), a.dims))
        if extradims:
            # Collapse all non-spatial dims into one "idf" dim so a single
            # groupby yields one 2D (y, x) slice per output file.
            stacked = a.stack(idf=extradims)
            for coordvals, a_yx in list(stacked.groupby("idf")):
                # Groupby sometimes returns an extra 1-sized "idf" dim
                if "idf" in a_yx.dims:
                    a_yx = a_yx.squeeze("idf")
                # set the right layer/timestep/etc in the dict to make the filename
                d.update(dict(zip(extradims, coordvals)))
                fn = imod.util.path.compose(d, pattern)
                write(fn, a_yx, nodata, dtype)
        else:
            # Purely 2D: a single file.
            fn = imod.util.path.compose(d, pattern)
            write(fn, a, nodata, dtype)
    else:  # recursive case
        # Find the first dimension that is split into more than one chunk;
        # `dim`/`chunksizes` keep the values from the iteration that breaks.
        for dim, chunksizes in zip(a.dims, a.chunks):
            if len(chunksizes) > 1:
                break
        start = 0
        for chunksize in chunksizes:
            end = start + chunksize
            b = a.isel({dim: slice(start, end)})
            # Recurse
            _write_chunks(b, pattern, d, nodata, dtype, write)
            start = end
def _save(path, a, nodata, pattern, dtype, write):
    """
    Write a xarray.DataArray to one or more IDF files.

    If the DataArray only has ``y`` and ``x`` dimensions, a single IDF file is
    written, like the ``imod.idf.write`` function. This function is more general
    and also supports ``time`` and ``layer`` dimensions. It will split these up,
    give them their own filename according to the conventions in
    ``imod.util.path.compose``, and write them each.

    Parameters
    ----------
    path : str or Path
        Path to the IDF file to be written. This function decides on the
        actual filename(s) using conventions, so it only takes the directory,
        name, and extension from this parameter.
    a : xarray.DataArray
        DataArray to be written. It needs to have dimensions ('y', 'x'), and
        optionally ``layer`` and ``time``.
    nodata : float, optional
        Nodata value in the saved IDF files. Xarray uses nan values to represent
        nodata, but these tend to work unreliably in iMOD(FLOW).
        Defaults to a value of 1.0e20.
    pattern : str
        Format string which defines how to create the filenames. See examples.
    dtype : type or numpy.dtype
        Dtype for the written data; forwarded to ``write``.
    write : function
        Which function to use to write 2D arrays.

    Raises
    ------
    TypeError
        If ``a`` is not an xarray.DataArray.
    ValueError
        If the last two dimensions of ``a`` are not ("y", "x").
    """
    if not isinstance(a, xr.DataArray):
        raise TypeError("Data to save must be an xarray.DataArray")
    if a.dims[-2:] != ("y", "x"):
        raise ValueError(
            'Last two dimensions of DataArray to save must be ("y", "x"), '
            f"found: {a.dims}"
        )

    path = pathlib.Path(path)

    # A more flexible schema might be required to support additional variables
    # such as species, for concentration. The straightforward way is by giving
    # a format string, e.g.: {name}_{time}_l{layer}
    # Find the vars in curly braces, and validate with da.coords
    d = {"extension": path.suffix, "name": path.stem, "directory": path.parent}
    d["directory"].mkdir(exist_ok=True, parents=True)

    # Make sure all non-xy dims are ascending,
    # otherwise a.stack fails in _write_chunks.
    # y has to be monotonic decreasing (IDF convention).
    flip = slice(None, None, -1)
    for dim in a.dims:
        if dim == "y":
            if not a.indexes[dim].is_monotonic_decreasing:
                a = a.isel({dim: flip})
        elif not a.indexes[dim].is_monotonic_increasing:
            a = a.isel({dim: flip})

    # Handle the case where layer/time is not a dim but is a coord,
    # i.e. you only have one layer but you did a.assign_coords(layer=1).
    # In this case we do want _l1 in the IDF file name.
    for coord in ("layer", "time"):
        if coord in a.coords and coord not in a.dims:
            if coord == "time":
                # .item() gives an integer for datetime64[ns], so convert first.
                val = a.coords[coord].values
                if val != "steady-state":
                    val = a.coords[coord].values.astype("datetime64[us]").item()
            else:
                val = a.coords[coord].item()
            d[coord] = val

    # Stack all non-idf dims into one new idf dimension,
    # over which we can then iterate to write all individual idfs.
    _write_chunks(a, pattern, d, nodata, dtype, write)