Coverage for C:\src\imod-python\imod\formats\array_io\writing.py: 96%

54 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-08 14:15 +0200

1import pathlib 

2 

3import xarray as xr 

4 

5import imod 

6 

7 

def _write_chunks(a, pattern, d, nodata, dtype, write):
    """
    Write the DataArray ``a`` to disk, one chunk at a time.

    This is necessary to avoid heavily sub-optimal scheduling by xarray/dask
    when writing data to IDFs. The problem appears to be caused by the fact
    that .groupby results in repeated computations for every single IDF chunk
    (time and layer). Specifically, merging several subdomains with
    open_subdomains, and then calling save ends up being extremely slow.

    This function avoids this by calling compute() on the individual chunk
    before writing (so the chunk therefore has to fit in memory). 'x' and 'y'
    dimensions are never treated as chunks, as all values over x and y have to
    end up in a single IDF file.

    The number of chunks is not known beforehand; it may vary per dimension,
    and the number of dimensions may vary as well. The naive solution to this
    is a variable number of for loops, written explicitly beforehand. Instead,
    this function uses recursion, selecting one chunk per dimension per call.
    The base case is the one where only a single chunk remains (ignoring
    chunks in x and y), and then the write occurs.

    Parameters
    ----------
    a : xarray.DataArray
        Array to write; its last two dims must be ("y", "x").
    pattern : str
        Format string which defines how to create the filenames.
    d : dict
        Mutable dict of filename components ("name", "directory", ...);
        updated in place with the coordinate values of each written slice.
    nodata : float
        Value representing nodata in the written files.
    dtype : type
        dtype to write the data as.
    write : callable
        Function with signature ``(path, a_yx, nodata, dtype)`` that writes a
        single 2D (y, x) array to one file.
    """
    dim = a.dims[0]
    # ("y", "x") are guaranteed to be the last dims, so if the first dim is
    # spatial, only spatial dims remain.
    dim_is_xy = (dim == "x") or (dim == "y")
    if a.chunks is None:
        nochunks = True
    else:
        # Only chunks along non-spatial dims count. x and y always end up in
        # a single file; recursing on their chunks would repeatedly overwrite
        # the same filename with partial data.
        nochunks = (
            max(
                (
                    len(sizes)
                    for dim_, sizes in zip(a.dims, a.chunks)
                    if dim_ not in ("x", "y")
                ),
                default=1,
            )
            == 1
        )
    if nochunks or dim_is_xy:  # Base case
        # compute() merges any remaining (x, y) chunks into memory.
        a = a.compute()
        extradims = list(filter(lambda dim: dim not in ("y", "x"), a.dims))
        if extradims:
            stacked = a.stack(idf=extradims)
            for coordvals, a_yx in list(stacked.groupby("idf")):
                # Groupby sometimes returns an extra 1-sized "idf" dim
                if "idf" in a_yx.dims:
                    a_yx = a_yx.squeeze("idf")
                # set the right layer/timestep/etc in the dict to make the filename
                d.update(dict(zip(extradims, coordvals)))
                fn = imod.util.path.compose(d, pattern)
                write(fn, a_yx, nodata, dtype)
        else:
            fn = imod.util.path.compose(d, pattern)
            write(fn, a, nodata, dtype)
    else:  # recursive case
        # Find the first non-spatial dim split into multiple chunks; it is
        # guaranteed to exist because nochunks is False.
        for dim, chunksizes in zip(a.dims, a.chunks):
            if dim not in ("x", "y") and len(chunksizes) > 1:
                break
        start = 0
        for chunksize in chunksizes:
            end = start + chunksize
            b = a.isel({dim: slice(start, end)})
            # Recurse
            _write_chunks(b, pattern, d, nodata, dtype, write)
            start = end

59 

60 

def _save(path, a, nodata, pattern, dtype, write):
    """
    Write a xarray.DataArray to one or more IDF files

    If the DataArray only has ``y`` and ``x`` dimensions, a single IDF file is
    written, like the ``imod.idf.write`` function. This function is more general
    and also supports ``time`` and ``layer`` dimensions. It will split these up,
    give them their own filename according to the conventions in
    ``imod.util.path.compose``, and write them each.

    Parameters
    ----------
    path : str or Path
        Path to the IDF file to be written. This function decides on the
        actual filename(s) using conventions, so it only takes the directory and
        name, and extension from this parameter.
    a : xarray.DataArray
        DataArray to be written. It needs to have dimensions ('y', 'x'), and
        optionally ``layer`` and ``time``.
    nodata : float
        Nodata value in the saved IDF files. Xarray uses nan values to represent
        nodata, but these tend to work unreliably in iMOD(FLOW).
    pattern : str
        Format string which defines how to create the filenames. See examples.
    dtype : type
        dtype to write the data as.
    write : function
        Which function to use to write 2D arrays.

    Raises
    ------
    TypeError
        If ``a`` is not an xarray.DataArray.
    ValueError
        If the last two dimensions of ``a`` are not ("y", "x").
    """
    if not isinstance(a, xr.DataArray):
        raise TypeError("Data to save must be an xarray.DataArray")
    if a.dims[-2:] != ("y", "x"):
        raise ValueError(
            'Last two dimensions of DataArray to save must be ("y", "x"), '
            f"found: {a.dims}"
        )

    path = pathlib.Path(path)

    # A more flexible schema might be required to support additional variables
    # such as species, for concentration. The straightforward way is by giving
    # a format string, e.g.: {name}_{time}_l{layer}
    # Find the vars in curly braces, and validate with da.coords
    d = {"extension": path.suffix, "name": path.stem, "directory": path.parent}
    d["directory"].mkdir(exist_ok=True, parents=True)

    # Make sure all non-xy dims are ascending
    # otherwise a.stack fails in _write_chunks.
    # y has to be monotonic decreasing
    flip = slice(None, None, -1)
    for dim in a.dims:
        if dim == "y":
            if not a.indexes[dim].is_monotonic_decreasing:
                a = a.isel({dim: flip})
        elif not a.indexes[dim].is_monotonic_increasing:
            a = a.isel({dim: flip})

    # handle the case where they are not a dim but are a coord
    # i.e. you only have one layer but you did a.assign_coords(layer=1)
    # in this case we do want _l1 in the IDF file name
    check_coords = ["layer", "time"]
    for coord in check_coords:
        if (coord in a.coords) and coord not in a.dims:
            if coord == "time":
                # .item() gives an integer for datetime64[ns], so convert first.
                val = a.coords[coord].values
                if not val == "steady-state":
                    val = a.coords[coord].values.astype("datetime64[us]").item()
            else:
                val = a.coords[coord].item()
            d[coord] = val

    # stack all non idf dims into one new idf dimension,
    # over which we can then iterate to write all individual idfs
    _write_chunks(a, pattern, d, nodata, dtype, write)