# imod/prepare/common.py
1"""
2Common methods used for interpolation, voxelization.
4Includes methods for dealing with different coordinates and dimensions of the
5xarray.DataArrays, as well as aggregation methods operating on weights and
6values.
7"""
9import cftime
10import numba
11import numpy as np
13import imod


@numba.njit
def _starts(src_x, dst_x):
    """
    For a single dimension, find the first source cell overlapping each
    destination cell.

    Parameters
    ----------
    src_x : np.array
        vertex coordinates of source
    dst_x : np.array
        vertex coordinates of destination

    Yields
    ------
    i, j : int
        index of the destination cell, and of its first overlapping source
        cell
    """
    i = 0
    j = 0
    while i < dst_x.size - 1:
        x = dst_x[i]
        while j < src_x.size:
            if src_x[j] > x:
                out = max(j - 1, 0)
                yield (i, out)
                break
            else:
                j += 1
        i += 1


def _weights_1d(src_x, dst_x, use_relative_weights=False):
    """
    Calculate regridding weights and indices for a single dimension.

    Parameters
    ----------
    src_x : np.array
        vertex coordinates of source
    dst_x : np.array
        vertex coordinates of destination
    use_relative_weights : bool, optional
        if True, weigh by the overlap relative to the source cellsize rather
        than by the absolute overlap

    Returns
    -------
    max_len : int
        maximum number of source cells to a single destination cell for this
        dimension
    dst_inds : np.array of int
        destination cell index
    src_inds : np.array of int
        source cell index, per destination index
    weights : np.array of float
        weight of source cell, per destination index
    """
    max_len = 0
    dst_inds = []
    src_inds = []
    weights = []
    rel_weights = []

    # i is index of dst
    # j is index of src
    for i, j in _starts(src_x, dst_x):
        dst_x0 = dst_x[i]
        dst_x1 = dst_x[i + 1]

        _inds = []
        _weights = []
        _rel_weights = []
        has_value = False
        while j < src_x.size - 1:
            src_x0 = src_x[j]
            src_x1 = src_x[j + 1]
            overlap = _overlap((dst_x0, dst_x1), (src_x0, src_x1))
            # No longer any overlap, continue to next dst cell
            if overlap == 0:
                break
            else:
                has_value = True
                _inds.append(j)
                _weights.append(overlap)
                relative_overlap = overlap / (src_x1 - src_x0)
                _rel_weights.append(relative_overlap)
                j += 1
        if has_value:
            dst_inds.append(i)
            src_inds.append(_inds)
            weights.append(_weights)
            rel_weights.append(_rel_weights)
            # Save the max number of source cells,
            # so we know how much to pre-allocate later on
            inds_len = len(_inds)
            if inds_len > max_len:
                max_len = inds_len

    # Convert all output to numpy arrays.
    # numba does NOT like arrays or lists in tuples:
    # compilation time goes through the roof.
    nrow = len(dst_inds)
    ncol = max_len
    np_dst_inds = np.array(dst_inds)

    np_src_inds = np.full((nrow, ncol), -1)
    for i in range(nrow):
        for j, ind in enumerate(src_inds[i]):
            np_src_inds[i, j] = ind

    np_weights = np.full((nrow, ncol), 0.0)
    if use_relative_weights:
        weights = rel_weights
    for i in range(nrow):
        for j, w in enumerate(weights[i]):
            np_weights[i, j] = w

    return max_len, (np_dst_inds, np_src_inds, np_weights)
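
# A minimal sketch of the output (illustrative, not part of the module):
# with source vertices [0, 10, 20, 30] and destination vertices [0, 15, 30],
# each destination cell overlaps two source cells:
#
#     max_len, (dst_inds, src_inds, weights) = _weights_1d(
#         np.array([0.0, 10.0, 20.0, 30.0]),
#         np.array([0.0, 15.0, 30.0]),
#     )
#     # max_len == 2
#     # dst_inds -> array([0, 1])
#     # src_inds -> array([[0, 1], [1, 2]])
#     # weights  -> array([[10., 5.], [5., 10.]])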


def _reshape(src, dst, ndim_regrid):
    """
    If ndim > ndim_regrid, the non-regridding dimensions are combined into
    a single dimension, so we can use a single loop, irrespective of the
    total number of dimensions.
    (The alternative is pre-writing N for-loops for every number of
    dimensions N we intend to support.)
    If ndim == ndim_regrid, all dimensions will be used in regridding. In
    that case no looping over other dimensions is required, and we add a
    dummy dimension here so there's something to iterate over.
    """
    src_shape = src.shape
    dst_shape = dst.shape
    ndim = len(src_shape)

    if ndim == ndim_regrid:
        n_iter = 1
    else:
        n_iter = int(np.prod(src_shape[:-ndim_regrid]))

    src_itershape = (n_iter, *src_shape[-ndim_regrid:])
    dst_itershape = (n_iter, *dst_shape[-ndim_regrid:])

    iter_src = np.reshape(src, src_itershape)
    iter_dst = np.reshape(dst, dst_itershape)

    return iter_src, iter_dst
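
# Sketch (illustrative): regridding the last two dimensions of arrays with
# shape (2, 3, 4, 5) collapses the leading dimensions into a single
# iteration axis:
#
#     iter_src, iter_dst = _reshape(src, dst, ndim_regrid=2)
#     # iter_src.shape == (6, 4, 5)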


def _is_subset(a1, a2):
    """Check whether a2 is a contiguous subset of a1."""
    if np.isin(a2, a1).all():
        # All values of a2 are present in a1.
        # Now check whether they form a contiguous block:
        # generate indices, and fetch only those present.
        idx = np.arange(a1.size)[np.isin(a1, a2)]
        if idx.size > 1:
            increment = np.diff(idx)
            # If the maximum increment is 1, it's a contiguous subset
            if increment.max() == 1:
                return True
    return False
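
# For example (illustrative):
#
#     _is_subset(np.array([0, 1, 2, 3, 4]), np.array([1, 2, 3]))  # -> True
#     _is_subset(np.array([0, 1, 2, 3, 4]), np.array([1, 3]))  # -> False: gap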


def _match_dims(src, like):
    """
    Parameters
    ----------
    src : xr.DataArray
        The source DataArray to be regridded
    like : xr.DataArray
        Example DataArray that shows what the resampled result should look
        like in terms of coordinates. `src` is regridded along dimensions of
        `like` that have the same name, but different values.

    Returns
    -------
    matching_dims, regrid_dims, add_dims : tuple of lists
        matching_dims: dimensions along which the coordinates match exactly
        regrid_dims: dimensions along which src will be regridded
        add_dims: dimensions that are not present in like
    """
    # TODO: deal with different extent?
    # Do another check if not identical
    # Check if subset or superset?
    matching_dims = []
    regrid_dims = []
    add_dims = []
    for dim in src.dims:
        if dim not in like.dims:
            add_dims.append(dim)
        elif src[dim].size == 0:  # zero overlap
            regrid_dims.append(dim)
        else:
            try:
                a1 = _coord(src, dim)
                a2 = _coord(like, dim)
                if np.array_equal(a1, a2) or _is_subset(a1, a2):
                    matching_dims.append(dim)
                else:
                    regrid_dims.append(dim)
            except TypeError:
                first_type = type(like[dim].values[0])
                if issubclass(first_type, (cftime.datetime, np.datetime64)):
                    raise RuntimeError(
                        "cannot regrid over datetime dimensions. "
                        "Use xarray.Dataset.resample() instead"
                    )

    ndim_regrid = len(regrid_dims)
    # Check the number of dimensions to regrid
    if ndim_regrid > 3:
        raise NotImplementedError("cannot regrid over more than three dimensions")

    return matching_dims, regrid_dims, add_dims
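
# Sketch (illustrative): for a src with dims ("layer", "y", "x") and a like
# with dims ("y", "x") on a coarser grid, "layer" is returned in add_dims,
# while "y" and "x" end up in regrid_dims.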


def _increasing_dims(da, dims):
    """
    Flip dimensions that are monotonically decreasing, and return the flipped
    DataArray along with the names of the flipped dimensions.
    """
    flip_dims = []
    for dim in dims:
        if not da.indexes[dim].is_monotonic_increasing:
            flip_dims.append(dim)
            da = da.isel({dim: slice(None, None, -1)})
    return da, flip_dims


def _selection_indices(src_x, xmin, xmax, extra_overlap):
    """Left-inclusive: select the source cells covering [xmin, xmax]."""
    # Extra overlap is needed, for example with (multi)linear interpolation.
    # We simply enlarge the slice at the start and at the end.
    i0 = max(0, np.searchsorted(src_x, xmin, side="right") - 1 - extra_overlap)
    i1 = np.searchsorted(src_x, xmax, side="left") + extra_overlap
    return i0, i1
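
# Sketch (illustrative) with vertices [0, 1, 2, 3, 4]: the cells covering
# [1.2, 2.8] without extra overlap are cells 1 and 2:
#
#     _selection_indices(np.array([0.0, 1.0, 2.0, 3.0, 4.0]), 1.2, 2.8, 0)
#     # -> (1, 3)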


def _slice_src(src, like, extra_overlap):
    """
    Make sure src matches like in the dims that do not have to be regridded.
    """
    matching_dims, regrid_dims, _ = _match_dims(src, like)
    dims = matching_dims + regrid_dims

    slices = {}
    for dim in dims:
        # Generate vertices
        src_x = _coord(src, dim)
        _, xmin, xmax = imod.util.spatial.coord_reference(like[dim])
        i0, i1 = _selection_indices(src_x, xmin, xmax, extra_overlap)
        slices[dim] = slice(i0, i1)
    return src.isel(slices)


def _dst_coords(src, like, dims_from_src, dims_from_like):
    """
    Gather destination coordinates.
    """
    dst_da_coords = {}
    dst_shape = []
    # TODO: do some more checking, more robust handling
    like_coords = dict(like.coords)
    for dim in dims_from_src:
        like_coords.pop(dim, None)
        dst_da_coords[dim] = src[dim].values
        dst_shape.append(src[dim].size)
    for dim in dims_from_like:
        like_coords.pop(dim, None)
        dst_da_coords[dim] = like[dim].values
        dst_shape.append(like[dim].size)

    dst_da_coords.update(like_coords)
    return dst_da_coords, dst_shape


def _check_monotonic(dxs, dim):
    # Use xor to check that the cellsizes are either all positive or all
    # negative.
    if not ((dxs > 0.0).all() ^ (dxs < 0.0).all()):
        raise ValueError(f"{dim} is neither only increasing nor only decreasing")


def _set_cellsizes(da, dims):
    """Ensure a cellsize coordinate (e.g. "dx") exists for every dim."""
    for dim in dims:
        dx_string = f"d{dim}"
        if dx_string not in da.coords:
            dx, _, _ = imod.util.spatial.coord_reference(da.coords[dim])
            if isinstance(dx, (int, float)):
                dx = np.full(da.coords[dim].size, dx)
            da = da.assign_coords({dx_string: (dim, dx)})
    return da


def _set_scalar_cellsizes(da):
    """Collapse constant cellsize coordinates (e.g. "dx") into scalars."""
    for dim in da.dims:
        dx_string = f"d{dim}"
        if dx_string in da.coords:
            dx = da.coords[dx_string]
            # Ensure no leftover coordinates in scalar
            if dx.ndim == 0:  # Catch case where dx already is a scalar
                dx_scalar = dx.values[()]
            else:
                dx_scalar = dx.values[0]
            if np.allclose(dx, dx_scalar):
                da = da.assign_coords({dx_string: dx_scalar})
    return da


def _coord(da, dim):
    """
    Transform the N midpoint coordinates of dim into N + 1 vertex edges.
    """
    delta_dim = "d" + dim  # e.g. dx, dy, dz, etc.

    # If empty array, return empty
    if da[dim].size == 0:
        return np.array(())

    if delta_dim in da.coords:  # equidistant or non-equidistant
        dx = da[delta_dim].values
        if dx.shape == () or dx.shape == (1,):  # scalar -> equidistant
            dxs = np.full(da[dim].size, dx)
        else:  # array -> non-equidistant
            dxs = dx
        _check_monotonic(dxs, dim)

    else:  # not defined -> equidistant
        if da[dim].size == 1:
            raise ValueError(
                f"DataArray has size 1 along {dim}, so cellsize must be provided"
                " as a coordinate."
            )
        dxs = np.diff(da[dim].values)
        dx = dxs[0]
        atolx = abs(1.0e-4 * dx)
        if not np.allclose(dxs, dx, atol=atolx):
            raise ValueError(
                f"DataArray has to be equidistant along {dim}, or cellsizes"
                " must be provided as a coordinate."
            )
        dxs = np.full(da[dim].size, dx)

    dxs = np.abs(dxs)
    x = da[dim].values
    if not da.indexes[dim].is_monotonic_increasing:
        x = x[::-1]
        dxs = dxs[::-1]

    # From here on, the coordinate is monotonically increasing
    x0 = x[0] - 0.5 * dxs[0]
    x = np.full(dxs.size + 1, x0)
    x[1:] += np.cumsum(dxs)
    return x
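
# Sketch (illustrative, assuming ``import xarray as xr``): midpoints
# [0.5, 1.5, 2.5] with a scalar cellsize coordinate dx == 1.0 yield the
# vertex edges [0, 1, 2, 3]:
#
#     da = xr.DataArray(
#         [1.0, 2.0, 3.0], coords={"x": [0.5, 1.5, 2.5], "dx": 1.0}, dims=["x"]
#     )
#     _coord(da, "x")  # -> array([0., 1., 2., 3.])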


def _define_single_dim_slices(src_x, dst_x, chunksizes):
    n = len(chunksizes)
    if not n > 0:
        raise ValueError("chunksizes must contain at least one chunk")
    if n == 1:
        return [slice(None, None)]

    chunk_indices = np.full(n + 1, 0)
    chunk_indices[1:] = np.cumsum(chunksizes)
    # Find locations to cut
    src_chunk_x = src_x[chunk_indices]
    if dst_x[0] < src_chunk_x[0]:
        src_chunk_x[0] = dst_x[0]
    if dst_x[-1] > src_chunk_x[-1]:
        src_chunk_x[-1] = dst_x[-1]
    # Destinations should NOT have any overlap;
    # sources may have overlap.
    # We find the most suitable places to cut.
    dst_i = np.searchsorted(dst_x, src_chunk_x, "left")
    dst_i[dst_i > dst_x.size - 1] = dst_x.size - 1

    # Create slices, but only if start and end are different
    # (otherwise, the slice would be empty)
    dst_slices = [slice(s, e) for s, e in zip(dst_i[:-1], dst_i[1:]) if s != e]
    return dst_slices
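
# Sketch (illustrative): four source cells with vertices [0, 1, 2, 3, 4],
# chunked as (2, 2), and two destination cells with vertices [0, 2, 4]:
# the destination is cut into one cell per source chunk.
#
#     _define_single_dim_slices(
#         np.array([0.0, 1.0, 2.0, 3.0, 4.0]),
#         np.array([0.0, 2.0, 4.0]),
#         (2, 2),
#     )
#     # -> [slice(0, 1), slice(1, 2)]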


def _define_slices(src, like):
    """
    Defines the slices for every dimension, based on the chunks that are
    present within src.

    First, we get a single list of chunks per dimension.
    Next, these are expanded into an N-dimensional array, equal to the number
    of dimensions that have chunks.
    Finally, these arrays are ravelled, and stacked for easier iteration.
    """
    dst_dim_slices = []
    dst_chunks_shape = []
    for dim, chunksizes in zip(src.dims, src.chunks):
        if dim in like.dims:
            dst_slices = _define_single_dim_slices(
                _coord(src, dim), _coord(like, dim), chunksizes
            )
            dst_dim_slices.append(dst_slices)
            dst_chunks_shape.append(len(dst_slices))

    dst_expanded_slices = np.stack(
        [a.ravel() for a in np.meshgrid(*dst_dim_slices, indexing="ij")], axis=-1
    )
    return dst_expanded_slices, dst_chunks_shape


def _sel_chunks(da, dims, expanded_slices):
    """
    Using the slices created with the functions above, use xarray's index
    selection methods to create a list of "like" DataArrays which are used
    to inform the regridding. During the regrid() call of the
    imod.prepare.Regridder object, data from the input array is selected,
    ideally one chunk at a time, or 2 ** ndim_chunks chunks if overlap is
    required due to cellsize differences.
    """
    das = []
    for dim_slices in expanded_slices:
        slice_dict = {}
        for dim, dim_slice in zip(dims, dim_slices):
            slice_dict[dim] = dim_slice
        das.append(da.isel(**slice_dict))
    return das


def _get_method(method, methods):
    if isinstance(method, str):
        try:
            _method = methods[method]
        except KeyError as e:
            raise ValueError(
                "Invalid regridding method. Available methods are: {}".format(
                    ", ".join(methods.keys())
                )
            ) from e
    elif callable(method):
        _method = method
    else:
        raise TypeError("method must be a string or a callable")
    return _method


@numba.njit
def _overlap(a, b):
    """Return the length of the overlap of two intervals (x0, x1)."""
    return max(0, min(a[1], b[1]) - max(a[0], b[0]))
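
# E.g. _overlap((0.0, 10.0), (5.0, 20.0)) returns 5.0: the intervals share
# [5.0, 10.0].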


def mean(values, weights):
    # Weighted arithmetic mean; NaN values are skipped
    vsum = 0.0
    wsum = 0.0
    for i in range(values.size):
        v = values[i]
        w = weights[i]
        if np.isnan(v):
            continue
        vsum += w * v
        wsum += w
    if wsum == 0:
        return np.nan
    else:
        return vsum / wsum
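
# Sketch (illustrative): NaNs are skipped and the remaining weights
# renormalize accordingly:
#
#     mean(np.array([10.0, np.nan, 30.0]), np.array([1.0, 1.0, 3.0]))
#     # -> (1 * 10 + 3 * 30) / (1 + 3) = 25.0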


def harmonic_mean(values, weights):
    # Weighted harmonic mean; NaN and zero values are skipped
    v_agg = 0.0
    w_sum = 0.0
    for i in range(values.size):
        v = values[i]
        w = weights[i]
        if np.isnan(v) or v == 0:
            continue
        if w > 0:
            w_sum += w
            v_agg += w / v
    if v_agg == 0 or w_sum == 0:
        return np.nan
    else:
        return w_sum / v_agg
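
# Sketch (illustrative): with equal weights this reduces to the ordinary
# harmonic mean:
#
#     harmonic_mean(np.array([2.0, 4.0]), np.array([1.0, 1.0]))
#     # -> 2 / (1/2 + 1/4) = 2.666...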


def geometric_mean(values, weights):
    v_agg = 0.0
    w_sum = 0.0

    # Compute the sum to normalize the weights, to avoid tiny or huge values
    # in exp
    normsum = 0.0
    for i in range(values.size):
        normsum += weights[i]
    # Early return if no values
    if normsum == 0:
        return np.nan

    for i in range(values.size):
        w = weights[i] / normsum
        v = values[i]
        # Skip if v == 0, v is NaN, or w == 0 (no contribution)
        if v > 0 and w > 0:
            v_agg += w * np.log(abs(v))
            w_sum += w
        # Do not reduce over negative values: would require complex numbers.
        elif v < 0:
            return np.nan

    if w_sum == 0:
        return np.nan
    else:
        return np.exp((1.0 / w_sum) * v_agg)


def sum(values, weights):
    # NOTE: shadows the built-in sum within this module.
    # w_sum is only accumulated to detect whether anything contributed at all.
    v_sum = 0.0
    w_sum = 0.0
    for i in range(values.size):
        v = values[i]
        w = weights[i]
        if np.isnan(v):
            continue
        v_sum += v
        w_sum += w
    if w_sum == 0:
        return np.nan
    else:
        return v_sum


def minimum(values, weights):
    return np.nanmin(values)


def maximum(values, weights):
    return np.nanmax(values)


def mode(values, weights):
    # Area weighted mode.
    # We reuse the weights array for counting, so no allocations are needed;
    # NOTE: this mutates `weights` in place.
    # The alternative is defining a separate frequency array in which to add
    # the weights. This implementation is less efficient in terms of looping:
    # with many unique values, it keeps having to loop through a big part of
    # the weights array... but it would do so with a separate frequency array
    # as well. There are somewhat more elements to traverse in this case.
    s = values.size
    w_sum = 0
    for i in range(s):
        v = values[i]
        w = weights[i]
        if np.isnan(v):
            continue
        w_sum += 1
        for j in range(i):  # Compare with previously found values
            if values[j] == v:  # matches previous value
                weights[j] += w  # increase previous weight
                break

    if w_sum == 0:  # It skipped everything: only nodata values
        return np.nan
    else:  # Find the value with the highest frequency
        w_max = 0
        for i in range(s):
            w = weights[i]
            if w > w_max:
                w_max = w
                v = values[i]
        return v
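
# Sketch (illustrative): weights of repeated values accumulate onto their
# first occurrence, so the value with the largest total overlap wins:
#
#     mode(np.array([1.0, 2.0, 1.0]), np.array([1.0, 3.0, 1.0]))
#     # -> 2.0 (weight 3.0 beats the accumulated 2.0 for value 1.0)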


def median(values, weights):
    return np.nanpercentile(values, 50)


def conductance(values, weights):
    # Area weighted sum: conductance scales with the overlapping area
    v_agg = 0.0
    w_sum = 0.0
    for i in range(values.size):
        v = values[i]
        w = weights[i]
        if np.isnan(v):
            continue
        v_agg += v * w
        w_sum += w
    if w_sum == 0:
        return np.nan
    else:
        return v_agg


def max_overlap(values, weights):
    # Return the value of the source cell with the largest overlap
    max_w = 0.0
    v = np.nan
    for i in range(values.size):
        w = weights[i]
        if w > max_w:
            max_w = w
            v = values[i]
    return v


METHODS = {
    "nearest": "nearest",
    "multilinear": "multilinear",
    "mean": mean,
    "harmonic_mean": harmonic_mean,
    "geometric_mean": geometric_mean,
    "sum": sum,
    "minimum": minimum,
    "maximum": maximum,
    "mode": mode,
    "median": median,
    "conductance": conductance,
    "max_overlap": max_overlap,
}
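
# Sketch (illustrative): METHODS maps user-facing names to the aggregation
# functions above; the string entries ("nearest", "multilinear") are
# placeholders resolved elsewhere. _get_method accepts either a known name
# or any callable:
#
#     _get_method("mean", METHODS)  # -> the mean() function above
#     _get_method(np.nanmax, METHODS)  # any callable passes through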