Coverage for src/scores/continuous/isoreg.py: 100%
163 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-22 10:48 +1100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-22 10:48 +1100
1"""
2An implementation of isotonic regression using the pool adjacent violators (PAV)
3algorithm. In the context of forecast verification, the regression explanatory variable
4is the forecast and the response variable is the observation. Forecasts and observations
5are assumed to be real-valued quantities.
7Isotonic regression exposes conditional forecast biases. Confidence bands for the
8regression fit can also be returned. This implementation includes option to specify what
9functional the forecast is targeting, so that appropriate regression can be performed on
10different types of real-valued forecasts, such as those which are a quantile of the
11predictive distribution.
12"""
14from functools import partial
15from typing import Callable, Literal, Optional, Tuple, Union
17import numpy as np
18import xarray as xr
19from scipy import interpolate
20from sklearn.isotonic import IsotonicRegression
23def isotonic_fit( # pylint: disable=too-many-locals, too-many-arguments
24 fcst: Union[np.ndarray, xr.DataArray],
25 obs: Union[np.ndarray, xr.DataArray],
26 weight: Optional[Union[np.ndarray, xr.DataArray]] = None,
27 functional: Optional[Literal["mean", "quantile"]] = "mean",
28 bootstraps: Optional[int] = None,
29 quantile_level: Optional[float] = None,
30 solver: Optional[Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]]] = None,
31 confidence_level: Optional[float] = 0.9,
32 min_non_nan: Optional[int] = 1,
33 report_bootstrap_results: Optional[bool] = False,
34) -> dict:
35 """
36 Calculates an isotonic regression fit to observations given forecasts as the
37 explanatory values. Optionally returns confidence bands on the fit via bootstrapping.
38 Forecasts and observations are scalar-valued (i.e, integers, floats or NaN).
40 If forecasts target the mean or a quantile functional, then the user can select that
41 functional. Otherwise the user needs to supply a `solver` function
43 - of one variable (if no weights are supplied), or
44 - of two variables (if weights are supplied).
46 The solver function is used to find an optimal regression fit in each block, subject
47 to a non-decreasing monotonicity constraint, using the pool adjacent violators (PAV)
48 algorithm.
50 An example of a solver function of one variable is obs -> np.mean(obs)
51 An example of a solver function of two variables is (obs, weight) -> np.average(obs, weights=weight).
53 Ties (where one forecast value has more than one observation value) are handled as follows.
54 Forecasts are sorted in ascending order, and observations sorted to match. Where ties occur,
55 observations are sorted in descending order within each subset of ties. Isotonic regression
56 is then performed on this sorted observation sequence.
58 This implementation uses sklearn.isotonic.IsotonicRegression when `functional="mean"`.
60 Args:
61 fcst: np.array or xr.DataArray of forecast (or explanatory) values. Values must
62 be float, integer or NaN.
63 obs: np.array or xr.DataArray of corresponding observation (or response) values.
64 Must be the same shape as `fcst`. Values must be float, integer or NaN.
65 weight: positive weights to apply to each forecast-observation pair. Must be the
66 same shape as `fcst`, or `None` (which is equivalent to applying equal weights).
67 functional: Functional that the forecasts are targeting. Either "mean" or
68 "quantile" or `None`. If `None` then `solver` must be supplied. The current
69 implementation for "quantile" does not accept weights. If weighted quantile
70 regression is desired then the user should should supply an appropriate solver.
71 bootstraps: the number of bootstrap samples to perform for calculating the
72 regression confidence band. Set to `None` if a confidence band is not required.
73 quantile_level: the level of the quantile functional if `functional='quantile'`.
74 Must be strictly between 0 and 1.
75 solver: function that accepts 1D numpy array of observations and returns
76 a float. Function values give the regression fit for each block as determined
77 by the PAV algorithm. If weights are supplied, the function must have a second
78 argument that accepts 1D numpy array of weights of same length as the observations.
79 confidence_level: Confidence level of the confidence band, strictly between 0 and 1.
80 For example, a confidence level of 0.5 will calculate the confidence band by
81 computing the 25th and 75th percentiles of the bootstrapped samples.
82 min_non_nan: The minimum number of non-NaN bootstrapped values to calculate
83 confidence bands at a particular forecast value.
84 report_bootstrap_results: This specifies to whether keep the bootstrapped
85 regression values in return dictionary. Default is False to keep the output
86 result small.
88 Returns:
89 Dictionary with the following keys:
91 - "unique_fcst_sorted": 1D numpy array of remaining forecast values sorted in
92 ascending order, after any NaNs from `fcst`, `obs` and `weight` are removed,
93 and only unique values are kept to keep the output size reasonably small.
94 - "fcst_counts": 1D numpy array of forecast counts for unique values of forecast sorted
95 - "regression_values": 1D numpy array of regression values corresponding to
96 "unique_fcst_sorted" values.
97 - "regression_func": function that returns the regression fit based on linear
98 interpolation of ("fcst_sorted", "regression_values"), for any supplied
99 argument (1D numpy array) of potential forecast values.
100 - "bootstrap_results": in the case of `report_bootstrap_results=True`, 2D numpy
101 array of bootstrapped regression values is included in return dictionary.
102 Each row gives the interpolated regression values from a particular bootstrapped
103 sample, evaluated at "fcst_sorted" values. If `m` is the number of bootstrap
104 samples and `n = len(fcst_sorted)` then it is has shape `(m, n)`. We emphasise
105 that this array corresponds to `fcst_sorted` not `unique_fcst_sorted`.
106 - "confidence_band_lower_values": values of lower confidence band threshold, evaluated
107 at "unique_fcst_sorted" values.
108 - "confidence_band_upper_values": values of upper confidence band threshold, evaluated
109 at "unique_fcst_sorted" values.
110 - "confidence_band_lower_func": function that returns regression fit based on linear
111 interpolation of ("fcst_sorted", "confidence_band_lower_values"), given any
112 argument (1D numpy array) of potential forecast values.
113 - "confidence_band_upper_func": function that returns regression fit based on linear
114 interpolation of ("fcst_sorted", "confidence_band_upper_values"), given any
115 argument (1D numpy array) of potential forecast values.
116 - "confidence_band_levels": tuple giving the quantile levels used to calculate the
117 confidence band.
119 Raises:
120 ValueError: if `fcst` and `obs` are np.arrays and don't have the same shape.
121 ValueError: if `fcst` and `weight` are np.arrays and don't have the same shape.
122 ValueError: if `fcst` and `obs` are xarray.DataArrays and don't have the same dimensions.
123 ValueError: if `fcst` and `weight` are xarray.DataArrays and don't have the same dimensions.
124 ValueError: if any entries of `fcst`, `obs` or `weight` are not an integer, float or NaN.
125 ValueError: if there are no `fcst` and `obs` pairs after NaNs are removed.
126 ValueError: if `functional` is not one of "mean", "quantile" or `None`.
127 ValueError: if `quantile_level` is not strictly between 0 and 1 when `functional="quantile"`.
128 ValueError: if `weight` is not `None` when `functional="quantile"`.
129 ValueError: if `functional` and `solver` are both `None`.
130 ValueError: if not exactly one of `functional` and `solver` is `None`.
131 ValueError: if `bootstraps` is not a positive integer.
132 ValueError: if `confidence_level` is not strictly between 0 and 1.
134 Note: This function only keeps the unique values of `fcst_sorted` to keep the volume of
135 the return dictionary small. The forecast counts is also included, so users can it to
136 create forecast histogram (usually displayed in the reliability diagrams).
138 References
139 - de Leeuw, Hornik and Mair. "Isotone Optimization in R: Pool-Adjacent-Violators Algorithm (PAVA)
140 and Active Set Methods", Journal of Statistical Software, 2009.
141 - Dimitriadis, Gneiting and Jordan. "Stable reliability diagrams for probabilistic classifiers",
142 PNAS, Vol. 118 No. 8, 2020. Available at https://www.pnas.org/doi/10.1073/pnas.2016191118
143 - Jordan, Mühlemann, and Ziegel. "Optimal solutions to the isotonic regression problem",
144 2020 (version 2), available on arxiv at https://arxiv.org/abs/1904.04761
145 """
147 if isinstance(fcst, xr.DataArray):
148 fcst, obs, weight = _xr_to_np(fcst, obs, weight)
149 # now fcst, obs and weight (unless None) are np.arrays
151 _iso_arg_checks(
152 fcst,
153 obs,
154 weight,
155 functional,
156 quantile_level,
157 solver,
158 bootstraps,
159 confidence_level,
160 )
161 fcst_tidied, obs_tidied, weight_tidied = _tidy_ir_inputs(fcst, obs, weight)
162 y_out = _do_ir(fcst_tidied, obs_tidied, weight_tidied, functional, quantile_level, solver)
164 # calculate the fitting function
165 ir_func = interpolate.interp1d(fcst_tidied, y_out, bounds_error=False)
167 if bootstraps is not None:
168 boot_results = _bootstrap_ir(
169 fcst=fcst_tidied,
170 obs=obs_tidied,
171 weight=weight_tidied,
172 functional=functional,
173 quantile_level=quantile_level,
174 solver=solver,
175 bootstraps=bootstraps,
176 )
178 lower_pts, upper_pts = _confidence_band(boot_results, confidence_level, min_non_nan) # type: ignore
180 lower_func = interpolate.interp1d(fcst_tidied, lower_pts, bounds_error=False)
181 upper_func = interpolate.interp1d(fcst_tidied, upper_pts, bounds_error=False)
183 confband_levels = ((1 - confidence_level) / 2, 1 - (1 - confidence_level) / 2) # type: ignore
185 else:
186 boot_results = lower_pts = upper_pts = None
187 lower_func = upper_func = partial(np.full_like, fill_value=np.nan)
188 confband_levels = (None, None) # type: ignore
189 # To reduce the size of output dictionary, we only keep the unique values of
190 # forecasts and accordingly regression and confidence band values calculate by using
191 # unique forecast values. We also calculate forecast counts that can be used to create
192 # the forecast histogram.
193 unique_fcst_sorted, fcst_counts = np.unique(fcst_tidied, return_counts=True)
194 regression_values = ir_func(unique_fcst_sorted)
195 if bootstraps:
196 lower_pts = lower_func(unique_fcst_sorted)
197 upper_pts = upper_func(unique_fcst_sorted)
198 results = {
199 "fcst_sorted": unique_fcst_sorted,
200 "fcst_counts": fcst_counts,
201 "regression_values": regression_values,
202 "regression_func": ir_func,
203 "confidence_band_lower_values": lower_pts,
204 "confidence_band_upper_values": upper_pts,
205 "confidence_band_lower_func": lower_func,
206 "confidence_band_upper_func": upper_func,
207 "confidence_band_levels": confband_levels,
208 }
209 if report_bootstrap_results:
210 results["bootstrap_results"] = boot_results
212 return results
215def _xr_to_np(
216 fcst: xr.DataArray, obs: xr.DataArray, weight: Optional[xr.DataArray]
217) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
218 """
219 Conducts basic dimension checks to `isotonic_fit` inputs if they are xr.DataArray.
220 Then converts xr.DataArray objects into corresponding numpy arrays, with
221 corresponding datapoints.
223 Args:
224 fcst: DataArray of forecast values
225 obs: DataArray of observed values
226 weight: weights to apply to each forecast-observation pair. Must be the same
227 shape as `fcst`, or `None`.
228 Returns:
229 numpy arrays of `fcst`, `obs` and `weight`.
231 Raises:
232 ValueError: if `fcst` and `obs` do not have the same dimension.
233 ValueError: if `fcst` and `weight` do not have the same dimension.
234 """
235 fcst_dims = fcst.dims
237 if set(fcst_dims) != set(obs.dims):
238 raise ValueError("`fcst` and `obs` must have same dimensions.")
240 if weight is not None:
241 if set(fcst_dims) != set(weight.dims):
242 raise ValueError("`fcst` and `weight` must have same dimensions.")
243 merged_ds = xr.merge([fcst.rename("fcst"), obs.rename("obs"), weight.rename("weight")])
244 weight = merged_ds["weight"].transpose(*fcst_dims).values
245 else:
246 merged_ds = xr.merge([fcst.rename("fcst"), obs.rename("obs")])
248 fcst = merged_ds["fcst"].transpose(*fcst_dims).values
249 obs = merged_ds["obs"].transpose(*fcst_dims).values
251 return fcst, obs, weight
254def _iso_arg_checks( # pylint: disable=too-many-arguments, too-many-branches
255 fcst: np.ndarray,
256 obs: np.ndarray,
257 weight: np.ndarray,
258 functional: Optional[str] = None,
259 quantile_level: Optional[float] = None,
260 solver: Optional[Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]]] = None,
261 bootstraps: Optional[int] = None,
262 confidence_level: Optional[float] = None,
263) -> None:
264 """Raises ValueError if isotonic_fit arguments are invalid."""
265 if fcst.shape != obs.shape:
266 raise ValueError("`fcst` and `obs` must have same shape.")
268 if not (np.issubdtype(fcst.dtype, np.integer) or np.issubdtype(fcst.dtype, np.floating)):
269 raise ValueError("`fcst` must be an array of floats or integers.")
271 if not (np.issubdtype(obs.dtype, np.integer) or np.issubdtype(obs.dtype, np.floating)):
272 raise ValueError("`obs` must be an array of floats or integers.")
274 if weight is not None:
275 if fcst.shape != weight.shape:
276 raise ValueError("`fcst` and `weight` must have same shape.")
278 if not (np.issubdtype(weight.dtype, np.integer) or np.issubdtype(weight.dtype, np.floating)):
279 raise ValueError("`weight` must be an array of floats or integers, or else `None`.")
281 if np.any(weight <= 0):
282 raise ValueError("`weight` values must be either positive or NaN.")
284 if functional not in ["mean", "quantile", None]:
285 raise ValueError("`functional` must be one of 'mean', 'quantile' or `None`.")
287 if functional == "quantile" and not 0 < quantile_level < 1: # type: ignore
288 raise ValueError("`quantile_level` must be strictly between 0 and 1.")
290 if functional == "quantile" and weight is not None:
291 msg = "Weighted quantile isotonic regression has not been implemented. "
292 msg += "Either (i) set `weight=None` or (ii) set `functional=None` and supply an appropriate "
293 msg += "quantile `solver` function with two arguments."
294 raise ValueError(msg)
296 if functional is None and solver is None:
297 raise ValueError("`functional` and `solver` cannot both be `None`.")
299 if None not in (functional, solver):
300 raise ValueError("One of `functional` or `solver` must be `None`.")
302 if bootstraps is not None:
303 if not isinstance(bootstraps, int) or bootstraps < 1:
304 raise ValueError("`bootstraps` must be a positive integer.")
305 if not 0 < confidence_level < 1: # type: ignore
306 raise ValueError("`confidence_level` must be strictly between 0 and 1.")
309def _tidy_ir_inputs(
310 fcst: np.ndarray,
311 obs: np.ndarray,
312 weight: Optional[np.ndarray] = None,
313) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
314 """
315 Tidies array inputs to `isotonic_fit` in preparation for the regression routine.
317 Arrays are flattened to 1D arrays. NaNs are jointly removed from fcst, obs, weights.
318 The array data is then sorted by fcst (ascending) then by obs (descending).
320 Args:
321 fcst: numpy array of forecast values.
322 obs: numpy array of corresponding observed values, with the same shape as `fcst`.
323 weight: Either `None`, or numpy array of corresponding weights with the same
324 shape as `fcst`.
326 Returns:
327 Tidied fcst, obs and weight arrays. If input weight is `None` then returned weight
328 is `None`.
330 Raises:
331 ValueError: if no forecast and observation pairs remains after removing NaNs.
332 """
333 fcst = fcst.flatten()
334 obs = obs.flatten()
336 if weight is not None:
337 weight = weight.flatten()
339 nan_locs = np.isnan(fcst) | np.isnan(obs)
341 if weight is not None:
342 nan_locs = nan_locs | np.isnan(weight)
344 new_fcst = fcst[~nan_locs]
345 new_obs = obs[~nan_locs]
347 if len(new_fcst) == 0:
348 raise ValueError("No (fcst, obs) pairs remaining after NaNs removed.")
350 # deal with ties in fcst: sort by fcst (ascending) then by obs (descending)
351 # this ensures that ties in fcst have same regression value in PAV algorithm
352 sorter = np.lexsort((-new_obs, new_fcst))
353 new_fcst = new_fcst[sorter]
354 new_obs = new_obs[sorter]
356 if weight is None:
357 new_weight = None
358 else:
359 new_weight = weight[~nan_locs]
360 new_weight = new_weight[sorter]
362 return new_fcst, new_obs, new_weight
365def _do_ir( # pylint: disable=too-many-arguments
366 fcst: np.ndarray,
367 obs: np.ndarray,
368 weight: Optional[np.ndarray],
369 functional: Optional[Literal["mean", "quantile"]],
370 quantile_level: Optional[float],
371 solver: Optional[Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]]],
372) -> np.ndarray:
373 """
374 Returns the isotonic regression (IR) fit for specified functional or solver,
375 by passing the inputs to the appropriate IR algorithm.
377 The inputs `fcst`, `obs` and `weight` are 1D numpy arrays assumed to have been
378 mutually tidied via `_tidy_ir_inputs`.
380 Args:
381 fcst: tidied array of forecast values.
382 obs: tidied array of observed values.
383 weight: tidied array of weights.
384 functional: either "mean", "quantile" or None.
385 quantile_level: float strictly between 0 and 1 if functional="quantile",
386 else None.
387 solver: solver function as described in docstring of `isotonic_fit`, or `None`.
388 Cannot be `None` if `functional` is `None`.
390 Returns:
391 1D numpy array of values fitted using isotonic regression.
392 """
393 if functional == "mean":
394 y_out = _contiguous_mean_ir(fcst, obs, weight)
395 elif functional == "quantile":
396 y_out = _contiguous_quantile_ir(obs, quantile_level) # type: ignore
397 else:
398 y_out = _contiguous_ir(obs, solver, weight) # type: ignore
400 return y_out
403def _contiguous_ir(
404 y: np.ndarray,
405 solver: Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]],
406 weight: Optional[np.ndarray] = None,
407) -> np.ndarray:
408 """
409 Given a 1D numpy array `y` of response values, returns fitted isotonic regression
410 with values for each valid regression block determined by `solver`.
412 This implementation uses the pool adjacent violators (PAV) algorithm, and is based on the squared loss
413 implementation at sklearn._isotonic._inplace_contiguous_isotonic_regression
414 https://github.com/scikit-learn/scikit-learn/blob/a6574655478a24247189236ce5f824e65ad0d369/sklearn/_isotonic.pyx
415 For an animated visualisation of the algorithm, see
416 https://www.r-bloggers.com/2020/05/what-is-isotonic-regression/
418 Args:
419 y: 1D numpy array of response (i.e., observed) values.
420 solver: function which determines values of each valid regression block
421 in the PAV algorithm, given response values in the block.
422 weight: 1D numpy array of weights, or else `None`.
423 If `weight` is not `None` then `solver` takes in two arrays as arguments
424 (response values and weights). If `weight` is `None` then `solver` takes
425 in a single array as argument (response values).
427 """
428 len_y = len(y)
430 if weight is not None and len(weight) != len_y:
431 raise ValueError("`y` and `weight` must have same length.")
433 # y_out will be the final regression output
434 y_out = y.copy()
436 # target describes a list of blocks. At any time, if [i..j] (inclusive) is
437 # an active block, then target[i] := j and target[j] := i.
438 target = np.arange(len_y, dtype=np.intp)
440 index = 0
441 while index < len_y: # pragma: no cover ... breaks are used to exit the loop but this is safer than while True
442 # idx_next_block is the index of beginning of the next block
443 idx_next_block = target[index] + 1
445 if idx_next_block == len_y: # there are no more blocks
446 break
447 if (
448 y_out[index] < y_out[idx_next_block]
449 ): # the regression value of the next block is greater than the active one
450 index = idx_next_block # that block becomes the active block
451 continue
453 while True:
454 # We are within a decreasing subsequence.
455 prev_y = y_out[idx_next_block] # this is the smaller y value after the active block
456 idx_next_block = target[idx_next_block] + 1 # advance index of next block
457 if idx_next_block == len_y or prev_y < y_out[idx_next_block]:
458 # update block indices
459 target[index] = idx_next_block - 1
460 target[idx_next_block - 1] = index
462 # regression value for current block
463 if weight is None:
464 y_out[index] = solver(y[index:idx_next_block]) # type: ignore
465 else:
466 y_out[index] = solver(y[index:idx_next_block], weight[index:idx_next_block]) # type: ignore
468 if index > 0: # pragma: no branch
469 # Backtrack if we can. This makes the algorithm
470 # single-pass and ensures O(n) complexity (subject to solver complexity)
471 index = target[index - 1]
472 # Otherwise, restart from the same point.
473 break
474 # Reconstruct the solution.
475 index = 0
476 while index < len_y:
477 idx_next_block = target[index] + 1
478 y_out[index + 1 : idx_next_block] = y_out[index]
479 index = idx_next_block
481 return y_out
484def _contiguous_quantile_ir(y: np.ndarray, alpha: float) -> np.ndarray:
485 """Performs contiguous quantile IR on tidied data y, for quantile-level alpha, with no weights."""
486 return _contiguous_ir(y, partial(np.quantile, q=alpha))
489def _contiguous_mean_ir(x: np.ndarray, y: np.ndarray, weight: Optional[np.ndarray]) -> np.ndarray:
490 """
491 Performs classical (i.e. for mean functional) contiguous quantile IR on tidied data x, y.
492 Uses sklearn implementation rather than supplying the mean solver function to `_contiguous_ir`,
493 as it is about 4 times faster (since it is optimised for mean).
494 """
495 return IsotonicRegression().fit_transform(x, y, sample_weight=weight)
498def _bootstrap_ir( # pylint: disable=too-many-arguments, too-many-locals
499 fcst: np.ndarray,
500 obs: np.ndarray,
501 bootstraps: int,
502 weight: Optional[np.ndarray],
503 functional: Optional[Literal["mean", "quantile"]],
504 quantile_level: Optional[float],
505 solver: Optional[Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]]],
506):
507 """
508 Gives the isotonic fits of bootstrapped samples.
510 Args:
511 fcst: 1D numpy array of forecast values.
512 obs: 1D numpy array of observed values corresponding to `fcst`.
513 bootstraps: the number of samples to bootstrap.
514 weight: either `None`, or 1D numpy array of weights corresponding to `fcst`.
515 functional: either `None` or the target functional (one of "mean" or "quantile").
516 quantile_level: the quantile level if `functional="quantile"`.
517 solver: the regression solver function if `functional=None`.
519 Returns:
520 2D numpy array with one row for each bootstrapped sample and one column for each `fcst` value.
521 The [i,j] entry is the isotonic fit of the ith bootstrapped sample evaluated at the point
522 `fcst[j]`. Evaluations are determined using interpolation when necessary. If the [i,j] entry is
523 NaN then `fcst[j]` lay outside the range of the ith bootstrapped sample.
524 """
525 # initialise
526 fc_values_count = len(fcst)
527 result = np.full((bootstraps, fc_values_count), np.nan)
529 for boostrap_sample_num in range(bootstraps):
530 selection = np.random.randint(0, fc_values_count, fc_values_count)
531 fcst_sample = fcst[selection]
532 obs_sample = obs[selection]
533 if weight is None:
534 weight_sample = None
535 else:
536 weight_sample = weight[selection]
538 fcst_sample, obs_sample, weight_sample = _tidy_ir_inputs(fcst_sample, obs_sample, weight_sample)
539 ir_results = _do_ir(fcst_sample, obs_sample, weight_sample, functional, quantile_level, solver)
541 approximation_func = interpolate.interp1d(fcst_sample, ir_results, bounds_error=False)
543 result[boostrap_sample_num] = approximation_func(fcst)
545 return result
548def _confidence_band(
549 bootstrap_results: np.ndarray, confidence_level: float, min_non_nan: int
550) -> Tuple[np.ndarray, np.ndarray]:
551 """
552 Given bootstrapped results, return the lower and upper values of the corresponding confidence band
553 with specified `confidence_level`.
555 Args:
556 bootstrap_results: matrix output from `_bootstrap_ir`.
557 confidence_level: float between 0 and 1.
558 min_non_nan: the minimum number of non-NaN values a column of `bootstrap_results`
559 for which the confidence band will be calculated. If the minimum is not met,
560 a NaN value is returned.
562 Returns:
563 Tuple of two 1D numpy arrays. The first array gives is the
564 `(1-confidence_level)/2`-quantile of each column of `bootstrap_results`.
565 The second array gives is the `1-(1-confidence_level)/2`-quantile of each column.
566 """
567 sig_level = 1 - confidence_level
568 upper_level = 1 - sig_level / 2
569 lower_level = sig_level / 2
571 # confidence band values ignoring NaNs
572 upper = _nanquantile(bootstrap_results, upper_level)
573 lower = _nanquantile(bootstrap_results, lower_level)
575 # masking values with too many NaNs in bootstrap results
576 non_nan_count = np.count_nonzero(~np.isnan(bootstrap_results), axis=0)
577 upper = np.where(non_nan_count >= min_non_nan, upper, np.nan)
578 lower = np.where(non_nan_count >= min_non_nan, lower, np.nan)
580 return lower, upper
583def _nanquantile(arr: np.ndarray, quant: float) -> np.ndarray:
584 """Returns same* output as np.nanquantile but faster.
586 *almost equal
587 See https://github.com/numpy/numpy/issues/16575, this implementation is about 100
588 times faster compared to numpy 1.24.3.
590 Args:
591 arr: 2D numpy array with one row for each bootstrapped sample and one column for
592 each `fcst` value.
593 quant: Value between 0 and 1.
595 Returns:
596 1D numpy array of shape `arr.shape[0]` of values at each quantile.
597 """
598 arr = np.copy(arr)
599 valid_obs_count = np.sum(np.isfinite(arr), axis=0)
600 # Replace NaN with maximum (these values will be ignored)
601 max_val = np.nanmax(arr)
602 arr[np.isnan(arr)] = max_val
603 # Sort forecast values - former NaNs will move to the end
604 arr = np.sort(arr, axis=0)
605 result = np.zeros(shape=[arr.shape[1]])
607 # Get the index of the values at the requested quantile
608 desired_position = (valid_obs_count - 1) * quant
609 index_floor = np.floor(desired_position).astype(np.int32)
610 index_ceil = np.ceil(desired_position).astype(np.int32)
611 same_index = index_floor == index_ceil
613 # Linear interpolation - take the fractional part of desired position
614 floor_val = arr[index_floor, np.arange(arr.shape[1])]
615 floor_frac = index_ceil - desired_position
616 floor_frac_val = arr[index_floor, np.arange(arr.shape[1])] * floor_frac
617 ceil_frac = desired_position - index_floor
618 ceil_frac_val = arr[index_ceil, np.arange(arr.shape[1])] * ceil_frac
619 result = np.where(same_index, floor_val, floor_frac_val + ceil_frac_val)
620 return result