Coverage for src/scores/continuous/isoreg_impl.py: 100%

183 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-02-28 12:51 +1100

1""" 

2An implementation of isotonic regression using the pool adjacent violators (PAV) 

3algorithm. In the context of forecast verification, the regression explanatory variable 

4is the forecast and the response variable is the observation. Forecasts and observations 

5are assumed to be real-valued quantities. 

6 

7Isotonic regression exposes conditional forecast biases. Confidence bands for the 

8regression fit can also be returned. This implementation includes option to specify what 

9functional the forecast is targeting, so that appropriate regression can be performed on 

10different types of real-valued forecasts, such as those which are a quantile of the 

11predictive distribution. 

12""" 

13 

14from functools import partial 

15from typing import Callable, Literal, Optional, Tuple, Union 

16 

17import numpy as np 

18import xarray as xr 

19from scipy import interpolate 

20from sklearn.isotonic import IsotonicRegression 

21 

22 

23def isotonic_fit( # pylint: disable=too-many-locals, too-many-arguments 

24 fcst: Union[np.ndarray, xr.DataArray], 

25 obs: Union[np.ndarray, xr.DataArray], 

26 weight: Optional[Union[np.ndarray, xr.DataArray]] = None, 

27 functional: Optional[Literal["mean", "quantile"]] = "mean", 

28 bootstraps: Optional[int] = None, 

29 quantile_level: Optional[float] = None, 

30 solver: Optional[Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]]] = None, 

31 confidence_level: Optional[float] = 0.9, 

32 min_non_nan: Optional[int] = 1, 

33 report_bootstrap_results: Optional[bool] = False, 

34) -> dict: 

35 """ 

36 Calculates an isotonic regression fit to observations given forecasts as the 

37 explanatory values. Optionally returns confidence bands on the fit via bootstrapping. 

38 Forecasts and observations are scalar-valued (i.e, integers, floats or NaN). 

39 

40 If forecasts target the mean or a quantile functional, then the user can select that 

41 functional. Otherwise the user needs to supply a `solver` function 

42 

43 - of one variable (if no weights are supplied), or 

44 - of two variables (if weights are supplied). 

45 

46 The solver function is used to find an optimal regression fit in each block, subject 

47 to a non-decreasing monotonicity constraint, using the pool adjacent violators (PAV) 

48 algorithm. 

49 

50 An example of a solver function of one variable is obs -> np.mean(obs) 

51 An example of a solver function of two variables is (obs, weight) -> np.average(obs, weights=weight). 

52 

53 Ties (where one forecast value has more than one observation value) are handled as follows. 

54 Forecasts are sorted in ascending order, and observations sorted to match. Where ties occur, 

55 observations are sorted in descending order within each subset of ties. Isotonic regression 

56 is then performed on this sorted observation sequence. 

57 

58 This implementation uses sklearn.isotonic.IsotonicRegression when `functional="mean"`. 

59 

60 Args: 

61 fcst: np.array or xr.DataArray of forecast (or explanatory) values. Values must 

62 be float, integer or NaN. 

63 obs: np.array or xr.DataArray of corresponding observation (or response) values. 

64 Must be the same shape as `fcst`. Values must be float, integer or NaN. 

65 weight: positive weights to apply to each forecast-observation pair. Must be the 

66 same shape as `fcst`, or `None` (which is equivalent to applying equal weights). 

67 functional: Functional that the forecasts are targeting. Either "mean" or 

68 "quantile" or `None`. If `None` then `solver` must be supplied. The current 

69 implementation for "quantile" does not accept weights. If weighted quantile 

70 regression is desired then the user should should supply an appropriate solver. 

71 bootstraps: the number of bootstrap samples to perform for calculating the 

72 regression confidence band. Set to `None` if a confidence band is not required. 

73 quantile_level: the level of the quantile functional if `functional='quantile'`. 

74 Must be strictly between 0 and 1. 

75 solver: function that accepts 1D numpy array of observations and returns 

76 a float. Function values give the regression fit for each block as determined 

77 by the PAV algorithm. If weights are supplied, the function must have a second 

78 argument that accepts 1D numpy array of weights of same length as the observations. 

79 confidence_level: Confidence level of the confidence band, strictly between 0 and 1. 

80 For example, a confidence level of 0.5 will calculate the confidence band by 

81 computing the 25th and 75th percentiles of the bootstrapped samples. 

82 min_non_nan: The minimum number of non-NaN bootstrapped values to calculate 

83 confidence bands at a particular forecast value. 

84 report_bootstrap_results: This specifies to whether keep the bootstrapped 

85 regression values in return dictionary. Default is False to keep the output 

86 result small. 

87 

88 Returns: 

89 Dictionary with the following keys: 

90 

91 - "unique_fcst_sorted": 1D numpy array of remaining forecast values sorted in 

92 ascending order, after any NaNs from `fcst`, `obs` and `weight` are removed, 

93 and only unique values are kept to keep the output size reasonably small. 

94 - "fcst_counts": 1D numpy array of forecast counts for unique values of forecast sorted 

95 - "regression_values": 1D numpy array of regression values corresponding to 

96 "unique_fcst_sorted" values. 

97 - "regression_func": function that returns the regression fit based on linear 

98 interpolation of ("fcst_sorted", "regression_values"), for any supplied 

99 argument (1D numpy array) of potential forecast values. 

100 - "bootstrap_results": in the case of `report_bootstrap_results=True`, 2D numpy 

101 array of bootstrapped regression values is included in return dictionary. 

102 Each row gives the interpolated regression values from a particular bootstrapped 

103 sample, evaluated at "fcst_sorted" values. If `m` is the number of bootstrap 

104 samples and `n = len(fcst_sorted)` then it is has shape `(m, n)`. We emphasise 

105 that this array corresponds to `fcst_sorted` not `unique_fcst_sorted`. 

106 - "confidence_band_lower_values": values of lower confidence band threshold, evaluated 

107 at "unique_fcst_sorted" values. 

108 - "confidence_band_upper_values": values of upper confidence band threshold, evaluated 

109 at "unique_fcst_sorted" values. 

110 - "confidence_band_lower_func": function that returns regression fit based on linear 

111 interpolation of ("fcst_sorted", "confidence_band_lower_values"), given any 

112 argument (1D numpy array) of potential forecast values. 

113 - "confidence_band_upper_func": function that returns regression fit based on linear 

114 interpolation of ("fcst_sorted", "confidence_band_upper_values"), given any 

115 argument (1D numpy array) of potential forecast values. 

116 - "confidence_band_levels": tuple giving the quantile levels used to calculate the 

117 confidence band. 

118 

119 Raises: 

120 ValueError: if `fcst` and `obs` are np.arrays and don't have the same shape. 

121 ValueError: if `fcst` and `weight` are np.arrays and don't have the same shape. 

122 ValueError: if `fcst` and `obs` are xarray.DataArrays and don't have the same dimensions. 

123 ValueError: if `fcst` and `weight` are xarray.DataArrays and don't have the same dimensions. 

124 ValueError: if any entries of `fcst`, `obs` or `weight` are not an integer, float or NaN. 

125 ValueError: if there are no `fcst` and `obs` pairs after NaNs are removed. 

126 ValueError: if `functional` is not one of "mean", "quantile" or `None`. 

127 ValueError: if `quantile_level` is not strictly between 0 and 1 when `functional="quantile"`. 

128 ValueError: if `weight` is not `None` when `functional="quantile"`. 

129 ValueError: if `functional` and `solver` are both `None`. 

130 ValueError: if not exactly one of `functional` and `solver` is `None`. 

131 ValueError: if `bootstraps` is not a positive integer. 

132 ValueError: if `confidence_level` is not strictly between 0 and 1. 

133 

134 Note: This function only keeps the unique values of `fcst_sorted` to keep the volume of 

135 the return dictionary small. The forecast counts is also included, so users can it to 

136 create forecast histogram (usually displayed in the reliability diagrams). 

137 

138 References 

139 - de Leeuw, Hornik and Mair. "Isotone Optimization in R: Pool-Adjacent-Violators Algorithm (PAVA) 

140 and Active Set Methods", Journal of Statistical Software, 2009. 

141 - Dimitriadis, Gneiting and Jordan. "Stable reliability diagrams for probabilistic classifiers", 

142 PNAS, Vol. 118 No. 8, 2020. Available at https://www.pnas.org/doi/10.1073/pnas.2016191118 

143 - Jordan, Mühlemann, and Ziegel. "Optimal solutions to the isotonic regression problem", 

144 2020 (version 2), available on arxiv at https://arxiv.org/abs/1904.04761 

145 """ 

146 

147 if isinstance(fcst, xr.DataArray): 

148 fcst, obs, weight = _xr_to_np(fcst, obs, weight) 

149 # now fcst, obs and weight (unless None) are np.arrays 

150 

151 _iso_arg_checks( 

152 fcst, 

153 obs, 

154 weight, 

155 functional, 

156 quantile_level, 

157 solver, 

158 bootstraps, 

159 confidence_level, 

160 ) 

161 fcst_tidied, obs_tidied, weight_tidied = _tidy_ir_inputs(fcst, obs, weight) 

162 y_out = _do_ir(fcst_tidied, obs_tidied, weight_tidied, functional, quantile_level, solver) 

163 

164 # calculate the fitting function 

165 ir_func = interpolate.interp1d(fcst_tidied, y_out, bounds_error=False) 

166 

167 if bootstraps is not None: 

168 boot_results = _bootstrap_ir( 

169 fcst=fcst_tidied, 

170 obs=obs_tidied, 

171 weight=weight_tidied, 

172 functional=functional, 

173 quantile_level=quantile_level, 

174 solver=solver, 

175 bootstraps=bootstraps, 

176 ) 

177 

178 lower_pts, upper_pts = _confidence_band(boot_results, confidence_level, min_non_nan) # type: ignore 

179 

180 lower_func = interpolate.interp1d(fcst_tidied, lower_pts, bounds_error=False) 

181 upper_func = interpolate.interp1d(fcst_tidied, upper_pts, bounds_error=False) 

182 

183 confband_levels = ((1 - confidence_level) / 2, 1 - (1 - confidence_level) / 2) # type: ignore 

184 

185 else: 

186 boot_results = lower_pts = upper_pts = None 

187 lower_func = upper_func = partial(np.full_like, fill_value=np.nan) 

188 confband_levels = (None, None) # type: ignore 

189 # To reduce the size of output dictionary, we only keep the unique values of 

190 # forecasts and accordingly regression and confidence band values calculate by using 

191 # unique forecast values. We also calculate forecast counts that can be used to create 

192 # the forecast histogram. 

193 unique_fcst_sorted, fcst_counts = np.unique(fcst_tidied, return_counts=True) 

194 regression_values = ir_func(unique_fcst_sorted) 

195 if bootstraps: 

196 lower_pts = lower_func(unique_fcst_sorted) 

197 upper_pts = upper_func(unique_fcst_sorted) 

198 results = { 

199 "fcst_sorted": unique_fcst_sorted, 

200 "fcst_counts": fcst_counts, 

201 "regression_values": regression_values, 

202 "regression_func": ir_func, 

203 "confidence_band_lower_values": lower_pts, 

204 "confidence_band_upper_values": upper_pts, 

205 "confidence_band_lower_func": lower_func, 

206 "confidence_band_upper_func": upper_func, 

207 "confidence_band_levels": confband_levels, 

208 } 

209 if report_bootstrap_results: 

210 results["bootstrap_results"] = boot_results 

211 

212 return results 

213 

214 

215def _xr_to_np( 

216 fcst: xr.DataArray, obs: xr.DataArray, weight: Optional[xr.DataArray] 

217) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 

218 """ 

219 Conducts basic dimension checks to `isotonic_fit` inputs if they are xr.DataArray. 

220 Then converts xr.DataArray objects into corresponding numpy arrays, with 

221 corresponding datapoints. 

222 

223 Args: 

224 fcst: DataArray of forecast values 

225 obs: DataArray of observed values 

226 weight: weights to apply to each forecast-observation pair. Must be the same 

227 shape as `fcst`, or `None`. 

228 Returns: 

229 numpy arrays of `fcst`, `obs` and `weight`. 

230 

231 Raises: 

232 ValueError: if `fcst` and `obs` do not have the same dimension. 

233 ValueError: if `fcst` and `weight` do not have the same dimension. 

234 """ 

235 fcst_dims = fcst.dims 

236 

237 if set(fcst_dims) != set(obs.dims): 

238 raise ValueError("`fcst` and `obs` must have same dimensions.") 

239 

240 if weight is not None: 

241 if set(fcst_dims) != set(weight.dims): 

242 raise ValueError("`fcst` and `weight` must have same dimensions.") 

243 merged_ds = xr.merge([fcst.rename("fcst"), obs.rename("obs"), weight.rename("weight")]) 

244 weight = merged_ds["weight"].transpose(*fcst_dims).values 

245 else: 

246 merged_ds = xr.merge([fcst.rename("fcst"), obs.rename("obs")]) 

247 

248 fcst = merged_ds["fcst"].transpose(*fcst_dims).values 

249 obs = merged_ds["obs"].transpose(*fcst_dims).values 

250 

251 return fcst, obs, weight 

252 

253 

254def _iso_arg_checks( # pylint: disable=too-many-arguments, too-many-branches 

255 fcst: np.ndarray, 

256 obs: np.ndarray, 

257 weight: np.ndarray, 

258 functional: Optional[str] = None, 

259 quantile_level: Optional[float] = None, 

260 solver: Optional[Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]]] = None, 

261 bootstraps: Optional[int] = None, 

262 confidence_level: Optional[float] = None, 

263) -> None: 

264 """Raises ValueError if isotonic_fit arguments are invalid.""" 

265 if fcst.shape != obs.shape: 

266 raise ValueError("`fcst` and `obs` must have same shape.") 

267 

268 if not (np.issubdtype(fcst.dtype, np.integer) or np.issubdtype(fcst.dtype, np.floating)): 

269 raise ValueError("`fcst` must be an array of floats or integers.") 

270 

271 if not (np.issubdtype(obs.dtype, np.integer) or np.issubdtype(obs.dtype, np.floating)): 

272 raise ValueError("`obs` must be an array of floats or integers.") 

273 

274 if weight is not None: 

275 if fcst.shape != weight.shape: 

276 raise ValueError("`fcst` and `weight` must have same shape.") 

277 

278 if not (np.issubdtype(weight.dtype, np.integer) or np.issubdtype(weight.dtype, np.floating)): 

279 raise ValueError("`weight` must be an array of floats or integers, or else `None`.") 

280 

281 if np.any(weight <= 0): 

282 raise ValueError("`weight` values must be either positive or NaN.") 

283 

284 if functional not in ["mean", "quantile", None]: 

285 raise ValueError("`functional` must be one of 'mean', 'quantile' or `None`.") 

286 

287 if functional == "quantile" and not 0 < quantile_level < 1: # type: ignore 

288 raise ValueError("`quantile_level` must be strictly between 0 and 1.") 

289 

290 if functional == "quantile" and weight is not None: 

291 msg = "Weighted quantile isotonic regression has not been implemented. " 

292 msg += "Either (i) set `weight=None` or (ii) set `functional=None` and supply an appropriate " 

293 msg += "quantile `solver` function with two arguments." 

294 raise ValueError(msg) 

295 

296 if functional is None and solver is None: 

297 raise ValueError("`functional` and `solver` cannot both be `None`.") 

298 

299 if None not in (functional, solver): 

300 raise ValueError("One of `functional` or `solver` must be `None`.") 

301 

302 if bootstraps is not None: 

303 if not isinstance(bootstraps, int) or bootstraps < 1: 

304 raise ValueError("`bootstraps` must be a positive integer.") 

305 if not 0 < confidence_level < 1: # type: ignore 

306 raise ValueError("`confidence_level` must be strictly between 0 and 1.") 

307 

308 

309def _tidy_ir_inputs( 

310 fcst: np.ndarray, 

311 obs: np.ndarray, 

312 weight: Optional[np.ndarray] = None, 

313) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 

314 """ 

315 Tidies array inputs to `isotonic_fit` in preparation for the regression routine. 

316 

317 Arrays are flattened to 1D arrays. NaNs are jointly removed from fcst, obs, weights. 

318 The array data is then sorted by fcst (ascending) then by obs (descending). 

319 

320 Args: 

321 fcst: numpy array of forecast values. 

322 obs: numpy array of corresponding observed values, with the same shape as `fcst`. 

323 weight: Either `None`, or numpy array of corresponding weights with the same 

324 shape as `fcst`. 

325 

326 Returns: 

327 Tidied fcst, obs and weight arrays. If input weight is `None` then returned weight 

328 is `None`. 

329 

330 Raises: 

331 ValueError: if no forecast and observation pairs remains after removing NaNs. 

332 """ 

333 fcst = fcst.flatten() 

334 obs = obs.flatten() 

335 

336 if weight is not None: 

337 weight = weight.flatten() 

338 

339 nan_locs = np.isnan(fcst) | np.isnan(obs) 

340 

341 if weight is not None: 

342 nan_locs = nan_locs | np.isnan(weight) 

343 

344 new_fcst = fcst[~nan_locs] 

345 new_obs = obs[~nan_locs] 

346 

347 if len(new_fcst) == 0: 

348 raise ValueError("No (fcst, obs) pairs remaining after NaNs removed.") 

349 

350 # deal with ties in fcst: sort by fcst (ascending) then by obs (descending) 

351 # this ensures that ties in fcst have same regression value in PAV algorithm 

352 sorter = np.lexsort((-new_obs, new_fcst)) 

353 new_fcst = new_fcst[sorter] 

354 new_obs = new_obs[sorter] 

355 

356 if weight is None: 

357 new_weight = None 

358 else: 

359 new_weight = weight[~nan_locs] 

360 new_weight = new_weight[sorter] 

361 

362 return new_fcst, new_obs, new_weight 

363 

364 

365def _do_ir( # pylint: disable=too-many-arguments 

366 fcst: np.ndarray, 

367 obs: np.ndarray, 

368 weight: Optional[np.ndarray], 

369 functional: Optional[Literal["mean", "quantile"]], 

370 quantile_level: Optional[float], 

371 solver: Optional[Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]]], 

372) -> np.ndarray: 

373 """ 

374 Returns the isotonic regression (IR) fit for specified functional or solver, 

375 by passing the inputs to the appropriate IR algorithm. 

376 

377 The inputs `fcst`, `obs` and `weight` are 1D numpy arrays assumed to have been 

378 mutually tidied via `_tidy_ir_inputs`. 

379 

380 Args: 

381 fcst: tidied array of forecast values. 

382 obs: tidied array of observed values. 

383 weight: tidied array of weights. 

384 functional: either "mean", "quantile" or None. 

385 quantile_level: float strictly between 0 and 1 if functional="quantile", 

386 else None. 

387 solver: solver function as described in docstring of `isotonic_fit`, or `None`. 

388 Cannot be `None` if `functional` is `None`. 

389 

390 Returns: 

391 1D numpy array of values fitted using isotonic regression. 

392 """ 

393 if functional == "mean": 

394 y_out = _contiguous_mean_ir(fcst, obs, weight) 

395 elif functional == "quantile": 

396 y_out = _contiguous_quantile_ir(obs, quantile_level) # type: ignore 

397 else: 

398 y_out = _contiguous_ir(obs, solver, weight) # type: ignore 

399 

400 return y_out 

401 

402 

403def _contiguous_ir( 

404 y: np.ndarray, 

405 solver: Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]], 

406 weight: Optional[np.ndarray] = None, 

407) -> np.ndarray: 

408 """ 

409 Given a 1D numpy array `y` of response values, returns fitted isotonic regression 

410 with values for each valid regression block determined by `solver`. 

411 

412 This implementation uses the pool adjacent violators (PAV) algorithm, and is based on the squared loss 

413 implementation at sklearn._isotonic._inplace_contiguous_isotonic_regression 

414 https://github.com/scikit-learn/scikit-learn/blob/a6574655478a24247189236ce5f824e65ad0d369/sklearn/_isotonic.pyx 

415 For an animated visualisation of the algorithm, see 

416 https://www.r-bloggers.com/2020/05/what-is-isotonic-regression/ 

417 

418 Args: 

419 y: 1D numpy array of response (i.e., observed) values. 

420 solver: function which determines values of each valid regression block 

421 in the PAV algorithm, given response values in the block. 

422 weight: 1D numpy array of weights, or else `None`. 

423 If `weight` is not `None` then `solver` takes in two arrays as arguments 

424 (response values and weights). If `weight` is `None` then `solver` takes 

425 in a single array as argument (response values). 

426 

427 """ 

428 len_y = len(y) 

429 

430 if weight is not None and len(weight) != len_y: 

431 raise ValueError("`y` and `weight` must have same length.") 

432 

433 # y_out will be the final regression output 

434 y_out = y.copy() 

435 

436 # target describes a list of blocks. At any time, if [i..j] (inclusive) is 

437 # an active block, then target[i] := j and target[j] := i. 

438 target = np.arange(len_y, dtype=np.intp) 

439 

440 index = 0 

441 while index < len_y: 

442 # idx_next_block is the index of beginning of the next block 

443 idx_next_block = target[index] + 1 

444 

445 if idx_next_block == len_y: # there are no more blocks 

446 break 

447 if ( 

448 y_out[index] < y_out[idx_next_block] 

449 ): # the regression value of the next block is greater than the active one 

450 index = idx_next_block # that block becomes the active block 

451 continue 

452 

453 while True: 

454 # We are within a decreasing subsequence. 

455 prev_y = y_out[idx_next_block] # this is the smaller y value after the active block 

456 idx_next_block = target[idx_next_block] + 1 # advance index of next block 

457 if idx_next_block == len_y or prev_y < y_out[idx_next_block]: 

458 # update block indices 

459 target[index] = idx_next_block - 1 

460 target[idx_next_block - 1] = index 

461 

462 # regression value for current block 

463 if weight is None: 

464 y_out[index] = solver(y[index:idx_next_block]) # type: ignore 

465 else: 

466 y_out[index] = solver(y[index:idx_next_block], weight[index:idx_next_block]) # type: ignore 

467 

468 if index > 0: 

469 # Backtrack if we can. This makes the algorithm 

470 # single-pass and ensures O(n) complexity (subject to solver complexity) 

471 index = target[index - 1] 

472 # Otherwise, restart from the same point. 

473 break 

474 # Reconstruct the solution. 

475 index = 0 

476 while index < len_y: 

477 idx_next_block = target[index] + 1 

478 y_out[index + 1 : idx_next_block] = y_out[index] 

479 index = idx_next_block 

480 

481 return y_out 

482 

483 

484def _contiguous_quantile_ir(y: np.ndarray, alpha: float) -> np.ndarray: 

485 """Performs contiguous quantile IR on tidied data y, for quantile-level alpha, with no weights.""" 

486 return _contiguous_ir(y, partial(np.quantile, q=alpha)) 

487 

488 

489def _contiguous_mean_ir(x: np.ndarray, y: np.ndarray, weight: Optional[np.ndarray]) -> np.ndarray: 

490 """ 

491 Performs classical (i.e. for mean functional) contiguous quantile IR on tidied data x, y. 

492 Uses sklearn implementation rather than supplying the mean solver function to `_contiguous_ir`, 

493 as it is about 4 times faster (since it is optimised for mean). 

494 """ 

495 return IsotonicRegression().fit_transform(x, y, sample_weight=weight) 

496 

497 

498def _bootstrap_ir( # pylint: disable=too-many-arguments, too-many-locals 

499 fcst: np.ndarray, 

500 obs: np.ndarray, 

501 bootstraps: int, 

502 weight: Optional[np.ndarray], 

503 functional: Optional[Literal["mean", "quantile"]], 

504 quantile_level: Optional[float], 

505 solver: Optional[Union[Callable[[np.ndarray, np.ndarray], float], Callable[[np.ndarray], float]]], 

506): 

507 """ 

508 Gives the isotonic fits of bootstrapped samples. 

509 

510 Args: 

511 fcst: 1D numpy array of forecast values. 

512 obs: 1D numpy array of observed values corresponding to `fcst`. 

513 bootstraps: the number of samples to bootstrap. 

514 weight: either `None`, or 1D numpy array of weights corresponding to `fcst`. 

515 functional: either `None` or the target functional (one of "mean" or "quantile"). 

516 quantile_level: the quantile level if `functional="quantile"`. 

517 solver: the regression solver function if `functional=None`. 

518 

519 Returns: 

520 2D numpy array with one row for each bootstrapped sample and one column for each `fcst` value. 

521 The [i,j] entry is the isotonic fit of the ith bootstrapped sample evaluated at the point 

522 `fcst[j]`. Evaluations are determined using interpolation when necessary. If the [i,j] entry is 

523 NaN then `fcst[j]` lay outside the range of the ith bootstrapped sample. 

524 """ 

525 # initialise 

526 fc_values_count = len(fcst) 

527 result = np.full((bootstraps, fc_values_count), np.nan) 

528 

529 for boostrap_sample_num in range(bootstraps): 

530 selection = np.random.randint(0, fc_values_count, fc_values_count) 

531 fcst_sample = fcst[selection] 

532 obs_sample = obs[selection] 

533 if weight is None: 

534 weight_sample = None 

535 else: 

536 weight_sample = weight[selection] 

537 

538 fcst_sample, obs_sample, weight_sample = _tidy_ir_inputs(fcst_sample, obs_sample, weight_sample) 

539 ir_results = _do_ir(fcst_sample, obs_sample, weight_sample, functional, quantile_level, solver) 

540 

541 approximation_func = interpolate.interp1d(fcst_sample, ir_results, bounds_error=False) 

542 

543 result[boostrap_sample_num] = approximation_func(fcst) 

544 

545 return result 

546 

547 

548def _confidence_band( 

549 bootstrap_results: np.ndarray, confidence_level: float, min_non_nan: int 

550) -> Tuple[np.ndarray, np.ndarray]: 

551 """ 

552 Given bootstrapped results, return the lower and upper values of the corresponding confidence band 

553 with specified `confidence_level`. 

554 

555 Args: 

556 bootstrap_results: matrix output from `_bootstrap_ir`. 

557 confidence_level: float between 0 and 1. 

558 min_non_nan: the minimum number of non-NaN values a column of `bootstrap_results` 

559 for which the confidence band will be calculated. If the minimum is not met, 

560 a NaN value is returned. 

561 

562 Returns: 

563 Tuple of two 1D numpy arrays. The first array gives is the 

564 `(1-confidence_level)/2`-quantile of each column of `bootstrap_results`. 

565 The second array gives is the `1-(1-confidence_level)/2`-quantile of each column. 

566 """ 

567 sig_level = 1 - confidence_level 

568 upper_level = 1 - sig_level / 2 

569 lower_level = sig_level / 2 

570 

571 # confidence band values ignoring NaNs 

572 upper = _nanquantile(bootstrap_results, upper_level) 

573 lower = _nanquantile(bootstrap_results, lower_level) 

574 

575 # masking values with too many NaNs in bootstrap results 

576 non_nan_count = np.count_nonzero(~np.isnan(bootstrap_results), axis=0) 

577 upper = np.where(non_nan_count >= min_non_nan, upper, np.nan) 

578 lower = np.where(non_nan_count >= min_non_nan, lower, np.nan) 

579 

580 return lower, upper 

581 

582 

583def _nanquantile(arr: np.ndarray, quant: float) -> np.ndarray: 

584 """Returns same* output as np.nanquantile but faster. 

585 

586 *almost equal 

587 See https://github.com/numpy/numpy/issues/16575, this implementation is about 100 

588 times faster compared to numpy 1.24.3. 

589 

590 Args: 

591 arr: 2D numpy array with one row for each bootstrapped sample and one column for 

592 each `fcst` value. 

593 quant: Value between 0 and 1. 

594 

595 Returns: 

596 1D numpy array of shape `arr.shape[0]` of values at each quantile. 

597 """ 

598 arr = np.copy(arr) 

599 valid_obs_count = np.sum(np.isfinite(arr), axis=0) 

600 # Replace NaN with maximum (these values will be ignored) 

601 if not np.isnan(arr).all(): 

602 max_val = np.nanmax(arr) 

603 arr[np.isnan(arr)] = max_val 

604 # Sort forecast values - former NaNs will move to the end 

605 arr = np.sort(arr, axis=0) 

606 result = np.zeros(shape=[arr.shape[1]]) 

607 

608 # Get the index of the values at the requested quantile 

609 desired_position = (valid_obs_count - 1) * quant 

610 index_floor = np.floor(desired_position).astype(np.int32) 

611 index_ceil = np.ceil(desired_position).astype(np.int32) 

612 same_index = index_floor == index_ceil 

613 

614 # Linear interpolation - take the fractional part of desired position 

615 floor_val = arr[index_floor, np.arange(arr.shape[1])] 

616 floor_frac = index_ceil - desired_position 

617 floor_frac_val = arr[index_floor, np.arange(arr.shape[1])] * floor_frac 

618 ceil_frac = desired_position - index_floor 

619 ceil_frac_val = arr[index_ceil, np.arange(arr.shape[1])] * ceil_frac 

620 result = np.where(same_index, floor_val, floor_frac_val + ceil_frac_val) 

621 return result