Coverage for src/scores/categorical/binary_impl.py: 100%
37 statements
« prev ^ index » next coverage.py v7.3.2, created at 2024-02-28 12:51 +1100
« prev ^ index » next coverage.py v7.3.2, created at 2024-02-28 12:51 +1100
1"""
2This module contains methods for binary categories
3"""
4from typing import Optional
6import numpy as np
7import xarray as xr
9from scores.functions import apply_weights
10from scores.processing import check_binary
11from scores.typing import FlexibleDimensionTypes, XarrayLike
12from scores.utils import gather_dimensions
def probability_of_detection(
    fcst: XarrayLike,
    obs: XarrayLike,
    reduce_dims: FlexibleDimensionTypes = None,
    preserve_dims: FlexibleDimensionTypes = None,
    weights: Optional[xr.DataArray] = None,
    check_args: Optional[bool] = True,
) -> XarrayLike:
    """
    Computes the Probability of Detection (POD), also called the Hit Rate.

    The POD is ``hits / (hits + misses)``: of all observed events (obs = 1),
    the fraction that were also forecast as an event (fcst = 1).

    Args:
        fcst: Binary forecast data; values must be in the set {0, 1, np.nan}.
        obs: Binary observed data; values must be in the set {0, 1, np.nan}.
        reduce_dims: Dimensions to sum over when counting hits and misses.
            Dimensions not listed are kept. The special value 'all' sums over
            every dimension. Only one of `reduce_dims` and `preserve_dims` may
            be given; if neither is given, all dimensions are summed.
        preserve_dims: Dimensions to keep (i.e. not sum over) when counting
            hits and misses. Every other dimension is summed. The special
            value 'all' keeps every dimension, in which case the result has
            the shape/dimensionality of the forecast, holds the POD at each
            individual point, and the forecast and observed dimensions must
            match exactly. Only one of `reduce_dims` and `preserve_dims` may
            be given; if neither is given, all dimensions are reduced.
        weights: Optional array of weights (e.g. by area, by latitude, by
            population, custom) applied to the hits and misses before they
            are summed.
        check_args: If true, verify that `fcst` and `obs` only contain values
            in the set {0, 1, np.nan}. Skipping the check may be worthwhile
            when the inputs are known to be valid and performance matters,
            e.g. when working with dask.

    Returns:
        The Probability of Detection, reduced over the summed dimensions.

    Raises:
        ValueError: if `check_args` is true and `fcst` or `obs` contains a
            value outside the set {0, 1, np.nan}.
    """
    if check_args:
        # Validate the forecast first, then the observations.
        for data, label in ((fcst, "fcst"), (obs, "obs")):
            check_binary(data, label)

    dims_to_sum = gather_dimensions(fcst.dims, obs.dims, reduce_dims, preserve_dims)

    # Points where both inputs are non-NaN. A NaN compares as False in the
    # equality tests below, so without this mask it would be counted as a
    # definite "not a hit" / "not a miss" instead of being left as NaN.
    both_valid = (~np.isnan(fcst)) & (~np.isnan(obs))
    event_observed = obs == 1

    miss_mask = (event_observed & (fcst == 0)).where(both_valid)
    hit_mask = (event_observed & (fcst == 1)).where(both_valid)

    miss_total = apply_weights(miss_mask, weights).sum(dim=dims_to_sum)
    hit_total = apply_weights(hit_mask, weights).sum(dim=dims_to_sum)

    return hit_total / (hit_total + miss_total)
def probability_of_false_detection(
    fcst: XarrayLike,
    obs: XarrayLike,
    reduce_dims: FlexibleDimensionTypes = None,
    preserve_dims: FlexibleDimensionTypes = None,
    weights: Optional[xr.DataArray] = None,
    check_args: Optional[bool] = True,
) -> XarrayLike:
    """
    Computes the Probability of False Detection (POFD), also called the False
    Alarm Rate (not to be confused with the False Alarm Ratio).

    The POFD is ``false_alarms / (false_alarms + correct_negatives)``: of all
    observed non-events (obs = 0), the fraction that were incorrectly forecast
    as an event (fcst = 1).

    Args:
        fcst: Binary forecast data; values must be in the set {0, 1, np.nan}.
        obs: Binary observed data; values must be in the set {0, 1, np.nan}.
        reduce_dims: Dimensions to sum over when counting false alarms and
            correct negatives. Dimensions not listed are kept. The special
            value 'all' sums over every dimension. Only one of `reduce_dims`
            and `preserve_dims` may be given; if neither is given, all
            dimensions are summed.
        preserve_dims: Dimensions to keep (i.e. not sum over) when counting
            false alarms and correct negatives. Every other dimension is
            summed. The special value 'all' keeps every dimension, in which
            case the result has the shape/dimensionality of the forecast,
            holds the POFD at each individual point, and the forecast and
            observed dimensions must match exactly. Only one of `reduce_dims`
            and `preserve_dims` may be given; if neither is given, all
            dimensions are reduced.
        weights: Optional array of weights (e.g. by area, by latitude, by
            population, custom) applied to the false alarms and correct
            negatives before they are summed.
        check_args: If true, verify that `fcst` and `obs` only contain values
            in the set {0, 1, np.nan}. Skipping the check may be worthwhile
            when the inputs are known to be valid and performance matters,
            e.g. when working with dask.

    Returns:
        The Probability of False Detection, reduced over the summed dimensions.

    Raises:
        ValueError: if `check_args` is true and `fcst` or `obs` contains a
            value outside the set {0, 1, np.nan}.
    """
    if check_args:
        # Validate the forecast first, then the observations.
        for data, label in ((fcst, "fcst"), (obs, "obs")):
            check_binary(data, label)

    dims_to_sum = gather_dimensions(fcst.dims, obs.dims, reduce_dims, preserve_dims)

    # Points where both inputs are non-NaN. A NaN compares as False in the
    # equality tests below, so without this mask it would be counted as a
    # definite "not a false alarm" / "not a correct negative" instead of
    # being left as NaN.
    both_valid = (~np.isnan(fcst)) & (~np.isnan(obs))
    no_event_observed = obs == 0

    false_alarm_mask = (no_event_observed & (fcst == 1)).where(both_valid)
    correct_negative_mask = (no_event_observed & (fcst == 0)).where(both_valid)

    false_alarm_total = apply_weights(false_alarm_mask, weights).sum(dim=dims_to_sum)
    correct_negative_total = apply_weights(correct_negative_mask, weights).sum(dim=dims_to_sum)

    return false_alarm_total / (false_alarm_total + correct_negative_total)