Coverage for src/scores/categorical/binary_impl.py: 100%

37 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-02-28 12:51 +1100

1""" 

2This module contains methods for binary categories 

3""" 

4from typing import Optional 

5 

6import numpy as np 

7import xarray as xr 

8 

9from scores.functions import apply_weights 

10from scores.processing import check_binary 

11from scores.typing import FlexibleDimensionTypes, XarrayLike 

12from scores.utils import gather_dimensions 

13 

14 

def probability_of_detection(
    fcst: XarrayLike,
    obs: XarrayLike,
    reduce_dims: FlexibleDimensionTypes = None,
    preserve_dims: FlexibleDimensionTypes = None,
    weights: Optional[xr.DataArray] = None,
    check_args: bool = True,
) -> XarrayLike:
    """
    Calculates the Probability of Detection (POD), also known as the Hit Rate.
    This is the proportion of observed events (obs = 1) that were correctly
    forecast as an event (fcst = 1).

    Args:
        fcst: An array containing binary values in the set {0, 1, np.nan}
        obs: An array containing binary values in the set {0, 1, np.nan}
        reduce_dims: Optionally specify which dimensions to sum when
            calculating the POD. All other dimensions will not be summed. As a
            special case, 'all' will allow all dimensions to be summed. Only one
            of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
            if neither are supplied is to sum across all dims.
        preserve_dims: Optionally specify which dimensions to not sum
            when calculating the POD. All other dimensions will be summed.
            As a special case, 'all' will allow all dimensions to be
            not summed. In this case, the result will be in the same
            shape/dimensionality as the forecast, and the result will be
            the POD score at each point (i.e. single-value comparison
            against observed), and the forecast and observed dimensions
            must match precisely. Only one of `reduce_dims` and `preserve_dims` can be
            supplied. The default behaviour if neither are supplied is to sum across all dims.
        weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
            by population, custom)
        check_args: Checks if `fcst` and `obs` data only contains values in the set
            {0, 1, np.nan}. You may want to skip this check if you are sure about your
            input data and want to improve the performance when working with dask.

    Returns:
        A DataArray of the Probability of Detection.

    Raises:
        ValueError: if there are values in `fcst` and `obs` that are not in the
            set {0, 1, np.nan} and `check_args` is true.

    """
    # fcst & obs must be 0s and 1s
    if check_args:
        check_binary(fcst, "fcst")
        check_binary(obs, "obs")
    dims_to_sum = gather_dimensions(fcst.dims, obs.dims, reduce_dims, preserve_dims)

    misses = (obs == 1) & (fcst == 0)
    hits = (obs == 1) & (fcst == 1)

    # Comparisons against NaN yield False, so re-mask any point where either
    # input is NaN to keep missing data out of the event counts. Compute the
    # validity mask once and reuse it for both terms.
    not_nan = (~np.isnan(fcst)) & (~np.isnan(obs))
    misses = misses.where(not_nan)
    hits = hits.where(not_nan)

    misses = apply_weights(misses, weights)
    hits = apply_weights(hits, weights)

    misses = misses.sum(dim=dims_to_sum)
    hits = hits.sum(dim=dims_to_sum)

    # POD = hits / (hits + misses)
    pod = hits / (hits + misses)
    return pod

80 

81 

def probability_of_false_detection(
    fcst: XarrayLike,
    obs: XarrayLike,
    reduce_dims: FlexibleDimensionTypes = None,
    preserve_dims: FlexibleDimensionTypes = None,
    weights: Optional[xr.DataArray] = None,
    check_args: bool = True,
) -> XarrayLike:
    """
    Calculates the Probability of False Detection (POFD), also known as False
    Alarm Rate (not to be confused with the False Alarm Ratio). The POFD is
    the proportion of observed non-events (obs = 0) that were incorrectly
    forecast as event (i.e. fcst = 1).

    Args:
        fcst: An array containing binary values in the set {0, 1, np.nan}
        obs: An array containing binary values in the set {0, 1, np.nan}
        reduce_dims: Optionally specify which dimensions to sum when
            calculating the POFD. All other dimensions will not be summed. As a
            special case, 'all' will allow all dimensions to be summed. Only one
            of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
            if neither are supplied is to sum across all dims.
        preserve_dims: Optionally specify which dimensions to not sum
            when calculating the POFD. All other dimensions will be summed.
            As a special case, 'all' will allow all dimensions to be
            not summed. In this case, the result will be in the same
            shape/dimensionality as the forecast, and the result will be
            the POFD score at each point (i.e. single-value comparison
            against observed), and the forecast and observed dimensions
            must match precisely. Only one of `reduce_dims` and `preserve_dims` can be
            supplied. The default behaviour if neither are supplied is to sum across all dims.
        weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
            by population, custom)
        check_args: Checks if `fcst` and `obs` data only contains values in the set
            {0, 1, np.nan}. You may want to skip this check if you are sure about your
            input data and want to improve the performance when working with dask.

    Returns:
        A DataArray of the Probability of False Detection.

    Raises:
        ValueError: if there are values in `fcst` and `obs` that are not in the
            set {0, 1, np.nan} and `check_args` is true.
    """
    # fcst & obs must be 0s and 1s
    if check_args:
        check_binary(fcst, "fcst")
        check_binary(obs, "obs")
    dims_to_sum = gather_dimensions(fcst.dims, obs.dims, reduce_dims, preserve_dims)

    false_alarms = (obs == 0) & (fcst == 1)
    correct_negatives = (obs == 0) & (fcst == 0)

    # Comparisons against NaN yield False, so re-mask any point where either
    # input is NaN to keep missing data out of the event counts. Compute the
    # validity mask once and reuse it for both terms.
    not_nan = (~np.isnan(fcst)) & (~np.isnan(obs))
    false_alarms = false_alarms.where(not_nan)
    correct_negatives = correct_negatives.where(not_nan)

    false_alarms = apply_weights(false_alarms, weights)
    correct_negatives = apply_weights(correct_negatives, weights)

    false_alarms = false_alarms.sum(dim=dims_to_sum)
    correct_negatives = correct_negatives.sum(dim=dims_to_sum)

    # POFD = false alarms / (false alarms + correct negatives)
    pofd = false_alarms / (false_alarms + correct_negatives)
    return pofd