Coverage for src/scores/sample_data.py: 100%

45 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-02-28 12:51 +1100

1""" 

2Module to generate simple sample data for users (not tests). Supports tutorials and demos. 

3""" 

4 

5import numpy as np 

6import pandas as pd 

7import xarray as xr 

8from scipy.stats import skewnorm 

9 

10 

def simple_forecast() -> xr.DataArray:
    """Return a short, fixed series of example forecast values."""
    forecast_values = [10, 10, 11, 13, 14, 17, 15, 14]
    return xr.DataArray(data=forecast_values)

14 

15 

def simple_observations() -> xr.DataArray:
    """Return a short, fixed series of example observation values."""
    observed_values = [11, 11, 12, 14, 11, 14, 12, 11]
    return xr.DataArray(data=observed_values)

19 

20 

def continuous_observations(large_size: bool = False) -> xr.DataArray:
    """Creates an obs array with continuous values.

    Values are drawn uniformly from [0, 10) with a fixed seed, so repeated calls
    return identical data. The global NumPy random state is left untouched.

    Args:
        large_size (bool): If True, then returns a large global array with ~0.5 degree
            grid spacing, otherwise returns a cut down, lower resolution array.

    Returns:
        xr.DataArray: Containing synthetic observation data on "lat", "lon" and
            hourly "time" coordinates.
    """

    num_lats = 10
    num_lons = 20
    periods = 10

    if large_size:  # pragma: no cover
        num_lats = 364  # pragma: no cover - used in notebooks and tested manually
        num_lons = 720  # pragma: no cover - used in notebooks and tested manually
        periods = 240  # pragma: no cover - used in notebooks and tested manually

    lat = np.linspace(-90, 90, num_lats)
    lon = np.linspace(0, 360, num_lons)
    time_series = pd.date_range(
        start="2022-11-20T01:00:00.000000000",
        # Lowercase "h" is the hourly alias; uppercase "H" is deprecated in pandas >= 2.2.
        freq="1h",
        periods=periods,
    )

    # A private RandomState seeded with 42 yields the same values as
    # np.random.seed(42) followed by np.random.rand(...), without reseeding the
    # process-wide generator behind the caller's back.
    rng = np.random.RandomState(42)
    data = 10 * rng.rand(len(lat), len(lon), len(time_series))
    obs = xr.DataArray(coords={"lat": lat, "lon": lon, "time": time_series}, data=data)

    return obs

54 

55 

def continuous_forecast(large_size: bool = False, lead_days: bool = False) -> xr.DataArray:
    """Creates a forecast array with continuous values.

    The forecast is the output of `continuous_observations` plus normally
    distributed noise (mean 0, standard deviation 2) drawn with a fixed seed.
    The global NumPy random state is not reseeded by this function itself.

    Args:
        large_size (bool): If True, then returns a large global array with ~0.5 degree
            grid spacing, otherwise returns a cut down, lower resolution array.
        lead_days (bool): If True, returns an array with an additional "lead_time"
            dimension with coordinates [1, 2]. The second entry is the observations
            plus noise with standard deviation 3.

    Returns:
        xr.DataArray: Containing synthetic forecast data.
    """
    obs = continuous_observations(large_size)

    # A private RandomState seeded with 42 reproduces the sequence that
    # np.random.seed(42) + np.random.normal(...) would give, without mutating
    # the process-wide generator.
    rng = np.random.RandomState(42)
    forecast = obs + rng.normal(0, 2, obs.shape)
    if lead_days:
        # Drawn from the same stream, directly after the first noise field.
        forecast2 = obs + rng.normal(0, 3, obs.shape)
        forecast = xr.concat([forecast, forecast2], dim="lead_time")
        forecast = forecast.assign_coords(lead_time=[1, 2])
    return forecast

75 

76 

def cdf_forecast(lead_days: bool = False) -> xr.DataArray:
    """
    Build a small synthetic forecast that holds one CDF per grid point.

    The same skew-normal CDF, evaluated at thresholds from 0 up to (but not
    including) 10 in steps of 0.1, is repeated at every point.

    Args:
        lead_days (bool): If True, returns an array with a "lead_day" dimension.

    Returns:
        xr.DataArray: Containing synthetic CDF forecast data.
    """
    thresholds = np.arange(0, 10, 0.1)
    # Every point shares the same CDF, so evaluate it once and repeat it.
    point_cdf = skewnorm.cdf(thresholds, a=10, loc=2, scale=2)

    # The DataArray is built without explicit dims, so the coordinate keys are
    # kept in the same order as the axes of the data.
    coords = {
        "x": [10, 20],
        "y": [30, 40],
        "time": [10, 20],
    }
    if lead_days:
        coords["lead_day"] = pd.date_range("2022-01-01", "2022-01-02")
        num_points = 16
        target_shape = (2, 2, 2, 2, 100)
    else:
        num_points = 8
        target_shape = (2, 2, 2, 100)
    coords["threshold"] = thresholds

    cdfs = np.reshape([point_cdf] * num_points, target_shape)
    return xr.DataArray(coords=coords, data=cdfs)

119 

120 

def cdf_observations() -> xr.DataArray:
    """
    Creates an obs array to use with `cdf_forecast`.

    Values are drawn with a fixed seed, so repeated calls return identical
    data. The global NumPy random state is left untouched.

    Returns:
        xr.DataArray: Containing synthetic observations between 0 and 9.9 on
            "x", "y" and "time" coordinates (two points along each).
    """
    # Private seeded generator: reproducible without reseeding the process-wide RNG.
    rng = np.random.RandomState(42)
    obs = xr.DataArray(
        coords={
            "x": [10, 20],
            "y": [30, 40],
            "time": [10, 20],
        },
        # uniform(high=9.9) already spans [0, 9.9); the previous extra factor of
        # 10 pushed values up to 99, contradicting the documented range.
        data=rng.uniform(high=9.9, size=(2, 2, 2)),
    )
    return obs