Coverage for src/scores/sample_data.py: 100%
45 statements
« prev ^ index » next coverage.py v7.3.2, created at 2024-02-28 12:51 +1100
« prev ^ index » next coverage.py v7.3.2, created at 2024-02-28 12:51 +1100
1"""
2Module to generate simple sample data for users (not tests). Supports tutorials and demos.
3"""
5import numpy as np
6import pandas as pd
7import xarray as xr
8from scipy.stats import skewnorm
def simple_forecast() -> xr.DataArray:
    """Return a short, fixed series of forecast values.

    Returns:
        xr.DataArray: Array built from eight hard-coded integer prediction
        values. No coordinates are supplied.
    """
    predicted_values = [10, 10, 11, 13, 14, 17, 15, 14]
    return xr.DataArray(data=predicted_values)
def simple_observations() -> xr.DataArray:
    """Return a short, fixed series of observed values.

    Returns:
        xr.DataArray: Array built from eight hard-coded integer observation
        values. No coordinates are supplied.
    """
    observed_values = [11, 11, 12, 14, 11, 14, 12, 11]
    return xr.DataArray(data=observed_values)
def continuous_observations(large_size: bool = False) -> xr.DataArray:
    """Create an obs array with continuous values.

    The values are pseudo-random numbers in [0, 10), generated from a fixed
    seed so that repeated calls return identical data.

    Args:
        large_size (bool): If True, then returns a large global array with ~0.5 degree
            grid spacing (364 x 720 points, 240 time steps), otherwise returns a cut
            down, lower resolution array (10 x 20 points, 10 time steps).

    Returns:
        xr.DataArray: Containing synthetic observation data with "lat", "lon"
        and "time" coordinates. "lat" spans -90 to 90, "lon" spans 0 to 360 and
        "time" is hourly starting at 2022-11-20T01:00.
    """

    num_lats = 10
    num_lons = 20
    periods = 10

    if large_size:  # pragma: no cover
        num_lats = 364  # pragma: no cover - used in notebooks and tested manually
        num_lons = 720  # pragma: no cover - used in notebooks and tested manually
        periods = 240  # pragma: no cover - used in notebooks and tested manually

    lat = np.linspace(-90, 90, num_lats)
    lon = np.linspace(0, 360, num_lons)
    time_series = pd.date_range(
        start="2022-11-20T01:00:00.000000000",
        # A Timedelta avoids the string alias "1H", whose uppercase form is
        # deprecated in recent pandas releases, while yielding the same
        # hourly index.
        freq=pd.Timedelta(hours=1),
        periods=periods,
    )

    # A private, fixed-seed generator reproduces the values that
    # ``np.random.seed(42)`` followed by ``np.random.rand`` would give, without
    # resetting the caller's global NumPy random state.
    rng = np.random.RandomState(42)
    data = 10 * rng.rand(len(lat), len(lon), len(time_series))
    obs = xr.DataArray(coords={"lat": lat, "lon": lon, "time": time_series}, data=data)

    return obs
def continuous_forecast(large_size: bool = False, lead_days: bool = False) -> xr.DataArray:
    """Create a forecast array with continuous values.

    The forecast is the output of ``continuous_observations`` plus normally
    distributed noise (mean 0, standard deviation 2) drawn from a fixed seed.

    Args:
        large_size (bool): If True, then returns a large global array with ~0.5 degree
            grid spacing, otherwise returns a cut down, lower resolution array.
        lead_days (bool): If True, returns an array with an additional "lead_time"
            dimension holding two forecasts, labelled 1 and 2. The second forecast
            uses noise with standard deviation 3.

    Returns:
        xr.DataArray: Containing synthetic forecast data.
    """
    obs = continuous_observations(large_size)

    # A private, fixed-seed generator gives the same noise as seeding the
    # global NumPy RNG with 42, without altering the caller's random state.
    rng = np.random.RandomState(42)
    forecast = obs + rng.normal(0, 2, obs.shape)
    if lead_days:
        # Drawn from the same stream, immediately after the first noise field.
        forecast2 = obs + rng.normal(0, 3, obs.shape)
        forecast = xr.concat([forecast, forecast2], dim="lead_time")
        forecast = forecast.assign_coords(lead_time=[1, 2])
    return forecast
def cdf_forecast(lead_days: bool = False) -> xr.DataArray:
    """
    Create a forecast array with a CDF at each point.

    Every point holds the same skew-normal CDF (a=10, loc=2, scale=2),
    evaluated at 100 thresholds from 0 to 9.9 in steps of 0.1.

    Args:
        lead_days (bool): If True, returns an array with a "lead_day" dimension
            whose coordinate holds the dates 2022-01-01 and 2022-01-02.

    Returns:
        xr.DataArray: Containing synthetic CDF forecast data with coordinates
        "x", "y", "time", optionally "lead_day", and "threshold" (in that order).
    """
    thresholds = np.arange(0, 10, 0.1)
    # The CDF is identical at every point, so evaluate it once and replicate it.
    single_cdf = skewnorm.cdf(thresholds, a=10, loc=2, scale=2)

    # Insertion order matters: the array dimensions follow the coordinate order.
    coords = {
        "x": [10, 20],
        "y": [30, 40],
        "time": [10, 20],
    }
    if lead_days:
        coords["lead_day"] = pd.date_range("2022-01-01", "2022-01-02")
    coords["threshold"] = thresholds

    # Each non-threshold coordinate has two entries; the trailing 1 keeps a
    # single copy of the curve along the threshold axis.
    repeats = (2,) * (len(coords) - 1) + (1,)
    cdfs = np.tile(single_cdf, repeats)

    return xr.DataArray(coords=coords, data=cdfs)
def cdf_observations() -> xr.DataArray:
    """
    Create an obs array to use with `cdf_forecast`.

    The values are drawn from a fixed-seed generator, so repeated calls return
    identical data.

    Returns:
        xr.DataArray: Containing synthetic observations between 0 and 9.9, with
        "x", "y" and "time" coordinates of two entries each.
    """
    # A private, fixed-seed generator keeps the output reproducible without
    # resetting the caller's global NumPy random state.
    rng = np.random.RandomState(42)
    obs = xr.DataArray(
        coords={
            "x": [10, 20],
            "y": [30, 40],
            "time": [10, 20],
        },
        # Sample directly from [0, 9.9). The previous extra factor of 10
        # produced values up to 99, outside the documented range.
        data=rng.uniform(high=9.9, size=(2, 2, 2)),
    )
    return obs