Coverage for C:\src\imod-python\imod\evaluate\head.py: 91%
89 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-08 13:27 +0200
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-08 13:27 +0200
1import warnings
3import numpy as np
4import pandas as pd
5import xarray as xr
def convert_pointwaterhead_freshwaterhead(
    pointwaterhead, density, elevation, density_fresh=1000.0
):
    r"""Convert point water head (as outputted by SEAWAT) into freshwater
    head, using Eq.3 from Guo, W., & Langevin, C. D. (2002):

    .. math:: h_{f}=\frac{\rho}{\rho_{f}}h-\frac{\rho-\rho_{f}}{\rho_{f}}Z

    An edge case arises when the head is below the cell centre, or entirely below
    the cell. Strictly applying Eq.3 would result in freshwater heads that are
    lower than the original point water head, which is physically impossible. This
    function then outputs the freshwaterhead for the uppermost underlying cell where
    the original point water head exceeds the cell centre.

    Parameters
    ----------
    pointwaterhead : float or xr.DataArray of floats
        the point water head as outputted by SEAWAT, in m.
    density : float or xr.DataArray of floats
        the water density at the same locations as `pointwaterhead`.
    elevation : float or xr.DataArray of floats
        elevation at the same locations as `pointwaterhead`, in m.
    density_fresh : float, optional
        the density of freshwater (1000 kg/m3), or a different value if
        different units are used, or a different density reference is required.

    Returns
    -------
    freshwaterhead : float or xr.DataArray of floats
    """
    # Eq.3: h_f = (rho / rho_f) * h - ((rho - rho_f) / rho_f) * Z
    head_term = density / density_fresh * pointwaterhead
    elevation_term = (density - density_fresh) / density_fresh * elevation
    freshwaterhead = head_term - elevation_term

    # Edge case: point water head below cell centre z. Mask those cells and
    # backfill along the layer dimension, so each masked cell receives the
    # freshwater head of the uppermost underlying valid cell.
    # Only applicable to layered xr.DataArrays.
    if isinstance(pointwaterhead, xr.DataArray) and "layer" in pointwaterhead.dims:
        valid = pointwaterhead > elevation
        freshwaterhead = freshwaterhead.where(valid).bfill(dim="layer")

    return freshwaterhead
def _calculate_gxg(
    head_bimonthly: xr.DataArray, below_surfacelevel: bool = False
) -> xr.DataArray:
    """
    Compute GxG statistics (GLG, GHG, GVG) from a bimonthly head series.

    Parameters
    ----------
    head_bimonthly : xr.DataArray
        Head values sampled at a semi-monthly (14th/28th) frequency along a
        ``"time"`` dimension. The time size must be a whole multiple of 24
        (one full set of calendar years); missing measurements are NaN.
    below_surfacelevel : bool, optional
        If True, heads are interpreted as depth below surface level, which
        swaps the glg/ghg labels (see below).

    Returns
    -------
    xr.Dataset
        With variables ``gvg``, ``glg``, ``ghg``, ``n_years_gvg``,
        ``n_years_gxg``.
    """
    # Local import: bottleneck is an optional acceleration dependency,
    # only needed when this function actually runs.
    import bottleneck as bn

    # Most efficient way of finding the three highest and three lowest is via a
    # partition. See:
    # https://bottleneck.readthedocs.io/en/latest/reference.html#bottleneck.partition
    def lowest3_mean(da: xr.DataArray):
        """Mean of the three lowest values along the last (bimonth) axis."""
        # kth=2 places the 3 smallest values in positions [..., :3]
        # (unordered among themselves).
        a = bn.partition(da.values, kth=2, axis=-1)

        # nanmean over all-NaN slices emits a RuntimeWarning; those slices
        # correspond to years already masked out, so silence it.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            result = np.nanmean(a[..., :3], axis=-1)

        # Reuse coords/attrs of one bimonth slice as the result template.
        template = da.isel(bimonth=0)
        return template.copy(data=result)

    def highest3_mean(da: xr.DataArray):
        """Mean of the three highest values along the last (bimonth) axis."""
        # Negate so a partition for the smallest yields the largest values.
        a = bn.partition(-da.values, kth=2, axis=-1)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            result = np.nanmean(-a[..., :3], axis=-1)

        template = da.isel(bimonth=0)
        return template.copy(data=result)

    # 24 semi-monthly dates per calendar year; require only whole years.
    timesize = head_bimonthly["time"].size
    if timesize % 24 != 0:
        raise ValueError("head is not bimonthly for a full set of years")
    n_year = int(timesize / 24)

    # GVG: spring head, mean of 14 March, 28 March, 14 April.
    # With Jan 14 at index 0: first and second date of March: 4, 5;
    # first date of April: 6.
    month_index = np.array([4, 5, 6])
    # Repeat this for every year in dataset, and increment by 24 per repetition.
    yearly_increments = (np.arange(n_year) * 24)[:, np.newaxis]
    # Broadcast to a full (hydroyear, bimonth) index array.
    gvg_index = xr.DataArray(
        data=(month_index + yearly_increments), dims=("hydroyear", "bimonth")
    )
    gvg_data = head_bimonthly.isel(time=gvg_index)
    # Filter out years without all 3 spring measurements available.
    gvg_years = gvg_data.count("bimonth") == 3
    gvg_data = gvg_data.where(gvg_years)

    # Hydrological years: running from 1 April to 1 April in the Netherlands.
    # Increments run from April (6th date) to April (30th date) for every year;
    # the last (incomplete) hydrological year is dropped via [:-1].
    # Broadcast to a full (hydroyear, bimonth) index array.
    newdims = ("hydroyear", "bimonth")
    gxg_index = xr.DataArray(
        data=(np.arange(6, 30) + yearly_increments[:-1]),
        dims=newdims,
    )
    gxg_data = head_bimonthly.isel(time=gxg_index)
    # Move (hydroyear, bimonth) to the end so bimonth is the last axis,
    # as required by the axis=-1 partition in the helpers above.
    dims = [dim for dim in gxg_data.dims if dim not in newdims]
    dims.extend(newdims)
    gxg_data = gxg_data.transpose(*dims)

    # Filter out years without all 24 measurements.
    gxg_years = gxg_data.count("bimonth") == 24
    gxg_data = gxg_data.where(gxg_years)

    # First compute LG3 and HG3 per hydrological year, then compute the mean over the total.
    if gxg_data.chunks is not None:
        # If data is lazily loaded/chunked, process data of one year at a time.
        gxg_data = gxg_data.chunk({"hydroyear": 1})
        lg3 = xr.map_blocks(lowest3_mean, gxg_data, template=gxg_data.isel(bimonth=0))
        hg3 = xr.map_blocks(highest3_mean, gxg_data, template=gxg_data.isel(bimonth=0))
    else:
        # Otherwise, just compute it in a single go.
        lg3 = lowest3_mean(gxg_data)
        hg3 = highest3_mean(gxg_data)

    gxg = xr.Dataset()
    gxg["gvg"] = gvg_data.mean(("hydroyear", "bimonth"))

    ghg = hg3.mean("hydroyear")
    glg = lg3.mean("hydroyear")
    if below_surfacelevel:
        # Heads are in m below surface: numerically high values are *deep*
        # groundwater levels, so the glg/ghg labels swap.
        gxg["glg"] = ghg
        gxg["ghg"] = glg
    else:
        gxg["glg"] = glg
        gxg["ghg"] = ghg

    # Add the numbers of years used in the calculation
    gxg["n_years_gvg"] = gvg_years.sum("hydroyear")
    gxg["n_years_gxg"] = gxg_years.sum("hydroyear")
    return gxg
def calculate_gxg_points(
    df: pd.DataFrame,
    id: str = "id",
    time: str = "time",
    head: str = "head",
    below_surfacelevel: bool = False,
    tolerance: pd.Timedelta = pd.Timedelta(days=7),
) -> pd.DataFrame:
    """
    Calculate GxG groundwater characteristics from head time series.

    GLG and GHG (average lowest and average highest groundwater level respectively) are
    calculated as the average of the three lowest (GLG) or highest (GHG) head values per
    Dutch hydrological year (april - april), for head values measured at a semi-monthly frequency
    (14th and 28th of every month). GVG (average spring groundwater level) is calculated as
    the average of groundwater level on 14th and 28th of March, and 14th of April. Supplied head
    values are resampled (nearest) to the 14/28 frequency.

    Hydrological years without all 24 14/28 dates present are discarded for glg and ghg.
    Years without the 3 dates for gvg are discarded.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe containing the piezometer IDs, dates of measurement, and
        measured heads.
    id: str
        Column name of piezometer ID.
    time: str
        Column name of datetime.
    head: str
        Column name of head measurement.
    below_surfacelevel: bool
        False (default) if heads are relative to a datum (e.g. sea level). If
        True, heads are taken as m below surface level.
    tolerance: pd.Timedelta, default: 7 days.
        Maximum time window allowed when searching for dates around the 14th
        and 28th of every month.

    Returns
    -------
    gxg : pd.DataFrame
        Dataframe containing ``glg``: average lowest head, ``ghg``: average
        highest head, ``gvg``: average spring head, ``n_years_gvg``: numbers of
        years used for gvg, ``n_years_gxg``: numbers of years used for glg and
        ghg.

    Examples
    --------
    Read some IPF data and compute the GxG values, while specifying the
    (non-standard) column names:

    >>> import imod
    >>> df = imod.ipf.read("piezometers.ipf")
    >>> gxg = imod.evaluate.calculate_gxg_points(
    ...     df=df,
    ...     id="ID-column",
    ...     time="Date",
    ...     head="Piezometer head (m)",
    ... )
    """

    def snap_to_dates(
        series: pd.Series, dates: pd.DatetimeIndex, tolerance: pd.Timedelta
    ):
        # Drop duplicate timestamps first, then snap to the semi-monthly grid.
        deduplicated = series[~series.index.duplicated(keep="first")]
        return deduplicated.reindex(dates, method="nearest", tolerance=tolerance)

    # Validate input up front: type, then required columns.
    if not isinstance(df, pd.DataFrame):
        raise TypeError(f"df must be a DataFrame, received: {type(df).__name__}")
    for required in (id, time, head):
        if required not in df:
            raise ValueError(f"Column {required} not present in dataframe")

    # Create a bi-monthly date range spanning whole calendar years:
    # semi-month start (1st & 15th) shifted by 13 days -> 14th & 28th.
    first_year = df[time].min().year
    last_year = df[time].max().year
    dates = pd.date_range(
        start=f"{first_year}-01-01", end=f"{last_year}-12-31", freq="SMS", name="time"
    ) + pd.DateOffset(days=13)

    # Snap every location's time series onto the same date range. The result
    # is rectangular and converts directly to a DataArray.
    head_bimonthly = (
        df.set_index(time)
        .groupby(id)[head]
        .apply(snap_to_dates, dates, tolerance)
        .to_xarray()
    )

    # Calculate GxG values per location, then return as a DataFrame.
    gxg = _calculate_gxg(head_bimonthly, below_surfacelevel)
    return gxg.to_dataframe()
def calculate_gxg(
    head: xr.DataArray,
    below_surfacelevel: bool = False,
    tolerance: pd.Timedelta = pd.Timedelta(days=7),
) -> xr.DataArray:
    """
    Calculate GxG groundwater characteristics from head time series.

    GLG and GHG (average lowest and average highest groundwater level respectively) are
    calculated as the average of the three lowest (GLG) or highest (GHG) head values per
    Dutch hydrological year (april - april), for head values measured at a semi-monthly frequency
    (14th and 28th of every month). GVG (average spring groundwater level) is calculated as
    the average of groundwater level on 14th and 28th of March, and 14th of April. Supplied head
    values are resampled (nearest) to the 14/28 frequency.

    Hydrological years without all 24 14/28 dates present are discarded for glg and ghg.
    Years without the 3 dates for gvg are discarded.

    Parameters
    ----------
    head : xr.DataArray of floats
        Head relative to sea level, in m, or m below surface level if `below_surfacelevel` is
        set to True. Must be of dimensions ``("time", "y", "x")``.
    below_surfacelevel : boolean, optional, default: False.
        False (default) if heads are relative to a datum (e.g. sea level). If
        True, heads are taken as m below surface level.
    tolerance: pd.Timedelta, default: 7 days.
        Maximum time window allowed when searching for dates around the 14th
        and 28th of every month.

    Returns
    -------
    gxg : xr.Dataset
        Dataset containing ``glg``: average lowest head, ``ghg``: average
        highest head, ``gvg``: average spring head, ``n_years_gvg``: numbers of
        years used for gvg, ``n_years_gxg``: numbers of years used for glg and
        ghg.

    Examples
    --------
    Load the heads, and calculate groundwater characteristics after the year 2000:

    >>> import imod
    >>> heads = imod.idf.open("head*.idf")
    >>> heads = heads.sel(time=heads.time.dt.year >= 2000, layer=1)
    >>> gxg = imod.evaluate.calculate_gxg(heads)

    Transform to meters below surface level by subtracting from surface level:

    >>> surflevel = imod.idf.open("surfacelevel.idf")
    >>> gxg = surflevel - gxg

    Or calculate from groundwater level relative to surface level directly:

    >>> gwl = surflevel - heads
    >>> gxg = imod.evaluate.calculate_gxg(gwl, below_surfacelevel=True)
    """
    # Validate dimension layout and time dtype before doing any work.
    if head.dims != ("time", "y", "x"):
        raise ValueError('Dimensions must be ("time", "y", "x")')
    if not np.issubdtype(head["time"].dtype, np.datetime64):
        raise ValueError("Time must have dtype numpy datetime64")

    # Reindex to the GxG frequency: every 14th and 28th of the month,
    # spanning whole calendar years (semi-month start + 13 days).
    first_year = int(head["time"][0].dt.year)
    last_year = int(head["time"][-1].dt.year)
    dates = pd.date_range(
        start=f"{first_year}-01-01", end=f"{last_year}-12-31", freq="SMS"
    ) + pd.DateOffset(days=13)
    head_bimonthly = head.reindex(time=dates, method="nearest", tolerance=tolerance)

    return _calculate_gxg(head_bimonthly, below_surfacelevel)