Coverage for C:\src\imod-python\imod\formats\rasterio.py: 75%
169 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-08 13:27 +0200
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-08 13:27 +0200
1"""
2Functions that make use of `rasterio
3<https://rasterio.readthedocs.io/en/stable/>`_ for input and output to other
4raster formats.
6Currently only :func:`imod.rasterio.write` is implemented.
7"""
9import pathlib
10import warnings
11from typing import Dict
13import numpy as np
14import pandas as pd
16import imod
17from imod.formats import array_io
18from imod.util.imports import MissingOptionalModule
20# since rasterio is a big dependency that is sometimes hard to install
21# and not always required, we made this an optional dependency
22try:
23 import rasterio
24except ImportError:
25 rasterio = MissingOptionalModule("rasterio")
27f_open = open
30# Based on this comment
31# https://github.com/mapbox/rasterio/issues/265#issuecomment-367044836
32def _create_ext_driver_code_map():
33 from osgeo import gdal
35 if hasattr(gdal, "DCAP_RASTER"):
37 def _check_driver(drv):
38 return drv.GetMetadataItem(gdal.DCAP_RASTER)
40 else:
42 def _check_driver(drv):
43 return True
45 output = {}
46 for i in range(gdal.GetDriverCount()):
47 drv = gdal.GetDriver(i)
48 if _check_driver(drv):
49 if drv.GetMetadataItem(gdal.DCAP_CREATE) or drv.GetMetadataItem(
50 gdal.DCAP_CREATECOPY
51 ):
52 ext = drv.GetMetadataItem(gdal.DMD_EXTENSION)
53 if ext is not None and len(ext) > 0:
54 output[drv.GetMetadataItem(gdal.DMD_EXTENSION)] = drv.ShortName
55 sortedkeys = sorted(output.keys())
56 output = {k: output[k] for k in sortedkeys}
57 return output
60# tiff and jpeg keys have been added manually.
61EXTENSION_GDAL_DRIVER_CODE_MAP = {
62 "asc": "AAIGrid",
63 "bag": "BAG",
64 "bil": "EHdr",
65 "blx": "BLX",
66 "bmp": "BMP",
67 "bt": "BT",
68 "dat": "ZMap",
69 "dem": "USGSDEM",
70 "ers": "ERS",
71 "gen": "ADRG",
72 "gif": "GIF",
73 "gpkg": "GPKG",
74 "grd": "NWT_GRD",
75 "gsb": "NTv2",
76 "gtx": "GTX",
77 "hdr": "MFF",
78 "hf2": "HF2",
79 "hgt": "SRTMHGT",
80 "img": "HFA",
81 "jp2": "JP2OpenJPEG",
82 "jpg": "JPEG",
83 "jpeg": "JPEG",
84 "kea": "KEA",
85 "kro": "KRO",
86 "lcp": "LCP",
87 "map": "PCRaster",
88 "mbtiles": "MBTiles",
89 "mrf": "MRF",
90 "nc": "netCDF",
91 "ntf": "NITF",
92 "pdf": "PDF",
93 "pix": "PCIDSK",
94 "png": "PNG",
95 "rda": "R",
96 "rgb": "SGI",
97 "rst": "RST",
98 "rsw": "RMF",
99 "sigdem": "SIGDEM",
100 "sqlite": "Rasterlite",
101 "ter": "Terragen",
102 "tif": "GTiff",
103 "tiff": "GTiff",
104 "vrt": "VRT",
105 "xml": "PDS4",
106 "xpm": "XPM",
107 "xyz": "XYZ",
108}
111def _get_driver(path):
112 ext = path.suffix.lower()[1:] # skip the period
113 try:
114 return EXTENSION_GDAL_DRIVER_CODE_MAP[ext]
115 except KeyError:
116 raise ValueError(
117 f'Unknown extension "{ext}", available extensions: '
118 f'{", ".join(EXTENSION_GDAL_DRIVER_CODE_MAP.keys())}'
119 )
122def _limitations(riods, path):
123 if riods.count != 1:
124 raise NotImplementedError(
125 f"Cannot open multi-band grid: {path}. Try rioxarray instead."
126 )
127 if not riods.transform.is_rectilinear:
128 raise NotImplementedError(
129 f"Cannot open non-rectilinear grid: {path}. Try rioxarray instead."
130 )
133def header(path, pattern):
134 attrs = imod.util.path.decompose(path, pattern)
136 # TODO:
137 # Check bands, rotation, etc.
138 # Raise NotImplementedErrors and point to rioxarray.open_rasterio
139 with rasterio.open(path, "r") as riods:
140 _limitations(riods, path)
141 attrs["nrow"] = riods.height
142 attrs["ncol"] = riods.width
143 xmin, ymin, xmax, ymax = riods.bounds
144 attrs["dx"] = riods.transform[0]
145 attrs["dy"] = riods.transform[4]
146 attrs["nodata"] = riods.nodata
147 attrs["dtype"] = riods.dtypes[0]
148 crs = riods.crs
149 if crs is not None:
150 attrs["crs"] = crs
152 attrs["xmin"] = xmin
153 attrs["xmax"] = xmax
154 attrs["ymin"] = ymin
155 attrs["ymax"] = ymax
156 attrs["headersize"] = None
157 return attrs
160def _read(path, headersize, nrow, ncol, nodata, dtype):
161 with rasterio.open(path, "r") as dataset:
162 a = dataset.read(1)
164 # None signifies no replacement; skip if nodata already is nan
165 if (nodata is None) or np.isnan(nodata):
166 return a
167 # Only set nodata to nan if the dtype supports it
168 if (a.dtype == np.float64) or (a.dtype == np.float32):
169 return array_io.reading._to_nan(a, nodata)
170 else:
171 return a
174def read(path, use_cftime=False, pattern=None):
175 warnings.warn(
176 "The imod.rasterio.read() function is deprecated. Instead use imod.rasterio.open().",
177 FutureWarning,
178 )
179 return open(path, use_cftime=False, pattern=None).load()
182def open(path, use_cftime=False, pattern=None):
183 r"""
184 Open one or more GDAL supported raster files as an xarray.DataArray.
186 In accordance with xarray's design, ``open`` loads the data of the files
187 lazily. This means the data of the rasters are not loaded into memory until the
188 data is needed. This allows for easier handling of large datasets, and
189 more efficient computations.
191 Parameters
192 ----------
193 path : str, Path or list
194 This can be a single file, 'head_l1.tif', a glob pattern expansion,
195 'head_l*.tif', or a list of files, ['head_l1.tif', 'head_l2.tif'].
196 Note that each file needs to be of the same name (part before the
197 first underscore) but have a different layer and/or timestamp,
198 such that they can be combined in a single xarray.DataArray.
199 use_cftime : bool, optional
200 Use ``cftime.DatetimeProlepticGregorian`` instead of `np.datetime64[ns]`
201 for the time axis.
203 Dates are normally encoded as ``np.datetime64[ns]``; however, if dates
204 fall before 1678 or after 2261, they are automatically encoded as
205 ``cftime.DatetimeProlepticGregorian`` objects rather than
206 ``np.datetime64[ns]``.
207 pattern : str, regex pattern, optional
208 If the filenames do match default naming conventions of
209 {name}_{time}_l{layer}, a custom pattern can be defined here either
210 as a string, or as a compiled regular expression pattern. See the
211 examples below.
213 Returns
214 -------
215 xarray.DataArray
216 A float32 xarray.DataArray of the values in the raster file(s).
217 All metadata needed for writing the file to raster or other formats
218 using imod.rasterio are included in the xarray.DataArray.attrs.
220 Examples
221 --------
222 Open a raster file:
224 >>> da = imod.rasterio.open("example.tif")
226 Open a raster file, relying on default naming conventions to identify
227 layer:
229 >>> da = imod.rasterio.open("example_l1.tif")
231 Open an IDF file, relying on default naming conventions to identify layer
232 and time:
234 >>> head = imod.rasterio.open("head_20010101_l1.tif")
236 Open multiple files, in this case files for the year 2001 for all
237 layers, again relying on default conventions for naming:
239 >>> head = imod.rasterio.open("head_2001*_l*.tif")
241 The same, this time explicitly specifying ``name``, ``time``, and ``layer``:
243 >>> head = imod.rasterio.open("head_2001*_l*.tif", pattern="{name}_{time}_l{layer}")
245 The format string pattern will only work on tidy paths, where variables are
246 separated by underscores. You can also pass a compiled regex pattern.
247 Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered.
249 >>> import re
250 >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE)
251 >>> head = imod.idf.open("headL11", pattern=pattern)
253 However, this requires constructing regular expressions, which is
254 generally a fiddly process. Regex notation is also impossible to
255 remember. The website https://regex101.com is a nice help. Alternatively,
256 the most pragmatic solution may be to just rename your files.
257 """
258 return array_io.reading._open(path, use_cftime, pattern, header, _read)
261def write_aaigrid(path: pathlib.Path, a: np.ndarray, profile: Dict) -> None:
262 """
263 Fall-back function to write ESRII ASCII grids even if rasterio is not
264 installed.
266 This function takes care to mimick the idiosyncracies of the GDAL driver.
268 Parameters
269 ----------
270 path: str or Path
271 path to the output raster
272 a: np.ndarray
273 The raster data.
274 profile: dict
275 The rasterio profile metadata.
276 """
277 dtype = profile["dtype"]
278 nodata = profile["nodata"]
279 df = pd.DataFrame(a.astype(dtype))
280 # GDAL writes white space before every line. pandas doesn't support this,
281 # but it will write nodata values -- encoded as NaN -- as white space.
282 df.index = np.full(len(df), np.nan)
283 if np.issubdtype(dtype, np.integer):
284 is_float = False
285 space = ""
286 fmt = "%d"
287 str_nodata = f"{int(nodata)}"
288 elif np.issubdtype(dtype, np.floating):
289 # For some reason, GDAL inserts a space before the nodata value if
290 # dtype is float.
291 is_float = True
292 space = " "
293 precision = profile.get("decimal_precision")
294 digits = profile.get("significant_digits")
295 # See: https://docs.python.org/3/library/string.html#formatspec
296 if precision is not None:
297 fmt = f"%.{precision}f"
298 str_nodata = f"{nodata:.{precision}f}"
299 elif digits is not None:
300 fmt = f"%.{digits}g"
301 str_nodata = f"{nodata:.{digits}g}"
302 else:
303 fmt = "%.20g"
304 str_nodata = f"{nodata:.20g}"
305 else:
306 raise TypeError(f"invalid dtype: {dtype}")
308 dx, _, xmin, _, dy, ymax = profile["transform"][:6]
309 ymin = ymax + profile["height"] * dy
311 header = (
312 f'ncols {profile["width"]}\n'
313 f'nrows {profile["height"]}\n'
314 f"xllcorner {xmin:.12f}\n"
315 f"yllcorner {ymin:.12f}\n"
316 f"cellsize {dx:.12f}\n"
317 f"NODATA_value {space}{str_nodata}\n"
318 )
320 # GDAL writes only a linefeed, not a carriage return. By default, Python
321 # adds a carriage return as well, on Windows. This is disabled by
322 # explicitly setting the newline argument.
323 with f_open(path, "w", newline="") as f:
324 f.write(header)
326 first = df.iloc[0, 0]
327 # is_float is the result of a typecheck above.
328 # is_integer() checks whether a float has a decimal fraction:
329 # (1.0).is_integer() -> True
330 # (1.1).is_integer() -> False
331 if is_float and first.is_integer():
332 # GDAL uses the "general" float (g) format by default. However, if
333 # the first value is a float without decimals, it will write a
334 # single trailing 0 for the first value, presumably to aid type
335 # inference when reading values back in. All subsequent values are
336 # written without decimals, however.
337 precision = profile.get("decimal_precision", 1)
338 # Write the first value, with trailing zero if needed.
339 f.write(f" {first:.{precision}f} ")
340 # Write remainder of first row. Since we've already written the
341 # first value with a space in front of it, we skip writing the NaN
342 # index value here.
343 df.iloc[[0], 1:].to_csv(
344 f, index=False, header=False, sep=" ", float_format=fmt
345 )
346 # Write all other rows.
347 df.iloc[1:].to_csv(f, index=True, header=False, sep=" ", float_format=fmt)
348 else:
349 df.to_csv(f, index=True, header=False, sep=" ", float_format=fmt)
351 return
354def write(path, da, driver=None, nodata=np.nan, dtype=None):
355 """Write ``xarray.DataArray`` to GDAL supported geospatial rasters using ``rasterio``.
357 Parameters
358 ----------
359 path: str or Path
360 path to the output raster
361 da: xarray DataArray
362 The DataArray to be written. Should have only x and y dimensions.
363 driver: str; optional
364 Which GDAL format driver to use. The complete list is at
365 https://gdal.org/drivers/raster/index.html.
366 By default tries to guess from the file extension.
367 nodata: float
368 Nodata value to use. Should be convertible to the DataArray and GDAL dtype.
369 Default value is np.nan
371 Examples
372 --------
373 Save ``xarray.DataArray`` in ASCII format:
375 >>> imod.rasterio.write("example.asc", da)
377 Save ``xarray.DataArray`` in ASCII format, with 6 significant digits:
379 >>> da.attrs['SIGNIFICANT_DIGITS'] = 6
380 >>> imod.rasterio.write("example.asc", da)
381 """
382 # Not directly related to iMOD, but provides a missing link, together
383 # with xioxarray.
384 # Note that this function can quickly become dstdated as
385 # the xarray rasterio connection matures, see for instance:
386 # https://github.com/pydata/xarray/issues/1736
387 # https://github.com/pydata/xarray/pull/1712
388 path = pathlib.Path(path)
389 profile = da.attrs.copy()
390 if driver is None:
391 driver = _get_driver(path)
393 flip = slice(None, None, -1)
394 if not da.indexes["x"].is_monotonic_increasing:
395 da = da.isel(x=flip)
396 if not da.indexes["y"].is_monotonic_decreasing:
397 da = da.isel(y=flip)
399 # Only try to fill data that can contains nan's
400 # Do this before casting to another type!
401 ignore_nodata = (nodata is None) or np.isnan(nodata)
402 if not ignore_nodata:
403 if (da.dtype == np.float32) or (da.dtype == np.float64):
404 # NaN is the default missing value in xarray
405 # None is different in that the raster won't have a nodata value
406 da = da.fillna(nodata)
408 # Cast to dtype if dtype is given
409 if dtype is not None:
410 if da.dtype != dtype:
411 da = da.astype(dtype)
413 # Cast to supported pcraster dtypes
414 if driver == "PCRaster":
415 if da.dtype == "float64":
416 da = da.astype("float32")
417 elif da.dtype == "int64":
418 da = da.astype("int32")
419 elif da.dtype == "bool":
420 da = da.astype("uint8")
421 if "PCRASTER_VALUESCALE" not in profile:
422 if da.dtype == "int32":
423 profile["PCRASTER_VALUESCALE"] = "VS_NOMINAL"
424 elif da.dtype == "uint8":
425 profile["PCRASTER_VALUESCALE"] = "VS_BOOLEAN"
426 else:
427 profile["PCRASTER_VALUESCALE"] = "VS_SCALAR"
429 extradims = list(filter(lambda dim: dim not in ("y", "x"), da.dims))
430 # TODO only equidistant IDFs are compatible with GDAL / rasterio
431 # TODO try squeezing extradims here, such that 1 layer, 1 time, etc. is acccepted
432 if extradims:
433 raise ValueError(f"Only x and y dimensions supported, found {da.dims}")
434 # transform will be affine object in next xarray
435 profile["transform"] = imod.util.spatial.transform(da)
436 profile["driver"] = driver
437 profile["height"] = da.y.size
438 profile["width"] = da.x.size
439 profile["count"] = 1
440 profile["dtype"] = da.dtype
441 profile["nodata"] = nodata
443 # Allow writing ASCII grids even if rasterio isn't installed. This is
444 # useful for e.g. MetaSWAP input.
445 if isinstance(rasterio, MissingOptionalModule) and driver == "AAIGrid":
446 write_aaigrid(path, da.values, profile)
447 else:
448 with rasterio.Env():
449 with rasterio.open(path, "w", **profile) as ds:
450 ds.write(da.values, 1)
451 return
454def save(path, a, driver=None, nodata=np.nan, pattern=None, dtype=None):
455 """
456 Write a xarray.DataArray to one or more rasterio supported files
458 If the DataArray only has ``y`` and ``x`` dimensions, a single raster file is
459 written, like the ``imod.rasterio.write`` function. This function is more general
460 and also supports ``time`` and ``layer`` and other dimensions. It will split these up,
461 give them their own filename according to the conventions in
462 ``imod.util.path.compose``, and write them each.
464 Parameters
465 ----------
466 path : str or Path
467 Path to the raster file to be written. This function decides on the
468 actual filename(s) using conventions, so it only takes the directory and
469 name from this parameter.
470 a : xarray.DataArray
471 DataArray to be written. It needs to have dimensions ('y', 'x'), and
472 optionally ``layer`` and ``time``.
473 driver: str, optional
474 Which GDAL format driver to use. The complete list is at
475 https://gdal.org/drivers/raster/index.html.
476 By default tries to guess from the file extension.
477 nodata : float, optional
478 Nodata value in the saved raster files. Defaults to a value of nan.
479 pattern : str
480 Format string which defines how to create the filenames. See examples.
482 Example
483 -------
484 Consider a DataArray ``da`` that has dimensions 'layer', 'y' and 'x', with the
485 'layer' dimension consisting of layer 1 and 2::
487 save('path/to/head', da)
489 This writes the following two tif files: 'path/to/head_l1.tif' and
490 'path/to/head_l2.tif'.
493 It is possible to generate custom filenames using a format string. The
494 default filenames would be generated by the following format string:
496 save("example", pattern="{name}_l{layer}{extension}")
498 If you desire zero-padded numbers that show up neatly sorted in a
499 file manager, you may specify:
501 save("example", pattern="{name}_l{layer:02d}{extension}")
503 In this case, a 0 will be padded for single digit numbers ('1' will become
504 '01').
506 To get a date with dashes, use the following pattern:
508 "{name}_{time:%Y-%m-%d}_l{layer}{extension}"
510 """
511 path = pathlib.Path(path)
512 # defaults to geotiff
513 if driver is None:
514 if path.suffix == "":
515 path = path.with_suffix(".tif")
516 driver = "GTiff"
517 else:
518 driver = _get_driver(path)
520 # Use a closure to skip the driver argument
521 # so it takes the same arguments as the idf write
522 def _write(path, a, nodata, dtype):
523 return write(path, a, driver, nodata, dtype)
525 array_io.writing._save(path, a, nodata, pattern, dtype, _write)