Coverage for C:\src\imod-python\imod\formats\rasterio.py: 75%

169 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-08 13:27 +0200

1""" 

2Functions that make use of `rasterio 

3<https://rasterio.readthedocs.io/en/stable/>`_ for input and output to other 

4raster formats. 

5 

6Currently only :func:`imod.rasterio.write` is implemented. 

7""" 

8 

9import pathlib 

10import warnings 

11from typing import Dict 

12 

13import numpy as np 

14import pandas as pd 

15 

16import imod 

17from imod.formats import array_io 

18from imod.util.imports import MissingOptionalModule 

19 

20# since rasterio is a big dependency that is sometimes hard to install 

21# and not always required, we made this an optional dependency 

22try: 

23 import rasterio 

24except ImportError: 

25 rasterio = MissingOptionalModule("rasterio") 

26 

27f_open = open 

28 

29 

30# Based on this comment 

31# https://github.com/mapbox/rasterio/issues/265#issuecomment-367044836 

32def _create_ext_driver_code_map(): 

33 from osgeo import gdal 

34 

35 if hasattr(gdal, "DCAP_RASTER"): 

36 

37 def _check_driver(drv): 

38 return drv.GetMetadataItem(gdal.DCAP_RASTER) 

39 

40 else: 

41 

42 def _check_driver(drv): 

43 return True 

44 

45 output = {} 

46 for i in range(gdal.GetDriverCount()): 

47 drv = gdal.GetDriver(i) 

48 if _check_driver(drv): 

49 if drv.GetMetadataItem(gdal.DCAP_CREATE) or drv.GetMetadataItem( 

50 gdal.DCAP_CREATECOPY 

51 ): 

52 ext = drv.GetMetadataItem(gdal.DMD_EXTENSION) 

53 if ext is not None and len(ext) > 0: 

54 output[drv.GetMetadataItem(gdal.DMD_EXTENSION)] = drv.ShortName 

55 sortedkeys = sorted(output.keys()) 

56 output = {k: output[k] for k in sortedkeys} 

57 return output 

58 

59 

60# tiff and jpeg keys have been added manually. 

61EXTENSION_GDAL_DRIVER_CODE_MAP = { 

62 "asc": "AAIGrid", 

63 "bag": "BAG", 

64 "bil": "EHdr", 

65 "blx": "BLX", 

66 "bmp": "BMP", 

67 "bt": "BT", 

68 "dat": "ZMap", 

69 "dem": "USGSDEM", 

70 "ers": "ERS", 

71 "gen": "ADRG", 

72 "gif": "GIF", 

73 "gpkg": "GPKG", 

74 "grd": "NWT_GRD", 

75 "gsb": "NTv2", 

76 "gtx": "GTX", 

77 "hdr": "MFF", 

78 "hf2": "HF2", 

79 "hgt": "SRTMHGT", 

80 "img": "HFA", 

81 "jp2": "JP2OpenJPEG", 

82 "jpg": "JPEG", 

83 "jpeg": "JPEG", 

84 "kea": "KEA", 

85 "kro": "KRO", 

86 "lcp": "LCP", 

87 "map": "PCRaster", 

88 "mbtiles": "MBTiles", 

89 "mrf": "MRF", 

90 "nc": "netCDF", 

91 "ntf": "NITF", 

92 "pdf": "PDF", 

93 "pix": "PCIDSK", 

94 "png": "PNG", 

95 "rda": "R", 

96 "rgb": "SGI", 

97 "rst": "RST", 

98 "rsw": "RMF", 

99 "sigdem": "SIGDEM", 

100 "sqlite": "Rasterlite", 

101 "ter": "Terragen", 

102 "tif": "GTiff", 

103 "tiff": "GTiff", 

104 "vrt": "VRT", 

105 "xml": "PDS4", 

106 "xpm": "XPM", 

107 "xyz": "XYZ", 

108} 

109 

110 

111def _get_driver(path): 

112 ext = path.suffix.lower()[1:] # skip the period 

113 try: 

114 return EXTENSION_GDAL_DRIVER_CODE_MAP[ext] 

115 except KeyError: 

116 raise ValueError( 

117 f'Unknown extension "{ext}", available extensions: ' 

118 f'{", ".join(EXTENSION_GDAL_DRIVER_CODE_MAP.keys())}' 

119 ) 

120 

121 

122def _limitations(riods, path): 

123 if riods.count != 1: 

124 raise NotImplementedError( 

125 f"Cannot open multi-band grid: {path}. Try rioxarray instead." 

126 ) 

127 if not riods.transform.is_rectilinear: 

128 raise NotImplementedError( 

129 f"Cannot open non-rectilinear grid: {path}. Try rioxarray instead." 

130 ) 

131 

132 

133def header(path, pattern): 

134 attrs = imod.util.path.decompose(path, pattern) 

135 

136 # TODO: 

137 # Check bands, rotation, etc. 

138 # Raise NotImplementedErrors and point to rioxarray.open_rasterio 

139 with rasterio.open(path, "r") as riods: 

140 _limitations(riods, path) 

141 attrs["nrow"] = riods.height 

142 attrs["ncol"] = riods.width 

143 xmin, ymin, xmax, ymax = riods.bounds 

144 attrs["dx"] = riods.transform[0] 

145 attrs["dy"] = riods.transform[4] 

146 attrs["nodata"] = riods.nodata 

147 attrs["dtype"] = riods.dtypes[0] 

148 crs = riods.crs 

149 if crs is not None: 

150 attrs["crs"] = crs 

151 

152 attrs["xmin"] = xmin 

153 attrs["xmax"] = xmax 

154 attrs["ymin"] = ymin 

155 attrs["ymax"] = ymax 

156 attrs["headersize"] = None 

157 return attrs 

158 

159 

160def _read(path, headersize, nrow, ncol, nodata, dtype): 

161 with rasterio.open(path, "r") as dataset: 

162 a = dataset.read(1) 

163 

164 # None signifies no replacement; skip if nodata already is nan 

165 if (nodata is None) or np.isnan(nodata): 

166 return a 

167 # Only set nodata to nan if the dtype supports it 

168 if (a.dtype == np.float64) or (a.dtype == np.float32): 

169 return array_io.reading._to_nan(a, nodata) 

170 else: 

171 return a 

172 

173 

174def read(path, use_cftime=False, pattern=None): 

175 warnings.warn( 

176 "The imod.rasterio.read() function is deprecated. Instead use imod.rasterio.open().", 

177 FutureWarning, 

178 ) 

179 return open(path, use_cftime=False, pattern=None).load() 

180 

181 

182def open(path, use_cftime=False, pattern=None): 

183 r""" 

184 Open one or more GDAL supported raster files as an xarray.DataArray. 

185 

186 In accordance with xarray's design, ``open`` loads the data of the files 

187 lazily. This means the data of the rasters are not loaded into memory until the 

188 data is needed. This allows for easier handling of large datasets, and 

189 more efficient computations. 

190 

191 Parameters 

192 ---------- 

193 path : str, Path or list 

194 This can be a single file, 'head_l1.tif', a glob pattern expansion, 

195 'head_l*.tif', or a list of files, ['head_l1.tif', 'head_l2.tif']. 

196 Note that each file needs to be of the same name (part before the 

197 first underscore) but have a different layer and/or timestamp, 

198 such that they can be combined in a single xarray.DataArray. 

199 use_cftime : bool, optional 

200 Use ``cftime.DatetimeProlepticGregorian`` instead of `np.datetime64[ns]` 

201 for the time axis. 

202 

203 Dates are normally encoded as ``np.datetime64[ns]``; however, if dates 

204 fall before 1678 or after 2261, they are automatically encoded as 

205 ``cftime.DatetimeProlepticGregorian`` objects rather than 

206 ``np.datetime64[ns]``. 

207 pattern : str, regex pattern, optional 

208 If the filenames do match default naming conventions of 

209 {name}_{time}_l{layer}, a custom pattern can be defined here either 

210 as a string, or as a compiled regular expression pattern. See the 

211 examples below. 

212 

213 Returns 

214 ------- 

215 xarray.DataArray 

216 A float32 xarray.DataArray of the values in the raster file(s). 

217 All metadata needed for writing the file to raster or other formats 

218 using imod.rasterio are included in the xarray.DataArray.attrs. 

219 

220 Examples 

221 -------- 

222 Open a raster file: 

223 

224 >>> da = imod.rasterio.open("example.tif") 

225 

226 Open a raster file, relying on default naming conventions to identify 

227 layer: 

228 

229 >>> da = imod.rasterio.open("example_l1.tif") 

230 

231 Open an IDF file, relying on default naming conventions to identify layer 

232 and time: 

233 

234 >>> head = imod.rasterio.open("head_20010101_l1.tif") 

235 

236 Open multiple files, in this case files for the year 2001 for all 

237 layers, again relying on default conventions for naming: 

238 

239 >>> head = imod.rasterio.open("head_2001*_l*.tif") 

240 

241 The same, this time explicitly specifying ``name``, ``time``, and ``layer``: 

242 

243 >>> head = imod.rasterio.open("head_2001*_l*.tif", pattern="{name}_{time}_l{layer}") 

244 

245 The format string pattern will only work on tidy paths, where variables are 

246 separated by underscores. You can also pass a compiled regex pattern. 

247 Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered. 

248 

249 >>> import re 

250 >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE) 

251 >>> head = imod.idf.open("headL11", pattern=pattern) 

252 

253 However, this requires constructing regular expressions, which is 

254 generally a fiddly process. Regex notation is also impossible to 

255 remember. The website https://regex101.com is a nice help. Alternatively, 

256 the most pragmatic solution may be to just rename your files. 

257 """ 

258 return array_io.reading._open(path, use_cftime, pattern, header, _read) 

259 

260 

261def write_aaigrid(path: pathlib.Path, a: np.ndarray, profile: Dict) -> None: 

262 """ 

263 Fall-back function to write ESRII ASCII grids even if rasterio is not 

264 installed. 

265 

266 This function takes care to mimick the idiosyncracies of the GDAL driver. 

267 

268 Parameters 

269 ---------- 

270 path: str or Path 

271 path to the output raster 

272 a: np.ndarray 

273 The raster data. 

274 profile: dict 

275 The rasterio profile metadata. 

276 """ 

277 dtype = profile["dtype"] 

278 nodata = profile["nodata"] 

279 df = pd.DataFrame(a.astype(dtype)) 

280 # GDAL writes white space before every line. pandas doesn't support this, 

281 # but it will write nodata values -- encoded as NaN -- as white space. 

282 df.index = np.full(len(df), np.nan) 

283 if np.issubdtype(dtype, np.integer): 

284 is_float = False 

285 space = "" 

286 fmt = "%d" 

287 str_nodata = f"{int(nodata)}" 

288 elif np.issubdtype(dtype, np.floating): 

289 # For some reason, GDAL inserts a space before the nodata value if 

290 # dtype is float. 

291 is_float = True 

292 space = " " 

293 precision = profile.get("decimal_precision") 

294 digits = profile.get("significant_digits") 

295 # See: https://docs.python.org/3/library/string.html#formatspec 

296 if precision is not None: 

297 fmt = f"%.{precision}f" 

298 str_nodata = f"{nodata:.{precision}f}" 

299 elif digits is not None: 

300 fmt = f"%.{digits}g" 

301 str_nodata = f"{nodata:.{digits}g}" 

302 else: 

303 fmt = "%.20g" 

304 str_nodata = f"{nodata:.20g}" 

305 else: 

306 raise TypeError(f"invalid dtype: {dtype}") 

307 

308 dx, _, xmin, _, dy, ymax = profile["transform"][:6] 

309 ymin = ymax + profile["height"] * dy 

310 

311 header = ( 

312 f'ncols {profile["width"]}\n' 

313 f'nrows {profile["height"]}\n' 

314 f"xllcorner {xmin:.12f}\n" 

315 f"yllcorner {ymin:.12f}\n" 

316 f"cellsize {dx:.12f}\n" 

317 f"NODATA_value {space}{str_nodata}\n" 

318 ) 

319 

320 # GDAL writes only a linefeed, not a carriage return. By default, Python 

321 # adds a carriage return as well, on Windows. This is disabled by 

322 # explicitly setting the newline argument. 

323 with f_open(path, "w", newline="") as f: 

324 f.write(header) 

325 

326 first = df.iloc[0, 0] 

327 # is_float is the result of a typecheck above. 

328 # is_integer() checks whether a float has a decimal fraction: 

329 # (1.0).is_integer() -> True 

330 # (1.1).is_integer() -> False 

331 if is_float and first.is_integer(): 

332 # GDAL uses the "general" float (g) format by default. However, if 

333 # the first value is a float without decimals, it will write a 

334 # single trailing 0 for the first value, presumably to aid type 

335 # inference when reading values back in. All subsequent values are 

336 # written without decimals, however. 

337 precision = profile.get("decimal_precision", 1) 

338 # Write the first value, with trailing zero if needed. 

339 f.write(f" {first:.{precision}f} ") 

340 # Write remainder of first row. Since we've already written the 

341 # first value with a space in front of it, we skip writing the NaN 

342 # index value here. 

343 df.iloc[[0], 1:].to_csv( 

344 f, index=False, header=False, sep=" ", float_format=fmt 

345 ) 

346 # Write all other rows. 

347 df.iloc[1:].to_csv(f, index=True, header=False, sep=" ", float_format=fmt) 

348 else: 

349 df.to_csv(f, index=True, header=False, sep=" ", float_format=fmt) 

350 

351 return 

352 

353 

354def write(path, da, driver=None, nodata=np.nan, dtype=None): 

355 """Write ``xarray.DataArray`` to GDAL supported geospatial rasters using ``rasterio``. 

356 

357 Parameters 

358 ---------- 

359 path: str or Path 

360 path to the output raster 

361 da: xarray DataArray 

362 The DataArray to be written. Should have only x and y dimensions. 

363 driver: str; optional 

364 Which GDAL format driver to use. The complete list is at 

365 https://gdal.org/drivers/raster/index.html. 

366 By default tries to guess from the file extension. 

367 nodata: float 

368 Nodata value to use. Should be convertible to the DataArray and GDAL dtype. 

369 Default value is np.nan 

370 

371 Examples 

372 -------- 

373 Save ``xarray.DataArray`` in ASCII format: 

374 

375 >>> imod.rasterio.write("example.asc", da) 

376 

377 Save ``xarray.DataArray`` in ASCII format, with 6 significant digits: 

378 

379 >>> da.attrs['SIGNIFICANT_DIGITS'] = 6 

380 >>> imod.rasterio.write("example.asc", da) 

381 """ 

382 # Not directly related to iMOD, but provides a missing link, together 

383 # with xioxarray. 

384 # Note that this function can quickly become dstdated as 

385 # the xarray rasterio connection matures, see for instance: 

386 # https://github.com/pydata/xarray/issues/1736 

387 # https://github.com/pydata/xarray/pull/1712 

388 path = pathlib.Path(path) 

389 profile = da.attrs.copy() 

390 if driver is None: 

391 driver = _get_driver(path) 

392 

393 flip = slice(None, None, -1) 

394 if not da.indexes["x"].is_monotonic_increasing: 

395 da = da.isel(x=flip) 

396 if not da.indexes["y"].is_monotonic_decreasing: 

397 da = da.isel(y=flip) 

398 

399 # Only try to fill data that can contains nan's 

400 # Do this before casting to another type! 

401 ignore_nodata = (nodata is None) or np.isnan(nodata) 

402 if not ignore_nodata: 

403 if (da.dtype == np.float32) or (da.dtype == np.float64): 

404 # NaN is the default missing value in xarray 

405 # None is different in that the raster won't have a nodata value 

406 da = da.fillna(nodata) 

407 

408 # Cast to dtype if dtype is given 

409 if dtype is not None: 

410 if da.dtype != dtype: 

411 da = da.astype(dtype) 

412 

413 # Cast to supported pcraster dtypes 

414 if driver == "PCRaster": 

415 if da.dtype == "float64": 

416 da = da.astype("float32") 

417 elif da.dtype == "int64": 

418 da = da.astype("int32") 

419 elif da.dtype == "bool": 

420 da = da.astype("uint8") 

421 if "PCRASTER_VALUESCALE" not in profile: 

422 if da.dtype == "int32": 

423 profile["PCRASTER_VALUESCALE"] = "VS_NOMINAL" 

424 elif da.dtype == "uint8": 

425 profile["PCRASTER_VALUESCALE"] = "VS_BOOLEAN" 

426 else: 

427 profile["PCRASTER_VALUESCALE"] = "VS_SCALAR" 

428 

429 extradims = list(filter(lambda dim: dim not in ("y", "x"), da.dims)) 

430 # TODO only equidistant IDFs are compatible with GDAL / rasterio 

431 # TODO try squeezing extradims here, such that 1 layer, 1 time, etc. is acccepted 

432 if extradims: 

433 raise ValueError(f"Only x and y dimensions supported, found {da.dims}") 

434 # transform will be affine object in next xarray 

435 profile["transform"] = imod.util.spatial.transform(da) 

436 profile["driver"] = driver 

437 profile["height"] = da.y.size 

438 profile["width"] = da.x.size 

439 profile["count"] = 1 

440 profile["dtype"] = da.dtype 

441 profile["nodata"] = nodata 

442 

443 # Allow writing ASCII grids even if rasterio isn't installed. This is 

444 # useful for e.g. MetaSWAP input. 

445 if isinstance(rasterio, MissingOptionalModule) and driver == "AAIGrid": 

446 write_aaigrid(path, da.values, profile) 

447 else: 

448 with rasterio.Env(): 

449 with rasterio.open(path, "w", **profile) as ds: 

450 ds.write(da.values, 1) 

451 return 

452 

453 

454def save(path, a, driver=None, nodata=np.nan, pattern=None, dtype=None): 

455 """ 

456 Write a xarray.DataArray to one or more rasterio supported files 

457 

458 If the DataArray only has ``y`` and ``x`` dimensions, a single raster file is 

459 written, like the ``imod.rasterio.write`` function. This function is more general 

460 and also supports ``time`` and ``layer`` and other dimensions. It will split these up, 

461 give them their own filename according to the conventions in 

462 ``imod.util.path.compose``, and write them each. 

463 

464 Parameters 

465 ---------- 

466 path : str or Path 

467 Path to the raster file to be written. This function decides on the 

468 actual filename(s) using conventions, so it only takes the directory and 

469 name from this parameter. 

470 a : xarray.DataArray 

471 DataArray to be written. It needs to have dimensions ('y', 'x'), and 

472 optionally ``layer`` and ``time``. 

473 driver: str, optional 

474 Which GDAL format driver to use. The complete list is at 

475 https://gdal.org/drivers/raster/index.html. 

476 By default tries to guess from the file extension. 

477 nodata : float, optional 

478 Nodata value in the saved raster files. Defaults to a value of nan. 

479 pattern : str 

480 Format string which defines how to create the filenames. See examples. 

481 

482 Example 

483 ------- 

484 Consider a DataArray ``da`` that has dimensions 'layer', 'y' and 'x', with the 

485 'layer' dimension consisting of layer 1 and 2:: 

486 

487 save('path/to/head', da) 

488 

489 This writes the following two tif files: 'path/to/head_l1.tif' and 

490 'path/to/head_l2.tif'. 

491 

492 

493 It is possible to generate custom filenames using a format string. The 

494 default filenames would be generated by the following format string: 

495 

496 save("example", pattern="{name}_l{layer}{extension}") 

497 

498 If you desire zero-padded numbers that show up neatly sorted in a 

499 file manager, you may specify: 

500 

501 save("example", pattern="{name}_l{layer:02d}{extension}") 

502 

503 In this case, a 0 will be padded for single digit numbers ('1' will become 

504 '01'). 

505 

506 To get a date with dashes, use the following pattern: 

507 

508 "{name}_{time:%Y-%m-%d}_l{layer}{extension}" 

509 

510 """ 

511 path = pathlib.Path(path) 

512 # defaults to geotiff 

513 if driver is None: 

514 if path.suffix == "": 

515 path = path.with_suffix(".tif") 

516 driver = "GTiff" 

517 else: 

518 driver = _get_driver(path) 

519 

520 # Use a closure to skip the driver argument 

521 # so it takes the same arguments as the idf write 

522 def _write(path, a, nodata, dtype): 

523 return write(path, a, driver, nodata, dtype) 

524 

525 array_io.writing._save(path, a, nodata, pattern, dtype, _write)