Coverage for C:\src\imod-python\imod\mf6\out\dis.py: 97%

171 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-08 13:27 +0200

1import os 

2import struct 

3from typing import Any, BinaryIO, Dict, List, Optional, Tuple 

4 

5import dask 

6import numba 

7import numpy as np 

8import xarray as xr 

9 

10import imod 

11from imod.mf6.utilities.dataset import assign_datetime_coords 

12 

13from . import cbc 

14from .common import FilePath, FloatArray, IntArray, _to_nan 

15 

16 

17# Binary Grid File / DIS Grids 

18# https://water.usgs.gov/water-resources/software/MODFLOW-6/mf6io_6.0.4.pdf#page=162 

def read_grb(f: BinaryIO, ntxt: int, lentxt: int) -> Dict[str, Any]:
    """
    Read the binary grid (GRB) file of a MODFLOW 6 DIS discretization.

    Parameters
    ----------
    f: BinaryIO
        Open binary file handle, positioned just past the header lines
        that specify ``ntxt`` and ``lentxt``.
    ntxt: int
        Number of text (definition) lines to skip.
    lentxt: int
        Length in bytes of each text line.

    Returns
    -------
    Dict[str, Any]
        Grid dimensions, xy coordinates, top and bottom elevations, CSR
        connectivity (ia, ja), idomain and icelltype arrays.

    Raises
    ------
    ValueError
        If ncells does not equal nlayer * nrow * ncol.
    """

    def _read_int() -> int:
        # One 4-byte integer, native byte order.
        return struct.unpack("i", f.read(4))[0]

    def _read_double() -> float:
        # One 8-byte float, native byte order.
        return struct.unpack("d", f.read(8))[0]

    # We don't need any information from the text lines that follow: they
    # are definitions that aim to make the file more portable. Skip
    # straight to the binary data.
    f.seek(ntxt * lentxt, 1)

    ncells = _read_int()
    nlayer = _read_int()
    nrow = _read_int()
    ncol = _read_int()
    nja = _read_int()
    if ncells != (nlayer * nrow * ncol):
        raise ValueError(f"Invalid file {ncells} {nlayer} {nrow} {ncol}")
    xorigin = _read_double()
    yorigin = _read_double()
    f.seek(8, 1)  # skip angrot
    delr = np.fromfile(f, np.float64, ncol)
    delc = np.fromfile(f, np.float64, nrow)
    top_np = np.reshape(np.fromfile(f, np.float64, nrow * ncol), (nrow, ncol))
    bottom_np = np.reshape(np.fromfile(f, np.float64, ncells), (nlayer, nrow, ncol))
    ia = np.fromfile(f, np.int32, ncells + 1)
    ja = np.fromfile(f, np.int32, nja)
    idomain_np = np.reshape(np.fromfile(f, np.int32, ncells), (nlayer, nrow, ncol))
    icelltype_np = np.reshape(np.fromfile(f, np.int32, ncells), (nlayer, nrow, ncol))

    # delc is negated: y decreases with increasing row number.
    bounds = (xorigin, xorigin + delr.sum(), yorigin, yorigin + delc.sum())
    coords = imod.util.spatial._xycoords(bounds, (delr, -delc))
    top = xr.DataArray(top_np, coords, ("y", "x"), name="top")
    coords["layer"] = np.arange(1, nlayer + 1)
    dims = ("layer", "y", "x")
    bottom = xr.DataArray(bottom_np, coords, dims, name="bottom")
    idomain = xr.DataArray(idomain_np, coords, dims, name="idomain")
    icelltype = xr.DataArray(icelltype_np, coords, dims, name="icelltype")

    return {
        "distype": "dis",
        "top": top,
        "bottom": bottom,
        "coords": coords,
        "ncells": ncells,
        "nlayer": nlayer,
        "nrow": nrow,
        "ncol": ncol,
        "nja": nja,
        "ia": ia,
        "ja": ja,
        "idomain": idomain,
        "icelltype": icelltype,
    }

68 

69 

def read_times(
    path: FilePath, ntime: int, nlayer: int, nrow: int, ncol: int
) -> FloatArray:
    """
    Read the total simulation time (totim) of every stored timestep.

    Only the totim field of the first record header of each timestep is
    read; everything else is skipped via seeks.
    """
    # Byte layout of a single timestep: 16 header bytes precede totim,
    # 28 header bytes follow it, then the first layer's data; every
    # subsequent layer repeats a full 52-byte header plus its data.
    totim_offset = 16
    trailing_header = 28
    layer_bytes = nrow * ncol * 8
    full_header = 52
    stride_after_totim = (
        trailing_header
        + layer_bytes
        + (nlayer - 1) * (full_header + layer_bytes)
        + totim_offset
    )

    times = np.empty(ntime, dtype=np.float64)
    with open(path, "rb") as f:
        f.seek(totim_offset)
        for index in range(ntime):
            # total simulation time of this timestep
            (times[index],) = struct.unpack("d", f.read(8))
            f.seek(stride_after_totim, 1)
    return times

96 

97 

def read_hds_timestep(
    path: FilePath, nlayer: int, nrow: int, ncol: int, dry_nan: bool, pos: int
) -> FloatArray:
    """
    Read all head values of a single timestep.

    Reading starts at byte offset ``pos``; each layer record consists of a
    52-byte header followed by nrow * ncol doubles. Dry and no-flow values
    are converted to NaN by ``_to_nan``.
    """
    layer_size = nrow * ncol
    heads = np.empty(nlayer * layer_size, dtype=np.float64)
    with open(path, "rb") as f:
        f.seek(pos)
        for layer in range(nlayer):
            # skip the 52-byte record header (kstp, kper, pertime, ...)
            f.seek(52, 1)
            offset = layer * layer_size
            heads[offset : offset + layer_size] = np.fromfile(
                f, np.float64, layer_size
            )

    return _to_nan(heads.reshape((nlayer, nrow, ncol)), dry_nan)

116 

117 

def open_hds(
    path: FilePath,
    grid_info: Dict[str, Any],
    dry_nan: bool,
    simulation_start_time: Optional[np.datetime64] = None,
    time_unit: Optional[str] = "d",
) -> xr.DataArray:
    """
    Lazily open a binary heads file as a DataArray with dims
    ("time", "layer", "y", "x").

    Every timestep is read on demand through a dask-delayed call to
    ``read_hds_timestep``. When ``simulation_start_time`` is given, the
    numeric time coordinate is converted to datetimes.
    """
    nlayer = grid_info["nlayer"]
    nrow = grid_info["nrow"]
    ncol = grid_info["ncol"]
    # Each timestep occupies nlayer records of (52-byte header + data).
    bytes_per_timestep = nlayer * (52 + nrow * ncol * 8)
    ntime = os.path.getsize(path) // bytes_per_timestep
    coords = grid_info["coords"]
    coords["time"] = read_times(path, ntime, nlayer, nrow, ncol)

    # Collect one lazy (delayed) array per timestep.
    # TODO verify dimension order
    timesteps = []
    for i in range(ntime):
        delayed_read = dask.delayed(read_hds_timestep)(
            path, nlayer, nrow, ncol, dry_nan, i * bytes_per_timestep
        )
        timesteps.append(
            dask.array.from_delayed(
                delayed_read, shape=(nlayer, nrow, ncol), dtype=np.float64
            )
        )

    # NOTE(review): assumes grid_info carries a "name" entry; read_grb in
    # this module does not set one -- presumably added by the caller.
    data_array = xr.DataArray(
        dask.array.stack(timesteps, axis=0),
        coords,
        ("time", "layer", "y", "x"),
        name=grid_info["name"],
    )
    if simulation_start_time is not None:
        data_array = assign_datetime_coords(
            data_array, simulation_start_time, time_unit
        )
    return data_array

150 

151 

def open_imeth1_budgets(
    cbc_path: FilePath, grb_content: dict, header_list: List[cbc.Imeth1Header]
) -> xr.DataArray:
    """
    Open the data for an imeth==1 budget section. Data is read lazily per
    timestep.

    Can be used for:

    * STO-SS
    * STO-SY
    * CSUB-CGELASTIC
    * CSUB-WATERCOMP

    Utilizes the shape information from the DIS GRB file to create a dense
    array; (lazily) allocates for the entire domain (all layers, rows,
    columns) per timestep.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth1Header]

    Returns
    -------
    xr.DataArray with dims ("time", "layer", "y", "x")
    """
    budgets = cbc.open_imeth1_budgets(cbc_path, header_list)
    dense_shape = (
        budgets["time"].size,
        grb_content["nlayer"],
        grb_content["nrow"],
        grb_content["ncol"],
    )
    # Merge the grid coordinates with the time coordinate.
    coords = grb_content["coords"] | {"time": budgets["time"]}

    return xr.DataArray(
        data=budgets.data.reshape(dense_shape),
        coords=coords,
        dims=("time", "layer", "y", "x"),
        name=budgets.name,
    )

193 

194 

def open_imeth6_budgets(
    cbc_path: FilePath,
    grb_content: dict,
    header_list: List[cbc.Imeth6Header],
    return_variable: str = "budget",
) -> xr.DataArray:
    """
    Open the data for an imeth==6 budget section.

    Uses the information of the DIS GRB file to create the properly sized dense
    xr.DataArrays (which store the entire domain). Doing so ignores the boundary
    condition internal index (id2) and any present auxiliary columns.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth6Header]
    return_variable: str
        Which record field to extract; defaults to "budget".

    Returns
    -------
    xr.DataArray with dims ("time", "layer", "y", "x")
    """
    # Allocates dense arrays for the entire model domain.
    # Record layout: id1, id2, budget, then any auxiliary columns.
    dtype = np.dtype(
        [("id1", np.int32), ("id2", np.int32), ("budget", np.float64)]
        + [(name, np.float64) for name in header_list[0].auxtxt]
    )
    shape = (grb_content["nlayer"], grb_content["nrow"], grb_content["ncol"])
    # np.product was deprecated in NumPy 1.25 and removed in NumPy 2.0;
    # np.prod is the supported spelling.
    size = np.prod(shape)
    dask_list = []
    time = np.empty(len(header_list), dtype=np.float64)
    for i, header in enumerate(header_list):
        time[i] = header.totim
        a = dask.delayed(cbc.read_imeth6_budgets_dense)(
            cbc_path, header.nlist, dtype, header.pos, size, shape, return_variable
        )
        x = dask.array.from_delayed(a, shape=shape, dtype=np.float64)
        dask_list.append(x)

    daskarr = dask.array.stack(dask_list, axis=0)
    coords = grb_content["coords"]
    coords["time"] = time
    name = header_list[0].text
    return xr.DataArray(daskarr, coords, ("time", "layer", "y", "x"), name=name)

240 

241 

@numba.njit
def dis_indices(
    ia: IntArray,
    ja: IntArray,
    ncells: int,
    nlayer: int,
    nrow: int,
    ncol: int,
) -> Tuple[IntArray, IntArray, IntArray]:
    """
    Infer type of connection via cell number comparison. Returns arrays that can
    be used for extracting right, front, and lower face flow from the
    flow-ja-face array.

    In a structured grid, using a linear index:
    * the right neighbor is +(1)
    * the front neighbor is +(number of cells in a column)
    * the lower neighbor is +(number of cells in a layer)
    * lower "pass-through" cells (idomain == -1) are multitude of (number of
      cells in a layer)

    Parameters
    ----------
    ia: Array of ints
        Row index of Compressed Sparse Row (CSR) connectivity matrix.
    ja: Array of ints
        Column index of CSR connectivity matrix. Every entry represents a
        cell-to-cell connection.
    ncells: int
    nlayer: int
    nrow: int
    ncol: int

    Returns
    -------
    right: 3D array of ints
    front: 3D array of ints
    lower: 3D array of ints
    """
    shape = (nlayer, nrow, ncol)
    ncells_per_layer = nrow * ncol
    # -1 marks "no connection in this direction" for a cell; dtype int64 so
    # the arrays can be used directly for fancy indexing.
    right = np.full(ncells, -1, np.int64)
    front = np.full(ncells, -1, np.int64)
    lower = np.full(ncells, -1, np.int64)

    for i in range(ncells):
        # Walk the CSR row of cell i: every nonzero is one connection.
        for nzi in range(ia[i], ia[i + 1]):
            nzi -= 1  # python is 0-based, modflow6 is 1-based
            j = ja[nzi] - 1  # python is 0-based, modflow6 is 1-based
            # Linear-index distance between the connected cells decides the
            # connection direction (see docstring).
            d = j - i
            if d <= 0:  # left, back, upper
                continue
            elif d == 1:  # right neighbor
                right[i] = nzi
            elif d == ncol:  # front neighbor
                front[i] = nzi
            elif d == ncells_per_layer:  # lower neighbor
                lower[i] = nzi
            else:  # skips one: must be pass through
                # The connection jumps over one or more pass-through layers;
                # mark the lower face of every skipped cell with the same
                # flow-ja-face entry.
                npassed = int(d / ncells_per_layer)
                for ipass in range(0, npassed):
                    lower[i + ipass * ncells_per_layer] = nzi

    return right.reshape(shape), front.reshape(shape), lower.reshape(shape)

306 

307 

def dis_to_right_front_lower_indices(
    grb_content: dict,
) -> Tuple[xr.DataArray, xr.DataArray, xr.DataArray]:
    """
    Infer the indices to extract right, front, and lower face flows from the
    flow-ja-face array.

    Parameters
    ----------
    grb_content: dict

    Returns
    -------
    right: xr.DataArray of ints with dims ("layer", "y", "x")
    front: xr.DataArray of ints with dims ("layer", "y", "x")
    lower: xr.DataArray of ints with dims ("layer", "y", "x")
    """
    index_arrays = dis_indices(
        ia=grb_content["ia"],
        ja=grb_content["ja"],
        ncells=grb_content["ncells"],
        nlayer=grb_content["nlayer"],
        nrow=grb_content["nrow"],
        ncol=grb_content["ncol"],
    )
    coords = grb_content["coords"]
    dims = ("layer", "y", "x")
    right, front, lower = (
        xr.DataArray(index, coords, dims) for index in index_arrays
    )
    return (right, front, lower)

338 

339 

def dis_extract_face_budgets(
    budgets: xr.DataArray, index: xr.DataArray
) -> xr.DataArray:
    """
    Grab right, front, or lower face flows from the flow-ja-face array.

    This could be done by a single .isel() indexing operation, but those
    are extremely slow in this case, which seems to be an xarray issue.
    Indexing with a flattened integer array and reshaping afterwards is
    much faster.

    Parameters
    ----------
    budgets: xr.DataArray of floats
        flow-ja-face array, dims ("time", "linear_index")
        The linear index enumerates cell-to-cell connections in this case, not
        the individual cells.
    index: xr.DataArray of ints
        right, front, or lower index array with dims("layer", "y", "x")

    Returns
    -------
    xr.DataArray of floats with dims ("time", "layer", "y", "x")
    """
    ntime = budgets["time"].size
    coords = dict(index.coords)
    coords["time"] = budgets["time"]
    # isel with a flattened 1D index, then a (dask) reshape: isel with a 3D
    # array is extremely slow for some reason.
    flat = budgets.isel(linear_index=index.values.ravel()).data
    extracted = xr.DataArray(
        data=flat.reshape((ntime, *index.shape)),
        coords=coords,
        dims=("time", "layer", "y", "x"),
        name="flow-ja-face",
    )
    # Cells without a connection in this direction (index == -1) get 0.0.
    return extracted.where(index >= 0, other=0.0)

374 

375 

def dis_open_face_budgets(
    cbc_path: FilePath, grb_content: dict, header_list: List[cbc.Imeth1Header]
) -> Tuple[xr.DataArray, xr.DataArray, xr.DataArray]:
    """
    Open the flow-ja-face data and split it into right, front, and lower
    face flows.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth1Header]

    Returns
    -------
    right: xr.DataArray of floats with dims ("time", "layer", "y", "x")
    front: xr.DataArray of floats with dims ("time", "layer", "y", "x")
    lower: xr.DataArray of floats with dims ("time", "layer", "y", "x")
    """
    indices = dis_to_right_front_lower_indices(grb_content)
    budgets = cbc.open_imeth1_budgets(cbc_path, header_list)
    right, front, lower = (
        dis_extract_face_budgets(budgets, index) for index in indices
    )
    return right, front, lower

402 

403 

# TODO: Currently assumes dis grb, can be checked & dispatched
def open_cbc(
    cbc_path: FilePath,
    grb_content: Dict[str, Any],
    flowja: bool = False,
    simulation_start_time: Optional[np.datetime64] = None,
    time_unit: Optional[str] = "d",
) -> Dict[str, xr.DataArray]:
    """
    Open a cell-by-cell budget (CBC) file and return its budget terms as
    lazily loaded DataArrays.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
        Grid information, as returned by ``read_grb``.
    flowja: bool, default False
        If True, return the flow-ja-face data as-is, together with its
        connectivity; otherwise split it into right, front, and lower face
        flows.
    simulation_start_time: np.datetime64, optional
        If given, the numeric time coordinates of all budget terms are
        converted to datetimes.
    time_unit: str, optional
        Unit of the numeric time coordinate, defaults to "d".

    Returns
    -------
    Dict[str, xr.DataArray]
        Mapping of budget term name to DataArray.
    """
    headers = cbc.read_cbc_headers(cbc_path)
    cbc_content = {}
    for key, header_list in headers.items():
        # TODO: validate homogeneity of header_list, ndat consistent, nlist consistent etc.
        if key == "flow-ja-face":
            if flowja:
                # Use a distinct name: do not rebind the boolean ``flowja``
                # parameter to the returned budget array.
                flowja_budgets, nm = cbc.open_face_budgets_as_flowja(
                    cbc_path, header_list, grb_content
                )
                cbc_content["flow-ja-face"] = flowja_budgets
                cbc_content["connectivity"] = nm
            else:
                right, front, lower = dis_open_face_budgets(
                    cbc_path, grb_content, header_list
                )
                cbc_content["flow-right-face"] = right
                cbc_content["flow-front-face"] = front
                cbc_content["flow-lower-face"] = lower
        else:
            if isinstance(header_list[0], cbc.Imeth1Header):
                cbc_content[key] = open_imeth1_budgets(
                    cbc_path, grb_content, header_list
                )
            elif isinstance(header_list[0], cbc.Imeth6Header):
                # for non cell flow budget terms, use auxiliary variables as return value
                if header_list[0].text.startswith("data-"):
                    for return_variable in header_list[0].auxtxt:
                        key_aux = header_list[0].txt2id1 + "-" + return_variable
                        cbc_content[key_aux] = open_imeth6_budgets(
                            cbc_path, grb_content, header_list, return_variable
                        )
                else:
                    cbc_content[key] = open_imeth6_budgets(
                        cbc_path, grb_content, header_list
                    )
    if simulation_start_time is not None:
        for cbc_name, cbc_array in cbc_content.items():
            cbc_content[cbc_name] = assign_datetime_coords(
                cbc_array, simulation_start_time, time_unit
            )

    return cbc_content

454 

455 

def grid_info(like: xr.DataArray) -> Dict[str, Any]:
    """
    Derive grid shape and coordinates from a template DataArray carrying
    "layer", "y", and "x" coordinates.
    """
    layer = like["layer"]
    y = like["y"]
    x = like["x"]
    return {
        "nlayer": layer.size,
        "nrow": y.size,
        "ncol": x.size,
        "coords": {"layer": layer, "y": y, "x": x},
    }