1""" 

2Utilities for parsing a project file. 

3""" 

4 

5import shlex 

6from collections import defaultdict 

7from datetime import datetime 

8from itertools import chain 

9from os import PathLike 

10from pathlib import Path 

11from typing import Any, Dict, List, Sequence, Tuple, Union 

12 

13import numpy as np 

14import pandas as pd 

15import xarray as xr 

16 

17import imod 

18 

FilePath = Union[str, "PathLike[str]"]


KEYS = {
    "(bnd)": ("ibound",),
    "(top)": ("top",),
    "(bot)": ("bottom",),
    "(thk)": ("thickness",),
    "(khv)": ("kh",),
    "(kva)": ("vertical_anisotropy",),
    "(kdw)": ("transmissivity",),
    "(kvv)": ("kv",),
    "(vcw)": ("resistance",),
    "(shd)": ("head",),
    "(sto)": ("storage_coefficient",),
    "(spy)": ("specific_yield",),
    "(por)": ("porosity",),
    "(ani)": ("factor", "angle"),
    "(hfb)": ("gen",),
    "(ibs)": (None,),
    "(pwt)": (None,),
    "(sft)": (None,),
    "(obs)": (None,),
    "(cbi)": (None,),
    "(sco)": (None,),
    "(dsp)": (None,),
    "(ics)": (None,),
    "(fcs)": (None,),
    "(ssc)": (None,),
    "(fod)": (None,),
    "(fos)": (None,),
    "(rct)": (None,),
    "(con)": (None,),
    "(pst)": (None,),
}

DATE_KEYS = {
    "(uzf)": (None,),
    "(rch)": ("rate",),
    "(evt)": ("rate", "surface", "depth"),
    "(drn)": ("conductance", "elevation"),
    "(olf)": ("elevation",),
    "(riv)": ("conductance", "stage", "bottom_elevation", "infiltration_factor"),
    "(isg)": ("isg",),
    "(sfr)": ("isg",),
    "(lak)": (None,),
    "(wel)": ("ipf",),
    "(mnw)": (None,),
    "(ghb)": ("conductance", "head"),
    "(chd)": ("head",),
    "(fhb)": (None,),
    "(fde)": (None,),
    "(tvc)": (None,),
}

METASWAP_VARS = (
    "boundary",
    "landuse",
    "rootzone_thickness",
    "soil_physical_unit",
    "meteo_station_number",
    "surface_elevation",
    "artificial_recharge",
    "artifical_recharge_layer",
    "artificial_recharge_capacity",
    "wetted_area",
    "urban_area",
    "urban_ponding_depth",
    "rural_ponding_depth",
    "urban_runoff_resistance",
    "rural_runoff_resistance",
    "urban_runon_resistance",
    "rural_runon_resistance",
    "urban_infiltration_capacity",
    "rural_infiltration_capacity",
    "perched_water_table_level",
    "soil_moisture_fraction",
    "conductivitiy_factor",
    "plot_number",
    "steering_location",
    "plot_drainage_level",
    "plot_drainage_resistance",
)
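
# For orientation, a hypothetical project-file fragment (illustrative values,
# not taken from a real model) in the shape the parsers below expect. A block
# consists of a header line (n, key, active), an NSUB,NSYSTEM line, and one
# line per system and sub-entry holding: active, is_constant, layer, factor,
# addition, constant, and (if is_constant == 2) a path:
#
#     0001, (khv), 1
#     001, 001
#     1, 2, 001, 1.0, 0.0, -999.99, "kh_layer1.idf"
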

class _LineIterator:
    """
    Like iter(lines), but with the ability to step back one line and to check
    whether the iterator is finished.
    """

    def __init__(self, lines: List[List[str]]):
        self.lines = lines
        self.count = -1
        self.length = len(lines)

    def __iter__(self):
        return self

    def __next__(self) -> List[str]:
        if self.finished:
            raise StopIteration
        self.count += 1
        return self.lines[self.count]

    def back(self) -> None:
        self.count = max(self.count - 1, -1)

    @property
    def finished(self) -> bool:
        return (self.count + 1) >= self.length
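
# A minimal usage sketch of _LineIterator (hypothetical data): stepping
# forward with next(), rewinding one line with back(), checking finished.
#
#     >>> lines = _LineIterator([["0001", "(khv)", "1"], ["001", "001"]])
#     >>> next(lines)
#     ['0001', '(khv)', '1']
#     >>> lines.back()  # rewind; next(lines) returns the same line again
#     >>> lines.finished
#     False
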

def _tokenize(line: str) -> List[str]:
    """
    A value separator in Fortran list-directed input is:

    * A comma if period decimal edit mode is POINT.
    * One or more contiguous spaces (blanks); no tabs.

    Other remarks:

    * Values, except for character strings, cannot contain blanks.
    * Strings may be unquoted if they do not start with a digit and contain
      no value separators.
    * Character strings can be quoted strings, using pairs of quotes ("),
      pairs of apostrophes (').
    * A quote or apostrophe must be preceded by a value separator to initiate
      a quoted string.
    * An empty entry consists of two consecutive commas (or semicolons).

    For the use here (parsing iMOD's project files), we ignore:

    * A semicolon value separator if period decimal edit mode is COMMA.
    * Complex constants given as two real constants separated by a comma and
      enclosed in parentheses.
    * Repetition counts: 4*(3.,2.) 2*, 4*'hello'

    Furthermore, we do not expect commas inside of the project file entries,
    since we expect:

    * Package names: unquoted character strings.
    * File paths: these will not contain commas, nor a single apostrophe or
      quote symbol; they may contain whitespace if quoted.
    * Integers for counts and settings.
    * Floats for addition and multiplication values.
    * Simple character strings for period names (summer, winter). These
      technically could contain commas if quoted, which is very unlikely.
    * No quotes or apostrophes are escaped.

    With these assumptions, we can limit complexity considerably (see the
    PyLiDiRe link for a complete implementation):

    * First we split by comma (we don't expect commas in quoted strings).
    * Next we split by whitespace, unless quoted.

    We can expect both single and double quotes, even within a single line:
    shlex.split() handles this. Note that additional entries are likely
    allowed, as the Fortran implementation only reads what is necessary,
    then stops parsing.

    See also:

    * https://stackoverflow.com/questions/36165050/python-equivalent-of-fortran-list-directed-input
    * https://gitlab.com/everythingfunctional/PyLiDiRe
    * https://docs.oracle.com/cd/E19957-01/805-4939/6j4m0vnc5/index.html
    * The Fortran 2003 Handbook

    Examples
    --------

    Raises ValueError due to a missing closing quotation. (This behavior can
    be changed via shlex.split(s, posix=False)):

    >> _tokenize("That's life")

    >> _tokenize("That 's life'")
    ["That", "s life"]

    >> _tokenize("That,'s life'")
    ["That", "s life"]
    """
    values = [v.strip().replace("\\", "/") for v in line.split(",")]
    tokens = list(chain.from_iterable(shlex.split(v) for v in values))
    return tokens


def _wrap_error_message(
    exception: Exception, description: str, lines: _LineIterator
) -> None:
    lines.back()
    content = next(lines)
    number = lines.count + 1
    raise type(exception)(
        f"{exception}\n"
        f"Failed to parse {description} for line {number} with content:\n{content}"
    )


def _parse_blockheader(lines: _LineIterator) -> Tuple[int, str, str]:
    try:
        no_result = None, None, None
        line = next(lines)

        # Skip if it's an empty line.
        if len(line) == 0:
            return no_result

        first = line[0].lower()
        if first in ("periods", "species"):
            return 1, first, None
        # The line must contain at least nper, key, active.
        elif len(line) >= 3:
            n = int(first)
            key = line[1].lower()
            active = line[2]
            return n, key, active
        # It's a comment or something.
        else:
            return no_result
    except Exception as e:
        _wrap_error_message(e, "block header", lines)


def _parse_time(lines: _LineIterator) -> str:
    try:
        line = next(lines)
        date = line[0].lower()
        if len(line) > 1:
            time = line[1]
            return f"{date} {time}"
        else:
            return date
    except Exception as e:
        _wrap_error_message(e, "date time", lines)


def _parse_blockline(lines: _LineIterator, time: str = None) -> Dict[str, Any]:
    try:
        line = next(lines)
        content = {
            "active": bool(int(line[0])),
            "is_constant": int(line[1]),
            "layer": int(line[2]),
            "factor": float(line[3]),
            "addition": float(line[4]),
            "constant": float(line[5]),
        }
        if content["is_constant"] == 2:
            content["path"] = Path(line[6]).resolve()
        if time is not None:
            content["time"] = time
        return content
    except Exception as e:
        _wrap_error_message(e, "entries", lines)
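
# A sketch of what _parse_blockline returns for a hypothetical entry line
# (values are illustrative only):
#
#     >>> lines = _LineIterator([_tokenize("1, 2, 001, 1.0, 0.0, -999.99, 'head.idf'")])
#     >>> _parse_blockline(lines)
#     {'active': True, 'is_constant': 2, 'layer': 1, 'factor': 1.0,
#      'addition': 0.0, 'constant': -999.99, 'path': ...resolved path to head.idf...}
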

def _parse_nsub_nsystem(lines: _LineIterator) -> Tuple[int, int]:
    try:
        line = next(lines)
        n_entry = int(line[0])
        n_system = int(line[1])
        return n_entry, n_system
    except Exception as e:
        _wrap_error_message(e, "number of sub-entries and number of systems", lines)


def _parse_notimeblock(
    lines: _LineIterator,
    fields: List[str],
) -> Dict[str, Any]:
    n_entry, n_system = _parse_nsub_nsystem(lines)

    if len(fields) != n_entry:
        raise ValueError(
            f"Expected NSUB entry of {len(fields)} for {fields}, read: {n_entry}"
        )
    content = {
        field: [_parse_blockline(lines) for _ in range(n_system)] for field in fields
    }
    content["n_system"] = n_system
    return content


def _parse_capblock(
    lines: _LineIterator,
) -> Dict[str, Any]:
    fields = METASWAP_VARS
    n_entry, n_system = _parse_nsub_nsystem(lines)

    if n_entry == 21:
        # Remove the layer entry.
        fields = list(fields[:22])
        fields.pop(8)
    elif n_entry == 22:
        fields = fields[:22]
    elif n_entry == 26:
        pass
    else:
        raise ValueError(
            f"Expected NSUB entry of 21, 22, or 26 for {fields}, read: {n_entry}"
        )

    content = {
        field: [_parse_blockline(lines) for _ in range(n_system)] for field in fields
    }
    content["n_system"] = n_system
    return content


def _parse_extrablock(lines: _LineIterator, n: int) -> Dict[str, List[str]]:
    """Parse the MetaSWAP "extra files" block."""
    return {"paths": [next(lines) for _ in range(n)]}


def _parse_timeblock(
    lines: _LineIterator,
    fields: List[str],
    n: int,
) -> Dict[str, Any]:
    n_fields = len(fields)
    content = defaultdict(list)
    for _ in range(n):
        time = _parse_time(lines)
        content["time"].append(time)
        n_entry, n_system = _parse_nsub_nsystem(lines)

        if n_fields != n_entry:
            raise ValueError(
                f"Expected NSUB entry of {n_fields} for {fields}, read: {n_entry}"
            )

        for field in fields:
            content[field].extend(
                [_parse_blockline(lines, time) for _ in range(n_system)]
            )

    content["n_system"] = n_system
    return content


def _parse_pcgblock(lines: _LineIterator) -> Dict[str, Any]:
    try:
        line = next(lines)

        # TODO: which are optional? How many to expect?
        # Check for an empty line to terminate the block?
        types = {
            "mxiter": int,
            "iter1": int,
            "hclose": float,
            "rclose": float,
            "relax": float,
            "npcond": int,
            "iprpcg": int,
            "mutpcg": int,
            "damppcg": float,
            "damppcgt": float,
            "iqerror": int,
            "qerror": float,
        }

        if len(line) == 12:
            line_iterator = iter(line)
            content = {
                k: valuetype(next(line_iterator)) for k, valuetype in types.items()
            }
        elif any("=" in s for s in line):
            pcglines = [line] + [next(lines) for _ in range(11)]
            content = {}
            for line in pcglines:
                # Undo the tokenization; partition on the equality sign instead.
                line = "".join(line)
                key, _, value = line.lower().partition("=")
                value = types[key](value)
                content[key] = value
        else:
            raise ValueError(
                f"Expected 12 KEY = VALUE pairs, or 12 values. Found {len(line)}"
            )

        return content
    except Exception as e:
        _wrap_error_message(e, "PCG entry", lines)
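
# The two PCG spellings accepted above, sketched with hypothetical values:
# either all 12 values on a single line,
#
#     150, 30, 0.0001, 100.0, 0.98, 1, 0, 0, 1.0, 1.0, 0, 0.0
#
# or 12 KEY=VALUE lines:
#
#     MXITER=150
#     ITER1=30
#     ... (and so on for the remaining 10 keys)
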

def _parse_periodsblock(lines: _LineIterator) -> Dict[str, str]:
    try:
        periods = {}
        while not lines.finished:
            line = next(lines)
            # Stop if we encounter an empty line.
            if len(line) == 0:
                break
            # Read the alias.
            alias = line[0]
            # Now read the time associated with it.
            start = _parse_time(lines)
            periods[alias] = start
        return periods
    except Exception as e:
        _wrap_error_message(e, "periods data block", lines)
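
# A hypothetical Periods block as consumed above: an alias line followed by a
# time line, repeated, terminated by an empty line. open_projectfile_data
# parses these dates day-first (yearfirst=False).
#
#     periods
#     summer
#     01-04-1900 00:00:00
#     winter
#     01-10-1900 00:00:00
#
# (an empty line ends the block)
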

def _parse_speciesblock(lines: _LineIterator):
    try:
        species = {}
        while not lines.finished:
            line = next(lines)
            # Stop if we encounter an empty line.
            if len(line) == 0:
                break
            name, nr = line
            nr = int(nr)
            species[nr] = name
        return species
    except Exception as e:
        _wrap_error_message(e, "species entry", lines)


def _parse_block(lines: _LineIterator, content: Dict[str, Any]) -> None:
    """
    Mutates the content dict.
    """
    n = key = active = None

    # A project file may contain any number of lines outside of a "topic"
    # block. _parse_blockheader will return a triple of Nones in that case.
    while key is None and not lines.finished:
        n, key, active = _parse_blockheader(lines)

    try:
        if key in KEYS:
            if n != 1:
                raise ValueError(f"Expected N=1 for {key}, read: {n}")
            fields = KEYS[key]
            blockcontent = _parse_notimeblock(lines, fields)
        elif key in DATE_KEYS:
            fields = DATE_KEYS[key]
            blockcontent = _parse_timeblock(lines, fields, n)
        elif key == "(cap)":
            blockcontent = _parse_capblock(lines)
        elif key == "(pcg)":
            blockcontent = _parse_pcgblock(lines)
        elif key == "periods":
            blockcontent = _parse_periodsblock(lines)
        elif key == "species":
            blockcontent = _parse_speciesblock(lines)
        elif key == "extra":
            blockcontent = _parse_extrablock(lines, n)
        else:
            other = ("(pcg)", "(gcg)", "(vdf)")
            options = tuple(KEYS.keys()) + tuple(DATE_KEYS.keys()) + other
            lines.back()
            line = next(lines)
            number = lines.count + 1
            raise ValueError(
                f"Failed to recognize header keyword: {key}. Expected one of keywords {options}"
                f"\nErrored in line {number} with entries:\n{line}"
            )

    except Exception as e:
        raise type(e)(f"{e}\nError occurred for keyword: {key}")

    if blockcontent is not None and active is not None:
        blockcontent["active"] = active

    content[key] = blockcontent
    return


def _process_package_entry(entry: Dict):
    """
    The iMOD project file supports constants in lieu of IDFs.
    """
    coords = {"layer": entry["layer"]}
    dims = ("layer",)

    if "path" not in entry:
        path = None
        header = {"coords": coords}
        value = entry["constant"]
    else:
        path = entry["path"]
        header = imod.idf.header(path, pattern="{name}")
        value = None

    header["dims"] = dims
    return path, header, value


def _merge_coords(headers: List[Dict[str, Any]]) -> Dict[str, np.ndarray]:
    coords = defaultdict(list)
    for header in headers:
        for key, value in header["coords"].items():
            coords[key].append(value)
    return {k: np.unique(coords[k]) for k in coords}


def _create_dataarray_from_paths(paths: List[str], headers: List[Dict[str, Any]]):
    da = imod.formats.array_io.reading._load(
        paths, use_cftime=False, _read=imod.idf._read, headers=headers
    )
    return da


def _create_dataarray_from_values(values: List[float], headers: List[Dict[str, Any]]):
    coords = _merge_coords(headers)
    firstdims = headers[0]["dims"]
    shape = [len(coord) for coord in coords.values()]
    da = xr.DataArray(np.reshape(values, shape), dims=firstdims, coords=coords)
    return da


def _create_dataarray(
    paths: List[str], headers: List[Dict[str, Any]], values: List[float]
) -> xr.DataArray:
    """
    Create a DataArray from a list of IDF paths, or from constant values.
    """
    values_valid = []
    paths_valid = []
    headers_paths = []
    headers_values = []
    for path, header, value in zip(paths, headers, values):
        if path is None:
            headers_values.append(header)
            values_valid.append(value)
        else:
            headers_paths.append(header)
            paths_valid.append(path)

    if paths_valid and values_valid:
        dap = _create_dataarray_from_paths(paths_valid, headers_paths)
        dav = _create_dataarray_from_values(values_valid, headers_values)
        dap.name = "tmp"
        dav.name = "tmp"
        da = xr.merge((dap, dav), join="outer")["tmp"]
    elif paths_valid:
        da = _create_dataarray_from_paths(paths_valid, headers_paths)
    elif values_valid:
        da = _create_dataarray_from_values(values_valid, headers_values)

    da = apply_factor_and_addition(headers, da)
    return da


def apply_factor_and_addition(headers, da):
    if not ("layer" in da.coords or "time" in da.dims):
        factor = headers[0]["factor"]
        addition = headers[0]["addition"]
        da = da * factor + addition
    elif "layer" in da.coords and "time" not in da.dims:
        da = apply_factor_and_addition_per_layer(headers, da)
    else:
        header_per_time = defaultdict(list)
        for time in da.coords["time"].values:
            for header in headers:
                if np.datetime64(header["time"]) == time:
                    header_per_time[time].append(header)

        for time in da.coords["time"]:
            da.loc[{"time": time}] = apply_factor_and_addition(
                header_per_time[np.datetime64(time.values)],
                da.sel(time=time, drop=True),
            )
    return da


def apply_factor_and_addition_per_layer(headers, da):
    layer = da.coords["layer"].values
    header_per_layer = {}
    for header in headers:
        if header["layer"] in header_per_layer.keys():
            raise ValueError("error in project file: layer repetition")
        header_per_layer[header["layer"]] = header
    addition_values = [header_per_layer[lay]["addition"] for lay in layer]
    factor_values = [header_per_layer[lay]["factor"] for lay in layer]
    addition = xr.DataArray(addition_values, coords={"layer": layer}, dims=("layer",))
    factor = xr.DataArray(factor_values, coords={"layer": layer}, dims=("layer",))
    da = da * factor + addition
    return da
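
# A small worked example (hypothetical headers) of the per-layer scaling:
# values on layer 1 are multiplied by 2.0, values on layer 2 get 5.0 added.
#
#     >>> da = xr.DataArray([1.0, 1.0], coords={"layer": [1, 2]}, dims=("layer",))
#     >>> headers = [
#     ...     {"layer": 1, "factor": 2.0, "addition": 0.0},
#     ...     {"layer": 2, "factor": 1.0, "addition": 5.0},
#     ... ]
#     >>> apply_factor_and_addition_per_layer(headers, da).values
#     array([2., 6.])
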

def _open_package_idf(
    block_content: Dict[str, Any], variables: Sequence[str]
) -> List[Dict[str, xr.DataArray]]:
    das = {}
    for variable in variables:
        variable_content = block_content[variable]
        paths = []
        headers = []
        values = []
        for entry in variable_content:
            path, header, value = _process_package_entry(entry)
            header["name"] = variable
            header["dims"] = ["layer"]
            header["layer"] = entry["layer"]
            header["addition"] = entry["addition"]
            header["factor"] = entry["factor"]
            paths.append(path)
            headers.append(header)
            values.append(value)

        das[variable] = _create_dataarray(paths, headers, values)

    return [das]


def _process_time(time: str, yearfirst: bool = True):
    if time == "steady-state":
        time = None
    else:
        if yearfirst:
            if len(time) == 19:
                time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
            elif len(time) == 10:
                time = datetime.strptime(time, "%Y-%m-%d")
            else:
                raise ValueError(
                    f"time data {time} does not match format "
                    '"%Y-%m-%d %H:%M:%S" or "%Y-%m-%d"'
                )
        else:
            if len(time) == 19:
                time = datetime.strptime(time, "%d-%m-%Y %H:%M:%S")
            elif len(time) == 10:
                time = datetime.strptime(time, "%d-%m-%Y")
            else:
                raise ValueError(
                    f"time data {time} does not match format "
                    '"%d-%m-%Y %H:%M:%S" or "%d-%m-%Y"'
                )
    return time
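
# Examples of the two date orders handled above ("steady-state" maps to None):
#
#     >>> _process_time("2000-01-02 03:04:05")
#     datetime.datetime(2000, 1, 2, 3, 4, 5)
#     >>> _process_time("02-01-2000", yearfirst=False)
#     datetime.datetime(2000, 1, 2, 0, 0)
#     >>> _process_time("steady-state") is None
#     True
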

def _process_boundary_condition_entry(entry: Dict, periods: Dict[str, datetime]):
    """
    The iMOD project file supports constants in lieu of IDFs.

    Also process repeated stress periods (on a yearly basis): substitute the
    original date here.
    """
    coords = {}
    timestring = entry["time"]

    # Resolve repeating periods first:
    time = periods.get(timestring)
    if time is not None:
        repeat = time
    else:
        # This resolves e.g. "steady-state".
        time = _process_time(timestring)
        repeat = None

    if time is None:
        dims = ()
    else:
        dims = ("time",)
        coords["time"] = time

    # A layer value of 0 signifies that the layer must be determined on the
    # basis of bottom elevation and stage.
    layer = entry["layer"]
    if layer <= 0:
        layer = None
    else:
        coords["layer"] = layer
        dims = dims + ("layer",)

    if "path" not in entry:
        path = None
        header = {"coords": coords}
        value = entry["constant"]
    else:
        path = entry["path"]
        header = imod.idf.header(path, pattern="{name}")
        value = None
    header["addition"] = entry["addition"]
    header["factor"] = entry["factor"]
    header["dims"] = dims
    if layer is not None:
        header["layer"] = layer
    if time is not None:
        header["time"] = time

    return path, header, value, repeat


def _open_boundary_condition_idf(
    block_content, variables, periods: Dict[str, datetime]
) -> Tuple[List[Dict[str, xr.DataArray]], List[datetime]]:
    """
    Read the variables specified from block_content.
    """
    n_system = block_content["n_system"]
    n_time = len(block_content["time"])
    n_total = n_system * n_time

    das = [{} for _ in range(n_system)]
    for variable in variables:
        variable_content = block_content[variable]

        n = len(variable_content)
        if n != n_total:
            raise ValueError(
                f"Expected n_time * n_system = {n_time} * {n_system} = "
                f"{n_total} entries for variable {variable}. Received: {n}"
            )

        # Group the paths and headers by system.
        system_paths = defaultdict(list)
        system_headers = defaultdict(list)
        system_values = defaultdict(list)
        all_repeats = set()
        for i, entry in enumerate(variable_content):
            path, header, value, repeat = _process_boundary_condition_entry(
                entry, periods
            )
            header["name"] = variable
            key = i % n_system
            system_paths[key].append(path)
            system_headers[key].append(header)
            system_values[key].append(value)
            if repeat:
                all_repeats.add(repeat)

        # Concat one system at a time.
        for i, (paths, headers, values) in enumerate(
            zip(system_paths.values(), system_headers.values(), system_values.values())
        ):
            das[i][variable] = _create_dataarray(paths, headers, values)

    repeats = sorted(all_repeats)
    return das, repeats


def _read_package_gen(
    block_content: Dict[str, Any], has_topbot: bool
) -> List[Dict[str, Any]]:
    out = []
    for entry in block_content["gen"]:
        gdf = imod.gen.read(entry["path"])
        if has_topbot:
            gdf["resistance"] = entry["factor"] * entry["addition"]
        else:
            gdf["multiplier"] = entry["factor"] * entry["addition"]
        d = {
            "geodataframe": gdf,
            "layer": entry["layer"],
        }
        out.append(d)
    return out


def _read_package_ipf(
    block_content: Dict[str, Any], periods: Dict[str, datetime]
) -> Tuple[List[Dict[str, Any]], List[datetime]]:
    out = []
    repeats = []
    for entry in block_content["ipf"]:
        timestring = entry["time"]
        layer = entry["layer"]
        time = periods.get(timestring)
        factor = entry["factor"]
        addition = entry["addition"]
        if time is None:
            time = _process_time(timestring)
        else:
            repeats.append(time)

        # Ensure the columns are identifiable.
        path = Path(entry["path"])
        ipf_df, indexcol, ext = imod.ipf._read_ipf(path)
        if indexcol == 0:
            # No associated files.
            columns = ("x", "y", "rate")
            if layer <= 0:
                df = ipf_df.iloc[:, :5]
                columns = columns + ("top", "bottom")
            else:
                df = ipf_df.iloc[:, :3]
            df.columns = columns
        else:
            dfs = []
            for row in ipf_df.itertuples():
                filename = row[indexcol]
                path_assoc = path.parent.joinpath(f"{filename}.{ext}")
                df_assoc = imod.ipf.read_associated(path_assoc).iloc[:, :2]
                df_assoc.columns = ["time", "rate"]
                df_assoc["x"] = row[1]
                df_assoc["y"] = row[2]
                df_assoc["id"] = path_assoc.stem
                if layer <= 0:
                    df_assoc["top"] = row[4]
                    df_assoc["bottom"] = row[5]
                dfs.append(df_assoc)
            df = pd.concat(dfs, ignore_index=True, sort=False)
        df["rate"] = df["rate"] * factor + addition

        d = {
            "dataframe": df,
            "layer": layer,
            "time": time,
        }
        out.append(d)
    repeats = sorted(repeats)
    return out, repeats


def read_projectfile(path: FilePath) -> Dict[str, Any]:
    """
    Read an iMOD project file into a collection of nested dictionaries.

    The top-level keys are the "topic" entries such as "bnd" or "riv" in the
    project file. An example structure of the dictionaries is visualized
    below:

    .. code-block::

        content
        ├── bnd
        │   ├── active: bool
        │   └── ibound: list of dictionaries for each layer
        ├── riv
        │   ├── active: bool
        │   ├── conductance: list of dictionaries for each time and layer.
        │   ├── stage: idem.
        │   ├── bottom_elevation: idem.
        │   └── infiltration_factor: idem.
        etc.

    Time and layer are flattened into a single list and time is included in
    every dictionary:

    .. code-block::

        stage
        ├── 0  # First entry in list
        │   ├── active: bool
        │   ├── is_constant: bool
        │   ├── layer: int
        │   ├── factor: float
        │   ├── addition: float
        │   ├── constant: float
        │   ├── path: str
        │   └── time: str
        │
        ├── 1  # Second entry in list
        │   ├── etc.
        etc.

    Parameters
    ----------
    path: str or Path

    Returns
    -------
    content: Dict[str, Any]
    """
    # Force to Path.
    path = Path(path)

    with open(path) as f:
        lines = f.readlines()

    tokenized = []
    for i, line in enumerate(lines):
        try:
            tokenized.append(_tokenize(line))
        except Exception as e:
            raise type(e)(f"{e}\nError occurred in line {i}")

    lines = _LineIterator(tokenized)
    content = {}
    wdir = path.parent
    # Temporarily change to the projectfile directory to resolve relative paths.
    with imod.util.cd(wdir):
        while not lines.finished:
            _parse_block(lines, content)

    return content
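
# A usage sketch (hypothetical path; keys still carry their parentheses at
# this stage, stripping happens in open_projectfile_data):
#
#     >>> content = read_projectfile("model/model.prj")
#     >>> content["(riv)"]["conductance"][0]["path"]
#     ... resolved path to the first conductance IDF ...
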

def open_projectfile_data(path: FilePath) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Read the contents of an iMOD project file and read/open the data present
    in it:

    * IDF data is lazily loaded into xarray.DataArrays.
    * GEN data is eagerly loaded into geopandas.GeoDataFrames.
    * IPF data is eagerly loaded into pandas.DataFrames.
    * Non-file based entries (such as the PCG settings) are kept as a
      dictionary.

    When multiple systems are present, they are numbered starting from one,
    e.g.:

    * drn-1
    * drn-2

    Xarray requires valid dates for the time coordinate. Aliases such as
    "summer" and "winter" that are associated with dates in the project file
    Periods block cannot be used in the time coordinate. Hence, this function
    will instead insert the dates associated with the aliases, with the year
    replaced by 1899; as the iMOD calendar starts at 1900, this ensures that
    the repeats are always first and that no date collisions will occur.

    Parameters
    ----------
    path: pathlib.Path or str.

    Returns
    -------
    data: Dict[str, Any]
        Keys are the iMOD project file "topics", without parentheses.
    repeat_stress: Dict[str, Any]
        The dates of the repeated stress periods, keyed like ``data``.
    """
    content = read_projectfile(path)
    periods_block = content.pop("periods", None)
    if periods_block is None:
        periods = {}
    else:
        # Set the year of a repeat date to 1899: this ensures it falls
        # outside of the iMOD calendar. Collisions are then always avoided.
        periods = {
            key: _process_time(time, yearfirst=False).replace(year=1899)
            for key, time in periods_block.items()
        }

    # Pop the species block; at the moment we do not do much with it, since
    # most regional models do not include solute transport.
    content.pop("species", None)

    has_topbot = "(top)" in content and "(bot)" in content
    prj_data = {}
    repeat_stress = {}
    for key, block_content in content.items():
        repeats = None
        try:
            if key == "(hfb)":
                data = _read_package_gen(block_content, has_topbot)
            elif key == "(wel)":
                data, repeats = _read_package_ipf(block_content, periods)
            elif key == "(cap)":
                variables = set(METASWAP_VARS).intersection(block_content.keys())
                data = _open_package_idf(block_content, variables)
            elif key in ("extra", "(pcg)"):
                data = [block_content]
            elif key in KEYS:
                variables = KEYS[key]
                data = _open_package_idf(block_content, variables)
            elif key in DATE_KEYS:
                variables = DATE_KEYS[key]
                data, repeats = _open_boundary_condition_idf(
                    block_content, variables, periods
                )
            else:
                raise KeyError(f"Unsupported key: '{key}'")
        except Exception as e:
            raise type(e)(
                f"{e}. Errored while opening/reading data entries for: {key}"
            )

        strippedkey = key.strip("(").strip(")")
        if len(data) > 1:
            for i, da in enumerate(data):
                numbered_key = f"{strippedkey}-{i + 1}"
                prj_data[numbered_key] = da
                repeat_stress[numbered_key] = repeats
        else:
            prj_data[strippedkey] = data[0]
            repeat_stress[strippedkey] = repeats

    repeat_stress = {k: v for k, v in repeat_stress.items() if v}
    return prj_data, repeat_stress
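
# A usage sketch (hypothetical path): the function returns two dictionaries,
# the opened data per topic and the repeat stresses per topic. With multiple
# systems the keys are numbered ("riv-1", "riv-2", ...); here a single
# system is assumed.
#
#     >>> data, repeat_stress = open_projectfile_data("model/model.prj")
#     >>> data["riv"]["conductance"]  # lazily loaded xarray.DataArray
#     >>> repeat_stress.get("riv")    # e.g. [datetime(1899, 4, 1, 0, 0), ...]
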

def read_timfile(path: FilePath) -> List[Dict]:
    def parsetime(time: str) -> datetime:
        # Check for steady-state:
        if time == "00000000000000":
            return None
        return datetime.strptime(time, "%Y%m%d%H%M%S")

    with open(path, "r") as f:
        lines = f.readlines()

    # A line contains 2, 3, or 4 values:
    # time, isave, nstp, tmult
    casters = {
        "time": parsetime,
        "save": lambda x: bool(int(x)),
        "n_timestep": int,
        "timestep_multiplier": float,
    }
    content = []
    for line in lines:
        stripped = line.strip()
        if stripped == "":
            continue
        parts = stripped.split(",")
        entry = {k: cast(s.strip()) for s, (k, cast) in zip(parts, casters.items())}
        content.append(entry)

    return content
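
# A usage sketch with hypothetical .tim content. Given a file containing:
#
#     20000101000000,1,1,1.0
#     20000102000000,1
#
# read_timfile returns one dict per line; only the values present are cast:
#
#     >>> read_timfile("model.tim")
#     [{'time': datetime.datetime(2000, 1, 1, 0, 0), 'save': True,
#       'n_timestep': 1, 'timestep_multiplier': 1.0},
#      {'time': datetime.datetime(2000, 1, 2, 0, 0), 'save': True}]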