Coverage for C:\src\imod-python\imod\formats\prj\prj.py: 92%

462 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-08 13:27 +0200

1""" 

2Utilities for parsing a project file. 

3""" 

4 

5import shlex 

6from collections import defaultdict 

7from datetime import datetime 

8from itertools import chain 

9from os import PathLike 

10from pathlib import Path 

11from typing import Any, Dict, List, Sequence, Tuple, Union 

12 

13import numpy as np 

14import pandas as pd 

15import xarray as xr 

16 

17import imod 

18 

19FilePath = Union[str, "PathLike[str]"] 

20 

21 

# Mapping from project file "topic" key to the named variables its block
# provides (one tuple element per NSUB sub-entry).
#
# NOTE(review): the ``(None)`` values below are plain ``None`` — parentheses
# without a trailing comma do not make a tuple (contrast ``(None,)`` in
# DATE_KEYS). If one of those topics were ever parsed, ``len(fields)`` in
# _parse_notimeblock would raise; presumably these topics do not occur in
# supported project files — TODO confirm.
KEYS = {
    "(bnd)": ("ibound",),
    "(top)": ("top",),
    "(bot)": ("bottom",),
    "(thk)": ("thickness",),
    "(khv)": ("kh",),
    "(kva)": ("vertical_anisotropy",),
    "(kdw)": ("transmissivity",),
    "(kvv)": ("kv",),
    "(vcw)": ("resistance",),
    "(shd)": ("head",),
    "(sto)": ("storage_coefficient",),
    "(spy)": ("specific_yield",),
    "(por)": ("porosity",),
    "(ani)": ("factor", "angle"),
    "(hfb)": ("gen",),
    "(ibs)": (None),
    "(pwt)": (None),
    "(sft)": (None),
    "(obs)": (None),
    "(cbi)": (None),
    "(sco)": (None),
    "(dsp)": (None),
    "(ics)": (None),
    "(fcs)": (None),
    "(ssc)": (None),
    "(fod)": (None),
    "(fos)": (None),
    "(rct)": (None),
    "(con)": (None),
    "(pst)": (None),
}

54 

# Mapping from time-dependent "topic" key to the variables of each stress
# period entry. ``(None,)`` marks topics that are recognized but carry no
# named variables here.
DATE_KEYS = {
    "(uzf)": (None,),
    "(rch)": ("rate",),
    "(evt)": ("rate", "surface", "depth"),
    "(drn)": ("conductance", "elevation"),
    "(olf)": ("elevation",),
    "(riv)": ("conductance", "stage", "bottom_elevation", "infiltration_factor"),
    "(isg)": ("isg",),
    "(sfr)": ("isg",),
    "(lak)": (None,),
    "(wel)": ("ipf",),
    "(mnw)": (None,),
    "(ghb)": ("conductance", "head"),
    "(chd)": ("head",),
    "(fhb)": (None,),
    "(fde)": (None,),
    "(tvc)": (None,),
}

73 

# Ordered MetaSWAP (cap) block variable names; the block's NSUB value selects
# how many of these are present (see _parse_capblock: 21, 22, or all 26).
#
# NOTE(review): "artifical_recharge_layer" and "conductivitiy_factor" are
# misspelled, but the spellings are load-bearing — they become keys in the
# parsed output — so they must not be "fixed" without migrating consumers.
METASWAP_VARS = (
    "boundary",
    "landuse",
    "rootzone_thickness",
    "soil_physical_unit",
    "meteo_station_number",
    "surface_elevation",
    "artificial_recharge",
    "artifical_recharge_layer",
    "artificial_recharge_capacity",
    "wetted_area",
    "urban_area",
    "urban_ponding_depth",
    "rural_ponding_depth",
    "urban_runoff_resistance",
    "rural_runoff_resistance",
    "urban_runon_resistance",
    "rural_runon_resistance",
    "urban_infiltration_capacity",
    "rural_infiltration_capacity",
    "perched_water_table_level",
    "soil_moisture_fraction",
    "conductivitiy_factor",
    "plot_number",
    "steering_location",
    "plot_drainage_level",
    "plot_drainage_resistance",
)

102 

103 

104class _LineIterator: 

105 """ 

106 Like iter(lines), but we can go back and we check if we're finished. 

107 """ 

108 

109 def __init__(self, lines: List[List[str]]): 

110 self.lines = lines 

111 self.count = -1 

112 self.length = len(lines) 

113 

114 def __iter__(self): 

115 return self 

116 

117 def __next__(self) -> List[str]: 

118 if self.finished: 

119 raise StopIteration 

120 self.count += 1 

121 return self.lines[self.count] 

122 

123 def back(self) -> None: 

124 self.count = max(self.count - 1, -1) 

125 

126 @property 

127 def finished(self) -> bool: 

128 return (self.count + 1) >= self.length 

129 

130 

131def _tokenize(line: str) -> List[str]: 

132 """ 

133 A value separator in Fortran list-directed input is: 

134 

135 * A comma if period decimal edit mode is POINT. 

136 * One or more contiguous spaces (blanks); no tabs. 

137 

138 Other remarks: 

139 

140 * Values, except for character strings, cannot contain blanks. 

141 * Strings may be unquoted if they do not start with a digit and no value 

142 separators. 

143 * Character strings can be quoted strings, using pairs of quotes ("), pairs 

144 of apostrophes ('). 

145 * A quote or apostrophe must be preceded by a value separator to initite a 

146 quoted string. 

147 * An empty entry consists of two consecutive commas (or semicolons). 

148 

149 For the use here (parsing IMOD's project files), we ignore: 

150 

151 * A semicolon value separator if period decimal edit mode is COMMA. 

152 * Complex constants given as two real constants separated by a comma and 

153 enclosed in parentheses. 

154 * Repetition counts: 4*(3.,2.) 2*, 4*'hello' 

155 

156 Furthermore, we do not expect commas inside of the project file entries, 

157 since we expect: 

158 

159 * Package names: unquoted character strings. 

160 * File paths: will not contain commas, no single apostrophe, nor a single 

161 quote symbol, may contain whitespace if quoted. * Integers for counts and 

162 settings. 

163 * Floats for addition and multiplication values. 

164 * Simple character strings for period names (summer, winter). These 

165 technically could contain commas if quoted, which is very unlikely. 

166 * No quotes or apostrophes are escaped. 

167 

168 With these assumptions, we can limit complexity considerably (see the 

169 PyLiDiRe link for a complete implementation): 

170 

171 * First we split by comma (we don't expected commas in quote strings). 

172 * Next we split by whitespace, unless quoted. 

173 

174 We can expect both single and double quotes, even within a single line: 

175 shlex.split() handles this. Note that additional entries are likely 

176 allowed, as the Fortran implementation only reads what is necessary, 

177 then stops parsing. 

178 

179 See also: 

180 * https://stackoverflow.com/questions/36165050/python-equivalent-of-fortran-list-directed-input 

181 * https://gitlab.com/everythingfunctional/PyLiDiRe 

182 * https://docs.oracle.com/cd/E19957-01/805-4939/6j4m0vnc5/index.html 

183 * The Fortran 2003 Handbook 

184 

185 Examples 

186 -------- 

187 

188 Raise ValueError, due to missing closing quotation. (Can be enabled 

189 shlex.split(s, posix=False)): 

190 

191 >> _tokenize("That's life") 

192 

193 >> _tokenize("That 's life'") 

194 >> ["That", "s life"] 

195 

196 >> _tokenize("That,'s life'") 

197 >> ["That", "s life"] 

198 """ 

199 values = [v.strip().replace("\\", "/") for v in line.split(",")] 

200 tokens = list(chain.from_iterable(shlex.split(v) for v in values)) 

201 return tokens 

202 

203 

204def _wrap_error_message( 

205 exception: Exception, description: str, lines: _LineIterator 

206) -> None: 

207 lines.back() 

208 content = next(lines) 

209 number = lines.count + 1 

210 raise type(exception)( 

211 f"{exception}\n" 

212 f"Failed to parse {description} for line {number} with content:\n{content}" 

213 ) 

214 

215 

216def _parse_blockheader(lines: _LineIterator) -> Tuple[int, str, str]: 

217 try: 

218 no_result = None, None, None 

219 line = next(lines) 

220 

221 # Skip if it's an empty line. 

222 if len(line) == 0: 

223 return no_result 

224 

225 first = line[0].lower() 

226 if first in ("periods", "species"): 

227 return 1, first, None 

228 # The line must contain atleast nper, key, active. 

229 elif len(line) >= 3: 

230 n = int(first) 

231 key = line[1].lower() 

232 active = line[2] 

233 return n, key, active 

234 # It's a comment or something. 

235 else: 

236 return no_result 

237 except Exception as e: 

238 _wrap_error_message(e, "block header", lines) 

239 

240 

241def _parse_time(lines: _LineIterator) -> str: 

242 try: 

243 line = next(lines) 

244 date = line[0].lower() 

245 if len(line) > 1: 

246 time = line[1] 

247 return f"{date} {time}" 

248 else: 

249 return date 

250 except Exception as e: 

251 _wrap_error_message(e, "date time", lines) 

252 

253 

254def _parse_blockline(lines: _LineIterator, time: str = None) -> Dict[str, Any]: 

255 try: 

256 line = next(lines) 

257 content = { 

258 "active": bool(int(line[0])), 

259 "is_constant": int(line[1]), 

260 "layer": int(line[2]), 

261 "factor": float(line[3]), 

262 "addition": float(line[4]), 

263 "constant": float(line[5]), 

264 } 

265 if content["is_constant"] == 2: 

266 content["path"] = Path(line[6]).resolve() 

267 if time is not None: 

268 content["time"] = time 

269 return content 

270 except Exception as e: 

271 _wrap_error_message(e, "entries", lines) 

272 

273 

274def _parse_nsub_nsystem(lines: _LineIterator) -> Tuple[int, int]: 

275 try: 

276 line = next(lines) 

277 n_entry = int(line[0]) 

278 n_system = int(line[1]) 

279 return n_entry, n_system 

280 except Exception as e: 

281 _wrap_error_message(e, "number of sub-entries and number of systems", lines) 

282 

283 

def _parse_notimeblock(
    lines: _LineIterator,
    fields: List[str],
) -> Dict[str, Any]:
    """
    Parse a block without a time dimension: one NSUB/NSYSTEM line, followed
    by n_system entry lines per field.

    Raises
    ------
    ValueError: if NSUB does not match the expected number of fields.
    """
    n_entry, n_system = _parse_nsub_nsystem(lines)

    if n_entry != len(fields):
        raise ValueError(
            f"Expected NSUB entry of {len(fields)} for {fields}, read: {n_entry}"
        )

    content: Dict[str, Any] = {}
    for field in fields:
        content[field] = [_parse_blockline(lines) for _ in range(n_system)]
    content["n_system"] = n_system
    return content

299 

300 

def _parse_capblock(
    lines: _LineIterator,
) -> Dict[str, Any]:
    """
    Parse the MetaSWAP (cap) block. NSUB selects which subset of
    METASWAP_VARS is present: 21, 22, or all 26 variables.

    Raises
    ------
    ValueError: if NSUB is not 21, 22, or 26.
    """
    fields = METASWAP_VARS
    n_entry, n_system = _parse_nsub_nsystem(lines)

    if n_entry == 21:
        # Remove layer entry.
        # BUGFIX: ``list.pop`` returns the *popped element*; the original
        # ``fields = list(fields[:22]).pop(8)`` bound that single string to
        # ``fields``, so the comprehension below iterated its characters.
        # NOTE(review): index 8 removes "artificial_recharge_capacity",
        # while the comment says "layer" — confirm the intended column.
        fields = list(fields[:22])
        del fields[8]
    elif n_entry == 22:
        fields = fields[:22]
    elif n_entry == 26:
        pass
    else:
        raise ValueError(
            f"Expected NSUB entry of 21, 22, or 26 for {fields}, read: {n_entry}"
        )

    content = {
        field: [_parse_blockline(lines) for _ in range(n_system)] for field in fields
    }
    content["n_system"] = n_system
    return content

324 

325 

326def _parse_extrablock(lines: _LineIterator, n: int) -> Dict[str, List[str]]: 

327 """Parse the MetaSWAP "extra files" block""" 

328 return {"paths": [next(lines) for _ in range(n)]} 

329 

330 

def _parse_timeblock(
    lines: _LineIterator,
    fields: List[str],
    n: int,
) -> Dict[str, Any]:
    """
    Parse a time-dependent block: n repetitions of a time line, an
    NSUB/NSYSTEM line, and n_system entry lines per field.

    Parameters
    ----------
    lines: the shared project file line iterator.
        (Annotation fixed: the original hint said ``List[str]`` although a
        ``_LineIterator`` is always passed, as in the sibling parsers.)
    fields: expected variable names for this block.
    n: number of stress periods (times) to read.

    Raises
    ------
    ValueError: if NSUB does not match the number of fields.
    """
    n_fields = len(fields)
    content = defaultdict(list)
    for _ in range(n):
        time = _parse_time(lines)
        content["time"].append(time)
        n_entry, n_system = _parse_nsub_nsystem(lines)

        if n_fields != n_entry:
            raise ValueError(
                f"Expected NSUB entry of {n_fields} for {fields}, read: {n_entry}"
            )

        for field in fields:
            content[field].extend(
                [_parse_blockline(lines, time) for _ in range(n_system)]
            )

    # NOTE(review): only the last stress period's n_system is kept; this
    # assumes NSYSTEM is constant across times — confirm for valid files.
    content["n_system"] = n_system
    return content

355 

356 

357def _parse_pcgblock(lines: _LineIterator) -> Dict[str, Any]: 

358 try: 

359 line = next(lines) 

360 

361 # TODO: which are optional? How many to expect? 

362 # Check for an empty line to terminate the block? 

363 types = { 

364 "mxiter": int, 

365 "iter1": int, 

366 "hclose": float, 

367 "rclose": float, 

368 "relax": float, 

369 "npcond": int, 

370 "iprpcg": int, 

371 "mutpcg": int, 

372 "damppcg": float, 

373 "damppcgt": float, 

374 "iqerror": int, 

375 "qerror": float, 

376 } 

377 

378 if len(line) == 12: 

379 line_iterator = iter(line) 

380 content = { 

381 k: valuetype(next(line_iterator)) for k, valuetype in types.items() 

382 } 

383 elif any("=" in s for s in line): 

384 pcglines = [line] + [next(lines) for _ in range(11)] 

385 content = {} 

386 for line in pcglines: 

387 # undo separation, partition on equality sign instead. 

388 line = "".join(line) 

389 key, _, value = line.lower().partition("=") 

390 value = types[key](value) 

391 content[key] = value 

392 else: 

393 raise ValueError( 

394 f"Expected 12 KEY = VALUE pairs, or 12 values. Found {len(line)}" 

395 ) 

396 

397 return content 

398 except Exception as e: 

399 _wrap_error_message(e, "PCG entry", lines) 

400 

401 

def _parse_periodsblock(lines: _LineIterator) -> Dict[str, str]:
    """
    Parse the "periods" block: alternating alias lines and date lines, until
    an empty line or the end of the file.
    """
    try:
        periods = {}
        while not lines.finished:
            line = next(lines)
            # An empty line terminates the block.
            if not line:
                break
            # The alias line is followed by its associated starting time.
            alias = line[0]
            periods[alias] = _parse_time(lines)
        return periods
    except Exception as e:
        _wrap_error_message(e, "periods data block", lines)

418 

419 

420def _parse_speciesblock(lines: _LineIterator): 

421 try: 

422 species = {} 

423 while not lines.finished: 

424 line = next(lines) 

425 # Stop if we encounter an empty line. 

426 if len(line) == 0: 

427 break 

428 name, nr = line 

429 nr = int(nr) 

430 species[nr] = name 

431 return species 

432 except Exception as e: 

433 _wrap_error_message(e, "species entry", lines) 

434 

435 

def _parse_block(lines: _LineIterator, content: Dict[str, Any]) -> None:
    """
    Parse one "topic" block from the project file and store its parsed
    content in ``content`` under the block's key.

    Mutates content dict.

    Parameters
    ----------
    lines: iterator over the tokenized project file lines; advanced past
        the block by the dispatched parser.
    content: accumulating dict of key -> parsed block content.

    Raises
    ------
    ValueError: for an unrecognized header keyword, or any parsing error
        re-raised with the offending keyword appended to its message.
    """
    n = key = active = None

    # A project file may contain any number of lines outside of a "topic"
    # block. _parse_blockheader will return triple None in that case.
    while key is None and not lines.finished:
        n, key, active = _parse_blockheader(lines)

    try:
        if key in KEYS:
            # Non-time-dependent packages always have N=1.
            if n != 1:
                raise ValueError(f"Expected N=1 for {key}, read: {n}")
            fields = KEYS[key]
            blockcontent = _parse_notimeblock(lines, fields)
        elif key in DATE_KEYS:
            fields = DATE_KEYS[key]
            blockcontent = _parse_timeblock(lines, fields, n)
        elif key == "(cap)":
            blockcontent = _parse_capblock(lines)
        elif key == "(pcg)":
            blockcontent = _parse_pcgblock(lines)
        elif key == "periods":
            blockcontent = _parse_periodsblock(lines)
        elif key == "species":
            blockcontent = _parse_speciesblock(lines)
        elif key == "extra":
            blockcontent = _parse_extrablock(lines, n)
        else:
            # Unrecognized keyword: rewind to report the offending line
            # with its 1-based number.
            other = ("(pcg)", "(gcg)", "(vdf)")
            options = tuple(KEYS.keys()) + tuple(DATE_KEYS.keys()) + other
            lines.back()
            line = next(lines)
            number = lines.count + 1
            raise ValueError(
                f"Failed to recognize header keyword: {key}. Expected one of keywords {options}"
                f"\nErrored in line {number} with entries:\n{line}"
            )

    except Exception as e:
        # Annotate any parser failure with the keyword being parsed.
        raise type(e)(f"{e}\nError occurred for keyword: {key}")

    # "periods"/"species" headers carry no active flag (active is None).
    if blockcontent is not None and active is not None:
        blockcontent["active"] = active

    content[key] = blockcontent
    return

485 

486 

487def _process_package_entry(entry: Dict): 

488 """ 

489 The iMOD project file supports constants in lieu of IDFs. 

490 """ 

491 coords = {"layer": entry["layer"]} 

492 dims = ("layer",) 

493 

494 if "path" not in entry: 

495 path = None 

496 header = {"coords": coords} 

497 value = entry["constant"] 

498 else: 

499 path = entry["path"] 

500 header = imod.idf.header(path, pattern="{name}") 

501 value = None 

502 

503 header["dims"] = dims 

504 return path, header, value 

505 

506 

507def _merge_coords(headers: List[Dict[str, Any]]) -> Dict[str, np.ndarray]: 

508 coords = defaultdict(list) 

509 for header in headers: 

510 for key, value in header["coords"].items(): 

511 coords[key].append(value) 

512 return {k: np.unique(coords[k]) for k in coords} 

513 

514 

def _create_datarray_from_paths(paths: List[str], headers: List[Dict[str, Any]]):
    """Lazily load the IDF files at ``paths`` into a DataArray, reusing the
    already-parsed ``headers`` instead of re-reading them from disk."""
    return imod.formats.array_io.reading._load(
        paths, use_cftime=False, _read=imod.idf._read, headers=headers
    )

520 

521 

def _create_dataarray_from_values(values: List[float], headers: List[Dict[str, Any]]):
    """Build a DataArray from constant values, shaped by the merged coords."""
    coords = _merge_coords(headers)
    # All headers of one variable share the same dims; take the first.
    dims = headers[0]["dims"]
    shape = [len(coord) for coord in coords.values()]
    return xr.DataArray(np.reshape(values, shape), dims=dims, coords=coords)

528 

529 

def _create_dataarray(
    paths: List[str], headers: List[Dict[str, Any]], values: List[float]
) -> xr.DataArray:
    """
    Create a DataArray from a list of IDF paths, or from constant values.

    Entries with a path are loaded from file; entries without one carry a
    constant value. When both kinds occur, the two DataArrays are merged
    with an outer join.

    Raises
    ------
    ValueError: if neither paths nor values contain a single valid entry.
        (The original code left ``da`` unbound in that case, raising an
        opaque UnboundLocalError.)
    """
    values_valid = []
    paths_valid = []
    headers_paths = []
    headers_values = []
    # Partition the entries into file-backed and constant ones.
    for path, header, value in zip(paths, headers, values):
        if path is None:
            headers_values.append(header)
            values_valid.append(value)
        else:
            headers_paths.append(header)
            paths_valid.append(path)

    if paths_valid and values_valid:
        dap = _create_datarray_from_paths(paths_valid, headers_paths)
        dav = _create_dataarray_from_values(values_valid, headers_values)
        # Merge on a temporary shared name, then extract the variable.
        dap.name = "tmp"
        dav.name = "tmp"
        da = xr.merge((dap, dav), join="outer")["tmp"]
    elif paths_valid:
        da = _create_datarray_from_paths(paths_valid, headers_paths)
    elif values_valid:
        da = _create_dataarray_from_values(values_valid, headers_values)
    else:
        raise ValueError("Neither paths nor constant values were provided.")

    return da

560 

561 

def _open_package_idf(
    block_content: Dict[str, Any], variables: Sequence[str]
) -> List[xr.DataArray]:
    """
    For every variable in the block, gather its entries and build a
    DataArray (from IDF paths and/or constants), stacked along layer.

    Returns a single-element list for uniformity with the multi-system
    boundary condition readers.
    """
    das = {}
    for variable in variables:
        paths = []
        headers = []
        values = []
        for entry in block_content[variable]:
            path, header, value = _process_package_entry(entry)
            # Stamp identity and layer info onto every header.
            header["name"] = variable
            header["dims"] = ["layer"]
            header["layer"] = entry["layer"]
            paths.append(path)
            headers.append(header)
            values.append(value)
        das[variable] = _create_dataarray(paths, headers, values)

    return [das]

583 

584 

585def _process_time(time: str, yearfirst: bool = True): 

586 if time == "steady-state": 

587 time = None 

588 else: 

589 if yearfirst: 

590 if len(time) == 19: 

591 time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S") 

592 elif len(time) == 10: 

593 time = datetime.strptime(time, "%Y-%m-%d") 

594 else: 

595 raise ValueError( 

596 f"time data {time} does not match format " 

597 '"%Y-%m-%d %H:%M:%S" or "%Y-%m-%d"' 

598 ) 

599 else: 

600 if len(time) == 19: 

601 time = datetime.strptime(time, "%d-%m-%Y %H:%M:%S") 

602 elif len(time) == 10: 

603 time = datetime.strptime(time, "%d-%m-%Y") 

604 else: 

605 raise ValueError( 

606 f"time data {time} does not match format " 

607 '"%d-%m-%Y %H:%M:%S" or "%d-%m-%Y"' 

608 ) 

609 return time 

610 

611 

612def _process_boundary_condition_entry(entry: Dict, periods: Dict[str, datetime]): 

613 """ 

614 The iMOD project file supports constants in lieu of IDFs. 

615 

616 Also process repeated stress periods (on a yearly basis): substitute the 

617 original date here. 

618 """ 

619 coords = {} 

620 timestring = entry["time"] 

621 

622 # Resolve repeating periods first: 

623 time = periods.get(timestring) 

624 if time is not None: 

625 repeat = time 

626 else: 

627 # this resolves e.g. "steady-state" 

628 time = _process_time(timestring) 

629 repeat = None 

630 

631 if time is None: 

632 dims = () 

633 else: 

634 dims = ("time",) 

635 coords["time"] = time 

636 

637 # 0 signifies that the layer must be determined on the basis of 

638 # bottom elevation and stage. 

639 layer = entry["layer"] 

640 if layer <= 0: 

641 layer is None 

642 else: 

643 coords["layer"] = layer 

644 dims = dims + ("layer",) 

645 

646 if "path" not in entry: 

647 path = None 

648 header = {"coords": coords} 

649 value = entry["constant"] 

650 else: 

651 path = entry["path"] 

652 header = imod.idf.header(path, pattern="{name}") 

653 value = None 

654 

655 header["dims"] = dims 

656 if layer is not None: 

657 header["layer"] = layer 

658 if time is not None: 

659 header["time"] = time 

660 

661 return path, header, value, repeat 

662 

663 

def _open_boundary_condition_idf(
    block_content, variables, periods: Dict[str, datetime]
) -> Tuple[List[Dict[str, xr.DataArray]], List[datetime]]:
    """
    Read the variables specified from block_content.

    Parameters
    ----------
    block_content: parsed content of a time-dependent block (e.g. riv, drn).
    variables: variable names expected in this block.
    periods: mapping from period alias to its datetime.

    Returns
    -------
    das: one dict of variable -> DataArray per system.
    repeats: sorted datetimes of the period aliases encountered.

    Raises
    ------
    ValueError: if the number of entries does not equal n_time * n_system.
    """
    n_system = block_content["n_system"]
    n_time = len(block_content["time"])
    n_total = n_system * n_time

    das = [{} for _ in range(n_system)]
    for variable in variables:
        variable_content = block_content[variable]

        n = len(variable_content)
        if n != n_total:
            raise ValueError(
                f"Expected n_time * n_system = {n_time} * {n_system} = "
                f"{n_total} entries for variable {variable}. Received: {n}"
            )

        # Group the paths and headers by system.
        # Entries are time-major: entry i belongs to system i % n_system.
        system_paths = defaultdict(list)
        system_headers = defaultdict(list)
        system_values = defaultdict(list)
        all_repeats = set()
        for i, entry in enumerate(variable_content):
            path, header, value, repeat = _process_boundary_condition_entry(
                entry, periods
            )
            header["name"] = variable
            key = i % n_system
            system_paths[key].append(path)
            system_headers[key].append(header)
            system_values[key].append(value)
            if repeat:
                all_repeats.add(repeat)

        # Concat one system at a time.
        for i, (paths, headers, values) in enumerate(
            zip(system_paths.values(), system_headers.values(), system_values.values())
        ):
            das[i][variable] = _create_dataarray(paths, headers, values)

    # NOTE(review): ``all_repeats`` is (re)bound inside the loop above, so
    # this assumes at least one variable — confirm callers never pass an
    # empty ``variables``.
    repeats = sorted(all_repeats)
    return das, repeats

710 

711 

def _read_package_gen(
    block_content: Dict[str, Any], has_topbot: bool
) -> List[Dict[str, Any]]:
    """
    Read the HFB GEN files into geodataframes, attaching the entry's layer
    and either a "resistance" column (when top and bottom are available) or
    a "multiplier" column.
    """
    out = []
    for entry in block_content["gen"]:
        gdf = imod.gen.read(entry["path"])
        # NOTE(review): factor * addition looks suspicious (value * factor +
        # addition is the usual convention elsewhere) — confirm against the
        # iMOD documentation before relying on this value.
        column = "resistance" if has_topbot else "multiplier"
        gdf[column] = entry["factor"] * entry["addition"]
        out.append(
            {
                "geodataframe": gdf,
                "layer": entry["layer"],
            }
        )
    return out

728 

729 

def _read_package_ipf(
    block_content: Dict[str, Any], periods: Dict[str, datetime]
) -> Tuple[List[Dict[str, Any]], List[datetime]]:
    """
    Read the WEL IPF files into dataframes.

    IPF files either contain the data directly (indexcol == 0) or reference
    associated per-point timeseries files. Period aliases are resolved via
    ``periods`` and collected as repeats.

    Returns
    -------
    out: list of dicts with "dataframe", "layer", "time" per entry.
    repeats: sorted datetimes of repeated stress period aliases.
    """
    out = []
    repeats = []
    for entry in block_content["ipf"]:
        timestring = entry["time"]
        layer = entry["layer"]
        time = periods.get(timestring)
        if time is None:
            time = _process_time(timestring)
        else:
            repeats.append(time)

        # Ensure the columns are identifiable.
        path = Path(entry["path"])
        ipf_df, indexcol, ext = imod.ipf._read_ipf(path)
        if indexcol == 0:
            # No associated files
            columns = ("x", "y", "rate")
            if layer <= 0:
                # Layer 0 means: derive the layer from top and bottom.
                df = ipf_df.iloc[:, :5]
                columns = columns + ("top", "bottom")
            else:
                df = ipf_df.iloc[:, :3]
            df.columns = columns
        else:
            dfs = []
            for row in ipf_df.itertuples():
                filename = row[indexcol]
                # BUGFIX: use the filename referenced by the IPF row; the
                # previous literal produced a nonexistent "(unknown)" path
                # and left ``filename`` unused.
                path_assoc = path.parent.joinpath(f"{filename}.{ext}")
                # Ensure the columns are identifiable.
                df_assoc = imod.ipf.read_associated(path_assoc).iloc[:, :2]
                df_assoc.columns = ["time", "rate"]
                df_assoc["x"] = row[1]
                df_assoc["y"] = row[2]
                df_assoc["id"] = path_assoc.stem
                if layer <= 0:
                    df_assoc["top"] = row[4]
                    df_assoc["bottom"] = row[5]
                dfs.append(df_assoc)
            df = pd.concat(dfs, ignore_index=True, sort=False)

        d = {
            "dataframe": df,
            "layer": layer,
            "time": time,
        }
        out.append(d)

    repeats = sorted(repeats)
    return out, repeats

781 

782 

def read_projectfile(path: FilePath) -> Dict[str, Any]:
    """
    Read an iMOD project file into a collection of nested dictionaries.

    The top-level keys are the "topic" entries such "bnd" or "riv" in the
    project file. An example structure of the dictionaries is visualized below:

    .. code-block::

        content
        ├── bnd
        │   ├── active: bool
        │   └── ibound: list of dictionaries for each layer
        ├── riv
        │   ├── active: bool
        │   ├── conductance: list of dictionaries for each time and layer.
        │   ├── stage: idem.
        │   ├── bottom_elevation: idem.
        │   └── infiltration_factor: idem.
        etc.

    Time and layer are flattened into a single list and time is included in
    every dictionary:

    .. code-block::

        stage
        ├── 0  # First entry in list
        │   ├── active: bool
        │   ├── is_constant: bool
        │   ├── layer: int
        │   ├── factor: float
        │   ├── addition: float
        │   ├── constant: float
        │   ├── path: str
        │   └── time: str

        ├── 1  # Second entry in list
        │   ├── etc.
        etc.


    Parameters
    ----------
    path: str or Path

    Returns
    -------
    content: Dict[str, Any]
    """
    # Force to Path
    path = Path(path)

    with open(path) as f:
        lines = f.readlines()

    tokenized = []
    for i, line in enumerate(lines):
        try:
            tokenized.append(_tokenize(line))
        except Exception as e:
            # BUGFIX: report a 1-based line number (enumerate is 0-based),
            # matching the numbering used by the block parse errors; chain
            # the original exception so its traceback is preserved.
            raise type(e)(f"{e}\nError occurred in line {i + 1}") from e

    lines = _LineIterator(tokenized)
    content = {}
    wdir = path.parent
    # Change dir temporarily to projectfile dir to resolve relative paths
    with imod.util.cd(wdir):
        while not lines.finished:
            _parse_block(lines, content)

    return content

855 

856 

def open_projectfile_data(path: FilePath) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Read the contents of an iMOD project file and read/open the data present in
    it:

    * IDF data is lazily loaded into xarray.DataArrays.
    * GEN data is eagerly loaded into geopandas.GeoDataFrames
    * IPF data is eagerly loaded into pandas.DataFrames
    * Non-file based entries (such as the PCG settings) are kept as a dictionary.

    When multiple systems are present, they are numbered starting from one, e.g.:

    * drn-1
    * drn-2

    Xarray requires valid dates for the time coordinate. Aliases such as
    "summer" and "winter" that are associated with dates in the project file
    Periods block cannot be used in the time coordinate. Hence, this function
    will instead insert the dates associated with the aliases, with the year
    replaced by 1899; as the iMOD calendar starts at 1900, this ensures that
    the repeats are always first and that no date collisions will occur.

    Parameters
    ----------
    path: pathlib.Path or str.

    Returns
    -------
    data: Dict[str, Any]
        Keys are the iMOD project file "topics", without parentheses;
        multi-system topics are numbered ("drn-1", "drn-2", ...).
    repeat_stress: Dict[str, List[datetime]]
        Per key, the dates of repeated stress periods; keys without repeats
        are dropped. (The original annotation claimed a single dict return;
        the function has always returned this 2-tuple.)
    """
    content = read_projectfile(path)
    periods_block = content.pop("periods", None)
    if periods_block is None:
        periods = {}
    else:
        # Set the year of a repeat date to 1899: this ensures it falls outside
        # of the iMOD calendar. Collisions are then always avoided.
        periods = {
            key: _process_time(time, yearfirst=False).replace(year=1899)
            for key, time in periods_block.items()
        }

    # Pop the species block; at the moment we do not do much with it,
    # since most regional models are without solute transport.
    content.pop("species", None)

    # Top and bottom are needed to interpret HFB entries as resistances.
    has_topbot = "(top)" in content and "(bot)" in content
    prj_data = {}
    repeat_stress = {}
    for key, block_content in content.items():
        repeats = None
        try:
            if key == "(hfb)":
                data = _read_package_gen(block_content, has_topbot)
            elif key == "(wel)":
                data, repeats = _read_package_ipf(block_content, periods)
            elif key == "(cap)":
                # Only open the MetaSWAP variables actually present.
                variables = set(METASWAP_VARS).intersection(block_content.keys())
                data = _open_package_idf(block_content, variables)
            elif key in ("extra", "(pcg)"):
                # Non-file based settings: keep the dict as-is.
                data = [block_content]
            elif key in KEYS:
                variables = KEYS[key]
                data = _open_package_idf(block_content, variables)
            elif key in DATE_KEYS:
                variables = DATE_KEYS[key]
                data, repeats = _open_boundary_condition_idf(
                    block_content, variables, periods
                )
            else:
                raise KeyError(f"Unsupported key: '{key}'")
        except Exception as e:
            raise type(e)(f"{e}. Errored while opening/reading data entries for: {key}")

        # Strip the parentheses from the topic key; number the systems when
        # more than one is present.
        strippedkey = key.strip("(").strip(")")
        if len(data) > 1:
            for i, da in enumerate(data):
                numbered_key = f"{strippedkey}-{i + 1}"
                prj_data[numbered_key] = da
                repeat_stress[numbered_key] = repeats
        else:
            prj_data[strippedkey] = data[0]
            repeat_stress[strippedkey] = repeats

    # Drop keys without any repeats.
    repeat_stress = {k: v for k, v in repeat_stress.items() if v}
    return prj_data, repeat_stress

944 

945 

946def read_timfile(path: FilePath) -> List[Dict]: 

947 def parsetime(time: str) -> datetime: 

948 # Check for steady-state: 

949 if time == "00000000000000": 

950 return None 

951 return datetime.strptime(time, "%Y%m%d%H%M%S") 

952 

953 with open(path, "r") as f: 

954 lines = f.readlines() 

955 

956 # A line contains 2, 3, or 4 values: 

957 # time, isave, nstp, tmult 

958 casters = { 

959 "time": parsetime, 

960 "save": lambda x: bool(int(x)), 

961 "n_timestep": int, 

962 "timestep_multiplier": float, 

963 } 

964 content = [] 

965 for line in lines: 

966 stripped = line.strip() 

967 if stripped == "": 

968 continue 

969 parts = stripped.split(",") 

970 entry = {k: cast(s.strip()) for s, (k, cast) in zip(parts, casters.items())} 

971 content.append(entry) 

972 

973 return content