pyhdf5_handler.src.hdf5_handler
from __future__ import annotations

import datetime
import gc
import numbers
import os
import time

import h5py
import numpy as np
import pandas as pd

from ..src import object_handler


def close_all_hdf5_file():
    """Close every h5py.File object still referenced by the interpreter."""
    for obj in gc.get_objects():  # browse through ALL objects
        if isinstance(obj, h5py.File):  # just HDF5 files
            try:
                print(f"try closing {obj}")
                obj.close()
            except Exception:
                pass  # was already closed


def open_hdf5(path, read_only=False, replace=False, wait_time=0):
    """
    Open or create an HDF5 file.

    Parameters
    ----------
    path : str
        The file path.

    read_only : bool
        If True, the HDF5 file is opened in read-only mode. Several processes
        can read the same HDF5 file simultaneously; this is not possible with
        the append 'a' or write 'w' access modes.

    replace : bool
        If True, an existing HDF5 file is erased.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened. Useful when
        several programs or threads need to read/write the same HDF5 database
        simultaneously.

    Returns
    -------
    f : h5py.File
        An HDF5 object.

    Examples
    --------
    >>> hdf5 = smash.tools.hdf5_handler.open_hdf5("./my_hdf5.hdf5")
    >>> hdf5.keys()
    >>> hdf5.attrs.keys()
    """
    f = None
    wait = 0
    while wait <= wait_time:
        f = None
        exist_file = True

        try:
            if read_only:
                if os.path.isfile(path):
                    f = h5py.File(path, "r")
                else:
                    exist_file = False
                    raise ValueError(f"File {path} does not exist.")
            else:
                if replace:
                    f = h5py.File(path, "w")
                else:
                    if os.path.isfile(path):
                        f = h5py.File(path, "a")
                    else:
                        f = h5py.File(path, "w")
        except Exception:
            pass

        if f is None:
            if not exist_file:
                print(f"File {path} does not exist.")
                return f
            else:
                print(f"The file {path} is unavailable, waiting {wait}/{wait_time}s")

            wait = wait + 1

            if wait_time > 0:
                time.sleep(1)
        else:
            break

    return f


def add_hdf5_sub_group(hdf5, subgroup=None):
    """
    Create a new subgroup in an HDF5 object.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object opened with open_hdf5().

    subgroup : str
        Path of the subgroup to create.

    Returns
    -------
    hdf5 : h5py.File
        The HDF5 object.

    Examples
    --------
    >>> hdf5 = smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
    >>> hdf5 = smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
    >>> hdf5.keys()
    >>> hdf5.attrs.keys()
    """
    if subgroup is not None:
        if subgroup == "":
            subgroup = "./"

        hdf5.require_group(subgroup)

    return hdf5


def _dump_object_to_hdf5_from_list_attribute(hdf5, instance, list_attr):
    """
    Dump an object to an HDF5 file from a list of attributes.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object.

    instance : object
        A custom python object.

    list_attr : list
        A list of attributes.
    """
    if isinstance(list_attr, list):
        for attr in list_attr:
            if isinstance(attr, str):
                _dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr)
            elif isinstance(attr, list):
                _dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr)
            elif isinstance(attr, dict):
                _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr)
            else:
                raise ValueError(
                    f"inconsistent {attr} in {list_attr}. {attr} must be an instance of dict, list or str"
                )
    else:
        raise ValueError(f"{list_attr} must be an instance of list.")


def _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, dict_attr):
    """
    Dump an object to an HDF5 file from a dictionary of attributes.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object.

    instance : object
        A custom python object.

    dict_attr : dict
        A dictionary of attributes.
    """
    if isinstance(dict_attr, dict):
        for attr, value in dict_attr.items():
            hdf5 = add_hdf5_sub_group(hdf5, subgroup=attr)

            try:
                sub_instance = getattr(instance, attr)
            except Exception:
                sub_instance = instance

            if isinstance(value, dict):
                _dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value)
            elif isinstance(value, list):
                _dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value)
            elif isinstance(value, str):
                _dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value)
            else:
                raise ValueError(
                    f"inconsistent '{attr}' in '{dict_attr}'. Dict({attr}) must be an instance of dict, list or str"
                )
    else:
        raise ValueError(f"{dict_attr} must be an instance of dict.")


def _dump_object_to_hdf5_from_str_attribute(hdf5, instance, str_attr):
    """
    Dump an object to an HDF5 file from a string attribute.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object.

    instance : object
        A custom python object.

    str_attr : str
        A string attribute.
    """
    if isinstance(str_attr, str):
        value = None
        try:
            value = getattr(instance, str_attr)

            if isinstance(value, (np.ndarray, list)):
                # TODO: do the same as save_dict_to_hdf5
                if isinstance(value, list):
                    value = np.array(value)

                if value.dtype == "object" or value.dtype.char == "U":
                    value = value.astype("S")

                # remove the dataset if it already exists
                if str_attr in hdf5.keys():
                    del hdf5[str_attr]

                hdf5.create_dataset(
                    str_attr,
                    shape=value.shape,
                    dtype=value.dtype,
                    data=value,
                    compression="gzip",
                    chunks=True,
                )
            elif value is None:
                hdf5.attrs[str_attr] = "_None_"
            elif isinstance(value, str):
                hdf5.attrs[str_attr] = value.encode()
            else:
                hdf5.attrs[str_attr] = value
        except Exception:
            raise ValueError(
                f"Unable to dump attribute {str_attr} with value {value} from {instance}"
            )
    else:
        raise ValueError(f"{str_attr} must be an instance of str.")


def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None):
    """
    Dump an object to an HDF5 file from an iterable (list or dict) of attributes.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object.
    instance : object
        A custom python object.
    iteratable : list | dict
        A list or a dict of attributes.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model.run(inplace=True)
    >>>
    >>> hdf5 = smash.tools.hdf5_handler.open_hdf5("./model.hdf5", replace=True)
    >>> hdf5 = smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model1")
    >>> keys_data = smash.io.hdf5_io.generate_smash_object_structure(model, typeofstructure="medium")
    >>> smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data)
    >>>
    >>> hdf5 = smash.tools.hdf5_handler.open_hdf5("./model.hdf5", replace=False)
    >>> hdf5 = smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model2")
    >>> keys_data = smash.io.hdf5_io.generate_smash_object_structure(model, typeofstructure="light")
    >>> smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data)
    """
    if isinstance(iteratable, list):
        _dump_object_to_hdf5_from_list_attribute(hdf5, instance, iteratable)
    elif isinstance(iteratable, dict):
        _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, iteratable)
    else:
        raise ValueError(f"{iteratable} must be an instance of list or dict.")


# Each _hdf5_handle_* helper builds the description dict consumed by
# _hdf5_create_dataset: the dataset name, the original type (stored as the
# companion '_<name>' attribute), the value to write, its shape and its
# HDF5 dtype.

def _hdf5_handle_str(name, value):
    dataset = {
        "name": name,
        "attr_value": str(type(value)),
        "dataset_value": value,
        "shape": 1,
        "dtype": h5py.string_dtype(encoding="utf-8"),
    }
    return dataset


def _hdf5_handle_numbers(name: str, value: numbers.Number):
    arr = np.array([value])
    dataset = {
        "name": name,
        "attr_value": str(type(value)),
        "dataset_value": arr,
        "shape": arr.shape,
        "dtype": arr.dtype,
    }
    return dataset


def _hdf5_handle_none(name: str, value: None):
    dataset = {
        "name": name,
        "attr_value": "_None_",
        "dataset_value": "_None_",
        "shape": 1,
        "dtype": h5py.string_dtype(encoding="utf-8"),
    }
    return dataset


def _hdf5_handle_timestamp(name: str, value: (pd.Timestamp | np.datetime64 | datetime.date)):
    dtype = type(value)

    if isinstance(value, np.datetime64):
        value = value.tolist()  # convert to datetime.datetime

    dataset = {
        "name": name,
        "attr_value": str(dtype),
        "dataset_value": value.strftime("%Y-%m-%d %H:%M"),
        "shape": 1,
        "dtype": h5py.string_dtype(encoding="utf-8"),
    }
    return dataset


def _hdf5_handle_DatetimeIndex(name: str, value: pd.DatetimeIndex):
    dataset = _hdf5_handle_array(name, value)
    return dataset


def _hdf5_handle_list(name: str, value: (list | tuple)):
    arr = np.array(value)
    dataset = _hdf5_handle_array(name, arr)
    return dataset


def _hdf5_handle_array(name: str, value: np.ndarray):
    dtype_attr = type(value)
    dtype = value.dtype

    if value.dtype.char == "M":
        # datetime64 array: store as formatted date strings
        list_date = value.tolist()
        list_date_str = list()
        for date in list_date:
            list_date_str.append(date.strftime("%Y-%m-%d %H:%M"))
        value = np.array(list_date_str)
        value = value.astype("O")
        dtype = h5py.string_dtype(encoding="utf-8")
    elif value.dtype == "object":
        value = value.astype("S")
        dtype = h5py.string_dtype(encoding="utf-8")
    elif value.dtype.char == "U":
        value = value.astype("S")
        dtype = h5py.string_dtype(encoding="utf-8")

    dataset = {
        "name": name,
        "attr_value": str(dtype_attr),
        "dataset_value": value,
        "shape": value.shape,
        "dtype": dtype,
    }
    return dataset


def _hdf5_handle_ndarray(hdf5: h5py.File, name: str, value: np.ndarray):
    # structured ndarray: stored as a dedicated subgroup
    hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)
    _dump_ndarray_to_hdf5(hdf5[name], value)


def _hdf5_create_dataset(hdf5: h5py.File, dataset: dict):
    if dataset["name"] in hdf5.keys():
        del hdf5[dataset["name"]]

    hdf5.create_dataset(
        dataset["name"],
        shape=dataset["shape"],
        dtype=dataset["dtype"],
        data=dataset["dataset_value"],
        compression="gzip",
        chunks=True,
    )

    # store the original datatype in a companion '_<name>' attribute
    if "_" + dataset["name"] in list(hdf5.attrs.keys()):
        del hdf5.attrs["_" + dataset["name"]]

    hdf5.attrs["_" + dataset["name"]] = dataset["attr_value"]


def hdf5_dataset_creator(hdf5: h5py.File, name: str, value):
    """
    Write any value to an HDF5 object.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object.

    name : str
        Name of the dataset.

    value : any
        Value to write to the HDF5 file.
    """
    if isinstance(value, str):
        dataset = _hdf5_handle_str(name, value)
    elif isinstance(value, numbers.Number):
        dataset = _hdf5_handle_numbers(name, value)
    elif value is None:
        dataset = _hdf5_handle_none(name, value)
    elif isinstance(value, (pd.Timestamp, np.datetime64, datetime.date)):
        dataset = _hdf5_handle_timestamp(name, value)
    elif isinstance(value, pd.DatetimeIndex):
        dataset = _hdf5_handle_DatetimeIndex(name, value)
    elif isinstance(value, (list, tuple)):
        dataset = _hdf5_handle_list(name, value)
    elif isinstance(value, np.ndarray):
        if len(value.dtype) > 0 and len(value.dtype.names) > 0:
            # structured ndarray: written as a group, no flat dataset to create
            _hdf5_handle_ndarray(hdf5, name, value)
            return
        else:
            dataset = _hdf5_handle_array(name, value)
    else:
        # any other object: stored as a subgroup built from its attributes
        hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)
        newdict = object_handler.read_object_as_dict(value)
        save_dict_to_hdf5(hdf5[name], newdict)
        return

    _hdf5_create_dataset(hdf5, dataset)


def _dump_ndarray_to_hdf5(hdf5, value):
    """
    Dump a structured ndarray to an HDF5 file: this function creates a group
    ndarray_ds and stores each component of the ndarray as a dataset. It also
    adds two datasets storing the dtypes (ndarray_dtype) and the labels
    (ndarray_indexes).

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object.

    value : ndarray
        A structured ndarray mixing different datatypes.
    """
    hdf5 = add_hdf5_sub_group(hdf5, subgroup="ndarray_ds")
    hdf5_data = hdf5["ndarray_ds"]

    for item in value.dtype.names:
        hdf5_dataset_creator(hdf5=hdf5_data, name=item, value=value[item])

    index = np.array(value.dtype.descr)[:, 0]
    dtype = np.array(value.dtype.descr)[:, 1]
    index = index.astype("O")
    dtype = dtype.astype("O")
    data_type = h5py.string_dtype(encoding="utf-8")

    if "ndarray_dtype" in hdf5_data.keys():
        del hdf5_data["ndarray_dtype"]

    hdf5_data.create_dataset(
        "ndarray_dtype",
        shape=dtype.shape,
        dtype=data_type,
        data=dtype,
        compression="gzip",
        chunks=True,
    )

    if "ndarray_indexes" in hdf5_data.keys():
        del hdf5_data["ndarray_indexes"]

    hdf5_data.create_dataset(
        "ndarray_indexes",
        shape=index.shape,
        dtype=data_type,
        data=index,
        compression="gzip",
        chunks=True,
    )


def _read_ndarray_datastructure(hdf5):
    """
    Read a structured ndarray from an HDF5 file.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object at the root of the ndarray data structure.

    Return
    ------
    ndarray : the reconstructed structured ndarray
    """
    if "ndarray_ds" in list(hdf5.keys()):
        decoded_item = list()
        for it in hdf5["ndarray_ds/ndarray_dtype"][:]:
            decoded_item.append(it.decode())
        list_dtypes = decoded_item

        decoded_item = list()
        for it in hdf5["ndarray_ds/ndarray_indexes"][:]:
            decoded_item.append(it.decode())
        list_indexes = decoded_item

        len_data = len(hdf5[f"ndarray_ds/{list_indexes[0]}"][:])

        list_datatype = list()
        for i in range(len(list_indexes)):
            list_datatype.append((list_indexes[i], list_dtypes[i]))

        datatype = np.dtype(list_datatype)
        ndarray = np.zeros(len_data, dtype=datatype)

        for i in range(len(list_indexes)):
            expected_type = list_dtypes[i]
            values = hdf5_read_dataset(hdf5[f"ndarray_ds/{list_indexes[i]}"], expected_type)
            ndarray[list_indexes[i]] = values

        return ndarray


def save_dict_to_hdf5(hdf5, dictionary):
    """
    Dump a dictionary to an HDF5 file.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 object.

    dictionary : dict
        A custom python dictionary.
    """
    if isinstance(dictionary, dict):
        for attr, value in dictionary.items():
            try:
                # '/' and ' ' are not allowed in dataset names
                attribute_name = str(attr)
                for character in "/ ":
                    attribute_name = attribute_name.replace(character, "_")

                if isinstance(value, dict):
                    hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
                    save_dict_to_hdf5(hdf5[attribute_name], value)
                else:
                    hdf5_dataset_creator(hdf5, attribute_name, value)
            except Exception:
                raise ValueError(
                    f"Unable to save attribute {str(attr)} with value {value}")
    else:
        raise ValueError(f"{dictionary} must be an instance of dict.")


def save_dict_to_hdf5file(path_to_hdf5, dictionary=None, location="./", replace=False, wait_time=0):
    """
    Dump a dictionary to an HDF5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        Path to the HDF5 file.

    dictionary : dict | None
        A dictionary containing the data to be saved.

    location : str
        Path location or subgroup where data are written in the HDF5 file.

    replace : bool
        Replace an existing HDF5 file. Default is False.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened and the function
        returns None. Useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model.run(inplace=True)
    >>>
    >>> smash.tools.hdf5_handler.save_dict_to_hdf5file("saved_dictionary.hdf5", mesh)
    """
    if isinstance(dictionary, dict):
        hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

        if hdf5 is None:
            return

        hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
        save_dict_to_hdf5(hdf5[location], dictionary)
    else:
        raise ValueError(f"The input {dictionary} must be an instance of dict.")

    hdf5.close()


def save_object_to_hdf5file(
    path_to_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False, wait_time=0
):
    """
    Dump an object to an HDF5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        Path to the HDF5 file.

    instance : object
        A custom python object to be saved into the HDF5 file.

    keys_data : list | dict
        Optional, a list or a dictionary of the attributes to be saved.

    location : str
        Path location or subgroup where data are written in the HDF5 file.

    sub_data : dict | None
        Optional, an extra dictionary of data to be saved along with the object.

    replace : bool
        Replace an existing HDF5 file. Default is False.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened and the function
        returns None. Useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.
    """
    if keys_data is None:
        keys_data = object_handler.generate_object_structure(instance)

    hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

    if hdf5 is None:
        return None

    hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)

    _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data)

    if isinstance(sub_data, dict):
        save_dict_to_hdf5(hdf5[location], sub_data)

    hdf5.close()


def read_hdf5file_as_dict(path_to_hdf5, location="./", wait_time=0, read_attrs=True, read_dataset_attrs=False):
    """
    Open an HDF5 file, read its content as a dictionary and close it.

    Parameters
    ----------
    path_to_hdf5 : str
        Path to the HDF5 file.

    location : str
        Place in the HDF5 file from which reading starts.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened and the function
        returns None. Useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.

    read_attrs : bool
        Read and import attributes into the dictionary.

    read_dataset_attrs : bool
        Read and import the special attributes attached to each dataset by
        pyhdf5_handler. These attributes only store the original datatype of
        the data stored in the dataset.

    Return
    ------
    dictionary : dict, a dictionary of all keys and attributes included in the HDF5 file

    Examples
    --------
    Read an HDF5 file:
    >>> dictionary = hdf5_handler.read_hdf5file_as_dict("./multi_model.hdf5", location="model1")
    """
    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    dictionary = read_hdf5_as_dict(hdf5[location], read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs)

    hdf5.close()

    return dictionary


def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False):
    """
    Load an HDF5 instance into a dictionary.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 instance, opened with the function open_hdf5().

    read_attrs : bool
        Read and import attributes into the dictionary.

    read_dataset_attrs : bool
        Read and import the special attributes attached to each dataset by
        pyhdf5_handler. These attributes only store the original datatype of
        the data stored in the dataset.

    Return
    ------
    dictionary : dict, a dictionary of all keys and attributes included in the HDF5 file

    Examples
    --------
    Read only a part of an HDF5 file:
    >>> hdf5 = hdf5_handler.open_hdf5("./multi_model.hdf5")
    >>> dictionary = hdf5_handler.read_hdf5_as_dict(hdf5["model1"])
    >>> dictionary.keys()
    """
    if not isinstance(hdf5, (h5py.File, h5py.Group, h5py.Dataset, h5py.Datatype)):
        print("Error: input arg is not an instance of h5py.File()")
        return {}

    dictionary = {}

    for key, item in hdf5.items():
        if isinstance(item, h5py.Group):
            if key == "ndarray_ds":
                # the whole group encodes a single structured ndarray
                return _read_ndarray_datastructure(hdf5)
            else:
                dictionary.update({key: read_hdf5_as_dict(item)})

        if isinstance(item, h5py.Dataset):
            if "_" + key in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + key]
                values = hdf5_read_dataset(item, expected_type)
            else:
                values = item[:]

            dictionary.update({key: values})

    list_attribute = []
    if read_attrs or read_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        hdf5_item_matching_attributes = ["_" + element for element in list(hdf5.keys())]

        if read_attrs:
            list_attribute.extend(
                list(filter(lambda l: l not in hdf5_item_matching_attributes, tmp_list_attribute)))

        if read_dataset_attrs:
            list_attribute.extend(
                list(filter(lambda l: l in hdf5_item_matching_attributes, tmp_list_attribute)))

    for key in list_attribute:
        dictionary.update({key: hdf5.attrs[key]})

    return dictionary


def hdf5_read_dataset(item, expected_type=None):
    """
    Read a dataset stored in an HDF5 database.

    Parameters
    ----------
    item : h5py.Dataset
        An HDF5 dataset/item.

    expected_type : str
        The expected type as a string, as produced by str(type(...)).

    Return
    ------
    value : the value read from the HDF5 file, any type matching the expected type
    """
    if expected_type == str(type("str")):
        values = item[0].decode()
    elif expected_type == str(type(1.0)):
        values = item[0]
    elif expected_type == "_None_":
        values = None
    elif expected_type in (str(pd.Timestamp), str(np.datetime64), str(datetime.datetime)):
        if expected_type == str(pd.Timestamp):
            values = pd.Timestamp(item[0].decode())
        elif expected_type == str(np.datetime64):
            values = np.datetime64(item[0].decode())
        elif expected_type == str(datetime.datetime):
            values = datetime.datetime.fromisoformat(item[0].decode())
        else:
            values = item[0].decode()
    else:
        if item[:].dtype.char == "S":
            values = item[:].astype("U")
        elif item[:].dtype.char == "O":
            # decode the list if required
            decoded_item = list()
            for it in item[:]:
                decoded_item.append(it.decode())
            values = decoded_item
        else:
            values = item[:]

    return values


def get_hdf5file_attribute(path_to_hdf5=str(), location="./", attribute=None, wait_time=0):
    """
    Get the value of an attribute in an HDF5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        The path to the HDF5 file.

    location : str
        Path inside the HDF5 file where the attribute is stored.

    attribute : str
        Attribute name.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened and the function
        returns None. Useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.

    Return
    ------
    return_attribute : the value of the attribute

    Examples
    --------
    Get an attribute:
    >>> attribute = hdf5_handler.get_hdf5file_attribute("./multi_model.hdf5", attribute=my_attribute_name)
    """
    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    hdf5 = hdf5_base[location]

    return_attribute = hdf5.attrs[attribute]

    hdf5_base.close()

    return return_attribute


def get_hdf5file_dataset(path_to_hdf5=str(), location="./", dataset=None, wait_time=0):
    """
    Get the value of a dataset in an HDF5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        The path to the HDF5 file.

    location : str
        Path inside the HDF5 file where the dataset is stored.

    dataset : str
        Dataset name.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened and the function
        returns None. Useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.

    Return
    ------
    return_dataset : the value of the dataset

    Examples
    --------
    Get a dataset:
    >>> dataset = hdf5_handler.get_hdf5file_dataset("./multi_model.hdf5", dataset=my_dataset_name)
    """
    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    hdf5 = hdf5_base[location]

    if "_" + dataset in hdf5.attrs.keys():
        expected_type = hdf5.attrs["_" + dataset]
        return_dataset = hdf5_read_dataset(hdf5[dataset], expected_type)
    else:
        return_dataset = hdf5[dataset][:]

    hdf5_base.close()

    return return_dataset


def get_hdf5file_item(path_to_hdf5=str(), location="./", item=None, wait_time=0, search_attrs=False):
    """
    Get a custom item in an HDF5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        The path to the HDF5 file.

    location : str
        Path inside the HDF5 file where the item is stored. If item is None,
        item is set to basename(location).

    item : str
        Item name.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened and the function
        returns None. Useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.

    search_attrs : bool
        Default is False. If True, the function also searches for the item in
        the attributes first.

    Return
    ------
    return : custom value; can be an HDF5 object (group), a numpy array, a string, a float, an int...

    Examples
    --------
    Get the dataset 'dataset':
    >>> dataset = hdf5_handler.get_hdf5file_item("./multi_model.hdf5", location="path/in/hdf5/dataset")
    """
    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    hdf5_item = get_hdf5_item(hdf5_instance=hdf5, location=location, item=item, search_attrs=search_attrs)

    hdf5.close()

    return hdf5_item


def get_hdf5_item(hdf5_instance=None, location="./", item=None, search_attrs=False):
    """
    Get a custom item in an HDF5 instance.

    Parameters
    ----------
    hdf5_instance : h5py.File
        An HDF5 instance.

    location : str
        Path inside the HDF5 file where the item is stored. If item is None,
        item is set to basename(location).

    item : str
        Item name.

    search_attrs : bool
        Default is False. If True, the function searches for the item in the
        attributes first.

    Return
    ------
    return : custom value; can be an HDF5 object (group), a numpy array, a string, a float, an int...

    Examples
    --------
    Get the dataset 'dataset':
    >>> hdf5 = hdf5_handler.open_hdf5("./multi_model.hdf5")
    >>> dataset = hdf5_handler.get_hdf5_item(hdf5, location="path/in/hdf5/dataset")
    """
    if item is None and isinstance(location, str):
        head, tail = os.path.split(location)
        if len(tail) > 0:
            item = tail
            location = head

    if not isinstance(item, str):
        print(f"Bad search item: {item}")
        return None

    hdf5 = hdf5_instance[location]

    # first search in the attributes
    if search_attrs:
        list_attribute = hdf5.attrs.keys()
        if item in list_attribute:
            return hdf5.attrs[item]

    # then search in groups and datasets
    list_keys = hdf5.keys()
    if item in list_keys:
        hdf5_item = hdf5[item]

        if isinstance(hdf5_item, h5py.Group):
            if item == "ndarray_ds":
                return _read_ndarray_datastructure(hdf5)
            else:
                returned_dict = read_hdf5_as_dict(hdf5_item)
                return returned_dict
        elif isinstance(hdf5_item, h5py.Dataset):
            if "_" + item in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + item]
                values = hdf5_read_dataset(hdf5_item, expected_type)
            else:
                values = hdf5_item[:]

            return values
        else:
            return hdf5_item
    else:
        return None


def search_in_hdf5file(path_to_hdf5, key=None, location="./", wait_time=0, search_attrs=False):
    """
    Search for a key in an HDF5 file and return a list of
    {path, key, datatype, value} dictionaries. Value and key are returned only
    if the key is an attribute or a dataset (None otherwise).

    Parameters
    ----------
    path_to_hdf5 : str
        The path to the HDF5 file.

    key : str
        Key to search for in the HDF5 file.

    location : str
        Path inside the HDF5 file where the search starts.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened and the function
        returns None. Useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.

    search_attrs : bool
        Default False, search in the attributes.

    Return
    ------
    results : the list of matches

    Examples
    --------
    Search in an HDF5 file:
    >>> matchkey = hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV', location="./")
    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    results = search_in_hdf5(hdf5, key, location=location, search_attrs=search_attrs)

    hdf5.close()

    return results


def search_in_hdf5(hdf5_base, key=None, location="./", search_attrs=False):
    """
    Search for a key in an HDF5 instance and return a list of
    {path, key, datatype, value} dictionaries. Value and key are returned only
    if the key is an attribute or a dataset (None otherwise).

    Parameters
    ----------
    hdf5_base : h5py.File
        An opened HDF5 instance.

    key : str
        Key to search for in the HDF5 file.

    location : str
        Path inside the HDF5 file where the search starts.

    search_attrs : bool
        Default False, search in the attributes.

    Return
    ------
    result : the list of matches

    Examples
    --------
    Search in an HDF5 instance:
    >>> hdf5 = hdf5_handler.open_hdf5(hdf5_file)
    >>> matchkey = hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV', location="./")
    >>> hdf5.close()
    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    result = []

    hdf5 = hdf5_base[location]

    if search_attrs:
        list_attribute = hdf5.attrs.keys()

        if key in list_attribute:
            result.append({"path": location, "key": key,
                           "datatype": "attribute", "value": hdf5.attrs[key]})

    for hdf5_key, item in hdf5.items():
        if isinstance(item, h5py.Group):
            sub_location = os.path.join(location, hdf5_key)

            if hdf5_key == key:
                if "ndarray_ds" in item.keys():
                    result.append({"path": sub_location,
                                   "key": None,
                                   "datatype": "ndarray",
                                   "value": _read_ndarray_datastructure(item)})
                else:
                    result.append({"path": sub_location, "key": None,
                                   "datatype": "group", "value": None})

            res = search_in_hdf5(hdf5_base, key, sub_location, search_attrs=search_attrs)

            for element in res:
                result.append(element)

        if isinstance(item, h5py.Dataset):
            if hdf5_key == key:
                if item[:].dtype.char == "S":
                    values = item[:].astype("U")
                elif item[:].dtype.char == "O":
                    # decode the list if required
                    decoded_item = list()
                    for it in item[:]:
                        decoded_item.append(it.decode())
                    values = decoded_item
                else:
                    values = item[:]

                result.append({"path": location,
                               "key": key,
                               "datatype": "dataset",
                               "value": values})

    return result


def hdf5file_view(path_to_hdf5, location="./", max_depth=None, level_base='>', level_sep="--", depth=None, wait_time=0, list_attrs=True, list_dataset_attrs=False, return_view=False):
    """
    Print (or return) a tree view of the groups, datasets and attributes of an
    HDF5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        Path to an HDF5 database.

    location : str
        Path inside the HDF5 file where the listing starts.

    max_depth : int
        Maximum depth of the walk through the HDF5 file.

    level_base : str
        String used as separator at the lowest level (default '>').

    level_sep : str
        String used as separator at higher levels (default '--').

    depth : int
        Current depth level.

    list_attrs : bool
        Default is True, list the attributes.

    list_dataset_attrs : bool
        Default is False, list the special attributes defined for each dataset
        by pyhdf5_handler.

    return_view : bool
        Return the view as a list of strings (do not print to screen).

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds; after that the file is not opened and the function
        returns None. Useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.

    Return
    ------
    result : optional, the view of the HDF5 file

    Examples
    --------
    View an HDF5 file:
    >>> hdf5_handler.hdf5file_view(hdf5filename, location="./")
    """
    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    results = hdf5_view(hdf5,
                        location=location,
                        max_depth=max_depth,
                        level_base=level_base,
                        level_sep=level_sep,
                        depth=depth,
                        list_attrs=list_attrs,
                        list_dataset_attrs=list_dataset_attrs,
                        return_view=return_view)

    hdf5.close()

    return results


def hdf5file_ls(path_to_hdf5, location='./'):
    """
    List the datasets in an HDF5 file.

    Parameters
    ----------
    path_to_hdf5 : str
        Path to an HDF5 file.

    location : str
        Path inside the HDF5 file where the listing starts.

    Example
    -------
    >>> hdf5file_ls("test.hdf5")
    """
    hdf5 = open_hdf5(path_to_hdf5, read_only=True)

    if hdf5 is None:
        return

    hdf5_view(hdf5, location=location, max_depth=0, level_base='>', level_sep="--", list_attrs=False, return_view=False)

    hdf5.close()


def hdf5_ls(hdf5):
    """
    List the datasets in an HDF5 instance.

    Parameters
    ----------
    hdf5 : h5py.File
        An HDF5 instance.

    Example
    -------
    >>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
    >>> hdf5_ls(hdf5)
    """
    hdf5_view(hdf5, location="./", max_depth=0, level_base='>', level_sep="--", list_attrs=False, return_view=False)


def hdf5_view(hdf5_obj, location="./", max_depth=None, level_base='>', level_sep="--", depth=None, list_attrs=True, list_dataset_attrs=False, return_view=False):
    """
    Recursively list all datasets (and attributes) of an HDF5 object.

    Parameters
    ----------
    hdf5_obj : h5py.File
        An opened HDF5 instance.

    location : str
        Path inside the HDF5 file where the listing starts.

    max_depth : int
        Maximum depth of the walk through the HDF5 file.

    level_base : str
        String used as separator at the lowest level (default '>').

    level_sep : str
        String used as separator at higher levels (default '--').

    depth : int
        Current depth level.

    list_attrs : bool
        Default is True, list the attributes.

    list_dataset_attrs : bool
        Default is False, list the special attributes defined for each dataset
        by pyhdf5_handler.

    return_view : bool
        Return the view as a list of strings.

    Return
    ------
    result : optional, the view of the HDF5 file

    Examples
    --------
    View an HDF5 instance:
    >>> hdf5 = hdf5_handler.open_hdf5(hdf5_file)
    >>> view = hdf5_handler.hdf5_view(hdf5, return_view=True)
    >>> hdf5.close()
    """
    result = []

    if max_depth is not None:
        if depth is not None:
            depth = depth + 1
        else:
            depth = 0

        if depth > max_depth:
            return result

    hdf5 = hdf5_obj[location]

    list_attribute = []
    if list_attrs or list_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        list_keys_matching_attributes = ["_" + element for element in list(hdf5.keys())]

        if list_attrs:
            list_attribute.extend(
                list(filter(lambda l: l not in list_keys_matching_attributes, tmp_list_attribute)))

        if list_dataset_attrs:
            list_attribute.extend(
                list(filter(lambda l: l in list_keys_matching_attributes, tmp_list_attribute)))

    for key in list_attribute:
        values = hdf5.attrs[key]
        sub_location = os.path.join(location, key)
        if isinstance(values, (int, float, np.int64, np.float64, np.int32, np.float32, np.bool_)):
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, value={values}")
        elif isinstance(values, str) and len(values) < 20:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values}")
        else:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values[0:20]}...")

        if not return_view:
            print(result[-1])

    for hdf5_key, item in hdf5.items():
        if isinstance(item, h5py.Group):
            sub_location = os.path.join(location, hdf5_key)

            if "ndarray_ds" in item.keys():
                result.append(f"{level_base}| {sub_location}, ndarray")
            else:
                result.append(f"{level_base}| {sub_location}, group")

            if not return_view:
                print(result[-1])

            res = hdf5_view(hdf5_obj, sub_location, max_depth=max_depth,
                            level_base=level_base + level_sep, depth=depth, return_view=True)

            for line in res:
                result.append(line)
                if not return_view:
                    print(line)

        if isinstance(item, h5py.Dataset):
            if item[:].dtype.char == "S":
                values = item[:].astype("U")
            else:
                values = item[:]

            sub_location = os.path.join(location, hdf5_key)
            result.append(
                f"{level_base}| {sub_location}, dataset, type={type(values)}, shape={values.shape}")

            if not return_view:
                print(result[-1])

    if return_view:
        return result
def open_hdf5(path, read_only=False, replace=False, wait_time=0)
Open or create an HDF5 file.
Parameters
path : str The file path.
read_only : boolean If true, the HDF5 file is opened in read-only mode. Several processes can read the same HDF5 file simultaneously; this is not possible with the append 'a' or write 'w' access modes.
replace: Boolean If true, an existing HDF5 file is erased.
wait_time: int If the HDF5 file is unavailable, the function retries for at most wait_time seconds; after that the file is not opened. This parameter is useful if several programs or threads need to read/write simultaneously in the same HDF5 database.
Returns
f : h5py.File An HDF5 object.
Examples
>>> hdf5=smash.tools.hdf5_handler.open_hdf5("./my_hdf5.hdf5")
>>> hdf5.keys()
>>> hdf5.attrs.keys()
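A minimal sketch of the read-only/retry usage (assuming the package is importable as below; the file name is illustrative):

>>> from pyhdf5_handler.src import hdf5_handler
>>> f = hdf5_handler.open_hdf5("./my_hdf5.hdf5", read_only=True, wait_time=5)  # retry for up to 5 s
>>> if f is not None:
...     print(list(f.keys()))
...     f.close()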
def add_hdf5_sub_group(hdf5, subgroup=None)
Create a new subgroup in an HDF5 object
Parameters
hdf5 : h5py.File An hdf5 object opened with open_hdf5()
subgroup: str Path of the subgroup to create
Returns
hdf5 : the HDF5 object.
Examples
>>> hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
>>> hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
>>> hdf5.keys()
>>> hdf5.attrs.keys()
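Since the subgroup is created with h5py's require_group, a nested path creates all intermediate groups. A short sketch (import path and file name are illustrative):

>>> from pyhdf5_handler.src import hdf5_handler
>>> hdf5 = hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
>>> hdf5 = hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="a/b/c")  # creates a, a/b and a/b/c
>>> list(hdf5["a/b"].keys())
['c']
>>> hdf5.close()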
def hdf5_dataset_creator(hdf5: h5py.File, name: str, value)
Write any value to an HDF5 object
Parameters
hdf5 : h5py.File an hdf5 object
name : str name of the dataset
value : any value to write to the HDF5 file
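The docstring gives no example; a hedged sketch of writing mixed values (import path, file and dataset names are illustrative):

>>> import numpy as np
>>> from pyhdf5_handler.src import hdf5_handler
>>> hdf5 = hdf5_handler.open_hdf5("./datasets.hdf5", replace=True)
>>> hdf5_handler.hdf5_dataset_creator(hdf5, "temperature", np.array([10.5, 11.2, 9.8]))
>>> hdf5_handler.hdf5_dataset_creator(hdf5, "station", "gauge_42")  # original type kept in the '_station' attribute
>>> hdf5.close()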
def save_dict_to_hdf5(hdf5, dictionary)
Dump a dictionary to an HDF5 file
Parameters
hdf5 : h5py.File an hdf5 object
dictionary : dict a custom python dictionary
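A short sketch: nested dictionaries become subgroups, every other value goes through hdf5_dataset_creator (import path and file name are illustrative):

>>> from pyhdf5_handler.src import hdf5_handler
>>> data = {"scalar": 1.5, "text": "hello", "nested": {"values": [1, 2, 3]}}
>>> hdf5 = hdf5_handler.open_hdf5("./dict.hdf5", replace=True)
>>> hdf5_handler.save_dict_to_hdf5(hdf5, data)
>>> list(hdf5.keys())
['nested', 'scalar', 'text']
>>> hdf5.close()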
def save_dict_to_hdf5file(path_to_hdf5, dictionary=None, location="./", replace=False, wait_time=0)
Dump a dictionary to an HDF5 file
Parameters
path_to_hdf5 : str path to the hdf5 file
dictionary : dict | None a dictionary containing the data to be saved
location : str path location or subgroup where data are written in the HDF5 file
replace : Boolean replace an existing HDF5 file. Default is False
wait_time: int If the HDF5 file is unavailable, the function retries for at most wait_time seconds; after that the file is not opened and the function returns None. This parameter is useful if several programs or threads need to read/write simultaneously in the same HDF5 database.
Examples
>>> setup, mesh = smash.load_dataset("cance")
>>> model = smash.Model(setup, mesh)
>>> model.run(inplace=True)
>>>
>>> smash.tools.hdf5_handler.save_dict_to_hdf5file("saved_dictionary.hdf5", mesh)
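The example above relies on smash; an equivalent sketch with a plain dictionary (import path and file name are illustrative):

>>> from pyhdf5_handler.src import hdf5_handler
>>> hdf5_handler.save_dict_to_hdf5file("./saved_dictionary.hdf5", {"a": 1, "b": "text"}, replace=True)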
def save_object_to_hdf5file(path_to_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False, wait_time=0)
Dump an object to an HDF5 file
Parameters
path_to_hdf5 : str path to the hdf5 file
instance : object A custom python object to be saved into the HDF5 file
keys_data : list | dict optional, a list or a dictionary of the attributes to be saved
location : str path location or subgroup where data are written in the HDF5 file
sub_data : dict | None optional, an extra dictionary of data to be saved along with the object
replace : Boolean replace an existing HDF5 file. Default is False
wait_time: int If the HDF5 file is unavailable, the function retries for at most wait_time seconds; after that the file is not opened and the function returns None. This parameter is useful if several programs or threads need to read/write simultaneously in the same HDF5 database.
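No example is given; a hedged sketch (it assumes object_handler.generate_object_structure can walk a plain object's attributes; class, attribute and file names are illustrative):

>>> from pyhdf5_handler.src import hdf5_handler
>>> class Point:
...     def __init__(self):
...         self.x = 1.0
...         self.y = 2.0
>>> hdf5_handler.save_object_to_hdf5file("./point.hdf5", Point(), replace=True, sub_data={"note": "demo"})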
def read_hdf5file_as_dict(path_to_hdf5, location="./", wait_time=0, read_attrs=True, read_dataset_attrs=False)
Open an HDF5 file, read its content as a dictionary and close it
Parameters
path_to_hdf5 : str path to the hdf5 file
location: str place in the HDF5 file from which reading starts
read_attrs : bool read and import attributes into the dictionary.
read_dataset_attrs : bool read and import the special attributes attached to each dataset by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.
wait_time: int If the HDF5 file is unavailable, the function retries for at most wait_time seconds; after that the file is not opened and the function returns None. This parameter is useful if several programs or threads need to read/write simultaneously in the same HDF5 database.
Return
dictionary : dict, a dictionary of all keys and attributes included in the HDF5 file
Examples
read an HDF5 file
>>> dictionary = hdf5_handler.read_hdf5file_as_dict("./multi_model.hdf5", location="model1")
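A round-trip sketch (import path and file name are illustrative; scalars come back as 1-element datasets restored to their original type via the '_<name>' attributes):

>>> from pyhdf5_handler.src import hdf5_handler
>>> hdf5_handler.save_dict_to_hdf5file("./roundtrip.hdf5", {"a": 1.5, "b": "text"}, replace=True)
>>> d = hdf5_handler.read_hdf5file_as_dict("./roundtrip.hdf5")
>>> sorted(d.keys())
['a', 'b']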
def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False)
def hdf5_read_dataset(item, expected_type=None):
    """
    Read a dataset stored in an hdf5 database

    Parameters
    ----------

    item : h5py.Dataset
        an hdf5 dataset/item

    expected_type: str
        the expected datatype as a string, as produced by str(type(...))

    Returns
    -------

    values : the value read from the hdf5, any type matching the expected type

    """

    if expected_type == str(str):
        # scalar string, stored as a length-1 byte dataset
        values = item[0].decode()

    elif expected_type == str(float):
        # scalar float, stored as a length-1 dataset
        values = item[0]

    elif expected_type == "_None_":
        values = None

    elif expected_type == str(pd.Timestamp):
        values = pd.Timestamp(item[0].decode())

    elif expected_type == str(np.datetime64):
        values = np.datetime64(item[0].decode())

    elif expected_type == str(datetime.datetime):
        values = datetime.datetime.fromisoformat(item[0].decode())

    else:

        if item[:].dtype.char == "S":
            # fixed-length byte strings: convert back to unicode
            values = item[:].astype("U")

        elif item[:].dtype.char == "O":
            # variable-length byte strings: decode element by element
            values = [it.decode() for it in item[:]]

        else:
            values = item[:]

    return values

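
# Hedged sketch of how hdf5_read_dataset restores the original Python type.
# It assumes a dataset written by this module together with its "_<name>"
# datatype attribute; the file and dataset names are illustrative.
def _example_hdf5_read_dataset():
    hdf5 = open_hdf5("./multi_model.hdf5", read_only=True)
    if hdf5 is not None:
        item = hdf5["my_dataset"]
        # the "_my_dataset" attribute, if present, records the original type
        expected_type = hdf5.attrs.get("_my_dataset", None)
        value = hdf5_read_dataset(item, expected_type)
        hdf5.close()
        print(type(value), value)
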
def get_hdf5file_attribute(path_to_hdf5="", location="./", attribute=None, wait_time=0):
    """
    Get the value of an attribute in the hdf5file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the attribute is stored

    attribute: str
        attribute name

    wait_time: int
        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. Once this delay has elapsed, the file is not opened and the function returns None. This parameter is useful when several programs or threads need to read/write the same hdf5 database simultaneously.

    Returns
    -------

    return_attribute : the value of the attribute

    Examples
    --------

    get an attribute
    >>> attribute=hdf5_handler.get_hdf5file_attribute("./multi_model.hdf5", attribute="my_attribute_name")

    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    hdf5 = hdf5_base[location]

    return_attribute = hdf5.attrs[attribute]

    hdf5_base.close()

    return return_attribute

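
# Hedged usage sketch; the file, location and attribute names are
# illustrative assumptions.
def _example_get_hdf5file_attribute():
    value = get_hdf5file_attribute("./multi_model.hdf5", location="model1",
                                   attribute="my_attribute_name", wait_time=5)
    print(value)  # None if the file could not be opened in time
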
def get_hdf5file_dataset(path_to_hdf5="", location="./", dataset=None, wait_time=0):
    """
    Get the value of a dataset in the hdf5file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the dataset is stored

    dataset: str
        dataset name

    wait_time: int
        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. Once this delay has elapsed, the file is not opened and the function returns None. This parameter is useful when several programs or threads need to read/write the same hdf5 database simultaneously.

    Returns
    -------

    return_dataset : the value of the dataset

    Examples
    --------

    get a dataset
    >>> dataset=hdf5_handler.get_hdf5file_dataset("./multi_model.hdf5", dataset="my_dataset_name")

    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    hdf5 = hdf5_base[location]

    if "_" + dataset in hdf5.attrs.keys():
        expected_type = hdf5.attrs["_" + dataset]
        # read the dataset itself, not its parent group
        return_dataset = hdf5_read_dataset(hdf5[dataset], expected_type)

    else:
        return_dataset = hdf5[dataset][:]

    hdf5_base.close()

    return return_dataset

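
# Hedged usage sketch; the file, location and dataset names are illustrative
# assumptions.
def _example_get_hdf5file_dataset():
    values = get_hdf5file_dataset("./multi_model.hdf5", location="model1",
                                  dataset="my_dataset_name")
    if values is not None:
        print(values)
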
def get_hdf5file_item(path_to_hdf5="", location="./", item=None, wait_time=0, search_attrs=False):
    """
    Get a custom item in an hdf5file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5file

    location : str
        path inside the hdf5 where the item is stored. If item is None, item is set to basename(location)

    item: str
        item name

    wait_time: int
        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. Once this delay has elapsed, the file is not opened and the function returns None. This parameter is useful when several programs or threads need to read/write the same hdf5 database simultaneously.

    search_attrs: bool
        Default is False. If True, the function will also search for the item among the attributes first.

    Returns
    -------

    return : custom value. Can be an hdf5 object (group), a numpy array, a string, a float, an int...

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5file_item("./multi_model.hdf5", location="path/in/hdf5/dataset")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    hdf5_item = get_hdf5_item(hdf5_instance=hdf5, location=location, item=item, search_attrs=search_attrs)

    hdf5.close()

    return hdf5_item

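
# Hedged usage sketch: when item is None, the last component of location is
# used as the item name. The path below is an illustrative assumption.
def _example_get_hdf5file_item():
    # equivalent to location="path/in/hdf5", item="dataset"
    value = get_hdf5file_item("./multi_model.hdf5", location="path/in/hdf5/dataset",
                              search_attrs=True)
    print(value)
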
def get_hdf5_item(hdf5_instance=None, location="./", item=None, search_attrs=False):
    """
    Get a custom item from an opened hdf5 instance

    Parameters
    ----------

    hdf5_instance : h5py.File
        an instance of an hdf5

    location : str
        path inside the hdf5 where the item is stored. If item is None, item is set to basename(location)

    item: str
        item name

    search_attrs: bool
        Default is False. If True, the function will search for the item among the attributes first.

    Returns
    -------

    return : custom value. Can be an hdf5 object (group), a numpy array, a string, a float, an int...

    Examples
    --------

    get the dataset 'dataset'
    >>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
    >>> dataset=hdf5_handler.get_hdf5_item(hdf5, location="path/in/hdf5/dataset")

    """

    if item is None and isinstance(location, str):
        head, tail = os.path.split(location)
        if len(tail) > 0:
            item = tail
            location = head if len(head) > 0 else "./"

    if not isinstance(item, str):
        print(f"Bad search item: {item}")
        return None

    hdf5 = hdf5_instance[location]

    # first search in the attributes
    if search_attrs:
        list_attribute = hdf5.attrs.keys()
        if item in list_attribute:
            return hdf5.attrs[item]

    # then search in groups and datasets
    list_keys = hdf5.keys()
    if item in list_keys:

        hdf5_item = hdf5[item]

        if isinstance(hdf5_item, h5py.Group):

            if item == "ndarray_ds":
                return _read_ndarray_datastructure(hdf5)

            else:
                return read_hdf5_as_dict(hdf5_item)

        elif isinstance(hdf5_item, h5py.Dataset):

            if "_" + item in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + item]
                values = hdf5_read_dataset(hdf5_item, expected_type)
            else:
                values = hdf5_item[:]

            return values

        else:

            return hdf5_item

    else:

        return None

def search_in_hdf5file(path_to_hdf5, key=None, location="./", wait_time=0, search_attrs=False):
    """
    Search a key in an hdf5file and return a list of matches. Each match is a dictionary with keys 'path', 'datatype', 'key' and 'value'. Value and key are set only if the match is an attribute or a dataset (None otherwise).

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5file

    key: str
        key to search in the hdf5file

    location : str
        path inside the hdf5 where to start the search

    wait_time: int
        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. Once this delay has elapsed, the file is not opened and the function returns None. This parameter is useful when several programs or threads need to read/write the same hdf5 database simultaneously.

    search_attrs : bool
        Default is False, also search in the attributes

    Returns
    -------

    results : list of dict, one entry per match

    Examples
    --------

    search in a hdf5file
    >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV', location="./")

    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    results = search_in_hdf5(hdf5, key, location=location, search_attrs=search_attrs)

    hdf5.close()

    return results

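
# Hedged usage sketch; the key 'Nom_du_BV' follows the docstring example and
# is, like the file name, an illustrative assumption.
def _example_search_in_hdf5file():
    matches = search_in_hdf5file("./multi_model.hdf5", key="Nom_du_BV",
                                 location="./", search_attrs=True)
    if matches is not None:
        for match in matches:
            print(match["path"], match["datatype"], match["value"])
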
def search_in_hdf5(hdf5_base, key=None, location="./", search_attrs=False):
    """
    Search a key in an opened hdf5 and return a list of matches. Each match is a dictionary with keys 'path', 'datatype', 'key' and 'value'. Value and key are set only if the match is an attribute or a dataset (None otherwise).

    Parameters
    ----------

    hdf5_base : h5py.File
        opened instance of the hdf5

    key: str
        key to search in the hdf5file

    location : str
        path inside the hdf5 where to start the search

    search_attrs : bool
        Default is False, also search in the attributes

    Returns
    -------

    result : list of dict, one entry per match

    Examples
    --------

    search in a hdf5
    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
    >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV', location="./")
    >>> hdf5.close()

    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    result = []

    hdf5 = hdf5_base[location]

    if search_attrs:
        list_attribute = hdf5.attrs.keys()

        if key in list_attribute:
            result.append({"path": location, "key": key,
                           "datatype": "attribute", "value": hdf5.attrs[key]})

    for hdf5_key, item in hdf5.items():

        if isinstance(item, h5py.Group):

            sub_location = os.path.join(location, hdf5_key)

            if hdf5_key == key:

                if "ndarray_ds" in item.keys():
                    result.append({"path": sub_location,
                                   "key": None,
                                   "datatype": "ndarray",
                                   "value": _read_ndarray_datastructure(item)})

                else:
                    result.append({"path": sub_location, "key": None,
                                   "datatype": "group", "value": None})

            # recurse into the sub-group, propagating search_attrs
            res = search_in_hdf5(hdf5_base, key, location=sub_location, search_attrs=search_attrs)

            result.extend(res)

        elif isinstance(item, h5py.Dataset):

            if hdf5_key == key:

                if item[:].dtype.char == "S":
                    values = item[:].astype("U")

                elif item[:].dtype.char == "O":
                    # decode variable-length byte strings
                    values = [it.decode() for it in item[:]]

                else:
                    values = item[:]

                result.append({"path": location,
                               "key": key,
                               "datatype": "dataset",
                               "value": values})

    return result

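
# Hedged usage sketch on an already opened file; the file name and key are
# illustrative assumptions.
def _example_search_in_hdf5():
    hdf5 = open_hdf5("./multi_model.hdf5", read_only=True)
    if hdf5 is not None:
        matches = search_in_hdf5(hdf5, key="Nom_du_BV", search_attrs=True)
        hdf5.close()
        print(matches)
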
def hdf5file_view(path_to_hdf5, location="./", max_depth=None, level_base='>', level_sep="--", depth=None, wait_time=0, list_attrs=True, list_dataset_attrs=False, return_view=False):
    """
    Print (or return) a recursive view of an hdf5 file: groups, datasets and optionally attributes.

    Parameters
    ----------

    path_to_hdf5 : str
        Path to an hdf5 database

    location : str
        path inside the hdf5 where to start the listing

    max_depth: int
        Max depth of the search in the hdf5

    level_base: str
        string used as separator at the lowest level (default '>')

    level_sep: str
        string used as separator at higher levels (default '--')

    depth: int
        current depth level

    list_attrs: bool
        default is True, list the attributes

    list_dataset_attrs: bool
        default is False, list the special attributes defined for each dataset by pyhdf5_handler

    return_view: bool
        return the view as a list of strings instead of printing it to screen

    wait_time: int
        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. Once this delay has elapsed, the file is not opened and the function returns None. This parameter is useful when several programs or threads need to read/write the same hdf5 database simultaneously.

    Returns
    -------

    view : list of str, optional, the view of the hdf5, returned only if return_view is True

    Examples
    --------

    print the view of an hdf5file
    >>> hdf5_handler.hdf5file_view("./multi_model.hdf5")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    results = hdf5_view(hdf5,
                        location=location,
                        max_depth=max_depth,
                        level_base=level_base,
                        level_sep=level_sep,
                        depth=depth,
                        list_attrs=list_attrs,
                        list_dataset_attrs=list_dataset_attrs,
                        return_view=return_view)

    hdf5.close()

    return results

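
# Hedged usage sketch: print a shallow tree of the file, then capture the
# full view as a list of strings. The file name is an illustrative assumption.
def _example_hdf5file_view():
    hdf5file_view("./multi_model.hdf5", max_depth=1)  # prints to screen
    view = hdf5file_view("./multi_model.hdf5", return_view=True)
    if view is not None:
        print(len(view), "entries")
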
def hdf5file_ls(path_to_hdf5, location="./"):
    """
    List the datasets in an hdf5file.

    Parameters
    ----------

    path_to_hdf5 : str
        path to a hdf5file

    location: str
        path inside the hdf5 where to start the listing

    Example
    -------

    >>> hdf5file_ls("test.hdf5")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True)

    if hdf5 is None:
        return

    hdf5_view(hdf5, location=location, max_depth=0, level_base='>', level_sep="--", list_attrs=False, return_view=False)

    hdf5.close()

def hdf5_ls(hdf5):
    """
    List the datasets in an hdf5 instance.

    Parameters
    ----------

    hdf5 : h5py.File
        hdf5 instance

    Example
    -------

    >>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
    >>> hdf5_ls(hdf5)

    """

    hdf5_view(hdf5, location="./", max_depth=0, level_base='>', level_sep="--", list_attrs=False, return_view=False)

def hdf5_view(hdf5_obj, location="./", max_depth=None, level_base='>', level_sep="--", depth=None, list_attrs=True, list_dataset_attrs=False, return_view=False):
    """
    Recursively list all datasets, groups and (optionally) attributes of an hdf5 object.

    Parameters
    ----------

    hdf5_obj : h5py.File
        opened instance of the hdf5

    location : str
        path inside the hdf5 where to start the listing

    max_depth: int
        Max depth of the search in the hdf5

    level_base: str
        string used as separator at the lowest level (default '>')

    level_sep: str
        string used as separator at higher levels (default '--')

    depth: int
        current level depth

    list_attrs: bool
        default is True, list the attributes

    list_dataset_attrs: bool
        default is False, list the special attributes defined for each dataset by pyhdf5_handler

    return_view: bool
        return the view as a list of strings instead of printing it to screen

    Returns
    -------

    view : list of str, optional, the view of the hdf5, returned only if return_view is True

    Examples
    --------

    view an opened hdf5
    >>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
    >>> view=hdf5_handler.hdf5_view(hdf5, return_view=True)
    >>> hdf5.close()

    """

    result = []

    if max_depth is not None:

        if depth is not None:
            depth = depth + 1
        else:
            depth = 0

        if depth > max_depth:
            return result

    hdf5 = hdf5_obj[location]

    list_attribute = []
    if list_attrs or list_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        list_keys_matching_attributes = ["_" + element for element in list(hdf5.keys())]

        if list_attrs:
            list_attribute.extend(list(filter(lambda l: l not in list_keys_matching_attributes, tmp_list_attribute)))

        if list_dataset_attrs:
            list_attribute.extend(list(filter(lambda l: l in list_keys_matching_attributes, tmp_list_attribute)))

    for key in list_attribute:
        values = hdf5.attrs[key]
        sub_location = os.path.join(location, key)
        # np.bool was removed from recent numpy releases: use np.bool_ instead
        if isinstance(values, (bool, int, float, np.int64, np.float64, np.int32, np.float32, np.bool_)):
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(values)}, value={values}")
        elif isinstance(values, str) and len(values) < 20:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(values)}, len={len(values)}, value={values}")
        else:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(values)}, len={len(values)}, value={values[0:20]}...")

        if not return_view:
            print(result[-1])

    for hdf5_key, item in hdf5.items():

        if isinstance(item, h5py.Group):

            sub_location = os.path.join(location, hdf5_key)

            if "ndarray_ds" in item.keys():
                result.append(f"{level_base}| {sub_location}, ndarray")
            else:
                result.append(f"{level_base}| {sub_location}, group")

            if not return_view:
                print(result[-1])

            # recurse, propagating the listing options; collect the sub-view
            # and print it here if needed
            res = hdf5_view(hdf5_obj, sub_location, max_depth=max_depth,
                            level_base=level_base + level_sep, level_sep=level_sep,
                            depth=depth, list_attrs=list_attrs,
                            list_dataset_attrs=list_dataset_attrs, return_view=True)

            for line in res:
                result.append(line)
                if not return_view:
                    print(line)

        elif isinstance(item, h5py.Dataset):

            values = item[:]
            if values.dtype.char == "S":
                values = values.astype("U")

            sub_location = os.path.join(location, hdf5_key)
            result.append(
                f"{level_base}| {sub_location}, dataset, type={type(values)}, shape={values.shape}")

            if not return_view:
                print(result[-1])

    if return_view:
        return result

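
# Hedged usage sketch on an already opened file; the file name is an
# illustrative assumption.
def _example_hdf5_view():
    hdf5 = open_hdf5("./multi_model.hdf5", read_only=True)
    if hdf5 is not None:
        view = hdf5_view(hdf5, max_depth=2, list_attrs=True, return_view=True)
        hdf5.close()
        for line in view:
            print(line)
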