pyhdf5_handler.src.hdf5_handler

from __future__ import annotations

import os
import h5py
import numpy as np
import numbers
import pandas as pd
import datetime
import time
import gc

from ..src import object_handler


def close_all_hdf5_file():
    """
    Close every h5py.File object still open in the current Python process.
    """
    for obj in gc.get_objects():  # browse through ALL live objects
        if isinstance(obj, h5py.File):  # keep only HDF5 files
            try:
                print(f"try closing {obj}")
                obj.close()
            except Exception:
                pass  # was already closed


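# A minimal usage sketch for the helper above (hypothetical file name), useful
# when an exception left a handle open and the file stays locked:
#
#   >>> f = open_hdf5("./leaked.hdf5")
#   >>> close_all_hdf5_file()   # closes `f` and any other forgotten h5py.File

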
def open_hdf5(path, read_only=False, replace=False, wait_time=0):
    """
    Open or create an HDF5 file.

    Parameters
    ----------

    path : str
        The file path.

    read_only : bool
        If True, the HDF5 file is opened in read-only mode. Several processes
        can read the same HDF5 file simultaneously, which is not possible
        with the append 'a' or write 'w' access modes.

    replace : bool
        If True, any existing HDF5 file is erased.

    wait_time : int
        If the HDF5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and the file is not opened. This
        parameter is useful when several programs or threads need to
        read/write the same HDF5 database simultaneously.

    Returns
    -------

    f :
        A h5py.File object.

    Examples
    --------

    >>> hdf5=smash.tools.hdf5_handler.open_hdf5("./my_hdf5.hdf5")
    >>> hdf5.keys()
    >>> hdf5.attrs.keys()

    """
    f = None
    wait = 0
    while wait <= wait_time:

        f = None
        exist_file = True

        try:

            if read_only:
                if os.path.isfile(path):
                    f = h5py.File(path, "r")

                else:
                    exist_file = False
                    raise ValueError(f"File {path} does not exist.")

            else:
                if replace:
                    f = h5py.File(path, "w")

                else:
                    if os.path.isfile(path):
                        f = h5py.File(path, "a")

                    else:
                        f = h5py.File(path, "w")
        except Exception:
            pass

        if f is None:
            if not exist_file:
                print(f"File {path} does not exist.")
                return f
            else:
                print(
                    f"The file {path} is unavailable, waiting {wait}/{wait_time}s")

            wait = wait + 1

            if wait_time > 0:
                time.sleep(1)

        else:
            break

    return f


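# A short sketch (hypothetical path and dataset name) of the retry behaviour:
# with wait_time set, a reader can wait for a writer to release the file
# instead of failing immediately:
#
#   >>> f = open_hdf5("./shared.hdf5", read_only=True, wait_time=10)
#   >>> if f is not None:
#   ...     data = f["some_dataset"][:]
#   ...     f.close()

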
def add_hdf5_sub_group(hdf5, subgroup=None):
    """
    Create a new subgroup in an HDF5 object

    Parameters
    ----------

    hdf5 : h5py.File
        An hdf5 object opened with open_hdf5()

    subgroup : str
        Path to the subgroup that must be created

    Returns
    -------

    hdf5 :
        the HDF5 object.

    Examples
    --------

    >>> hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)
    >>> hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")
    >>> hdf5.keys()
    >>> hdf5.attrs.keys()

    """
    if subgroup is not None:
        if subgroup == "":
            subgroup = "./"

        hdf5.require_group(subgroup)

    return hdf5


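# require_group() creates intermediate groups as needed, so nested paths can
# be built in one call (a sketch, assuming `hdf5` is an open h5py.File):
#
#   >>> hdf5 = add_hdf5_sub_group(hdf5, subgroup="results/run1/params")
#   >>> list(hdf5["results/run1"].keys())
#   ['params']

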
def _dump_object_to_hdf5_from_list_attribute(hdf5, instance, list_attr):
    """
    dump an object to an hdf5 file from a list of attributes

    Parameters
    ----------
    hdf5 : h5py.File
        an hdf5 object

    instance : object
        a custom python object.

    list_attr : list
        a list of attributes

    """
    if isinstance(list_attr, list):
        for attr in list_attr:
            if isinstance(attr, str):
                _dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr)

            elif isinstance(attr, list):
                _dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr)

            elif isinstance(attr, dict):
                _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr)

            else:
                raise ValueError(
                    f"inconsistent {attr} in {list_attr}. {attr} must be an instance of dict, list or str"
                )

    else:
        raise ValueError(f"{list_attr} must be an instance of list.")


def _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, dict_attr):
    """
    dump an object to an hdf5 file from a dictionary of attributes

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object

    instance : object
        a custom python object.

    dict_attr : dict
        a dictionary of attributes

    """
    if isinstance(dict_attr, dict):
        for attr, value in dict_attr.items():
            hdf5 = add_hdf5_sub_group(hdf5, subgroup=attr)

            try:
                sub_instance = getattr(instance, attr)

            except Exception:
                sub_instance = instance

            if isinstance(value, dict):
                _dump_object_to_hdf5_from_dict_attribute(
                    hdf5[attr], sub_instance, value
                )

            elif isinstance(value, list):
                _dump_object_to_hdf5_from_list_attribute(
                    hdf5[attr], sub_instance, value
                )

            elif isinstance(value, str):
                _dump_object_to_hdf5_from_str_attribute(
                    hdf5[attr], sub_instance, value)

            else:
                raise ValueError(
                    f"inconsistent '{attr}' in '{dict_attr}'. Dict({attr}) must be an instance of dict, list or str"
                )

    else:
        raise ValueError(f"{dict_attr} must be an instance of dict.")


def _dump_object_to_hdf5_from_str_attribute(hdf5, instance, str_attr):
    """
    dump an object to an hdf5 file from a string attribute

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object

    instance : object
        a custom python object.

    str_attr : str
        a string attribute

    """
    if isinstance(str_attr, str):
        value = None
        try:
            value = getattr(instance, str_attr)

            if isinstance(value, (np.ndarray, list)):

                # TODO: do the same as save_dict_to_hdf5

                if isinstance(value, list):
                    value = np.array(value)

                if value.dtype == "object" or value.dtype.char == "U":
                    value = value.astype("S")

                # remove the dataset if it exists
                if str_attr in hdf5.keys():
                    del hdf5[str_attr]

                hdf5.create_dataset(
                    str_attr,
                    shape=value.shape,
                    dtype=value.dtype,
                    data=value,
                    compression="gzip",
                    chunks=True,
                )

            elif value is None:
                hdf5.attrs[str_attr] = "_None_"

            elif isinstance(value, str):
                hdf5.attrs[str_attr] = value.encode()

            else:
                hdf5.attrs[str_attr] = value

        except Exception:
            raise ValueError(
                f"Unable to dump attribute {str_attr} with value {value} from {instance}"
            )

    else:
        raise ValueError(f"{str_attr} must be an instance of str.")


def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None):
    """
    dump an object to an hdf5 file from an iterable (list or dict) of attributes

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object
    instance : object
        a custom python object.
    iteratable : list | dict
        a list or a dict of attributes

    Examples
    --------

    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model.run(inplace=True)
    >>>
    >>> hdf5=smash.tools.hdf5_handler.open_hdf5("./model.hdf5", replace=True)
    >>> hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model1")
    >>> keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium")
    >>> smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data)
    >>>
    >>> hdf5=smash.tools.hdf5_handler.open_hdf5("./model.hdf5", replace=False)
    >>> hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model2")
    >>> keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="light")
    >>> smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data)

    """
    if isinstance(iteratable, list):
        _dump_object_to_hdf5_from_list_attribute(hdf5, instance, iteratable)

    elif isinstance(iteratable, dict):
        _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, iteratable)

    else:
        raise ValueError(f"{iteratable} must be an instance of list or dict.")


def _hdf5_handle_str(name, value):

    dataset = {
        "name": name,
        "attr_value": str(type(value)),
        "dataset_value": value,
        "shape": 1,
        "dtype": h5py.string_dtype(encoding='utf-8'),
    }

    return dataset


def _hdf5_handle_numbers(name: str, value: numbers.Number):

    arr = np.array([value])
    dataset = {
        "name": name,
        "attr_value": str(type(value)),
        "dataset_value": arr,
        "shape": arr.shape,
        "dtype": arr.dtype,
    }

    return dataset


def _hdf5_handle_none(name: str, value: None):

    dataset = {
        "name": name,
        "attr_value": "_None_",
        "dataset_value": "_None_",
        "shape": 1,
        "dtype": h5py.string_dtype(encoding='utf-8'),
    }

    return dataset


def _hdf5_handle_timestamp(name: str, value: (pd.Timestamp | np.datetime64 | datetime.date)):

    dtype = type(value)

    if isinstance(value, np.datetime64):
        # convert to a datetime.datetime so that strftime() is available
        value = value.tolist()

    dataset = {
        "name": name,
        "attr_value": str(dtype),
        "dataset_value": value.strftime("%Y-%m-%d %H:%M"),
        "shape": 1,
        "dtype": h5py.string_dtype(encoding='utf-8'),
    }

    return dataset


def _hdf5_handle_DatetimeIndex(name: str, value: pd.DatetimeIndex):

    dataset = _hdf5_handle_array(name, value)

    return dataset


def _hdf5_handle_list(name: str, value: (list | tuple)):

    arr = np.array(value)

    dataset = _hdf5_handle_array(name, arr)

    return dataset


def _hdf5_handle_array(name: str, value: np.ndarray):

    dtype_attr = type(value)
    dtype = value.dtype

    if value.dtype.char == "M":

        # datetime64 array: store it as formatted strings
        ListDate = value.tolist()
        ListDateStr = list()
        for date in ListDate:
            ListDateStr.append(date.strftime("%Y-%m-%d %H:%M"))
        value = np.array(ListDateStr)
        value = value.astype("O")
        dtype = h5py.string_dtype(encoding='utf-8')

    elif value.dtype == "object":

        value = value.astype("S")
        dtype = h5py.string_dtype(encoding='utf-8')

    elif value.dtype.char == "U":
        value = value.astype("S")
        dtype = h5py.string_dtype(encoding='utf-8')

    dataset = {
        "name": name,
        "attr_value": str(dtype_attr),
        "dataset_value": value,
        "shape": value.shape,
        "dtype": dtype,
    }

    return dataset


def _hdf5_handle_ndarray(hdf5: h5py.File, name: str, value: np.ndarray):

    hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)
    _dump_ndarray_to_hdf5(hdf5[name], value)


def _hdf5_create_dataset(hdf5: h5py.File, dataset: dict):

    if dataset["name"] in hdf5.keys():
        del hdf5[dataset["name"]]

    hdf5.create_dataset(
        dataset["name"],
        shape=dataset["shape"],
        dtype=dataset["dtype"],
        data=dataset["dataset_value"],
        compression="gzip",
        chunks=True,
    )

    if "_" + dataset["name"] in list(hdf5.attrs.keys()):
        del hdf5.attrs["_" + dataset["name"]]

    hdf5.attrs["_" + dataset["name"]] = dataset["attr_value"]


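# The _hdf5_handle_* helpers only build a description dict; _hdf5_create_dataset
# performs the actual write and records the original Python type in a "_<name>"
# attribute. A sketch (assuming `hdf5` is an open, writable h5py.File):
#
#   >>> _hdf5_create_dataset(hdf5, _hdf5_handle_str("note", "hello"))
#   >>> hdf5["note"][0]           # b'hello'
#   >>> hdf5.attrs["_note"]       # "<class 'str'>"

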
def hdf5_dataset_creator(hdf5: h5py.File, name: str, value):
    """
    Write any value in an hdf5 object

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object

    name : str
        name of the dataset

    value : any
        value to write in the hdf5

    """
    if isinstance(value, str):
        dataset = _hdf5_handle_str(name, value)

    elif isinstance(value, numbers.Number):
        dataset = _hdf5_handle_numbers(name, value)

    elif value is None:
        dataset = _hdf5_handle_none(name, value)

    elif isinstance(value, (pd.Timestamp, np.datetime64, datetime.date)):
        dataset = _hdf5_handle_timestamp(name, value)

    elif isinstance(value, pd.DatetimeIndex):
        dataset = _hdf5_handle_DatetimeIndex(name, value)

    elif isinstance(value, (list, tuple)):
        dataset = _hdf5_handle_list(name, value)

    elif isinstance(value, np.ndarray):

        if value.dtype.names:
            # structured array: stored as a dedicated group
            _hdf5_handle_ndarray(hdf5, name, value)
            return
        else:
            dataset = _hdf5_handle_array(name, value)

    else:

        # any other object: store it recursively as a subgroup
        hdf5 = add_hdf5_sub_group(hdf5, subgroup=name)

        newdict = object_handler.read_object_as_dict(value)

        save_dict_to_hdf5(hdf5[name], newdict)

        return

    _hdf5_create_dataset(hdf5, dataset)


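# A sketch of what the dispatcher accepts (hypothetical names), assuming
# `hdf5` is an open, writable h5py.File:
#
#   >>> hdf5_dataset_creator(hdf5, "scalar", 3.14)
#   >>> hdf5_dataset_creator(hdf5, "words", ["a", "b", "c"])
#   >>> hdf5_dataset_creator(hdf5, "when", pd.Timestamp("2024-01-01"))
#   >>> hdf5_dataset_creator(hdf5, "nothing", None)   # stored as "_None_"

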
def _dump_ndarray_to_hdf5(hdf5, value):
    """
    dump a structured ndarray to an hdf5 file: this function creates a group
    'ndarray_ds' and stores each field of the ndarray as a dataset, plus two
    extra datasets holding the dtypes ('ndarray_dtype') and the field names
    ('ndarray_indexes').

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object

    value : ndarray
        a structured ndarray mixing different datatypes

    """
    # save the ndarray data structure

    hdf5 = add_hdf5_sub_group(hdf5, subgroup="ndarray_ds")
    hdf5_data = hdf5["ndarray_ds"]

    for item in value.dtype.names:

        hdf5_dataset_creator(hdf5=hdf5_data, name=item, value=value[item])

    index = np.array(value.dtype.descr)[:, 0]
    dtype = np.array(value.dtype.descr)[:, 1]
    index = index.astype("O")
    dtype = dtype.astype("O")
    data_type = h5py.string_dtype(encoding='utf-8')

    if "ndarray_dtype" in hdf5_data.keys():
        del hdf5_data["ndarray_dtype"]

    hdf5_data.create_dataset(
        "ndarray_dtype",
        shape=dtype.shape,
        dtype=data_type,
        data=dtype,
        compression="gzip",
        chunks=True,
    )

    if "ndarray_indexes" in hdf5_data.keys():
        del hdf5_data["ndarray_indexes"]

    hdf5_data.create_dataset(
        "ndarray_indexes",
        shape=index.shape,
        dtype=data_type,
        data=index,
        compression="gzip",
        chunks=True,
    )


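# A sketch of the round trip for a structured array (hypothetical fields),
# assuming `grp` is an open, writable h5py group:
#
#   >>> arr = np.zeros(3, dtype=[("x", "f8"), ("label", "U10")])
#   >>> _dump_ndarray_to_hdf5(grp, arr)
#   >>> list(grp["ndarray_ds"].keys())
#   ['label', 'ndarray_dtype', 'ndarray_indexes', 'x']
#   >>> _read_ndarray_datastructure(grp)   # rebuilds the structured array

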
def _read_ndarray_datastructure(hdf5):
    """
    read a ndarray data structure from an hdf5 file

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object at the root of the ndarray datastructure

    Returns
    -------

    ndarray : the reconstructed ndarray

    """

    if 'ndarray_ds' in list(hdf5.keys()):

        decoded_item = list()
        for it in hdf5['ndarray_ds/ndarray_dtype'][:]:
            decoded_item.append(it.decode())
        list_dtypes = decoded_item

        decoded_item = list()
        for it in hdf5['ndarray_ds/ndarray_indexes'][:]:
            decoded_item.append(it.decode())
        list_indexes = decoded_item

        len_data = len(hdf5[f'ndarray_ds/{list_indexes[0]}'][:])

        list_datatype = list()
        for i in range(len(list_indexes)):
            list_datatype.append((list_indexes[i], list_dtypes[i]))

        datatype = np.dtype(list_datatype)

        ndarray = np.zeros(len_data, dtype=datatype)

        for i in range(len(list_indexes)):

            expected_type = list_dtypes[i]

            values = hdf5_read_dataset(hdf5[f'ndarray_ds/{list_indexes[i]}'], expected_type)

            ndarray[list_indexes[i]] = values

        return ndarray


def save_dict_to_hdf5(hdf5, dictionary):
    """
    dump a dictionary to an hdf5 file

    Parameters
    ----------

    hdf5 : h5py.File
        an hdf5 object

    dictionary : dict
        a custom python dictionary

    """
    if isinstance(dictionary, dict):
        for attr, value in dictionary.items():
            try:

                # '/' and ' ' are not allowed in dataset names
                attribute_name = str(attr)
                for character in '/ ':
                    attribute_name = attribute_name.replace(character, '_')

                if isinstance(value, dict):

                    hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
                    save_dict_to_hdf5(hdf5[attribute_name], value)

                else:

                    hdf5_dataset_creator(hdf5, attribute_name, value)

            except Exception:

                raise ValueError(
                    f"Unable to save attribute {str(attr)} with value {value}")

    else:

        raise ValueError(f"{dictionary} must be an instance of dict.")


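# A minimal sketch (hypothetical content): nested dicts become nested groups,
# everything else goes through hdf5_dataset_creator:
#
#   >>> hdf5 = open_hdf5("./dict_demo.hdf5", replace=True)
#   >>> save_dict_to_hdf5(hdf5, {"meta": {"author": "me"}, "values": [1, 2, 3]})
#   >>> list(hdf5.keys())
#   ['meta', 'values']
#   >>> hdf5.close()

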
def save_dict_to_hdf5file(path_to_hdf5, dictionary=None, location="./", replace=False, wait_time=0):
    """
    dump a dictionary to an hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        path to the hdf5 file

    dictionary : dict | None
        a dictionary containing the data to be saved

    location : str
        path location or subgroup where to write data in the hdf5 file

    replace : bool
        replace an existing hdf5 file. Default is False

    wait_time : int
        If the hdf5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and returns None. This parameter is
        useful when several programs or threads need to read/write the same
        hdf5 database simultaneously.

    Examples
    --------

    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model.run(inplace=True)
    >>>
    >>> smash.tools.hdf5_handler.save_dict_to_hdf5file("saved_dictionary.hdf5", mesh)

    """
    if isinstance(dictionary, dict):
        hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

        if hdf5 is None:
            return

        hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
        save_dict_to_hdf5(hdf5[location], dictionary)

        hdf5.close()

    else:
        raise ValueError(f"The input {dictionary} must be an instance of dict.")


def save_object_to_hdf5file(
    path_to_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False, wait_time=0
):
    """
    dump an object to an hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        path to the hdf5 file

    instance : object
        A custom python object to be saved into an hdf5 file

    keys_data : list | dict
        optional, a list or a dictionary of the attributes to be saved

    location : str
        path location or subgroup where to write data in the hdf5 file

    sub_data : dict | None
        optional, an extra dictionary containing extra data to be saved along the object

    replace : bool
        replace an existing hdf5 file. Default is False

    wait_time : int
        If the hdf5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and returns None. This parameter is
        useful when several programs or threads need to read/write the same
        hdf5 database simultaneously.

    """

    if keys_data is None:
        keys_data = object_handler.generate_object_structure(instance)

    hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)

    if hdf5 is None:
        return None

    hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)

    _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data)

    if isinstance(sub_data, dict):
        save_dict_to_hdf5(hdf5[location], sub_data)

    hdf5.close()


def read_hdf5file_as_dict(path_to_hdf5, location="./", wait_time=0, read_attrs=True, read_dataset_attrs=False):
    """
    Open, read and close an hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        path to the hdf5 file

    location : str
        place in the hdf5 from which we start reading the file

    wait_time : int
        If the hdf5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and returns None. This parameter is
        useful when several programs or threads need to read/write the same
        hdf5 database simultaneously.

    read_attrs : bool
        read and import attributes into the dictionary.

    read_dataset_attrs : bool
        read and import the special attributes attached to each dataset by
        pyhdf5_handler. These attributes only store the original datatype of
        the data stored in the dataset.

    Returns
    -------

    dictionary : dict, a dictionary of all keys and attributes included in the hdf5 file

    Examples
    --------

    read an hdf5 file
    >>> dictionary=hdf5_handler.read_hdf5file_as_dict("./my_hdf5.hdf5")
    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    dictionary = read_hdf5_as_dict(hdf5[location], read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs)

    hdf5.close()

    return dictionary


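# A round-trip sketch (hypothetical file): the dictionary written with
# save_dict_to_hdf5file() comes back with its original Python types:
#
#   >>> save_dict_to_hdf5file("./demo.hdf5", {"a": 1.0, "b": "text"}, replace=True)
#   >>> d = read_hdf5file_as_dict("./demo.hdf5")
#   >>> d["b"]
#   'text'

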
def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False):
    """
    Load an hdf5 file

    Parameters
    ----------

    hdf5 : h5py.File
        an instance of hdf5, opened with the function open_hdf5()

    read_attrs : bool
        read and import attributes into the dictionary.

    read_dataset_attrs : bool
        read and import the special attributes attached to each dataset by
        pyhdf5_handler. These attributes only store the original datatype of
        the data stored in the dataset.

    Returns
    -------

    dictionary : dict, a dictionary of all keys and attributes included in the hdf5 file

    Examples
    --------

    read only a part of an hdf5 file
    >>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")
    >>> dictionary=hdf5_handler.read_hdf5_as_dict(hdf5["model1"])
    >>> dictionary.keys()

    """

    if not isinstance(hdf5, (h5py.File, h5py.Group, h5py.Dataset, h5py.Datatype)):
        print('Error: input arg is not an instance of hdf5.File()')
        return {}

    dictionary = {}

    for key, item in hdf5.items():

        if isinstance(item, h5py.Group):

            if key == 'ndarray_ds':

                # the whole group is a serialized structured ndarray
                return _read_ndarray_datastructure(hdf5)

            else:

                dictionary.update({key: read_hdf5_as_dict(
                    item, read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs)})

        elif isinstance(item, h5py.Dataset):

            if "_" + key in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + key]
                values = hdf5_read_dataset(item, expected_type)

            else:

                values = item[:]

            dictionary.update({key: values})

    list_attribute = []
    if read_attrs or read_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        hdf5_item_matching_attributes = ["_" + element for element in list(hdf5.keys())]

    if read_attrs:

        list_attribute.extend(list(filter(
            lambda l: l not in hdf5_item_matching_attributes, tmp_list_attribute)))

    if read_dataset_attrs:

        list_attribute.extend(list(filter(
            lambda l: l in hdf5_item_matching_attributes, tmp_list_attribute)))

    for key in list_attribute:
        dictionary.update({key: hdf5.attrs[key]})

    return dictionary


def hdf5_read_dataset(item, expected_type=None):
    """
    Read a dataset stored in an hdf5 database

    Parameters
    ----------

    item : h5py.Dataset
        an hdf5 dataset/item

    expected_type : str
        the expected datatype as a string, i.e. str(type())

    Returns
    -------

    value : the value read from the hdf5, any type matching the expected type

    """

    if expected_type == str(str):

        values = item[0].decode()

    elif expected_type == str(float):

        values = item[0]

    elif expected_type == "_None_":

        values = None

    elif expected_type in (str(pd.Timestamp), str(np.datetime64), str(datetime.datetime)):

        if expected_type == str(pd.Timestamp):
            values = pd.Timestamp(item[0].decode())

        elif expected_type == str(np.datetime64):
            values = np.datetime64(item[0].decode())

        elif expected_type == str(datetime.datetime):
            values = datetime.datetime.fromisoformat(item[0].decode())

        else:
            values = item[0].decode()

    else:

        if item[:].dtype.char == "S":

            values = item[:].astype("U")

        elif item[:].dtype.char == "O":

            # decode the list of bytes if required
            decoded_item = list()
            for it in item[:]:

                decoded_item.append(it.decode())

            values = decoded_item

        else:
            values = item[:]

    return values


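# A sketch of how the "_<name>" attribute written by _hdf5_create_dataset
# drives the decoding (assuming `hdf5` holds a dataset "when" saved from a
# pd.Timestamp):
#
#   >>> expected = hdf5.attrs["_when"]   # "<class 'pandas...Timestamp'>"
#   >>> hdf5_read_dataset(hdf5["when"], expected)
#   Timestamp('2024-01-01 12:30:00')

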
def get_hdf5file_attribute(path_to_hdf5=str(), location="./", attribute=None, wait_time=0):
    """
    Get the value of an attribute in the hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the attribute is stored

    attribute : str
        attribute name

    wait_time : int
        If the hdf5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and returns None. This parameter is
        useful when several programs or threads need to read/write the same
        hdf5 database simultaneously.

    Returns
    -------

    return_attribute : the value of the attribute

    Examples
    --------

    get an attribute
    >>> attribute=hdf5_handler.get_hdf5file_attribute("./multi_model.hdf5",attribute=my_attribute_name)

    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    hdf5 = hdf5_base[location]

    return_attribute = hdf5.attrs[attribute]

    hdf5_base.close()

    return return_attribute


def get_hdf5file_dataset(path_to_hdf5=str(), location="./", dataset=None, wait_time=0):
    """
    Get the value of a dataset in the hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the dataset is stored

    dataset : str
        dataset name

    wait_time : int
        If the hdf5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and returns None. This parameter is
        useful when several programs or threads need to read/write the same
        hdf5 database simultaneously.

    Returns
    -------

    return_dataset : the value of the dataset

    Examples
    --------

    get a dataset
    >>> dataset=hdf5_handler.get_hdf5file_dataset("./multi_model.hdf5",dataset=my_dataset_name)

    """

    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5_base is None:
        return None

    hdf5 = hdf5_base[location]

    if "_" + dataset in hdf5.attrs.keys():
        expected_type = hdf5.attrs["_" + dataset]
        return_dataset = hdf5_read_dataset(hdf5[dataset], expected_type)

    else:
        return_dataset = hdf5[dataset][:]

    hdf5_base.close()

    return return_dataset


def get_hdf5file_item(path_to_hdf5=str(), location="./", item=None, wait_time=0, search_attrs=False):
    """
    Get a custom item in an hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    location : str
        path inside the hdf5 where the item is stored. If item is None, item is set to basename(location)

    item : str
        item name

    wait_time : int
        If the hdf5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and returns None. This parameter is
        useful when several programs or threads need to read/write the same
        hdf5 database simultaneously.

    search_attrs : bool
        Default is False. If True, the function will also search the item in the attributes first.

    Returns
    -------

    return : custom value. Can be an hdf5 object (group), a numpy array, a string, a float, an int ...

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5file_item("./multi_model.hdf5",location="path/in/hdf5/dataset")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    hdf5_item = get_hdf5_item(hdf5_instance=hdf5, location=location, item=item, search_attrs=search_attrs)

    hdf5.close()

    return hdf5_item


def get_hdf5_item(hdf5_instance=None, location="./", item=None, search_attrs=False):
    """
    Get a custom item in an hdf5 file

    Parameters
    ----------

    hdf5_instance : h5py.File
        an instance of an hdf5

    location : str
        path inside the hdf5 where the item is stored. If item is None, item is set to basename(location)

    item : str
        item name

    search_attrs : bool
        Default is False. If True, the function will search the item in the attributes first.

    Returns
    -------

    return : custom value. Can be an hdf5 object (group), a numpy array, a string, a float, an int ...

    Examples
    --------

    get the dataset 'dataset'
    >>> dataset=hdf5_handler.get_hdf5_item(hdf5, location="path/in/hdf5/dataset")

    """

    if item is None and isinstance(location, str):
        head, tail = os.path.split(location)
        if len(tail) > 0:
            item = tail
        location = head

    if not isinstance(item, str):
        print(f"Bad search item:{item}")
        return None

    hdf5 = hdf5_instance[location]

    # first search in the attributes
    if search_attrs:
        list_attribute = hdf5.attrs.keys()
        if item in list_attribute:
            return hdf5.attrs[item]

    # then search in groups and datasets
    list_keys = hdf5.keys()
    if item in list_keys:

        hdf5_item = hdf5[item]

        if isinstance(hdf5_item, h5py.Group):

            if item == 'ndarray_ds':

                return _read_ndarray_datastructure(hdf5)

            else:

                returned_dict = read_hdf5_as_dict(hdf5_item)

                return returned_dict

        elif isinstance(hdf5_item, h5py.Dataset):

            if "_" + item in hdf5.attrs.keys():
                expected_type = hdf5.attrs["_" + item]
                values = hdf5_read_dataset(hdf5_item, expected_type)
            else:
                values = hdf5_item[:]

            return values

        else:

            return hdf5_item

    else:

        return None


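# A sketch of the path-splitting convenience (hypothetical layout): when
# `item` is omitted, the last component of `location` becomes the item name:
#
#   >>> get_hdf5_item(hdf5, location="results/run1/discharge")
#   # equivalent to get_hdf5_item(hdf5, location="results/run1", item="discharge")

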
def search_in_hdf5file(path_to_hdf5, key=None, location="./", wait_time=0, search_attrs=False):
    """
    Search a key in an hdf5 file and return a list of dictionaries with keys
    [path, key, datatype, value]. Value and key are returned only if the key
    is an attribute or a dataset (None otherwise)

    Parameters
    ----------

    path_to_hdf5 : str
        the path to the hdf5 file

    key : str
        key to search in the hdf5 file

    location : str
        path inside the hdf5 where to start the search

    wait_time : int
        If the hdf5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and returns None. This parameter is
        useful when several programs or threads need to read/write the same
        hdf5 database simultaneously.

    search_attrs : bool
        Default is False, search in the attributes too

    Returns
    -------

    results : the list of matches

    Examples
    --------

    search in an hdf5 file
    >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")

    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    results = search_in_hdf5(hdf5, key, location=location, search_attrs=search_attrs)

    hdf5.close()

    return results


def search_in_hdf5(hdf5_base, key=None, location="./", search_attrs=False):
    """
    Search a key in an hdf5 and return a list of dictionaries with keys
    [path, key, datatype, value]. Value and key are returned only if the key
    is an attribute or a dataset (None otherwise)

    Parameters
    ----------

    hdf5_base : h5py.File
        opened instance of the hdf5

    key : str
        key to search in the hdf5 file

    location : str
        path inside the hdf5 where to start the search

    search_attrs : bool
        Default is False, search in the attributes too

    Returns
    -------

    result : the list of matches

    Examples
    --------

    search in an hdf5
    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
    >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")
    >>> hdf5.close()

    """
    if key is None:
        print("Nothing to search, use key=")
        return []

    result = []

    hdf5 = hdf5_base[location]

    if search_attrs:
        list_attribute = hdf5.attrs.keys()

        if key in list_attribute:
            result.append({"path": location, "key": key,
                           "datatype": "attribute", "value": hdf5.attrs[key]})

    for hdf5_key, item in hdf5.items():

        if isinstance(item, h5py.Group):

            sub_location = os.path.join(location, hdf5_key)

            if hdf5_key == key:

                if "ndarray_ds" in item.keys():

                    result.append({"path": sub_location,
                                   "key": None,
                                   "datatype": "ndarray",
                                   "value": _read_ndarray_datastructure(item)})

                else:

                    result.append({"path": sub_location, "key": None,
                                   "datatype": "group", "value": None})

            res = search_in_hdf5(hdf5_base, key, sub_location, search_attrs=search_attrs)

            if len(res) > 0:
                for element in res:
                    result.append(element)

        elif isinstance(item, h5py.Dataset):

            if hdf5_key == key:

                if item[:].dtype.char == "S":

                    values = item[:].astype("U")

                elif item[:].dtype.char == "O":

                    # decode the list of bytes if required
                    decoded_item = list()
                    for it in item[:]:
                        decoded_item.append(it.decode())

                    values = decoded_item

                else:

                    values = item[:]

                result.append({"path": location,
                               "key": key,
                               "datatype": "dataset",
                               "value": values})

    return result


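# A sketch of the result format (hypothetical file content): each match is a
# dict with "path", "key", "datatype" and "value" entries:
#
#   >>> search_in_hdf5file("./demo.hdf5", key="values")
#   [{'path': './', 'key': 'values', 'datatype': 'dataset', 'value': array([1, 2, 3])}]

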
def hdf5file_view(path_to_hdf5, location="./", max_depth=None, level_base='>', level_sep="--", depth=None, wait_time=0, list_attrs=True, list_dataset_attrs=False, return_view=False):
    """
    Print (or return) a tree view of the groups, datasets and attributes of an hdf5 file

    Parameters
    ----------

    path_to_hdf5 : str
        Path to an hdf5 database

    location : str
        path inside the hdf5 where to start the listing

    max_depth : int
        Max depth of the search in the hdf5

    level_base : str
        string used as separator at the lower level (default '>')

    level_sep : str
        string used as separator at higher levels (default '--')

    depth : int
        current depth level

    list_attrs : bool
        default is True, list the attributes

    list_dataset_attrs : bool
        default is False, list the special attributes defined for each dataset by pyhdf5_handler

    return_view : bool
        return the view as a list of strings (do not print on screen)

    wait_time : int
        If the hdf5 file is unavailable, the function retries for at most
        wait_time seconds, then gives up and returns None. This parameter is
        useful when several programs or threads need to read/write the same
        hdf5 database simultaneously.

    Returns
    -------

    list : optional, the view of the hdf5 as a list of strings

    Examples
    --------

    view the content of an hdf5 file
    >>> hdf5_handler.hdf5file_view("./multi_model.hdf5")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)

    if hdf5 is None:
        return None

    results = hdf5_view(hdf5,
                        location=location,
                        max_depth=max_depth,
                        level_base=level_base,
                        level_sep=level_sep,
                        depth=depth,
                        list_attrs=list_attrs,
                        list_dataset_attrs=list_dataset_attrs,
                        return_view=return_view)

    hdf5.close()

    return results


def hdf5file_ls(path_to_hdf5, location='./'):
    """
    List the datasets in an hdf5 file.

    Parameters
    ----------

    path_to_hdf5 : str
        path to an hdf5 file

    location : str
        path inside the hdf5 where to start the listing

    Example
    -------

    >>> hdf5file_ls("test.hdf5")

    """

    hdf5 = open_hdf5(path_to_hdf5, read_only=True)

    if hdf5 is None:
        return

    hdf5_view(hdf5, location=location, max_depth=0, level_base='>', level_sep="--", list_attrs=False, return_view=False)

    hdf5.close()


def hdf5_ls(hdf5):
    """
    List the datasets in an hdf5 instance.

    Parameters
    ----------

    hdf5 : h5py.File
        hdf5 instance

    Example
    -------

    >>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
    >>> hdf5_ls(hdf5)

    """

    hdf5_view(hdf5, location="./", max_depth=0, level_base='>', level_sep="--", list_attrs=False, return_view=False)


def hdf5_view(hdf5_obj, location="./", max_depth=None, level_base='>', level_sep="--", depth=None, list_attrs=True, list_dataset_attrs=False, return_view=False):
    """
    List recursively all datasets (and attributes) in an hdf5 object.

    Parameters
    ----------

    hdf5_obj : h5py.File
        opened instance of the hdf5

    location : str
        path inside the hdf5 where to start the listing

    max_depth : int
        Max depth of the search in the hdf5

    level_base : str
        string used as separator at the lower level (default '>')

    level_sep : str
        string used as separator at higher levels (default '--')

    depth : int
        current level depth

    list_attrs : bool
        default is True, list the attributes

    list_dataset_attrs : bool
        default is False, list the special attributes defined for each dataset by pyhdf5_handler

    return_view : bool
        return the view as a list of strings (do not print on screen)

    Returns
    -------

    list : optional, the view of the hdf5 as a list of strings

    Examples
    --------

    view the content of an hdf5
    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)
    >>> hdf5_handler.hdf5_view(hdf5)
    >>> hdf5.close()

    """

    result = []

    if max_depth is not None:

        if depth is not None:
            depth = depth + 1
        else:
            depth = 0

        if depth > max_depth:
            return result

    hdf5 = hdf5_obj[location]

    list_attribute = []
    if list_attrs or list_dataset_attrs:
        tmp_list_attribute = list(hdf5.attrs.keys())
        list_keys_matching_attributes = ["_" + element for element in list(hdf5.keys())]

    if list_attrs:

        list_attribute.extend(list(filter(
            lambda l: l not in list_keys_matching_attributes, tmp_list_attribute)))

    if list_dataset_attrs:

        list_attribute.extend(list(filter(
            lambda l: l in list_keys_matching_attributes, tmp_list_attribute)))

    for key in list_attribute:
        values = hdf5.attrs[key]
        sub_location = os.path.join(location, key)
        if isinstance(values, (int, float, bool, np.int64, np.float64, np.int32, np.float32, np.bool_)):
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, value={values}")
        elif isinstance(values, str) and len(values) < 20:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values}")
        else:
            result.append(
                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values[0:20]}...")

        if not return_view:
            if len(result) > 0:
                print(result[-1])

    for hdf5_key, item in hdf5.items():

        if isinstance(item, h5py.Group):

            sub_location = os.path.join(location, hdf5_key)

            if "ndarray_ds" in item.keys():
                result.append(f"{level_base}| {sub_location}, ndarray")
            else:
                result.append(f"{level_base}| {sub_location}, group")

            if not return_view:
                if len(result) > 0:
                    print(result[-1])

            res = hdf5_view(hdf5_obj, sub_location, max_depth=max_depth,
                            level_base=level_base + level_sep, depth=depth,
                            list_attrs=list_attrs,
                            list_dataset_attrs=list_dataset_attrs,
                            return_view=True)

            for element in res:
                result.append(element)
                if not return_view:
                    print(element)

        elif isinstance(item, h5py.Dataset):

            if item[:].dtype.char == "S":
                values = item[:].astype("U")
            else:
                values = item[:]

            sub_location = os.path.join(location, hdf5_key)
            result.append(
                f"{level_base}| {sub_location}, dataset, type={type(values)}, shape={values.shape}")

            if not return_view:
                if len(result) > 0:
                    print(result[-1])

    if return_view:
        return result
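
# A sketch of the kind of view produced (hypothetical file layout):
#
#   >>> hdf5file_view("./demo.hdf5")
#   >| ./meta, group
#   >--| ./meta/author, attribute, type=<class 'str'>, len=2, value=me
#   >| ./values, dataset, type=<class 'numpy.ndarray'>, shape=(3,)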
def close_all_hdf5_file():
17def close_all_hdf5_file():
18
19    for obj in gc.get_objects():   # Browse through ALL objects
20        if isinstance(obj, h5py.File):   # Just HDF5 files
21            try:
22                print(f"try closing {obj}")
23                obj.close()
24            except:
25                pass  # Was already closed
def open_hdf5(path, read_only=False, replace=False, wait_time=0):
 28def open_hdf5(path, read_only=False, replace=False, wait_time=0):
 29    """
 30    
 31    Open or create an HDF5 file.
 32
 33    Parameters
 34    ----------
 35    
 36    path : str
 37        The file path.
 38    
 39    read_only : boolean
 40        If true the access to the hdf5 fil is in read-only mode. Multi process can read the same hdf5 file simulteneously. This is not possible when access mode are append 'a' or write 'w'.
 41    
 42    replace: Boolean
 43        If true, the existing hdf5file is erased
 44    
 45    wait_time: int
 46        If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.
 47
 48    Returns
 49    -------
 50    
 51    f :
 52        A HDF5 object.
 53
 54    Examples
 55    --------
 56    
 57    >>> hdf5=smash.tools.hdf5_handler.open_hdf5("./my_hdf5.hdf5")  
 58    >>> hdf5.keys()  
 59    >>> hdf5.attrs.keys()  
 60    
 61    """
 62    f = None
 63    wait = 0
 64    while wait <= wait_time:
 65
 66        f = None
 67        exist_file=True
 68        
 69        try:
 70
 71            if read_only:
 72                if os.path.isfile(path):
 73                    f = h5py.File(path, "r")
 74
 75                else:
 76                    exist_file=False
 77                    raise ValueError(f"File {path} does not exist.")
 78
 79            else:
 80                if replace:
 81                    f = h5py.File(path, "w")
 82
 83                else:
 84                    if os.path.isfile(path):
 85                        f = h5py.File(path, "a")
 86
 87                    else:
 88                        f = h5py.File(path, "w")
 89        except:
 90            pass
 91
 92        if f is None:
 93            if not exist_file:
 94                print(f"File {path} does not exist.")
 95                return f
 96            else:
 97                print(
 98                    f"The file {path} is unvailable, waiting {wait}/{wait_time}s")
 99
100            wait = wait + 1
101
102            if wait_time > 0:
103                time.sleep(1)
104
105        else:
106            break
107
108    return f

Open or create an HDF5 file.

Parameters

path : str The file path.

read_only : boolean If true the access to the hdf5 fil is in read-only mode. Multi process can read the same hdf5 file simulteneously. This is not possible when access mode are append 'a' or write 'w'.

replace: Boolean If true, the existing hdf5file is erased

wait_time: int If the hdf5 is unavailable, the function will try to access serveral time and will wait wait_time seconds maximum. If this time is elapsed, the file won't be opened. This parameter is usefull if several program or threads need to read/write simultaneously in the same hdf5 database.

Returns

f : A HDF5 object.

Examples

>>> hdf5=smash.tools.hdf5_handler.open_hdf5("./my_hdf5.hdf5")  
>>> hdf5.keys()  
>>> hdf5.attrs.keys()
def add_hdf5_sub_group(hdf5, subgroup=None):
111def add_hdf5_sub_group(hdf5, subgroup=None):
112    """
113    Create a new subgroup in a HDF5 object
114
115    Parameters
116    ----------
117    
118    hdf5 : h5py.File
119        An hdf5 object opened with open_hdf5()
120    
121    subgroup: str
122        Path to a subgroub that must be created
123
124    Returns
125    -------
126    
127    hdf5 :
128        the HDF5 object.
129
130    Examples
131    --------
132    
133    >>> hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)  
134    >>> hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")  
135    >>> hdf5.keys()  
136    >>> hdf5.attrs.keys()  
137    
138    """
139    if subgroup is not None:
140        if subgroup == "":
141            subgroup = "./"
142
143        hdf5.require_group(subgroup)
144
145    return hdf5

Create a new subgroup in a HDF5 object

Parameters

hdf5 : h5py.File An hdf5 object opened with open_hdf5()

subgroup: str Path to a subgroub that must be created

Returns

hdf5 : the HDF5 object.

Examples

>>> hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True)  
>>> hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup")  
>>> hdf5.keys()  
>>> hdf5.attrs.keys()
def hdf5_dataset_creator(hdf5: h5py._hl.files.File, name: str, value):
480def hdf5_dataset_creator(hdf5:h5py.File,name:str,value):
481    """
482    Write any value in an hdf5 object
483
484    Parameters
485    ----------
486    
487    hdf5 : h5py.File
488        an hdf5 object
489    
490    name : str
491        name of the dataset
492    
493    value : any
494        value to write in the hdf5
495    
496    """
497    # save ndarray datast
498    if isinstance(value, str):
499        dataset=_hdf5_handle_str(name,value)
500    
501    elif isinstance(value, numbers.Number):
502        dataset=_hdf5_handle_numbers(name,value)
503    
504    elif value is None:
505        dataset=_hdf5_handle_none(name,value)
506    
507    elif isinstance(value, (pd.Timestamp, np.datetime64, datetime.date)):
508        dataset=_hdf5_handle_timestamp(name,value)
509    
510    elif isinstance(value,  pd.DatetimeIndex):
511        dataset=_hdf5_handle_DatetimeIndex(name,value)
512        
513    elif isinstance(value, list):
514        dataset=_hdf5_handle_list(name,value)
515    
516    elif isinstance(value, tuple):
517        dataset=_hdf5_handle_list(name,value)
518    
519    elif isinstance(value, np.ndarray):
520        
521        if len(value.dtype) > 0 and len(value.dtype.names) > 0:
522            _hdf5_handle_ndarray(hdf5,name,value)
523            return
524        else:
525            dataset=_hdf5_handle_array(name,value)
526    
527    else:
528        
529        hdf5 = add_hdf5_sub_group(
530            hdf5, subgroup=name)
531
532        newdict = object_handler.read_object_as_dict(value)
533
534        save_dict_to_hdf5(hdf5[name], newdict)
535        return  # the object was written as a subgroup; no dataset left to create
536    
537    _hdf5_create_dataset(hdf5, dataset)

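A hedged sketch of the dispatch above: each supported Python type is routed to its private `_hdf5_handle_*` helper (not shown here) and ends up as one dataset. The file name `demo.hdf5` is illustrative.

import numpy as np
import pandas as pd

hdf5 = hdf5_handler.open_hdf5("./demo.hdf5", replace=True)
hdf5_handler.hdf5_dataset_creator(hdf5, "name", "gauge_01")                               # str
hdf5_handler.hdf5_dataset_creator(hdf5, "area", 154.2)                                    # number
hdf5_handler.hdf5_dataset_creator(hdf5, "dates", pd.date_range("2020-01-01", periods=3))  # DatetimeIndex
hdf5_handler.hdf5_dataset_creator(hdf5, "levels", np.arange(5))                           # ndarray
hdf5.close()
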
646def save_dict_to_hdf5(hdf5, dictionary):
647    """
648    
649    dump a dictionary to an hdf5 file
650
651    Parameters
652    ----------
653    
654    hdf5 : h5py.File
655        an hdf5 object
656    
657    dictionary : dict
658        a custom python dictionary
659    
660    """
661    if isinstance(dictionary, dict):
662        for attr, value in dictionary.items():
663            #print("looping:",attr,value)
664            try:
665
666                attribute_name = str(attr)
667                for character in '/ ':
668                    attribute_name = attribute_name.replace(
669                        character, '_')
670
671                if isinstance(value, dict):
672                    #print("---> dictionary: ",attr, value)
673
674                    hdf5 = add_hdf5_sub_group(hdf5, subgroup=attribute_name)
675                    save_dict_to_hdf5(hdf5[attribute_name], value)
676
677                else:
678
679                    hdf5_dataset_creator(hdf5, attribute_name, value)
680                   
681            except Exception:
682
683                raise ValueError(
684                    f"Unable to save attribute {str(attr)} with value {value}")
685
686    else:
687
688        raise ValueError(f"{dictionary} must be a instance of dict.")

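A minimal sketch of the recursion above: nested dictionaries become nested groups, leaves become datasets, and '/' or spaces in key names are replaced by '_'.

hdf5 = hdf5_handler.open_hdf5("./demo.hdf5", replace=True)
hdf5_handler.save_dict_to_hdf5(hdf5, {"basin": {"name": "cance", "area": 154.2}})
hdf5.close()  # the file now contains the group /basin with two datasets
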
692def save_dict_to_hdf5file(path_to_hdf5, dictionary=None, location="./", replace=False, wait_time=0):
693    """
694    
695    dump a dictionary to an hdf5 file
696
697    Parameters
698    ----------
699    
700    path_to_hdf5 : str
701        path to the hdf5 file
702    
703    dictionary : dict | None
704        a dictionary containing the data to be saved
705    
706    location : str
707        path location or subgroup where to write data in the hdf5 file
708    
709    replace : Boolean
710        replace an existing hdf5 file. Default is False
711    
712    wait_time: int
713        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write the same hdf5 database simultaneously.
714
715    Examples
716    --------
717    
718    >>> setup, mesh = smash.load_dataset("cance")  
719    >>> model = smash.Model(setup, mesh)  
720    >>> model.run(inplace=True)  
721    >>>  
722    >>> smash.tools.hdf5_handler.save_dict_to_hdf5file("saved_dictionary.hdf5", mesh)
723    
724    """
725    if isinstance(dictionary, dict):
726        hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)
727
728        if hdf5 is None:
729            return
730
731        hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
732        save_dict_to_hdf5(hdf5[location], dictionary)
733
734    else:
735        raise ValueError(f"The input {dictionary} must be a instance of dict.")
736
737    hdf5.close()

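The same dump without managing the file handle yourself; a sketch with hypothetical data:

data = {"mesh": {"nrow": 10, "ncol": 12}, "name": "cance"}
hdf5_handler.save_dict_to_hdf5file("saved_dictionary.hdf5", data, location="model1", replace=True)
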
740def save_object_to_hdf5file(
741    path_to_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False, wait_time=0
742):
743    """
744    
745    dump an object to an hdf5 file
746
747    Parameters
748    ----------
749    
750    path_to_hdf5 : str
751        path to the hdf5 file
752    
753    instance : object
754        A custom python object to be saved into an hdf5
755    
756    keys_data : list | dict
757        optional, a list or a dictionary of the attributes to be saved
758    
759    location : str
760        path location or subgroup where to write data in the hdf5 file
761    
762    sub_data : dict | None
763        optional, an extra dictionary of additional data to be saved along with the object
764    
765    replace : Boolean
766        replace an existing hdf5 file. Default is False
767    
768    wait_time: int
769        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write the same hdf5 database simultaneously.
770    
771    """
772
773    if keys_data is None:
774        keys_data = object_handler.generate_object_structure(instance)
775
776    # print(keys_data)
777
778    hdf5 = open_hdf5(path_to_hdf5, replace=replace, wait_time=wait_time)
779
780    if hdf5 is None:
781        return None
782
783    hdf5 = add_hdf5_sub_group(hdf5, subgroup=location)
784
785    _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data)
786
787    if isinstance(sub_data, dict):
788        save_dict_to_hdf5(hdf5[location], sub_data)
789
790    hdf5.close()

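A hedged sketch with a hypothetical class: when keys_data is omitted, the attribute list is derived by object_handler.generate_object_structure, so any plain Python object with serialisable attributes should work.

class Basin:
    def __init__(self):
        self.name = "cance"
        self.area = 154.2

hdf5_handler.save_object_to_hdf5file("basin.hdf5", Basin(), location="basin", replace=True)
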
793def read_hdf5file_as_dict(path_to_hdf5, location="./", wait_time=0, read_attrs=True,read_dataset_attrs=False):
794    """
795    
796    Open, read and close an hdf5 file
797
798    Parameters
799    ----------
800    
801    path_to_hdf5 : str
802        path to the hdf5 file
803    
804    location: str
805        place in the hdf5 from which we start reading the file
806    
807    wait_time: int
808        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write the same hdf5 database simultaneously.
809    
810    read_attrs : bool
811        read and import attributes in the dictionary.
812    
813    read_dataset_attrs : bool
814        read and import the special attributes attached to each dataset by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.
815
816    Returns
817    -------
818    
819    dictionary : dict, a dictionary of all keys and attributes included in the hdf5 file
820
821    Examples
822    --------
823    
824    read an hdf5 file  
825    >>> dictionary=hdf5_handler.read_hdf5file_as_dict("./multi_model.hdf5", location="model1")  
826    """
827
828    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)
829
830    if hdf5 is None:
831        return None
832
833    dictionary = read_hdf5_as_dict(hdf5[location], read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs)
834
835    hdf5.close()
836
837    return dictionary

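A round-trip sketch built on the save example above (file and group names are illustrative):

dictionary = hdf5_handler.read_hdf5file_as_dict("saved_dictionary.hdf5", location="model1")
print(dictionary.keys())
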
841def read_hdf5_as_dict(hdf5, read_attrs=True, read_dataset_attrs=False):
842    """
843    Load an hdf5 file
844
845    Parameters
846    ----------
847    
848    hdf5 : h5py.File
849        an instance of hdf5, opened with the function open_hdf5()
850    
851    read_attrs : bool
852        read and import attributes in the dictionary.
853    
854    read_dataset_attrs : bool
855        read and import the special attributes attached to each dataset by pyhdf5_handler. These attributes only store the original datatype of the data stored in the dataset.
856    
857    Returns
858    -------
859    
860    dictionary : dict, a dictionary of all keys and attributes included in the hdf5 file
861
862    Examples
863    --------
864    
865    read only a part of an hdf5 file  
866    >>> hdf5=hdf5_handler.open_hdf5("./multi_model.hdf5")  
867    >>> dictionary=hdf5_handler.read_hdf5_as_dict(hdf5["model1"])  
868    >>> dictionary.keys()  
869    
870    """
871
872    if not isinstance(hdf5, (h5py.File, h5py.Group, h5py.Dataset, h5py.Datatype)):
873        print("Error: input arg is not an h5py File, Group or Dataset instance")
874        return {}
875
876    dictionary = {}
877
878    for key, item in hdf5.items():
879
880        if str(type(item)).find("group") != -1:
881
882            if key == 'ndarray_ds':
883
884                # dictionary.update({key: _read_ndarray_datastructure(hdf5)})
885                return _read_ndarray_datastructure(hdf5)
886
887            else:
888
889                dictionary.update({key: read_hdf5_as_dict(item, read_attrs=read_attrs, read_dataset_attrs=read_dataset_attrs)})
890
891
892        if str(type(item)).find("dataset") != -1:
893            
894            if "_"+key in hdf5.attrs.keys():
895                expected_type=hdf5.attrs["_"+key]
896                values=hdf5_read_dataset(item, expected_type)
897                
898            else:
899                
900                values=item[:]
901                
902            dictionary.update({key: values})
903    
904    
905    list_attribute=[]
906    if read_attrs or read_dataset_attrs:
907        tmp_list_attribute=list(hdf5.attrs.keys())
908        hdf5_item_matching_attributes=["_"+element for element in list(hdf5.keys())]
909    
910    if read_attrs:
911        
912        list_attribute.extend(list(filter( lambda l: l not in hdf5_item_matching_attributes, tmp_list_attribute)))
913    
914    if read_dataset_attrs:
915        
916        list_attribute.extend(list(filter( lambda l: l in hdf5_item_matching_attributes, tmp_list_attribute)))
917    
918    for key in list_attribute:
919        dictionary.update({key: hdf5.attrs[key]})
920    
921    return dictionary

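Reading only a branch of an already-opened file; the two flags control whether plain attributes and the per-dataset "_<name>" type attributes are copied into the result:

hdf5 = hdf5_handler.open_hdf5("./multi_model.hdf5", read_only=True)
sub = hdf5_handler.read_hdf5_as_dict(hdf5["model1"], read_attrs=True, read_dataset_attrs=False)
hdf5.close()
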
925def hdf5_read_dataset(item, expected_type=None):
926    """
927    Read a dataset stored in an hdf5 database
928    
929    Parameters
930    ----------
931    
932    item : h5py.Dataset
933        an hdf5 dataset/item
934    
935    expected_type: str
936        the expected type as a string, i.e. str(type(value))
937    
938    Returns
939    -------
940    
941    value : the value read from the hdf5, any type matching the expected type
942    
943    
944    """
945    
946    if expected_type == str(type("str")):
947        
948        values=item[0].decode()
949    
950    elif expected_type == str(type(1.0)):
951        
952        values=item[0]
953        
954    elif expected_type == "_None_":
955        
956        values=None
957        
958    elif expected_type in (str(pd.Timestamp), str(np.datetime64), str(datetime.datetime)) :
959
960        if expected_type==str(pd.Timestamp):
961            values=pd.Timestamp(item[0].decode())
962            
963        elif expected_type==str(np.datetime64):
964            values=np.datetime64(item[0].decode())
965            
966        elif expected_type==str(datetime.datetime):
967            values=datetime.datetime.fromisoformat(item[0].decode())
968            
969        else:
970            values=item[0].decode()
971    
972    else:
973        
974        if item[:].dtype.char == "S":
975
976            values = item[:].astype("U")
977
978        elif item[:].dtype.char == "O":
979
980            # decode list if required
981            decoded_item = list()
982            for it in item[:]:
983                
984                decoded_item.append(it.decode())
985                
986            values = decoded_item
987        
988        else:
989            values = item[:]
990    
991    return values

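A hedged sketch of the convention used above: datasets written by pyhdf5_handler carry a sibling attribute named "_<dataset name>" holding str(type(value)), which this function uses to restore the original Python type.

hdf5 = hdf5_handler.open_hdf5("./demo.hdf5", read_only=True)
item = hdf5["name"]
expected_type = hdf5.attrs.get("_name")                      # e.g. "<class 'str'>"
value = hdf5_handler.hdf5_read_dataset(item, expected_type)  # decoded str, not bytes
hdf5.close()
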
 995def get_hdf5file_attribute(path_to_hdf5="", location="./", attribute=None, wait_time=0):
 996    """
 997    Get the value of an attribute in the hdf5file
 998
 999    Parameters
1000    ----------
1001    
1002    path_to_hdf5 : str
1003        the path to the hdf5file
1004    
1005    location : str
1006        path inside the hdf5 where the attribute is stored
1007    
1008    attribute: str
1009        attribute name
1010    
1011    wait_time: int
1012        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write the same hdf5 database simultaneously.
1013
1014    Returns
1015    -------
1016    
1017    return_attribute : the value of the attribute
1018
1019    Examples
1020    --------
1021    
1022    get an attribute  
1023    >>> attribute=hdf5_handler.get_hdf5file_attribute("./multi_model.hdf5", attribute="my_attribute_name")  
1024    
1025    """
1026
1027    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)
1028
1029    if hdf5_base is None:
1030        return None
1031
1032    hdf5 = hdf5_base[location]
1033
1034    return_attribute = hdf5.attrs[attribute]
1035
1036    hdf5_base.close()
1037
1038    return return_attribute

1041def get_hdf5file_dataset(path_to_hdf5="", location="./", dataset=None, wait_time=0):
1042    """
1043    Get the value of a dataset in the hdf5file
1044
1045    Parameters
1046    ----------
1047    
1048    path_to_hdf5 : str
1049        the path to the hdf5file
1050    
1051    location : str
1052        path inside the hdf5 where the dataset is stored
1053    
1054    dataset: str
1055        dataset name
1056    
1057    wait_time: int
1058        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write the same hdf5 database simultaneously.
1059
1060    Returns
1061    -------
1062    
1063    return_dataset : the value of the dataset
1064
1065    Examples
1066    --------
1067    
1068    get a dataset  
1069    >>> dataset=hdf5_handler.get_hdf5file_dataset("./multi_model.hdf5", dataset="my_dataset_name")  
1070    
1071    """
1072
1073    hdf5_base = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)
1074
1075    if hdf5_base is None:
1076        return None
1077
1078    hdf5 = hdf5_base[location]
1079    
1080    if "_"+dataset in hdf5.attrs.keys():
1081        expected_type=hdf5.attrs["_"+dataset]
1082        return_dataset=hdf5_read_dataset(hdf5, expected_type)
1083        
1084    else:
1085        return_dataset = hdf5[dataset][:]
1086
1087    hdf5_base.close()
1088
1089    return return_dataset

1092def get_hdf5file_item(path_to_hdf5="", location="./", item=None, wait_time=0, search_attrs=False):
1093    """
1094    
1095    Get a custom item in an hdf5file
1096
1097    Parameters
1098    ----------
1099    
1100    path_to_hdf5 : str
1101        the path to the hdf5file
1102    
1103    location : str
1104        path inside the hdf5 where the item is stored. If item is None, item is set to basename(location)
1105    
1106    item: str
1107        item name
1108    
1109    wait_time: int
1110        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write the same hdf5 database simultaneously.
1111    
1112    search_attrs: bool
1113        Default is False. If True, the function will also search for the item among the attributes first.
1114
1115    Returns
1116    -------
1117    
1118    return : the item value. Can be a dictionary (group), a numpy array, a string, a float, an int ...
1119
1120    Examples
1121    --------
1122    
1123    get the dataset 'dataset'  
1124    >>> dataset=hdf5_handler.get_hdf5file_item("./multi_model.hdf5", location="path/in/hdf5/dataset")  
1125    
1126    """
1127
1128    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)
1129
1130    if hdf5 is None:
1131        return None
1132
1133    hdf5_item = get_hdf5_item(hdf5_instance=hdf5, location=location, item=item, search_attrs=search_attrs)
1134
1135    hdf5.close()
1136
1137    return hdf5_item

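Because item defaults to basename(location) (the os.path.split in get_hdf5_item below), the two calls in this sketch are equivalent; paths are illustrative.

v1 = hdf5_handler.get_hdf5file_item("./multi_model.hdf5", location="model1/levels")
v2 = hdf5_handler.get_hdf5file_item("./multi_model.hdf5", location="model1", item="levels")
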
1140def get_hdf5_item(hdf5_instance=None, location="./", item=None, search_attrs=False):
1141    """
1142    
1143    Get a custom item in an hdf5file
1144
1145    Parameters
1146    ----------
1147
1148    hdf5_instance : h5py.File
1149        an instance of an hdf5
1150    
1151    location : str
1152        path inside the hdf5 where the item is stored. If item is None, item is set to basename(location)
1153    
1154    item: str
1155        item name
1156    
1157    search_attrs: bool
1158        Default is False. If True, the function will search for the item among the attributes first.
1159
1160    Returns
1161    -------
1162    
1163    return : the item value. Can be a dictionary (group), a numpy array, a string, a float, an int ...
1164
1165    Examples
1166    --------
1167    
1168    get the dataset 'dataset' from an opened hdf5 instance  
1169    >>> dataset=hdf5_handler.get_hdf5_item(hdf5_instance=hdf5, location="path/in/hdf5/dataset")  
1170    
1171    """
1172
1173    if item is None and isinstance(location, str):
1174        head, tail = os.path.split(location)
1175        if len(tail) > 0:
1176            item = tail
1177        location = head
1178
1179    if not isinstance(item, str):
1180        print(f"Bad search item: {item}")
1181        return None
1182
1184
1185    # print(f"Getting item '{item}' at location '{location}'")
1186    hdf5 = hdf5_instance[location]
1187
1188    # first search in the attribute
1189    if search_attrs:
1190        list_attribute = hdf5.attrs.keys()
1191        if item in list_attribute:
1192            return hdf5.attrs[item]
1193
1194    # then search in groups and dataset
1195    list_keys = hdf5.keys()
1196    if item in list_keys:
1197
1198        hdf5_item = hdf5[item]
1199
1200        # print("Got Item ", hdf5_item)
1201
1202        if str(type(hdf5_item)).find("group") != -1:
1203            
1204            if item == 'ndarray_ds':
1205
1206                return _read_ndarray_datastructure(hdf5)
1207            
1208            else:
1209                
1210                returned_dict = read_hdf5_as_dict(hdf5_item)
1211                
1212                return returned_dict
1213
1214        elif str(type(hdf5_item)).find("dataset") != -1:
1215            
1216            if "_"+item in hdf5.attrs.keys():
1217                expected_type=hdf5.attrs["_"+item]
1218                values=hdf5_read_dataset(hdf5_item, expected_type)
1219            else:
1220                values=hdf5_item[:]
1221
1222            return values
1223
1224        else:
1225
1226            return hdf5_item
1227
1228    else:
1229
1230        return None

1233def search_in_hdf5file(path_to_hdf5, key=None, location="./", wait_time=0, search_attrs=False):
1234    """
1235    
1236    Search for a key in an hdf5 file and return a list of dictionaries {path, key, datatype, value}. Key and value are set only when the match is an attribute or a dataset (None otherwise)
1237
1238    Parameters
1239    ----------
1240    
1241    path_to_hdf5 : str
1242        the path to the hdf5file
1243    
1244    key: str
1245        key to search in the hdf5file
1246    
1247    location : str
1248        path inside the hdf5 where to start the search
1249    
1250    wait_time: int
1251        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write the same hdf5 database simultaneously.
1252    
1253    search_attrs : Bool
1254        Default is False; also search in the attributes
1255    
1256    Returns
1257    -------
1258    
1259    results : list, the list of matches found in the hdf5 file
1260
1261    Examples
1262    --------
1263    
1264    search in a hdf5file  
1265    >>> matchkey=hdf5_handler.search_in_hdf5file(hdf5filename, key='Nom_du_BV',location="./")  
1266    
1267    """
1268    if key is None:
1269        print("Nothing to search, use key=")
1270        return []
1271
1272    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)
1273
1274    if hdf5 is None:
1275        return None
1276
1277    results = search_in_hdf5(hdf5, key, location=location, search_attrs=search_attrs)
1278
1279    hdf5.close()
1280
1281    return results

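Each match is returned as a dictionary with the keys path, key, datatype and value; a short sketch with an illustrative key:

matches = hdf5_handler.search_in_hdf5file("./multi_model.hdf5", key="levels", search_attrs=True)
for match in matches:
    print(match["path"], match["datatype"], match["value"])
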
1284def search_in_hdf5(hdf5_base, key=None, location="./", search_attrs=False):
1285    """
1286    
1287    Search for a key in an hdf5 and return a list of dictionaries {path, key, datatype, value}. Key and value are set only when the match is an attribute or a dataset (None otherwise)
1288
1289    Parameters
1290    ----------
1291    
1292    hdf5_base : h5py.File
1293        opened instance of the hdf5
1294    
1295    key: str
1296        key to search in the hdf5file
1297    
1298    location : str
1299        path inside the hdf5 where to start the search
1300    
1301    search_attrs : Bool
1302        Default is False; also search in the attributes
1303    
1304    Returns
1305    -------
1306    
1307    results : list, the list of matches found in the hdf5
1308
1309    Examples
1310    --------
1311    
1312    search in a hdf5  
1313    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)  
1314    >>> matchkey=hdf5_handler.search_in_hdf5(hdf5, key='Nom_du_BV',location="./")  
1315    >>> hdf5.close()  
1316    
1317    """
1318    if key is None:
1319        print("Nothing to search, use key=")
1320        return []
1321
1322    result = []
1323
1324    hdf5 = hdf5_base[location]
1325
1326    if search_attrs:
1327        list_attribute = hdf5.attrs.keys()
1328    
1329        if key in list_attribute:
1330            result.append({"path": location, "key": key,
1331                          "datatype": "attribute", "value": hdf5.attrs[key]})
1332        
1333    for hdf5_key, item in hdf5.items():
1334
1335        if str(type(item)).find("group") != -1:
1336
1337            sub_location = os.path.join(location, hdf5_key)
1338            
1339            # print(hdf5_key,sub_location,list(hdf5.keys()))
1340            
1341            if hdf5_key == key:
1342                
1343                if "ndarray_ds" in item.keys():
1344                    
1345                    result.append({"path": sub_location, 
1346                                   "key": None,
1347                                   "datatype": "ndarray", 
1348                                   "value": _read_ndarray_datastructure(item)})
1349                    
1350                else:
1351                    
1352                    result.append({"path": sub_location, "key": None,
1353                              "datatype": "group", "value": None})
1354
1355            res = search_in_hdf5(hdf5_base, key, sub_location, search_attrs=search_attrs)
1356            
1357            result.extend(res)
1360                
1361
1362        if str(type(item)).find("dataset") != -1:
1363
1364            if hdf5_key == key:
1365
1366                if item[:].dtype.char == "S":
1367
1368                    values = item[:].astype("U")
1369
1370                elif item[:].dtype.char == "O":
1371
1372                    # decode list if required
1373                    decoded_item = list()
1374                    for it in item[:]:
1375                        decoded_item.append(it.decode())
1376
1377                    values = decoded_item
1378
1379                else:
1380
1381                    values = item[:]
1382                
1383                result.append({"path": location, 
1384                               "key": key,
1385                              "datatype": "dataset", 
1386                              "value": values})
1387    
1388    return result

1391def hdf5file_view(path_to_hdf5, location="./", max_depth=None, level_base='>', level_sep="--", depth=None, wait_time=0, list_attrs=True, list_dataset_attrs=False, return_view=False):
1392    """
1393    
1394    Print (or return) a tree view of an hdf5 file: recursively list all groups, datasets and attributes.
1395
1396    Parameters
1397    ----------
1398    
1399    
1400    path_to_hdf5 : str
1401        Path to an hdf5 database
1402    
1403    location : str
1404        path inside the hdf5 where to start the research
1405    
1406    max_depth: int
1407        Max depth of the search in the hdf5
1408    
1409    level_base: str
1410        string used as separator at the lower level (default '>')
1411    
1412    level_sep: str
1413        string used as separator at higher level (default '--')
1414    
1415    depth: int
1416        current depth level
1417    
1418    list_attrs: bool
1419        default is True, list the attributes
1420    
1421    list_dataset_attrs: bool
1422        default is False, list the special attributes defined for each dataset by pyhdf5_handler
1423    
1424    return_view: bool
1425        return the view as a list of strings instead of printing it to screen
1426    
1427    wait_time: int
1428        If the hdf5 file is unavailable, the function will retry for up to wait_time seconds. If this time elapses, the file won't be opened and the function will return None. This parameter is useful if several programs or threads need to read/write the same hdf5 database simultaneously.
1429
1430    Returns
1431    -------
1432    
1433    results : list, optional, the view of the hdf5 as a list of strings (only if return_view is True)
1434
1435    Examples
1436    --------
1437    
1438    print the tree view of an hdf5 file  
1439    >>> hdf5_handler.hdf5file_view("./multi_model.hdf5")  
1440    
1441    """
1442
1443    hdf5 = open_hdf5(path_to_hdf5, read_only=True, wait_time=wait_time)
1444
1445    if hdf5 is None:
1446        return None
1447
1448    results = hdf5_view(hdf5, 
1449                        location=location, 
1450                        max_depth=max_depth,
1451                        level_base=level_base, 
1452                        level_sep=level_sep, 
1453                        depth=depth,
1454                        list_attrs=list_attrs,
1455                        list_dataset_attrs=list_dataset_attrs,
1456                        return_view=return_view)
1457
1458    hdf5.close()
1459
1460    return results

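With return_view=True the tree is returned as a list of strings instead of being printed; a sketch:

view = hdf5_handler.hdf5file_view("./multi_model.hdf5", max_depth=1, return_view=True)
print("\n".join(view))
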
1464def hdf5file_ls(path_to_hdf5, location='./'):
1465    """
1466    List the datasets in an hdf5 file.
1467
1468    Parameters
1469    ----------
1470    
1471    path_to_hdf5 : str
1472        path to a hdf5file
1473    
1474    location: str
1475        path inside the hdf5 where to start the search
1476    
1477    Example
1478    -------
1479    
1480    >>> hdf5file_ls("test.hdf5")
1481    
1482    """
1483    
1484    hdf5 = open_hdf5(path_to_hdf5, read_only=True)
1485    if hdf5 is None:
1486        return
1487    
1488    hdf5_view(hdf5, location=location, max_depth=0, level_base='>', level_sep="--", list_attrs=False, return_view=False)
1489    hdf5.close()

1490def hdf5_ls(hdf5):
1491    """
1492    List the datasets in an hdf5 instance.
1493
1494    Parameters
1495    ----------
1496    
1497    hdf5 : h5py.File
1498        hdf5 instance
1499    
1503    Example
1504    -------
1505    
1506    >>> hdf5 = open_hdf5(path_to_hdf5, read_only=True)
1507    >>> hdf5_ls(hdf5)
1508    
1509    """
1510    
1511    hdf5_view(hdf5, location="./", max_depth=0, level_base='>', level_sep="--", list_attrs=False, return_view=False)

1515def hdf5_view(hdf5_obj, location="./", max_depth=None, level_base='>', level_sep="--", depth=None, list_attrs=True, list_dataset_attrs=False, return_view=False):
1516    """
1517    Recursively list all datasets (and attributes) in an hdf5 object.
1518
1519    Parameters
1520    ----------
1521    
1522    hdf5_obj : h5py.File
1523        opened instance of the hdf5
1524    
1525    location : str
1526        path inside the hdf5 where to start the research
1527    
1528    max_depth: int
1529        Max depth of the search in the hdf5
1530    
1531    level_base: str
1532        string used as separator at the lower level (default '>')
1533    
1534    level_sep: str
1535        string used as separator at higher level (default '--')
1536    
1537    depth: int
1538        current level depth
1539    
1540    list_attrs: bool
1541        default is True, list the attributes
1542    
1543    list_dataset_attrs: bool
1544        default is False, list the special attributes defined for each dataset by pyhdf5_handler
1545    
1546    return_view: bool
1547        return the view as a list of strings instead of printing it to screen
1548    
1549    Returns
1550    -------
1551    
1552    result : list, optional, the view of the hdf5 as a list of strings (only if return_view is True)
1553
1554    Examples
1555    --------
1556    
1557    print the tree view of an opened hdf5  
1558    >>> hdf5=hdf5_handler.open_hdf5(hdf5_file)  
1559    >>> hdf5_handler.hdf5_view(hdf5)  
1560    >>> hdf5.close()  
1561    
1562    """
1563
1564    result = []
1565
1566    if max_depth is not None:
1567
1568        if depth is not None:
1569            depth = depth+1
1570        else:
1571            depth = 0
1572
1573        if depth > max_depth:
1574            return result
1575
1576    hdf5 = hdf5_obj[location]
1577    
1578    list_attribute=[]
1579    if list_attrs or list_dataset_attrs:
1580        tmp_list_attribute = list(hdf5.attrs.keys())
1581        list_keys_matching_attributes=["_" + element for element in list(hdf5.keys())]
1582    
1583    if list_attrs:
1584        
1585        list_attribute.extend(list(filter( lambda l: l not in list_keys_matching_attributes, tmp_list_attribute)))
1586    
1587    if list_dataset_attrs:
1588        
1589        list_attribute.extend(list(filter( lambda l: l in list_keys_matching_attributes, tmp_list_attribute)))
1590    
1591    
1592    for key in list_attribute:
1593        values = hdf5.attrs[key]
1594        sub_location = os.path.join(location, key)
1595        if isinstance(values, (int, float, np.int64, np.float64, np.int32, np.float32, np.bool_)):
1596            result.append(
1597                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, value={values}")
1598        elif isinstance(values, str) and len(values) < 20:
1599            result.append(
1600                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values}")
1601        else:
1602            result.append(
1603                f"{level_base}| {sub_location}, attribute, type={type(hdf5.attrs[key])}, len={len(values)}, value={values[0:20]}...")
1604            
1605        if not return_view:
1606            if len(result)>0:
1607                print(result[-1])
1608
1609    for hdf5_key, item in hdf5.items():
1610
1611        if str(type(item)).find("group") != -1:
1612            
1613            sub_location = os.path.join(location, hdf5_key)
1614            
1615            if "ndarray_ds" in item.keys():
1616                result.append(f"{level_base}| {sub_location}, ndarray")
1617            else:
1618                result.append(f"{level_base}| {sub_location}, group")
1619            
1620            if not return_view:
1621                if len(result)>0:
1622                    print(result[-1])
1623
1624            res = hdf5_view(hdf5_obj, sub_location, max_depth=max_depth,
1625                            level_base=level_base+level_sep, depth=depth,
1626                            list_attrs=list_attrs, list_dataset_attrs=list_dataset_attrs,
1627                            return_view=True)
1628
1629            result.extend(res)
1630
1631        if str(type(item)).find("dataset") != -1:
1632
1633            if item[:].dtype.char == "S":
1634                values = item[:].astype("U")
1635            else:
1636                values = item[:]
1637
1638            sub_location = os.path.join(location, hdf5_key)
1639            # result.append({"path":location, "key":key, "datatype":"dataset","value":values})
1640            result.append(
1641                f"{level_base}| {sub_location}, dataset, type={type(values)}, shape={values.shape}")
1642            
1643            if not return_view:
1644                if len(result)>0:
1645                    print(result[-1])
1646    
1647    if return_view:
1648        return result
