Source code for namdtools.io.read

"""
read.py
language: Python3
author: C. Lockhart <clockha2@gmu.edu>
"""

from namdtools.core import Log


# Read output from NAMD run
# Convert to object? Store raw output?
def read_log(fname, glob=None, usecols=None):
    """
    Read one or more NAMD output files into a single :class:`Log`.

    Parameters
    ----------
    fname : str
        Name of the NAMD output file, or a pattern when `glob` is truthy.
    glob : bool or dict
        When truthy, `fname` is expanded with ``pathogen.vglob``. A dictionary
        is forwarded to ``vglob`` as keyword arguments; any other truthy value
        expands the pattern without extra arguments. (Default: None)
    usecols : list-like or callable
        (Optional) Columns to return; passed through to `_read_log`.

    Returns
    -------
    Log
        Wrapper around the row-wise concatenation of every file read.

    Raises
    ------
    FileNotFoundError
        If globbing was requested and produced no files.
    """

    # Imported lazily to keep package import time low
    import pandas as pd

    if glob:
        from pathogen import Path, vglob

        # Anything that is not a dictionary means "glob without extra arguments"
        if not isinstance(glob, dict):
            glob = {}

        # Expand the pattern; an empty result is an error
        fnames = vglob(fname, errors='raise', **glob)
        if not fnames:
            raise FileNotFoundError(fname)
    else:
        fnames = [fname]

    # Read every file with the same column selection
    tables = [_read_log(path, usecols=usecols) for path in fnames]

    # NOTE(review): `glob` was replaced by {} above when it was not a dictionary, so this
    # branch only runs for a non-empty dictionary `glob` -- confirm that is intended.
    if glob:
        tables = [table.assign(**Path(path).metadata) for path, table in zip(fnames, tables)]  # noqa

    # A single table is used as is; several are stacked with a fresh index
    if len(tables) == 1:
        combined = tables[0]
    else:
        combined = pd.concat(tables, ignore_index=True)

    return Log(combined)
# TODO make a Cython backend? Or a C backend? This is still slow.
def _read_log(fname, usecols=None):
    """
    Read NAMD output file.

    Parameters
    ----------
    fname : str
        Name of NAMD output file.
    usecols : list-like or callable
        (Optional) Specific columns to read in. A collection of strings is matched against
        the titles returned by `_parse_etitle`; anything else is converted with ``np.array``
        and used as is. NOTE(review): a callable is not given special treatment here --
        confirm whether callables are actually supported.

    Returns
    -------
    pandas.DataFrame
        One column per selected title, filled with the output of `_parse_energy`.

    Raises
    ------
    AttributeError
        If `usecols` names a column that is not among the parsed titles.
    """

    # Import relevant packages
    from namdtools.io._read_utils import _parse_energy, _parse_etitle
    import numpy as np
    import pandas as pd

    # Parse etitle
    etitle = _parse_etitle(fname)

    # Translate the requested columns into integer positions within etitle
    usecols = _resolve_usecols(etitle, usecols)

    # Parse energy
    energy = _parse_energy(fname, usecols)

    # Return as DataFrame; asarray makes positional indexing work for any sequence of titles
    return pd.DataFrame(energy, columns=np.asarray(etitle)[usecols])


def _resolve_usecols(etitle, usecols):
    """
    Convert a column selection into positions within `etitle`.

    Parameters
    ----------
    etitle : array-like of str
        Column titles.
    usecols : list-like or None
        Requested columns. None selects every column. Strings are looked up in `etitle`;
        other values are returned as a numpy array without further checks.

    Returns
    -------
    numpy.ndarray
        Positions of the selected columns. For string input the positions follow the
        order of `etitle`, not the order of the request.

    Raises
    ------
    AttributeError
        If a requested column name does not occur in `etitle`.
    """

    import numpy as np

    # No selection means every column
    if usecols is None:
        return np.arange(len(etitle))

    usecols = np.array(usecols)

    # Non-string selections are taken to be positions already
    if not issubclass(usecols.dtype.type, str):
        return usecols

    # Report exactly the names that are absent from etitle
    missing = usecols[~np.isin(usecols, etitle)]
    if missing.size > 0:
        raise AttributeError(f'columns {missing} not found')

    return np.flatnonzero(np.isin(etitle, usecols))


def _read_log_old(fname):
    """
    Read the energy records of a NAMD output file line by line (legacy reader).

    Parameters
    ----------
    fname : str
        Name of NAMD output file.

    Returns
    -------
    pandas.DataFrame
        ENERGY records converted to float and indexed by the first column. The index
        itself keeps the string values read from the file, because the float conversion
        is applied after the index has been set.
    """

    # Import pandas if not already loaded (to speed up namdtools in general)
    import pandas as pd

    # Initialize DataFrame information
    columns = None
    records = []

    # Read through log file and extract energy records
    # TODO read in with regex
    with open(fname, 'r') as stream:
        for line in stream:
            # Only the first ETITLE is needed; the leading "ETITLE:" token is dropped
            if columns is None and line.startswith('ETITLE'):
                columns = line.lower().split()[1:]

            # Save each energy record without the leading "ENERGY:" token
            elif line.startswith('ENERGY'):
                records.append(line.split()[1:])

    # Fall back to the standard titles only when the file has no ETITLE line
    if columns is None:
        columns = [
            'ts', 'bond', 'angle', 'dihed', 'imprp', 'elect', 'vdw', 'boundary', 'misc',
            'kinetic', 'total', 'temp', 'potential', 'total3', 'tempavg', 'pressure',
            'gpressure', 'volume', 'pressavg', 'gpressavg',
        ]

    # Return DataFrame
    return pd.DataFrame(records, columns=columns).set_index(columns[0]).astype(float)