Source code for MDAnalysis.topology.PrimitivePDBParser

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
#
# MDAnalysis --- http://www.MDAnalysis.org
# Copyright (c) 2006-2015 Naveen Michaud-Agrawal, Elizabeth J. Denning, Oliver Beckstein
# and contributors (see AUTHORS for the full list)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""
Primitive PDB topology parser
=============================

This topology parser uses a standard PDB file to build a minimum
internal structure representation (list of atoms).

The topology reader reads a PDB file line by line and ignores atom
numbers but only reads residue numbers up to 9,999 correctly. If you
have systems containing at least 10,000 residues then you need to use
a different file format (e.g. the "extended" PDB, *XPDB* format, see
:mod:`~MDAnalysis.topology.ExtendedPDBParser`) that can handle residue
numbers up to 99,999.

.. Note::

   The parser processes atoms and their names. Masses are guessed and set to 0
   if unknown. Partial charges are not set.

.. SeeAlso::

   * :mod:`MDAnalysis.topology.ExtendedPDBParser`
   * :class:`MDAnalysis.coordinates.PDB.PrimitivePDBReader`
   * :class:`MDAnalysis.core.AtomGroup.Universe`

Classes
-------

.. autoclass:: PrimitivePDBParser
   :members:
   :inherited-members:

"""
from __future__ import absolute_import

import warnings

from ..core.AtomGroup import Atom
from .core import get_atom_mass, guess_atom_element
from ..lib.util import openany
from .base import TopologyReader


[docs]class PrimitivePDBParser(TopologyReader): """Parser that obtains a list of atoms from a standard PDB file. .. seealso:: :class:`MDAnalysis.coordinates.PDB.PrimitivePDBReader` .. versionadded:: 0.8 """ format = 'PDB'
[docs] def parse(self): """Parse atom information from PDB file *filename*. :Returns: MDAnalysis internal *structure* dict .. SeeAlso:: The *structure* dict is defined in `MDAnalysis.topology` and the file is read with :class:`MDAnalysis.coordinates.PDB.PrimitivePDBReader`. """ structure = {} atoms = self._parseatoms() structure['atoms'] = atoms bonds = self._parsebonds(atoms) structure['bonds'] = bonds return structure
def _parseatoms(self): iatom = 0 atoms = [] with openany(self.filename) as f: resid_prev = 0 # resid looping hack for i, line in enumerate(f): line = line.strip() # Remove extra spaces if len(line) == 0: # Skip line if empty continue record = line[:6].strip() if record.startswith('END'): break elif line[:6] in ('ATOM ', 'HETATM'): try: serial = int(line[6:11]) except ValueError: # serial can become '***' when they get too high self._wrapped_serials = True serial = None name = line[12:16].strip() altLoc = line[16:17].strip() resName = line[17:21].strip() chainID = line[21:22].strip() # empty chainID is a single space ' '! if self.format == "XPDB": # fugly but keeps code DRY resSeq = int(line[22:27]) # extended non-standard format used by VMD resid = resSeq else: resSeq = int(line[22:26]) resid = resSeq while resid - resid_prev < -5000: resid += 10000 resid_prev = resid # insertCode = _c(27, 27, str) # not used # occupancy = float(line[54:60]) try: tempFactor = float(line[60:66]) except ValueError: tempFactor = 0.0 segID = line[66:76].strip() element = line[76:78].strip() segid = segID.strip() or chainID.strip() or "SYSTEM" elem = guess_atom_element(name) atomtype = element or elem mass = get_atom_mass(elem) # charge = guess_atom_charge(name) charge = 0.0 atom = Atom(iatom, name, atomtype, resName, resid, segid, mass, charge, bfactor=tempFactor, serial=serial, altLoc=altLoc, universe=self._u, resnum=resSeq) iatom += 1 atoms.append(atom) return atoms def _parsebonds(self, atoms): # Could optimise this by saving lines in the main loop # then doing post processing after all Atoms have been read # ie do one pass through the file only # Problem is that in multiframe PDB, the CONECT is at end of file, # so the "break" call happens before bonds are reached. # If the serials wrapped, this won't work if hasattr(self, '_wrapped_serials'): warnings.warn("Invalid atom serials were present, bonds will not" " be parsed") return tuple([]) # Mapping between the atom array indicies a.index and atom ids # (serial) in the original PDB file mapping = dict((a.serial, a.index) for a in atoms) bonds = set() with openany(self.filename, "r") as f: lines = (line[6:].split() for line in f if line[:6] == "CONECT") for bond in lines: atom, atoms = int(bond[0]), map(int, bond[1:]) for a in atoms: bond = tuple([mapping[atom], mapping[a]]) bonds.add(bond) bonds = tuple(bonds) return bonds