Source code for MDAnalysis.topology.PrimitivePDBParser

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
#
# MDAnalysis --- http://www.MDAnalysis.org
# Copyright (c) 2006-2015 Naveen Michaud-Agrawal, Elizabeth J. Denning, Oliver Beckstein
# and contributors (see AUTHORS for the full list)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""
Primitive PDB topology parser
=============================

This topology parser uses a standard PDB file to build a minimum
internal structure representation (list of atoms).

The topology reader reads a PDB file line by line and ignores atom
numbers but only reads residue numbers up to 9,999 correctly. If you
have systems containing at least 10,000 residues then you need to use
a different file format (e.g. the "extended" PDB, *XPDB* format, see
:mod:`~MDAnalysis.topology.ExtendedPDBParser`) that can handle residue
numbers up to 99,999.

.. Note::

   The parser processes atoms and their names. Masses are guessed and set to 0
   if unknown. Partial charges are not set. Bond connectivity can be guessed if
   the ``bonds=True`` keyword is set for
   :class:`~MDAnalysis.core.AtomGroup.Universe`.

.. SeeAlso::

   * :mod:`MDAnalysis.topology.ExtendedPDBParser`
   * :class:`MDAnalysis.coordinates.PDB.PrimitivePDBReader`
   * :class:`MDAnalysis.core.AtomGroup.Universe`

Classes
-------

.. autoclass:: PrimitivePDBParser
   :members:
   :inherited-members:

"""
from __future__ import absolute_import

import numpy as np

from ..core.AtomGroup import Atom
from .core import (guess_atom_type, guess_atom_mass,
                   guess_atom_charge, guess_bonds)
from ..core.util import openany
from .base import TopologyReader


[docs]class PrimitivePDBParser(TopologyReader): """Parser that obtains a list of atoms from a standard PDB file. .. versionadded:: 0.8 """ def __init__(self, filename, **kwargs): super(PrimitivePDBParser, self).__init__(filename, **kwargs) from ..coordinates.PDB import PrimitivePDBReader self.PDBReader = PrimitivePDBReader
[docs] def parse(self): """Parse atom information from PDB file *filename*. :Returns: MDAnalysis internal *structure* dict .. SeeAlso:: The *structure* dict is defined in `MDAnalysis.topology` and the file is read with :class:`MDAnalysis.coordinates.PDB.PrimitivePDBReader`. """ structure = {} try: pdb = self.PDBReader(self.filename) except ValueError: raise IOError("Failed to open and read PDB file") atoms = self._parseatoms(pdb) structure['atoms'] = atoms if self.guess_bonds_mode: guessed_bonds = self._guess_bonds(pdb, atoms) structure['guessed_bonds'] = guessed_bonds bonds = self._parsebonds(pdb, atoms) structure['bonds'] = bonds return structure
def _parseatoms(self, pdb): atoms = [] # translate list of atoms to MDAnalysis Atom. for iatom, atom in enumerate(pdb._atoms): # ATOM if len(atom.__dict__) == 10: atomname = atom.name atomtype = atom.element or guess_atom_type(atomname) resname = atom.resName resid = int(atom.resSeq) chain = atom.chainID.strip() # no empty segids (or Universe throws IndexError) segid = atom.segID.strip() or chain or "SYSTEM" mass = guess_atom_mass(atomname) charge = guess_atom_charge(atomname) bfactor = atom.tempFactor # occupancy = atom.occupancy altLoc = atom.altLoc atoms.append(Atom(iatom, atomname, atomtype, resname, resid, segid, mass, charge, bfactor=bfactor, serial=atom.serial, altLoc=altLoc, universe=self._u)) # TER atoms #elif len(atom.__dict__) == 5: # pass # #atoms.append(None) return atoms def _guess_bonds(self, primitive_pdb_reader, atoms): guessed_bonds = guess_bonds(atoms, np.array(primitive_pdb_reader.ts)) return guessed_bonds def _parsebonds(self, primitive_pdb_reader, atoms): # Mapping between the atom array indicies a.number and atom ids # (serial) in the original PDB file mapping = dict((a.serial, a.number) for a in atoms) bonds = set() with openany(self.filename, "r") as fname: lines = ((num, line[6:].split()) for num, line in enumerate(fname) if line[:6] == "CONECT") for num, bond in lines: atom, atoms = int(bond[0]), map(int, bond[1:]) for a in atoms: bond = tuple([mapping[atom], mapping[a]]) bonds.add(bond) bonds = tuple(bonds) return bonds