Source code for MDAnalysis.topology.PDBParser

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- http://mdanalysis.googlecode.com
# Copyright (c) 2006-2011 Naveen Michaud-Agrawal,
#               Elizabeth J. Denning, Oliver Beckstein,
#               and contributors (see website for details)
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
#     N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and
#     O. Beckstein. MDAnalysis: A Toolkit for the Analysis of
#     Molecular Dynamics Simulations. J. Comput. Chem. 32 (2011), 2319--2327,
#     doi:10.1002/jcc.21787
#

"""
PDB topology parser
===================

Use a PDB file to build a minimum internal structure representation.

.. Warning:: Only cares for atoms and their names; neither
             connectivity nor (partial) charges are deduced. Masses
             are guessed and set to 0 if unknown.
"""

import os.path
try:
    # BioPython is overkill but potentially extensible (altLoc etc)
    import Bio.PDB
except ImportError:
    raise ImportError("Bio.PDB from biopython not found. Required for PDB->PSF parser.")

import MDAnalysis.coordinates.pdb.extensions
from MDAnalysis.topology.core import guess_atom_type, guess_atom_mass, guess_atom_charge

class PDBParseError(Exception):
    """Signifies an error while reading a PDB file."""
def parse(pdbfile):
    """Parse atom information from PDB file *pdbfile*.

    Only reads the list of atoms.

    This function uses the :class:`Bio.PDB.PDBParser` as used by
    :func:`MDAnalysis.coordinates.pdb.extensions.get_structure`.

    :Arguments:
       *pdbfile*
           path of the PDB file; its extension must be ``.pdb`` or
           ``.ent`` (compared case-insensitively)

    :Returns: MDAnalysis internal *structure* dict

    :Raises: :exc:`PDBParseError` if *pdbfile* has a different extension

    .. SeeAlso:: The *structure* dict is defined in
                 :func:`MDAnalysis.topology.PSFParser.parse`.
    """
    ext = os.path.splitext(pdbfile)[1]
    if ext.lower() not in ('.pdb', '.ent'):
        # Interpolate the file name; wrap it in a tuple so that an unusual
        # (e.g. tuple-valued) argument cannot confuse the % operator.
        raise PDBParseError(
            "%r is probably not in PDB format (wrong extension)." % (pdbfile,))

    structure = {}
    # use Sloppy PDB parser to cope with big PDBs!
    pdb = MDAnalysis.coordinates.pdb.extensions.get_structure(pdbfile, "0UNK")

    # fills structure["_atoms"] in place
    __parseatoms_(pdb, structure)

    # TODO: reconstruct bonds from CONECT or guess from distance search
    #       (e.g. like VMD)
    return structure
def __parseatoms_(pdb, structure):
    """Fill the atoms section of *structure* from the atoms of *pdb*.

    Every atom yielded by ``pdb.get_atoms()`` is translated into a
    :class:`MDAnalysis.core.AtomGroup.Atom`; the resulting list is stored
    in *structure* under the key ``"_atoms"`` (replacing any previous
    entry). Atom type, mass and charge are guessed from the atom name.

    :Arguments:
       *pdb*
           structure object providing ``get_atoms()``, as returned by
           :func:`MDAnalysis.coordinates.pdb.extensions.get_structure`
       *structure*
           dict that is modified in place

    :Returns: ``None``
    """
    # imported locally, as in the original implementation
    from MDAnalysis.core.AtomGroup import Atom

    attr = "_atoms"  # name of the atoms section
    atoms = []       # list of Atom objects

    # translate Bio.PDB atom objects to MDAnalysis Atom.
    for iatom, atom in enumerate(pdb.get_atoms()):
        residue = atom.parent
        chain_id = residue.parent.id

        atomname = atom.name
        resid = residue.id[1]
        # no empty segids (or Universe throws IndexError): fall back to the
        # chain identifier and finally to the generic "SYSTEM"
        segid = residue.get_segid().strip() or chain_id or "SYSTEM"

        atoms.append(Atom(iatom,
                          atomname,
                          guess_atom_type(atomname),
                          residue.resname,
                          int(resid),
                          segid,
                          float(guess_atom_mass(atomname)),
                          float(guess_atom_charge(atomname)),
                          bfactor=atom.bfactor))

    structure[attr] = atoms