##
## Biskit, a toolkit for the manipulation of macromolecular structures
## Copyright (C) 2004-2016 Raik Gruenberg
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You find a copy of the GNU General Public License in the file
## license.txt along with this program; if not, write to the Free
## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
Collect and index AmberResidueType instances from amber topology files.
"""
from biskit import PDBModel, AmberPrepParser, StdLog
import biskit.tools as T
class AmberResidueLibraryError(Exception):
    """
    Error raised by AmberResidueLibrary when a topology file or a residue
    type with identical atom content is registered a second time.
    """
class AmberResidueLibrary( object ):
    """
    A collection of reference residue types taken from Amber topology files.

    By default, the collection is initialized from the four all-atom
    library or "off files" in Biskit/data/amber/residues (see `F_RESTYPES`).

    The idea here is that the residues of some PDB structure can be matched
    against this library in order to assign charges or other properties.
    Matching residue types are identified by their 'atom key', which is simply
    the concatenation of a residue's atom names in alphabetical order. This
    means residues are matched by atom content, not by residue name. That's
    important for modified residues, for example, a C-terminal ALA (with an
    additional oxygen) will be matched to the AmberResidueType CALA rather than
    to ALA.

    The default all-atom topologies include hydrogen atoms. Structures without
    hydrogens will *not* match. You can add hydrogens with the Reduce class
    (Biskit.reduce).

    Atom names need to conform to Amber conventions --
    this can be ensured with `PDBModel.xplor2amber()`.

    Use
    ===

    >>> model = PDBModel('mystructure.pdb')
    >>> residue = model.resModels()[0]
    >>>
    >>> lib = AmberResidueLibrary()
    >>> refres = lib[ residue ]
    >>>
    >>> ## or alternatively:
    >>> refres = lib['all_amino03', 'ALA']
    """

    ## list of Amber topology files in descending priority
    F_RESTYPES = ['all_amino03.in',
                  'all_aminoct03.in',
                  'all_aminont03.in',
                  'all_nuc02.in' ]

    def __init__(self, topofiles=F_RESTYPES, log=None, verbose=False):
        """
        :param topofiles: list of topology file names
                          (default `all_*in` in `Biskit/data/amber/residues`)
        :type  topofiles: [ str ]
        :param log: optional LogFile instance (default STDOUT)
        :type  log: biskit.LogFile
        :param verbose: add messages to log (default False)
        :type  verbose: bool
        """
        self.aindex = {}    ## residue types indexed by atom key
        self.topoindex = {} ## residue types indexed by topo and resname

        self.log = log or StdLog()
        self.verbose = verbose

        for f in topofiles:
            self.addTopology( f )

    def addTopology(self, topofile, override=False):
        """
        Include an additional topology (off) library in the collection.

        The library is only modified once the whole file has been checked,
        so a rejected topology leaves the existing indices unchanged.

        :param topofile: file name of topology, either full path or
                         simple file name which will then be looked for in
                         Biskit/data/amber/residues.
        :type  topofile: str
        :param override: override topologies or residue entries with same name
                         (default False)
        :type  override: bool

        :return: dictionary of all residue types parsed from topofile indexed
                 by three-letter residue name
        :rtype: {str : AmberResidueType}

        :raise: AmberResidueLibraryError if override==False and a topology or
                a residue with identical atom content have already been
                registered.
        """
        fbase = T.stripFilename( topofile )

        if fbase in self.topoindex and not override:
            raise AmberResidueLibraryError('duplicate topology '+fbase)

        if self.verbose:
            self.log.add('parsing %s...' % topofile )

        resdic = AmberPrepParser( topofile ).residueDict()

        if self.verbose:
            self.log.add( 'Read %i residue definitions.\n' % len(resdic) )

        ## Stage the new atom keys first: a clash detected half-way through
        ## the file must not leave a partially registered topology behind.
        staged = {}
        for resname, restype in resdic.items():
            akey = restype.atomkey(compress=False)

            if not override and (akey in self.aindex or akey in staged):
                ## clashes inside the same file count as duplicates, too
                known = self.aindex[akey] if akey in self.aindex \
                        else staged[akey]
                raise AmberResidueLibraryError(
                    'duplicate residue entry: %s -> %s' %
                    (resname, known.code))

            staged[ akey ] = restype

        ## everything checked -- commit both indices together
        self.topoindex[ fbase ] = resdic
        self.aindex.update( staged )

        return resdic

    def atomkey( self, residue ):
        """
        Create a string key encoding the atom content of residue.

        :param residue: model or AmberResidue
        :type  residue: PDBModel or AmberResidue

        :return: key formed from alphabetically sorted atom content of residue
        :rtype: str
        """
        return residue.atomkey(compress=False)

    def byAtoms(self, akey, default=None ):
        """
        Identify a matching reference residue by atom content.

        :param akey: atomkey or PDBModel with a single residue (any object
                     offering an `atomkey` method is converted to its key)
        :type  akey: str or PDBModel
        :param default: value returned if no residue type matches
                        (default None)
        :type  default: any

        :return: matching reference residue OR default
        :rtype: AmberResidueType
        """
        if hasattr( akey, 'atomkey' ):
            akey = self.atomkey( akey )

        return self.aindex.get(akey, default)

    def byName(self, rescode, topo=None ):
        """
        Identify matching reference residue by residue name. Note: residue
        names are not guaranteed to be unique if several topology files have
        been read in (the default set of Amber topologies uses unique names
        though). The optional topo parameter can be used to specify in
        which topology the residue is looked up.

        Note: residue 3 letter names are all UPPERCASE.

        :param rescode: three-letter name of residue to look up
        :type  rescode: str
        :param topo: optional (file) name of topology (see also: `topokeys()` )
        :type  topo: str

        :return: matching reference residue
        :rtype: AmberResidueType

        :raise: KeyError if the topology or residue name are not found
        """
        if topo:
            fbase = T.stripFilename( topo )
            return self.topoindex[ fbase ][ rescode ]

        ## no topology given: the first topology containing the name wins
        for residues in self.topoindex.values():
            if rescode in residues:
                return residues[rescode]

        raise KeyError('No residue type found for name '+str(rescode))

    def __len__( self ):
        """
        :return: number of residue types registered by atom key
        :rtype: int
        """
        return len(self.aindex)

    def __getitem__( self, key ):
        """
        Examples:

          - `reslib[ PDBModel ]` -> `ResidueType` for matching residue
          - `reslib[ str(atomkey) ]` -> `ResidueType` with same atom key
          - `reslib[ topo, resname ]` -> `ResidueType` with this residue
            name in the given topology (see `byName()`)

        :raise: KeyError if no residue type matches or the key is of an
                unsupported type
        """
        if isinstance(key, str):
            return self.aindex[key]

        if isinstance(key, tuple):
            if len(key) != 2:
                raise KeyError('expected (topology, residue name), got %r'
                               % (key,))
            topo, rescode = key
            return self.byName( rescode, topo )

        if hasattr(key, 'atomkey'):
            return self.aindex[ self.atomkey( key ) ]

        ## Never fall through and return None: besides hiding lookup errors,
        ## that would let Python's index-based iteration fallback
        ## (reslib[0], reslib[1], ...) run forever.
        raise KeyError('unsupported key for residue lookup: %r' % (key,))

    def topokeys( self ):
        """
        :return: names under which the topologies have been registered
        :rtype: [ str ]
        """
        return list(self.topoindex.keys())

    def keys( self ):
        """
        :return: atom keys of all registered residue types
        :rtype: [ str ]
        """
        return list(self.aindex.keys())

    def values( self ):
        """
        :return: all residue types registered by atom key
        :rtype: [ AmberResidueType ]
        """
        return list(self.aindex.values())
#############
## TESTING
#############
import biskit.test as BT
class Test(BT.BiskitTest):
    """Test class"""

    def test_amberResidueLibrary( self ):
        """AmberResidueLibrary test"""
        ## build the library from the default topology files
        library = AmberResidueLibrary(verbose=self.local)
        self.lib = library

        ## reference entry, looked up by name within one specific topology
        reference = library.byName('ALA', 'all_amino03')

        ## lookup by atom content must lead back to the same entry
        by_content = library[ reference ]
        self.assertEqual( by_content, reference )

        ## lookup by name across all topologies must find it as well
        by_name = library.byName( 'ALA' )
        self.assertEqual( by_name, reference )

        ## expected number of distinct atom keys in the default topologies
        self.assertEqual( len( library ), 114 )
## run the tests of this module when it is executed as a script
if __name__ == '__main__':
    BT.localTest(debug=False)