## numpy-oldnumeric calls replaced by custom script; 09/06/2016
##
## Biskit, a toolkit for the manipulation of macromolecular structures
## Copyright (C) 2004-2016 Raik Gruenberg & Johan Leckner
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You find a copy of the GNU General Public License in the file
## license.txt along with this program; if not, write to the Free
## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##
##
"""
Store and manipulate coordinates and atom information.
"""
import biskit.tools as T
from biskit import molUtils
from biskit import mathUtils
from biskit import match2seq
from biskit import rmsFit
from biskit.core.localpath import LocalPath
from biskit.errors import BiskitError
from biskit.profileCollection import ProfileCollection, ProfileError
from biskit.core.pdbparserFactory import PDBParserFactory
from biskit.core.pdbparseFile import PDBParseFile
from biskit import biounit as BU
from biskit.core import oldnumeric as N0
from biskit.core.scientificIO import PDB as IO
from biskit import EHandler
from biskit.future import Residue
import biskit as B
import numpy as N
import os, sys
import copy
import time
import string
import types
import functools
class PDBProfiles( ProfileCollection ):
    """
    A ProfileCollection that triggers an update() of its parent :class:`PDBModel`
    if an empty or (optionally) missing profile is requested.
    .. seealso:: `biskit.ProfileCollection`
    """

    def __init__(self, model=None, profiles=None, infos=None ):
        """
        :param model: parent model of this ProfileCollection
        :type model: PDBModel
        :param profiles: dictionary of existing profiles
        :type profiles: { 'name' : list/array }
        :param infos: dictionary of existing meta infos
        :type infos: { 'name' : { 'date' : ... } }
        """
        ProfileCollection.__init__( self, profiles=profiles, infos=infos )
        self.model = model

    def clone(self):
        """
        Clone this collection; the clone keeps a reference to the (same)
        parent model but receives deep copies of profiles and meta infos.
        """
        return self.__class__(self.model, copy.deepcopy(self.profiles),
                              copy.deepcopy(self.infos))

    def version( self ):
        return ProfileCollection.version(self)

    ## should work but leads to some kind of loop condition
    ## def profLength(self, default=0):
    ##     r = ProfileCollection.profLength(self, default=None)
    ##     if r is not None:
    ##         return r
    ##
    ##     if self.model and self.model.xyz is not None:
    ##         return len(self.model.xyz)
    ##
    ##     return default

    def get( self, name, default=None, update=True, updateMissing=False ):
        """
        Fetch a profile::
          get( profKey, [default] ) -> list or array of values
        Or:
          get( (profKey, infoKey), [default] ) -> single value of metainfo dict

        This method extends the standard :class:`ProfileCollection.get` by the
        ability to fetch empty (None) or missing profiles from a source
        file or model.

        :param name: profile key or profile and info key
        :type name: str OR (str, str)
        :param default: default result if no profile is found,
                        if None and no profile is found, attempt update
        :type default: any
        :param update: update from source before returning empty profile [0]
        :type update: bool
        :param updateMissing: update from source before reporting missing
                              profile [0]
        :type updateMissing: bool
        :raise ProfileError: if no profile is found with |name|
        """
        try:
            r = ProfileCollection.get( self, name, default=default)
        except ProfileError:
            if updateMissing:
                r = None
            else:
                ## FIX: re-raise the original exception instead of wrapping it
                ## in a new ProfileError -- keeps message and traceback intact
                raise
        if r is None and (update or updateMissing):
            ## only read PDB source if this is indeed useful
            if name not in PDBModel.PDB_KEYS and \
               PDBParseFile.supports( self.model.validSource() ):
                return None
            self.model.update( updateMissing=updateMissing )
            ## try again
            r = ProfileCollection.get(self, name )
        return r
class PDBResidueProfiles(PDBProfiles):
    """Work in progress -- give residue profiles a default length"""

    def profLength(self, default=0):
        """
        Profile length; falls back to the parent model's residue count
        (length of its residue index) if no profile is registered yet.
        """
        n = ProfileCollection.profLength(self, default=None)
        if n is None and self.model and self.model._resIndex is not None:
            n = len(self.model._resIndex)
        return default if n is None else n
class PDBError(BiskitError):
    """General error while reading, writing or manipulating PDB structures."""
    pass
class PDBIndexError( PDBError):
    """
    Errors warning of issues with residue or chain index
    (inconsistent or missing start positions of residues/chains).
    """
    pass
[docs]class PDBModel:
"""
Store and manipulate coordinates and atom infos stemming from a PDB file.
Coordinates are stored in the numpy array 'xyz'; the additional atom infos
from the PDB (name, residue_name, and many more) are efficiently stored in
a :class:`PDBProfiles` instance 'atoms' which can be used to also associate
arbitrary other data to the atoms. Moreover, a similar collection
'residues' can hold data associated to residues (but is initially empty).
A normal dictionary 'info' accepts any information about the whole model.
For detailed documentation,
see http://biskit.pasteur.fr/doc/handling_structures/PDBModel
@todo:
* outsource validSource into PDBParserFactory
* prevent repeated loading of test PDB for each test
"""
#: keys of all atom profiles that are read directly from the PDB file
PDB_KEYS = ['name', 'residue_number', 'insertion_code', 'alternate',
'name_original', 'chain_id', 'occupancy', 'element',
'segment_id', 'charge', 'residue_name', 'after_ter',
'serial_number', 'type', 'temperature_factor']
[docs] def __init__( self, source=None, pdbCode=None, noxyz=0, skipRes=None,
headPatterns=[] ):
"""
Examples:
- `PDBModel()` creates an empty Model to which coordinates (field xyz)
and PDB records (atom profiles) have still to be added.
- `PDBModel( file_name )` creates a complete model with coordinates
and PDB records from file_name (pdb, pdb.gz, or pickled PDBModel)
- `PDBModel( PDBModel )` creates a copy of the given model
- `PDBModel( PDBModel, noxyz=1 )` creates a copy without coordinates
:param source: str, file name of pdb/pdb.gz file OR pickled PDBModel OR
PDBModel, template structure to copy atoms/xyz field from
:type source: str or PDBModel
:param pdbCode: PDB code, is extracted from file name otherwise
:type pdbCode: str or None
:param noxyz: 0 (default) || 1, create without coordinates
:type noxyz: 0||1
:param headPatterns: [(putIntoKey, regex)] extract given REMARK values
:type headPatterns: [(str, str)]
:raise PDBError: if file exists but can't be read
"""
self.source = source
if type( source ) is str and ( len(source) != 4 or \
os.path.isfile( source ) ):
self.source = LocalPath( source )
self.__validSource = 0
self.fileName = None
self.pdbCode = pdbCode
self.xyz = None
#: save atom-/residue-based values
self.residues = PDBResidueProfiles( self )
self.atoms = PDBProfiles( self )
#: cached atom masks, calculated when first needed
self.__maskCA = None
self.__maskBB = None
self.__maskHeavy = None
#: cache position of chain breaks (clear when xyz changes)
self.__chainBreaks = None
#: starting positions of each residue
self._resIndex = None
#: starting positions of each chain
self._chainIndex = None
if noxyz:
## trick update to leave xyz untouched
self.xyz = 0
#: monitor changes of coordinates
self.xyzChanged = 0
self.forcePickle = 0
#: version as of creation of this object
self.initVersion = self.version()
#: to collect further informations
self.info = { 'date':T.dateSortString() }
if source != None:
self.update( skipRes=skipRes, updateMissing=1, force=1,
headPatterns=headPatterns )
if noxyz:
## discard coordinates, even when read from PDB file
self.xyz = None
def version( self ):
return 'PDBModel $Revision$'
    def __getstate__(self):
        """
        Called before pickling the object (but also called by deepcopy).
        Runs :meth:`slim` to strip data that can be re-read from the source,
        then hands the instance dictionary to the pickler.
        """
        self.slim()
        ## forcePickle is a one-shot flag; reset it so the pickled copy
        ## slims normally on its next pickling round
        self.forcePickle = 0
        return self.__dict__
    def __setstate__(self, state ):
        """
        Called for unpickling the object; patches pre-2.1 pickle layouts
        before restoring the instance dictionary.
        """
        ## until 2.0.1 PDBModel.atoms contained a list of dictionaries
        ## now we merged this info into PDBModel.aProfiles and renamed
        ## aProfiles back to .atoms
        if 'atoms' in state and type( state['atoms'] ) in [list, type(None)] :
            state['old_atoms'] = state['atoms']
            del state['atoms']
        self.__dict__ = state
        ## backwards compability
        self.__defaults()
def __len__(self):
return self.lenAtoms()
def __getitem__( self, k ):
"""
Get atom profile or profile item or CrossView for one atom::
m['prof1'] <==> m.atoms.get( 'prof1' )
m['prof1','info1'] <==> m.atoms.get( 'prof1','info1' )
m[10] <==> CrossView( m.atoms, 10 )
:return: profile OR meta infos thereof OR CrossView dict
:rtype: list OR array OR any OR CrossView
"""
if type( k ) is str:
if k in self.atoms:
return self.atoms.get( k )
if k in self.residues:
return self.residues.get( k )
if k in self.info:
return self.info[ k ]
return self.profile( k )
if type( k ) is tuple:
return self.profileInfo( k[0] )[ k[1] ]
return self.atoms[k]
def __setitem__( self, k, v ):
"""
Set atom profile or profile item (or meta info)::
m['prof1'] = range(10)
is same as <==> m.atoms.set( 'prof1', range(10) )
OR <==> m.residues.set( 'prof1', range(10) )
m['prof1','info1'] = 'comment'
is same as <==> m.atoms.setInfo('prof1',info1='comment')
OR <==> m.residues.setInfo('prof1',info1='comment')
m['version'] = '1.0.0' <==> m.info['version'] = '1.0.0'
(but only if 'version' already exists in `m.info`)
:return: item
:rtype: any
"""
if type( k ) is str:
if k in self.atoms:
return self.atoms.set( k, v )
if k in self.residues:
return self.residues.set( k, v )
if k in self.info:
self.info[ k ] = v
return
if v is not None and len( v ) == self.lenAtoms():
return self.atoms.set( k, v )
if v is not None and len( v ) == self.lenResidues():
return self.residues.set( k, v )
raise ProfileError('Value cannot clearly be assigned to either atom or '+\
'residue profiles')
if type( k ) is tuple:
key, infokey = k
if key in self.residues:
self.residues[key, infokey] = v
return
self.atoms[key, infokey] = v
return
raise ProfileError('Cannot interpret %r as profile name or profile info record' % k)
    def __getslice__( self, *arg ):
        """
        Get list of CrossViews::
          m[0:100:5] <==> [ CrossView(m.atoms, i) for i in range(0,100,5) ]
        .. note:: Python 3 removed the __getslice__ protocol -- slicing is
           routed through __getitem__ with a slice object, so this method is
           only reached when called explicitly.
        """
        return self.atoms.__getslice__( *arg )
def __iter__( self ):
return self.atoms.iterCrossViews()
def __repr__( self ):
code = self.pdbCode or ''
return '[%s %s %5i atoms, %4i residues, %2i chains]' % \
( self.__class__.__name__, code, self.lenAtoms(lookup=False),
self.lenResidues(), self.lenChains() )
def __str__( self ):
return self.__repr__()
    def report( self, prnt=True, plot=False, clipseq=60 ):
        """
        Print (or return) a brief description of this model.
        :param prnt: directly print report to STDOUT (default True)
        :type prnt: bool
        :param plot: show simple 2-D line plot using gnuplot [False]
        :type plot: bool
        :param clipseq: clip chain sequences at this number of letters [60]
        :type clipseq: int
        :return: if prnt==True: None, else: formatted description of this model
        :rtype: None or str
        """
        r = self.__repr__()
        ## one line per chain: index, chain id of first atom, clipped sequence
        for c in range( self.lenChains() ):
            m = self.takeChains( [c] )
            r += '\n\t* chain %-2i(%s): %s' % ( c, m['chain_id'][0],
                                                T.clipStr( m.sequence(), clipseq ) )
        r += '\n source: ' + repr( self.source )
        ## summarize registered profile and info keys (clipped to one line each)
        r += '\n %2i atom profiles: %s' % ( len( self.atoms ),
                                            T.clipStr( repr(list(self.atoms.keys())), 57 ))
        r += '\n %2i residue profiles: %s' % ( len( self.residues ),
                                               T.clipStr( repr(list(self.residues.keys())), 57 ))
        r += '\n %2i info records: %s' % ( len( self.info ),
                                           T.clipStr( repr( list(self.info.keys()) ), 57 ))
        if plot:
            self.plot()
        if prnt:
            print(r)
        else:
            return r
[docs] def plot( self, hetatm=False ):
"""
Get a quick & dirty overview over the content of a PDBModel. plot
simply creates a 2-D plot of all x-coordinates versus all y coordinates,
colored by chain. This is obviously not publication-quality ;-).
Use the Biskit.Pymoler class for real visalization.
:param hetatm: include hetero & solvent atoms (default False)
:type hetatm: bool
"""
from biskit import gnuplot
m = self
if not hetatm:
mask = self.maskHetatm()
mask = mask + m.maskSolvent()
m = self.compress( N0.logical_not( mask ) )
chains = [ self.takeChains( [i] ) for i in range( m.lenChains())]
xy = [ list(zip( m.xyz[:,0], m.xyz[:,1] )) for m in chains ]
gnuplot.plot( *xy )
    def __vintageCompatibility( self ):
        """
        backward compatibility to vintage PDBModels < 2.0.0
        (called from __defaults during unpickling)
        """
        ## convert first generation profile dictionaries into new
        ## ProfileCollections
        if 'resProfiles' in self.__dict__:
            self.residues=PDBProfiles( self,
                                       profiles=getattr(self,'resProfiles',{}),
                                       infos=getattr(self,'resProfiles_info',{}) )
            try:
                del self.resProfiles; del self.resProfiles_info
            except: pass
        if 'atomProfiles' in self.__dict__:
            self.atoms=PDBProfiles( self,
                                    profiles=getattr(self,'atomProfiles',{}),
                                    infos=getattr(self,'atomProfiles_info',{}) )
            try:
                del self.atomProfiles; del self.atomProfiles_info
            except: pass
        ## fix old bug: slim() was creating a self.xyx instead of xyz
        if getattr( self, 'xyx', 0 ):
            del self.xyx
        ## first generation source was just a simple string
        if type( self.source ) == str:
            self.source = LocalPath( self.source )
        ## re-create fields that old pickles may not have at all
        self.__validSource = getattr( self, '_PDBModel__validSource', 0)
        self.initVersion = getattr( self, 'initVersion', 'old PDBModel')
        self.forcePickle = getattr( self, 'forcePickle', 0 )
        self.info = getattr( self, 'info', { 'date':T.dateSortString() } )
        ## fix previous bug; old PDBModel pickles often have stray terAtoms
        ## records
        if getattr( self, '_PDBParseFile__terAtoms', None) is not None:
            del self._PDBParseFile__terAtoms
def __defaults(self ):
"""
backwards compatibility to earlier pickled models
"""
self.__vintageCompatibility()
## if there were not even old profiles...
if getattr( self, 'atoms', 0) is 0:
self.atoms = PDBProfiles(self)
if getattr( self, 'residues', 0) is 0:
self.residues = PDBResidueProfiles(self)
## between release 2.0.1 and 2.1, aProfiles were renamed to atoms
if getattr( self, 'aProfiles', None) is not None:
self.atoms = self.aProfiles
del self.aProfiles
## between release 2.0.1 and 2.1, rProfiles were renamed to residues
if getattr( self, 'rProfiles', None) is not None:
self.residues = self.rProfiles
del self.rProfiles
## old aProfiles and rProfiles didn't keep a reference to the parent
if getattr( self.atoms, 'model', 0) is 0:
self.atoms.model = self
self.residues.model = self
## biskit <= 2.0.1 kept PDB infos in list of dictionaries
atoms = getattr( self, 'old_atoms', 0)
if not atoms is 0:
## atoms to be fetched from external source
if atoms is None:
for k in self.PDB_KEYS:
self.atoms.set( k, None, changed=0 )
else:
atoms = B.DictList( atoms )
for k in self.PDB_KEYS:
self.atoms.set( k, atoms.valuesOf( k ),
changed=getattr(self, 'atomsChanged',1) )
del self.old_atoms
del self.atomsChanged
## biskit <= 2.0.1 kept positions of TER records in a separate list
ter_atoms = getattr( self, '_PDBModel__terAtoms', 0)
if ter_atoms:
mask = N0.zeros( self.atoms.profLength() )
N0.put( mask, ter_atoms, 1 )
self.atoms.set('after_ter', mask,
comment='rebuilt from old PDBModel.__terAtoms')
if ter_atoms is not 0:
del self.__terAtoms
## biskit <= 2.0.1 cached a volatile index in __resIndex & __chainIndex
self._resIndex = getattr( self, '_resIndex', None)
self._chainIndex=getattr( self, '_chainIndex', None)
if getattr( self, '__resIndex', None) is not None:
try:
del self.__resIndex, self.__chainIndex
except:
print('DEBUG ', T.lastError())
pass
self.__maskCA = getattr( self, '__maskCA', None )
self.__maskBB = getattr( self, '__maskBB', None )
self.__maskHeavy = getattr( self, '__maskHeavy', None )
## test cases of biskit < 2.3 still contain Numeric arrays
if self.xyz is not None and type( self.xyz ) is not N.ndarray:
self.xyz = N0.array( self.xyz )
if self._resIndex is not None and type( self._resIndex ) is not N.ndarray:
self._resIndex = N0.array( self._resIndex )
if self._chainIndex is not None and type(self._chainIndex) is not N.ndarray:
self._chainIndex = N0.array( self._chainIndex )
try:
del self.caMask, self.bbMask, self.heavyMask
except:
pass
[docs] def update( self, skipRes=None, updateMissing=0, force=0,
headPatterns=[] ):
"""
Read coordinates, atoms, fileName, etc. from PDB or
pickled PDBModel - but only if they are currently empty.
The atomsChanged and xyzChanged flags are not changed.
:param skipRes: names of residues to skip if updating from PDB
:type skipRes: list of str
:param updateMissing: 0(default): update only existing profiles
:type updateMissing: 0|1
:param force: ignore invalid source (0) or report error (1)
:type force: 0|1
:param headPatterns: [(putIntoKey, regex)] extract given REMARKS
:type headPatterns: [(str, str)]
:raise PDBError: if file can't be unpickled or read:
"""
source = self.validSource()
if source is None and force:
raise PDBError( str(self.source) + ' is not a valid source.')
if source is None:
return
parser = PDBParserFactory.getParser( source )
parser.update(self, source, skipRes=skipRes,
updateMissing=updateMissing, force=force,
headPatterns=headPatterns )
[docs] def setXyz(self, xyz ):
"""
Replace coordinates.
:param xyz: Numpy array ( 3 x N_atoms ) of float
:type xyz: array
:return: array( 3 x N_atoms ) or None, old coordinates
:rtype: array
"""
old = self.xyz
self.xyz = xyz
self.xyzChanged = self.xyzChanged or \
not mathUtils.arrayEqual(self.xyz,old )
return old
[docs] def setSource( self, source ):
"""
:param source: LocalPath OR PDBModel OR str
"""
if type( source ) == str and len( source ) != 4:
self.source = LocalPath( source )
else:
self.source = source
self.__validSource = 0
    def getXyz( self, mask=None ):
        """
        Get coordinates, fetch from source PDB or pickled PDBModel,
        if necessary.
        :param mask: atom mask
        :type mask: list of int OR array of 1||0
        :return: xyz-coordinates, array( 3 x N_atoms, Float32 )
        :rtype: array
        """
        ## lazy-load coordinates from the source on first access
        if self.xyz is None and self.validSource() is not None:
            self.update( force=1 )
        if self.xyz is None:
            ## empty array that can be concatenated to other xyz arrays
            return N0.zeros( (0,3), N0.Float32 )
        if mask is None:
            return self.xyz
        return N0.compress( mask, self.xyz, 0 )
[docs] def getAtoms( self, mask=None ):
"""
Get atom CrossViews that can be used like dictionaries.
Note that the direct manipulation of individual profiles is more
efficient than the manipulation of CrossViews (on profiles)!
:param mask: atom mask
:type mask: list of int OR array of 1||0
:return: list of CrossView dictionaries
:rtype: [ :class:`ProfileCollection.CrossView` ]
"""
r = self.atoms.toCrossViews()
if mask is None:
return r
return [ r[i] for i in N0.nonzero( mask ) ]
[docs] def profile( self, name, default=None, update=True, updateMissing=False ):
"""
Use::
profile( name, updateMissing=0) -> atom or residue profile
:param name: name to access profile
:type name: str
:param default: default result if no profile is found, if None,
try to update from source and raise error [None]
:type default: any
:param update: update from source before returning empty profile [True]
:type update: bool
:param updateMissing: update from source before reporting missing
profile [False]
:type updateMissing: 0||1
:raise ProfileError: if neither atom- nor rProfiles contains |name|
"""
if updateMissing and not name in self.atoms and \
not name in self.residues:
self.update( updateMissing=True )
if name in self.atoms:
return self.atoms.get( name, default,
update=update, updateMissing=0)
if name in self.residues:
return self.residues.get( name, default,
update=update, updateMissing=0)
if default is not None:
return default
raise ProfileError( 'No profile info found with name '+str(name))
[docs] def profileInfo( self, name, updateMissing=0 ):
"""
Use:
profileInfo( name ) -> dict with infos about profile
:param name: name to access profile
:type name: str
:param updateMissing: update from source before reporting missing \
profile. Guaranteed infos are:
- 'version' (str)
- 'comment' (str)
- 'changed' (1||0)
:type updateMissing: 0|1
:raise ProfileError: if neither atom - nor rProfiles contains |name|
"""
if updateMissing and not name in self.atoms and \
not name in self.residues:
self.update()
if name in self.atoms:
return self.atoms.getInfo( name )
if name in self.residues:
return self.residues.getInfo( name )
raise ProfileError( 'No profile info found with name '+str(name))
[docs] def removeProfile( self, *names ):
"""
Remove residue or atom profile(s)
Use:
removeProfile( str_name [,name2, name3] ) -> 1|0,
:param names: name or list of residue or atom profiles
:type names: str OR list of str
:return: 1 if at least 1 profile has been deleted,
0 if none has been found
:rtype: int
"""
r = 0
for n in names:
if n in self.atoms:
del self.atoms[n]
r = 1
if n in self.residues:
del self.residues[n]
r = 1
return r
[docs] def xyzIsChanged(self):
"""
Tell if xyz or atoms have been changed compared to source file or
source object (which can be still in memory).
:return: xyz field has been changed with respect to source
:rtype: (1||0, 1||0)
"""
return self.xyzChanged
[docs] def xyzChangedFromDisc(self):
"""
Tell whether xyz can currently be reconstructed from a
source on disc. Same as xyzChanged() unless source is another not yet
saved PDBModel instance that made changes relative to its own source.
:return: xyz has been changed
:rtype: bool
"""
if self.validSource() is None:
return True
if isinstance( self.source, B.PDBModel ):
return self.xyzIsChanged() or \
self.source.xyzChangedFromDisc()
return self.xyzIsChanged()
[docs] def profileChangedFromDisc(self, pname):
"""
Check if profile has changed compared to source.
:return: 1, if profile |pname| can currently not be
reconstructed from a source on disc.
:rtype: int
:raise ProfileError: if there is no atom or res profile with pname
"""
if self.validSource() is None:
return True
if isinstance( self.source, B.PDBModel ):
return self.profileInfo( pname )['changed'] or \
self.source.profileChangedFromDisc( pname )
return self.profileInfo( pname )['changed']
def __slimProfiles(self):
"""
Remove profiles, that haven't been changed from a direct
or indirect source on disc
**AUTOMATICALLY CALLED BEFORE PICKLING and by deepcopy**
"""
for key in self.residues:
if not self.profileChangedFromDisc( key ):
self.residues.set( key, None )
for key in self.atoms:
if not self.profileChangedFromDisc( key ):
self.atoms.set( key, None )
    def slim( self ):
        """
        Remove xyz array and profiles if they haven't been changed and
        could hence be loaded from the source file (only if there is a source
        file...).
        **AUTOMATICALLY CALLED BEFORE PICKLING**
        **Currently also called by deepcopy via getstate**
        """
        ## remove atoms/coordinates if they are unchanged from an existing
        ## source
        ## override this behaviour with forcePickle
        if not self.forcePickle:
            if not self.xyzChangedFromDisc():
                self.xyz = None
            ## store remaining coordinates as single-precision to save space
            if type( self.xyz ) is N0.arraytype and self.xyz.dtype.char != 'f':
                self.xyz = self.xyz.astype(N0.Float32)
            self.__slimProfiles()
        ## drop cached masks and the source-validation cache; both are
        ## recalculated on demand
        self.__maskCA = self.__maskBB = self.__maskHeavy = None
        self.__validSource = 0
[docs] def validSource(self):
"""
Check for a valid source on disk.
:return: str or PDBModel, None if this model has no valid source
:rtype: str or PDBModel or None
"""
if self.__validSource == 0:
if isinstance( self.source, LocalPath ) and self.source.exists():
self.__validSource = self.source.local()
else:
if isinstance( self.source, B.PDBModel ):
self.__validSource = self.source
else:
## risky: the PDB code may not exist!
if type( self.source ) is str and len( self.source )==4:
self.__validSource = self.source
else:
self.__validSource = None
return self.__validSource
[docs] def sourceFile( self ):
"""
Name of pickled source or PDB file. If this model has another
PDBModel as source, the request is passed on to this one.
:return: file name of pickled source or PDB file
:rtype: str
:raise PDBError: if there is no valid source
"""
s = self.validSource()
if s is None:
raise PDBError('no valid source')
if type( s ) == str:
return s
return self.source.sourceFile()
    def disconnect( self ):
        """
        Disconnect this model from its source (if any).
        .. note::
           If this model has an (in-memory) PDBModel instance as source,
           the entries of 'atoms' could still reference the same dictionaries.
        """
        ## pull everything in from the source before cutting the link
        self.update()
        try:
            ## remember the file this model came from, if any
            self.fileName = self.fileName or self.sourceFile()
        except:
            pass
        self.setSource( None )
        ## without a source, everything counts as changed
        self.xyzChanged = 1
        for p in self.residues:
            self.residues.setInfo( p, changed=1 )
        for p in self.atoms:
            self.atoms.setInfo( p, changed=1 )
[docs] def getPdbCode(self):
"""
Return pdb code of model.
:return: pdb code
:rtype: str
"""
return self.pdbCode
[docs] def setPdbCode(self, code ):
"""
Set model pdb code.
:param code: new pdb code
:type code: str
"""
self.pdbCode = code
    def sequence(self, mask=None, xtable=molUtils.xxDic ):
        """
        Amino acid sequence in one letter code.
        :param mask: atom mask, to apply before (default None)
        :type mask: list or array
        :param xtable: dict {str:str}, additional residue:single_letter mapping
                       for non-standard residues (default molUtils.xxDic)
                       [currently not used]
        :type xtable: dict
        :return: 1-letter-code AA sequence (based on first atom of each res).
        :rtype: str
        """
        firstAtm = self.resIndex()
        if mask is not None:
            ## keep only residues whose first atom survives the mask
            m_first = N0.zeros( self.lenAtoms() )
            N0.put( m_first, firstAtm, 1 )
            m_first = mask * m_first
            firstAtm = N0.nonzero( m_first )
        ## residue name of the first atom of each (remaining) residue
        l = self.atoms['residue_name']
        l = [ l[i] for i in firstAtm ]
        return ''.join( molUtils.singleAA( l, xtable ) )
    def xplor2amber( self, aatm=True, parm10=False ):
        """
        Rename atoms so that tleap from Amber can read the PDB.
        If HIS residues contain atoms named HE2 or/and HD2, the residue
        name is changed to HIE or HID or HIP, respectively. Disulfide bonds
        are not yet identified - CYS -> CYX renaming must be done manually
        (see AmberParmBuilder for an example).
        Internally amber uses H atom names ala HD21 while (old) standard pdb
        files use 1HD2. By default, ambpdb produces 'standard' pdb atom names
        but it can output the less ambiguous amber names with switch -aatm.
        :param aatm: use, for example, HG23 instead of 3HG2 (default:1)
        :type aatm: 1|0
        :param parm10: adapt nucleic acid atom names to 2010 Amber forcefield
        :type parm10: 1|0
        """
        numbers = list(map( str, list(range(10)) ))
        ## nucleic acid atom names have changed in parm10;
        if parm10: ## this evidently is a bit of a hack. Should be revisited.
            def __parm10rename( a ):
                ## map pre-2010 primed names / terminal H names to the
                ## 2010 Amber conventions
                if "'1" in a: return a.replace( "'1", "'" )
                if "'2" in a: return a.replace( "'2", "''" )
                if a == 'O1P': return 'OP1'
                if a == 'O2P': return 'OP2'
                if a == 'H5T': return "HO5'"
                if a == 'H3T': return "HO3'"
                return a
            self.atoms['name'] = list(map( __parm10rename, self.atoms['name'] ))
        ## append total atom count so each residue has a [first, last) range
        resI = self.resIndex().tolist()
        resI = N0.concatenate( (resI, [len(self)] ) )
        names = self.atoms['name']
        resnames = self.atoms['residue_name']
        for i in range( len(resI)-1 ):
            first = resI[i]
            last = resI[i+1]
            res = resnames[first]
            for j in range(first, last):
                if aatm:
                    ## move a leading digit to the end: '3HG2' -> 'HG23'
                    a = names[j]
                    if len(a)>2 and a[0] in numbers:
                        names[j] = a[1:] + a[0]
            if res == 'HIS':
                ## rename HIS according to which protons are present;
                ## both HE2 and HD1 -> doubly protonated HIP
                anames = names[first:last]
                if 'HE2' in anames: resnames[first:last]= ['HIE'] *(last-first)
                if 'HD1' in anames: resnames[first:last]= ['HID'] *(last-first)
                if 'HE2' in anames and 'HD1' in anames:
                    resnames[first:last] = ['HIP'] *(last-first)
[docs] def renameAmberRes( self ):
"""
Rename special residue names from Amber back into standard names
(i.e CYX S{->} CYS )
"""
l = self.atoms['residue_name']
for i in range(len(l)):
if l[i] == 'CYX':
l[i] = 'CYS'
if l[i] in ['HIE','HID','HIP']:
l[i] = 'HIS'
[docs] def writePdb( self, fname, ter=1, amber=0, original=0, left=0, wrap=0,
headlines=None, taillines=None):
"""
Save model as PDB file.
:param fname: name of new file
:type fname: str
:param ter: Option of how to treat the terminal record:
* 0 - don't write any TER statements
* 1 - restore original TER statements (doesn't work, \
if preceeding atom has been deleted) [default]
* 2 - put TER between all detected chains
* 3 - as 2 but also detect and split discontinuous chains
:type ter: int
:param amber: amber formatted atom names
(implies ter=3, left=1, wrap=0) (default 0)
:type amber: 1||0
:param original: revert atom names to the ones parsed in from PDB
(default 0)
:type original: 1||0
:param left: left-align atom names (as in amber pdbs)(default 0)
:type left: 1||0
:param wrap: write e.g. 'NH12' as '2NH1' (default 0)
:type wrap: 1||0
:param headlines: [( str, dict or str)], list of record / data tuples::
e.g. [ ('SEQRES', ' 1 A 22 ALA GLY ALA'), ]
:type headlines: list of tuples
:param taillines: same as headlines but appended at the end of file
:type taillines: list of tuples
"""
try:
f = IO.PDBFile( fname, mode='w' )
numbers = list(map( str, list(range(10)) ))
if amber:
__resnames = copy.copy(self.atoms['residue_name'])
__anames = copy.copy(self.atoms['name'])
self.xplor2amber()
ter = 3
wrap = 0
left = 1
if ter == 2 or ter == 3:
## tolist to workaround numeric bug
terIndex = N0.array( self.chainIndex( breaks=(ter==3) ).tolist()[1:] )
if ter == 1:
terIndex = N0.nonzero( self.atoms['after_ter'] )
if headlines:
for l in headlines:
f.writeLine( l[0], l[1] )
if taillines:
for l in taillines:
f.writeLine( l[0], l[1] )
i = -1
for a in self.atoms.toDicts():
i += 1
## fetch coordinates Vector
a['position'] = self.xyz[ i ]
aname = a['name']
if original and not amber:
aname = a['name_original']
## PDBFile prints atom names 1 column too far left
if wrap and len(aname) == 4 and aname[0] in numbers:
aname = aname[1:] + aname[0]
if not left and len(aname) < 4:
aname = ' ' + aname.strip()
a['name'] = aname
## write line
f.writeLine( a['type'], a )
## write TER line with details from previous atom
if (ter>0 and i+1 in terIndex):
f.writeLine('TER', a )
f.close()
if amber:
self.atoms['residue_name'] = __resnames
self.atoms['name'] = __anames
except:
EHandler.error( "Error writing "+fname )
[docs] def saveAs(self, path):
"""
Pickle this PDBModel to a file, set the 'source' field to
this file name and mark atoms, xyz, and profiles as unchanged.
Normal pickling of the object will only dump those data that can not
be reconstructed from the source of this model (if any).
saveAs creates a 'new source' without further dependencies.
:param path: target file name
:type path: str OR LocalPath instance
"""
try:
self.update()
## pickle all atoms, coordinates, profiles, even if unchanged
self.forcePickle = 1
self.setSource( path )
self.xyzChanged = 0
for p in self.residues:
self.residues.setInfo( p, changed=0 )
for p in self.atoms:
self.atoms.setInfo( p, changed=0 )
T.dump( self, str(path) )
except IOError as err:
raise PDBError("Can't open %s for writing." % T.absfile(str(path)))
    def maskF(self, atomFunction, numpy=1 ):
        """
        Create list whith result of atomFunction( atom ) for each
        atom. (Depending on the return value of atomFunction, the
        result is not necessarily a mask of 0 and 1. Creating masks
        should be just the most common usage).
        Note:
          This method is slow compared to maskFrom because the dictionaries
          that are given to the atomFunction have to be created from aProfiles
          on the fly. If performance matters, better combine the result from
          several maskFrom calls, e.g. instead of::
            r = m.maskF( lambda a: a['name']=='CA' and a['residue_name']=='ALA' )
          use::
            r = m.maskFrom( 'name', 'CA' ) * m.maskFrom('residue_name', 'ALA')
        :param atomFunction: function( dict_from_aProfiles.toDict() ),
                             true || false (Condition)
        :type atomFunction: 1||0
        :param numpy: 1(default)||0, convert result to Numpy array of int
        :type numpy: int
        :return: Numpy array( [0,1,1,0,0,0,1,0,..], Int) or list
        :rtype: array or list
        """
        try:
            ## fast path: map the function over all atoms in one go
            result = list(map( atomFunction, self.atoms.toDicts() ))
        except:
            ## fall-back solution: assign 0 to all entries that raise
            ## exception
            EHandler.warning("mask(): Error while mapping funtion "+
                             "to all atoms.")
            result = []
            for a in self.atoms.iterDicts():
                try:
                    result.append( atomFunction( a ) )
                ## put 0 if something goes wrong
                except :
                    EHandler.warning("mask(): Error while save-mapping ")
                    result.append(0)
        if numpy:
            return N0.array( result )
        return result
[docs] def maskFrom( self, key, cond ):
"""
Create an atom mask from the values of a specific profile.
Example, the following three statements are equivalent:
>>> mask = m.maskFrom( 'name', 'CA' )
>>> mask = m.maskFrom( 'name', lambda a: a == 'CA' )
>>> mask = N0.array( [ a == 'CA' for a in m.atoms['name'] ] )
However, the same can be also achieved with standard numpy operators:
>>> mask = numpy.array(m.atoms['name']) == 'CA'
:param key: the name of the profile to use
:type key: str
:param cond: either a function accepting a single value or a value or
an iterable of values (to allow several alternatives)
:type cond: function OR any OR [ any ]
:return: array or list of indices where condition is met
:rtype: list or array of int
"""
if type( cond ) is types.FunctionType:
return N0.array( list(map( cond, self.atoms[ key ] )) )
## several allowed values given
elif type( cond ) in [ list, tuple ]:
return N0.array( [ x in cond for x in self.atoms[key] ] )
## one allowed value given
## Numeric splits lists of str into 2-D char arrays, 'O' prevents that
else:
return N0.array( self.atoms[key] ) == cond
[docs] def maskCA( self, force=0 ):
"""
Short cut for mask of all CA atoms.
:param force: force calculation even if cached mask is available
:type force: 0||1
:return: array( 1 x N_atoms ) of 0||1
:rtype: array
"""
if self.__maskCA is None or force:
self.__maskCA = self.maskFrom( 'name', 'CA' )
return self.__maskCA
[docs] def maskBB( self, force=0, solvent=0 ):
"""
Short cut for mask of all backbone atoms. Supports standard protein
and DNA atom names. Any residues classified as solvent (water, ions)
are filtered out.
:param force: force calculation even if cached mask is available
:type force: 0||1
:param solvent: include solvent residues (default: false)
:type solvent: 1||0
:return: array( 1 x N_atoms ) of 0||1
:rtype: array
"""
if self.__maskBB is None or force or solvent:
mask = self.maskFrom( 'name', ['CA', 'C', 'N', 'O', 'H','OXT',
"P","O5'","C5'","C4'","C3'","O3'"])
if not solvent:
mask = N0.logical_not(self.maskSolvent()) * mask
self.__maskBB = mask ## cache
else:
return mask ## don't cache
return self.__maskBB
[docs] def maskHeavy( self, force=0 ):
"""
Short cut for mask of all heavy atoms. ('element' <> H)
:param force: force calculation even if cached mask is available
:type force: 0||1
:return: array( 1 x N_atoms ) of 0||1
:rtype: array
"""
if self.__maskHeavy is None or force:
self.__maskHeavy = self.maskFrom( 'element', lambda a: a != 'H' )
return self.__maskHeavy
[docs] def maskH( self ):
"""
Short cut for mask of hydrogens. ('element' == H)
:return: array( 1 x N_atoms ) of 0||1
:rtype: array
"""
return N0.logical_not( self.maskHeavy() )
def _maskCB( self ):
    """
    Short cut for mask of all CB I{and} CA of GLY.

    :return: mask of all CB and CA of GLY
    :rtype: array
    """
    def is_cb( a ):
        ## GLY has no CB; its CA serves as stand-in
        if a['name'] == 'CB':
            return True
        return a['residue_name'] == 'GLY' and a['name'] == 'CA'

    return self.maskF( is_cb )
def maskCB( self ):
    """
    Short cut for mask of all CB I{and} CA of GLY.

    :return: mask of all CB plus CA of GLY
    :rtype: array
    """
    ## GLY lacks CB -- use its CA instead
    m_gly_ca = self.maskFrom( 'residue_name', 'GLY' ) * self.maskCA()
    return self.maskFrom( 'name', 'CB' ) + m_gly_ca
def maskH2O( self ):
    """
    Short cut for mask of all atoms in residues named TIP3, HOH and WAT.

    :return: array( 1 x N_atoms ) of 0||1
    :rtype: array
    """
    water_names = ['TIP3', 'HOH', 'WAT']
    return self.maskFrom( 'residue_name', water_names )
def maskSolvent( self ):
    """
    Short cut for mask of all atoms in residues named
    TIP3, HOH, WAT, Na+, Cl-, CA, ZN.

    :return: array( 1 x N_atoms ) of 0||1
    :rtype: array
    """
    solvent_names = ['TIP3', 'HOH', 'WAT', 'Na+', 'Cl-', 'CA', 'ZN']
    return self.maskFrom( 'residue_name', solvent_names )
def maskHetatm( self ):
    """
    Short cut for mask of all HETATM records.

    :return: array( 1 x N_atoms ) of 0||1
    :rtype: array
    """
    return self.maskFrom( 'type', 'HETATM' )
def maskProtein( self, standard=0 ):
    """
    Short cut for mask containing all atoms of amino acids.

    :param standard: only standard residue names (not CYX, NME,..)
                     (default 0)
    :type standard: 0|1

    :return: array( 1 x N_atoms ) of 0||1,
             mask of all protein atoms (based on residue name)
    :rtype: array
    """
    d = molUtils.aaDicStandard if standard else molUtils.aaDic

    known = [ name.upper() for name in d.keys() ]

    residues = self.atoms['residue_name']
    return N0.array( [ r.upper() in known for r in residues ] )
def maskDNA( self ):
    """
    Short cut for mask of all atoms in DNA (based on residue name).

    :return: array( 1 x N_atoms ) of 0||1
    :rtype: array
    """
    dna_names = ['DA', 'DC', 'DG', 'DT']
    return self.maskFrom( 'residue_name', dna_names )
def maskRNA( self ):
    """
    Short cut for mask of all atoms in RNA (based on residue name).

    :return: array( 1 x N_atoms ) of 0||1
    :rtype: array
    """
    rna_names = ['A', 'C', 'G', 'U']
    return self.maskFrom( 'residue_name', rna_names )
def maskNA( self ):
    """
    Short cut for mask of all atoms in DNA or RNA
    (based on residue name).

    :return: array( 1 x N_atoms ) of 0||1
    :rtype: array
    """
    na_names = ['A', 'C', 'G', 'U', 'T', 'DA', 'DC', 'DG', 'DT']
    return self.maskFrom( 'residue_name', na_names )
def indicesFrom( self, key, cond ):
    """
    Get atom indices conforming condition applied to an atom profile.
    Corresponds to::
      >>> numpy.nonzero( m.maskFrom( key, cond) )

    :param key: the name of the profile to use
    :type key: str
    :param cond: either a function accepting a single value or a value or
                 an iterable of values
    :type cond: function OR any OR [any]

    :return: array of indices where condition is met
    :rtype : array of int
    """
    mask = self.maskFrom( key, cond )
    return N0.nonzero( mask )
def indices( self, what ):
    """
    Get atom indices conforming condition. This is a convenience method
    to 'normalize' different kinds of selections (masks, atom names,
    indices, functions) to indices as they are e.g. required by
    :class:`PDBModel.take`.

    :param what: Selection::
         - function applied to each atom entry,
           e.g. lambda a: a['residue_name']=='GLY'
         - list of str, allowed atom names
         - list of int, allowed atom indices OR mask with only 1 and 0
         - int, single allowed atom index
    :type what: function OR list of str or int OR int

    :return: N_atoms x 1 (0||1 )
    :rtype: Numeric array

    :raise PDBError: if what is neither of above
    """
    ## lambda function -> convert resulting mask to indices
    if type( what ) is types.FunctionType:
        return N0.nonzero( self.maskF( what) )

    if type( what ) is list or type( what ) is N0.arraytype:
        ## atom names
        if type( what[0] ) == str:
            return self.indicesFrom( 'name', what )

        if isinstance( what[0] , int) or \
           (isinstance(what, N.ndarray) and what.dtype in [int, bool]):
            ## a 0/1 mask is recognized by full length and max value < 2
            if len( what ) == self.lenAtoms() and max( what ) < 2:
                return N0.nonzero( what )
            ## otherwise treat as list of indices, returned unchanged
            else:
                return what

    ## single index
    if isinstance( what , int):
        return N0.array( [what], N0.Int )

    raise PDBError("PDBModel.indices(): Could not interpret condition ")
def mask( self, what ):
    """
    Get atom mask. This is a convenience method to 'normalize'
    different kinds of selections (masks, atom names, indices,
    functions) to a mask as it is e.g. required by :class:`PDBModel.compress`.

    :param what: Selection::
         - function applied to each atom entry,
           e.g. lambda a: a['residue_name']=='GLY'
         - list of str, allowed atom names
         - list of int, allowed atom indices OR mask with
           only 1 and 0
         - int, single allowed atom index
    :type what: function OR list of str or int OR int

    :return: N_atoms x 1 (0||1 )
    :rtype: Numeric array

    :raise PDBError: if what is neither of above
    """
    ## lambda function -> apply directly
    if type( what ) == types.FunctionType:
        return self.maskF( what )

    if type( what ) == list or type( what ) is N0.arraytype:
        ## atom names
        if type( what[0] ) == str:
            return self.maskFrom( 'name', what)

        if isinstance( what[0] , int) or \
           (isinstance(what, N.ndarray) and what.dtype in [int, bool]):
            ## a 0/1 mask is recognized by full length and max value < 2
            if len( what ) == self.lenAtoms() and max( what ) < 2:
                return what
            ## otherwise: list of indices -> scatter 1s into a zero mask
            else:
                r = N0.zeros( self.lenAtoms(),N0.Int )
                N0.put( r, what, 1 )
                return r

    ## single index -> recurse with one-element list
    if isinstance( what , int):
        return self.mask( [what] )

    raise PDBError("PDBModel.mask(): Could not interpret condition ")
def index2map( self, index, len_i ):
    """
    Create a map of len_i length, giving the residue(/chain) number of
    each atom, from list of residue(/chain) starting positions.

    :param index: list of starting positions, e.g. [0, 3, 8]
    :type index: [ int ] or array of int
    :param len_i: length of target map, e.g. 10
    :type len_i: int

    :return: list mapping atom positions to residue(/chain) number,
             e.g. [0,0,0, 1,1,1,1,1, 2,2] from above example
    :rtype: array of int (and of len_i length)
    """
    ## append total length as closing boundary
    index = N0.concatenate( (index, [len_i]) )
    ## delta[i] = number of atoms in residue/chain i
    delta = index[1:] - index[:-1]
    ## Numeric: delta = N0.take( index, range(1, len(index) ) ) - index[:-1]
    ## repeat each residue/chain number as often as it has atoms
    return N0.repeat( list(range(len(delta))), delta.astype( N0.Int32) )
def map2index( self, imap ):
    """
    Identify the starting positions of each residue(/chain) from a map
    giving the residue(/chain) number of each atom.

    :param imap: something like [0,0,0,1,1,1,1,1,2,2,2,...]
    :type imap: [ int ]

    :return: list of starting positions, e.g. [0, 3, 8, ...] in above ex.
    :rtype: array of int
    """
    try:
        ## duplicate last element so delta has same length as imap
        imap = N0.concatenate( (imap, [imap[-1]] ) )
        ## non-zero delta marks the atom *before* each new residue/chain
        delta = imap[1:] - imap[:-1]
        # Numeric: delta = N0.take( imap, range(1, len(imap) ) ) - imap[:-1]
        r = N0.nonzero( delta ) + 1
        ## position 0 always starts the first residue/chain
        return N0.concatenate( ( [0], r ) )

    except IndexError:
        ## handle empty imap parameter (imap[-1] raised)
        return N0.zeros(0)
def extendMask( self, mask, index, len_i ):
    """
    Translate a mask that is defined, e.g., on residues(/chains) to a mask
    that is defined on atoms.

    :param mask : mask marking positions in the list of residues or chains
    :type mask : [ bool ] or array of bool or of 1||0
    :param index: starting positions of all residues or chains
    :type index: [ int ] or array of int
    :param len_i: length of target mask
    :type len_i: int

    :return: mask that blows up the residue / chain mask to an atom mask
    :rtype: array of bool
    """
    borders = N0.concatenate( (index, [len_i]) )
    ## number of atoms in each residue / chain
    sizes = (borders[1:] - borders[:-1]).astype( N0.Int32 )
    ## repeat each mask entry once per atom of its residue / chain
    return N0.repeat( mask, sizes )
def extendIndex( self, i, index, len_i ):
    """
    Translate a list of positions that is defined, e.g., on residues
    (/chains) to a list of atom positions AND also return the starting
    position of each residue (/chain) in the new sub-list of atoms.

    :param i : positions in higher level list of residues or chains
    :type i : [ int ] or array of int
    :param index: atomic starting positions of all residues or chains
    :type index: [ int ] or array of int
    :param len_i: length of atom index (total number of atoms)
    :type len_i: int

    :return: (ri, rindex) - atom positions & new index
    :rtype: array of int, array of int

    :raise PDBError: if any position in i is out of range
    """
    ## catch invalid indices; negatives are converted to positive first
    i = self.__convert_negative_indices( i, len( index ) )
    if len(i)==0:
        return i, N0.array( [], int )
    if max( i ) >= len( index ) or min( i ) < 0:
        raise PDBError("invalid indices")

    ## last atom of each residue / chain
    stop  = N0.concatenate( (index[1:], [len_i]) ) - 1

    ifrom = N0.take( index, i )
    ito   = N0.take( stop,  i )

    ## number of atoms in each of the new residues
    rangelen = ito - ifrom + 1
    ## cumulative sizes give starting position of each selected unit
    rindex = N0.concatenate( ([0], N0.cumsum( rangelen[:-1] )) )

    ## (1) repeat position of first atom in each residue as often as there
    ## are atoms in this residue. (2) add a range array so that numbers
    ## are increasing from each atom to the next but (3) reset the added
    ## range to 0 at each residue starting position (-delta).
    ri = N0.repeat( ifrom, rangelen )
    delta = N0.repeat( rindex, rangelen )

    ri = ri + N0.arange( len(ri), dtype=N0.Int32 ) - delta

    return ri, rindex
def atom2resMask( self, atomMask ):
    """
    Mask (set 0) residues for which all atoms are masked (0) in atomMask.

    :param atomMask: list/array of int, 1 x N_atoms
    :type atomMask: list/array of int

    :return: 1 x N_residues (0||1 )
    :rtype: array of int
    """
    result = N0.zeros( self.lenResidues() )

    ## residues containing at least one selected atom
    hit_residues = self.atom2resIndices( N0.nonzero( atomMask) )
    N0.put( result, hit_residues, 1 )

    return result
def atom2resIndices( self, indices ):
    """
    Get list of indices of residues for which any atom is in indices.

    Note: in the current implementation, the resulting residues are
    returned in their old order, regardless of the order of input
    positions.

    :param indices: list of atom indices
    :type indices: list of int

    :return: indices of residues
    :rtype: list of int
    """
    rmap = N0.take( self.resMap(), indices )
    ## first occurrence of each residue number in the extracted map
    return N0.take( rmap, self.map2index( rmap ) )
def res2atomMask( self, resMask ):
    """
    Convert residue mask to atom mask.

    :param resMask: list/array of int, 1 x N_residues
    :type resMask: list/array of int

    :return: 1 x N_atoms
    :rtype: array of int
    """
    starts = self.resIndex()
    n_atoms = self.lenAtoms()
    return self.extendMask( resMask, starts, n_atoms )
def __convert_negative_indices( self, indices, length ):
    """
    Replace negative indices by their positive equivalent
    (counting from the end, i.e. -1 becomes length-1).

    :return: modified copy of indices (or unchanged indices itself)
    :rtype: array of int
    """
    ## fast path: nothing to convert
    if len(indices)==0 or min( indices ) >= 0:
        return indices

    result = copy.copy( N0.array( indices ) )

    ## add `length` only at the negative positions
    offsets = N0.zeros( len( result ) )
    N0.put( offsets, N.flatnonzero( result < 0 ), length )

    return result + offsets
def res2atomIndices( self, indices ):
    """
    Convert residue indices to atom indices.

    :param indices: list/array of residue indices
    :type indices: list/array of int

    :return: array of atom positions
    :rtype: array of int

    :raise PDBError: if any residue index is out of range
    """
    ## fix 1: guard empty input -- max()/min() of an empty sequence raises
    ## ValueError; extendIndex handles the empty case itself.
    ## fix 2: use >= -- index lenResidues() is already invalid (valid
    ## positions are 0 .. lenResidues()-1)
    if len( indices ) > 0 and \
       ( max( indices ) >= self.lenResidues() or min( indices ) < 0 ):
        raise PDBError("invalid residue indices")

    return self.extendIndex( indices, self.resIndex(), self.lenAtoms() )[0]
def atom2chainIndices( self, indices, breaks=0 ):
    """
    Convert atom indices to chain indices. Each chain is only
    returned once.

    :param indices: list of atom indices
    :type indices: list of int
    :param breaks: look for chain breaks in backbone coordinates (def. 0)
    :type breaks: 0||1

    :return: chains of any atom which is in indices
    :rtype: list of int
    """
    cmap = N0.take( self.chainMap( breaks=breaks ), indices )
    ## keep only the first occurrence of each chain number
    return N0.take( cmap, self.map2index( cmap ) )
def atom2chainMask( self, atomMask, breaks=0 ):
    """
    Mask (set to 0) chains for which all atoms are masked (0) in atomMask.
    Put another way: Mark all chains that contain any atom that is marked
    '1' in atomMask.

    :param atomMask: list/array of int, 1 x N_atoms
    :type atomMask: list/array of int

    :return: 1 x N_chains (0||1 )
    :rtype: array of int
    """
    result = N0.zeros( self.lenChains(breaks=breaks) )

    hit_chains = self.atom2chainIndices( N0.nonzero( atomMask),
                                         breaks=breaks )
    N0.put( result, hit_chains, 1 )

    return result
def chain2atomMask( self, chainMask, breaks=0 ):
    """
    Convert chain mask to atom mask.

    :param chainMask: list/array of int, 1 x N_chains
    :type chainMask: list/array of int
    :param breaks: look for chain breaks in backbone coordinates (def. 0)
    :type breaks: 0||1

    :return: 1 x N_atoms
    :rtype: array of int
    """
    cindex = self.chainIndex( breaks=breaks )
    return self.extendMask( chainMask, cindex, self.lenAtoms() )
def chain2atomIndices( self, indices, breaks=0 ):
    """
    Convert chain indices into atom indices.

    :param indices: list/array of chain indices (negative values count
                    from the end)
    :type indices: list/array of int
    :param breaks: look for chain breaks in backbone coordinates (def. 0)
    :type breaks: 0||1

    :return: array of atom positions
    :rtype: array of int

    :raise PDBError: if any chain index is out of range
    """
    ## fix: guard empty input -- max() of an empty sequence raises
    ## ValueError; extendIndex handles the empty case itself.
    ## N0.absolute allows negative (from-the-end) indices through.
    if len( indices ) > 0 and \
       max( N0.absolute(indices) ) > self.lenChains( breaks=breaks ):
        raise PDBError("invalid chain indices")

    return self.extendIndex( indices, self.chainIndex( breaks=breaks ),
                             self.lenAtoms() )[0]
def res2atomProfile( self, p ):
    """
    Get an atom profile where each atom has the value its residue has
    in the residue profile.

    :param p: name of existing residue profile OR ...
              [ any ], list of lenResidues() length
    :type p: str

    :return: [ any ] OR array, atom profile
    :rtype: list or array
    """
    if type( p ) is str:
        p = self.residues.get( p )

    ## remember input type so output matches it
    as_array = isinstance( p, N0.arraytype )

    result = [ p[ res ] for res in self.resMap() ]

    if as_array:
        return N0.array( result )
    return result
def atom2resProfile( self, p, f=None ):
    """
    Get a residue profile where each residue has the value that its first
    atom has in the atom profile.

    :param p: name of existing atom profile OR ...
              [ any ], list of lenAtoms() length
    :type p: str
    :param f: function to calculate single residue from many atom values
              f( [atom_value1, atom_value2,...] ) -> res_value
              (default None, simply take value of first atom in each res.)
    :type f: func

    :return: [ any ] OR array, residue profile
    :rtype: list or array
    """
    if type( p ) is str:
        p = self.atoms.get( p )

    ## remember input type so output matches it
    was_array = isinstance( p, N0.arraytype )

    if not f:
        ## value of each residue's first atom
        r = N0.take( p, self.resIndex() )
    else:
        r = N0.array( [ f( values ) for values in self.profile2resList(p) ] )

    if was_array:
        return r
    return r.tolist()
def profile2mask(self, profName, cutoff_min=None, cutoff_max=None ):
    """
    profile2mask( str_profname, [cutoff_min, cutoff_max=None])

    Creates a mask from an atom or residue profile, whichever contains
    the given profile name.

    :param cutoff_min: low value cutoff (all values >= cutoff_min)
    :type cutoff_min: float
    :param cutoff_max: high value cutoff (all values < cutoff_max)
    :type cutoff_max: float

    :return: mask len( profile(profName) ) x 1||0
    :rtype: array

    :raise ProfileError: if no profile is found with name profName
    """
    ## atom profiles take precedence over residue profiles
    collection = self.atoms if profName in self.atoms else self.residues
    return collection.profile2mask( profName, cutoff_min, cutoff_max )
def profile2atomMask( self, profName, cutoff_min=None, cutoff_max=None ):
    """
    profile2atomMask( str_profname, [cutoff_min, cutoff_max=None])

    Same as :class:`profile2mask`, but converts residue mask to atom mask.

    :param cutoff_min: low value cutoff
    :type cutoff_min: float
    :param cutoff_max: high value cutoff
    :type cutoff_max: float

    :return: mask N_atoms x 1|0
    :rtype: array

    :raise ProfileError: if no profile is found with name profName
    """
    result = self.profile2mask( profName, cutoff_min, cutoff_max )

    ## residue-length result -> blow up to atom mask
    if len( result ) == self.lenResidues():
        return self.res2atomMask( result )

    return result
def profile2resList( self, p ):
    """
    Group the profile values of each residue's atoms into a separate list.

    :param p: name of existing atom profile OR ...
              [ any ], list of lenAtoms() length

    :return: a list (one entry per residue) of lists (one entry per resatom)
    :rtype: [ [ any ] ]
    """
    if type( p ) is str:
        p = self.atoms.get( p )

    starts = self.resIndex()     # first atom of each residue
    ends   = self.resEndIndex()  # last atom of each residue

    return [ p[ s : e+1 ] for s, e in zip( starts, ends ) ]
def mergeChains( self, c1, id='', segid='', rmOxt=True,
                 renumberAtoms=False, renumberResidues=True):
    """
    Merge two adjacent chains. This merely removes all internal markers
    for a chain boundary. Atom content or coordinates are not modified.

    PDBModel tracks chain boundaries in an internal _chainIndex. However,
    there are cases when this chainIndex needs to be re-built and new
    chain boundaries are then inferred from jumps in chain- or segment
    labelling or residue numbering. mergeChains automatically
    re-assigns PDB chain- and segment IDs as well as residue numbering
    to prepare for this situation.

    :param c1 : first of the two chains to be merged (negative values
                count from the end)
    :type c1 : int
    :param id : chain ID of the new chain (default: ID of first chain)
    :type id : str
    :param segid: new chain's segid (default: SEGID of first chain)
    :type segid: str
    :param rmOxt: remove any OXT / OT2 atom found in the first chain
                  (default: True)
    :type rmOxt: bool
    :param renumberAtoms: rewrite PDB serial numbering of the adjacent
                          chain to be consecutive to the last atom of the
                          first chain (default: False)
    :type renumberAtoms: bool
    :param renumberResidues: shift PDB residue numbering so that the first
                             residue of the adjacent chain follows the
                             previous residue. Other than for atom
                             numbering, later jumps in residue numbering
                             are preserved. (default: True)
    :type renumberResidues: bool
    """
    c1 = self.__convert_negative_indices( [c1], self.lenChains() )[0]

    oldI = self.chainIndex()
    assert len(oldI) > c1 + 1, 'no adjacent chain to be merged'

    ## remove chain boundary from chainIndex
    self._chainIndex = N0.concatenate( (oldI[:c1+1], oldI[c1+2:] ) )

    ## starting and ending position of (old) second chain
    i_start= oldI[ c1 ]
    if len( oldI ) > c1+2:
        i_next = oldI[ c1+2 ]
    else:
        i_next = len( self )
    i_scar = oldI[ c1+1 ]  ## (old) starting position of second chain
    n_atoms= i_next - i_start

    ## remove trace of PDB TER statement (if any)
    self['after_ter'][ i_scar ] = 0

    ## harmonize chain ID
    id = id or self['chain_id'][i_scar-1]
    self['chain_id'][ i_start : i_next ] = [ id ] * n_atoms

    ## harmonize segID
    segid = segid or self['segment_id'][i_scar-1]
    self['segment_id'][ i_start : i_next ] = [ segid ] * n_atoms

    ## harmonize PDB residue numbering (by *shifting* current numbers)
    if renumberResidues:
        first = self['residue_number'][ i_scar-1 ] + 1
        delta = first - self['residue_number'][ i_scar ]
        profile = self['residue_number'][i_scar:i_next] + delta
        self['residue_number'][i_scar:i_next] = profile

    ## harmonize PDB atom numbering (by *rewriting* current numbers)
    ## usually though, atoms already have consecutive numbering through the PDB
    if renumberAtoms:
        n = i_next - i_scar
        first = self['serial_number'][ i_scar-1 ] + 1
        self['serial_number'][i_scar:i_next] = N0.arange(first, first + n)

    ## remove OXT and OT2 if requested
    if rmOxt:
        ## overkill: we actually would only need to look into last residue
        anames = N0.array( self.atoms['name'][i_start:i_scar] )
        i_oxt = N.flatnonzero( N0.logical_or( anames=='OXT', anames=='OT2' ))
        if len( i_oxt ) > 0:
            self.remove( i_oxt )
def mergeResidues( self, r1, name='', residue_number=None,
                   chain_id='', segment_id='',
                   renumberAtoms=False ):
    """
    Merge two adjacent residues. Duplicate atoms are labelled with
    alternate codes 'A' (first occurrence) to 'B' or later.

    :param r1: first of the two residues to be merged (negative values
               count from the end)
    :type r1: int
    :param name: name of the new residue (default: name of first residue)
    :type name: str
    :param residue_number: PDB residue number of the new residue
                           (default: number of first residue)
    :type residue_number: int
    :param chain_id: chain ID of the new residue (default: ID of first)
    :type chain_id: str
    :param segment_id: segment ID of the new residue (default: first's)
    :type segment_id: str
    :param renumberAtoms: rewrite PDB serial numbering of the second
                          residue's atoms (default: False)
    :type renumberAtoms: bool
    """
    r1 = self.__convert_negative_indices( [r1], self.lenResidues() )[0]

    oldI = self.resIndex()
    assert len(oldI) > r1 + 1, 'no adjacent residue to be merged'

    ## remove residue boundary from residue Index
    self._resIndex = N0.concatenate( (oldI[:r1+1], oldI[r1+2:] ) )

    ## starting and ending position of new fused and (old) second residue
    i_start= oldI[ r1 ]
    if len( oldI ) > r1+2:
        i_next = oldI[ r1+2 ]
    else:
        i_next = len( self )
    i_scar = oldI[ r1+1 ]  ## (old) starting position of second residue
    n_atoms= i_next - i_start

    ## move PDB TER statement (if any) to end of fused residue
    if i_next < len( self ):
        self['after_ter'][ i_next ] = self['after_ter'][ i_scar ]
    self['after_ter'][ i_scar ] = 0

    ## harmonize residue name
    name = name or self['residue_name'][i_scar-1]
    self['residue_name'][ i_start : i_next ] = [ name ] * n_atoms

    ## harmonize chain ID
    id = chain_id or self['chain_id'][i_scar-1]
    self['chain_id'][ i_start : i_next ] = [ id ] * n_atoms

    ## harmonize segID
    segid = segment_id or self['segment_id'][i_scar-1]
    self['segment_id'][ i_start : i_next ] = [ segid ] * n_atoms

    ## harmonize PDB residue numbering
    residue_number = residue_number or self['residue_number'][i_scar-1]
    self['residue_number'][i_start:i_next] = [residue_number] * n_atoms

    ## harmonize PDB atom numbering (by *rewriting* current numbers)
    if renumberAtoms:
        n = i_next - i_scar
        first = self['serial_number'][ i_scar-1 ] + 1
        self['serial_number'][i_scar:i_next] = N0.arange(first, first + n)

    ## shift chain boundary (if any) to end of fused residue
    ## unless it's the end of the model or there is already a boundary there
    if i_scar in self.chainIndex():
        i = N.flatnonzero( self._chainIndex == i_scar )[0]
        if (not i_next in self._chainIndex) and (i_next != len(self)):
            self._chainIndex[ i ] = i_next
        else:
            ## fix: concatenate takes a single sequence-of-arrays argument;
            ## the original passed two positional arrays (second one was
            ## silently interpreted as the axis argument -> TypeError)
            self._chainIndex = N0.concatenate( (self._chainIndex[:i],
                                                self._chainIndex[i+1:]) )

    ## mark duplicate atoms in the 'alternate' field of the new residue
    r = Residue( self, r1 )
    r.labelDuplicateAtoms()
def concat( self, *models, **kw ):
    """
    Concatenate atoms, coordinates and profiles. source and fileName
    are lost, so are profiles that are not available in all models.
    model0.concat( model1 [, model2, ..]) -> single PDBModel.

    :param models: models to concatenate
    :type models: one or more PDBModel instances
    :param newRes: treat beginning of second model as new residue (True)
    :type newRes: bool
    :param newChain: treat beginning of second model as new chain (True)
    :type newChain: bool

    Note: info records of given models are lost.
    """
    newRes  = kw.get('newRes', True)
    newChain= kw.get('newChain', True)

    ## recursion anchor: nothing left to append
    if len( models ) == 0:
        return self

    m = models[0]

    ## result model of same (sub-)class as self
    r = self.__class__()

    self.update()  ## trigger update if xyz or any profile is None

    r.setXyz( N0.concatenate( ( self.getXyz(), m.getXyz() ) ) )
    r.setPdbCode( self.pdbCode )

    r.atoms = self.atoms.concat( m.atoms, )
    r.residues = self.residues.concat( m.residues, )

    ## re-point profile collections at the new parent model
    r.residues.model = r
    r.atoms.model = r

    ## shift the second model's residue / chain indices past self's atoms
    append_I = m.resIndex() + self.lenAtoms()
    r._resIndex = N0.concatenate((self.resIndex(), append_I ))

    append_I = m.chainIndex() +self.lenAtoms()
    r._chainIndex =N0.concatenate((self.chainIndex( singleRes=1 ), append_I))

    ## remove traces of residue or chain breaks
    if not newChain:
        r.mergeChains( self.lenChains() - 1 )
    if not newRes:
        r.mergeResidues( self.lenResidues() -1 )

    r.info = copy.deepcopy( self.info )

    ## leads to bug 3611835
##        try:
##            k = max(self.biounit.keys())+1
##            r.residues['biomol'][self.lenResidues():] += k
##            r.biounit = self.biounit.append(m.biounit)
##            r.biounit.model = r
##        except AttributeError:
##            pass

    ## recurse into remaining models
    return r.concat( *models[1:] )
## def removeChainBreaks( self, chains, breaks=False ):
## """
## Remove chain boundaries *before* given chain indices.
## Example:
## removeChainBreaks( [1,3] ) --> removes the first and third chain
## break but keeps the second, e.g. this joins first and second chain
## but also second and third chain.
## Coordinates are not modified. removeChainBreaks( [0] ) doesn't make
## sense.
## :param chains: [ int ], chain breaks
## """
## if 0 in chains:
## raise PDBError, 'cannot remove chain break 0'
## cindex = self.chainIndex( breaks=breaks )
## ## simple removal of terminal OXT and TER label, make it more robust!
## remove = []
## for i in chains:
## lastatom = cindex[i] - 1
## if self[ lastatom ]['name'] in ['OXT', 'OT2']:
## remove += [ lastatom ]
## self['after_ter'][lastatom+1] = 0
## self.remove( remove )
## ## update chain index
## cindex = self.chainIndex( breaks=breaks )
## mask = N0.ones( len( cindex ) )
## N0.put( mask, chains, 0 )
## self._chainIndex = N0.compress( mask, cindex )
def take( self, i, rindex=None, cindex=None,
          *initArgs, **initKw ):
    """
    Extract a PDBModel with a subset of atoms::
      take( atomIndices ) -> PDBModel

    All other PDBModel methods that extract portions of the model (e.g.
    compress, takeChains, takeResidues, keep, clone, remove) are ultimately
    using `take()` at their core.

    Note:
    take employs fast numpy vector mapping methods to re-calculate
    the residue and chain index of the result model. The methods generally
    work but there is one scenario where this mechanism can fail: If take
    is used to create repetitions of residues or chains directly next to
    each other, these residues or chains can get accidentally merged. For
    this reason, calling methods can optionally pre-calculate and provide
    a correct version of the new residue or chain index (which will then
    be used as is).

    :param i: atomIndices, positions to take in the order to take
    :type i: list/array of int
    :param rindex: optional residue index for result model after extraction
    :type rindex: array of int
    :param cindex: optional chain index for result model after extraction
    :type cindex: array of int
    :param initArgs: any number of additional arguments for constructor of \
                     result model
    :param initKw: any additional keyword arguments for constructor of \
                   result model

    :return: new PDBModel or sub-class
    :rtype: PDBModel
    """
    r = self.__class__( *initArgs, **initKw )

    ## the easy part: extract coordinates and atoms
    r.xyz = N0.take( self.getXyz(), i )
    ## mark changed only if selection actually re-orders / drops atoms
    r.xyzChanged = self.xyzChanged or not N.all(r.xyz == self.xyz)

    r.atoms = self.atoms.take( i, r )

    ## more tricky: rescue residue borders and extract residue profiles
    new_resmap = N0.take( self.resMap(), i )
    if rindex is not None:
        r._resIndex = rindex
    else:
        ## this can fail if residues are repeated in the selection
        r._resIndex = self.map2index( new_resmap )

    ## residue numbers (in old model) of the residues that survive
    i_res = N0.take( new_resmap, r._resIndex )
    r.residues = self.residues.take( i_res, r )

    ## now the same with chain borders (and later profiles)
    if cindex is not None:
        r._chainIndex = cindex
    else:
        ## this can fail if chains are repeated next to each other in i
        new_chainmap = N0.take( self.chainMap(), i )
        r._chainIndex = self.map2index( new_chainmap )

    ## copy non-sequential infos
    r.info = copy.deepcopy( self.info )
    r.pdbCode = self.pdbCode
    r.fileName = self.fileName
    r.source = self.source

    ## copy the biounit
##        try:
##            r.biounit = self.biounit.take(i)
##            r.biounit.model = r
##        except AttributeError:
##            pass

    return r
def keep( self, i ):
    """
    Replace atoms, coordinates, profiles of this(!) model with sub-set
    (in-place version of take()).

    :param i: atom positions to be kept
    :type i: list or array of int
    """
    ## heuristic: a full-length list of 0/1 values is probably a mask,
    ## which take() would misinterpret as indices 0 and 1
    if len(i)==self.lenAtoms() and max(i)<2:
        EHandler.warning('dont use PDBModel.keep() with mask.', trace=0)

    r = self.take( i )

    ## transplant the extracted model's state into self
    self.xyz = r.xyz
    self.xyzChanged = r.xyzChanged

    self._resIndex  = r._resIndex
    self._chainIndex= r._chainIndex

    self.atoms = r.atoms
    self.residues = r.residues

    self.info = r.info

    ## invalidate cached masks and chain break records
    self.__maskCA = self.__maskBB = self.__maskHeavy = None
    self.__chainBreaks = None
def clone( self ):
    """
    Clone PDBModel.

    :return: PDBModel / subclass, copy of this model,
             see comments to numpy.take()
    :rtype: PDBModel
    """
    ## taking every atom position yields a deep-ish copy
    all_atoms = self.atomRange()
    return self.take( all_atoms )
def compress( self, mask, *initArgs, **initKw ):
    """
    Compress PDBModel using mask::
      compress( mask ) -> PDBModel

    :param mask: array( 1 x N_atoms of 1 or 0 ):
                 * 1 .. keep this atom
    :type mask: array

    :return: compressed PDBModel using mask
    :rtype: PDBModel
    """
    selected = N0.nonzero( mask )
    return self.take( selected, *initArgs, **initKw )
def remove( self, what ):
    """
    Convenience access to the 3 different remove methods.
    The mask used to remove atoms is returned. This mask can be used
    to apply the same change to another array of same dimension as
    the old(!) xyz and atoms.

    :param what: Description of what to remove:
          - function( atom_dict ) -> 1 || 0    (1..remove) OR
          - list of int [4, 5, 6, 200, 201..], indices of atoms to remove
          - list of int [11111100001101011100..N_atoms], mask (1..remove)
          - int, remove atom with this index
    :type what: list of int or int

    :return: array(1 x N_atoms_old) of 0||1, mask used to compress the
             atoms and xyz arrays.
    :rtype: array

    :raise PDBError: if what is neither of above
    """
    ## invert: self.mask() marks atoms to remove, keep() wants survivors
    keep_mask = N0.logical_not( self.mask( what ) )
    self.keep( N0.nonzero( keep_mask ) )
    return keep_mask
def takeResidues( self, i ):
    """
    Copy the given residues into a new model.

    :param i: residue indices
    :type i: [ int ]

    :return: PDBModel with given residues in given order
    :rtype: PDBModel
    """
    ## pre-compute the new residue index so repeated residues survive take()
    atom_i, new_rindex = self.extendIndex( i, self.resIndex(),
                                           self.lenAtoms() )
    return self.take( atom_i, rindex=new_rindex )
def takeChains( self, chains, breaks=0, force=0 ):
    """
    Get copy of this model with only the given chains.

    Note, there is one very special scenario where chain boundaries can get
    lost: If breaks=1 (chain positions are based on normal chain boundaries
    as well as structure-based chain break detection) AND one or more
    chains are extracted several times next to each other, for example
    chains=[0, 1, 1, 2], then the repeated chain will be merged. So in
    the given example, the new model would have chainLength()==3. This case
    is tested for and a PDBIndexError is raised. Override with force=1 and
    proceed at your own risk. Which, in this case, simply means you should
    re-calculate the chain index after takeChains(). Example::

      >>> repeat = model.takeChains( [0,0,0], breaks=1, force=1 )
      >>> repeat.chainIndex( force=1, cache=1 )

    This works because the new model will have back-jumps in residue
    numbering.

    :param chains: list of chains, e.g. [0,2] for first and third
    :type chains: list of int
    :param breaks: split chains at chain breaks (default 0)
    :type breaks: 0|1
    :param force: override check for chain repeats (only for breaks==1)
    :type force: bool

    :return: PDBModel consisting of the given chains in the given order
    :rtype: PDBModel

    :raise PDBIndexError: if breaks=1 and chains contains adjacent repeats
                          (unless force=1)
    """
    ## pre-compute the new chain index so repeated chains survive take()
    i, index = self.extendIndex( chains, self.chainIndex( breaks=breaks ),
                                 self.lenAtoms() )

    if not breaks or len(chains)==0:
        return self.take( i, cindex=index )

    ## test for repeats: delta==0 marks a chain repeated back-to-back
    if not force:
        chains = N0.array( chains, int )
        delta = N0.concatenate( (chains[1:], [chains[-1]+1]) ) - chains
        if not N.all( delta != 0 ):
            raise PDBIndexError('Chain boundaries cannot be preserved for repeats.' +\
                  "Use 'force=1' to override, then re-calculate chainIndex().")

    ## Give up on repeat treatment:
    ## more important: the new model's chain index should NOT include breaks
    return self.take( i )
def addChainFromSegid(self, verbose=1):
    """
    Take the last letter of each atom's segment ID and set it as the
    atom's chain ID.

    :param verbose: report atoms whose segment ID cannot be used (def 1)
    :type verbose: 0||1
    """
    chain_ids = self.atoms['chain_id']
    segment_ids = self.atoms['segment_id']

    for i in self.atomRange():
        try:
            chain_ids[i] = segment_ids[i][-1]
        ## fix 1: narrow the previously bare except (empty / non-indexable
        ## segment id); best-effort behaviour is preserved
        except (IndexError, TypeError):
            if verbose:
                ## fix 2: original referenced undefined name `a`
                ## (NameError inside the handler) and mis-named the method
                EHandler.warning(
                    "addChainFromSegid(): Problem with atom " + str(i))
def addChainId( self, first_id=None, keep_old=0, breaks=0 ):
    """
    Assign consecutive chain identifiers A - Z to all atoms.

    :param first_id: str (A - Z), first letter instead of 'A'
    :type first_id: str
    :param keep_old: don't override existing chain IDs (default 0)
    :type keep_old: 1|0
    :param breaks: consider chain break as start of new chain (default 0)
    :type breaks: 1|0

    :raise PDBError: if there are more chains than available letters
    """
    ids = self.atoms['chain_id']

    ## collect IDs already in use so we don't re-assign them
    old_chains = []
    if keep_old:
        old_chains = N0.take( ids, self.resIndex() )
        old_chains = mathUtils.nonredundant( old_chains )
        if '' in old_chains: old_chains.remove('')

    letters = string.ascii_uppercase
    if first_id:
        letters = letters[ letters.index( first_id ): ]
    ## exclude letters that existing chains already occupy
    letters = mathUtils.difference( letters, old_chains )

    chainMap = self.chainMap( breaks=breaks )

    try:
        for i in self.atomRange():
            if not (keep_old and ids[i] in old_chains):
                ids[i] = letters[ chainMap[i] ]

    except IndexError:
        raise PDBError('Too many chains, running out of letters.')
def renumberResidues( self, mask=None, start=1, addChainId=1 ):
    """
    Make all residue numbers consecutive and remove any insertion
    code letters. Note that a backward jump in residue numbering
    (among other things) is interpreted as end of chain by
    chainMap() and chainIndex() when a PDB file is loaded.

    :param mask: [ 0||1 x N_atoms ] atom mask to apply BEFORE
    :type mask: list of int
    :param start: starting number (default 1)
    :type start: int
    :param addChainId: add chain IDs if they are missing
    :type addChainId: 1|0
    """
    if addChainId:
        self.addChainId( keep_old=1, breaks=0 )

    number = start
    for residue in self.resList( mask ):
        ## every atom of the residue gets the same new number
        for atom in residue:
            atom['residue_number'] = number
            atom['insertion_code'] = ''
        number += 1
def atomRange( self ):
    """
    >>> m.atomRange() == range( m.lenAtoms() )

    :return: integer range for length of this model
    :rtype: [ int ]
    """
    n = self.lenAtoms()
    return list( range( n ) )
def lenAtoms( self, lookup=True ):
    """
    Number of atoms in model.

    :param lookup: allow reading coordinates from source (default: True)
    :type lookup: bool

    :return: number of atoms
    :rtype: int
    """
    ## cheapest answer: coordinates are already in memory
    if self.xyz is not None:
        return len( self.xyz )

    ## next: ask the atom profiles
    if len( self.atoms ) > 0:
        n = self.atoms.profLength( default=-1 )
        if n != -1:
            return n

    if self.source is None and not lookup:
        return 0

    ## last resort: load coordinates (may trigger source parsing)
    return len( self.getXyz() )
def lenResidues( self ):
    """
    Number of residues in model.

    :return: total number of residues
    :rtype: int
    """
    index = self.resIndex()
    return len( index )
def lenChains( self, breaks=0, maxDist=None, singleRes=0, solvent=0 ):
    """
    Number of chains in model.

    :param breaks: detect chain breaks from backbone atom distances (def 0)
    :type breaks: 0||1
    :param maxDist: maximal distance between consecutive residues
                    [ None ] .. defaults to twice the average distance
    :type maxDist: float
    :param singleRes: allow chains consisting of single residues (def 0)
    :type singleRes: 1||0
    :param solvent: also check solvent residues for "chain breaks" (def 0)
    :type solvent: 1||0

    :return: total number of chains
    :rtype: int
    """
    try:
        index = self.chainIndex( breaks=breaks, maxDist=maxDist,
                                 singleRes=singleRes, solvent=solvent )
        return len( index )
    except IndexError:  ## empty residue map
        return 0
def resList( self, mask=None ):
    """
    Return list of lists of atom pseudo dictionaries per residue,
    which allows to iterate over residues and atoms of residues.

    :param mask: [ 0||1 x N_atoms ] atom mask to apply BEFORE
    :type mask:

    :return: a list (one per residue) of lists (one per atom) of
             dictionaries::
             [ [ CrossView{'name':'N', ' residue_name':'LEU', ..},
                 CrossView{'name':'CA', 'residue_name':'LEU', ..} ],
               [ CrossView{'name':'CA', 'residue_name':'GLY', ..}, .. ]
             ]
    :rtype: [ [ `biskit.ProfileCollection.CrossView` ] ]
    """
    ## residue starting positions, closed by total atom count
    borders = N0.concatenate( (self.resIndex( mask=mask ),
                               [self.lenAtoms()] ) )

    atoms = self.getAtoms()
    if mask is not None:
        atoms = N0.compress( mask, atoms ).tolist()

    n_res = len( borders ) - 1
    return [ atoms[ borders[k] : borders[k+1] ] for k in range( n_res ) ]
def resModels( self, i=None ):
    """
    Create one new PDBModel for each residue in the parent PDBModel.

    :param i: range of residue positions (default: all residues)
    :type i: [ int ] or array( int )

    :return: list of PDBModels, one for each residue
    :rtype: [ `PDBModel` ]
    """
    starts = self.resIndex()
    stops  = self.resEndIndex()

    if i is None:
        i = N0.arange( len( starts ) )

    return [ self.take( N0.arange( starts[x], stops[x] + 1 ) ) for x in i ]
def resMapOriginal(self, mask=None):
    """
    Generate list to map from any atom to its ORIGINAL(!) PDB
    residue number.

    :param mask: [00111101011100111...] consider atom: yes or no
                 len(mask) == N_atoms
    :type mask: list of int (1||0)

    :return: list all [000111111333344444..] with residue number
             for each atom
    :rtype: list of int
    """
    ## by default take all atoms
    if mask is None: mask = N0.ones( self.lenAtoms() , N0.Int )

    ## apply mask to this list
    return N0.compress( mask, self.atoms['residue_number'] )
def __inferResIndex( self ):
    """
    Determine residue borders from the atom profiles. A new residue is
    assumed to start whenever 'residue_number', 'residue_name',
    'segment_id' or 'insertion_code' changes between two consecutive atoms.

    :return: starting position of each residue
    :rtype: array of int
    """
    result = []

    ## empty model -> empty index
    if self.lenAtoms() == 0:
        return N0.array( result, N0.Int )

    ## sentinel values guarantee the first atom always opens a residue
    lastResNumber = -100
    lastResName = ''
    index = -1  ## NOTE(review): counter is incremented but never read
    lastAlt = 'x'
    lastSegid = -1

    ## cache profile lookups outside the loop
    res_nrs = self.atoms['residue_number']
    res_nam = self.atoms['residue_name']
    ins_cod = self.atoms['insertion_code']
    seg_id = self.atoms['segment_id']

    ## create residue numbering for selected atoms
    for i in range( self.lenAtoms() ):

        if res_nrs[i] != lastResNumber or \
           res_nam[i] != lastResName or \
           seg_id[i] != lastSegid or \
           ins_cod[i] != lastAlt:

            ## start of new residue: remember its fields and first atom
            lastResNumber = res_nrs[i]
            lastResName = res_nam[i]
            lastAlt = ins_cod[i]
            lastSegid = seg_id[i]
            index += 1

            result.append( i )

    return N0.array(result, N0.Int)
def resIndex( self, mask=None, force=0, cache=1 ):
    """
    Get the position of the each residue's first atom.

    :param force: re-calculate even if cached result is available (def 0)
    :type force: 1||0
    :param cache: cache the result if new (def 1)
    :type cache: 1||0
    :param mask: atom mask to apply before (i.e. result indices refer to \
                 compressed model)
    :type mask: list of int (1||0)

    :return: index of the first atom of each residue
    :rtype: list of int
    """
    ## fast path: cached result is only valid for the un-masked model
    if self._resIndex is not None and not force and mask is None:
        return self._resIndex

    r = self.__inferResIndex()

    if mask is not None:
        ## convert index -> per-atom map, apply mask, convert back so the
        ## returned indices refer to the compressed model
        m = self.index2map( r, len( mask ) )
        m = N0.compress( mask, m )
        r = self.map2index( m )

    ## only cache the default (un-masked) result
    if mask is None and cache:
        self._resIndex = r

    return r
def resMap( self, force=0, cache=1 ):
    """
    Get list to map from any atom to a continuous residue numbering
    (starting with 0). A new residue is assumed to start whenever the
    'residue_number' or the 'residue_name' record changes between 2
    atoms.

    See :class:`resList()` for an example of how to use the residue map.

    :param force: recalculate map even if cached one is available (def 0)
    :type force: 0||1
    :param cache: cache new map (def 1)
    :type cache: 0||1

    :return: array [00011111122223333..], residue index for each atom
    :rtype: list of int
    """
    ## expand the residue start index into one residue serial per atom
    index = self.resIndex( force=force, cache=cache )
    return self.index2map( index, self.lenAtoms() )
def resEndIndex( self ):
    """
    Get the position of the each residue's last atom.

    :return: index of the last atom of each residue
    :rtype: list of int
    """
    ## each residue ends one atom before the next residue starts;
    ## the final residue ends at the last atom of the model
    starts = self.resIndex()
    nextstarts = N0.concatenate( (starts[1:], [self.lenAtoms()]) )
    return nextstarts - 1
def __inferChainIndex( self ):
    """
    Determine chain borders from atom profiles. A new chain is started
    when chain_id or segment_id changes, when the residue numbering
    jumps backwards, or when the atom is flagged as following a TER
    record ('after_ter' profile).

    :return: starting position of each chain
    :rtype: array of int
    """
    result = []

    ## empty model -> empty index
    if self.lenAtoms() == 0:
        return N0.array( result, N0.Int )

    ## sentinels force the first atom to open a chain
    lastResidue = -100
    lastChainID = None
    lastSegID = None

    ## cache profile lookups outside the loop
    chn_ids = self.atoms['chain_id']
    seg_ids = self.atoms['segment_id']
    res_nrs = self.atoms['residue_number']
    ter_atm = self.atoms['after_ter']

    for i in self.atomRange():

        if chn_ids[i] != lastChainID or \
           seg_ids[i] != lastSegID or \
           res_nrs[i] < lastResidue or \
           ter_atm[i]:

            result.append( i )

        ## note: trackers are updated for every atom, not only at borders
        lastResidue = res_nrs[i]
        lastChainID = chn_ids[i]
        lastSegID = seg_ids[i]

    return N0.array( result, N0.Int )
def __filterSingleResChains( self, chainindex, ignore_resnumbers=0 ):
    """
    Join chains containing single residues with identical name into
    one chain. Typically required for waters or ions if they are
    separated by TER or picked up by the chain break detection.

    :param chainindex: start position of each chain
    :type chainindex: array of int
    :param ignore_resnumbers: do not require consecutive residue
                              numbering for joining (def 0)
    :type ignore_resnumbers: 1||0

    :return: chainindex with single-residue chain starts removed
    :rtype: array of int
    """
    # residue name of first atom of each chain
    res_names = N0.take( self.atoms['residue_name'], chainindex )
    # residue number of first atom of each chain
    res_nmbrs = N0.take( self.atoms['residue_number'], chainindex )
    # chain id of first atom of each chain
    chain_ids = N0.take( self.atoms['chain_id'], chainindex )
    #segid of first atom of each chain
    seg_ids = N0.take( self.atoms['segment_id'], chainindex )

    ## prepend sentinels so each chain can be compared to its predecessor
    res_names = N0.concatenate( (['-1'], res_names) )
    chain_ids = N0.concatenate( (['-1'], chain_ids) )
    seg_ids = N0.concatenate( (['-1'], seg_ids ) )
    res_nmbrs = N0.concatenate( ([-100], res_nmbrs) )

    ## residue number step between consecutive chain starts
    delta = res_nmbrs[1:] - res_nmbrs[:-1]
    same_name = res_names[1:] == res_names[:-1]
    same_chain= chain_ids[1:] == chain_ids[:-1]
    same_seg = seg_ids[1:] == seg_ids[:-1]

    if ignore_resnumbers:
        ## pretend all residue numbers are consecutive
        delta = N0.ones( len(delta), N0.Int )

    ## a chain start is dropped if it merely continues the previous chain
    ## (same name/chain/segment and consecutive residue number)
    is_single = (delta==1) \
              * same_name * same_chain * same_seg

    return N0.compress( N0.logical_not(is_single), chainindex)
def chainIndex( self, breaks=0, maxDist=None, force=0, cache=0,
                singleRes=0, solvent=0 ):
    """
    Get indices of first atom of each chain.

    :param breaks: split chains at chain breaks (def 0)
    :type breaks: 1||0
    :param maxDist: (if breaks=1) chain break threshold in Angstrom
    :type maxDist: float
    :param force: re-analyze residue numbering, chain and segids to
                  find chain boundaries, use with care! (def 0)
    :type force: 1||0
    :param cache: cache new index even if it was derrived from
                  non-default parameters (def 0)
                  **Note:** a simple m.chainIndex() will always cache
    :type cache: 1||0
    :param singleRes: allow chains consisting of single residues (def 0)
                      Otherwise group consecutive residues with identical
                      name into one chain.
    :type singleRes: 1||0
    :param solvent: also check solvent residues for "chain breaks"
                    (default: false)
    :type solvent: 1||0

    :return: array (1 x N_chains) of int
    :rtype: list of int
    """
    ## fast track: return cached index when only default options are used
    if not (breaks or force or maxDist or solvent) \
       and self._chainIndex is not None:
        return self._chainIndex

    r = self._chainIndex

    if r is None or force:
        r = self.__inferChainIndex()

    if breaks:
        break_pos = self.chainBreaks( breaks_only=1, maxDist=maxDist,
                                      solvent=solvent, force=force )
        break_pos = break_pos + 1 ## chainBreaks reports last atom of each chain
        ## merge break positions into the chain start index
        r = mathUtils.union( break_pos, r )
        r.sort()

    ## filter out chains consisting only of a single residue
    if len(r)>0 and not singleRes:
        ## r = self.__filterSingleResChains( r, ignore_resnumbers=breaks )
        r = self.__filterSingleResChains( r, ignore_resnumbers=False)

    ## cache the result if it has been computed with default parameters
    if not(breaks or force or maxDist or singleRes or solvent) or cache:
        self._chainIndex = r

    return N0.array( r, N0.Int )
def chainEndIndex( self, breaks=0, solvent=0 ):
    """
    Get the position of each chain's last atom.

    :param breaks: split chains at chain breaks (def 0)
    :type breaks: 1||0
    :param solvent: also check solvent residues for "chain breaks" (def 0)
    :type solvent: 1||0

    :return: index of the last atom of each chain
    :rtype: list of int
    """
    ## each chain ends one atom before the next one starts; the last
    ## chain ends at the final atom of the model
    r = self.chainIndex( breaks=breaks, solvent=solvent )
    return N0.concatenate( (r[1:], [self.lenAtoms()]) ) - 1
def chainMap( self, breaks=0, maxDist=None ):
    """
    Get chain index of each atom. A new chain is started between 2 atoms if
    the chain_id or segment_id changes, the residue numbering jumps back or
    a TER record was found.

    :param breaks: split chains at chain breaks (def 0)
    :type breaks: 1||0
    :param maxDist: (if breaks=1) chain break threshold in Angstrom
    :type maxDist: float

    :return: array 1 x N_atoms of int, e.g. [000000011111111111122222...]
    :rtype: list of int
    """
    ## expand chain start positions into one chain serial per atom
    starts = self.chainIndex( breaks=breaks, maxDist=maxDist )
    return self.index2map( starts, self.lenAtoms() )
def chainBreaks( self, breaks_only=1, maxDist=None, force=0, solvent=0,
                 z=6. ):
    """
    Identify discontinuities in the molecule's backbone. By default,
    breaks are identified from the distribution of distances between the
    last backbone atom of a residue and the first backbone atom of the
    next residue. The median distance and standard deviation are
    determined iteratively and outliers (i.e. breaks) are identified
    as any pairs of residues with a distance that is more than z standard
    deviations (default 6) above the median. This heuristics can be
    overriden by specifiying a hard distance cutoff (maxDist).

    :param breaks_only: don't report ends of regular chains (def 1)
    :type breaks_only: 1|0
    :param maxDist: maximal distance between consequtive residues
                    [ None ] .. defaults median + z * standard dev.
    :type maxDist: float
    :param z : z-score for outlier distances between residues (def 6.)
    :type z : float
    :param solvent: also check selected solvent residues (buggy!) (def 0)
    :type solvent: 1||0
    :param force: force re-calculation, do not use cached positions (def 0)
    :type force: 1||0

    :return: atom indices of last atom **before** a probable chain break
    :rtype: list of int
    """
    ## cached result is only valid for the default parameter combination
    if self.__chainBreaks is not None and not force and \
       maxDist is None and breaks_only and not solvent and z==6.:

        r = self.__chainBreaks

    else:
        ## restrict analysis to backbone atoms
        i_bb = N0.nonzero( self.maskBB( solvent=solvent ) )

        ## outlier detection only works with more than 2,
        ## hard cutoff works with more than 1
        if len(i_bb) < 2:
            r = []

        else:
            bb = self.take( i_bb )
            bb_ri= bb.resIndex()
            bb_re= bb.resEndIndex()

            ## xyz = [ bb.xyz[ bb_ri[i] : bb_ri[i+1] ] for i in range(len(bb_ri)-1) ]
            ## xyz +=[ bb.xyz[ bb_ri[-1]: len(bb) ] ]
            ## centroid = N0.array([ N0.average( x ) for x in xyz ])

            ## coordinates of last / first backbone atom of adjacent residues
            last = N0.take( bb.xyz, bb_re )[:-1]
            first= N0.take( bb.xyz, bb_ri )[1:]

            ## dist = N0.sqrt( N0.sum( N0.power(centroid[:-1]-centroid[1:],2),
            ##                        axis=1 ) )
            ## euclidean distance between consecutive residues
            dist = N0.sqrt( N0.sum( N0.power(last-first,2), axis=1 ) )

            ## iterative outlier detection around the median distance
            outliers, median, sd = mathUtils.outliers( dist, z=z, it=5 )

            ## get distances above mean
            cutoff = maxDist or median + z * sd
            r = N0.nonzero( N0.greater( dist, cutoff ) )

            if len(r) > 0:
                ## ## can probably be simplified with self.resEndIndex()
                ## ri = self.resIndex()
                ## ri_to_e = {}
                ## for i in range( len(ri)-1 ):
                ## ri_to_e[ ri[i] ] = ri[ i+1 ]-1
                ##
                ## ## map back to the original atom indices
                ## r = [ ri_to_e[ i_bb[ bb_ri[i] ] ] for i in r ]
                ## this replacement didn't work out (fails PDBCleaner testcase):
                ## map residue positions back to atom indices of the full model
                re = self.resEndIndex()
                r = [ re[i] for i in r ]

    if breaks_only:
        ## drop breaks that coincide with regular chain boundaries
        ri = self.chainIndex( breaks=0, solvent=solvent )
        r = [ x for x in r if not x+1 in ri ]

        ## cache only the default-parameter result
        if maxDist is None and not solvent and z==6.:
            self.__chainBreaks = r

    return N0.array( r, int )
def removeRes( self, what ):
    """
    Remove all atoms with a certain residue name.

    :param what: indices or name(s) of residue to be removed
    :type what: str OR [ str ] OR int OR [ int ]

    :return: result of self.remove(), or False if `what` is neither
             str- nor int-typed
    """
    ## NOTE(review): `not isinstance(.., list) or isinstance(.., ndarray)`
    ## looks like it was meant to be
    ## `not isinstance(what, (list, N.ndarray))`; as written, ndarray input
    ## is also passed through T.toList -- confirm intention.
    if not isinstance( what, list ) or isinstance( what, N.ndarray):
        what = T.toList( what )

    if type( what[0] ) is str:
        return self.remove( self.maskFrom( 'residue_name', what) )

    ## NOTE(review): numpy integer elements fail `type(x) is int`, so an
    ## array of numpy ints would fall through to `return False` -- verify.
    if type( what[0] ) is int:
        return self.remove( self.res2atomIndices( what ) )

    return False
def rms( self, other, mask=None, mask_fit=None, fit=1, n_it=1 ):
    """
    Rmsd between two PDBModels.

    :param other: other model to compare this one with
    :type other: PDBModel
    :param mask: atom mask for rmsd calculation
    :type mask: list of int
    :param mask_fit: atom mask for superposition (default: same as mask)
    :type mask_fit: list of int
    :param fit: superimpose first (default 1)
    :type fit: 1||0
    :param n_it: number of fit iterations::
                   1 - classic single fit (default)
                   0 - until convergence, kicking out outliers on the way
    :type n_it: int

    :return: rms in Angstrom
    :rtype: float
    """
    x, y = self.getXyz(), other.getXyz()

    ## fitting defaults to the same selection used for the rmsd itself
    if mask_fit is None: mask_fit = mask

    if fit:

        fx, fy = x, y

        if mask_fit is not None:
            fx = N0.compress( mask_fit, x, 0 )
            fy = N0.compress( mask_fit, y, 0 )

        ## find transformation for best match
        r, t = rmsFit.match( fx, fy, n_iterations=n_it )[0]

        ## transform coordinates of other model onto self
        y = N0.dot(y, N0.transpose(r)) + t

    if mask is not None:
        x = N0.compress( mask, x, 0 )
        y = N0.compress( mask, y, 0 )

    ## calculate row distances
    d = N0.sqrt(N0.sum(N0.power(x - y, 2), 1))

    ## root mean square over all atom-atom distances
    return N0.sqrt( N0.average(d**2) )
def fit( self, refModel, mask=None, n_it=1,
         z=2, eps_rmsd=0.5, eps_stdv=0.05,
         profname='rms_outlier'):
    """
    Least-square fit this model onto refModel.

    :param refModel: reference PDBModel
    :type refModel: PDBModel
    :param mask: atom mask for superposition
    :type mask: list of int (1||0)
    :param n_it: number of fit iterations::
                   1 - classic single fit (default)
                   0 - until convergence
    :type n_it: int
    :param z: number of standard deviations for outlier definition
              (default 2)
    :type z: float
    :param eps_rmsd: tolerance in rmsd (default 0.5)
    :type eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default 0.05)
    :type eps_stdv: float
    :param profname: name of new atom profile containing outlier flag
    :type profname: str

    :return: PDBModel with transformed coordinates
    :rtype: PDBModel
    """
    ## fix: `z` was accepted and documented but never passed on, so the
    ## outlier cutoff silently stayed at transformation()'s own default
    return self.transform(
        self.transformation( refModel, mask, n_it, z=z, eps_rmsd=eps_rmsd,
                             eps_stdv=eps_stdv, profname=profname ) )
def magicFit( self, refModel, mask=None ):
    """
    Superimpose this model onto a ref. model with similar atom content.
    magicFit( refModel [, mask ] ) -> PDBModel (or subclass )

    :param refModel: reference PDBModel
    :type refModel: PDBModel
    :param mask: atom mask to use for the fit
    :type mask: list of int (1||0)

    :return: fitted PDBModel or sub-class
    :rtype: PDBModel
    """
    ## optionally restrict the fit to a sub-set of atoms
    probe = self if mask is None else self.compress( mask )

    ## reduce both models to their common atom content
    i_probe, i_ref = probe.compareAtoms( refModel )
    matched = probe.take( i_probe )
    matched_ref = refModel.take(i_ref )

    ## find transformation for best match
    r,t = rmsFit.findTransformation( matched_ref.getXyz(), matched.getXyz() )

    ## apply it to the complete (unmasked) model
    return self.transform( r, t )
def structureFit( self, refModel, mask=None ):
    """
    Structure-align this model onto a reference model using the external
    TM-Align program (which needs to be installed).
    structureFit( refModel [, mask] ) -> PDBModel (or subclass)

    The result model has additional TM-Align statistics in its info record:

    r = m.structureFit( ref )
    r.info['tm_score'] -> TM-Align score

    the other keys are: 'tm_rmsd', 'tm_len', 'tm_id'

    .. seealso:: `biskit.TMAlign`

    :param refModel: reference PDBModel
    :type refModel: PDBModel
    :param mask: atom mask to use for the fit
    :type mask: list of int (1||0)

    :return: fitted PDBModel or sub-class
    :rtype: PDBModel
    """
    from biskit.exe import tmalign

    if mask is not None:
        m_this = self.compress( mask )
    else:
        m_this = self

    tm = tmalign.TMAlign( m_this, refModel )
    ## run() is needed for its side effect on tm; its return value was
    ## previously bound to an unused local
    tm.run()

    return tm.applyTransformation( self )
def centered( self, mask=None ):
    """
    Get model with centered coordinates.

    :param mask: atom mask applied before calculating the center
    :type mask: list of int (1||0)

    :return: model with centered coordinates
    :rtype: PDBModel
    """
    result = self.clone()

    ## default: use every atom for the center calculation
    selection = N0.ones( len(self) ) if mask is None else mask

    offset = N0.average( N0.compress( selection, result.getXyz(), 0 ) )
    result.setXyz( result.getXyz() - offset )

    return result
def center( self, mask=None ):
    """
    Geometric center of model.

    :param mask: atom mask applied before calculating the center
    :type mask: list of int (1||0)

    :return: xyz coordinates of center
    :rtype: (float, float, float)
    """
    xyz = self.getXyz()

    ## restrict to selected atoms, if any
    if mask is not None:
        xyz = N0.compress( mask, xyz, axis=0 )

    return N0.average( xyz )
def centerOfMass( self ):
    """
    Center of mass of PDBModel.

    :return: array(Float32)
    :rtype: (float, float, float)
    """
    ## mass-weighted mean of all atom coordinates
    weights = self.masses()
    return mathUtils.wMean( self.getXyz(), weights )
def masses( self ):
    """
    Collect the molecular weight of all atoms in PDBModel.

    :return: 1-D array with mass of every atom in 1/12 of C12 mass.
    :rtype: array of floats

    :raise PDBError: if the model contains elements of unknown mass
    """
    try:
        ## look up each atom's element in the mass table
        weights = [ molUtils.atomMasses[elem]
                    for elem in self.atoms['element'] ]
    except KeyError as why:
        raise PDBError('Cannot find mass for '+str(why))

    return N0.array( weights )
def mass( self ):
    """
    Molecular weight of PDBModel.

    :return: total mass in 1/12 of C12 mass
    :rtype: float

    :raise PDBError: if the model contains elements of unknown mass
    """
    ## total weight = sum over the per-atom masses
    per_atom = self.masses()
    return N0.sum( per_atom )
def residusMaximus( self, atomValues, mask=None ):
    """
    Take list of value per atom, return list where all atoms of any
    residue are set to the highest value of any atom in that residue.
    (after applying mask)

    :param atomValues: values per atom
    :type atomValues: list
    :param mask: atom mask
    :type mask: list of int (1||0)

    :return: array with values set to the maximal intra-residue value
    :rtype: array of float
    """
    if mask is None:
        mask = N0.ones( len(atomValues) )

    ## eliminate all values that do not belong to the selected atoms
    ## (masked-out positions become 0 rather than being removed)
    masked = atomValues * mask

    result = []

    ## set all atoms of each residue to uniform value
    for res in range( 0, self.lenResidues() ):

        ## get atom entries for this residue
        resAtoms = N0.compress( N0.equal( self.resMap(), res ), masked )

        ## get maximum value
        masterValue = max( resAtoms )

        ## note: adding the array extends `result` element-wise
        result += resAtoms * 0.0 + masterValue

    return N0.array( result, N0.Float32 )
def argsort( self, cmpfunc=None ):
    """
    Prepare sorting atoms within residues according to comparison function.
    By default, atoms are sorted alphabetically by atom name within each
    residue; residues themselves keep their order.

    :param cmpfunc: old style function(m.atoms[i], m.atoms[j]) -> -1, 0, +1
                    (default: compare by atom 'name')
    :type cmpfunc: function

    :return: suggested position of each atom in re-sorted model
             ( e.g. [2,1,4,6,5,0,..] )
    :rtype: list of int
    """
    ## cmp vanished in python 3.x (but still available in past.builtins)
    cmp = lambda x, y: (x > y) - (x < y)

    ## by default sort alphabetically by atom name
    cmpfunc = cmpfunc or ( lambda a1, a2: cmp(a1['name'],a2['name']) )

    result = []
    ## running offset: position of the current residue's first atom
    pos = 0

    ## get sort list for each residue
    for resAtoms in self.resList():

        resIndex = list(range(0, len( resAtoms )))

        ## convert atom-based function into index-based function
        f_cmp = lambda i,j,atoms=resAtoms : cmpfunc( atoms[i], atoms[j] )

        ## convert py 2.x cmp to py 3.x key method
        f_key = functools.cmp_to_key(f_cmp)

        ## get sortMap for this residue (numbering starts with 0)
        resIndex.sort( key=f_key )

        ## add first residue atom's position in self.atoms to each entry
        resIndex = list(map( lambda i, delta=pos: i + delta, resIndex ))

        ## concatenate to result list
        result += resIndex

        pos += len( resAtoms )

    return result
def sort( self, sortArg=None ):
    """
    Apply a given sort list to the atoms of this model.

    :param sortArg: sort list as returned by argsort() (default: sort
                    atoms alphabetically within residues)
    :type sortArg: list of int

    :return: copy of this model with re-sorted atoms (see numpy.take() )
    :rtype: PDBModel
    """
    ## `or` (not `is None`) on purpose: an empty sort list also falls
    ## back to the default alphabetical ordering
    order = sortArg or self.argsort()
    return self.take( order )
def unsort( self, sortList ):
    """
    Undo a previous sorting on the model itself (no copy).

    :param sortList: sort list used for previous sorting.
    :type sortList: list of int

    :return: the (back)sort list used ( to undo the undo...)
    :rtype: list of int

    :raise PDBError: if sorting changed atom number
    """
    ## prepare sort functions (py 2.x / py 3.x)
    cmp = lambda x, y: (x > y) - (x < y)
    ## sorting positions by their value in sortList inverts the mapping
    f_key = functools.cmp_to_key(lambda i,j, l=sortList: cmp(l[i],l[j]))

    ## get new sort list that reverts the old one
    backSortList = list(range(0, self.lenAtoms()))
    backSortList.sort( key=f_key )

    ## re-sort this model in place
    self.keep( backSortList )

    return backSortList
def atomNames(self, start=None, stop=None):
    """
    Return a list of atom names from start to stop RESIDUE index

    :param start: index of first residue
    :type start: int
    :param stop: index of last residue
    :type stop: int

    :return: ['C','CA','CB' .... ]
    :rtype: list of str
    """
    ## By default return list of all atoms
    first = start or 0

    last = stop
    if last is None: ## don't use "stop = stop or ..", stop might be 0!
        last = self.lenResidues()-1

    ## translate residue range into an atom range
    i = self.resIndex()[first]

    if last == self.lenResidues()-1:
        j = self.lenAtoms()
    else:
        j = self.resIndex()[last+1]

    return self.atoms['name'][i:j]
def __testDict_and( self, dic, condition ):
    """
    Test if **all** key-value pairs of condition are matched in dic

    :param condition: {..}, key-value pairs to be matched
    :type condition: dictionary
    :param dic: {..}, dictionary to be tested
    :type dic: dictionary

    :return: 1|0, 1 if all key-value pairs of condition are matched in dic
    :rtype: 1|0
    """
    ## every condition value list must contain the corresponding dic entry
    return int( all( dic.get( key, None ) in allowed
                     for key, allowed in condition.items() ) )
def __testDict_or( self, dic, condition ):
    """
    Test if **any** key-value pairs of condition are matched in dic

    :param condition: {..}, key-value pairs to be matched
    :type condition: dictionary
    :param dic: {..}, dictionary to be tested
    :type dic: dictionary

    :return: 1|0, 1 if any key-value pairs of condition are matched in dic
    :rtype: 1|0
    """
    ## one matching condition entry is enough
    return int( any( dic.get( key, None ) in allowed
                     for key, allowed in condition.items() ) )
def filterIndex( self, mode=0, **kw ):
    """
    Get atom positions that match a combination of key=values.
    E.g. filter( chain_id='A', name=['CA','CB'] ) -> index

    :param mode: 0 combine with AND (default), 1 combine with OR
    :type mode: 0||1
    :param kw: combination of atom dictionary keys and
               values/list of values that will be used to filter
    :type kw: filter options, see example

    :return: sort list
    :rtype: list of int
    """
    ## cache to minimize function lookup
    entries = self.atoms.toDicts()

    ## AND vs OR combination of the given criteria
    f_test = self.__testDict_and if mode == 0 else self.__testDict_or

    ## normalize every criterion to a list of accepted values
    criteria = { key: T.toList( value ) for key, value in kw.items() }

    return [ i for i in range( self.lenAtoms() )
             if f_test( entries[i], criteria ) ]
def filter( self, mode=0, **kw):
    """
    Extract atoms that match a combination of key=values.
    E.g. filter( chain_id='A', name=['CA','CB'] ) -> PDBModel

    :param mode: 0 combine with AND (default), 1 combine with OR
    :type mode: 0||1
    :param kw: combination of atom dictionary keys and
               values/list of values that will be used to filter
    :type kw: filter options, see example

    :return: filterd PDBModel
    :rtype: PDBModel
    """
    ## select matching positions, then extract them into a new model
    matching = self.filterIndex( mode=mode, **kw )
    return self.take( matching )
def equals(self, ref, start=None, stop=None):
    """
    Compares the residue and atom sequence in the given range.
    Coordinates are not checked, other profiles are not checked.

    :param start: index of first residue
    :type start: int
    :param stop: index of last residue
    :type stop: int

    :return: [ 1||0, 1||0 ],
             first position sequence identity 0|1,
             second position atom identity 0|1
    :rtype: list if int
    """
    ## By default compare all residues
    first = start or 0

    last = stop
    if last is None: ## don't use stop = stop or .. stop might be 0!
        last = self.lenResidues()

    ## 1 if the one-letter sequences agree in the given window
    seq_same = int( self.sequence()[first:last] == ref.sequence()[first:last] )

    ## 1 if the atom names agree as well
    atm_same = int( self.atomNames(first,last-1) == ref.atomNames(first,last-1) )

    return [seq_same, atm_same]
def compareAtoms( self, ref ):
    """
    Get list of atom indices for this and reference model that converts
    both into 2 models with identical residue and atom content.

    E.g.
     >>> m2 = m1.sort()    ## m2 has now different atom order
     >>> i2, i1 = m2.compareAtoms( m1 )
     >>> m1 = m1.take( i1 ); m2 = m2.take( i2 )
     >>> m1.atomNames() == m2.atomNames()  ## m2 has again same atom order

    :return: indices, indices_ref
    :rtype: ([int], [int])
    """
    ## compare sequences
    seqMask, seqMask_ref = match2seq.compareModels(self, ref)

    ## get list of matching RESIDUES
    equal = N0.nonzero(seqMask)
    equal_ref = N0.nonzero(seqMask_ref)

    result, result_ref = [], []

    rI = self.resIndex()
    rIref = ref.resIndex()

    ## check that all atoms are equal in matching residues
    for i in range(0, len(equal)):

        ## atom name lists for current residue
        aa = self.atomNames( equal[i],equal[i] )
        aa_ref = ref.atomNames( equal_ref[i],equal_ref[i] )

        ## starting atom of current residue
        ind = rI[ equal[i] ]
        ind_ref = rIref[ equal_ref[i] ]

        for j in range( len(aa_ref) ):

            try:
                ##shortcut for mostly equal models: same name at same
                ##position within the residue
                if aa_ref[j] == aa[j]:      ## throws IndexError
                    result     += [ind + j]
                    result_ref += [ind_ref + j]
                    continue

            except IndexError:
                pass

            try:
                ## otherwise search the ref atom name anywhere in this
                ## residue; unmatched ref atoms are silently skipped
                pos = aa.index( aa_ref[j] )   ## throws ValueError

                result     += [ind + pos]
                result_ref += [ind_ref + j]

            except ValueError:
                pass

    return result, result_ref
def unequalAtoms( self, ref, i=None, iref=None ):
    """
    Identify atoms that are not matching between two models.
    This method returns somewhat of the opposite of compareAtoms().
    Not matching means: (1) residue is missing, (2) missing atom within a
    residue, (3) atom name is different. Differences in coordinates or
    other atom profiles are NOT evaluated and will be ignored.
    (not speed-optimized)

    :param ref: reference model to compare to
    :type ref: PDBModel
    :param i: pre-computed positions that are equal in this model \
              (first value returned by compareAtoms() )
    :type i: array( int ) or [ int ]
    :param iref: pre-computed positions that are equal in ref model \
              (first value returned by compareAtoms() )
    :type i: array( int ) or [ int ]

    :return: missmatching atoms of self, missmatching atoms of ref
    :rtype: array(int), array(int)
    """
    ## compute the matching positions unless both were supplied
    if i is None or iref is None:
        i, iref = self.compareAtoms( ref )

    ## start with "everything unmatched", then clear matched positions
    unmatched_self = N0.ones( len(self), int )
    unmatched_ref  = N0.ones( len(ref ), int )

    N0.put( unmatched_self, i, 0 )
    N0.put( unmatched_ref, iref, 0 )

    return N0.nonzero( unmatched_self ), N0.nonzero( unmatched_ref )
def reportAtoms( self, i=None, n=None ):
    """
    Format a PDB-like text listing of atoms with serial number, atom
    name, residue name, chain id and residue number.

    :param i: optional list of atom positions to report (default: all)
    :type i: [ int ]
    :param n: maximum number of atoms to list (default: all)
    :type n: int

    :return: formatted string with atom and residue names similar to PDB
    :rtype: str
    """
    m = self
    ## NOTE(review): n defaults to the length of the *full* model, i.e.
    ## before the optional take(i) below -- confirm this is intended
    n = n or len(m)
    if i is not None:
        m = self.take( i )

    s = '%(serial_number)4i %(name)5s %(residue_name)3s %(chain_id)1s '+\
        '%(residue_number)3i'

    ## each CrossView supports dict-style %-formatting
    atm = [ s % a for a in m ]

    r = '\n'.join( atm[:n] )

    ## mark truncated output
    if n < len( m ):
        r += ' ...'

    return r
def __chainFraction( self, chain, ref ):
    """
    Look how well a given chain matches a continuous stretch of residues
    in ref.

    :param chain: chain index
    :type chain: int
    :param ref: reference PDBModel
    :type ref: PDBModel

    :return: average relative length of matching chain fragments
             (0.0 .. 1.0)
    :rtype: float
    """
    m = self.takeChains([chain])
    ## guard against empty chain selection
    if len(m) == 0:
        return 0

    ## reduce chain to the atoms that match ref
    m_cast = m.take( m.compareAtoms( ref )[0] )

    f = 1. * len( m_cast )
    if f > 0:
        ## penalize matches that are fragmented into many pieces
        f = f / m_cast.lenChains( breaks=1, maxDist=5.)

    ## normalize by the chain's total atom count
    f = f / len( m )

    return f
def compareChains( self, ref, breaks=0, fractLimit=0.2 ):
    """
    Get list of corresponding chain indices for this and reference model.
    Use takeChains() to create two models with identical chain content and
    order from the result of this function.

    :param ref: reference PDBModel
    :type ref: PDBModel
    :param breaks: look for chain breaks in backbone coordinates
    :type breaks: 1||0
    :param fractLimit: minimum fraction of a chain that must match to
                       count it as corresponding (def 0.2)
    :type fractLimit: float

    :return: chainIndices, chainIndices_ref
    :rtype: ([int], [int])
    """
    i, i_ref = self.compareAtoms( ref )

    ## translate matching atom positions into chain indices
    c0 = self.atom2chainIndices( i, breaks=breaks )
    c_r = ref.atom2chainIndices( i_ref, breaks=breaks )

    ## dirty hack to throw out single matching residues
    c0 = [ c for c in c0 \
           if self.__chainFraction( c, ref ) > fractLimit ]
    c_r= [ c for c in c_r \
           if ref.__chainFraction( c, self ) > fractLimit ]

    return c0, c_r
def biomodel(self, assembly = 0):
    """
    Return the 'biologically relevant assembly' of this model
    according to the information in the PDB's BIOMT record (captured in
    info['BIOMT']).
    This removes redundant chains and performs symmetry operations to
    complete multimeric structures. Some PDBs define several alternative
    biological units: usually (0) the author-defined one and (1)
    software-defined -- see :class:`lenBiounits`.

    Note: The BIOMT data are currently not updated during take/compress
    calls which may change chain indices and content. This method is
    therefore best run on an original PDB record before any other
    modifications are performed.

    :param assembly: assembly index (default: 0 .. author-determined unit)
    :type assembly: int

    :return: PDBModel; biologically relevant assembly
    """
    ## fall back to the unmodified model when no BIOMT data are available
    try:
        unit = BU.BioUnit(self, self.info['BIOMT'])
        result = unit.makeMultimer(assembly)
    except AttributeError:
        result = self

    return result
def lenBiounits (self):
    """
    Number of biological assemblies defined in PDB BIOMT record, if any.

    :return: number of alternative biological assemblies defined in
             PDB header
    :rtype: int
    """
    ## missing/invalid BIOMT data count as zero assemblies
    try:
        unit = BU.BioUnit(self, self.info['BIOMT'])
        result = len(list(unit.keys()))
    except AttributeError:
        result = 0

    return result
def atomkey( self, compress=True ):
    """
    Create a string key encoding the atom content of this model independent
    of the order in which atoms appear within residues. Atom names are
    simply sorted alphabetically within residues and then concatenated.

    :param compress: compress key with zlib (default: true)
    :type compress: bool

    :return: key formed from sorted atom content of model
    :rtype: bytes (compress=True) or str (compress=False)
    """
    import zlib

    ## residue borders plus sentinel for the end of the model
    rindex = N0.concatenate( (self.resIndex(), [len(self)] ) )

    r = ''
    if len(rindex) == 0:
        return r

    anames = self.atoms['name']  ## cache for faster access

    for i in range(len(rindex)-1):
        a = anames[rindex[i]:rindex[i+1]]
        a.sort()
        r = r + ''.join(a)

    if compress:
        ## fix: zlib.compress requires bytes under Python 3; passing the
        ## plain str raised TypeError
        return zlib.compress( r.encode('utf-8') )

    return r
#############
## TESTING
#############
import biskit.test as BT
class _TestData(object):
    ## module-level cache so the test PDB is parsed only once per test run
    MODEL = None
class Test(BT.BiskitTest):
"""Test class """
#: load test PDB once into class rather than 3 times into every instance
## for some reason doesn't actually work
MODEL = None
def prepare(self):
import tempfile
## loading output file from X-plor
self.MODEL = _TestData.MODEL or B.PDBModel( T.testRoot()+'/com/1BGS.pdb')
_TestData.MODEL = self.MODEL
self.m = self.MODEL
self.fout_pdb = tempfile.mktemp( '_test1.pdb' )
self.fout1 = tempfile.mktemp( '_test1.model' )
self.fout2 = tempfile.mktemp( '_test2.model' )
def cleanUp( self ):
T.tryRemove( self.fout1 )
T.tryRemove( self.fout2 )
T.tryRemove( self.fout_pdb )
def test_removeRes(self):
"""PDBModel.removeRes test"""
t = time.time()
self._m = self.m.clone()
self._m.removeRes(['TIP3', 'HOH'])
self.assertEqual( len(self._m), 1968)
self.assertAlmostEqual( self._m.mass(), 21325.90004, 3 )
if self.local: print("removeRes: ", time.time() - t)
def test_chainMethods(self):
"""PDBModel chain methods test"""
## X-plor doesn't write chainIds, so during the simulation
## we store them in the last letter of the segId. Here we
## restore the chainId.
self._m = self.m.clone()
self._m.addChainFromSegid()
## start positions of all chains
chainIdx = self._m.chainIndex().tolist()
## print some chain info
if self.local:
print('The molecule consists of %i chains'% self.m.lenChains())
print('\tChainId \tFirst atom')
for i in chainIdx:
print('\t%s \t\t%i'%(self._m.atoms['chain_id'][i], int(i)))
## iterate over all chains
for c in range( 0, len( chainIdx ) ):
if self.local:
print("chain ", c, " starts with ", end=' ')
print(self._m.atoms['residue_name'][ chainIdx[c] ], end=' ')
print(" and has sequence: ")
## mask out atoms of all other chains
chainMask = N0.equal( self._m.chainMap( breaks=1 ), c )
if self.local:
print(self._m.sequence( chainMask ))
self.assertEqual( self._m.lenChains(), 4)
def test_sorting(self):
"""PDBModel sorting test"""
if self.local:
print("sorting atoms alphabetically...")
m2 = self.m.compress( self.m.maskProtein() )
sort = m2.argsort()
m2 = m2.sort( sort )
self.assertAlmostEqual( N0.sum( m2.centerOfMass() ), 23.1032009125,2)
def test_chainBreaks(self):
"""PDBModel chain break handling and writing test"""
self.m4 = B.PDBModel( T.testRoot()+'/com/1BGS_original.pdb')
self.assertEqual( self.m4.lenChains(), 9 )
self.assertEqual( self.m4.lenChains( breaks=1 ), 9 )
self.assertEqual( self.m4.lenChains( breaks=1, singleRes=1, solvent=1),
9 )
self.m4.writePdb( self.fout_pdb, ter=2 )
def test_chainBreaks2(self):
"""PDBModel more complicated chain break detection"""
self.m5 = B.PDBModel( T.testRoot()+'/pdbclean/foldx_citche.pdb')
breaks = self.m5.chainBreaks()
self.assertEqual( len(breaks), 2 )
## limitation of the method: same model but now with capping residues
## filling the gap
self.m6 = B.PDBModel( T.testRoot()+'/pdbclean/citche_capped.pdb')
breaks = self.m6.chainBreaks()
self.assertEqual( len(breaks), 1 )
def test_chainSingleResidues( self ):
"""PDBModel single residue chain test"""
self.m5 = B.PDBModel( T.testRoot() + '/amber/1HPT_0.pdb' )
self.assertTrue( self.m5.lenChains() < 10, 'single residue chains' )
def test_rename(self):
"""PDBModel renameAmberRes tests"""
self.m3 = B.PDBModel( T.testRoot()+'/amber/leap/1HPT_dry.pdb')
n_cyx = self.m3.atoms['residue_name'].count('CYX')
n_hid = self.m3.atoms['residue_name'].count('HID')
n_hip = self.m3.atoms['residue_name'].count('HIP')
n_hie = self.m3.atoms['residue_name'].count('HIE')
n_hix = n_hid + n_hie + n_hip
self.m3.renameAmberRes()
self.assertEqual(n_cyx, self.m3.atoms['residue_name'].count('CYS'))
self.assertEqual(n_hix, self.m3.atoms['residue_name'].count('HIS'))
def test_xplor2amber(self):
"""PDBModel xplor2amber test"""
## test is simply back-converting a PDB comming from 'ambpdb -aatm'
## a better test would be starting from an actually xplor-generated PDB
m1 = B.PDBModel( T.testRoot() +'/amber/1HPT_0dry.pdb' )
m1.xplor2amber( aatm=True )
m2 = B.PDBModel( T.testRoot() + '/amber/1HPT_0dry_amberformat.pdb' )
self.assertEqual( m1.atomNames(), m2.atomNames() )
def test_report(self):
    """PDBModel report&plot test"""
    ## only produce the plot when run interactively or at high verbosity
    want_plot = self.local or self.VERBOSITY > 2
    self.report_output = self.m.report( prnt=self.local, plot=want_plot )
def test_sourceHandling(self):
    """PDBModel source / disconnection tests"""
    ## keep a full clone plus reference values to compare against after
    ## each slim()/reload cycle
    self._m = self.m.clone()
    anames = self.m.atoms['name']
    xyz0 = self.m.getXyz()[0]
    ## slim() drops profiles/coordinates that can be re-fetched from source
    self._m.slim()
    ## _m2 uses _m1 as source
    self._m2 = B.PDBModel( self._m )
    l1 = self._m2.atoms['name']
    self.assertEqual( l1, anames )
    ## remove unchanged profiles and coordinates
    self._m2.slim()
    ## fetch them again from source (of source)
    l2 = self._m2.atoms['name']
    self.assertEqual( l2, anames )
    ## disconnect _m from PDB file source
    self._m.saveAs( self.fout2 )
    self._m2.slim()
    self._m.slim()
    ## this should now trigger the reloading of fout2
    self.assertEqual( self._m2.atoms['name'], anames )
    self.assertTrue( N.all( self._m2.getXyz()[0] == xyz0) )
    ## after disconnection, slim() should not have any effect
    self._m2.disconnect()
    self._m2.slim()
    ## profile must survive slim() because there is no source to reload from
    self.assertTrue( self._m2.atoms.profiles['name'] is not None )
def test_mergeChains( self ):
    """PDBModel.mergeChains test

    Split the first chain into two artificial chains, then verify that
    mergeChains() restores the original chain layout, ids and numbering.
    """
    m = self.m.takeChains( [0] )
    ## reference values from the unsplit chain
    res_numbers = m['residue_number']
    chain_ids = m['chain_id']
    ## cut after residue 3 and disguise the second half as chain 'X'
    m1 = m.takeResidues( list(range(3)) )
    m2 = m.takeResidues( list(range(3, m.lenResidues())) )
    m2.renumberResidues()
    m2['chain_id'] = len(m2) * ['X']
    self.m1 = m1
    self.m2 = m2
    self.r = m1.concat( m2 )
    r = self.r
    ## concat should have introduced exactly one additional chain
    ## (assertEqual rather than assertTrue(a == b) for useful failure output)
    self.assertEqual( r.lenChains(), m.lenChains() + 1 )
    r.mergeChains( 0 )
    self.r = r
    ## merging chain 0 with its successor restores the original chain count...
    self.assertEqual( r.lenChains(), m.lenChains() )
    ## ...as well as the original chain ids and residue numbering
    self.assertTrue( N.all( N0.array(r['chain_id']) == chain_ids ) )
    self.assertTrue( N.all( N0.array(r['residue_number']) == res_numbers ) )
def test_mergeResidues( self ):
    """PDBModel.mergeResidues test"""
    m = self.m.clone()
    gg = m.sequence().find( 'GG' )
    n_res = m.lenResidues()
    n_atoms = m.lenAtoms()
    r_gly = Residue( m, gg )
    m.mergeResidues( gg )
    r_gly.reset()
    ## the two glycines are fused into one residue; no atom is lost
    self.assertEqual( m.lenResidues(), n_res - 1 )
    self.assertEqual( m.lenAtoms(), n_atoms )
    ## the merged residue now spans the atoms of both glycines (2 x 5)
    self.assertEqual( len( r_gly ), 2 * 5 )
def test_getset(self):
    """PDBModel.__get/set__ test"""
    ## integer index -> atom dictionary; tuple -> profile meta info
    self.assertEqual( self.m[10]['occupancy'], 1.0 )
    self.assertEqual( self.m['chain_id', 'changed'], 0 )
    ## string key -> whole atom profile, one entry per atom
    self.assertEqual( len(self.m['chain_id'] ), len( self.m ) )
    ## keys not found in atom/residue profiles fall through to info dict
    self.assertTrue( type( self.m['date']) is str )
    ## assigning a residue-length list creates a residue profile
    self.m['resname'] = self.m.atom2resProfile('residue_name')
    self.assertEqual( len( self.m['resname'] ), self.m.lenResidues() )
    ## assigning a scalar to an existing info key updates info
    self.m.info['tested'] = False
    self.m['tested'] = True
    self.assertTrue( self.m.info['tested'] )
    ## tuple assignment sets profile meta data ('default', 'changed', ...)
    self.m['serial_number', 'default'] = 1
    self.assertTrue( self.m.atoms['serial_number','default'] == 1 )
    self.m['resname', 'changed'] = 0
    self.assertFalse( self.m.residues.isChanged( 'resname' ) )
    ## assigning an atom-length list creates a new atom profile
    self.m['index'] = list(range( len( self.m)))
    self.assertTrue( self.m['index'][-1] == len( self.m ) - 1 )
def test_slice(self):
    """PDBModel.__slice__ test"""
    ## every 20th of the first 100 atoms -> a 5-atom model
    part = self.m[0:100:20]
    self.assertTrue( len( part ) == 5 )
def test_various(self):
    """PDBModel various tests"""
    ## even a freshly constructed empty model must expose xyz as numpy array
    empty = PDBModel()
    self.assertEqual( type( empty.getXyz() ), N.ndarray )
def test_compareChains(self):
    """PDBModel.compareChains test"""
    m = self.m.clone()
    m2 = PDBModel()
    ## extract first 100 atoms of each chain
    for c in range( m.lenChains() ):
        chunk = m.takeChains( [c] ).take( list(range(100)) )
        m2 = m2.concat( chunk )
    ## shuffle the chain order, then map the chains back onto each other
    m3 = m2.takeChains( [2,3,0,1] )
    i, iref = m2.compareChains( m3 )
class TestExe( BT.BiskitTest ):
    """PDBModel tests that rely on external applications"""

    TAGS = [BT.EXE]

    def test_structureFit( self ):
        """PDBModel.structureFit test"""
        m = T.load( T.testRoot( 'tmalign/1huy_citrine.model' ) )
        ref = T.load( T.testRoot( 'tmalign/1zgp_dsred_dimer.model' ) )
        ref = ref.takeChains( [0] )
        fitted = m.structureFit( ref )
        ## after fitting, the centers of mass should nearly coincide
        deviation = fitted.centerOfMass() - ref.centerOfMass()
        if self.local:
            print('center of mass deviation: \n%r' % deviation)
        self.assertEqual( fitted.info['tm_rmsd'], 1.76 )
        self.assertTrue( N.all( N0.absolute(deviation) < 1 ),
                         'superposition failed: %r' % deviation)
def clock( s, ns=None ): ## pragma: no cover
    """
    Profile a python statement and print the 20 most expensive calls.

    :param s: python statement to profile, e.g. 'm.report()'
    :type  s: str
    :param ns: namespace in which to execute the statement
               (default: this module's globals)
    :type  ns: dict
    :return: return value of cProfile.runctx (None)
    """
    import cProfile
    ## late-bound default instead of the former def-time globals() default
    if ns is None:
        ns = globals()
    ## bugfix: the old locals().update(ns) trick is a no-op in Python 3 and
    ## cProfile.run() executed s in __main__'s namespace, ignoring ns.
    ## runctx() passes the namespace to the profiled statement explicitly.
    r = cProfile.runctx( s, ns, ns, 'report.out' )
    ## Analyzing
    import pstats
    p = pstats.Stats('report.out')
    p.strip_dirs()
    ## long steps and methods calling them
    p.sort_stats('cumulative').print_stats(20)
    p.print_callers( 20 )
    return r
if __name__ == '__main__':
    ## run the unittests defined in this module via Biskit's test framework
    BT.localTest()