## numpy-oldnumeric calls replaced by custom script; 09/06/2016
## Automatically adapted for numpy-oldnumeric Mar 26, 2007 by alter_code1.py
##
## Biskit, a toolkit for the manipulation of macromolecular structures
## Copyright (C) 2004-2016 Raik Gruenberg & Johan Leckner
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You find a copy of the GNU General Public License in the file
## license.txt along with this program; if not, write to the Free
## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##
##
"""
Utilities for handling structures and sequences
"""
## see: https://www.python.org/dev/peps/pep-0366/
## allow relative imports when calling module as main script for testing
if __name__ == "__main__" and __package__ is None:
import biskit
__package__ = "biskit"
from biskit import EHandler
from biskit import tools as t
from biskit.core import oldnumeric as N0
import copy
import types
[docs]class MolUtilError( Exception ):
pass
#: translate PDB amino acid names to single letter code
aaDicStandard =\
{'asp':'D', 'glu':'E', 'lys':'K', 'his':'H', 'arg':'R',
'gln':'Q', 'asn':'N', 'ser':'S', 'asx':'B', 'glx':'Z',
'phe':'F', 'trp':'W', 'tyr':'Y',
'gly':'G', 'ala':'A', 'ile':'I', 'leu':'L', 'cys':'C',
'met':'M', 'thr':'T', 'val':'V', 'pro':'P' }
#: same for nucleic acids (incomplete)
nsDicStandard = {'a':'a', 'g':'g', 'c':'c', 't':'t', 'u':'u',
'a3':'a', 'g3':'g', 'c3':'c', 't3':'t', 'u3':'u',
'a5':'a', 'g5':'g', 'c5':'c', 't5':'t', 'u5':'u',
'da':'a','dg':'g','dc':'c','dt':'t',
'da3':'a','dg3':'g','dc3':'c','dt3':'t',
'da5':'a','dg5':'g','dc5':'c','dt5':'t'
}
#: extend aaDicStandard with non-standard residues
aaDic = copy.copy( aaDicStandard )
aaDic.update( {'cyx':'C', 'hid':'H', 'hie':'H', 'hip':'H',
'unk':'X', 'ace':'X', 'nme':'X'} )#, 'ndp':'X' } )
#: extend nsDicStandard with non-standard residues
nsDic = copy.copy( nsDicStandard )
nsDic.update( {'atp':'a', 'gtp':'g', 'ctp':'c', 'ttp':'t', 'utp':'u',
'adp':'a', 'gdp':'g', 'cdp':'c', 'tdp':'t', 'udp':'u',
'amp':'a', 'gmp':'g',
'fad':'f', 'fmp':'f',
'nad':'n',
} )
#: translate common hetero residues to pseudo single letter code
xxDic = {'tip3':'~', 'hoh':'~', 'wat':'~', 'cl-':'-', 'na+':'+', 'ca':'+',
'ndp':'X', 'nap':'X'}
#: translate standard PDB amino and nucleic acid names to single letter code
resDicStandard = copy.copy( aaDicStandard )
resDicStandard.update( nsDicStandard )
#: extend resDicStandard with common non-standard names
resDic = copy.copy( aaDic )
resDic.update( nsDic )
resDic.update( xxDic )
## map non-standard amino acid names to closest standard amino acid
##
## Data from: http://www.ccp4.ac.uk/html/lib_list.html#peptide_synonyms
## More info at: http://xray.bmc.uu.se/hicup/XXX/ where XXX is the residue code
##
## NOT ADDED:
## SAR SARCOSINE
## PCA 5-pyrrolidone-2-carboxylic_acid
## INI Amidinated_lysine_with_methyl_isonicotinimida
## SAH S-ADENOSYL-L-HOMOCYSTEINE
## SAM S-ADENOSYLMETHIONINE
## LLP LYSINE-PYRIDOXAL-5*-PHOSPHATE
## ACE acetyl
## FOR Formyl
## BOC TERT-BUTYLOXYCARBONYL GROUP
## MLE N-METHYLLEUCINE
## MVA N-METHYLVALINE
## IVA Isovaleric_acid
## STA STATINE
## ETA ethanolamine
## TFA TRIFLUOROACETYL GROUP
## ANI 4-TRIFLUOROMEHYLANILINE
## MPR BETA-MERCAPTOPROPIONATE
## DAM N-METHYL-ALPHA-BETA-DEHYDROALANINE
## ACB 2-AMINO-3-CARBONYLBUTANOIC ACID
## ADD 2,6,8-TRIMETHYL-3-AMINO-9-BENZYL-9-M
## CXM N-CARBOXYMETHIONINE
## DIP DIPENTYLAMINE
## BAL BETA-ALANINE
nonStandardAA={ 'UNK':'ALA', 'ABA':'ALA', 'B2A':'ALA',
'ORN':'ARG',
'ASX':'ASP',
'CSH':'CYS', 'OCS':'CYS', 'CSO':'CYS',
'GLX':'GLU', 'CGU':'GLU', 'ILG':'GLU',
'B2I':'ILE',
'BLE':'LEU',
'KCX':'LYS', 'BLY':'LYS',
'MSE':'MET',
'B1F':'PHE', 'B2F':'PHE',
'HYP':'PRO', '5HP':'PRO',
'SEP':'SER',
'TYS':'TYR',
'B2V':'B2V',
'HIE':'HIS', 'HID':'HIS', 'HIP':'HIS',
'CYX':'CYS' }
#: heavy atoms of amino acids in standard order, OXT applies to C term only
aaAtoms={'GLY':['N','CA','C','O', 'OXT' ],
'ALA':['N','CA','C','O', 'CB', 'OXT'],
'VAL':['N','CA','C','O','CB','CG1','CG2', 'OXT'],
'LEU':['N','CA','C','O','CB','CG','CD1','CD2', 'OXT'],
'ILE':['N','CA','C','O','CB','CG1','CG2','CD1', 'OXT'],
'MET':['N','CA','C','O','CB','CG','SD','CE', 'OXT'],
'PRO':['N','CA','C','O','CB','CG','CD', 'OXT'],
'PHE':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ',
'OXT'],
'TRP':['N','CA','C','O','CB','CG','CD1','CD2','NE1','CE2','CE3',
'CZ2','CZ3','CH2', 'OXT'],
'SER':['N','CA','C','O','CB','OG', 'OXT'],
'THR':['N','CA','C','O','CB','OG1','CG2', 'OXT'],
'ASN':['N','CA','C','O','CB','CG','OD1','ND2', 'OXT'],
'GLN':['N','CA','C','O','CB','CG','CD','OE1','NE2', 'OXT'],
'TYR':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ','OH',
'OXT'],
'CYS':['N','CA','C','O','CB','SG', 'OXT'],
'LYS':['N','CA','C','O','CB','CG','CD','CE','NZ', 'OXT'],
'ARG':['N','CA','C','O','CB','CG','CD','NE','CZ','NH1','NH2', 'OXT'],
'HIS':['N','CA','C','O','CB','CG','ND1','CD2','CE1','NE2', 'OXT'],
'ASP':['N','CA','C','O','CB','CG','OD1','OD2', 'OXT'],
'GLU':['N','CA','C','O','CB','CG','CD','OE1','OE2', 'OXT']}
#: dictionary of elements
elements = { 'carbon':['C', 'CD2', 'CZ2', 'CB', 'CA', 'CG', 'CE', 'CD', 'CZ',
'CH2', 'CE3', 'CD1', 'CE1', 'CZ3', 'CG1', 'CG2', 'CE2'],
'nitrogen':['NZ', 'ND2', 'NH1', 'NH2', 'ND1', 'NE1', 'NE2',
'NE', 'N'],
'oxygen':['OG', 'OE2', 'OXT', 'OD1', 'OE1', 'OH', 'OG1', 'OD2',
'O'],
'suplphur':['SG', 'SD'],
'clustering_BDZ':['C','CB','CD','CD1','CD2','CZ','CZ2','CZ3',
'ND1','ND2','NZ','OD1','OD2','SD' ],
'clustering_ABDZ':['C','CA','CB','CD','CD1','CD2','CZ','CZ2',
'CZ3',
'ND1','ND2','NZ','OD1','OD2','SD' ],
'clustering_G':['C','CG','CG1','OG','OG1','SG' ],
'clustering_B':['C','CB'],
'clustering_AG':['C','CA','CG','CG1','OG','OG1','SG' ],
'clustering_AGE':['C','CA','CG','CG1','OG','OG1','SG','NE','OE1',
'CE1','CE','CE3' ],
'clustering_BD':['C','CB','CD','CD1','OD1','SD' ],
'clustering_ABD':['C','CA','CB','CD','CD1','OD1','SD' ],
'clustering_AB':['C','CA','CB']}
#: number of attached H for each heavy atom in each amino acid
aaAtomsH={'XXX':{'N':1,'CA':1,'C':0,'O':0,'OXT':0},
'GLY':{},
'ALA':{'CB':3},
'VAL':{'CB':0,'CG1':3,'CG2':3},
'LEU':{'CB':2,'CG':0,'CD1':3,'CD2':3},
'ILE':{'CB':0,'CG1':1,'CG2':3,'CD1':3},
'MET':{'CB':2,'CG':2,'SD':0,'CE':3 },
'PRO':{'N':0,'CB':2,'CG':2,'CD':2},
'PHE':{'CB':2,'CG':0,'CD1':1,'CD2':1,'CE1':1,'CE2':1,'CZ':1},
'TRP':{'CB':2,'CG':0,'CD1':1,'CD2':0,'NE1':1,'CE2':0,'CE3':1,
'CZ2':1,'CZ3':1,'CH2':1},
'SER':{'CB':2,'OG':1},
'THR':{'CB':0,'OG1':1,'CG2':3},
'ASN':{'CB':2,'CG':0,'OD1':0,'ND2':2},
'GLN':{'CB':2,'CG':2,'CD':0,'OE1':0,'NE2':2},
'TYR':{'CB':2,'CG':0,'CD1':1,'CD2':1,'CE1':1,'CE2':1,'CZ':0,'OH':1},
'CYS':{'CB':2,'SG':1},
'LYS':{'CB':2,'CG':2,'CD':2,'CE':2,'NZ':3},
'ARG':{'CB':2,'CG':2,'CD':2,'NE':1,'CZ':0,'NH1':2,'NH2':2},
'HIS':{'CB':2,'CG':0,'ND1':1,'CD2':1,'CE1':1,'NE2':0},
'ASP':{'CB':2,'CG':0,'OD1':0,'OD2':0},
'GLU':{'CB':2,'CG':2,'CD':0,'OE1':0,'OE2':0} }
for aa in aaAtomsH:
default = copy.copy( aaAtomsH['XXX'] )
default.update( aaAtomsH[aa] )
aaAtomsH[aa] = default
## work in progress...heavy atoms of nucleic acids in standard order
nsAtoms={
'ATP':['PG', 'O1G', 'O2G', 'O3G', 'PB', 'O1B', 'O2B', 'O3B', 'PA', 'O1A',
'O2A', 'O3A', 'O5*', 'C5*', 'C4*', 'O4*', 'C3*', 'O3*', 'C2*',
'O2*', 'C1*', 'N9', 'C8', 'N7', 'C5', 'C6', 'N6', 'N1', 'C2', 'N3',
'C4'],
'GTP':['PG', 'O1G', 'O2G', 'O3G', 'PB', 'O1B', 'O2B', 'O3B', 'PA', 'O1A',
'O2A', 'O3A', 'O5*', 'C5*', 'C4*', 'O4*', 'C3*', 'O3*', 'C2*',
'O2*', 'C1*', 'N9', 'C8', 'N7', 'C5', 'C6', 'O6', 'N1', 'C2', 'N2',
'N3', 'C4'],
'DA': ['P', 'O1P', 'O2P', "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", "O4'",
"C1'", "H1'", 'N9', 'C8', 'H8', 'N7', 'C5', 'C6', 'N6', 'H61', 'H62',
'N1', 'C2', 'H2', 'N3', 'C4', "C3'", "H3'", "C2'", "H2'1", "H2'2",
"O3'"],
'DC': ['P', 'O1P', 'O2P', "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", "O4'",
"C1'", "H1'", 'N1', 'C6', 'H6', 'C5', 'H5', 'C4', 'N4', 'H41', 'H42',
'N3', 'C2', 'O2', "C3'", "H3'", "C2'", "H2'1", "H2'2", "O3'"],
'DG': ['P', 'O1P', 'O2P', "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", "O4'",
"C1'", "H1'", 'N9', 'C8', 'H8', 'N7', 'C5', 'C6', 'O6', 'N1', 'H1',
'C2', 'N2', 'H21', 'H22', 'N3', 'C4', "C3'", "H3'", "C2'", "H2'1",
"H2'2", "O3'"],
'DT': ['P', 'O1P', 'O2P', "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", "O4'",
"C1'", "H1'", 'N1', 'C6', 'H6', 'C5', 'C7', 'H71', 'H72', 'H73',
'C4', 'O4', 'N3', 'H3', 'C2', 'O2', "C3'", "H3'", "C2'", "H2'1",
"H2'2", "O3'"],
'RA': ['P', 'O1P', 'O2P', "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", "O4'",
"C1'", "H1'", 'N9', 'C8', 'H8', 'N7', 'C5', 'C6', 'N6', 'H61', 'H62',
'N1', 'C2', 'H2', 'N3', 'C4', "C3'", "H3'", "C2'", "H2'1", "O2'",
"HO'2", "O3'"],
'RC': ['P', 'O1P', 'O2P', "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", "O4'",
"C1'", "H1'", 'N1', 'C6', 'H6', 'C5', 'H5', 'C4', 'N4', 'H41', 'H42',
'N3', 'C2', 'O2', "C3'", "H3'", "C2'", "H2'1", "O2'", "HO'2", "O3'"],
'RG': ['P', 'O1P', 'O2P', "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", "O4'",
"C1'", "H1'", 'N9', 'C8', 'H8', 'N7', 'C5', 'C6', 'O6', 'N1', 'H1',
'C2', 'N2', 'H21', 'H22', 'N3', 'C4', "C3'", "H3'", "C2'", "H2'1",
"O2'", "HO'2", "O3'"],
'RU': ['P', 'O1P', 'O2P', "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", "O4'",
"C1'", "H1'", 'N1', 'C6', 'H6', 'C5', 'H5', 'C4', 'O4', 'N3', 'H3',
'C2', 'O2', "C3'", "H3'", "C2'", "H2'1", "O2'", "HO'2", "O3'"],
'MG' :['MG'],
'NDP':['P1', 'O1', 'O2', 'O5R', 'C5R', 'O1R', 'C4R', 'C3R', 'O3R', 'C2R',
'O2R', 'C1R', 'N9', 'C8', 'N7', 'C5', 'C6', 'N6', 'N1', 'C2',
'N3', 'C4', 'O10', 'P2', 'O11', 'O21', 'O51R', 'C51R', 'O11R',
'C41R', 'C31R', 'O31R', 'C21R', 'O21R', 'C11R', 'N11', 'C61',
'C51', 'C71', 'O71', 'N71', 'C41', 'C31', 'C21', 'P3', 'O3',
'O4', 'O5', 'H8', 'H9', 'H7', 'H6', 'H1', 'H5', 'H4', 'H13',
'H11', 'H12', 'H10', 'H18', 'H19', 'H17', 'H16', 'H3', 'H15',
'H2', 'H14', 'H23', 'H24', 'H25', 'H22', 'H26', 'H21', 'H20'] }
for res in ['DA','DC','DG','DT','RA','RC','RG','RU']:
#delete H
nsAtoms[ res ] = [ a for a in nsAtoms[res] if a[0] != 'H' ]
# create 3' and 5' versions
nsAtoms[ res + '3' ] = nsAtoms[res] + ['H3T']
nsAtoms[ res + '5' ] = ['H5T'] + nsAtoms[res][3:]
nsAtoms['NAP'] = nsAtoms['NDP'].remove('H26')
#: map AA and NS and some other residue names to single letter code
resDic = copy.copy( aaDic )
resDic.update( nsDicStandard )
#: map AA and NS residue names to list of allowed heavy atoms
atomDic = copy.copy( aaAtoms )
atomDic.update( nsAtoms )
#: some common synonyms of atom names
atomSynonyms = { "O'":'O', 'OT1':'O', "O''":'OXT', 'OT2':'OXT',
'O1':'O', 'O2':'OXT',
'CD':'CD1'}
hydrogenSynonyms = { 'H':'HN', '1HE2':'HE21', '2HE2':'HE22',
'1HH1':'HH11', '2HH1':'HH12', '1HH2':'HH21',
'2HH2':'HH22', '1HD2':'HD21', '2HD2':'HD22' }
###################
## Hydrogen bond
##
hbonds={ 'donors': {'GLY':['H','H1','H2','H3'],
'ALA':['H','H1','H2','H3'],
'VAL':['H','H1','H2','H3'],
'LEU':['H','H1','H2','H3'],
'ILE':['H','H1','H2','H3'],
'MET':['H','H1','H2','H3'],
'PRO':['H','H1','H2','H3'],
'PHE':['H','H1','H2','H3'],
'TRP':['H','H1','H2','H3','HE1'],
'SER':['H','H1','H2','H3','HG'],
'THR':['H','H1','H2','H3','HG1'],
'ASN':['H','H1','H2','H3','1HD2','2HD2'],
'GLN':['H','H1','H2','H3','1HE2','2HE2'],
'TYR':['H','H1','H2','H3','HH'],
'CYS':['H','H1','H2','H3','HG'],
'LYS':['H','H1','H2','H3','HZ1','HZ2','HZ3'],
'ARG':['H','H1','H2','H3','HE','1HH1','2HH1',
'1HH2','2HH2'],
'HIS':['H','H1','H2','H3','HD1','HE2'],
'ASP':['H','H1','H2','H3'],
'GLU':['H','H1','H2','H3']},
'acceptors': {'GLY':['O','OXT' ],
'ALA':['O','OXT'],
'VAL':['O','OXT'],
'LEU':['O','OXT'],
'ILE':['O','OXT'],
'MET':['O','SD','OXT'],
'PRO':['O','OXT'],
'PHE':['O','OXT'],
'TRP':['O','OXT'],
'SER':['O','OG', 'OXT'],
'THR':['O','OG1','CG2', 'OXT'],
'ASN':['O','OD1','OXT'],
'GLN':['O','OE1','OXT'],
'TYR':['O','OH','OXT'],
'CYS':['O','SG','OXT'],
'LYS':['O','OXT'],
'ARG':['O','OXT'],
'HIS':['O','OXT'],
'ASP':['O','OD1','OD2', 'OXT'],
'GLU':['O','OE1','OE2', 'OXT']} }
##############################
## Polar hydrogen connectivity -- PARAM19
polarH = {'GLY':{'H':'N','H1':'N','H2':'N','H3':'N'},
'ALA':{'H':'N','H1':'N','H2':'N','H3':'N'},
'VAL':{'H':'N','H1':'N','H2':'N','H3':'N'},
'LEU':{'H':'N','H1':'N','H2':'N','H3':'N'},
'ILE':{'H':'N','H1':'N','H2':'N','H3':'N'},
'MET':{'H':'N','H1':'N','H2':'N','H3':'N'},
'PRO':{'H':'N','H1':'N','H2':'N','H3':'N'},
'PHE':{'H':'N','H1':'N','H2':'N','H3':'N'},
'TRP':{'H':'N','H1':'N','H2':'N','H3':'N',
'HE1':'NE1'},
'SER':{'H':'N','H1':'N','H2':'N','H3':'N',
'HG':'OG'},
'THR':{'H':'N','H1':'N','H2':'N','H3':'N',
'HG1':'OG1'},
'ASN':{'H':'N','H1':'N','H2':'N','H3':'N',
'HD21':'ND2','HD22':'ND2'},
'GLN':{'H':'N','H1':'N','H2':'N','H3':'N',
'HE21':'NE2','HE22':'NE2'},
'TYR':{'H':'N','H1':'N','H2':'N','H3':'N',
'HH':'OH'},
'CYS':{'H':'N','H1':'N','H2':'N','H3':'N'},
'LYS':{'H':'N','H1':'N','H2':'N','H3':'N',
'HZ1':'NZ','HZ2':'NZ','HZ3':'NZ'},
'ARG':{'H':'N','H1':'N','H2':'N','H3':'N',
'HE':'NE', 'HH11':'NH1','HH12':'NH1',
'HH21':'NH2','HH22':'NH2'},
'HIS':{'H':'N','H1':'N','H2':'N','H3':'N',
'HD1':'ND1','HE2':'NE2'},
'ASP':{'H':'N','H1':'N','H2':'N','H3':'N'},
'GLU':{'H':'N','H1':'N','H2':'N','H3':'N'}}
## Scoring matrix for protein-protein interaction surfaces
## (Volume normalized values, Table IV in reference)
##
## The Matrix is based on data from a db of 621 noneredundant protein-protein
## complexes, a CB-CB (CA for Gly) of 6 A was used
##
## Reference:
## "Residue Frequencies and Pair Preferences at Protein-Protein Interfaces"
## F. Glaser, D. M. Steinberg, I. A. Vakser and N0. Ben-Tal,
## Proteins 43:89-102 (2001)
##
## Warning. This is just half of the matrix (above diagonal), the residue names
## in the pairs is sorted in the same order as in Complex.resPairCounts()
pairScore = {'WW': 5.85, 'WY': 6.19, 'RT': 3.77, 'RV': 4.18, 'RW': 8.57, 'RR': 2.87,
'RS': 2.82, 'RY': 5.28, 'GW': 1.42, 'GV':-0.41, 'GT': 0.21, 'GS':-1.53,
'GR': 1.59, 'GQ': 1.70, 'GP':-0.51, 'GY': 1.25, 'GG':-4.40, 'GN':-0.54,
'GM': 0.91, 'GL':-0.37, 'GK': 1.33, 'GI': 0.77, 'GH': 1.08, 'SS':-0.09,
'IY': 5.61, 'HY': 6.05, 'HR': 4.90, 'HS': 0.80, 'HP': 2.89, 'HQ': 4.00,
'HV': 3.21, 'HW': 6.46, 'HT': 2.71, 'KN': 3.17, 'HK': 2.72, 'HH': 5.37,
'HI': 3.38, 'HN': 2.38, 'HL': 4.88, 'HM': 4.65, 'ST': 1.91, 'PR': 3.99,
'PS': 1.33, 'PP': 0.60, 'PQ': 3.50, 'PV': 2.90, 'PW': 7.87, 'PT': 2.65,
'PY': 4.22, 'IQ': 3.60, 'IP': 3.27, 'AK': 2.13, 'EM': 3.88, 'EL': 3.12,
'EN': 2.68, 'EI': 3.20, 'EH': 2.30, 'EK': 5.32, 'EE': 1.65, 'EG':-0.89,
'EF': 2.87, 'IT': 3.05, 'EY': 4.54, 'ET': 2.88, 'EW': 1.20, 'IV': 4.91,
'EQ': 1.95, 'EP': 3.17, 'ES': 2.60, 'ER': 5.75, 'II': 3.89, 'MM': 6.02,
'MN': 2.30, 'AS': 0.39, 'MT': 2.09, 'MW': 4.89, 'MV': 4.37, 'MQ': 4.18,
'MP': 3.38, 'MS': 1.61, 'MR': 3.62, 'MY': 4.81, 'IL': 4.59, 'FP': 4.25,
'FQ': 4.25, 'FR': 4.49, 'FS': 1.75, 'FT': 3.34, 'VV': 3.74, 'FV': 4.69,
'FW': 5.83, 'FY': 5.83, 'AV': 2.57, 'FF': 5.34, 'FG': 0.14, 'FH': 3.47,
'FI': 5.33, 'FK': 3.57, 'FL': 4.86, 'FM': 5.28, 'FN': 3.11, 'EV': 3.22,
'NN': 2.92, 'NY': 3.66, 'NP': 3.09, 'NQ': 3.45, 'NR': 3.85, 'NS': 1.77,
'NT': 2.52, 'NV': 1.36, 'NW': 3.54, 'CK': 2.05, 'CI': 1.76, 'CH': 4.12,
'CN':-0.42, 'CM': 1.84, 'CL': 2.93, 'CC': 7.65, 'CG':-0.25, 'CF': 3.68,
'CE': 2.51, 'CD': 0.24, 'CY': 2.47, 'CS': 2.48, 'CR': 2.81, 'CQ': 1.33,
'CP': 2.47, 'CW': 2.14, 'CV': 2.89, 'CT': 1.03, 'SY': 2.30, 'VW': 2.92,
'KK': 3.24, 'SW': 2.87, 'SV': 1.42, 'KM': 3.93, 'KL': 3.15, 'KS': 2.74,
'KR': 2.29, 'KQ': 3.50, 'KP': 3.75, 'KW': 5.76, 'KV': 4.45, 'KT': 3.67,
'KY': 5.26, 'DN': 3.85, 'DL': 1.40, 'DM': 0.36, 'DK': 3.90, 'DH': 5.20,
'DI': 2.30, 'DF': 0.99, 'DG':-0.08, 'DD': 0.13, 'DE': 0.08, 'YY': 5.93,
'DY': 1.76, 'DV': 1.93, 'DW': 2.62, 'DT': 3.88, 'DR': 4.94, 'DS': 2.94,
'DP': 1.46, 'DQ': 3.26, 'TY': 3.14, 'LN': 2.31, 'TW': 5.12, 'LL': 4.03,
'LM': 5.32, 'LV': 4.20, 'LW': 5.77, 'LT': 2.07, 'LR': 4.99, 'LS': 1.41,
'LP': 2.50, 'LQ': 3.46, 'LY': 4.19, 'AA':-0.52, 'AC': 1.46, 'AE': 1.71,
'AD': 1.13, 'AG':-1.77, 'AF': 3.00, 'AI': 2.84, 'AH': 2.59, 'IS': 1.00,
'IR': 3.80, 'AM': 2.30, 'AL': 2.77, 'IW': 6.24, 'AN': 1.69, 'AQ': 1.72,
'AP': 1.22, 'IK': 3.23, 'AR': 1.90, 'IM': 5.25, 'AT': 1.21, 'AW': 3.37,
'IN': 1.59, 'AY': 2.47, 'VY': 3.95, 'QQ': 2.83, 'QS': 2.00, 'QR': 4.50,
'QT': 1.82, 'QW': 1.37, 'QV': 3.22, 'QY': 2.05, 'TV': 2.83, 'TT': 1.27}
## various constants
boltzmann = 1.38066e-23 ## [J/K]
NA = 6.02214199e+23 ## Avogadro constant [1/mol]
planck2 = 1.0545727e-34 ## [J s], h/2Pi
euler = N0.e
mu = 1.66056e-27 ## atomic mass unit in [kg]
angstroem = 1e-10 ## [m]
calorie = 4.184 ## [J]
#: dictionary with relative atomic mass of elements {'H':1.01, 'ZN':65.39, ...}
atomMasses = { 'H':1.00797, 'C':12.01115, 'N':14.0067,
'S':32.064, 'O':15.9994, 'P':30.9738, 'ZN': 65.39 }
[docs]def allAACodes():
"""
:return: list of all single AA codes, including B, Z, X
:rtype: [str]
"""
result = []
for aa in aaDic.values():
if not aa in result:
result += aa
return result
[docs]def allAA():
"""
:return: list of all 20 'exact' single AA codes.
:rtype: [str]
"""
result = allAACodes()
for a in ['Z','B','X']:
result.remove( a )
return result
[docs]def elementType( eLetter ):
"""
Classify an atom as polar or unpolar::
atomType( eLetter ) -> list of types this element belongs to
:param eLetter: atom name
:type eLetter: str
:return: return 'p' for polar, 'u' for unpolar and None if not
in classified
:rtype: p|u OR None
"""
types = {'p' : ['N','O','H','Cl'], ## polar
'u' : ['C','S'] } ## unpolar
for key, values in types.items():
if eLetter in values:
return key
return None
[docs]def resType( resCode ):
"""
Classify residues as aromatic (a), charged (c) or polar (p).
:param resCode: amino acid code
:type resCode: str
:return: list of types this residue belongs to...
:rtype: a|c|p OR None
"""
types = {'a' : ['F','Y','W','H'], ## aromatic
'c' : ['E','D','L','R','H'], ## charged
'p' : ['Q','N','S'] } ## polar
result = []
for t in types.keys():
if resCode in types[t]:
result += [t]
if result == []:
result = ['u']
return result
[docs]def singleAA(seq, xtable=None, nonstandard=True, unknown='?' ):
"""
convert list with 3-letter AA code to list with 1-letter code
:param seq: amino acid sequence in 3-letter code
:type seq: [str]
:param xtable: dictionary with additional str:single_char mapping
:type xtable: dict
:param nonstandard: support non-standard residue names (default True)
:type nonstandard: bool
:param unknown: letter to use for unknown residues [default: '?']
:type unknown: str
:return: list with 1-letter code; C{ ['A','C','L','A'...]}
:rtype: [str]
"""
result = [] # will hold 1-letter list
table = resDicStandard
if nonstandard: table = resDic
## Python2.5
## table = resDic if nonstandard else resDicStandard
if xtable:
table = copy.copy( table )
table.update( xtable )
for aa in seq:
try:
aa = aa.lower()
result += [ table[aa] ]
except:
result = result + [unknown]
return result
[docs]def single2longAA( seq ):
"""
Convert string of 1-letter AA code into list of 3-letter AA codes.
:param seq: amino acid sequence in 1-letter code
:type seq: str
:return: list with the amino acids in 3-letter code
:rtype: [str]
"""
## invert AA dict
invTab = {}
for key in aaDicStandard:
invTab[ aaDicStandard[key] ] = key
result = []
for aa in seq:
try:
aa = aa.upper()
result += [ invTab[aa].upper() ]
except:
EHandler.warning("unknown residue: " + str(aa))
result += ['Xaa']
return result
[docs]def cmpAtoms( a1, a2 ):
"""
Comparison function for bringing atoms into standard order
within residues as defined by :class:`atomDic`.
:param a1: atom dictionary
:type a1: CrossView or equivalent dictionary
:param a2: atom dictionary
:type a2: CrossView or equivalent dictionary
:return: int or list of matching positions
:rtype: [-1|0|1]
"""
## get standard order within residues
target = atomDic[ a1['residue_name'] ]
i1 = len( target )
if a1['name'] in target:
i1 = target.index( a1['name'] )
i2 = len( target )
if a2['name'] in target:
i2 = target.index( a2['name'] )
return (i1 > i2) - (i1 < i2)
[docs]def sortAtomsOfModel( model ):
"""
Sort atoms within residues into the standard order defined in :class:`atomDic`.
:param model: model to sort
:type model: PDBModel
:return: model with sorted atoms
:rtype: PDBModel
"""
## make a copy
model = model.take( model.atomRange() )
## sort atoms
model = model.sort( model.argsort( cmpAtoms ) )
return model
#############
## TESTING
#############
from . import test as BT
class Test(BT.BiskitTest):
"""Test case"""
def test_molUtils( self ):
"""molUtils test"""
from biskit import PDBModel
S = self
## load a structure
S.m = PDBModel( t.testRoot('lig/1A19.pdb' ))
S.model_1 = S.m.compress( S.m.maskProtein() )
## now sort in standard order
S.model_2 = sortAtomsOfModel( S.model_1)
## compare the atom order
cmp = []
for a in S.model_1.atomRange():
cmp += [ cmpAtoms( S.model_1.atoms[a], S.model_2.atoms[a] )]
self.assertEqual( N0.sum(cmp), 159 )
## get the primaty sequence as a string
S.seq = S.model_1.sequence()
## convert it to a list of three letter code
S.seq=single2longAA(S.seq)
## convert it to a list in one letter code
S.seq=singleAA(S.seq)
self.assertEqual( ''.join(S.seq), S.model_1.sequence() )
if __name__ == '__main__':
BT.localTest()