xi_covutils.distances module
Functions and classes to work with residue distances in protein structures
"""
Functions and classes to work with residue distances in protein structures
"""
import re
import operator
class Distances(object):
    '''
    Store and access distance data for residues from a protein structure.

    Internally the data is kept as a dict of dicts:
    ``{(chain1, pos1): {(chain2, pos2): distance}}``.
    '''
    def __init__(self, dist_data):
        '''
        Creates a new instance from distance data.

        Distance data should be a list of tuples of five elements:
        (chain1, pos1, chain2, pos2, distance). If the same ordered pair
        appears more than once, the last distance is kept.

        :param dist_data: a list of (chain1, pos1, chain2, pos2, distance)
        '''
        dis = {}
        for ch1, po1, ch2, po2, dist in dist_data:
            dis.setdefault((ch1, po1), {})[(ch2, po2)] = dist
        self._distances = dis

    def of(self, chain_a, pos_a, chain_b, pos_b):  #pylint: disable=invalid-name
        '''
        Retrieves distance for a residue pair.

        The pair is looked up in both orders, (a, b) first and then (b, a).
        If the pair is not found, None is returned.

        :param chain_a: A string specifying the first residue chain.
        :param pos_a: An integer specifying the first residue position.
        :param chain_b: A string specifying the second residue chain.
        :param pos_b: An integer specifying the second residue position.
        '''
        pair1 = (chain_a, pos_a)
        pair2 = (chain_b, pos_b)
        if pair1 == pair2:  # Special case for distance with the same residue.
            return 0
        distance = self._distances.get(pair1, {}).get(pair2)
        # Compare against None explicitly: a stored distance of 0.0 is a
        # valid value and must not be mistaken for a missing entry.
        if distance is None:
            distance = self._distances.get(pair2, {}).get(pair1)
        return distance

    def remap_positions(self, mapping):
        '''
        Remap index positions, replacing the stored distance data in place.

        If a position could not be mapped, it is excluded from the results.

        :param mapping: a dict of the form {chain: {old_pos: new_pos}}.
        '''
        def _remap(dic):
            remapped = {}
            for (chain, pos), value in dic.items():
                if pos in mapping.get(chain, {}):
                    remapped[(chain, mapping[chain][pos])] = value
            return remapped

        # Remap the partner residues of every entry first, then the
        # first-level residues themselves.
        inner_remapped = {}
        for residue, partners in self._distances.items():
            inner_remapped[residue] = _remap(partners)
        self._distances = _remap(inner_remapped)

    def is_contact(self, chain_a, pos_a, chain_b, pos_b, distance_cutoff=6.05):  #pylint: disable=too-many-arguments
        '''
        Returns True if a given pair's distance is lower than or equal to a
        given distance cutoff.

        A pair with no known distance is never a contact (returns False).

        :param chain_a: A string specifying the first residue chain.
        :param pos_a: An integer specifying the first residue position.
        :param chain_b: A string specifying the second residue chain.
        :param pos_b: An integer specifying the second residue position.
        :param distance_cutoff: a float with the distance cutoff (defaults to 6.05 angstroms)
        '''
        distance = self.of(chain_a, pos_a, chain_b, pos_b)
        # Guard against None: comparing None with a number is always True on
        # Python 2 and raises TypeError on Python 3.
        return distance is not None and distance <= distance_cutoff

    @staticmethod
    def _sum_true(boolean_list):
        '''
        Counts the truthy elements of an iterable.

        :param boolean_list: an iterable of values evaluated for truthiness.
        '''
        return sum(1 for flag in boolean_list if flag)

    def mean_intramolecular(self):
        """
        Return the mean number of intramolecular contacts across all residues for every chain.

        Every residue that appears in the distance data (in either slot of a
        pair) is considered. The result is a dict {chain: mean contacts}.

        :param self: a Distances obj
        """
        # Collect every residue seen as first or second member of a pair.
        all_residues = set(self._distances)
        for partners in self._distances.values():
            all_residues.update(partners)

        # Group positions by chain in a single pass.
        pos_by_chain = {}
        for chain, pos in all_residues:
            pos_by_chain.setdefault(chain, []).append(pos)

        means = {}
        for chain, positions in pos_by_chain.items():
            counts = [
                sum(1 for pos2 in positions
                    if pos1 != pos2 and self.is_contact(chain, pos1, chain, pos2))
                for pos1 in positions]
            means[chain] = float(sum(counts)) / max(1, len(counts))
        return means
def from_mitos(dist_file):
    '''
    Loads data of residue distances from a file generated by MIToS.

    Input data should look like:
    # model_i,chain_i,group_i,pdbe_i,number_i,name_i,model_j,chain_j,group_j,pdbe_j,number_j,name_j,distance
    1,A,ATOM,,55,LEU,1,A,ATOM,,56,LEU,1.3247309160731473

    Lines starting with "#" are ignored. Lines that do not have exactly 13
    comma-separated fields, or whose residue numbers / distance cannot be
    parsed as numbers, are skipped.

    :param dist_file: A string to a text file with the distance data.
    :return: a list of (chain_i, number_i, chain_j, number_j, distance) tuples.
    '''
    # Column layout:
    # 0 model_i, 1 chain_i, 2 group_i, 3 pdbe_i, 4 number_i, 5 name_i,
    # 6 model_j, 7 chain_j, 8 group_j, 9 pdbe_j, 10 number_j, 11 name_j,
    # 12 distance
    n_fields = 13
    res = []
    with open(dist_file) as handle:
        for line in handle:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            fields = line.split(",")
            if len(fields) != n_fields:
                continue
            try:
                # Splitting on commas (instead of matching digits with a
                # regular expression) keeps negative residue numbers and
                # chain identifiers longer than one character.
                res.append((
                    fields[1],          # Chain 1
                    int(fields[4]),     # Pos res 1
                    fields[7],          # Chain 2
                    int(fields[10]),    # Pos res 2
                    float(fields[12]))) # distance
            except ValueError:
                # Non numeric position or distance: skip the malformed line.
                continue
    return res
Functions
def from_mitos(
dist_file)
Loads data of residue distances from a file generated by MIToS.
Input data should look like:
# model_i,chain_i,group_i,pdbe_i,number_i,name_i,model_j,chain_j,group_j,pdbe_j,number_j,name_j,distance
1,A,ATOM,,55,LEU,1,A,ATOM,,56,LEU,1.3247309160731473
:param dist_file: A string to a text file with the distance data.
def from_mitos(dist_file):
    '''
    Loads data of residue distances from a file generated by MIToS.

    Input data should look like:
    # model_i,chain_i,group_i,pdbe_i,number_i,name_i,model_j,chain_j,group_j,pdbe_j,number_j,name_j,distance
    1,A,ATOM,,55,LEU,1,A,ATOM,,56,LEU,1.3247309160731473

    Lines starting with "#" are ignored. Lines that do not have exactly 13
    comma-separated fields, or whose residue numbers / distance cannot be
    parsed as numbers, are skipped.

    :param dist_file: A string to a text file with the distance data.
    :return: a list of (chain_i, number_i, chain_j, number_j, distance) tuples.
    '''
    # Column layout:
    # 0 model_i, 1 chain_i, 2 group_i, 3 pdbe_i, 4 number_i, 5 name_i,
    # 6 model_j, 7 chain_j, 8 group_j, 9 pdbe_j, 10 number_j, 11 name_j,
    # 12 distance
    n_fields = 13
    res = []
    with open(dist_file) as handle:
        for line in handle:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            fields = line.split(",")
            if len(fields) != n_fields:
                continue
            try:
                # Splitting on commas (instead of matching digits with a
                # regular expression) keeps negative residue numbers and
                # chain identifiers longer than one character.
                res.append((
                    fields[1],          # Chain 1
                    int(fields[4]),     # Pos res 1
                    fields[7],          # Chain 2
                    int(fields[10]),    # Pos res 2
                    float(fields[12]))) # distance
            except ValueError:
                # Non numeric position or distance: skip the malformed line.
                continue
    return res
Classes
class Distances
Store and access distance data for residues from a protein structure.
class Distances(object):
    '''
    Store and access distance data for residues from a protein structure.

    Internally the data is kept as a dict of dicts:
    ``{(chain1, pos1): {(chain2, pos2): distance}}``.
    '''
    def __init__(self, dist_data):
        '''
        Creates a new instance from distance data.

        Distance data should be a list of tuples of five elements:
        (chain1, pos1, chain2, pos2, distance). If the same ordered pair
        appears more than once, the last distance is kept.

        :param dist_data: a list of (chain1, pos1, chain2, pos2, distance)
        '''
        dis = {}
        for ch1, po1, ch2, po2, dist in dist_data:
            dis.setdefault((ch1, po1), {})[(ch2, po2)] = dist
        self._distances = dis

    def of(self, chain_a, pos_a, chain_b, pos_b):  #pylint: disable=invalid-name
        '''
        Retrieves distance for a residue pair.

        The pair is looked up in both orders, (a, b) first and then (b, a).
        If the pair is not found, None is returned.

        :param chain_a: A string specifying the first residue chain.
        :param pos_a: An integer specifying the first residue position.
        :param chain_b: A string specifying the second residue chain.
        :param pos_b: An integer specifying the second residue position.
        '''
        pair1 = (chain_a, pos_a)
        pair2 = (chain_b, pos_b)
        if pair1 == pair2:  # Special case for distance with the same residue.
            return 0
        distance = self._distances.get(pair1, {}).get(pair2)
        # Compare against None explicitly: a stored distance of 0.0 is a
        # valid value and must not be mistaken for a missing entry.
        if distance is None:
            distance = self._distances.get(pair2, {}).get(pair1)
        return distance

    def remap_positions(self, mapping):
        '''
        Remap index positions, replacing the stored distance data in place.

        If a position could not be mapped, it is excluded from the results.

        :param mapping: a dict of the form {chain: {old_pos: new_pos}}.
        '''
        def _remap(dic):
            remapped = {}
            for (chain, pos), value in dic.items():
                if pos in mapping.get(chain, {}):
                    remapped[(chain, mapping[chain][pos])] = value
            return remapped

        # Remap the partner residues of every entry first, then the
        # first-level residues themselves.
        inner_remapped = {}
        for residue, partners in self._distances.items():
            inner_remapped[residue] = _remap(partners)
        self._distances = _remap(inner_remapped)

    def is_contact(self, chain_a, pos_a, chain_b, pos_b, distance_cutoff=6.05):  #pylint: disable=too-many-arguments
        '''
        Returns True if a given pair's distance is lower than or equal to a
        given distance cutoff.

        A pair with no known distance is never a contact (returns False).

        :param chain_a: A string specifying the first residue chain.
        :param pos_a: An integer specifying the first residue position.
        :param chain_b: A string specifying the second residue chain.
        :param pos_b: An integer specifying the second residue position.
        :param distance_cutoff: a float with the distance cutoff (defaults to 6.05 angstroms)
        '''
        distance = self.of(chain_a, pos_a, chain_b, pos_b)
        # Guard against None: comparing None with a number is always True on
        # Python 2 and raises TypeError on Python 3.
        return distance is not None and distance <= distance_cutoff

    @staticmethod
    def _sum_true(boolean_list):
        '''
        Counts the truthy elements of an iterable.

        :param boolean_list: an iterable of values evaluated for truthiness.
        '''
        return sum(1 for flag in boolean_list if flag)

    def mean_intramolecular(self):
        """
        Return the mean number of intramolecular contacts across all residues for every chain.

        Every residue that appears in the distance data (in either slot of a
        pair) is considered. The result is a dict {chain: mean contacts}.

        :param self: a Distances obj
        """
        # Collect every residue seen as first or second member of a pair.
        all_residues = set(self._distances)
        for partners in self._distances.values():
            all_residues.update(partners)

        # Group positions by chain in a single pass.
        pos_by_chain = {}
        for chain, pos in all_residues:
            pos_by_chain.setdefault(chain, []).append(pos)

        means = {}
        for chain, positions in pos_by_chain.items():
            counts = [
                sum(1 for pos2 in positions
                    if pos1 != pos2 and self.is_contact(chain, pos1, chain, pos2))
                for pos1 in positions]
            means[chain] = float(sum(counts)) / max(1, len(counts))
        return means
Ancestors (in MRO)
- Distances
- __builtin__.object
Methods
def __init__(
self, dist_data)
Creates a new instance from distance data.
Distance data should be a list of tuples of five elements: (chain1, pos1, chain2, pos2, distance). :param dist_data: a list of (chain1, pos1, chain2, pos2, distance)
def __init__(self, dist_data):
'''
Creates a new instance from distance data.
Distance data should be a list of tuples of five elements: (chain1, pos1, chain2, pos2, distance).
:param dist_data: a list of (chain1, pos1, chain2, pos2, distance)
'''
dis = {}
for ch1, po1, ch2, po2, dist in dist_data:
if (ch1, po1) not in dis:
dis[(ch1, po1)] = {}
dis[(ch1, po1)][(ch2, po2)] = dist
self._distances = dis
def is_contact(
self, chain_a, pos_a, chain_b, pos_b, distance_cutoff=6.05)
Returns True if a given pair's distance is lower than or equal to a given distance cutoff. :param chain_a: A string specifying the first residue chain. :param pos_a: An integer specifying the first residue position. :param chain_b: A string specifying the second residue chain. :param pos_b: An integer specifying the second residue position. :param distance_cutoff: a float with the distance cutoff (defaults to 6.05 angstroms)
def is_contact(self, chain_a, pos_a, chain_b, pos_b, distance_cutoff=6.05):  #pylint: disable=too-many-arguments
    '''
    Returns True if a given pair's distance is lower than or equal to a
    given distance cutoff.

    A pair with no known distance is never a contact (returns False).

    :param chain_a: A string specifying the first residue chain.
    :param pos_a: An integer specifying the first residue position.
    :param chain_b: A string specifying the second residue chain.
    :param pos_b: An integer specifying the second residue position.
    :param distance_cutoff: a float with the distance cutoff (defaults to 6.05 angstroms)
    '''
    distance = self.of(chain_a, pos_a, chain_b, pos_b)
    # Guard against None: comparing None with a number is always True on
    # Python 2 and raises TypeError on Python 3.
    return distance is not None and distance <= distance_cutoff
def mean_intramolecular(
self)
Return the mean number of intramolecular contacts across all residues for every chain.
:param self: a Distances obj
def mean_intramolecular(self):
    """
    Return the mean number of intramolecular contacts across all residues for every chain.

    Every residue that appears in the distance data (in either slot of a
    pair) is considered. The result is a dict {chain: mean contacts}.

    :param self: a Distances obj
    """
    # Collect every residue seen as first or second member of a pair.
    all_residues = set(self._distances)
    for partners in self._distances.values():
        all_residues.update(partners)

    # Group positions by chain in a single pass.
    pos_by_chain = {}
    for chain, pos in all_residues:
        pos_by_chain.setdefault(chain, []).append(pos)

    means = {}
    for chain, positions in pos_by_chain.items():
        # The builtin sum replaces reduce, which is not a builtin on
        # Python 3.
        counts = [
            sum(1 for pos2 in positions
                if pos1 != pos2 and self.is_contact(chain, pos1, chain, pos2))
            for pos1 in positions]
        means[chain] = float(sum(counts)) / max(1, len(counts))
    return means
def of(
self, chain_a, pos_a, chain_b, pos_b)
Retrieves distance for a residue pair.
If the pair is not found, None is returned. :param chain_a: A string specifying the first residue chain. :param pos_a: An integer specifying the first residue position. :param chain_b: A string specifying the second residue chain. :param pos_b: An integer specifying the second residue position.
def of(self, chain_a, pos_a, chain_b, pos_b):  #pylint: disable=invalid-name
    '''
    Retrieves distance for a residue pair.

    The pair is looked up in both orders, (a, b) first and then (b, a).
    If the pair is not found, None is returned.

    :param chain_a: A string specifying the first residue chain.
    :param pos_a: An integer specifying the first residue position.
    :param chain_b: A string specifying the second residue chain.
    :param pos_b: An integer specifying the second residue position.
    '''
    pair1 = (chain_a, pos_a)
    pair2 = (chain_b, pos_b)
    if pair1 == pair2:  # Special case for distance with the same residue.
        return 0
    distance = self._distances.get(pair1, {}).get(pair2)
    # Compare against None explicitly: a stored distance of 0.0 is a valid
    # value and must not be mistaken for a missing entry.
    if distance is None:
        distance = self._distances.get(pair2, {}).get(pair1)
    return distance
def remap_positions(
self, mapping)
Remap index positions.
If a position could not be mapped, it is excluded from the results. :param mapping: a dict that maps old positions to new positions.
def remap_positions(self, mapping):
    '''
    Translates residue positions through ``mapping`` and replaces the
    stored distance data with the translated version.

    A residue whose position has no entry in the mapping of its chain is
    left out of the result.

    :param mapping: a dict indexed by chain and then by old position,
        giving the new position.
    '''
    def _translate(table):
        translated = {}
        for (chain, pos), value in table.items():
            if pos not in mapping.get(chain, {}):
                continue
            translated[(chain, mapping[chain][pos])] = value
        return translated

    # Translate the partner residues of every entry first, then the
    # first-level residues.
    partners_translated = {}
    for (chain_1, pos_1), partners in self._distances.items():
        partners_translated[(chain_1, pos_1)] = _translate(partners)
    self._distances = _translate(partners_translated)