Top

xi_covutils.distances module

Functions and classes to work with residue distances in protein structures

"""
    Functions and classes to work with residue distances in protein structures
"""

import re
import operator

class Distances(object):
    '''
    Store and access distance data for residues from a protein structure.

    Distances are kept in a nested dict of the form
    {(chain1, pos1): {(chain2, pos2): distance}}.
    '''
    def __init__(self, dist_data):
        '''
        Creates a new instance from distance data.

        Distance data should be a list of tuples of five elements: (chain1, pos1, chain2, pos2, distance).
        If the same ordered pair appears more than once, the last distance is kept.
        :param dist_data: a list of (chain1, pos1, chain2, pos2, distance)
        '''
        dis = {}
        for ch1, po1, ch2, po2, dist in dist_data:
            dis.setdefault((ch1, po1), {})[(ch2, po2)] = dist
        self._distances = dis

    def of(self, chain_a, pos_a, chain_b, pos_b): #pylint: disable=invalid-name
        '''
        Retrieves distance for a residue pair.

        The pair is looked up as (a, b) first and as (b, a) if that fails.
        If the pair is not found in either order, None is returned.
        :param chain_a: A string specifying the first residue chain.
        :param pos_a: An integer specifying the first residue position.
        :param chain_b: A string specifying the second residue chain.
        :param pos_b: An integer specifying the second residue position.
        '''
        pair1 = (chain_a, pos_a)
        pair2 = (chain_b, pos_b)
        if pair1 == pair2: # Special case for distance with the same residue.
            return 0
        distance = self._distances.get(pair1, {}).get(pair2)
        # Compare against None explicitly: a stored distance of 0.0 is a
        # valid value and must not be mistaken for a missing entry.
        if distance is None:
            distance = self._distances.get(pair2, {}).get(pair1)
        return distance

    def remap_positions(self, mapping):
        '''
        Remap index positions.

        If a position could not be mapped it is excluded from the results.
        The mapping is indexed as mapping[chain][old_position].
        :param mapping: a dict that maps old positions to new positions.
        '''
        def _remap(dic):
            return {(chain, mapping[chain][pos]):value
                    for (chain, pos), value in dic.items()
                    if pos in mapping.get(chain, {})}

        # Inner dicts (second residue) are remapped first, then the outer keys.
        self._distances = _remap({(c1, p1):_remap(r2)
                                  for (c1, p1), r2 in self._distances.items()})

    def is_contact(self, chain_a, pos_a, chain_b, pos_b, distance_cutoff=6.05): #pylint: disable=too-many-arguments
        '''
        Returns True if a given pair's distance is lower than or equal to a
        given distance cutoff.

        A pair with no known distance is never a contact (False is returned).
        :param chain_a: A string specifying the first residue chain.
        :param pos_a: An integer specifying the first residue position.
        :param chain_b: A string specifying the second residue chain.
        :param pos_b: An integer specifying the second residue position.
        :param distance_cutoff: a float with the distance cutoff (defaults to 6.05 angstroms)
        '''
        distance = self.of(chain_a, pos_a, chain_b, pos_b)
        if distance is None:
            # Comparing None with a number is True on Python 2 and raises
            # TypeError on Python 3; neither is a meaningful answer.
            return False
        return distance <= distance_cutoff

    @staticmethod
    def _sum_true(boolean_list):
        '''
        Counts the truthy elements of an iterable.
        '''
        return sum(1 for value in boolean_list if value)

    def mean_intramolecular(self):
        """
        Return the mean number of intramolecular contacts across all residues for every chain.

        Every residue that appears in the distance data, as first or second
        member of a pair, is taken into account. Contacts are evaluated
        with the default cutoff of is_contact.

            :param self: a Distances obj
            :return: a dict from chain to mean number of contacts per residue.
        """
        all_residues = set(self._distances)
        for partners in self._distances.values():
            all_residues.update(partners)

        # Group positions by chain in a single pass.
        pos_by_chain = {}
        for chain, pos in all_residues:
            pos_by_chain.setdefault(chain, []).append(pos)

        n_contacts = {}
        for chain, positions in pos_by_chain.items():
            counts = [self._sum_true(self.is_contact(chain, pos1, chain, pos2)
                                     for pos2 in positions if not pos1 == pos2)
                      for pos1 in positions]
            n_contacts[chain] = float(sum(counts)) / max(1, len(counts))
        return n_contacts


def from_mitos(dist_file):
    '''
    Loads data of residue distances from a file generated by MIToS.

    Input data should look like:

    # model_i,chain_i,group_i,pdbe_i,number_i,name_i,model_j,chain_j,group_j,pdbe_j,number_j,name_j,distance
    1,A,ATOM,,55,LEU,1,A,ATOM,,56,LEU,1.3247309160731473

    Lines starting with "#" and lines that do not match the expected
    record layout are skipped.

    :param dist_file: A string with the path to a text file with the distance data.
    :return: a list of (chain1, pos1, chain2, pos2, distance) tuples.
    '''
    # Captured groups, in order:
    #   1 model_i, 2 chain_i, 3 group_i, 4 number_i, 5 name_i,
    #   6 model_j, 7 chain_j, 8 group_j, 9 number_j, 10 name_j, 11 distance
    # The pdbe_i / pdbe_j columns are matched but not captured.
    # Residue numbers accept an optional leading minus sign, so records with
    # negative residue numbers are not silently dropped.
    d_pattern = re.compile(
        r"(\d+),(.),(.+),.*,(-?\d+),(.+),(\d+),(.),(.+),.*,(-?\d+),(.+),(.+)$")
    res = []
    with open(dist_file) as handle:
        for line in handle:
            line = line.strip()
            if line.startswith("#"):
                continue
            match = d_pattern.match(line)
            if match is None:
                # Not a distance record (blank or malformed line).
                continue
            res.append((
                match.group(2), # Chain 1
                int(match.group(4)), # Pos res 1
                match.group(7), # Chain 2
                int(match.group(9)), # Pos res 2
                float(match.group(11)))) # distance
    return res

Functions

def from_mitos(

dist_file)

Loads data of residue distances from a file generated by MIToS.

Input data should look like:

# model_i,chain_i,group_i,pdbe_i,number_i,name_i,model_j,chain_j,group_j,pdbe_j,number_j,name_j,distance
1,A,ATOM,,55,LEU,1,A,ATOM,,56,LEU,1.3247309160731473

:param dist_file: A string with the path to a text file containing the distance data.

def from_mitos(dist_file):
    '''
    Loads data of residue distances from a file generated by MIToS.

    Input data should look like:

    # model_i,chain_i,group_i,pdbe_i,number_i,name_i,model_j,chain_j,group_j,pdbe_j,number_j,name_j,distance
    1,A,ATOM,,55,LEU,1,A,ATOM,,56,LEU,1.3247309160731473

    Lines starting with "#" and lines that do not match the expected
    record layout are skipped.

    :param dist_file: A string with the path to a text file with the distance data.
    :return: a list of (chain1, pos1, chain2, pos2, distance) tuples.
    '''
    # Captured groups, in order:
    #   1 model_i, 2 chain_i, 3 group_i, 4 number_i, 5 name_i,
    #   6 model_j, 7 chain_j, 8 group_j, 9 number_j, 10 name_j, 11 distance
    # The pdbe_i / pdbe_j columns are matched but not captured.
    # Residue numbers accept an optional leading minus sign, so records with
    # negative residue numbers are not silently dropped.
    d_pattern = re.compile(
        r"(\d+),(.),(.+),.*,(-?\d+),(.+),(\d+),(.),(.+),.*,(-?\d+),(.+),(.+)$")
    res = []
    with open(dist_file) as handle:
        for line in handle:
            line = line.strip()
            if line.startswith("#"):
                continue
            match = d_pattern.match(line)
            if match is None:
                # Not a distance record (blank or malformed line).
                continue
            res.append((
                match.group(2), # Chain 1
                int(match.group(4)), # Pos res 1
                match.group(7), # Chain 2
                int(match.group(9)), # Pos res 2
                float(match.group(11)))) # distance
    return res

Classes

class Distances

Store and access distance data for residues from a protein structure.

class Distances(object):
    '''
    Store and access distance data for residues from a protein structure.

    Distances are kept in a nested dict of the form
    {(chain1, pos1): {(chain2, pos2): distance}}.
    '''
    def __init__(self, dist_data):
        '''
        Creates a new instance from distance data.

        Distance data should be a list of tuples of five elements: (chain1, pos1, chain2, pos2, distance).
        If the same ordered pair appears more than once, the last distance is kept.
        :param dist_data: a list of (chain1, pos1, chain2, pos2, distance)
        '''
        dis = {}
        for ch1, po1, ch2, po2, dist in dist_data:
            dis.setdefault((ch1, po1), {})[(ch2, po2)] = dist
        self._distances = dis

    def of(self, chain_a, pos_a, chain_b, pos_b): #pylint: disable=invalid-name
        '''
        Retrieves distance for a residue pair.

        The pair is looked up as (a, b) first and as (b, a) if that fails.
        If the pair is not found in either order, None is returned.
        :param chain_a: A string specifying the first residue chain.
        :param pos_a: An integer specifying the first residue position.
        :param chain_b: A string specifying the second residue chain.
        :param pos_b: An integer specifying the second residue position.
        '''
        pair1 = (chain_a, pos_a)
        pair2 = (chain_b, pos_b)
        if pair1 == pair2: # Special case for distance with the same residue.
            return 0
        distance = self._distances.get(pair1, {}).get(pair2)
        # Compare against None explicitly: a stored distance of 0.0 is a
        # valid value and must not be mistaken for a missing entry.
        if distance is None:
            distance = self._distances.get(pair2, {}).get(pair1)
        return distance

    def remap_positions(self, mapping):
        '''
        Remap index positions.

        If a position could not be mapped it is excluded from the results.
        The mapping is indexed as mapping[chain][old_position].
        :param mapping: a dict that maps old positions to new positions.
        '''
        def _remap(dic):
            return {(chain, mapping[chain][pos]):value
                    for (chain, pos), value in dic.items()
                    if pos in mapping.get(chain, {})}

        # Inner dicts (second residue) are remapped first, then the outer keys.
        self._distances = _remap({(c1, p1):_remap(r2)
                                  for (c1, p1), r2 in self._distances.items()})

    def is_contact(self, chain_a, pos_a, chain_b, pos_b, distance_cutoff=6.05): #pylint: disable=too-many-arguments
        '''
        Returns True if a given pair's distance is lower than or equal to a
        given distance cutoff.

        A pair with no known distance is never a contact (False is returned).
        :param chain_a: A string specifying the first residue chain.
        :param pos_a: An integer specifying the first residue position.
        :param chain_b: A string specifying the second residue chain.
        :param pos_b: An integer specifying the second residue position.
        :param distance_cutoff: a float with the distance cutoff (defaults to 6.05 angstroms)
        '''
        distance = self.of(chain_a, pos_a, chain_b, pos_b)
        if distance is None:
            # Comparing None with a number is True on Python 2 and raises
            # TypeError on Python 3; neither is a meaningful answer.
            return False
        return distance <= distance_cutoff

    @staticmethod
    def _sum_true(boolean_list):
        '''
        Counts the truthy elements of an iterable.
        '''
        return sum(1 for value in boolean_list if value)

    def mean_intramolecular(self):
        """
        Return the mean number of intramolecular contacts across all residues for every chain.

        Every residue that appears in the distance data, as first or second
        member of a pair, is taken into account. Contacts are evaluated
        with the default cutoff of is_contact.

            :param self: a Distances obj
            :return: a dict from chain to mean number of contacts per residue.
        """
        all_residues = set(self._distances)
        for partners in self._distances.values():
            all_residues.update(partners)

        # Group positions by chain in a single pass.
        pos_by_chain = {}
        for chain, pos in all_residues:
            pos_by_chain.setdefault(chain, []).append(pos)

        n_contacts = {}
        for chain, positions in pos_by_chain.items():
            counts = [self._sum_true(self.is_contact(chain, pos1, chain, pos2)
                                     for pos2 in positions if not pos1 == pos2)
                      for pos1 in positions]
            n_contacts[chain] = float(sum(counts)) / max(1, len(counts))
        return n_contacts

Ancestors (in MRO)

Methods

def __init__(

self, dist_data)

Creates a new instance from distance data.

Distance data should be a list of tuples of five elements: (chain1, pos1, chain2, pos2, distance). :param dist_data: a list of (chain1, pos1, chain2, pos2, distance)

def __init__(self, dist_data):
    '''
    Builds the instance from a collection of pairwise distances.
    Each element of dist_data is a five element tuple: (chain1, pos1, chain2, pos2, distance).
    The data is stored as a dict of dicts, indexed first by (chain1, pos1)
    and then by (chain2, pos2). A repeated pair keeps its last distance.
    :param dist_data: a list of (chain1, pos1, chain2, pos2, distance)
    '''
    by_first_residue = {}
    for record in dist_data:
        chain1, pos1, chain2, pos2, value = record
        partners = by_first_residue.setdefault((chain1, pos1), {})
        partners[(chain2, pos2)] = value
    self._distances = by_first_residue

def is_contact(

self, chain_a, pos_a, chain_b, pos_b, distance_cutoff=6.05)

Returns True if a given pair's distance is lower than or equal to a given distance cutoff. :param chain_a: A string specifying the first residue chain. :param pos_a: An integer specifying the first residue position. :param chain_b: A string specifying the second residue chain. :param pos_b: An integer specifying the second residue position. :param distance_cutoff: a float with the distance cutoff (defaults to 6.05 angstroms)

def is_contact(self, chain_a, pos_a, chain_b, pos_b, distance_cutoff=6.05): #pylint: disable=too-many-arguments
    '''
    Returns True if a given pair's distance is lower than or equal to a
    given distance cutoff.
    A pair with no known distance (self.of returns None) is never a contact,
    so False is returned for it.
    :param chain_a: A string specifying the first residue chain.
    :param pos_a: An integer specifying the first residue position.
    :param chain_b: A string specifying the second residue chain.
    :param pos_b: An integer specifying the second residue position.
    :param distance_cutoff: a float with the distance cutoff (defaults to 6.05 angstroms)
    '''
    distance = self.of(chain_a, pos_a, chain_b, pos_b)
    if distance is None:
        # Comparing None with a number is True on Python 2 and raises
        # TypeError on Python 3; neither is a meaningful answer.
        return False
    return distance <= distance_cutoff

def mean_intramolecular(

self)

Return the mean number of intramolecular contacts across all residues for every chain.

:param self: a Distances obj

def mean_intramolecular(self):
    """
    Return the mean number of intramolecular contacts across all residues for every chain.
    Every residue that appears in the distance data, as first or second
    member of a pair, is taken into account. Contacts are evaluated with
    self.is_contact and its default cutoff.
        :param self: a Distances obj
        :return: a dict from chain to mean number of contacts per residue.
    """
    all_residues = set(self._distances)
    for partners in self._distances.values():
        all_residues.update(partners)
    # Group positions by chain in a single pass.
    pos_by_chain = {}
    for chain, pos in all_residues:
        pos_by_chain.setdefault(chain, []).append(pos)
    n_contacts = {}
    for chain, positions in pos_by_chain.items():
        # Number of contacts of every residue with the rest of its chain.
        # The builtin sum is used because a bare reduce is not defined on
        # Python 3.
        counts = [sum(1 for pos2 in positions
                      if not pos1 == pos2 and self.is_contact(chain, pos1, chain, pos2))
                  for pos1 in positions]
        n_contacts[chain] = float(sum(counts)) / max(1, len(counts))
    return n_contacts

def of(

self, chain_a, pos_a, chain_b, pos_b)

Retrieves distance for a residue pair.

If the pair is not found, None is returned. :param chain_a: A string specifying the first residue chain. :param pos_a: An integer specifying the first residue position. :param chain_b: A string specifying the second residue chain. :param pos_b: An integer specifying the second residue position.

def of(self, chain_a, pos_a, chain_b, pos_b): #pylint: disable=invalid-name
    '''
    Retrieves distance for a residue pair.
    The pair is looked up as (a, b) first and as (b, a) if that fails.
    The distance of a residue with itself is always 0.
    If the pair is not found in either order, None is returned.
    :param chain_a: A string specifying the first residue chain.
    :param pos_a: An integer specifying the first residue position.
    :param chain_b: A string specifying the second residue chain.
    :param pos_b: An integer specifying the second residue position.
    '''
    pair1 = (chain_a, pos_a)
    pair2 = (chain_b, pos_b)
    if pair1 == pair2: # Special case for distance with the same residue.
        return 0
    distance = self._distances.get(pair1, {}).get(pair2)
    # Compare against None explicitly: a stored distance of 0.0 is a valid
    # value and must not be mistaken for a missing entry.
    if distance is None:
        distance = self._distances.get(pair2, {}).get(pair1)
    return distance

def remap_positions(

self, mapping)

Remap index positions.

If a position could not be mapped, it is excluded from the results. :param mapping: a dict that maps old positions to new positions.

def remap_positions(self, mapping):
    '''
    Translates every residue position to a new numbering.
    The mapping is indexed as mapping[chain][old_position]. Residues whose
    chain or position is absent from the mapping are dropped, both as first
    and as second member of a pair.
    :param mapping: a dict that maps old positions to new positions.
    '''
    def _translate(table):
        translated = {}
        for (chain, pos), value in table.items():
            chain_map = mapping.get(chain, {})
            if pos in chain_map:
                translated[(chain, chain_map[pos])] = value
        return translated

    # Translate the partner (inner) tables first, then the outer keys.
    inner_translated = {}
    for residue, partners in self._distances.items():
        inner_translated[residue] = _translate(partners)
    self._distances = _translate(inner_translated)