Module xi_covutils.smooth

Functions to compute smooth covariation scores

Expand source code
"""
Functions to compute smooth covariation scores
"""
from xi_covutils.read_results import inter_covariation
from xi_covutils.read_results import intra_covariation

def _smooth_cov_segment(cov_data, windows_size=3):
  def _get_global_guards(cov_data):
    chain1_id = list(cov_data.keys())[0][0][0]
    chain2_id = list(cov_data.keys())[0][1][0]
    global_guards = {
      'min': {chain1_id: float('inf'), chain2_id: float('inf')},
      'max': {chain1_id: 0, chain2_id: 0}
    }
    for ((chain1, pos1), (chain2, pos2)) in cov_data:
      global_guards['min'][chain1] = min(global_guards['min'][chain1], pos1)
      global_guards['min'][chain2] = min(global_guards['min'][chain2], pos2)
      global_guards['max'][chain1] = max(global_guards['max'][chain1], pos1)
      global_guards['max'][chain2] = max(global_guards['max'][chain2], pos2)
    return global_guards
  def _compute_smoothed(chain1, chain2, locals_guards):
    cumm_scores = 0
    summables = 0
    chain1_range = range(
      locals_guards['min']['chain1'], locals_guards['max']['chain1']+1
    )
    chain2_range = range(
      locals_guards['min']['chain2'], locals_guards['max']['chain2']+1
    )
    for lpos1 in chain1_range:
      for lpos2 in chain2_range:
        if (chain1, lpos1) != (chain2, lpos2):
          summables += 1
          index_1 = ((chain1, lpos1), (chain2, lpos2))
          index_2 = ((chain2, lpos2), (chain1, lpos1))
          current_score = cov_data.get(index_1, cov_data.get(index_2))
          cumm_scores += current_score
    return float(cumm_scores) / max(1, summables)

  global_guards = _get_global_guards(cov_data)
  semi_w = int((windows_size - 1)/2)
  results = {}

  for ((chain1, pos1), (chain2, pos2)) in cov_data:
    locals_guards = {
      'min':{
        'chain1': max(global_guards['min'][chain1], pos1-semi_w),
        'chain2': max(global_guards['min'][chain2], pos2-semi_w)
      },
      'max':{
        'chain1': min(global_guards['max'][chain1], pos1+semi_w),
        'chain2': min(global_guards['max'][chain2], pos2+semi_w)
      }
    }
    results[((chain1, pos1), (chain2, pos2))] = (
      _compute_smoothed(chain1, chain2, locals_guards)
    )
  return results

def smooth_cov(
    cov_data:dict[tuple[int, int], float],
    windows_size:int=3
  ) -> dict[tuple[int, int], float]:
  """
  Smooth the covariation scores of a single protein with a window average.

  The input maps index pairs (i, j), with i<=j, to a score. Every index is
  tagged with the placeholder chain label 'A' so the data can be handed to
  _smooth_cov_segment, and the label is dropped again from the result.

  Args:
    cov_data (dict[tuple[int, int], float]): Covariation data.
    windows_size (int): The size of the window to compute the average.

  Returns:
    dict[tuple[int, int], float]: The window averaged covariation data,
      keyed by the same (i, j) pairs.
  """
  chain = 'A'
  # Lift plain (i, j) keys to the ((chain, i), (chain, j)) form.
  paired = {
    ((chain, first), (chain, second)): score
    for (first, second), score in cov_data.items()
  }
  smoothed = _smooth_cov_segment(paired, windows_size)
  # Strip the placeholder chain label to get back plain (i, j) keys.
  return {
    (left[1], right[1]): score
    for (left, right), score in smoothed.items()
  }

def smooth_cov_paired(
    cov_data:dict[tuple[tuple[str, int], tuple[str, int]], float],
    windows_size:int=3
  ) -> dict[tuple[tuple[str, int], tuple[str, int]], float]:
  """
  Smooth paired covariation data with a window average.

  The input maps keys of the form ((chain1, i), (chain2, j)) to a score.
  The data is split with intra_covariation and inter_covariation; every
  resulting segment is smoothed on its own and the smoothed segments are
  merged into a single dictionary.

  Args:
    cov_data (dict[tuple[tuple[str, int], tuple[str, int]], float]):
      The covariation data.
    windows_size (int): The size of the window to compute the average.

  Returns:
    dict[tuple[tuple[str, int], tuple[str, int]], float]: The window averaged
      covariation data.
  """
  # Intra segments first, then inter segments, as separate smoothing units.
  segments = list(intra_covariation(cov_data).values())
  segments.extend(inter_covariation(cov_data).values())
  merged = {}
  for segment in segments:
    merged.update(_smooth_cov_segment(segment, windows_size))
  return merged

Functions

def smooth_cov(cov_data: dict, windows_size: int = 3) ‑> dict

Calculate smoothed covariation data of a single protein.

Covariation data is assumed to be a dictionary of tuples of indices (i,j) where i<=j as keys and score as value.

Args

cov_data : dict[tuple[int, int], float]
Covariation data.
windows_size : int
The size of the window to compute the average.

Returns

dict[tuple[int, int], float]
The window averaged covariation data.
Expand source code
def smooth_cov(
    cov_data:dict[tuple[int, int], float],
    windows_size:int=3
  ) -> dict[tuple[int, int], float]:
  """
  Smooth the covariation scores of a single protein with a window average.

  The input maps index pairs (i, j), with i<=j, to a score. Every index is
  tagged with the placeholder chain label 'A' so the data can be handed to
  _smooth_cov_segment, and the label is dropped again from the result.

  Args:
    cov_data (dict[tuple[int, int], float]): Covariation data.
    windows_size (int): The size of the window to compute the average.

  Returns:
    dict[tuple[int, int], float]: The window averaged covariation data,
      keyed by the same (i, j) pairs.
  """
  chain = 'A'
  # Lift plain (i, j) keys to the ((chain, i), (chain, j)) form.
  paired = {
    ((chain, first), (chain, second)): score
    for (first, second), score in cov_data.items()
  }
  smoothed = _smooth_cov_segment(paired, windows_size)
  # Strip the placeholder chain label to get back plain (i, j) keys.
  return {
    (left[1], right[1]): score
    for (left, right), score in smoothed.items()
  }
def smooth_cov_paired(cov_data: dict, windows_size: int = 3) ‑> dict

Computes smoothed covariation for paired cov data.

Covariation data is assumed to be a dictionary with tuples of indices ((chain1, i), (chain2, j)) as keys and the score as value.

Args

cov_data : dict[tuple[tuple[str, int], tuple[str, int]], float]
The covariation data.
windows_size : int
The size of the window to compute the average.

Returns

dict[tuple[tuple[str, int], tuple[str, int]], float]
The window averaged covariation data.
Expand source code
def smooth_cov_paired(
    cov_data:dict[tuple[tuple[str, int], tuple[str, int]], float],
    windows_size:int=3
  ) -> dict[tuple[tuple[str, int], tuple[str, int]], float]:
  """
  Smooth paired covariation data with a window average.

  The input maps keys of the form ((chain1, i), (chain2, j)) to a score.
  The data is split with intra_covariation and inter_covariation; every
  resulting segment is smoothed on its own and the smoothed segments are
  merged into a single dictionary.

  Args:
    cov_data (dict[tuple[tuple[str, int], tuple[str, int]], float]):
      The covariation data.
    windows_size (int): The size of the window to compute the average.

  Returns:
    dict[tuple[tuple[str, int], tuple[str, int]], float]: The window averaged
      covariation data.
  """
  # Intra segments first, then inter segments, as separate smoothing units.
  segments = list(intra_covariation(cov_data).values())
  segments.extend(inter_covariation(cov_data).values())
  merged = {}
  for segment in segments:
    merged.update(_smooth_cov_segment(segment, windows_size))
  return merged