xi_covutils.smooth module
Functions to compute smooth covariation scores
"""
Functions to compute smooth covariation scores
"""
from sys import maxint
from xi_covutils.read_results import inter_covariation
from xi_covutils.read_results import intra_covariation
def _smooth_cov_segment(cov_data, windows_size=3):
"""
docstring here
:param cov_data:
:param windows_size=3:
"""
def _get_global_guards(cov_data):
chain1_id = cov_data.keys()[0][0][0]
chain2_id = cov_data.keys()[0][1][0]
global_guards = {
'min': {chain1_id: maxint, chain2_id: maxint},
'max': {chain1_id: 0, chain2_id: 0}
}
for ((chain1, pos1), (chain2, pos2)) in cov_data:
global_guards['min'][chain1] = min(global_guards['min'][chain1], pos1)
global_guards['min'][chain2] = min(global_guards['min'][chain2], pos2)
global_guards['max'][chain1] = max(global_guards['max'][chain1], pos1)
global_guards['max'][chain2] = max(global_guards['max'][chain2], pos2)
return global_guards
def _compute_smoothed(chain1, chain2, locals_guards):
cumm_scores = 0
summables = 0
chain1_range = xrange(locals_guards['min']['chain1'], locals_guards['max']['chain1']+1)
chain2_range = xrange(locals_guards['min']['chain2'], locals_guards['max']['chain2']+1)
for lpos1 in chain1_range:
for lpos2 in chain2_range:
if (chain1, lpos1) != (chain2, lpos2):
summables += 1
index_1 = ((chain1, lpos1), (chain2, lpos2))
index_2 = ((chain2, lpos2), (chain1, lpos1))
current_score = cov_data.get(index_1, cov_data.get(index_2))
cumm_scores += current_score
return float(cumm_scores) / max(1, summables)
global_guards = _get_global_guards(cov_data)
semi_w = (windows_size - 1)/2
results = {}
for ((chain1, pos1), (chain2, pos2)) in cov_data:
locals_guards = {
'min':{
'chain1': max(global_guards['min'][chain1], pos1-semi_w),
'chain2': max(global_guards['min'][chain2], pos2-semi_w)
},
'max':{
'chain1': min(global_guards['max'][chain1], pos1+semi_w),
'chain2': min(global_guards['max'][chain2], pos2+semi_w)
}
}
results[((chain1, pos1), (chain2, pos2))] = _compute_smoothed(chain1, chain2, locals_guards)
return results
def smooth_cov(cov_data, windows_size=3):
"""
Calculate smoothed covariation data of a single protein.
Covariation data is assumed to be a dictionary of tuples of
indices (i,j) where i<=j as keys and score as value.
:param cov_data: covariation data dict.
:param windows_size: the size of the window to compute the average.
"""
def _as_paired(cov_data):
return {(('A', i), ('A', j)): v for (i, j), v in cov_data.items()}
def _from_paired(cov_data):
return {(i, j): v for ((_, i), (_, j)), v in cov_data.items()}
smoothed = _smooth_cov_segment(_as_paired(cov_data), windows_size)
return _from_paired(smoothed)
def smooth_cov_paired(cov_data, windows_size=3):
"""
Computes smoothed covariation for paired cov data.
Covariation data is assumed to be a dictionary of tuples of
indices ((chain1, i) ,(chain2, j)) as keys and score as value.
:param cov_data: covariation data dict.
:param windows_size=3: the size of the window to compute the average.
"""
intra_cov = intra_covariation(cov_data)
inter_cov = inter_covariation(cov_data)
segments = [v for _, v in intra_cov.items()]
segments = segments + [v for _, v in inter_cov.items()]
smoothed = [_smooth_cov_segment(s, windows_size) for s in segments]
return {k: v for s in smoothed for k, v in s.items()}
Module variables
var maxint
Functions
def smooth_cov(
cov_data, windows_size=3)
Calculate smoothed covariation data of a single protein.
Covariation data is assumed to be a dictionary of tuples of indices (i,j) where i<=j as keys and score as value.
:param cov_data: covariation data dict.
:param windows_size: the size of the window to compute the average.
def smooth_cov(cov_data, windows_size=3):
"""
Calculate smoothed covariation data of a single protein.
Covariation data is assumed to be a dictionary of tuples of
indices (i,j) where i<=j as keys and score as value.
:param cov_data: covariation data dict.
:param windows_size: the size of the window to compute the average.
"""
def _as_paired(cov_data):
return {(('A', i), ('A', j)): v for (i, j), v in cov_data.items()}
def _from_paired(cov_data):
return {(i, j): v for ((_, i), (_, j)), v in cov_data.items()}
smoothed = _smooth_cov_segment(_as_paired(cov_data), windows_size)
return _from_paired(smoothed)
def smooth_cov_paired(
cov_data, windows_size=3)
Computes smoothed covariation for paired cov data.
Covariation data is assumed to be a dictionary of tuples of indices ((chain1, i) ,(chain2, j)) as keys and score as value.
:param cov_data: covariation data dict.
:param windows_size=3: the size of the window to compute the average.
def smooth_cov_paired(cov_data, windows_size=3):
"""
Computes smoothed covariation for paired cov data.
Covariation data is assumed to be a dictionary of tuples of
indices ((chain1, i) ,(chain2, j)) as keys and score as value.
:param cov_data: covariation data dict.
:param windows_size=3: the size of the window to compute the average.
"""
intra_cov = intra_covariation(cov_data)
inter_cov = inter_covariation(cov_data)
segments = [v for _, v in intra_cov.items()]
segments = segments + [v for _, v in inter_cov.items()]
smoothed = [_smooth_cov_segment(s, windows_size) for s in segments]
return {k: v for s in smoothed for k, v in s.items()}