Module dadi.TwoLocus.TLSpectrum_mod
Contains triallelic Spectrum object
Expand source code
"""
Contains triallelic Spectrum object
"""
import os
import numpy as np
import dadi
class TLSpectrum(np.ma.masked_array):
"""
Represents a two-locus frequency spectrum.
The constructor has the format:
fs = dadi.Triallele.TLSpectrum(data, mask, mask_infeasible,
data_folded,
extrap_x, extrap_t)
data: The triallelic frequency spectrum data
mask: An optional array of the same size as data, similar to dadi.Spectrum
data_folded: If True, it is assumed that the input data is folded
check_folding: If True and data_folded=True, the data and
mask will be checked to ensure they are consistent
extrap_x: Optional floating point value specifying x value to use in
extrapolation.
extrap_t: Optional floating point value specifying t value to use in
extrapolation.
"""
def __new__(subtype, data, mask=np.ma.nomask, mask_infeasible=True,
data_folded=None, check_folding=True,
dtype=float, copy=True, fill_value=np.nan, keep_mask=True,
shrink=True, extrap_x=None, extrap_t=None):
data = np.asanyarray(data)
if mask is np.ma.nomask:
mask = np.ma.make_mask_none(data.shape)
subarr = np.ma.masked_array(data, mask=mask, dtype=dtype, copy=copy,
fill_value=fill_value, keep_mask=True,
shrink=True)
subarr = subarr.view(subtype)
if hasattr(data, 'folded'):
if data_folded is None or data_folded == data.folded:
subarr.folded = data.folded
elif data_folded != data.folded:
raise ValueError('Data does not have same folding status as '
'was called for in TLSpectrum constructor.')
elif data_folded is not None:
subarr.folded = data_folded
else:
subarr.folded = False
### XXX To do: ensure that all goes well when creating the TLSpectrum, come
### back to this
# Check that if we're declaring that the input data is folded, it actually is,
# and the mask reflects this.
if mask_infeasible:
subarr.mask_infeasible()
subarr.extrap_x = extrap_x
subarr.extrap_t = extrap_t
return subarr
# See https://scipy.github.io/old-wiki/pages/Subclasses.html for information on
# __array_finalize__ and __array_wrap__ methods.
#
# We need these methods to ensure extra attributes get copied along when
# we do arithmetic on the FS.
def __array_finalize__(self, obj):
if obj is None:
return
np.ma.masked_array.__array_finalize__(self, obj)
self.folded = getattr(obj, 'folded', 'unspecified')
self.extrap_x = getattr(obj, 'extrap_x', None)
self.extrap_t = getattr(obj, 'extrap_t', None)
def __array_wrap__(self, obj, context=None):
result = obj.view(type(self))
result = np.ma.masked_array.__array_wrap__(self, obj,
context=context)
result.folded = self.folded
result.extrap_t = self.extrap_t
return result
def _update_from(self, obj):
np.ma.masked_array._update_from(self, obj)
if hasattr(obj, 'folded'):
self.folded = obj.folded
if hasattr(obj, 'extrap_x'):
self.extrap_x = obj.extrap_x
if hasattr(obj, 'extrap_t'):
self.extrap_t = obj.extrap_t
# masked_array has priority 15.
__array_priority__ = 20
def __repr__(self):
return 'TLSpectrum(%s, folded=%s)'\
% (str(self), str(self.folded))
def mask_infeasible(self):
"""
Mask any infeasible entries.
"""
ns = len(self)-1
self.mask[0,0,0] = True
self.mask[0,:,0] = True
self.mask[0,0,:] = True
for ii in range(len(self)):
for jj in range(len(self)):
for kk in range(len(self)):
if ii+jj+kk > ns:
self.mask[ii,jj,kk] = True
for ii in range(len(self)):
self.mask[ii,ns-ii,0] = True
self.mask[ii,0,ns-ii] = True
return self
def unfold(self):
if not self.folded:
raise ValueError('Input Spectrum is not folded.')
data = self.data
unfolded = TLSpectrum(data, mask_infeasible=True)
unfolded.extrap_x = self.extrap_x
unfolded.extrap_t = self.extrap_t
return unfolded
def _get_sample_size(self):
return np.asarray(self.shape)[0] - 1
sample_size = property(_get_sample_size)
def _ensure_shape_and_dimension(self):
"""
Ensure that fs has Npop dimensions.
"""
pass
# Make from_file a static method, so we can use it without an instance.
@staticmethod
def from_file(fid, mask_infeasible=True, return_comments=False):
"""
Read frequency spectrum from file.
fid: string with file name to read from or an open file object.
mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum.
return_comments: If true, the return value is (fs, comments), where
comments is a list of strings containing the comments
from the file (without #'s).
See to_file method for details on the file format.
"""
newfile = False
# Try to read from fid. If we can't, assume it's something that we can
# use to open a file.
if not hasattr(fid, 'read'):
newfile = True
fid = open(fid, 'r')
line = fid.readline()
# Strip out the comments
comments = []
while line.startswith('#'):
comments.append(line[1:].strip())
line = fid.readline()
# Read the shape of the data
shape,folded,extrap_x,extrap_t = line.split()
shape = [int(shape)+1,int(shape)+1,int(shape)+1]
data = np.fromstring(fid.readline().strip(),
count=np.product(shape), sep=' ')
# fromfile returns a 1-d array. Reshape it to the proper form.
data = data.reshape(*shape)
maskline = fid.readline().strip()
mask = np.fromstring(maskline,
count=np.product(shape), sep=' ')
mask = mask.reshape(*shape)
if folded == 'folded':
folded = True
else:
folded = False
if extrap_x == 'None':
extrap_x = None
else:
extrap_x = float(extrap_x)
if extrap_t == 'None':
extrap_t = None
else:
extrap_t = float(extrap_t)
# If we opened a new file, clean it up.
if newfile:
fid.close()
fs = TLSpectrum(data, mask, mask_infeasible, data_folded=folded)
fs.extrap_x = extrap_x
fs.extrap_t = extrap_t
if not return_comments:
return fs
else:
return fs,comments
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True):
"""
Write frequency spectrum to file.
fid: string with file name to write to or an open file object.
precision: precision with which to write out entries of the SFS. (They
are formated via %.<p>g, where <p> is the precision.)
comment lines: list of strings to be used as comment lines in the header
of the output file.
foldmaskinfo: If False, folding and mask and population label
information will not be saved. This conforms to the file
format for dadi versions prior to 1.3.0.
The file format is:
# Any number of comment lines beginning with a '#'
A single line containing N integers giving the dimensions of the fs
array. So this line would be '5 5 3' for an SFS that was 5x5x3.
(That would be 4x4x2 *samples*.)
On the *same line*, the string 'folded' or 'unfolded'
denoting the folding status of the array
A single line giving the array elements. The order of elements is
e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
A single line giving the elements of the mask in the same order as
the data line. '1' indicates masked, '0' indicates unmasked.
"""
# Open the file object.
newfile = False
if not hasattr(fid, 'write'):
newfile = True
fid = open(fid, 'w')
# Write comments
for line in comment_lines:
fid.write('# ')
fid.write(line.strip())
fid.write(os.linesep)
# Write out the shape of the fs
fid.write('{0} '.format(self.sample_size))
if foldmaskinfo:
if not self.folded:
fid.write('unfolded ')
else:
fid.write('folded ')
if extrapinfo:
if not self.extrap_x:
fid.write('None ')
else:
fid.write('{0} '.format(self.extrap_x))
if not self.extrap_t:
fid.write('None')
else:
fid.write('{0}'.format(self.extrap_t))
fid.write(os.linesep)
# Write the data to the file
self.data.tofile(fid, ' ', '%%.%ig' % precision)
fid.write(os.linesep)
if foldmaskinfo:
# Write the mask to the file
np.asarray(self.mask,int).tofile(fid, ' ')
fid.write(os.linesep)
# Close file
if newfile:
fid.close()
tofile = to_file
def marginalA(self):
"""
Marginal 1D frequency spectrum for A locus.
"""
ns = self.shape[0] - 1
marg = dadi.Spectrum(np.zeros(ns+1))
for fAB in range(ns):
for fAb in range(ns-fAB):
marg[fAB+fAb] += self[fAB,fAb,:].sum()
marg.extrap_x = self.extrap_x
marg.extrap_t = self.extrap_t
return marg
def marginalB(self):
"""
Marginal 1D frequency spectrum for B locus.
"""
ns = self.shape[0] - 1
marg = dadi.Spectrum(np.zeros(ns+1))
for fAB in range(ns):
for faB in range(ns-fAB):
marg[fAB+faB] += self[fAB,:,faB].sum()
marg.extrap_x = self.extrap_x
marg.extrap_t = self.extrap_t
return marg
def mean_r2(self):
"""
Mean of normalized squared correlation coefficient between A and B loci.
"""
from . import numerics
ns = self.shape[0] - 1
norm = self.sum()
Dbin, r2bin = numerics.LD_per_bin(ns)
return (self*r2bin).sum()/self.sum()
def fold(self):
if self.folded:
raise ValueError('Input Spectrum is already folded.')
ns = self.shape[0] - 1
folded = 0*self
for ii in range(ns+1):
for jj in range(ns+1):
for kk in range(ns+1):
if self.mask[ii,jj,kk]:
continue
p = ii + jj
q = ii + kk
if p > ns/2 and q > ns/2:
# Switch A/a and B/b, so AB becomes ab, Ab becomes aB, etc
folded[ns-ii-jj-kk,kk,jj] = self[ns-ii-jj-kk,kk,jj] + self[ii,jj,kk]
folded.mask[ii,jj,kk] = True
elif p > ns/2:
# Switch A/a, so AB -> aB, Ab -> ab, aB -> AB, and ab -> Ab
folded[kk,ns-ii-jj-kk,ii] = self[kk,ns-ii-jj-kk,ii] + self[ii,jj,kk]
folded.mask[ii,jj,kk] = True
elif q > ns/2:
# Switch B/b, so AB -> Ab, Ab -> AB, aB -> ab, and ab -> aB
folded[jj,ii,ns-ii-jj-kk] = self[jj,ii,ns-ii-jj-kk] + self[ii,jj,kk]
folded.mask[ii,jj,kk] = True
folded.folded = True
folded.extrap_x = self.extrap_x
folded.extrap_t = self.extrap_t
return folded
# Ensures that when arithmetic is done with TLSpectrum objects,
# attributes are preserved. For details, see similar code in
# dadi.Spectrum_mod
for method in ['__add__','__radd__','__sub__','__rsub__','__mul__',
'__rmul__','__div__','__rdiv__','__truediv__','__rtruediv__',
'__floordiv__','__rfloordiv__','__rpow__','__pow__']:
exec("""
def %(method)s(self, other):
self._check_other_folding(other)
if isinstance(other, np.ma.masked_array):
newdata = self.data.%(method)s (other.data)
newmask = np.ma.mask_or(self.mask, other.mask)
else:
newdata = self.data.%(method)s (other)
newmask = self.mask
if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x:
extrap_x = None
else:
extrap_x = self.extrap_x
if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t:
extrap_t = None
else:
extrap_t = self.extrap_t
outfs = self.__class__.__new__(self.__class__, newdata, newmask,
mask_infeasible=False,
data_folded=self.folded,
extrap_x=extrap_x, extrap_t=extrap_t)
return outfs
""" % {'method':method})
# Methods that modify the Spectrum in-place.
for method in ['__iadd__','__isub__','__imul__','__idiv__',
'__itruediv__','__ifloordiv__','__ipow__']:
exec("""
def %(method)s(self, other):
self._check_other_folding(other)
if isinstance(other, np.ma.masked_array):
self.data.%(method)s (other.data)
self.mask = np.ma.mask_or(self.mask, other.mask)
else:
self.data.%(method)s (other)
if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x:
self.extrap_x = None
if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t:
self.extrap_t = None
return self
""" % {'method':method})
def _check_other_folding(self, other):
"""
Ensure other Spectrum has same .folded status
"""
if isinstance(other, self.__class__)\
and (other.folded != self.folded):
raise ValueError('Cannot operate with a folded Spectrum and an '
'unfolded one.')
# Allow TLSpectrum objects to be pickled.
# See http://effbot.org/librarybook/copy-reg.htm
try:
import copyreg
except:
# For Python 2.x compatibility
import copy_reg as copyreg
def TLSpectrum_pickler(fs):
# Collect all the info necessary to save the state of a TLSpectrum
return TLSpectrum_unpickler, (fs.data, fs.mask, fs.folded,
fs.extrap_x, fs.extrap_t)
def TLSpectrum_unpickler(data, mask, folded,
extrap_x, extrap_t):
# Use that info to recreate the TLSpectrum
return TLSpectrum(data, mask, mask_infeasible=False,
data_folded=folded,
extrap_x=extrap_x, extrap_t=extrap_t)
copyreg.pickle(TLSpectrum, TLSpectrum_pickler, TLSpectrum_unpickler)
Functions
def TLSpectrum_pickler(fs)
-
Expand source code
def TLSpectrum_pickler(fs): # Collect all the info necessary to save the state of a TLSpectrum return TLSpectrum_unpickler, (fs.data, fs.mask, fs.folded, fs.extrap_x, fs.extrap_t)
def TLSpectrum_unpickler(data, mask, folded, extrap_x, extrap_t)
-
Expand source code
def TLSpectrum_unpickler(data, mask, folded, extrap_x, extrap_t): # Use that info to recreate the TLSpectrum return TLSpectrum(data, mask, mask_infeasible=False, data_folded=folded, extrap_x=extrap_x, extrap_t=extrap_t)
Classes
class TLSpectrum (*args, **kwargs)
-
Represents a two-locus frequency spectrum.
The constructor has the format: fs = dadi.Triallele.TLSpectrum(data, mask, mask_infeasible, data_folded, extrap_x, extrap_t)
data: The triallelic frequency spectrum data mask: An optional array of the same size as data, similar to dadi.Spectrum data_folded: If True, it is assumed that the input data is folded check_folding: If True and data_folded=True, the data and mask will be checked to ensure they are consistent extrap_x: Optional floating point value specifying x value to use in extrapolation. extrap_t: Optional floating point value specifying t value to use in extrapolation.
Expand source code
class TLSpectrum(np.ma.masked_array): """ Represents a two-locus frequency spectrum. The constructor has the format: fs = dadi.Triallele.TLSpectrum(data, mask, mask_infeasible, data_folded, extrap_x, extrap_t) data: The triallelic frequency spectrum data mask: An optional array of the same size as data, similar to dadi.Spectrum data_folded: If True, it is assumed that the input data is folded check_folding: If True and data_folded=True, the data and mask will be checked to ensure they are consistent extrap_x: Optional floating point value specifying x value to use in extrapolation. extrap_t: Optional floating point value specifying t value to use in extrapolation. """ def __new__(subtype, data, mask=np.ma.nomask, mask_infeasible=True, data_folded=None, check_folding=True, dtype=float, copy=True, fill_value=np.nan, keep_mask=True, shrink=True, extrap_x=None, extrap_t=None): data = np.asanyarray(data) if mask is np.ma.nomask: mask = np.ma.make_mask_none(data.shape) subarr = np.ma.masked_array(data, mask=mask, dtype=dtype, copy=copy, fill_value=fill_value, keep_mask=True, shrink=True) subarr = subarr.view(subtype) if hasattr(data, 'folded'): if data_folded is None or data_folded == data.folded: subarr.folded = data.folded elif data_folded != data.folded: raise ValueError('Data does not have same folding status as ' 'was called for in TLSpectrum constructor.') elif data_folded is not None: subarr.folded = data_folded else: subarr.folded = False ### XXX To do: ensure that all goes well when creating the TLSpectrum, come ### back to this # Check that if we're declaring that the input data is folded, it actually is, # and the mask reflects this. if mask_infeasible: subarr.mask_infeasible() subarr.extrap_x = extrap_x subarr.extrap_t = extrap_t return subarr # See https://scipy.github.io/old-wiki/pages/Subclasses.html for information on # __array_finalize__ and __array_wrap__ methods. # # We need these methods to ensure extra attributes get copied along when # we do arithmetic on the FS. def __array_finalize__(self, obj): if obj is None: return np.ma.masked_array.__array_finalize__(self, obj) self.folded = getattr(obj, 'folded', 'unspecified') self.extrap_x = getattr(obj, 'extrap_x', None) self.extrap_t = getattr(obj, 'extrap_t', None) def __array_wrap__(self, obj, context=None): result = obj.view(type(self)) result = np.ma.masked_array.__array_wrap__(self, obj, context=context) result.folded = self.folded result.extrap_t = self.extrap_t return result def _update_from(self, obj): np.ma.masked_array._update_from(self, obj) if hasattr(obj, 'folded'): self.folded = obj.folded if hasattr(obj, 'extrap_x'): self.extrap_x = obj.extrap_x if hasattr(obj, 'extrap_t'): self.extrap_t = obj.extrap_t # masked_array has priority 15. __array_priority__ = 20 def __repr__(self): return 'TLSpectrum(%s, folded=%s)'\ % (str(self), str(self.folded)) def mask_infeasible(self): """ Mask any infeasible entries. """ ns = len(self)-1 self.mask[0,0,0] = True self.mask[0,:,0] = True self.mask[0,0,:] = True for ii in range(len(self)): for jj in range(len(self)): for kk in range(len(self)): if ii+jj+kk > ns: self.mask[ii,jj,kk] = True for ii in range(len(self)): self.mask[ii,ns-ii,0] = True self.mask[ii,0,ns-ii] = True return self def unfold(self): if not self.folded: raise ValueError('Input Spectrum is not folded.') data = self.data unfolded = TLSpectrum(data, mask_infeasible=True) unfolded.extrap_x = self.extrap_x unfolded.extrap_t = self.extrap_t return unfolded def _get_sample_size(self): return np.asarray(self.shape)[0] - 1 sample_size = property(_get_sample_size) def _ensure_shape_and_dimension(self): """ Ensure that fs has Npop dimensions. """ pass # Make from_file a static method, so we can use it without an instance. @staticmethod def from_file(fid, mask_infeasible=True, return_comments=False): """ Read frequency spectrum from file. fid: string with file name to read from or an open file object. mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum. return_comments: If true, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s). See to_file method for details on the file format. """ newfile = False # Try to read from fid. If we can't, assume it's something that we can # use to open a file. if not hasattr(fid, 'read'): newfile = True fid = open(fid, 'r') line = fid.readline() # Strip out the comments comments = [] while line.startswith('#'): comments.append(line[1:].strip()) line = fid.readline() # Read the shape of the data shape,folded,extrap_x,extrap_t = line.split() shape = [int(shape)+1,int(shape)+1,int(shape)+1] data = np.fromstring(fid.readline().strip(), count=np.product(shape), sep=' ') # fromfile returns a 1-d array. Reshape it to the proper form. data = data.reshape(*shape) maskline = fid.readline().strip() mask = np.fromstring(maskline, count=np.product(shape), sep=' ') mask = mask.reshape(*shape) if folded == 'folded': folded = True else: folded = False if extrap_x == 'None': extrap_x = None else: extrap_x = float(extrap_x) if extrap_t == 'None': extrap_t = None else: extrap_t = float(extrap_t) # If we opened a new file, clean it up. if newfile: fid.close() fs = TLSpectrum(data, mask, mask_infeasible, data_folded=folded) fs.extrap_x = extrap_x fs.extrap_t = extrap_t if not return_comments: return fs else: return fs,comments def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True): """ Write frequency spectrum to file. fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.<p>g, where <p> is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0. The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 *samples*.) On the *same line*, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ... A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked. """ # Open the file object. newfile = False if not hasattr(fid, 'write'): newfile = True fid = open(fid, 'w') # Write comments for line in comment_lines: fid.write('# ') fid.write(line.strip()) fid.write(os.linesep) # Write out the shape of the fs fid.write('{0} '.format(self.sample_size)) if foldmaskinfo: if not self.folded: fid.write('unfolded ') else: fid.write('folded ') if extrapinfo: if not self.extrap_x: fid.write('None ') else: fid.write('{0} '.format(self.extrap_x)) if not self.extrap_t: fid.write('None') else: fid.write('{0}'.format(self.extrap_t)) fid.write(os.linesep) # Write the data to the file self.data.tofile(fid, ' ', '%%.%ig' % precision) fid.write(os.linesep) if foldmaskinfo: # Write the mask to the file np.asarray(self.mask,int).tofile(fid, ' ') fid.write(os.linesep) # Close file if newfile: fid.close() tofile = to_file def marginalA(self): """ Marginal 1D frequency spectrum for A locus. """ ns = self.shape[0] - 1 marg = dadi.Spectrum(np.zeros(ns+1)) for fAB in range(ns): for fAb in range(ns-fAB): marg[fAB+fAb] += self[fAB,fAb,:].sum() marg.extrap_x = self.extrap_x marg.extrap_t = self.extrap_t return marg def marginalB(self): """ Marginal 1D frequency spectrum for B locus. """ ns = self.shape[0] - 1 marg = dadi.Spectrum(np.zeros(ns+1)) for fAB in range(ns): for faB in range(ns-fAB): marg[fAB+faB] += self[fAB,:,faB].sum() marg.extrap_x = self.extrap_x marg.extrap_t = self.extrap_t return marg def mean_r2(self): """ Mean of normalized squared correlation coefficient between A and B loci. """ from . import numerics ns = self.shape[0] - 1 norm = self.sum() Dbin, r2bin = numerics.LD_per_bin(ns) return (self*r2bin).sum()/self.sum() def fold(self): if self.folded: raise ValueError('Input Spectrum is already folded.') ns = self.shape[0] - 1 folded = 0*self for ii in range(ns+1): for jj in range(ns+1): for kk in range(ns+1): if self.mask[ii,jj,kk]: continue p = ii + jj q = ii + kk if p > ns/2 and q > ns/2: # Switch A/a and B/b, so AB becomes ab, Ab becomes aB, etc folded[ns-ii-jj-kk,kk,jj] = self[ns-ii-jj-kk,kk,jj] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True elif p > ns/2: # Switch A/a, so AB -> aB, Ab -> ab, aB -> AB, and ab -> Ab folded[kk,ns-ii-jj-kk,ii] = self[kk,ns-ii-jj-kk,ii] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True elif q > ns/2: # Switch B/b, so AB -> Ab, Ab -> AB, aB -> ab, and ab -> aB folded[jj,ii,ns-ii-jj-kk] = self[jj,ii,ns-ii-jj-kk] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True folded.folded = True folded.extrap_x = self.extrap_x folded.extrap_t = self.extrap_t return folded # Ensures that when arithmetic is done with TLSpectrum objects, # attributes are preserved. For details, see similar code in # dadi.Spectrum_mod for method in ['__add__','__radd__','__sub__','__rsub__','__mul__', '__rmul__','__div__','__rdiv__','__truediv__','__rtruediv__', '__floordiv__','__rfloordiv__','__rpow__','__pow__']: exec(""" def %(method)s(self, other): self._check_other_folding(other) if isinstance(other, np.ma.masked_array): newdata = self.data.%(method)s (other.data) newmask = np.ma.mask_or(self.mask, other.mask) else: newdata = self.data.%(method)s (other) newmask = self.mask if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x: extrap_x = None else: extrap_x = self.extrap_x if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t: extrap_t = None else: extrap_t = self.extrap_t outfs = self.__class__.__new__(self.__class__, newdata, newmask, mask_infeasible=False, data_folded=self.folded, extrap_x=extrap_x, extrap_t=extrap_t) return outfs """ % {'method':method}) # Methods that modify the Spectrum in-place. for method in ['__iadd__','__isub__','__imul__','__idiv__', '__itruediv__','__ifloordiv__','__ipow__']: exec(""" def %(method)s(self, other): self._check_other_folding(other) if isinstance(other, np.ma.masked_array): self.data.%(method)s (other.data) self.mask = np.ma.mask_or(self.mask, other.mask) else: self.data.%(method)s (other) if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x: self.extrap_x = None if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t: self.extrap_t = None return self """ % {'method':method}) def _check_other_folding(self, other): """ Ensure other Spectrum has same .folded status """ if isinstance(other, self.__class__)\ and (other.folded != self.folded): raise ValueError('Cannot operate with a folded Spectrum and an ' 'unfolded one.')
Ancestors
- numpy.ma.core.MaskedArray
- numpy.ndarray
Class variables
var method
-
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.str() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.
Static methods
def from_file(fid, mask_infeasible=True, return_comments=False)
-
Read frequency spectrum from file.
fid: string with file name to read from or an open file object. mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum. return_comments: If true, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s).
See to_file method for details on the file format.
Expand source code
@staticmethod def from_file(fid, mask_infeasible=True, return_comments=False): """ Read frequency spectrum from file. fid: string with file name to read from or an open file object. mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum. return_comments: If true, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s). See to_file method for details on the file format. """ newfile = False # Try to read from fid. If we can't, assume it's something that we can # use to open a file. if not hasattr(fid, 'read'): newfile = True fid = open(fid, 'r') line = fid.readline() # Strip out the comments comments = [] while line.startswith('#'): comments.append(line[1:].strip()) line = fid.readline() # Read the shape of the data shape,folded,extrap_x,extrap_t = line.split() shape = [int(shape)+1,int(shape)+1,int(shape)+1] data = np.fromstring(fid.readline().strip(), count=np.product(shape), sep=' ') # fromfile returns a 1-d array. Reshape it to the proper form. data = data.reshape(*shape) maskline = fid.readline().strip() mask = np.fromstring(maskline, count=np.product(shape), sep=' ') mask = mask.reshape(*shape) if folded == 'folded': folded = True else: folded = False if extrap_x == 'None': extrap_x = None else: extrap_x = float(extrap_x) if extrap_t == 'None': extrap_t = None else: extrap_t = float(extrap_t) # If we opened a new file, clean it up. if newfile: fid.close() fs = TLSpectrum(data, mask, mask_infeasible, data_folded=folded) fs.extrap_x = extrap_x fs.extrap_t = extrap_t if not return_comments: return fs else: return fs,comments
Instance variables
var sample_size
-
Expand source code
def _get_sample_size(self): return np.asarray(self.shape)[0] - 1
Methods
def fold(self)
-
Expand source code
def fold(self): if self.folded: raise ValueError('Input Spectrum is already folded.') ns = self.shape[0] - 1 folded = 0*self for ii in range(ns+1): for jj in range(ns+1): for kk in range(ns+1): if self.mask[ii,jj,kk]: continue p = ii + jj q = ii + kk if p > ns/2 and q > ns/2: # Switch A/a and B/b, so AB becomes ab, Ab becomes aB, etc folded[ns-ii-jj-kk,kk,jj] = self[ns-ii-jj-kk,kk,jj] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True elif p > ns/2: # Switch A/a, so AB -> aB, Ab -> ab, aB -> AB, and ab -> Ab folded[kk,ns-ii-jj-kk,ii] = self[kk,ns-ii-jj-kk,ii] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True elif q > ns/2: # Switch B/b, so AB -> Ab, Ab -> AB, aB -> ab, and ab -> aB folded[jj,ii,ns-ii-jj-kk] = self[jj,ii,ns-ii-jj-kk] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True folded.folded = True folded.extrap_x = self.extrap_x folded.extrap_t = self.extrap_t return folded
def marginalA(self)
-
Marginal 1D frequency spectrum for A locus.
Expand source code
def marginalA(self): """ Marginal 1D frequency spectrum for A locus. """ ns = self.shape[0] - 1 marg = dadi.Spectrum(np.zeros(ns+1)) for fAB in range(ns): for fAb in range(ns-fAB): marg[fAB+fAb] += self[fAB,fAb,:].sum() marg.extrap_x = self.extrap_x marg.extrap_t = self.extrap_t return marg
def marginalB(self)
-
Marginal 1D frequency spectrum for B locus.
Expand source code
def marginalB(self): """ Marginal 1D frequency spectrum for B locus. """ ns = self.shape[0] - 1 marg = dadi.Spectrum(np.zeros(ns+1)) for fAB in range(ns): for faB in range(ns-fAB): marg[fAB+faB] += self[fAB,:,faB].sum() marg.extrap_x = self.extrap_x marg.extrap_t = self.extrap_t return marg
def mask_infeasible(self)
-
Mask any infeasible entries.
Expand source code
def mask_infeasible(self): """ Mask any infeasible entries. """ ns = len(self)-1 self.mask[0,0,0] = True self.mask[0,:,0] = True self.mask[0,0,:] = True for ii in range(len(self)): for jj in range(len(self)): for kk in range(len(self)): if ii+jj+kk > ns: self.mask[ii,jj,kk] = True for ii in range(len(self)): self.mask[ii,ns-ii,0] = True self.mask[ii,0,ns-ii] = True return self
def mean_r2(self)
-
Mean of normalized squared correlation coefficient between A and B loci.
Expand source code
def mean_r2(self): """ Mean of normalized squared correlation coefficient between A and B loci. """ from . import numerics ns = self.shape[0] - 1 norm = self.sum() Dbin, r2bin = numerics.LD_per_bin(ns) return (self*r2bin).sum()/self.sum()
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True)
-
Write frequency spectrum to file.
fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.
g, where
is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0.
The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.) On the same line, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] … fs[0,1,0] fs[0,1,1] … A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked.
Expand source code
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True): """ Write frequency spectrum to file. fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.<p>g, where <p> is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0. The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 *samples*.) On the *same line*, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ... A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked. """ # Open the file object. newfile = False if not hasattr(fid, 'write'): newfile = True fid = open(fid, 'w') # Write comments for line in comment_lines: fid.write('# ') fid.write(line.strip()) fid.write(os.linesep) # Write out the shape of the fs fid.write('{0} '.format(self.sample_size)) if foldmaskinfo: if not self.folded: fid.write('unfolded ') else: fid.write('folded ') if extrapinfo: if not self.extrap_x: fid.write('None ') else: fid.write('{0} '.format(self.extrap_x)) if not self.extrap_t: fid.write('None') else: fid.write('{0}'.format(self.extrap_t)) fid.write(os.linesep) # Write the data to the file self.data.tofile(fid, ' ', '%%.%ig' % precision) fid.write(os.linesep) if foldmaskinfo: # Write the mask to the file np.asarray(self.mask,int).tofile(fid, ' ') fid.write(os.linesep) # Close file if newfile: fid.close()
def tofile(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True)
-
Write frequency spectrum to file.
fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.
g, where
is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0.
The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.) On the same line, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] … fs[0,1,0] fs[0,1,1] … A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked.
Expand source code
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True): """ Write frequency spectrum to file. fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.<p>g, where <p> is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0. The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 *samples*.) On the *same line*, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ... A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked. """ # Open the file object. newfile = False if not hasattr(fid, 'write'): newfile = True fid = open(fid, 'w') # Write comments for line in comment_lines: fid.write('# ') fid.write(line.strip()) fid.write(os.linesep) # Write out the shape of the fs fid.write('{0} '.format(self.sample_size)) if foldmaskinfo: if not self.folded: fid.write('unfolded ') else: fid.write('folded ') if extrapinfo: if not self.extrap_x: fid.write('None ') else: fid.write('{0} '.format(self.extrap_x)) if not self.extrap_t: fid.write('None') else: fid.write('{0}'.format(self.extrap_t)) fid.write(os.linesep) # Write the data to the file self.data.tofile(fid, ' ', '%%.%ig' % precision) fid.write(os.linesep) if foldmaskinfo: # Write the mask to the file np.asarray(self.mask,int).tofile(fid, ' ') fid.write(os.linesep) # Close file if newfile: fid.close()
def unfold(self)
-
Expand source code
def unfold(self): if not self.folded: raise ValueError('Input Spectrum is not folded.') data = self.data unfolded = TLSpectrum(data, mask_infeasible=True) unfolded.extrap_x = self.extrap_x unfolded.extrap_t = self.extrap_t return unfolded