'''
Created on Nov 11, 2014
@author: thocu
'''
from VirtualMicrobes.my_tools.utility import OrderedDefaultdict
from sortedcontainers.sorteddict import SortedDict
from ete3 import TreeNode #TODO: try upgrade to ete3.3 (fork git repo to allow merging in updates to local copy)
import VirtualMicrobes.my_tools.utility as util
import warnings
from collections import OrderedDict, deque
import itertools
#import warnings
def nh_branch(_id, branch_length, features=None):
    '''
    Format one node entry in (extended) New Hampshire notation.

    Parameters
    ----------
    _id : str
        node label; it is concatenated as-is, so it must already be a string
    branch_length : number or None
        written after a ':'; left out when None
    features : iterable of (name, value) pairs, optional
        written as an '[&&NHX:name=value:...]' tag; left out when None

    Returns
    -------
    str : label followed by the optional branch length and NHX tag
    '''
    if branch_length is None:
        length_part = ''
    else:
        length_part = ':' + str(branch_length)
    if features is None:
        feature_part = ''
    else:
        pairs = ['='.join((key, str(value))) for key, value in features]
        feature_part = '[&&NHX:' + ':'.join(pairs) + ']'
    return _id + length_part + feature_part
def newick(_id, time, branch_length):
    '''
    Format a node as '<id>_<time>[:<branch_length>]' for a newick string.

    Parameters
    ----------
    _id : object
        identifier; converted with str()
    time : object
        time point; converted with str()
    branch_length : number or None
        written after a ':'; left out only when None

    Returns
    -------
    str : the newick node entry
    '''
    # Test against None rather than truthiness: a branch of length 0 is a
    # real value and has to be written, as nh_branch() already does.
    if branch_length is None:
        branch_length_tag = ''
    else:
        branch_length_tag = ':' + str(branch_length)
    return str(_id) + '_' + str(time) + branch_length_tag
def nhx(_id, time, branch_length):
    '''
    Format a node as a newick entry followed by an NHX annotation tag.

    The tag has the form '[&&NHX:ID=<id>:TIME=<time>]'.

    Parameters
    ----------
    _id : object
        identifier, used in both the newick label and the ID field
    time : object
        time point, used in both the newick label and the TIME field
    branch_length : number or None
        passed on to :func:`newick`

    Returns
    -------
    str : newick entry with the NHX tag appended
    '''
    # The ID field must use the `_id` argument; the bare name `id` is the
    # builtin function and would print as '<built-in function id>'.
    nhx_tag = '[&&NHX:' + 'ID=' + str(_id) + ':TIME=' + str(time) + ']'
    return newick(_id, time, branch_length) + nhx_tag
class PhyloNode(object):
    '''
    Node of a phylogenetic tree: one value (`val`) at one time point.

    A node is linked in two independent ways:

    * internal links (`internal_parent_node` / `internal_child_node`) form a
      chain with at most one parent and one child;
    * phylogenetic links (`ancestor_nodes` / `offspring_nodes`) are lists
      connecting the node to any number of other nodes.

    Attributes
    ----------
    val : object
        wrapped object; must expose an `id` attribute
    time : number
        time point of this node
    id : int
        serial number taken from the class-wide counter at construction
    is_root : bool
        flag that is only initialised (False) and cleared here
    root : object or None
        root marker attached with :func:`add_root`
    '''

    max_node_depth = 0  # largest `time` passed to any constructor so far
    id = 0  # class-wide counter; each instance shadows it with its own number

    def __init__(self, val, time):
        self.val = val
        if time > PhyloNode.max_node_depth:
            PhyloNode.max_node_depth = time
        self.time = time
        self.offspring_nodes = []
        self.ancestor_nodes = []
        self.internal_child_node = None
        self.internal_parent_node = None
        PhyloNode.id += 1
        self.id = PhyloNode.id
        self.is_root = False
        self.root = None

    def add_root(self, root_node):
        '''Attach `root_node` as root marker and make it point back to self.'''
        self.root = root_node
        root_node.internal_child_node = self

    def excise(self):
        '''
        Remove references to this node from other tree nodes and reconnect
        the remaining nodes.
        '''
        parent, child = self.internal_parent_node, self.internal_child_node
        if parent and child:
            # cross-connect the internal child with the internal parent
            child._connect_internal_parent(parent)
        elif parent:
            # the internal parent should no longer reference self
            parent._remove_internal_child()
        elif child:
            # the child takes over the ancestors (and the root) of self
            child._inherit_ancestors_from(self)
            if self.has_root():
                child.inherit_root(self)
            child._remove_internal_parent()
        else:
            # isolated on its chain: drop self from every ancestor
            for anc in self.ancestor_nodes:
                anc.offspring_nodes.remove(self)
        self.internal_parent_node = None
        self.internal_child_node = None

    def push_onto_internal_child(self, child):
        '''
        Make this node the internal parent of `child`.

        The ancestors of `child` (and its root, if any) move up to this node.

        :param child: PhyloNode that becomes the internal child
        :raises Exception: when `child` is this node itself
        '''
        if self is child:
            raise Exception('trying to make {} a child of itself'.format(child))
        self._inherit_ancestors_from(child)
        child._connect_internal_parent(self)
        if child.has_root():
            child.push_up_root()

    def _connect_internal_parent(self, parent):
        '''Link self and `parent` in both directions on the internal chain.'''
        self.internal_parent_node = parent
        parent.internal_child_node = self

    def _disconnect_internal_child(self):
        '''Unlink the internal child in both directions.'''
        self.internal_child_node._remove_internal_parent()
        self.internal_child_node = None

    def _remove_internal_child(self):
        self.internal_child_node = None

    def _remove_internal_parent(self):
        self.internal_parent_node = None

    def connect_phylo_offspring(self, offspring):
        '''Register `offspring` as offspring of self and self as its ancestor.'''
        self._add_phylo_offspring(offspring)
        offspring._add_phylo_ancestor(self)

    def _add_phylo_offspring(self, offspring):
        self.offspring_nodes.append(offspring)

    def _add_phylo_ancestor(self, ancestor):
        self.ancestor_nodes.append(ancestor)

    def _inherit_ancestors_from(self, node):
        '''Move every ancestor link of `node` over to self.'''
        # iterate over a copy: the list is emptied while looping
        for anc in node.ancestor_nodes[:]:
            anc.offspring_nodes.remove(node)
            anc.connect_phylo_offspring(self)
            node.ancestor_nodes.remove(anc)

    @property
    def children(self):
        '''Internal child (if any) followed by the phylogenetic offspring.'''
        internal = [self.internal_child_node] if self.internal_child_node else []
        return internal + self.offspring_nodes

    @property
    def parents(self):
        '''Internal parent (if any) followed by the phylogenetic ancestors.'''
        internal = [self.internal_parent_node] if self.internal_parent_node else []
        return internal + self.ancestor_nodes

    def push_up_root(self):
        '''
        Iteratively push up the root to the internal_parent_node.
        '''
        if self.internal_parent_node:
            self.internal_parent_node.inherit_root(self)
            self.internal_parent_node.push_up_root()

    def inherit_root(self, node):
        '''Take over the root marker of `node` and re-point the marker at self.'''
        self.root = node.root
        node.root = None
        self.root.internal_child_node = self

    def has_root(self):
        '''Return True when a root marker is attached to this node.'''
        return bool(self.root)

    def _remove_root(self):
        self.root = None

    def remove_root_stem(self):
        '''
        Clear the root status of this node and the root marker of its
        internal child.

        :raises Exception: when this node is not flagged as root
        '''
        if not self.is_root:
            raise Exception('Trying to remove root stem of non-root node', str(self))
        if self.internal_child_node is not None:
            self.internal_child_node._remove_root()
        self.is_root = False

    @property
    def is_leaf(self):
        """
        is this a leaf of the tree
        """
        return not (self.internal_child_node or self.offspring_nodes)

    def dist_to_parent(self, parent):
        '''
        Return the time difference to `parent`.

        `parent` may be one of :attr:`parents`, or None to measure against
        the attached root marker. Returns None when neither applies.
        '''
        dist = None
        if parent in self.parents:
            dist = self.time - parent.time
        elif self.root and parent is None:
            dist = self.time - self.root.time
        return dist

    @property
    def newick_id(self):
        '''
        id tag used in new hampshire (newick) tree format
        '''
        return str(self.val.id) + '_' + str(self.time)

    @property
    def nhx_features(self):
        '''
        additional node features used in the 'extended' newick format (nhx)
        '''
        return [('ID', str(self.val.id)), ('TIME', self.time)]

    def iter_prepostorder(self, is_leaf_fn=None):
        """
        Iterate over all nodes in a tree yielding every node in both
        pre and post order.

        Each iteration yields a `(postorder, parent, node)` triple, where
        `postorder` is True when the node is visited in postorder and
        `parent` is the node through which `node` was reached (None for the
        start node). Leaves are only reported in preorder.

        Parameters
        ----------
        is_leaf_fn : callable, optional
            `is_leaf_fn(node)` decides whether traversal stops at `node`;
            defaults to the node's own `is_leaf` property
        """
        # Stack entries carry an explicit postorder flag instead of relying
        # on a failed tuple unpacking to tell the two kinds apart.
        to_visit = [(False, None, self)]
        while to_visit:
            postorder, parent, node = to_visit.pop()
            if postorder:
                yield (True, parent, node)
                continue
            yield (False, parent, node)
            at_leaf = node.is_leaf if is_leaf_fn is None else is_leaf_fn(node)
            if not at_leaf:
                # the postorder marker goes below the children so that it is
                # popped after them; children are pushed reversed so that the
                # first child is visited first
                to_visit.append((True, parent, node))
                to_visit.extend((False, node, c) for c in reversed(node.children))

    def _iter_descendants_levelorder(self, is_leaf_fn=None):
        """
        Iterate over self and all descendant nodes, level by level.

        When `is_leaf_fn` is given, the children of nodes for which it
        returns true are not visited.
        """
        tovisit = deque([self])
        while len(tovisit) > 0:
            node = tovisit.popleft()
            yield node
            if not is_leaf_fn or not is_leaf_fn(node):
                tovisit.extend(node.children)

    def __str__(self):
        internal_child_id = self.internal_child_node.id if self.internal_child_node else ''
        internal_parent_id = self.internal_parent_node.id if self.internal_parent_node else ''
        return ('TN'+str(self.id)+'{int_child_TN:'+str(internal_child_id)+
                ", int_parent_TN:"+str(internal_parent_id)+
                " external_offspring_TN:[" + ",".join([str(c.id) for c in self.offspring_nodes])+
                "], external_ancestor_TN:[" + ",".join([str(c.id) for c in self.ancestor_nodes]) + "]" +
                ", id:"+str(self.val.id)+", t:"+str(self.time)+('root:'+str(self.root.id) if self.root else '')+'}' )
[docs]class PhyloTree(object):
"""
Primary use is a phylogenetic tree, representing reproduction/speciation
events at particular points in time. Because generations are overlapping, a
parent may have offspring at various time points. Therefore Nodes are not
strictly parents, but rather, parent-reproduction-time tuples.
"""
class_version = '1.0'
class SuperNode(object):
    '''Placeholder object whose only state is the fixed id 'ROOT'.'''

    def __init__(self):
        self.id = 'ROOT'
def __init__(self, supertree=False):
    '''
    Create an empty tree.

    :param supertree: stored unchanged on the instance as `self.supertree`
    '''
    self.version = self.__class__.class_version
    self.max_depth = 0
    # phylo unit -> SortedDict(time -> node); SortedDict keeps the nodes of
    # one unit ordered by time
    self.nodes_dict = OrderedDefaultdict(SortedDict)
    self.leafs = set()
    self.max_leaf_depth = (0, None)  # (time, leaf node)
    self.roots = []
    self.ete_trees = OrderedDict()
    self.lca = None
    self.supertree = supertree
def clear(self):
    '''
    Reset the node registry, the leaf set, the deepest-leaf record and the
    root list. Other attributes (e.g. `max_depth`, `ete_trees`) are kept.
    '''
    self.nodes_dict = OrderedDefaultdict(SortedDict)
    self.leafs = set()
    self.max_leaf_depth = (0, None)
    self.roots = []
def create_root_stem(self, phylo_root):
    '''
    Add a root node for `phylo_root` at its time of birth.

    The node is connected on its own branch, registered in `self.roots` and
    flagged as root. When the unit has a time of death that has no node yet,
    a leaf is created as well.

    :param phylo_root: phylo unit exposing `time_birth` and `time_death`
    :return: the root node
    '''
    new_root = self.add_node(phylo_root, phylo_root.time_birth)
    if new_root is not None:
        self.connect_internal_node(new_root)
        self.roots.append(new_root)
        new_root.is_root = True
        time_death = phylo_root.time_death
        # `in` replaces the deprecated dict.has_key()
        if time_death is not None and time_death not in self.nodes_dict[phylo_root]:
            self.create_leaf(phylo_root)
    return new_root
def _connect_root(self, root_node):
root_child = None
if len(self.nodes_dict[root_node.val]) > 0: #internal node entries for phylo_unit exist in nodes_dict
internal_phylo_time = self.nodes_dict[root_node.val].iloc[0]
root_child = self.nodes_dict[root_node.val][internal_phylo_time]
else:
root_child = self.create_leaf(root_node.val)
root_child.add_root(root_node)
def _remove_root(self, root_node):
root_node.remove_root_stem()
#print 'removing root', root_node
self.roots.remove(root_node)
def add_leaf(self, leaf):
    '''
    Register `leaf` and update the deepest-leaf record when its time is
    strictly larger than the recorded one.
    '''
    self.leafs.add(leaf)
    deepest_time = self.max_leaf_depth[0]
    if leaf.time > deepest_time:
        self.max_leaf_depth = (leaf.time, leaf)
def update(self, new_phylo_units=[], removed_phylo_units=[], new_roots=[]):
    '''
    Apply a batch of changes to the tree.

    Root stems are created first, then the histories of new units are
    added, and finally the histories of removed units are deleted.
    The default lists are only read, never modified.
    '''
    for root_unit in new_roots:
        self.create_root_stem(root_unit)
    for added_unit in new_phylo_units:
        self.add_phylo_history(added_unit)
    for removed_unit in removed_phylo_units:
        self.delete_phylo_hist(removed_unit)
def add_phylo_history(self, phylo_unit):
    '''
    Add `phylo_unit` to the tree: create its leaf, create (or look up) the
    parent nodes at its time of birth, and connect each of them to it.

    :return: tuple (list of parent nodes, leaf node)
    '''
    leaf_node = self.create_leaf(phylo_unit)
    parent_nodes = self.create_stems(phylo_unit)
    for parent_node in parent_nodes:
        self.connect_phylo_parent_child(parent_node, leaf_node)
    return parent_nodes, leaf_node
def create_leaf(self, phylo_unit):
    '''
    Return the node for `phylo_unit` at its time of death, creating it when
    no node is registered under that time.

    A new node is registered as leaf and connected on its own branch. When
    the node already exists a warning is printed and it is returned as is.

    :param phylo_unit: phylo unit exposing `time_death`
    :return: the (new or existing) node
    '''
    time_death = phylo_unit.time_death
    stem = self.nodes_dict[phylo_unit]
    # `in` replaces the deprecated dict.has_key()
    if time_death not in stem:
        new_leaf = self.add_node(phylo_unit, time_death)
        self.add_leaf(new_leaf)
        self.connect_leaf(new_leaf)
    else:
        new_leaf = stem[time_death]
        # single pre-formatted argument: prints the same text whether
        # `print` is a statement or a function
        print('WARNING: Node already in tree {}'.format(new_leaf))
    return new_leaf
def connect_leaf(self, leaf_node):
    '''
    Connect `leaf_node` to the node directly before it on its own branch.

    Nothing happens when the leaf is the only node registered for its unit.
    NOTE(review): when the leaf sorts first on its branch the index -1 wraps
    around to the last node; presumably callers never hit this -- confirm.

    :return: True when the leaf was connected, else False
    '''
    stem = self.nodes_dict[leaf_node.val]
    if len(stem) <= 1:
        return False
    position = stem.index(leaf_node.time)
    previous_time = stem.iloc[position - 1]
    stem[previous_time].push_onto_internal_child(leaf_node)
    return True
def create_stems(self, phylo_unit):
    '''
    Return the parent nodes of `phylo_unit` at its time of birth.

    For every parent in `phylo_unit.parents` the node at `time_birth` is
    looked up; missing nodes are created and connected on the parent's own
    branch. Existing nodes come first in the result, newly created ones
    follow.

    :param phylo_unit: phylo unit exposing `time_birth` and `parents`
    :return: list of parent nodes
    '''
    time_birth = phylo_unit.time_birth
    phylo_parent_nodes = []
    new_internal_nodes = []
    for phylo_parent in phylo_unit.parents:
        parent_stem = self.nodes_dict[phylo_parent]
        # `in` replaces the deprecated dict.has_key()
        if time_birth in parent_stem:
            phylo_parent_nodes.append(parent_stem[time_birth])
        else:
            new_internal_nodes.append(self.add_node(phylo_parent, time_birth))
    for int_node in new_internal_nodes:
        self.connect_internal_node(int_node)
        phylo_parent_nodes.append(int_node)
    return phylo_parent_nodes
def connect_phylo_parent_child(self, phylo_parent_node, phylo_child_node):
    '''
    Register a phylogenetic parent-offspring link.

    The link always ends at the earliest node of the child's branch: the
    child node itself when it sorts first, otherwise the first node
    registered for the same unit.
    '''
    child_stem = self.nodes_dict[phylo_child_node.val]
    if child_stem.index(phylo_child_node.time) == 0:
        target = phylo_child_node
    else:
        target = child_stem[child_stem.iloc[0]]
    phylo_parent_node.connect_phylo_offspring(target)
def add_node(self, phylo_unit, time):
    '''
    Register a node for `phylo_unit` at `time` and return it.

    When `time` is None the current `self.max_depth` is used; otherwise
    `self.max_depth` is raised to `time` if needed. If a node is already
    registered under that time, the existing node is kept and returned and
    a warning showing both nodes is issued.

    :param phylo_unit: value stored in the node
    :param time: time point of the node, or None
    :return: the registered node
    '''
    if time is not None:
        self.max_depth = max(self.max_depth, time)
    else:
        time = self.max_depth
    # The candidate is built before the lookup, as before, so the id
    # counter of PhyloNode advances exactly as it always did.
    new_node = PhyloNode(phylo_unit, time)
    stem = self.nodes_dict[new_node.val]
    if new_node.time in stem:
        existing = stem[new_node.time]
        # Format before rebinding, so the warning shows the kept node and
        # the discarded candidate rather than the same node twice.
        warnings.warn('reassigning existing node to tree:\n {}\n{}'.format(existing,
                                                                            new_node))
        new_node = existing
    stem[new_node.time] = new_node
    return new_node
def reconnect_internal_nodes(self, internal_parent, internal_child):
    '''
    Insert `internal_child` directly below `internal_parent`.

    A node that was the internal child of `internal_parent` before the call
    ends up below `internal_child`.
    '''
    displaced = internal_parent.internal_child_node
    if displaced is not None:
        internal_child.push_onto_internal_child(displaced)
    internal_parent.push_onto_internal_child(internal_child)
def connect_internal_node(self, int_node):
    '''
    Connect internal node 'int_node' with the nodes above and below it on
    its own branch (the nodes registered for the same phylogenetic unit).

    :param int_node: newly made internal node that should get connected up
        and down in the tree.
    :return: tuple (connected_up, connected_down) of booleans
    '''
    stem = self.nodes_dict[int_node.val]
    position = stem.index(int_node.time)
    has_later_nodes = len(stem) > position + 1
    if position > 0:
        # earlier nodes exist: hook in below the closest earlier one
        earlier_node = stem[stem.iloc[position - 1]]
        self.reconnect_internal_nodes(earlier_node, int_node)
        return (True, has_later_nodes)
    if has_later_nodes:
        # earliest node on the branch: hook in above the next node
        later_node = stem[stem.iloc[position + 1]]
        self.reconnect_internal_nodes(int_node, later_node)
        return (False, True)
    return (False, False)
def delete_phylo_hist(self, phylo_unit):
    '''
    Remove the branch representing the phylogenetic unit and disconnect it
    from all sub-branches (children) in the tree.

    :param phylo_unit: unit whose nodes are deleted
    :raises Exception: when no node is registered for `phylo_unit`
    '''
    birth, death = phylo_unit.time_birth, phylo_unit.time_death
    if len(self.nodes_dict[phylo_unit]) == 0:
        # nothing is registered for this unit
        raise Exception('could not find internal node for {}, {}, {}'.format(phylo_unit.id, birth, death))
    # Iterate over a snapshot: delete_node() removes entries from the very
    # mapping that is being walked.
    for node in list(self.nodes_dict[phylo_unit].values()):
        self.delete_node(node)
def delete_node(self, tree_node):
    '''
    Remove `tree_node` from the tree and from the node registry.

    Ancestors that are left without offspring and without root marker are
    excised and unregistered as well. Emptied registry entries are deleted,
    the node is dropped from the leaf set, and a node that is both root and
    leaf afterwards is removed from the roots.
    '''
    tree_node.excise()
    for phylo_parent in tree_node.ancestor_nodes:
        if phylo_parent.offspring_nodes or phylo_parent.has_root():
            continue
        phylo_parent.excise()
        del self.nodes_dict[phylo_parent.val][phylo_parent.time]
        if not len(self.nodes_dict[phylo_parent.val]):
            del self.nodes_dict[phylo_parent.val]
    for phylo_child in tree_node.offspring_nodes:
        phylo_child.ancestor_nodes.remove(tree_node)
    del self.nodes_dict[tree_node.val][tree_node.time]
    if not len(self.nodes_dict[tree_node.val]):
        del self.nodes_dict[tree_node.val]
    # the node may or may not be registered as a leaf
    self.leafs.discard(tree_node)
    if tree_node.is_root and tree_node.is_leaf:
        self._remove_root(tree_node)
def delete_empty_phylo_stems(self):
    '''Delete every registry entry whose time -> node mapping is empty.'''
    # Collect first, delete afterwards: removing keys while iterating over
    # items() fails as soon as items() is a live view instead of a list.
    empty_units = [phylo_unit
                   for phylo_unit, times_dict in self.nodes_dict.items()
                   if len(times_dict) == 0]
    for phylo_unit in empty_units:
        del self.nodes_dict[phylo_unit]
def recalc_max_leaf_depth(self, leafs=None):
    '''
    Recompute `self.max_leaf_depth` as (time, leaf) of the deepest leaf.

    Parameters
    ----------
    leafs : iterable of nodes, optional
        leaves to consider; defaults to `self.leafs`

    The result is (0, None) when no leaf has a time larger than 0.
    '''
    if leafs is None:
        leafs = self.leafs
    max_leaf_depth, leaf_node = 0, None
    # iterate over the requested leaves; the argument used to be ignored
    # in favour of self.leafs
    for leaf in leafs:
        if leaf.time > max_leaf_depth:
            max_leaf_depth, leaf_node = leaf.time, leaf
    self.max_leaf_depth = (max_leaf_depth, leaf_node)
def find_lca(self):
    '''
    Find the last common ancestor in the tree.

    Start at the single root and walk down until a branching point is
    found.

    Returns
    -------
    PhyloNode : LCA if there is exactly one root, else None
    '''
    if len(self.roots) != 1:
        return None
    node = self.roots[0]
    while True:
        offspring = node.offspring_nodes
        internal_child = node.internal_child_node
        if len(offspring) > 1:
            return node
        if len(offspring) == 1:
            # an internal child that is not a leaf counts as a second branch
            if internal_child and not internal_child.is_leaf:
                return node
            node = offspring[0]
        elif not internal_child:
            # end of the line: nothing below this node
            return node
        else:
            node = internal_child
def coalescent(self):
    '''
    Find the coalescent node and its depth in the tree.

    The result of :func:`find_lca` is stored in `self.lca`. The depth is
    the time of the deepest leaf minus the time of the LCA (0 when there is
    no LCA).

    Returns
    -------
    (LCA node or None, depth)
    '''
    lca = self.find_lca()
    self.lca = lca
    deepest_leaf_time = self.max_leaf_depth[0]
    if lca is None:
        return (lca, deepest_leaf_time - 0)
    return (lca, deepest_leaf_time - lca.time)
def nodes_iter(self):
    '''
    Yield every registered node: unit by unit in registry order, and per
    unit in the order of its time -> node mapping.
    '''
    for nodes_by_time in self.nodes_dict.values():
        for tree_node in nodes_by_time.values():
            yield tree_node
'''
Functions pertaining to ete3-tree representation. PhyloTree can be
(approximately) converted to a representation of trees in the ete3 module.
This is useful for drawing routines and various tree algorithms and
functions.
'''
@property
def ete_tree(self):
    '''
    Return the first 'ete tree' in the ete trees dict if it exists.

    This is an adapter function during the transition to working with the
    multiple ete tree dictionary.

    Returns
    -------
    the `tree` attribute of the first entry of `self.ete_trees`, or None
    when the dict is empty
    '''
    # Take the first value by iteration: no throw-away list of all values,
    # and it also works when values() is a view that cannot be indexed.
    for tree_struct in self.ete_trees.values():
        return tree_struct.tree
    return None
@property
def ete_named_node_dict(self):
    '''
    Return the ete_named_node_dict belonging to the first ete tree.

    Returns
    -------
    the `named_node_dict` attribute of the first entry of `self.ete_trees`,
    or None when the dict is empty
    '''
    # Take the first value by iteration: no throw-away list of all values,
    # and it also works when values() is a view that cannot be indexed.
    for tree_struct in self.ete_trees.values():
        return tree_struct.named_node_dict
    return None
@property
def ete_node_name_to_phylo_node(self):
    '''
    Return the ete_node_name_to_phylo_node belonging to the first ete tree.

    Returns
    -------
    the `node_name_to_phylo_node` attribute of the first entry of
    `self.ete_trees`, or None when the dict is empty
    '''
    # Take the first value by iteration: no throw-away list of all values,
    # and it also works when values() is a view that cannot be indexed.
    for tree_struct in self.ete_trees.values():
        return tree_struct.node_name_to_phylo_node
    return None
[docs] def to_ete_trees(self, with_leafs=False, supertree=None):
'''
Construct TreeNode representations for all roots in the PhyloTree.
ete3.TreeNode representations can be used for advanced plotting and tree
analysis.
Parameters
----------
with_leafs : bool
construct with or without terminal nodes
supertree : bool
use a supernode as an artificial root node (useful when simulation
starts from independent lineages)
Returns
-------
OrderedDict of name -> TreeNode : TreeNodes by name for all phylo roots
'''
print 'writing NHX format'
nhxs = self.nh_formats(supertree=supertree, _format='NHX')
#print nhx
print 'constructing ETE trees for roots:',
self.ete_trees = OrderedDict()
for nhx in nhxs:
tree = TreeNode(nhx, format=1)
named_node_dict, node_name_to_phylo_node = self.ete_init_mappings(tree)
tree_struct = util.ETEtreeStruct(tree=tree,
named_node_dict=named_node_dict,
node_name_to_phylo_node=node_name_to_phylo_node
)
if not with_leafs:
self.ete_prune_leafs(tree_struct)
self.ete_trees[tree.name] = tree_struct
print tree.name,
print 'done'
return self.ete_trees
def ete_init_mappings(self, ete_tree):
    '''
    Construct the helper dictionaries for an ete tree.

    Returns
    -------
    (OrderedDict, OrderedDict) : ete node name -> ete node for every node
    yielded by `ete_tree.traverse()`, and newick id -> PhyloNode for every
    node registered in this tree
    '''
    named_nodes = OrderedDict()
    for ete_node in ete_tree.traverse():
        named_nodes[ete_node.name] = ete_node
    name_to_phylo = OrderedDict()
    for phylo_node in self.nodes_iter():
        name_to_phylo[phylo_node.newick_id] = phylo_node
    return named_nodes, name_to_phylo
def ete_get_lca(self, ete_tree, nodes=None):
    '''
    Return the last common ancestor (LCA) of a set of ete nodes.

    Parameters
    ----------
    ete_tree : TreeNode
        ete tree root node
    nodes : list of class:`ete3.TreeNode`, optional
        nodes for which to find the LCA (default: all leaves of `ete_tree`)

    Returns
    -------
    TreeNode : the last common ancestor, or None when `nodes` is empty
    '''
    if nodes is None:
        nodes = ete_tree.get_leaves()
    if not len(nodes):
        return None
    return ete_tree.get_common_ancestor(nodes)
def ete_calc_lca_depth(self, ete_tree):
    '''
    Calculate the distance from the last common ancestor to its farthest
    leaf in an ete tree.

    Parameters
    ----------
    ete_tree : TreeNode
        ete tree root node

    Returns
    -------
    depth of the LCA; 0 when the tree has no LCA
    '''
    lca = self.ete_get_lca(ete_tree)
    if lca is None:
        return 0
    _farthest_leaf, depth = lca.get_farthest_leaf()
    return depth
def distances(self, ete_tree, nodes):
    """
    Pairwise distances between all nodes.

    Parameters
    ----------
    ete_tree : class:`ete3.TreeNode`
        ete tree root node
    nodes : sequence of (phylo_node, ete_node) pairs

    Returns
    -------
    list of (phylo_node1, phylo_node2, tree_dist, top_dist), one entry per
    unordered pair
        - tree_dist is `ete_tree.get_distance` of the two ete nodes
        - top_dist is the same call with `topology_only=True`
    """
    return [(pn1, pn2,
             ete_tree.get_distance(en1, en2),
             ete_tree.get_distance(en1, en2, topology_only=True))
            for (pn1, en1), (pn2, en2) in itertools.combinations(nodes, 2)]
def ete_n_most_distant_phylo_units(self, ete_tree_struct, n=2, root=None):
    '''
    Convenience function that maps the result of
    :func:`ete_n_most_distant_leafs` to phylo units.

    :param ete_tree_struct: struct holding the ete tree and its mappings
    :param n: number of leaves to find
    :param root: phylo unit used as root; its first birth node in the ete
        tree is the starting point (default: the root of the ete tree)
    :return: list of (phylo unit, ete leaf) tuples
    '''
    if root is None:
        start_node = ete_tree_struct.tree
    else:
        start_node = self.ete_phylo_to_ete_birth_nodes(ete_tree_struct, root)[0]
    name_to_phylo = ete_tree_struct.node_name_to_phylo_node
    return [(name_to_phylo[leaf.name].val, leaf)
            for leaf, _dist in self.ete_n_most_distant_leafs(start_node, n)]
def ete_n_most_distant_leafs(self, ete_root, n):
    '''
    Find n leaves that are most diverged from each other.

    First the n oldest subtrees are determined, then the farthest leaf of
    each subtree is returned. For n == 1 the leaf with the smallest name is
    returned, paired with None.

    :param ete_root: starting point for the search
    :param n: number of leaves
    :return: list of (leaf, distance) tuples
    '''
    if n == 1:
        leaves_by_name = sorted(ete_root.get_leaves(), key=lambda leaf: leaf.name)
        return [(leaves_by_name[0], None)]
    oldest_subtrees = self.ete_n_oldest_subtrees(ete_root, n)
    return [subtree.get_farthest_leaf() for subtree, _age in oldest_subtrees]
def ete_n_oldest_subtrees(self, ete_root, n):
    '''
    Find n oldest subtrees under root.

    Repeatedly replaces the oldest subtree (smallest distance from the
    root) by its children until n subtrees are listed, or until the oldest
    subtree is a leaf.

    Parameters
    ----------
    ete_root : TreeNode
        start point for subtree search
    n : int
        number of subtrees

    Returns
    -------
    subtrees : a list of at most n (TreeNode, distance-from-root) tuples,
    ordered by distance
    '''
    subtrees = [(ete_root, 0)]
    while len(subtrees) < n:
        oldest, age = subtrees[0]
        if oldest.is_leaf():
            # the oldest subtree cannot be expanded any further
            break
        expanded = subtrees[1:]
        expanded.extend((child, age + child.dist) for child in oldest.get_children())
        subtrees = sorted(expanded, key=lambda entry: entry[1])
    # multifurcations may have produced more than n subtrees
    return subtrees[:n]
def _node_ids_to_ete_nodes(self, ete_tree_struct, node_ids):
'''
Iterator that maps ete node ids to TreeNode objects using a precompiled map.
Parameters
----------
node_ids : list
list of ete node ids
Yields
------
TreeNode : the mapped ete node
'''
for _id in node_ids:
try:
yield ete_tree_struct.named_node_dict[_id]
except KeyError: # We want to catch this, because the ete_tree may have
# had been pruned
warnings.warn('Node id {} not found in the ete_named_node_dict. Perhaps the ete_tree was pruned.'.format(_id))
pass
def ete_phylo_to_ete_birth_nodes(self, ete_tree_struct, phylo_unit):
    '''
    Return the birth nodes in the ete tree for a phylo unit.

    For every parent of the unit, the parent's node at the unit's time of
    birth is mapped to the ete tree. Nodes that are missing from the ete
    tree are skipped.

    Parameters
    ----------
    ete_tree_struct : struct holding the ete tree and its mappings
    phylo_unit : class:`virtual_cell.PhyloUnit.PhyloUnit`
        phylounit to map

    Returns
    -------
    list of class:`ete3.TreeNode`
    '''
    time_birth = phylo_unit.time_birth
    if phylo_unit.parents is not None:
        birth_owners = phylo_unit.parents
    else:
        # This branch used to read the unbound loop variable `parent` and
        # always failed.
        # NOTE(review): a unit without parents presumably has its birth node
        # registered under the unit itself, as create_root_stem() does for
        # roots -- confirm.
        birth_owners = [phylo_unit]
    node_ids = [self.nodes_dict[owner][time_birth].newick_id
                for owner in birth_owners]
    return list(self._node_ids_to_ete_nodes(ete_tree_struct, node_ids))
def ete_phylo_to_ete_stem_nodes(self, ete_tree_struct, phylo_unit):
    '''
    Return the stem nodes in the ete tree for a phylo unit.

    The stem nodes are all nodes registered for the unit itself. Nodes
    that are missing from the ete tree are skipped.

    Parameters
    ----------
    ete_tree_struct : struct holding the ete tree and its mappings
    phylo_unit : class:`virtual_cell.PhyloUnit.PhyloUnit`
        phylounit to map

    Returns
    -------
    list of class:`ete3.TreeNode`
    '''
    stem = self.nodes_dict[phylo_unit]
    node_ids = [stem_node.newick_id for _time, stem_node in stem.items()]
    return list(self._node_ids_to_ete_nodes(ete_tree_struct, node_ids))
def ete_phylo_to_ete_death_node(self, ete_tree_struct, phylo_unit):
    '''
    Return the TreeNode representing the death of a phylo_unit.

    The lookups are not guarded: a KeyError escapes when the unit has no
    node at its time of death or the node is missing from the ete tree.
    '''
    time_death = phylo_unit.time_death
    death_node = self.nodes_dict[phylo_unit][time_death]
    return ete_tree_struct.named_node_dict[death_node.newick_id]
def phylo_to_ete_nodes(self, ete_tree_struct, phylo_unit):
    '''
    Return the TreeNode objects in the ete_tree that represent this
    phylo_unit (e.g. cell or gene): birth nodes first, then stem nodes,
    then the death node when it can be found.

    Parameters
    ----------
    ete_tree_struct : class:`my_tools.utility.ETEtreeStruct`
        ete tree struct in which to find the phylo_unit
    phylo_unit : class:`virtual_cell.PhyloUnit.PhyloUnit`
        phylounit to map

    Returns
    -------
    list of class:`ete3.TreeNode`
    '''
    nodes = list(self.ete_phylo_to_ete_birth_nodes(ete_tree_struct, phylo_unit))
    nodes.extend(self.ete_phylo_to_ete_stem_nodes(ete_tree_struct, phylo_unit))
    try:
        nodes.append(self.ete_phylo_to_ete_death_node(ete_tree_struct, phylo_unit))
    except KeyError:
        # no death node for this unit in the ete tree
        pass
    return nodes
def ete_node_to_phylo_unit(self, ete_tree_struct, ete_node):
    '''
    Map a TreeNode to its PhyloUnit.

    Parameters
    ----------
    ete_tree_struct : struct holding the `node_name_to_phylo_node` mapping
    ete_node : TreeNode
        node to map

    Returns
    -------
    PhyloUnit : the `val` of the PhyloNode registered under the node's name
    '''
    phylo_node = ete_tree_struct.node_name_to_phylo_node[ete_node.name]
    return phylo_node.val
def ete_nodes_to_phylo_units(self, ete_tree_struct, nodes=None):
    '''
    Obtain the phylo units that are represented in the ete tree.

    These are the keys of the mapping built by
    :func:`ete_get_phylo2ete_dict` for `nodes`.
    '''
    phylo2ete = self.ete_get_phylo2ete_dict(ete_tree_struct, nodes)
    return phylo2ete.keys()
def ete_get_phylo2ete_dict(self, ete_tree_struct, nodes=None):
    '''
    Return the mapping from PhyloUnits to lists of ete TreeNode objects.

    Parameters
    ----------
    ete_tree_struct : struct holding the ete tree and its mappings
    nodes : sequence, optional
        TreeNodes to map (default: all nodes of the ete tree)

    Returns
    -------
    OrderedDefaultdict from class:`virtual_cell.PhyloUnit.PhyloUnit` to
    list of class:`ete3.TreeNode`
    '''
    if nodes is None:
        nodes = list(ete_tree_struct.tree.traverse())
    grouped = OrderedDefaultdict(list)
    for ete_node in nodes:
        owner = self.ete_node_to_phylo_unit(ete_tree_struct, ete_node)
        grouped[owner].append(ete_node)
    return grouped
def ete_prune_leafs(self, ete_struct):
    '''
    Prune the external nodes of an 'ete tree'.

    Every current leaf is detached from the tree and removed from the
    `named_node_dict` of the struct.

    Parameters
    ----------
    ete_struct : class:`my_tools.utility.ETEtreeStruct`
        struct holding data for the ete tree
    '''
    # collect before detaching so the tree is not changed while its
    # leaves are being listed
    current_leaves = set(ete_struct.tree.get_leaves())
    for leaf in current_leaves:
        leaf.detach()
        del ete_struct.named_node_dict[leaf.name]
def ete_prune_internal(self, ete_tree_struct):
    '''
    Excise all internal nodes that have a single child, while preserving
    the length of the branch.

    The root and the leaves are never removed. Excised nodes are also
    removed from the `named_node_dict` of the struct.
    '''
    single_child_nodes = set(
        node for node in ete_tree_struct.tree.traverse()
        if not (node.is_leaf() or node.is_root()) and len(node.children) == 1)
    for node in single_child_nodes:
        node.delete(prevent_nondicotomic=True, preserve_branch_length=True)
        del ete_tree_struct.named_node_dict[node.name]
[docs] def ete_prune_external(self,ete_tree_struct, prune_depth):
print 'pruning tree to max depth', prune_depth,
to_detach = set()
for node in ete_tree_struct.tree.iter_leaves(is_leaf_fn = lambda n:
self.ete_node_to_phylo_unit(ete_tree_struct,
n).time_birth > prune_depth):
print '.',
to_detach.add(node)
for n in to_detach:
n.detach()
del ete_tree_struct.named_node_dict[n.name]
print 'done'
def ete_annotate_tree(self, ete_tree_struct, features=[], func_features=dict(), ete_root=None):
    '''
    Annotate the nodes of an ete tree with data of their phylo units.

    For every phylo unit found under `ete_root` a feature dictionary is
    built and attached (with `add_features`) to all ete nodes of that unit.

    :param ete_tree_struct: struct holding the ete tree and its mappings
    :param features: names of attributes that are read from the phylo unit
    :param func_features: mapping of feature name -> function; the function
        is called with the phylo unit and its result stored under the name
    :param ete_root: node under which to annotate (default: the root of the
        ete tree)
    '''
    if ete_root is None:
        ete_root = ete_tree_struct.tree
    phylo2ete = self.ete_get_phylo2ete_dict(ete_tree_struct, ete_root.traverse())
    for phylo_unit, ete_nodes in phylo2ete.items():
        # computed features are applied last and win on a name clash
        annotations = dict((name, getattr(phylo_unit, name)) for name in features)
        for name, func in func_features.items():
            annotations[name] = func(phylo_unit)
        for ete_node in ete_nodes:
            ete_node.add_features(**annotations)
def ete_cummulative_features(self, ete_tree_struct, features=[],
                             func_features=dict(), ete_root=None):
    '''
    Convert the annotated features to cumulative values along the tree.

    :param ete_tree_struct: struct holding the ete tree and its mappings
    :param features: feature names
    :param func_features: mapping whose keys are further feature names
    :param ete_root: node under which to convert (default: the root of the
        ete tree)
    '''
    if ete_root is None:
        ete_root = ete_tree_struct.tree
    # Build the name list from explicit lists: `features + dict.keys()`
    # only works when both operands happen to be lists.
    for feature in list(features) + list(func_features):
        self.ete_convert_feature_to_cummulative(ete_tree_struct, feature, ete_root)
def ete_rate_features(self, ete_tree_struct, features=[],
                      func_features=dict(), ete_root=None):
    '''
    Convert the annotated features to rates of change along the tree.

    :param ete_tree_struct: struct holding the ete tree and its mappings
    :param features: feature names
    :param func_features: mapping whose keys are further feature names
    :param ete_root: node under which to convert (default: the root of the
        ete tree)
    :return: dict of rate feature name -> maximum rate found
    '''
    if ete_root is None:
        ete_root = ete_tree_struct.tree
    max_rate_dict = dict()
    # Build the name list from explicit lists: `features + dict.keys()`
    # only works when both operands happen to be lists.
    for feature in list(features) + list(func_features):
        max_rate, rate_feature = self.ete_convert_feature_to_rate(ete_root=ete_root,
                                                                  feature=feature)
        max_rate_dict[rate_feature] = max_rate
    return max_rate_dict
[docs] def ete_convert_feature_to_cummulative(self, ete_tree_struct, feature, ete_root):
'''
Convert annotated feature values to cummulative values.
By starting at the root, values further down the tree can be computed by
adding values calculated for the parent node. It is useful when for
example the aim is to prune away internal nodes and rates of change of a
feature need to be calculated on the pruned tree.
'''
for n in ete_root.traverse('levelorder'):
if n is ete_root:
continue
parent = n.up
#if parent is not None:
new_val = getattr(parent, feature)
if (self.ete_node_to_phylo_unit(ete_tree_struct, n)
is not self.ete_node_to_phylo_unit(ete_tree_struct, parent)):
# don't add the value if we're looking at an identical phylo_unit
try:
new_val += getattr(n, feature)
except AttributeError:
print feature, 'not found for', n.name
#raise
setattr(n,feature, new_val)
def ete_convert_feature_to_rate(self, ete_root, feature,
                                replace_tup=('count','rate')):
    '''
    Convert an annotated feature on the tree nodes to a rate.

    The function assumes cumulative values of the feature. The rate of a
    node is the difference between its value and the value of its parent,
    divided by the branch length between them. `ete_root` and nodes with a
    branch length that is not positive get rate 0.

    Parameters
    ----------
    ete_root : TreeNode
        node under which rates are calculated
    feature : str
        name of the cumulative feature
    replace_tup : tuple
        arguments for `str.replace`, turning the feature name into the name
        under which the rate is stored

    Returns
    -------
    (max_rate, rate_feature) : largest rate found (at least 0) and the name
    of the rate feature
    '''
    max_rate = 0
    rate_feature = feature.replace(*replace_tup)
    computed = []
    for n in ete_root.traverse('levelorder'):
        rate = 0
        if n is not ete_root:
            branch_length = n.get_distance(n.up)
            distance = getattr(n, feature) - getattr(n.up, feature)
            if branch_length > 0:
                rate = distance / branch_length
                if rate > max_rate:
                    max_rate = rate
        computed.append((n, rate))
    # Store the rates only after all of them are known: when the feature
    # name has nothing to replace, rate and feature share one attribute and
    # writing during the walk would corrupt the values children still read.
    for n, rate in computed:
        setattr(n, rate_feature, rate)
    return max_rate, rate_feature
def annotate_phylo_units_feature(self, ete_tree_struct, phylo_units, feature_name):
    '''
    Flag the ete nodes of the given phylo units.

    Every ete node of every unit in `phylo_units` gets the feature
    `feature_name` set to True, unless the node already has an attribute
    of that name.
    '''
    for phylo_unit in set(phylo_units):
        for ete_node in self.phylo_to_ete_nodes(ete_tree_struct, phylo_unit):
            if hasattr(ete_node, feature_name):
                # annotated before; keep the existing value
                continue
            ete_node.add_feature(feature_name, True)
def annotate_leafs(self, ete_tree_struct, leaf):
    '''
    Annotate leaf labels.

    Every ete node of the phylo unit `leaf` that is a leaf of the ete tree
    gets the feature 'leaflabel', set to the unit's id as a string.
    '''
    label = str(leaf.id)
    ete_leaves = [node for node in self.phylo_to_ete_nodes(ete_tree_struct, leaf)
                  if node.is_leaf()]
    for node in ete_leaves:
        node.add_feature('leaflabel', label)
def __str__(self):
out = ''
for phylo_unit, tp_dict in self.nodes_dict.items():
out += str(phylo_unit.id) + ':\n'
for tp, node in tp_dict.items():
out += " t="+str(tp) + ":" + str(node) + '\n'
return 'ROOTS:\n '+ '\n'.join( [str(root) for root in self.roots]) + '\n' + out
def __len__(self):
return sum([ len(nodes) for nodes in self.nodes_dict.values() ] )
def upgrade(self):
    '''
    Upgrading from older pickled version of class to latest version. Version
    information is saved as class variable and should be updated when class
    invariants (e.g. fields) are added.

    Objects older than version 1.0 get an empty `ete_trees` dict. The
    version of the object is then set to the class version and a one-line
    report is printed.

    Adapted from recipe at http://code.activestate.com/recipes/521901-upgradable-pickles/
    '''
    version = float(self.version)
    if version < 1.:
        self.ete_trees = OrderedDict()
    self.version = self.class_version
    # An object older than the class is upgraded; anything else is reset to
    # the class version. The comparison used to be the wrong way around.
    if version < float(self.class_version):
        action = 'upgraded class'
    else:
        action = 'reset class'
    # single pre-formatted argument: prints the same text whether `print`
    # is a statement or a function
    print('{} {}  from version {} to version {}'.format(
        action, self.__class__.__name__, version, self.version))
def __getstate__(self):
odict = self.__dict__.copy()
return odict
def __setstate__(self, d):
self.__dict__ = d
# upgrade class if it has an older version
if not hasattr(self, 'version'):
self.version = '0.0'
if self.version != self.class_version:
self.upgrade()