Coverage for lingpy/thirdparty/cogent/tree.py : 92%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""Classes for storing and manipulating a phylogenetic tree.
These trees can be either strictly binary, or have polytomies (multiple children to a parent node).
Trees consist of Nodes (or branches) that connect two nodes. The Tree can be created only from a newick formatted string read either from file or from a string object. Other formats will be added as time permits.
Tree can: - Deal with either rooted or unrooted trees and can convert between these types. - Return a sub-tree given a list of tip-names - Identify an edge given two tip names. This method facilitates the statistical modelling by simplifying the syntax for specifying sub-regions of a tree. - Assess whether two Tree instances represent the same topology.
Definition of relevant terms or abbreviations: - edge: also known as a branch on a tree. - node: the point at which two edges meet - tip: a sequence or species - clade: all and only the nodes (including tips) that descend from a node - stem: the edge immediately preceding a clade """ #from cogent.util.transform import comb #from cogent.maths.stats.test import correlation #from cogent.util.misc import InverseDict
"Andrew Butterfield", "Catherine Lozupone", "Micah Hamady", "Jeremy Widmann", "Zongzhi Liu", "Daniel McDonald", "Justin Kuczynski"]
if sys.version_info[0] > 2: # pragma: no cover from functools import reduce
def cmp(a, b):
    """Three-way comparison standing in for Python 2's builtin ``cmp``.

    Returns -1 if ``a < b``, 0 if neither operand is less than the other,
    and 1 if ``b < a``.

    The previous expression ``(a < b) - (b < a)`` had the sign inverted
    (it returned +1 when ``a < b``), so results on Python 3 disagreed with
    the builtin used on Python 2.  Only ``<`` is used, as before, so objects
    need to define nothing more than ``__lt__``.
    """
    return (b < a) - (a < b)
"""Yields each successive combination of n items.
items: a slicable sequence. n: number of items in each combination This version from Raymond Hettinger, 2006/03/23 """ else: #def distance_from_r_squared(m1, m2): # """Estimates distance as 1-r^2: no correl = max distance""" # return 1 - (correlation(m1.flat, m2.flat)[0])**2 # #def distance_from_r(m1, m2): # """Estimates distance as (1-r)/2: neg correl = max distance""" # return (1-correlation(m1.flat, m2.flat)[0])/2
"""Store information about a tree node. Mutable.
Parameters: Name: label for the node, assumed to be unique. Children: list of the node's children. Params: dict containing arbitrary parameters for the node. NameLoaded: ? """
NameLoaded=True, **kwargs): """Returns new TreeNode object.""" Parent.append(self)
# added taxa here, for convenience when using tree-class in lingpy JML
### built-in methods and list interface support """Returns reconstructable string representation of tree.
WARNING: Does not currently set the class to the right type. """
"""Returns Newick-format string representation of tree."""
"""Compares TreeNode by name"""
"""Equality test for trees by name""" # if they are the same object then they must be the same tree...
"""Converts i to self's type, with self as its parent.
Cleans up refs from i's original parent, but doesn't give self ref to i. """ i._parent.Children.remove(i) else:
"""Appends i to self.Children, in-place, cleaning up refs."""
"""Extends self.Children by items, in-place, cleaning up refs."""
"""Inserts an item at specified position in self.Children.""" self.Children.insert(index, self._to_self_child(i))
"""Returns and deletes child of self at index (default: -1)""" result = self.Children.pop(index) result._parent = None return result
"""Removes node by name instead of identity.
Returns True if node was present, False otherwise. """ if isinstance(target, TreeNode): target = target.Name for (i, curr_node) in enumerate(self.Children): if curr_node.Name == target: self.removeNode(curr_node) return True return False
"""Node delegates slicing to Children; faster to access them directly."""
"""Node[i] = x sets the corresponding item in Children.""" curr = self.Children[i] if isinstance(i, slice): for c in curr: c._parent = None coerced_val = map(self._to_self_child, val) self.Children[i] = coerced_val[:] else: #assume we got a single index curr._parent = None coerced_val = self._to_self_child(val) self.Children[i] = coerced_val
"""del node[i] deletes index or slice from self.Children.""" for c in curr: c._parent = None else:
"""Node iter iterates over the Children."""
"""Node len returns number of children."""
#support for copy module """Returns copy of self's structure, including shallow copy of attrs.
constructor is ignored; required to support old tree unit tests. """
"""Returns a copy of self using an iterative approach"""
#check the top node, any children left unvisited?
len(old_child.Children)]) else: #no unvisited children
"""Copies only the topology and labels of a tree, not any extra data.
Useful when you want another copy of the tree with the same structure and labels, but want to e.g. assign different branch lengths and environments. Does not use deepcopy from the copy module, so _much_ faster than the copy() method. """ if constructor is None: constructor = self.__class__ children = [c.copyTopology(constructor) for c in self.Children] return constructor(Name=self.Name[:], Children=children)
#support for basic tree operations -- finding objects and moving in the tree """Accessor for parent.
If using an algorithm that accesses Parent a lot, it will be much faster to access self._parent directly, but don't do it if mutating self._parent! (or, if you must, remember to clean up the refs). """
"""Mutator for parent: cleans up refs in old parent."""
"""Returns index of self in parent.""" return self._parent.Children.index(self)
"""Returns True if the current node is a tip, i.e. has no children."""
"""Returns True if the current node is a root, i.e. has no parent.""" return self._parent is None
"""Returns iterator over descendants. Iterative: safe for large trees.
Notes ----- self_before includes each node before its descendants if True. self_after includes each node after its descendants if True. include_self includes the initial node if True.
self_before and self_after are independent. If neither is True, only terminal nodes will be returned.
Note that if self is terminal, it will only be included once even if self_before and self_after are both True.
This is a depth-first traversal. Since the trees are not binary, preorder and postorder traversals are possible, but inorder traversals would depend on the data in the tree and are not handled here. """ else: else: else:
"""Performs levelorder iteration over tree""" queue = [self] while queue: curr = queue.pop(0) if include_self or (curr is not self): yield curr queue.extend(curr.Children)
"""Performs preorder iteration over tree.""" """Performs postorder iteration over tree.
This is somewhat inelegant compared to saving the node and its index on the stack, but is 30% faster in the average case and 3x faster in the worst case (for a comb tree).
Zongzhi Liu's slower but more compact version is::
def postorder_zongzhi(self): stack = [[self, 0]] while stack: curr, child_idx = stack[-1] if child_idx < len(curr.Children): stack[-1][1] += 1 stack.append([curr.Children[child_idx], 0]) else: yield stack.pop()[0] """ #if there are children left, process them #if the current child has children, go there #otherwise, yield that child else: #if there are no children left, return self, and move to #self's parent else: # add this line to prevent error if parent is none @LinguList
"""Performs iteration over tree, visiting node before and after.""" #handle simple case first #if there are children left, process them #if the current child has children, go there child_index_stack.append(0) curr = curr_child curr_children = curr.Children #otherwise, yield that child else: #if there are no children left, return self, and move to #self's parent else: curr = curr.Parent curr_children = curr.Children
include_self=True): """Returns iterator over descendants. IMPORTANT: read notes below.
Notes ----- traverse_recursive is slower than traverse, and can lead to stack errors. However, you _must_ use traverse_recursive if you plan to modify the tree topology as you walk over it (e.g. in post-order), because the iterative methods use their own stack that is not updated if you alter the tree.
self_before includes each node before its descendants if True. self_after includes each node after its descendants if True. include_self includes the initial node if True.
self_before and self_after are independent. If neither is True, only terminal nodes will be returned.
Note that if self is terminal, it will only be included once even if self_before and self_after are both True.
This is a depth-first traversal. Since the trees are not binary, preorder and postorder traversals are possible, but inorder traversals would depend on the data in the tree and are not handled here. """
"""Returns all ancestors back to the root. Dynamically calculated."""
"""Returns root of the tree self is in. Dynamically calculated.""" while curr._parent is not None: curr = curr._parent return curr
"""Returns True if root of a tree, i.e. no parent."""
"""Returns all nodes that are children of the same parent as self.
Notes ----- Excludes self from the list. Dynamically calculated.
""" if self._parent is None:
"""Iterates over tips descended from self, [] if self is a tip.""" #bail out in easy case #use stack-based method: robust to large trees else:
"""Returns tips descended from self, [] if self is a tip."""
"""Iterates over nontips descended from self, [] if none.
include_self, if True (default is False), will return the current node as part of the list of nontips if it is a nontip."""
"""Returns nontips descended from self."""
"""Returns True if is tip, i.e. no children."""
"""Returns direct children of self that are tips.""" return [i for i in self.Children if not i.Children]
"""Returns direct children in self that have descendants."""
"""Returns list containing lists of children sharing a state.
In other words, returns runs of tip and nontip children. """ #bail out in trivial cases of 0 or 1 item #otherwise, have to do it properly... else: #handle last group
"""Finds last common ancestor of self and other, or None.
Always tests by identity. """
"""Lowest common ancestor for a list of tipnames
This should be around O(H sqrt(n)), where H is height and n is the number of tips passed in. """
# increase black count, multiple children lead to here
# curr = curr.black[0] # changed above statement in order to check for missing attribute # in lower node JML
# clear all black attributes from the tree, added by JML
#support for more advanced tree operations """ Find lowest common ancestor of a given number of nodes.
Notes ----- This function is supposed to yield the same output as lowestCommonAncestor does. It was added in order to overcome certain problems in the original function, resulting from attributes added to a PhyloNode-object that make its use unsafe at times. Furthermore, it works with an arbitrary list of nodes (including tips and internal nodes). """ # XXX function added by JML
# check for nodes that are not in the list of the nodes of self "[i] There are nodes that do not occur on the tree." )
# make a dictionary that stores which nodes have been visited
# pick one node at random (first one)
# get nodes from queue
# get all tips
# check for visited
# append parent to queue
"""Returns number of edges separating self and other.""" #detect trivial case #otherwise, check the list of ancestors #need to figure out how many steps there were back from self else: return None
"""Returns numpy array with nodes in rows and descendants in columns.
A value of 1 indicates that the descendant is a descendant of that node. A value of 0 indicates that it is not.
Also returns a list of nodes in the same order as they are listed in the array.
tip_list is a list of the names of the tips that will be considered, in the order they will appear as columns in the final array. Internal nodes will appear as rows in preorder traversal order. """
#get a list of internal nodes
#get a list of tip names if one is not supplied #make a blank array of the right dimensions to alter #put 1 in the column for each child of each node
"""Sets the Name of unnamed nodes to an arbitrary unique value
Internal nodes are often unnamed and so this function assigns a value for referencing.""" #make a list of the names that are already in the tree #assign unique names to the Data property of nodes where Data = None new_name = 'node' + str(name_index) #choose a new name if name is already in tree while new_name in names_in_use: name_index += 1 new_name = 'node' + str(name_index) node.Name = new_name names_in_use.append(new_name) name_index += 1
"""Makes an array with nodes in rows and descendants in columns.
A value of 1 indicates that the descendant is a descendant of that node. A value of 0 indicates that it is not.
also returns a list of nodes in the same order as they are listed in the array""" #get a list of internal nodes
#get a list of tips() Name if one is not supplied #make a blank array of the right dimensions to alter #put 1 in the column for each child of each node
"""Reconstructs correct topology after nodes have been removed.
Internal nodes with only one child will be removed and new connections will be made to reflect change. """ #traverse tree to decide nodes to be removed. #save current parent #save child #remove current node by setting parent to None #Connect child to current node's parent
"""Ignores lengths and order, so trees should be sorted first""" else:
escape_name=True): """Return the newick string for this edge.
Arguments: - with_distances: whether branch lengths are included. - semicolon: end tree string with a semicolon - escape_name: if any of these characters []'"(),:;_ exist in a nodes name, wrap the name in single quotes """
for child in self.Children]
else: name.endswith("'")): else:
"""Return the newick string for this tree.
Arguments: - with_distances: whether branch lengths are included. - semicolon: end tree string with a semicolon - escape_name: if any of these characters []'"(),:;_ exist in a nodes name, wrap the name in single quotes
NOTE: This method returns the Newick representation of this node and its descendents. This method is a modification of an implementation by Zongzhi Liu """
#check the top node, any children left unvisited? #pre-visit else: #no unvisited children #post-visit
else: name.endswith("'")): else:
#result.append(":%s" % top_node.Length)
else: else: else: else:
"""Removes node by identity instead of value.
Returns True if node was present, False otherwise. """ else:
getclade, getstem, outgroup_name=None): """Return the list of stem and/or sub tree (clade) edge name(s). This is done by finding the common intersection, and then getting the list of names. If the clade traverses the root, then use the outgroup_name argument to ensure valid specification.
Arguments: - tip1/2name: edge 1/2 names - getstem: whether the name of the clade stem edge is returned. - getclade: whether the names of the edges within the clade are returned - outgroup_name: if provided the calculation is done on a version of the tree re-rooted relative to the provided tip.
Usage: The returned list can be used to specify subtrees for special parameterisation. For instance, say you want to allow the primates to have a different value of a particular parameter. In this case, provide the results of this method to the parameter controller method `setParamRule()` along with the parameter name etc. """ # If outgroup specified put it at the top of the tree so that clades are # defined by their distance from it. This makes a temporary tree with # a named edge at its root, but it's only used here then discarded. raise TreeError('Outgroup (%s) must be a tip' % outgroup_name)
raise TreeError('LCA(%s,%s) is the root and so has no stem' % (tip1name, tip2name)) else:
#get the list of names contained by join_edge
# For walking the tree as if it was unrooted. if c is not None and c is not parent]
"""Iteratively calculates all of the root-to-tip and tip-to-tip distances, resulting in a tuple of: - A list of (name, path length) pairs. - A dictionary of (tip1,tip2):distance pairs """ ## linearize the tips in postorder. # .__start, .__stop compose the slice in tip_order. else: tip_order = [] tip_order.append(node)
# set tip_tip distance between tips of different child tipdistances[tip1] + tipdistances[tip2] tipdistances[tip1] + tipdistances[tip2]
## subtree with solved child wedges else: ## update result if necessary
"""The distance matrix as a dictionary.
Usage: Grabs the branch lengths (evolutionary distances) as a complete matrix (i.e. a,b and b,a). """
"""Propagate tip distance information up the tree
This method was originally implemented by Julia Goodrich with the intent of being able to determine max tip to tip distances between nodes on large trees efficiently. The code has been modified to track the specific tips the distance is between """ else: tip_a[0] += n.Children[0].Length or 0.0 else:
"""Returns the max tip tip distance between any pair of tips
Returns (dist, tip_names, internal_node) """
"""returns the max distance between any pair of tips
Also returns the tip names that it is between as a tuple"""
"""An equivalent node with possibly fewer children, or None"""
# Renumber autonamed edges
else: # don't need to pass keep_root to children, though # internal nodes will be eliminated this way for child in self.Children] # Merge parameter dictionaries by adding lengths and making # weighted averages of other parameters. This should probably # be moved out of here into a ParameterSet class (Model?) or # tree subclass. if v is not None and child.params.get(n) is not None and n is not "length"] (self.params[n]*self.Length + child.params[n]*child.Length) / length) for n in shared_params]) else:
"""A new instance of a sub tree that contains all the otus that are listed in name_list.
ignore_missing: if False, getSubTree will raise a ValueError if name_list contains names that aren't nodes in the tree
keep_root: if False, the root of the subtree will be the last common ancestor of all nodes kept in the subtree. Root to tip distance is then (possibly) different from the original tree If True, the root to tip distance remains constant, but root may only have one child node. """ # this may take a long time
raise TreeError("only a tip was returned from selecting sub tree") else: # keep unrooted
"""The number of edges beyond 'parent' in the direction of 'self', unrooted""" for child in neighbours])
"""The edge count from here, (except via 'parent'), divided into that from the heaviest neighbour, and that from the rest of them. 'cache' should be a dictionary that can be shared by calls to self.edgecount, it stores the edgecount for each node (from self) without having to put it on the tree itself."""
"""Score all the edges, sort them, and return minimum score and a sorted tree. """ # Only need to duplicate whole tree because of .Parent pointers
else: for child in self.Children] for (score, child) in scored_subtrees])
for (score, child) in scored_subtrees if score is not None]
"""An equivalent tree sorted into a standard order. If this is not specified then alphabetical order is used. At each node starting from root, the algorithm will try to put the descendant which contains the lowest scoring tip on the left. """
labels=False): """ Notes ----- Added a labels-keyword to this function. This is useful for debugging, since it allows the user to replace all nodes with a specific label, and print it to the tree, accordingly. """ # XXX Added labels-keywords: JML else: else: labels) else:
"""Returns a string containing an ascii drawing of the tree.
Parameters ---------- show_internal: bool includes internal edge names. compact: bool use exactly one line per tip. labels: {bool, list} specify specific labels for all nodes in the tree.
Notes ----- The labels-keyword was added to the function by JML. """ # XXX added labels-keywords JML show_internal=show_internal, compact=compact, labels=labels)
#def _getXmlLines(self, indent=0, parent_params=None): # """Return the xml strings for this edge. # """ # params = {} # if parent_params is not None: # params.update(parent_params) # pad = ' ' * indent # xml = ["%s<clade>" % pad] # if self.NameLoaded: # xml.append("%s <name>%s</name>" % (pad, self.Name)) # for (n,v) in self.params.items(): # if v == params.get(n, None): # continue # xml.append("%s <param><name>%s</name><value>%s</value></param>" # % (pad, n, v)) # params[n] = v # for child in self.Children: # xml.extend(child._getXmlLines(indent + 1, params)) # xml.append(pad + "</clade>") # return xml # #def getXML(self): # """Return XML formatted tree string.""" # header = ['<?xml version="1.0"?>'] # <!DOCTYPE ... # return '\n'.join(header + self._getXmlLines())
"""Save the tree to filename
Arguments: - filename: self-evident - with_distances: whether branch lengths are included in string. - format: default is newick, xml is alternate. Argument overrides the filename suffix. All attributes are saved in the xml format. """ if format: else: xml = filename.lower().endswith('xml')
if xml: data = self.getXML() else: outf.writelines(data) outf.close()
"""Return a list of edges from this edge - may or may not include self. This node (or first connection) will be the first, and then they will be listed in the natural traverse order. """ else:
"""return the list of the names of all tips contained by this edge """
"""Collect the list of edges in postfix order""" return [node for node in self.traverse(False, True)]
""" find the edge with the name, or return None """
(name, self.getTipNames()))
"""Finds the last common ancestor of the two named edges."""
"""returns a list of edges connecting two nodes
includes self and other in the list""" #remove nodes deeper than the LCA #remove LCA and deeper nodes from anc list of other
"""returns the parameter value for named edge"""
"""sets the value for param at named edge"""
"""Reassigns node names based on a mapping dict
mapping : dict, old_name -> new_name nodes : specific nodes for renaming (such as just tips, etc...) """
"""Returns a dict keyed by node name, value is node
Will raise TreeError if non-unique names are encountered """
else:
"""Returns set of names that descend from specified node"""
"""Returns all sets of names that come from specified node and its kids""" include_self=False): else: if len(leaf_set) > 1:
"""Returns fraction of overlapping subsets where self and other differ.
Other is expected to be a tree object compatible with PhyloNode.
Notes ----- Names present in only one of the two trees will count as mismatches: if you don't want this behavior, strip out the non-matching tips first. """ other_sets = [i & in_both for i in other_sets] other_sets = frozenset([i for i in other_sets if len(i) > 1])
#def tipToTipDistances(self, default_length=1): # """Returns distance matrix between all pairs of tips, and a tip order. # # Warning: .__start and .__stop added to self and its descendants.
# tip_order contains the actual node objects, not their names (may be # confusing in some cases). # """ # ## linearize the tips in postorder. # # .__start, .__stop compose the slice in tip_order. # tip_order = list(self.tips()) # for i, tip in enumerate(tip_order): # tip.__start, tip.__stop = i, i+1
# num_tips = len(tip_order) # result = zeros((num_tips, num_tips), float) #tip by tip matrix # tipdistances = zeros((num_tips), float) #distances from tip to curr node
# def update_result(): # # set tip_tip distance between tips of different child # for child1, child2 in comb(node.Children, 2): # for tip1 in range(child1.__start, child1.__stop): # for tip2 in range(child2.__start, child2.__stop): # result[tip1,tip2] = \ # tipdistances[tip1] + tipdistances[tip2]
# for node in self.traverse(self_before=False, self_after=True): # if not node.Children: # continue # ## subtree with solved child wedges # starts, stops = [], [] #to calc ._start and ._stop for curr node # for child in node.Children: # if hasattr(child, 'Length') and child.Length is not None: # child_len = child.Length # else: # child_len = default_length # tipdistances[child.__start : child.__stop] += child_len # starts.append(child.__start); stops.append(child.__stop) # node.__start, node.__stop = min(starts), max(stops) # ## update result if nessessary # if len(node.Children) > 1: #not single child # update_result() # return result+result.T, tip_order
self.params = {}
"""Returns string version of self, with names and distances."""
"""Returns branch length between self and other.""" #never any length between self and other return 0 #otherwise, find self's ancestors and find the first ancestor of #other that is in the list
#found the first shared ancestor -- need to sum other branch else: if other.Length:
"""Returns total descending branch length from self""" if n.Length is not None])
#def tipsWithinDistance(self, distance): # """Returns tips within specified distance from self
# Branch lengths of None will be interpreted as 0 # """ # def get_distance(d1, d2): # if d2 is None: # return d1 # else: # return d1 + d2
# to_process = [(self, 0.0)] # tips_to_save = []
# curr_node, curr_dist = to_process[0]
# seen = set([id(self)]) # while to_process: # curr_node, curr_dist = to_process.pop(0) # # # have we've found a tip within distance? # if curr_node.isTip() and curr_node != self: # tips_to_save.append(curr_node) # continue # # # add the parent node if it is within distance # parent_dist = get_distance(curr_dist, curr_node.Length) # if curr_node.Parent is not None and parent_dist <= distance and \ # id(curr_node.Parent) not in seen: # to_process.append((curr_node.Parent, parent_dist)) # seen.add(id(curr_node.Parent))
# # add children if we haven't seen them and if they are in distance # for child in curr_node.Children: # if id(child) in seen: # continue # seen.add(id(child))
# child_dist = get_distance(curr_dist, child.Length) # if child_dist <= distance: # to_process.append((child, child_dist))
# return tips_to_save
"""Reconstructs correct tree after nodes have been removed.
Internal nodes with only one child will be removed and new connections and Branch lengths will be made to reflect change. """ #traverse tree to decide nodes to be removed. nodes_to_remove = [] if (node.Parent is not None) and (len(node.Children)==1): #save current parent #save child #remove current node by setting parent to None #Connect child to current node's parent #Add the Length of the removed node to the Length of the Child else:
# walks the tree unrooted-style, ie: treating self.Parent as just # another child 'parent' is where we got here from, ie: the neighbour # that we don't need to explore.
# we might be walking UP the tree, so: # base edge # self's parent is becoming self's child, and edge params are stored # by the child else:
# With every node having 2 or fewer children.
"""Tree 'rooted' here with no neighbour having > 50% of the edges.
Notes ----- Using a balanced tree can substantially improve performance of the likelihood calculations. Note that the resulting tree has a different orientation with the effect that specifying clades or stems for model parameterisation should be done using the 'outgroup_name' argument. """ # this should work OK on ordinary 3-way trees, not so sure about # other cases. Given 3 neighbours, if one has > 50% of edges it # can only improve things to divide it up, worst case: # (51),25,24 -> (50,1),49. # If no neighbour has >50% we can't improve on where we are, eg: # (49),25,26 -> (20,19),51 last_edge, cache) known_weight += 1
"""Tests whether two trees have the same topology."""
"""A tree with at least 3 children at the root. """ if sib.Length is not None and oldnode.Length is not None: sib.Length += oldnode.Length else:
"""Return a new tree rooted at the provided node.
Usage: This can be useful for drawing unrooted trees with an orientation that reflects knowledge of the true root location. """ newroot = self.getNodeMatchingName(edge_name) repr(edge_name))
"""A new tree with the named tip as one of the root's children"""
""" return a new tree rooted at midpoint of the two tips farthest apart
this fn doesn't preserve the internal node naming or structure, but does keep tip to tip distances correct. uses unrootedDeepcopy() """ # max_dist, tip_names = tree.maxTipTipDistance() # this is slow
return self.unrootedDeepcopy() # print tip_names else:
dist_climbed += climb_node.Length climb_node = climb_node.Parent
# now midpt is either on the branch to climb_node's parent # or midpt is at climb_node's parent # print dist_climbed, half_max_dist, 'dists cl hamax' # climb to midpoint spot else: # print climb_node.Name, 'clmb node'
else: # make a new node on climb_node's branch to its parent
"""returns the nodes surrounding the maxTipTipDistance midpoint
WAS used for midpoint rooting. ORPHANED NOW max_dist: The maximum distance between any 2 tips tip_pair: Names of the two tips associated with max_dist """ half_max_dist = max_dist/2.0 #get a list of the nodes that separate the tip pair
"""Sets distance from each node to the most distant tip.""" c in node.Children]) else:
"""Scales BranchLengths in place to integers for ascii output.
Warning: tree might not be exactly the length you specify.
Set ultrametric=True if you want all the root-tip distances to end up precisely the same. """ max(1, int(round(1.0*curr/orig_max*max_length))) else: #hard case -- need to make sure they all line up at the end #if we get here, we know the node has children #figure out what distance we want to set for this node distance = max(min_distance, ideal_distance) for c in node.Children: #reset the BranchLengths
"""Iteratively calculates all of the root-to-tip and tip-to-tip distances, resulting in a tuple of: - A list of (name, path length) pairs. - A dictionary of (tip1,tip2):distance pairs """ ## linearize the tips in postorder. # .__start, .__stop compose the slice in tip_order. else:
# set tip_tip distance between tips of different child tipdistances[tip1] + tipdistances[tip2] tipdistances[tip1] + tipdistances[tip2]
## subtree with solved child wedges else: ## update result if necessary
"""The distance matrix as a dictionary.
Usage: Grabs the branch lengths (evolutionary distances) as a complete matrix (i.e. a,b and b,a)."""
"""Returns distance matrix between all pairs of tips, and a tip order.
Warning: .__start and .__stop added to self and its descendants.
tip_order contains the actual node objects, not their names (may be confusing in some cases). """ else: else:
## linearize all tips in postorder # .__start, .__stop compose the slice in tip_order.
# the result map provides index in the result matrix
# set tip_tip distance between tips of different child tipdistances[tip1] + tipdistances[tip2]
## subtree with solved child wedges else: child_len = default_length ## update result if necessary
# get all tree nodes
# define function to return all partitions as sets of taxa
tmp_taxa]))
#def compareByTipDistances(self, other, sample=None, dist_f=distance_from_r,\ # shuffle_f=shuffle): # """Compares self to other using tip-to-tip distance matrices.
# Value returned is dist_f(m1, m2) for the two matrices. Default is # to use the Pearson correlation coefficient, with +1 giving a distance # of 0 and -1 giving a distance of +1 (the madimum possible value). # Depending on the application, you might instead want to use # distance_from_r_squared, which counts correlations of both +1 and -1 # as identical (0 distance). # # Note: automatically strips out the names that don't match (this is # necessary for this method because the distance between non-matching # names and matching names is undefined in the tree where they don't # match, and because we need to reorder the names in the two trees to # match up the distance matrices). # """ # self_names = dict([(i.Name, i) for i in self.tips()]) # other_names = dict([(i.Name, i) for i in other.tips()]) # common_names = frozenset(self_names.keys()) & \ # frozenset(other_names.keys()) # common_names = list(common_names)
# if not common_names: # raise ValueError, "No names in common between the two trees.""" # if len(common_names) <= 2: # return 1 #the two trees must match by definition in this case
# if sample is not None: # shuffle_f(common_names) # common_names = common_names[:sample] # # self_nodes = [self_names[k] for k in common_names] # other_nodes = [other_names[k] for k in common_names]
# self_matrix = self.tipToTipDistances(endpoints=self_nodes)[0] # other_matrix = other.tipToTipDistances(endpoints=other_nodes)[0]
# return dist_f(self_matrix, other_matrix)
# Some tree code which isn't needed once the tree is finished. # Mostly exists to give edges unique names # Children must be created before their parents.
# Unnamed edges become edge.0, edge.1 edge.2 ... # Other duplicates go mouse mouse.2 mouse.3 ... else:
# default is just to keep it
"""Callback for tree-to-tree transforms like getSubTree""" else: children, edge.Name, params, nameLoaded=edge.NameLoaded)
"""Callback for newick parser""" Children = list(children), Name = self._unique_name(name), NameLoaded = nameLoaded and (name is not None), Params = params, )
"""Constructor for tree.
Arguments, use only one of: - filename: a file containing a newick or xml formatted tree. - treestring: a newick or xml formatted tree string. - tip_names: a list of tip names.
Notes ----- Underscore_unmunging is turned off by default, although it is part of the Newick format. Set underscore_unmunge to True to replace underscores with spaces in all names read. """ # slight modification for easy import of treestrings instead of # file-reading by JML else: #FIXME: More general strategy for underscore_unmunge tips = [tree_builder([], tip_name, {}) for tip_name in tip_names] else: |