Trees¶
Loading a tree from a file and visualizing it with ascii_art()¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced
Writing a tree to a file¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
tr.write("data/temp.tree")
Getting the individual nodes of a tree by name¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
names = tr.get_node_names()
names[:4]
['root', 'edge.1', 'edge.0', 'Human']
names[4:]
['HowlerMon', 'Mouse', 'NineBande', 'DogFaced']
names_nodes = tr.get_nodes_dict()
names_nodes["Human"]
Tree("Human;")
tr.get_node_matching_name("Mouse")
Tree("Mouse;")
Getting the name of a node (or a tree)¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
hu = tr.get_node_matching_name("Human")
tr.name
'root'
hu.name
'Human'
The object type of a tree and its nodes is the same¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
nodes = tr.get_nodes_dict()
hu = nodes["Human"]
type(hu)
cogent3.core.tree.PhyloNode
type(tr)
cogent3.core.tree.PhyloNode
Working with the nodes of a tree¶
Get all the nodes, tips and edges
from cogent3 import load_tree
tr = load_tree("data/test.tree")
nodes = tr.get_nodes_dict()
for n in nodes.items():
    print(n)
('root', Tree("(((Human,HowlerMon),Mouse),NineBande,DogFaced);"))
('edge.1', Tree("((Human,HowlerMon),Mouse);"))
('edge.0', Tree("(Human,HowlerMon);"))
('Human', Tree("Human;"))
('HowlerMon', Tree("HowlerMon;"))
('Mouse', Tree("Mouse;"))
('NineBande', Tree("NineBande;"))
('DogFaced', Tree("DogFaced;"))
only the terminal nodes (tips)
for n in tr.iter_tips():
    print(n)
Human:0.0311054096183;
HowlerMon:0.0415847131449;
Mouse:0.277353608988;
NineBande:0.0939768158209;
DogFaced:0.113211053859;
for internal nodes (edges) we can use Newick format to simplify the output
from cogent3 import load_tree
tr = load_tree("data/test.tree")
for n in tr.iter_nontips():
    print(n.get_newick())
((Human,HowlerMon),Mouse);
(Human,HowlerMon);
Getting the path between two tips or edges (connecting edges)¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
edges = tr.get_connecting_edges("edge.1", "Human")
for edge in edges:
    print(edge.name)
edge.1
edge.0
Human
Getting the distance between two nodes¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
nodes = tr.get_nodes_dict()
hu = nodes["Human"]
mu = nodes["Mouse"]
hu.distance(mu)
0.3467553610937
hu.is_tip()
True
Getting the last common ancestor (LCA) for two nodes¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
nodes = tr.get_nodes_dict()
hu = nodes["Human"]
mu = nodes["Mouse"]
lca = hu.last_common_ancestor(mu)
lca
Tree("((Human,HowlerMon),Mouse);")
type(lca)
cogent3.core.tree.PhyloNode
Getting all the ancestors for a node¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
hu = tr.get_node_matching_name("Human")
for a in hu.ancestors():
    print(a.name)
edge.0
edge.1
root
Getting all the descendants of a node¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
node = tr.get_node_matching_name("edge.1")
children = list(node.iter_tips()) + list(node.iter_nontips())
for child in children:
    print(child.name)
Human
HowlerMon
Mouse
edge.0
Getting all the distances for a tree¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
dists = tr.get_distances()
We also show how to select a subset of distances involving just one species.
human_dists = [names for names in dists if "Human" in names]
for dist in human_dists:
    print(dist, dists[dist])
('Human', 'HowlerMon') 0.0726901227632
('HowlerMon', 'Human') 0.0726901227632
('Human', 'Mouse') 0.3467553610937
('Mouse', 'Human') 0.3467553610937
('Human', 'NineBande') 0.18310641816450002
('NineBande', 'Human') 0.18310641816450002
('Human', 'DogFaced') 0.2023406562026
('DogFaced', 'Human') 0.2023406562026
Getting the two nodes that are farthest apart¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
tr.max_tip_tip_distance()
(0.4102925130849, ('Mouse', 'DogFaced'))
Get the nodes within a given distance¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
hu = tr.get_node_matching_name("Human")
tips = hu.tips_within_distance(0.2)
for t in tips:
    print(t)
HowlerMon:0.0415847131449;
NineBande:0.0939768158209;
Rerooting trees¶
At a named node¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
print(tr.rooted_at("edge.0").ascii_art())
          /-Human
         |
-root----|--HowlerMon
         |
         |          /-Mouse
          \edge.0--|
                   |          /-NineBande
                    \edge.1--|
                              \-DogFaced
At the midpoint¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
print(tr.root_at_midpoint().ascii_art())
          /-Mouse
         |
-root----|                    /-Human
         |          /edge.0--|
         |         |          \-HowlerMon
          \edge.0.2|
                   |          /-NineBande
                    \edge.1--|
                              \-DogFaced
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-------- /-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced
Near a given tip¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced
print(tr.rooted_with_tip("Mouse").ascii_art())
                    /-Human
          /edge.0--|
         |          \-HowlerMon
         |
-root----|--Mouse
         |
         |          /-NineBande
          \edge.1--|
                    \-DogFaced
Tree representations¶
Newick format¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
tr.get_newick()
'(((Human,HowlerMon),Mouse),NineBande,DogFaced);'
tr.get_newick(with_distances=True)
'(((Human:0.0311054096183,HowlerMon:0.0415847131449):0.0382963424874,Mouse:0.277353608988):0.0197278502379,NineBande:0.0939768158209,DogFaced:0.113211053859);'
XML format¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
xml = tr.get_xml()
for line in xml.splitlines():
    print(line)
<?xml version="1.0"?>
<clade>
<clade>
<param><name>length</name><value>0.0197278502379</value></param>
<clade>
<param><name>length</name><value>0.0382963424874</value></param>
<clade>
<name>Human</name>
<param><name>length</name><value>0.0311054096183</value></param>
</clade>
<clade>
<name>HowlerMon</name>
<param><name>length</name><value>0.0415847131449</value></param>
</clade>
</clade>
<clade>
<name>Mouse</name>
<param><name>length</name><value>0.277353608988</value></param>
</clade>
</clade>
<clade>
<name>NineBande</name>
<param><name>length</name><value>0.0939768158209</value></param>
</clade>
<clade>
<name>DogFaced</name>
<param><name>length</name><value>0.113211053859</value></param>
</clade>
</clade>
Tree traversal¶
Here is the example tree for reference:
from cogent3 import load_tree
tr = load_tree("data/test.tree")
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced
Preorder¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
for t in tr.preorder():
    print(t.get_newick())
(((Human,HowlerMon),Mouse),NineBande,DogFaced);
((Human,HowlerMon),Mouse);
(Human,HowlerMon);
Human;
HowlerMon;
Mouse;
NineBande;
DogFaced;
Postorder¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
for t in tr.postorder():
    print(t.get_newick())
Human;
HowlerMon;
(Human,HowlerMon);
Mouse;
((Human,HowlerMon),Mouse);
NineBande;
DogFaced;
(((Human,HowlerMon),Mouse),NineBande,DogFaced);
Selecting subtrees¶
One way to do it¶
from cogent3 import load_tree
tr = load_tree("data/test.tree")
for tip in tr.iter_nontips():
    tip_names = tip.get_tip_names()
    print(tip_names)
    sub_tree = tr.get_sub_tree(tip_names)
    print(sub_tree.ascii_art())
['Human', 'HowlerMon', 'Mouse']
          /-Human
         |
-root----|--HowlerMon
         |
          \-Mouse
['Human', 'HowlerMon']
          /-Human
-root----|
          \-HowlerMon
Tree manipulation methods¶
Pruning the tree¶
Remove internal nodes with only one child. Create new connections and branch lengths (if tree is a PhyloNode) to reflect the change.
from cogent3 import make_tree
simple_tree_string = "(B:0.2,(D:0.4)E:0.5)F;"
simple_tree = make_tree(simple_tree_string)
print(simple_tree.ascii_art())
          /-B
-F-------|
          \E------- /-D
simple_tree.prune()
print(simple_tree.ascii_art())
          /-B
-F-------|
          \-D
print(simple_tree)
(B:0.2,D:0.9)F;
Create a full unrooted copy of the tree¶
from cogent3 import load_tree
tr1 = load_tree("data/test.tree")
print(tr1.get_newick())
(((Human,HowlerMon),Mouse),NineBande,DogFaced);
tr2 = tr1.unrooted_deepcopy()
print(tr2.get_newick())
(((Human,HowlerMon),Mouse),NineBande,DogFaced);
Transform tree into a bifurcating tree¶
Add internal nodes so that every node has 2 or fewer children.
from cogent3 import make_tree
tree_string = "(B:0.2,H:0.2,(C:0.3,D:0.4,E:0.1)F:0.5)G;"
tr = make_tree(tree_string)
print(tr.ascii_art())
          /-B
         |
         |--H
-G-------|
         |          /-C
         |         |
          \F-------|--D
                   |
                    \-E
print(tr.bifurcating().ascii_art())
          /-B
-G-------|
         |          /-H
          \--------|
                   |          /-C
                    \F-------|
                             |          /-D
                              \--------|
                                        \-E
Transform tree into a balanced tree¶
Using a balanced tree can substantially improve performance of likelihood calculations. Note that the resulting tree has a different orientation with the effect that specifying clades or stems for model parameterisation should be done using the “outgroup_name” argument.
from cogent3 import load_tree
tr = load_tree("data/test.tree")
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced
print(tr.balanced().ascii_art())
                    /-Human
          /edge.0--|
         |          \-HowlerMon
         |
-root----|--Mouse
         |
         |          /-NineBande
          \edge.1--|
                    \-DogFaced
Test two trees for same topology¶
Branch lengths don’t matter.
from cogent3 import make_tree
tr1 = make_tree("(B:0.2,(C:0.2,D:0.2)F:0.2)G;")
tr2 = make_tree("((C:0.1,D:0.1)F:0.1,B:0.1)G;")
tr1.same_topology(tr2)
True
Calculate each node’s maximum distance to a tip¶
Sets each node’s “TipDistance” attribute to be the distance from that node to its most distant tip.
from cogent3 import make_tree
tr = make_tree("(B:0.2,(C:0.3,D:0.4)F:0.5)G;")
print(tr.ascii_art())
          /-B
-G-------|
         |          /-C
          \F-------|
                    \-D
tr.set_tip_distances()
for t in tr.preorder():
    print(t.name, t.TipDistance)
G 0.9
B 0
F 0.4
C 0
D 0
Scale branch lengths in place to integers for ascii output¶
from cogent3 import make_tree
tr = make_tree("(B:0.2,(C:0.3,D:0.4)F:0.5)G;")
print(tr)
(B:0.2,(C:0.3,D:0.4)F:0.5)G;
tr.scale_branch_lengths()
print(tr)
(B:22,(C:33,D:44)F:56)G;
Get tip-to-tip distances¶
Get a distance matrix between all pairs of tips and a list of the tip nodes.
from cogent3 import make_tree
tr = make_tree("(B:3,(C:2,D:4)F:5)G;")
d, tips = tr.tip_to_tip_distances()
for i, t in enumerate(tips):
    print(t.name, d[i])
B [ 0. 10. 12.]
C [10. 0. 6.]
D [12. 6. 0.]
Compare two trees using tip-to-tip distance matrices¶
The score ranges from 0 (minimum distance) to 1 (maximum distance). By default the comparison uses Pearson’s correlation between the two tip-to-tip distance matrices, in which case a score of 0 means the correlation was perfect (1), and a score of 1 means the correlation was perfectly inverse (-1).
Note: automatically strips out the names that don’t match.
from cogent3 import make_tree
tr1 = make_tree("(B:2,(C:3,D:4)F:5)G;")
tr2 = make_tree("(C:2,(B:3,D:4)F:5)G;")
tr1.compare_by_tip_distances(tr2)
0.08352668213457076