'''
Created on Apr 11, 2015
@author: thocu
'''
import collections
import VirtualMicrobes.cython_gsl_interface.integrate as integrate
from VirtualMicrobes.environment.Environment import Locality
import random
import VirtualMicrobes.simulation.Simulation as simu
import os
from VirtualMicrobes.plotting.Graphs import BindingNetwork, MetabolicNetwork, Genome, PhyloTreeGraph
import VirtualMicrobes.my_tools.utility as util
import pandas as pd
import matplotlib.pyplot as plt
import glob
import shutil
import copy
def _plot_cell_graphs(cell, GRN_grapher, metabolome_grapher, genome_grapher, max_genome_size, suffixes):
    '''
    Render and save the regulatory, metabolic and genome graphs of one cell.

    Every saved figure is labelled with the zero-padded time of birth of
    `cell` and is written once for each entry in `suffixes`.

    Parameters
    ----------
    cell : :class:`VirtualMicrobes.virtual_cell.Cell.Cell`
        individual whose graphs are drawn
    GRN_grapher : :class:`VirtualMicrobes.plotting.Graphs.BindingNetwork`
        draws gene regulatory network graphs
    metabolome_grapher : :class:`VirtualMicrobes.plotting.Graphs.MetabolicNetwork`
        draws metabolic network
    genome_grapher : :class:`VirtualMicrobes.plotting.Graphs.Genome`
        draws genome layout graphs
    max_genome_size
        forwarded to the genome grapher as `max_size`
    suffixes : iterable of str
        file suffixes, each forwarded to the graphers' `save_fig`
    '''
    birth_label = str(cell.time_birth).zfill(10)

    def save_all(grapher, tags, **save_kwargs):
        # one file per requested suffix; a fresh labels list for every call
        for sfx in suffixes:
            grapher.save_fig(labels=[birth_label] + tags, suffix=sfx, **save_kwargs)

    # Regulatory network: (layout program or None to keep the current layout,
    # redraw keyword arguments, extra file name labels)
    grn_passes = (
        ('nwx', {}, ['nwx']),
        ('dot', {}, ['bound', 'dot']),
        (None, {'edge_effect': 'effect_apo'}, ['apo', 'dot']),
    )
    GRN_grapher.init_network(cell)
    for layout_prog, redraw_kwargs, tags in grn_passes:
        if layout_prog is not None:
            GRN_grapher.layout_network_positions(prog=layout_prog)
        GRN_grapher.redraw_network(**redraw_kwargs)
        GRN_grapher.update_figure()
        save_all(GRN_grapher, tags, bbox_inches='tight')
    GRN_grapher.clear_graph()

    # Metabolic network
    metabolome_grapher.redraw_network(reactions_dict=cell.reaction_set_dict,
                                      building_blocks=cell.building_blocks)
    metabolome_grapher.update_figure()
    save_all(metabolome_grapher, [], bbox_inches='tight')

    # Genome structure
    genome_grapher.plot_genome_structure(cell, labels=[birth_label],
                                         max_size=max_genome_size)
    genome_grapher.update_figure()
    save_all(genome_grapher, [])
def _plot_cell_time_course(cell, sim_graphs, save_dir, suffixes):
    '''
    Plot all time courses within the life span of an individual.

    Internal molecule concentrations, protein concentrations, cell size,
    toxicity and production are drawn in a single figure. The figure is saved
    in `save_dir` as ``time_course_<zero-padded time of birth><suffix>``, once
    for every suffix, and is closed afterwards. Nothing is drawn or saved when
    the cell has no recorded cell size time points.

    Parameters
    ----------
    cell : :class:`VirtualMicrobes.virtual_cell.Cell.Cell`
    sim_graphs : :class:`VirtualMicrobes.plotting.Graphs.Graphs`
        simulation grapher object that draws the molecule and protein plots
    save_dir : str
        directory to write the figure files to
    suffixes : list of file suffixes
    '''
    size_dat = cell.get_cell_size_time_course()
    if not len(size_dat[0, :]):
        # no time points recorded: bail out before allocating a figure
        return

    fig = plt.figure(figsize=(12, 12))
    try:
        mol_ax = fig.add_subplot(321)
        mol_ax.set_title('internal molecule conc')
        prot_ax = fig.add_subplot(323)
        prot_ax.set_title('protein concentration')
        size_ax = fig.add_subplot(322)
        size_ax.set_title('cell size')
        tox_ax = fig.add_subplot(324)
        tox_ax.set_title('toxicity')
        prod_ax = fig.add_subplot(326)
        prod_ax.set_title('production')

        sim_graphs.plot_mol_class_data(mol_ax, cell.get_mol_time_course_dict())
        sim_graphs.plot_prot_data(prot_ax, cell.get_gene_type_time_course_dict())

        # row 0 holds the time points, row 1 the values
        line_plots = ((size_ax, size_dat),
                      (tox_ax, cell.get_toxicity_time_course()),
                      (prod_ax, cell.get_raw_production_time_course()))
        for ax, dat in line_plots:
            ax.plot(dat[0, :], dat[1, :])

        file_base = os.path.join(save_dir,
                                 'time_course_' + str(cell.time_birth).zfill(10))
        for suffix in suffixes:
            fig.savefig(file_base + suffix, bbox_inches='tight')
    finally:
        # pyplot keeps every figure it created alive until it is closed;
        # without this, one figure per plotted ancestor piles up in memory
        plt.close(fig)
def _time_course_series(ts_dat, name=None):
    '''Wrap a ``(time points, values)`` pair in a :class:`pandas.Series` indexed by time point.'''
    return pd.Series(data=ts_dat[1], index=ts_dat[0], name=name)


def _reaction_column_name(reac):
    '''Return the csv column name for a key of a reaction time course dict.'''
    if isinstance(reac, tuple):
        # 2-tuple keys: the second element selects the '-e' or '-i' tag
        reaction, exp_flag = reac
        return str(reaction) + ('-e' if exp_flag else '-i')
    return str(reac)


def _write_combined_time_course(parts, save_dir, base_name):
    '''Concatenate per-ancestor data, drop repeated time points and write ``<base_name>.csv``.'''
    combined = pd.concat(parts)
    # where a time point occurs more than once, only its last occurrence is kept
    combined = combined[~combined.index.duplicated(keep='last')]
    combined.to_csv(os.path.join(save_dir, base_name + '.csv'),
                    index_label='time point')


def _lod_time_course_data(ancestors, base_save_dir, viewer_path, chunk_size=100 ):
    r'''
    Write time series data in the line of descent.

    Concatenates time courses of individuals along a :class:`LOD`.
    Concatenations are done in *chunks* of a chosen `chunk_size`. For each chunk
    **.csv** files are stored in a directory named part*n*, where *n* is the
    zero-padded chunk number. All files found in `viewer_path` are copied into
    every chunk directory.

    Per chunk the following files are written: ``production_time_course.csv``,
    ``cell_size_time_course.csv``, ``toxicity_time_course.csv``,
    ``mol_time_courses.csv`` and ``prot_time_courses.csv``.

    Parameters
    ----------
    ancestors : list of :class:`VirtualMicrobes.virtual_cell.Cell.Cell`\s
    base_save_dir : str
        directory in which the part*n* directories are created
    viewer_path : str
        path to utility files for html data viewer
    chunk_size : int
        length of chunks of concatenated data
    '''
    # divide the ancestors in LOD into chunks; concatenate time courses per chunk
    for part, anc_chunk in enumerate(util.chunks(ancestors, chunk_size)):
        save_dir = os.path.join(base_save_dir, 'part{}'.format(str(part).zfill(5)))
        util.ensure_dir(save_dir)
        for filename in glob.glob(os.path.join(viewer_path, '*')):
            shutil.copy2(filename, save_dir)

        prod_series, cell_size_series, tox_series = [], [], []
        mol_dfs, prot_dfs = [], []
        for anc in anc_chunk:
            prod_series.append(_time_course_series(anc.get_raw_production_time_course()))
            cell_size_series.append(_time_course_series(anc.get_cell_size_time_course()))
            tox_series.append(_time_course_series(anc.get_toxicity_time_course()))

            # metabolite data: one column per molecule
            mol_series = dict()
            for mol, tc in anc.get_mol_time_course_dict().items():
                mol_series[mol] = _time_course_series(tc, name=mol)
            mol_dfs.append(pd.DataFrame(mol_series))

            # protein data: one column per reaction, pooled over reaction types
            prot_series = dict()
            for tc_dict in anc.get_total_reaction_type_time_course_dict().values():
                for reac, tc in tc_dict.items():
                    name = _reaction_column_name(reac)
                    prot_series[name] = _time_course_series(tc, name=name)
            prot_dfs.append(pd.DataFrame(prot_series))

        # concatenate each data type and write to file
        outputs = ((prod_series, 'production_time_course'),
                   (cell_size_series, 'cell_size_time_course'),
                   (tox_series, 'toxicity_time_course'),
                   (mol_dfs, 'mol_time_courses'),
                   (prot_dfs, 'prot_time_courses'))
        for parts, base_name in outputs:
            _write_combined_time_course(parts, save_dir, base_name)
class LOD_Analyser(object):
    '''
    Analyses the evolutionary history of a population by tracing ancestors in the
    line of descent.

    Loads a simulation save from a file, keeping a reference in :attr:`ref_sim`.
    From this, initialise :attr:`ref_pop_hist` as a :class:`PopulationHistory`
    object that analyses the phylogenetic tree of the population.

    The :class:`PopulationHistory` generates a :class:`LOD` for 1 or more
    individuals in the saved population. For each :class:`LOD`, evolutionary
    data and network and genome plots can be produced.

    It is possible to load additional simulation snapshots that precede the
    :attr:`ref_pop_hist` and compare individuals to their contemporaries present
    in the preceding populations. :attr:`compare_saves` contains a list of file
    names of population-saves that should be compared.

    The object can be used as a context manager; on exit the phylo shelf of
    :attr:`ref_sim` is closed.
    '''

    args = None
    '''config and command line arguments used for initialisation'''

    ref_sim = None
    ''':class:`VirtualMicrobes.simulation.Simulation` snapshot to analyse'''

    ref_pop_hist = None
    ''':class:`PopulationHistory` for the reference simulation (`ref_sim`) snapshot '''

    compare_saves = []
    '''names of snapshot files to compare to `ref_sim` (replaced per instance in `__init__`)'''

    def __init__(self, args):
        '''
        Initialize the analyzer from an argument namespace.

        Load the population save from file `args.pop_save` into
        :attr:`ref_sim` and, through :meth:`init_ref_history`, initialize
        :attr:`ref_pop_hist` as a :class:`PopulationHistory` that can be used
        to generate and analyze the evolutionary history of the population
        stored in :attr:`ref_sim`.

        Parameters
        ----------
        args : namespace
            arguments attribute object; all its attributes are also forwarded
            as keyword arguments when loading the simulation
        '''
        self.args = args
        self.ref_sim = simu.load_simulation(args.pop_save, **vars(args))
        self.compare_saves = args.compare_saves
        print('historic maximum of production medium: %s'
              % (self.ref_sim.system.population.historic_production_max,))
        self.init_ref_history()

    @staticmethod
    def _save_time_point(save_name):
        '''
        Extract the time point encoded in a population-save file name.

        The time point is the integer after the last underscore of the base
        name, e.g. ``pop_1200.sav`` -> 1200. Only a literal ``.sav`` suffix is
        removed (``str.strip('.sav')`` would strip a *set of characters* from
        both ends instead).
        '''
        base = os.path.basename(save_name)
        if base.endswith('.sav'):
            base = base[:-len('.sav')]
        return int(base.split('_')[-1])

    def _resolve_sampling(self, stride, time_interval):
        '''
        Validate the sampling arguments and fill in configured defaults.

        The defaults from :attr:`args` are only used when neither `stride` nor
        `time_interval` was given; an explicitly chosen sampling mode is never
        combined with the configured value of the other mode.

        Raises
        ------
        ValueError
            when both `stride` and `time_interval` are given
        '''
        if stride is not None and time_interval is not None:
            raise ValueError('defining both lod_generation_interval and lod_time_interval is not allowed')
        if stride is None and time_interval is None:
            stride = self.args.lod_generation_interval
            time_interval = self.args.lod_time_interval
        return stride, time_interval

    def init_ref_history(self, ref_sim=None, nr_lods=None,
                         prune_depth=0, pop_hist_dir='population_history'):
        r'''
        Create a :class:`PopulationHistory` from the :attr:`ref_sim`
        :class:`VirtualMicrobes.simulation.Simulation.Simulation` object.

        For the :class:`PopulationHistory` object constructs its phylogenetic tree
        and prune back the tree to a maximum depth of (max_depth - prune_depth)
        counted from the root. Then create :class:`LOD` objects representing
        the *line of descent* of the `nr_lods` *most diverged* branches in the tree.
        Output is stored in ``<ref_sim.save_dir>/<pop_hist_dir>_<run time>``.

        Parameters
        ----------
        ref_sim : :class:`VirtualMicrobes.simulation.Simulation.Simulation` object
            simulation snapshot that is the basis for `LOD` analysis
            (default: :attr:`ref_sim`)
        nr_lods : int
            nr of separate (most distant) :class:`LOD`\s to initialize
            (default: `args.nr_lods`)
        prune_depth : int
            prune back the phylogenetic tree with this many timesteps
        pop_hist_dir : str
            name of directory to store lod analysis output
        '''
        if ref_sim is None:
            ref_sim = self.ref_sim
        if nr_lods is None:
            nr_lods = self.args.nr_lods
        save_dir = os.path.join(ref_sim.save_dir,
                                pop_hist_dir + '_' + str(ref_sim.run_time))
        # build the history from the simulation that was asked for, not
        # unconditionally from self.ref_sim
        self.ref_pop_hist = PopulationHistory(sim=ref_sim,
                                              params=ref_sim.params,
                                              save_dir=save_dir,
                                              prune_depth=prune_depth)
        self.ref_pop_hist.init_phylo_tree()
        self.ref_pop_hist.init_lods(nr_lods)
        self.ref_pop_hist.init_pop_hist_data_store()

    def compare_to_pops(self):
        '''
        Compare reference simulation to a set of previous population snapshots.

        Compares each of the simulation snapshot saves in :attr:`compare_saves`
        to the :attr:`ref_pop_hist`, in order of increasing time point. A
        :class:`PopulationHistory` is constructed for each of the compare
        snapshots. Within the compare snapshot, individuals that are part of
        (any of) the :class:`LOD`(s) of the :attr:`ref_pop_hist` will be
        identified. Properties of these *ancestors* will then be compared with
        their statistical values for the whole population.
        '''
        # TODO check that compare saves are not older than the reference
        # and raise error when it is the case:
        if not self.args.skip_store:
            self.ref_sim.data_store.init_ancestry_compare_stores(self.ref_pop_hist)
        for compare_save in sorted(self.compare_saves, key=self._save_time_point):
            self.ref_pop_hist.compare_to_pop(compare_save)

    def lod_stats(self):
        r'''
        Write time series of evolutionary changes along all :class:`LOD`\s.
        '''
        print('Running LOD stats')
        self.ref_pop_hist.lod_stats()

    def lod_network_stats(self):
        r'''
        Write time series for evolutionary network property changes along all :class:`LOD`\s.
        '''
        print('Running LOD Network stats')
        self.ref_pop_hist.lod_network_stats()

    def lod_binding_conservation(self):
        r'''
        Write time series for TF binding conservation for :class:`LOD`\s.
        '''
        print('Running LOD binding conservation')
        self.ref_pop_hist.lod_binding_conservation()

    def draw_ref_trees(self):
        r'''Draw a reference phylogenetic tree, with individual, selected :class:`LOD`\s marked'''
        self.ref_pop_hist.draw_ref_trees()

    def lod_graphs(self, stride=None, time_interval=None, lod_range=None, formats=None):
        r'''Draw network and genome graphs for :class:`LOD`\s

        It is possible to set an interval and a range to sample individuals in
        the :class:`LOD`. Arguments left at `None` fall back to the values in
        :attr:`args`; for `stride` and `time_interval` this only happens when
        neither of the two is given.

        Parameters
        ----------
        stride : int
            stride in generations for sampling individuals along the :class:`LOD`
        time_interval : int
            interval in simulation time for sampling individuals along the
            :class:`LOD`
        lod_range : (float,float)
            bounds in fractions of the total range of the :class:`LOD`
        formats : list of str
            image formats (file extensions without the dot) to save

        Raises
        ------
        ValueError
            when both `stride` and `time_interval` are given

        Note
        ----
        Either use a stride or a time interval to sample individuals from the lod.
        '''
        stride, time_interval = self._resolve_sampling(stride, time_interval)
        if lod_range is None:
            lod_range = self.args.lod_range
        if formats is None:
            formats = self.args.image_formats
        self.ref_pop_hist.plot_lod_graphs(stride, time_interval, lod_range, formats)

    def lod_time_courses(self, lod_range=None, chunk_size=None):
        '''
        Write time series of molecule concentrations within the :class:`LOD`

        It is possible to set a range to sample individuals in
        the :class:`LOD`.

        Parameters
        ----------
        lod_range : (float,float)
            bounds in fractions of the total range of the :class:`LOD`
            (default: `args.lod_range`)
        chunk_size : int
            number of generations in LOD to concatenate per chunk
            (default: `args.time_course_chunk_size`)
        '''
        if lod_range is None:
            lod_range = self.args.lod_range
        if chunk_size is None:
            chunk_size = self.args.time_course_chunk_size
        self.ref_pop_hist.lods_time_course_data(lod_range, chunk_size)

    def lod_time_course_plots(self, stride=None, time_interval=None, lod_range=None, formats=None):
        r'''
        Draw time course diagrams for individuals in the :class:`LOD`\s.

        It is possible to set an interval and a range to sample individuals in
        the :class:`LOD`. Arguments left at `None` fall back to the values in
        :attr:`args`; for `stride` and `time_interval` this only happens when
        neither of the two is given.

        Parameters
        ----------
        stride : int
            stride in generations for sampling individuals along the :class:`LOD`
        time_interval : int
            interval in simulation time for sampling individuals along the
            :class:`LOD`
        lod_range : (float,float)
            bounds in fractions of the total range of the :class:`LOD`
        formats : list of str
            image formats (file extensions without the dot) to save

        Raises
        ------
        ValueError
            when both `stride` and `time_interval` are given

        Note
        ----
        Either use a stride or a time interval to sample individuals from the lod.
        '''
        stride, time_interval = self._resolve_sampling(stride, time_interval)
        if lod_range is None:
            lod_range = self.args.lod_range
        if formats is None:
            formats = self.args.image_formats
        self.ref_pop_hist.lods_time_course_plots(stride, time_interval, lod_range, formats)

    def write_newick_trees(self):
        '''write newick trees for all phylogenies in :attr:`ref_pop_hist`'''
        self.ref_pop_hist.write_newick_trees()

    def __enter__(self):
        return self

    def __exit__(self, _type, _value, _traceback):
        # release the phylo shelf of the reference simulation
        self.ref_sim.close_phylo_shelf()

    def __str__(self):
        return str(self.ref_pop_hist)
class PopulationHistory(object):
    r'''
    Performs and stores evolutionary history analysis of
    :class:`VirtualMicrobes.simulation.Simulation.Simulation` snapshots.

    Generates :class:`LOD`\s for 1 or more individuals in the population to plot
    the evolutionary events along the line of descent.

    A reference :class:`PopulationHistory` can also be compared to *population
    history* at earlier simulation time points. In this case the ancestors of
    individuals in the reference *population history* will be identified and
    compared to the rest of the population at that point in time. In this way,
    evolutionary biases on the line of descent can be brought to light.
    '''

    sim = None
    '''The :class:`VirtualMicrobes.simulation.Simulation.Simulation` snapshot for which this pophist was made.'''

    params = None
    '''The (updated) simulation parameters.'''

    prune_depth = 0
    '''Number of generations from leaves to prune the phylogenetic tree of the pophist.'''

    population = None
    '''Short cut to :class:`VirtualMicrobes.virtual_cell.Population.Population` of `sim`.'''

    environment = None
    '''Short cut to :class:`VirtualMicrobes.environment.Environment` of `sim`.'''

    time_point = None
    '''Last simulation time of the `sim`.'''

    tree_lods = []
    r'''List of ``(tree structure, lods)`` pairs, one for each independent phylogenetic tree
    within the population; `lods` maps a leaf to its :class:`LOD`. Replaced per instance in `__init__`.'''

    def __init__(self, sim, params, save_dir=None ,prune_depth=None):
        '''
        Set parameters for phylogenetic analysis.

        Also updates the GRN of all current ancestors when
        `params.reconstruct_grn` is set, and initialises the random generator
        and the test bed locality.

        Parameters
        ----------
        sim : :class:`VirtualMicrobes.simulation.Simulation.Simulation`
        params : dict
            the simulation parameters
        save_dir : str
            path to save analysis data to; created when it is not `None`
        prune_depth : int
            number of generations to prune from the leafs of phylogenetic tree
        '''
        self.save_dir = save_dir
        if self.save_dir is not None:
            util.ensure_dir(self.save_dir)
        self.sim = sim
        self.params = params
        self.prune_depth = prune_depth
        self.population = sim.system.population
        if self.params.reconstruct_grn:
            for anc in self.population.current_ancestors:
                anc.update_grn()
        self.environment = sim.system.environment
        self.time_point = sim.run_time
        self.init_rand_gens()
        self.init_test_bed()
        self.tree_lods = list()

    @staticmethod
    def _save_time_point(save_name):
        '''
        Extract the time point encoded in a population-save file name.

        The time point is the integer after the last underscore of the base
        name, e.g. ``pop_1200.sav`` -> 1200. Only a literal ``.sav`` suffix is
        removed (``str.strip('.sav')`` would strip a *set of characters* from
        both ends instead).
        '''
        base = os.path.basename(save_name)
        if base.endswith('.sav'):
            base = base[:-len('.sav')]
        return int(base.split('_')[-1])

    def init_phylo_tree(self, prune_depth=None):
        '''
        Update the phylogenetic tree of the population.

        Clears the change in the population of the final regular simulation
        step. Prunes back the tree to a maximum depth.

        Parameters
        ----------
        prune_depth : int
            number of generations to prune from the leafs of phylogenetic tree
            (default: :attr:`prune_depth`); no pruning is done when it is `None`
        '''
        if prune_depth is None:
            prune_depth = self.prune_depth
        self.population.clear_pop_changes()
        self.population.update_phylogeny()
        phylo_tree = self.population.phylo_tree
        phylo_tree.to_ete_trees()
        for root_id, ete_tree_struct in phylo_tree.ete_trees.items():
            print('pruning ete tree %s' % (root_id,))
            if prune_depth is not None:
                max_depth = phylo_tree.max_depth
                print('tree has depth %s' % (max_depth,))
                phylo_tree.ete_prune_external(ete_tree_struct,
                                              max(0, max_depth - prune_depth))

    def init_lods(self, nr_lods, save_dir=None,
                  stride=None, time_interval=None, lod_range=None):
        '''
        Initialize the line of descent (:class:`LOD`) container objects.

        Iterate over the phylogenetic trees of the :attr:`population` and for
        each tree select `nr_lods` leaf nodes that are at maximum phylogenetic
        distance.
        For each of the selected leafs, construct a line of descent object
        (:class:`LOD`). One ``(tree structure, lods)`` pair per tree is
        appended to :attr:`tree_lods`.

        Parameters
        ----------
        nr_lods : int
            number of :class:`LOD` objects per phylogenetic tree
        save_dir : str
            base directory for LOD output (default: :attr:`save_dir`)
        stride : int
            stride in generations for sampling individuals along the :class:`LOD`
        time_interval : int
            interval in simulation time for sampling individuals along the
            :class:`LOD`
        lod_range : (float,float)
            bounds in fractions of the total range of the :class:`LOD`

        Raises
        ------
        Exception
            when a selected leaf has no living offspring
        '''
        if save_dir is None:
            save_dir = self.save_dir
        if stride is None:
            stride = self.params.lod_generation_interval
        if time_interval is None:
            time_interval = self.params.lod_time_interval
        if lod_range is None:
            lod_range = self.params.lod_range
        phylo_tree = self.population.phylo_tree
        # iterate (potentially multiple) phylogenetic trees for the population
        for root_id, ete_tree_struct in phylo_tree.ete_trees.items():
            tree_save_dir = root_id.split('_')[0]
            print('constructing lines of descent for ete tree %s' % (root_id,))
            leafs = phylo_tree.ete_n_most_distant_phylo_units(ete_tree_struct, nr_lods)
            if any(not leaf.has_living_offspring() for (leaf, _ete_node) in leafs):
                raise Exception('leaf with no living offspring found')
            print('done')
            lods = collections.OrderedDict()
            # print information on the phylogenetic distances between leafs
            for l1, l2, t_dist, top_dist in phylo_tree.distances(ete_tree_struct.tree, leafs):
                print('%s --> %s : time_dist : %s topology_dist : %s'
                      % (l1.id, l2.id, t_dist, top_dist))
            for leaf, _ete_node in leafs:
                # record 1 lod per leaf (in a clonal, the default, population
                # there is only 1 lod per leaf)
                lod = next(iter(leaf.lods_up()), None)
                if lod is None:
                    continue
                name = str(leaf.id)
                lod_save_dir = os.path.join(save_dir, tree_save_dir, name)
                lods[leaf] = LOD(list(lod), name=name, save_dir=lod_save_dir,
                                 stride=stride, time_interval=time_interval,
                                 lod_range=lod_range)
            self.tree_lods.append((ete_tree_struct, lods))

    def init_pop_hist_data_store(self):
        '''Initialise the phylogenetic history stores of the simulation's data store for this object.'''
        self.sim.data_store.init_phylo_hist_stores(phylo_hist=self)

    def identify_lod_ancestor(self, ete_tree_struct, lod):
        '''
        Identify the individual in the population that is on the line of descent
        (lod) under consideration.

        The nodes in the ete tree corresponding to the *lod* will be annotated
        with a tag (feature ``lod`` set to `True`).

        Parameters
        ----------
        ete_tree_struct : :class:`VirtualMicrobes.my_tools.utility.ETEtreeStruct`
            container structure for phylogenetic tree representations
        lod : :class:`LOD`
            line of descent

        Returns
        -------
        (:class:`VirtualMicrobes.virtual_cell.Cell.Cell`, :class:`ete3.TreeNode`)
            the last ancestor on the `lod` that is present in the tree and its
            (first) tree node representation; ``(None, None)`` when no
            ancestor is present in the tree
        '''
        last, last_ete = None, None
        phylo2ete_dict = self.population.phylo_tree.ete_get_phylo2ete_dict(ete_tree_struct)
        # match on (id, time of birth) instead of on object identity
        phylo_id2phylo = {(str(phylo_unit.id), phylo_unit.time_birth): phylo_unit
                          for phylo_unit in phylo2ete_dict}
        for anc in lod.lod:  # going from oldest to youngest
            anc_id = (str(anc.id), anc.time_birth)
            if anc_id not in phylo_id2phylo:
                # reached an ancestor that lives later than the leafs in the tree
                break
            last = phylo_id2phylo[anc_id]
            ete_nodes = phylo2ete_dict[last]
            last_ete = ete_nodes[0]
            for ete_node in ete_nodes:
                ete_node.add_feature('lod', True)  # annotate the ete nodes as being on the lod
        return last, last_ete

    def init_rand_gens(self, rand_seed=None):
        '''
        (Re)create the random generator :attr:`test_rand` used by the test bed.

        Parameters
        ----------
        rand_seed : int
            seed for the generator (default: `params.test_rand_seed`)
        '''
        if rand_seed is None:
            rand_seed = self.params.test_rand_seed
        self.test_rand = random.Random(int(rand_seed))

    def init_test_bed(self):
        '''Create :attr:`test_bed`, a :class:`Locality` built from the molecules and reactions of the environment.'''
        self.test_bed = Locality(self.params,
                                 internal_molecules=self.environment.internal_molecules,
                                 influx_reactions=self.environment.influx_reactions,
                                 degradation_reactions=self.environment.degradation_reactions,
                                 env_rand=self.test_rand)

    def init_integrator(self, diffusion_steps=None, between_diffusion_reports=None,
                        max_retries=3, retry_steps_factor=2.):
        '''
        Create an integrator for the :attr:`test_bed` locality.

        The number of stored time points is
        ``diffusion_steps * between_diffusion_reports * retry_steps_factor ** max_retries``
        (truncated to an int, at least 1).

        Parameters
        ----------
        diffusion_steps : int
            default: `params.diffusion_steps`
        between_diffusion_reports : int
            default: `params.between_diffusion_reports`
        max_retries : int
        retry_steps_factor : float

        Returns
        -------
        the newly created integrator
        '''
        if diffusion_steps is None:
            diffusion_steps = self.params.diffusion_steps
        if between_diffusion_reports is None:
            between_diffusion_reports = self.params.between_diffusion_reports
        max_time_steps_store = max(int(diffusion_steps * between_diffusion_reports
                                       * retry_steps_factor ** (max_retries)), 1)
        integrator = integrate.Integrator(locality=self.test_bed,  # @UndefinedVariable
                                          nr_time_points=max_time_steps_store,
                                          nr_neighbors=0,
                                          num_threads=1,
                                          step_function=self.params.step_function,
                                          hstart=self.params.init_step_size,
                                          epsabs=self.params.absolute_error,
                                          epsrel=self.params.relative_error,
                                          init_time=0.)
        return integrator

    def write_newick_trees(self):
        '''Write every tree in :attr:`tree_lods` to ``tree_<name>.nw`` in :attr:`save_dir`.'''
        for ete_tree_struct, _lods in self.tree_lods:
            name = ete_tree_struct.tree.name.split('_')[0]
            outfile = os.path.join(self.save_dir, 'tree' + '_' + name + '.nw')
            ete_tree_struct.tree.write(format=1, outfile=outfile)

    def lod_stats(self):
        '''
        Add line of descent data to the data store of the simulation.

        Every tree is first annotated with the mutation and fitness statistics
        and 'iterage' (cumulative) and with the functional statistics (not
        cumulative) named by the data store.
        '''
        data_store = self.sim.data_store
        cumulative_features = data_store.mut_stats_names + data_store.fit_stats_names + ['iterage']
        simple_features = data_store.functional_stats_names
        for ete_tree_struct, lods in self.tree_lods:
            self.population.annotate_phylo_tree(ete_tree_struct=ete_tree_struct,
                                                features=cumulative_features)
            self.population.annotate_phylo_tree(ete_tree_struct=ete_tree_struct,
                                                features=simple_features, cummulative=False)
            for ref, lod in lods.items():
                print(ref.id)
                data_store.add_lod_data(ete_tree_struct, lod, self.population, self.environment)
        print('done')

    def lod_network_stats(self):
        r'''
        Add network data of all :class:`LOD`\s to the data store of the simulation.
        '''
        for ete_tree_struct, lods in self.tree_lods:
            print('ete_tree %s' % (ete_tree_struct.tree.name,))
            for ref, lod in lods.items():
                print('lod %s' % (ref.id,))
                self.sim.data_store.add_lod_network_data(lod)
        print('done')

    def lod_binding_conservation(self):
        r'''
        Add binding conservation data of all :class:`LOD`\s to the data store of the simulation.
        '''
        for ete_tree_struct, lods in self.tree_lods:
            print('ete_tree %s' % (ete_tree_struct.tree.name,))
            for ref, lod in lods.items():
                print('lod %s' % (ref.id,))
                self.sim.data_store.add_lod_binding_conservation(lod)
        print('done')

    def plot_lod_graphs(self, stride, time_interval, lod_range, formats):
        r'''
        Draw network and genome graphs for sampled individuals of all :class:`LOD`\s.

        A :class:`LOD` whose sample is empty is skipped.

        Parameters
        ----------
        stride : int
            stride in generations for sampling individuals along the :class:`LOD`
        time_interval : int
            interval in simulation time for sampling individuals along the
            :class:`LOD`
        lod_range : (float,float)
            bounds in fractions of the total range of the :class:`LOD`
        formats : list of str
            image formats (file extensions without the dot) to save
        '''
        metabolites = self.environment.mols_per_class_dict
        conversions = self.environment.reactions_dict['conversion']
        imports = self.environment.reactions_dict['import']
        # a real list: the suffixes are iterated once for every saved figure
        suffixes = ['.' + fmt for fmt in formats]
        for ete_tree_struct, lods in self.tree_lods:
            print('ete_tree %s' % (ete_tree_struct.tree.name,))
            for ref, lod in lods.items():
                print('adding network graphs for lod %s' % (str(ref.id),))
                attr_dict = self.sim.graphs.attribute_mapper
                GRN_grapher = BindingNetwork(lod.save_dir, 'GRN',
                                             attribute_dict=attr_dict, show=False)
                metabolome_grapher = MetabolicNetwork(lod.save_dir, 'Metabolome',
                                                      mol_class_dict=metabolites,
                                                      conversions=conversions,
                                                      imports=imports,
                                                      attribute_dict=attr_dict, show=False)
                # Initialise grapher for genome structure (and make directory)
                genome_grapher = Genome(lod.save_dir, 'Genome',
                                        attribute_dict=attr_dict, show=False)
                ancestors = lod.strided_lod(stride, time_interval, lod_range)
                if not ancestors:
                    # nothing sampled from this lod; max() below needs at least one ancestor
                    continue
                max_genome = max(anc.genome_size for anc in ancestors)
                for anc in ancestors:
                    _plot_cell_graphs(anc, GRN_grapher, metabolome_grapher,
                                      genome_grapher, max_genome, suffixes)

    def lods_time_course_plots(self, stride, time_interval, lod_range, formats):
        r'''
        Draw time course diagrams for sampled individuals of all :class:`LOD`\s.

        Figures are saved in the ``time_course_plots`` directory of each
        :class:`LOD`.

        Parameters
        ----------
        stride : int
            stride in generations for sampling individuals along the :class:`LOD`
        time_interval : int
            interval in simulation time for sampling individuals along the
            :class:`LOD`
        lod_range : (float,float)
            bounds in fractions of the total range of the :class:`LOD`
        formats : list of str
            image formats (file extensions without the dot) to save
        '''
        # a real list: the suffixes are iterated once for every saved figure
        suffixes = ['.' + fmt for fmt in formats]
        for ete_tree_struct, lods in self.tree_lods:
            print('ete_tree %s' % (ete_tree_struct.tree.name,))
            for ref, lod in lods.items():
                print('lod %s' % (ref.id,))
                save_dir = os.path.join(lod.save_dir, 'time_course_plots')
                util.ensure_dir(save_dir)
                for anc in lod.strided_lod(stride, time_interval, lod_range):
                    _plot_cell_time_course(anc, self.sim.graphs,
                                           save_dir=save_dir, suffixes=suffixes)

    def lods_time_course_data(self, lod_range, chunk_size):
        r'''
        Write time series data in the lines of descent.

        For every :class:`LOD`, concatenates the time courses of all individuals
        within `lod_range` (no stride or time interval is applied).
        Concatenations are done in *chunks* of a chosen `chunk_size` and written
        to the ``time_courses`` directory of the :class:`LOD`, together with the
        files of the ``time_course_viewer`` directory in the simulation's
        utility path.

        Parameters
        ----------
        lod_range : (float,float)
            bounds in fractions of the total range of the :class:`LOD`
        chunk_size : int
            length of chunks of concatenated data
        '''
        viewer_path = os.path.join(self.sim.utility_path, 'time_course_viewer')
        for ete_tree_struct, lods in self.tree_lods:
            print('ete_tree %s' % (ete_tree_struct.tree.name,))
            for ref, lod in lods.items():
                print('lod %s' % (ref.id,))
                save_dir = os.path.join(lod.save_dir, 'time_courses')
                util.ensure_dir(save_dir)
                ancestors = lod.strided_lod(stride=None, time_interval=None,
                                            lod_range=lod_range)
                _lod_time_course_data(ancestors, base_save_dir=save_dir,
                                      viewer_path=viewer_path,
                                      chunk_size=chunk_size)

    def draw_ref_trees(self):
        r'''
        Draw the phylogenetic trees with the selected :class:`LOD`\s marked.

        Each tree is saved as an svg in
        ``<save_dir>/<tree name>/ancestry_plots``.
        '''
        phylo_tree = self.population.phylo_tree
        for ete_tree_struct, lods in self.tree_lods:
            func_features = {'metabolic_type': self.population.metabolic_type_color}
            self.population.annotate_phylo_tree(ete_tree_struct,
                                                func_features=func_features,
                                                cummulative=False,
                                                prune_internal=True,
                                                to_rate=False)
            for leaf, lod in lods.items():
                phylo_tree.annotate_phylo_units_feature(ete_tree_struct,
                                                        lod.lod, 'in_lod')
                phylo_tree.annotate_leafs(ete_tree_struct, leaf)
            attr_dict = self.sim.graphs.attribute_mapper
            rescale_factor = 500.0 / phylo_tree.ete_calc_lca_depth(ete_tree_struct.tree)
            print('rescale factor is ' + str(rescale_factor))
            save_loc = os.path.join(self.save_dir,
                                    ete_tree_struct.tree.name.split('_')[0],
                                    'ancestry_plots')
            phylo_grapher = PhyloTreeGraph(save_loc, name='Phylotree',
                                           attribute_dict=attr_dict, show=False)
            phylo_grapher.update(ete_tree_struct.tree)
            phylo_grapher.save_fig(feature='metabolic_with_lod', name='lodstree',
                                   rescale=rescale_factor, dpi=10, suffix=".svg")
            print('Plotted reference tree')

    def compare_to_pop(self, compare_save, prune_depth=None, leafs_sample_size=None):
        '''
        Compare the reference :class:`PopulationHistory` to an earlier population-save.

        The population-save is loaded under a ``temp`` sub-name of the
        simulation name. Its phylo shelf is closed again when the comparison
        is done, also when the comparison fails.

        Parameters
        ----------
        compare_save : str
            file location of population-save; the time point is read from the
            number after the last underscore in its name
        prune_depth : int
            prune back phylogeny of the `compare_save` with this many timesteps
            (default: `params.prune_compare_pop`)
        leafs_sample_size : int
            maximum number of phylogenetic tree leafs to use for comparison
            (default: `params.leafs_sample_size`)
        '''
        if prune_depth is None:
            prune_depth = self.params.prune_compare_pop
        if leafs_sample_size is None:
            leafs_sample_size = self.params.leafs_sample_size
        params = copy.copy(self.params)
        params['name'] = os.path.join(params['name'], 'temp')
        compare_sim = simu.load_simulation(compare_save, **params)
        try:
            comp_pop_hist = PopulationHistory(sim=compare_sim,
                                              params=self.sim.params,
                                              prune_depth=prune_depth)
            comp_pop_hist.init_phylo_tree()
            comp_phylo_tree = comp_pop_hist.population.phylo_tree
            time_point = self._save_time_point(compare_save)
            cumulative_features = (self.sim.data_store.mut_stats_names +
                                   self.sim.data_store.fit_stats_names + ['iterage'])
            for ref_ete_tree_struct, ref_lods in self.tree_lods:
                for comp_ete_tree_struct in comp_phylo_tree.ete_trees.values():
                    if ref_ete_tree_struct.tree.name != comp_ete_tree_struct.tree.name:
                        # not comparing trees with the same root
                        continue
                    comp_pop_hist.population.annotate_phylo_tree(
                        ete_tree_struct=comp_ete_tree_struct,
                        features=cumulative_features)
                    comp_dat = comp_ete_tree_struct, comp_pop_hist
                    self.sim.data_store.add_ancestry_data_point(comp_dat, ref_lods,
                                                                time_point,
                                                                leafs_sample_size)
        finally:
            # always release the shelf of the temporary simulation
            compare_sim.close_phylo_shelf()

    def __str__(self):
        # tree_lods holds (tree structure, {leaf: LOD}) pairs; print the LODs,
        # not the leaf keys of the mapping
        return '\n'.join([str(lod) for _tree, lods in self.tree_lods
                          for lod in lods.values()])
[docs]class LOD(object):
'''
classdocs
'''
def __init__(self, lod, name, stride, time_interval, lod_range, save_dir=None):
'''
Store the line of descent to analyse.
'''
self.lod = lod
self.name = name
self.stride = stride
self.time_interval = time_interval
self.lod_range = lod_range
self.save_dir = save_dir
if self.save_dir is not None:
util.ensure_dir(self.save_dir)
[docs] def standardized_production(self, test_params):
for c in self.lod:
self.test_bed.clear_locality()
self.test_bed.add_cell(c)
integrator = self.init_integrator()
self.run_system(integrator)
[docs] def strided_lod(self, stride, time_interval, lod_range):
if lod_range is None:
lod_range = (0.,1.)
if stride is not None and time_interval is not None:
raise Exception('defining both stride and time_interval is not allowed')
lod_all = list(self)
if stride is not None:
ancestors = lod_all[::stride]
elif time_interval is not None:
ancestors = list(self.t_interval_iter(time_interval))
else:
ancestors = lod_all
if ancestors[-1] != lod_all[-1]:
ancestors.append(lod_all[-1])
from_root, from_leaf = int(lod_range[0]*len(ancestors)), int(lod_range[1]*len(ancestors))
return ancestors[from_root:from_leaf]
[docs] def t_interval_iter(self, time_interval):
'''
iterate ancestors that are approximately 'time_interval' timesteps apart
in their time of birth.
'''
mod_time = time_interval
for anc in self:
prev_mod_time = mod_time
mod_time = anc.time_birth % time_interval
if mod_time > prev_mod_time:
continue
yield anc
def __iter__(self):
return iter(self.lod)
def __str__(self):
return '\t'+'\n\t'.join([ str(c) for c in self.lod ])