Source code for evolvers

"""
##################
evolvers -- cmcpy module for abstract base class of Ardell Sella Evolvers
########
"""
import numpy
import math
import os
import time
import abc # requires python 2.6
import copy
import multiprocessing
import sys
try:                                                                                                 
        import pycuda.driver as cuda                                       
        import pycuda.autoinit                                                   
        from pycuda.compiler import SourceModule            
except:
        pass 

[docs]class ArdellSellaEvolverAbstractBase(object): """ Abstract Base Class for ArdellSellaEvolvers for Code-Message Coevolution corresponding to models published in Ardell and Sella (2001, 2002) and Sella and Ardell (2002, 2006). Concrete Implementations subclass from this for different implementations of Eigenvalue solutions. """ __metaclass__ = abc.ABCMeta def __init__(self, initial_code, site_types, delta, epsilon, observables): self.code = initial_code self.codons = initial_code.codons self.site_types = site_types self.aas = site_types.aas self.num_codons = self.codons.num_codons self.num_aas = site_types.num_aas self.num_site_types = site_types.num_site_types self.mutation_matrix = self.codons.get_mutation_matrix() ## THIS COMMITS THIS EVOLVER TO STATIC MUTATION self.fitness_matrix = site_types.get_fitness_matrix() ## THIS COMMITS THIS EVOLVER TO STATIC FITNESS self.delta = delta self.epsilon = epsilon self.eigenmatrix = numpy.ones((self.num_site_types,self.num_codons)) #.astype(numpy.float32) self.eigenvalues = numpy.ones(self.num_site_types) self.observables = observables self.num_fitter_code_mutants = 0 self.max_mutant_fitness = 0 self.frozen = False self.initial_equilibrate_messages() self.equilibrated = True self._growth_rate_computed = False self.print_initial_observables() self.print_observables_header()
[docs] def get_mutation_selection_matrix(self,alpha): mutation = self.mutation_matrix code = self.code.get_effective_code_matrix() fitness = self.fitness_matrix return numpy.dot(mutation, numpy.diag(numpy.dot(code,fitness[alpha])))
[docs] def get_selection_matrix(self,alpha): mutation = self.mutation_matrix code = self.code.get_effective_code_matrix() fitness = self.fitness_matrix return numpy.diag(numpy.dot(code,fitness[alpha]))
[docs] def get_eigenvalue( self, msm, eigenvec): top = numpy.dot(msm,eigenvec) * eigenvec bot = eigenvec * eigenvec lamda = top.sum() / bot.sum() return lamda
@abc.abstractmethod
[docs] def equilibrate_messages(self): '''Compute eigensystems in site-types for an established genetic code. This finds eigensystems (codon frequencies and growth rates) for different site-types given the genetic code. Abstract method: subclasses must: 1) store their results by setting self.eigenvalues and self.eigenmatrix 2) set self.equilibrated to True 3) call super(<<SubClass>>, self).equilibrate_messages() to print observables at end of subclass method where <<SubClass>> is the subclass name to print observables. ''' pass
[docs] def initial_equilibrate_messages(self): '''Compute eigensystems in site-types for an established genetic code. This finds eigensystems (codon frequencies and growth rates) for different site-types given the genetic code. ''' if self.code.is_ambiguous(): self.eigenmatrix = numpy.ones((self.num_site_types,self.num_codons)) / self.num_codons msm = self.get_mutation_selection_matrix(0) lamda = self.get_eigenvalue(msm,self.eigenmatrix[0]) self.eigenvalues = numpy.ones((self.num_site_types,1)) * lamda self.equilibrated = True else: self.equilibrate_messages()
[docs] def growth_rate_from_lambda(self): if (not(self.equilibrated)) : self.equilibrate_messages() gr = 1.0 site_weight = self.site_types.get_site_type_weights() for alpha in xrange(self.num_site_types): gr *= self.eigenvalues[alpha] ** site_weight[alpha] return gr[0]
[docs] def growth_rate(self): ''' ''' if (not(self.equilibrated)): self.equilibrate_messages() if (self._growth_rate_computed): return self._growth_rate else: self._growth_rate = self.compute_code_fitness_given_messages(self.messages(), self.code.get_effective_code_matrix()) self._growth_rate_computed = True return self._growth_rate
[docs] def messages(self): ''' ''' if (not(self.equilibrated)): self.equilibrate_messages() return self.eigenmatrix.copy()
[docs] def compute_code_fitness_given_messages(self, equilibrated_messages, effective_code_matrix): ''' Implement eg eqns. 2-7 from Sella and Ardell(2006) ''' nc = self.num_codons na = self.num_aas ns = self.num_site_types site_weight = self.site_types.get_site_type_weights() u = usage = equilibrated_messages ## assumed here to be ns x nc c = code = effective_code_matrix ## assumed here to be nc x na w = fitness_coef = self.fitness_matrix ## assumed here to be ns x na **THIS IS CRITICAL FOR NEW SITE-TYPE SPACE DEFINITIONS** ## solution number one: fit1 = numpy.prod(numpy.sum(u.repeat(na,axis=1) * numpy.tile(c.ravel(),(ns,1)) * numpy.tile(w,(1,nc)),axis=1)** site_weight) return fit1 ## ## solution number four: ## pdb.set_trace() ## fitness = 1.0 ## for alpha in xrange(ns): ## fitness_per_site_type = 0.0 ## for codon in xrange(nc): ## # fitness_per_codon_per_site_type = 0.0 ## for aa in xrange (na): ## # fitness_per_codon_per_site_type += code[codon][aa] * fitness_coef[alpha][aa] ## #fitness_per_site_type += fitness_per_codon_per_site_type * usage[alpha][codon] ## fitness_per_site_type += code[codon][aa] * fitness_coef[alpha][aa] * usage[alpha][codon] ## fitness *= fitness_per_site_type ** site_weight[alpha] ## return fitness
[docs] def compute_max_fitness_code_mutation(self): ''' ''' if (not(self.equilibrated)): self.equilibrate_messages() code_mutation = None mutation_num = 0 max_mutant_fitness = 0 max_mutation_num = 0 max_fitness_code_mutation = None num_fitter_code_mutants = 0 self.num_fitter_code_mutants = 0 for codon in xrange(self.num_codons): for aa in xrange(self.num_aas): if self.code.encodes(codon,aa): continue else: code_mutation = self.code.mutate(codon,aa) mutant_fitness = self.compute_code_fitness_given_messages(self.messages(), code_mutation.get_effective_code_matrix()) if (mutant_fitness > self.growth_rate()): num_fitter_code_mutants += 1 if (mutant_fitness > max_mutant_fitness and math.fabs(mutant_fitness - max_mutant_fitness) > self.epsilon): max_mutant_fitness = mutant_fitness max_fitness_code_mutation = code_mutation self.num_fitter_code_mutants = num_fitter_code_mutants return (max_mutant_fitness,max_fitness_code_mutation)
[docs] def evolve_code_unless_frozen(self): ''' Mutate genetic code. Unless genetic code is frozen, mutate it according to the Ardell and Sella models, and update code to most fit mutant if it exists. If no more fit mutant code exists, set the "frozen" attribute to True. ''' if (not(self.frozen)): (max_mutant_fitness, max_fitness_code_mutation) = self.compute_max_fitness_code_mutation() if (max_mutant_fitness > self.growth_rate()): self.code.update(max_fitness_code_mutation) self._growth_rate_computed = False self.equilibrated = False self.max_mutant_fitness = max_mutant_fitness else: self.frozen = True
[docs] def evolve_one_step(self): ''' Mutate genetic code and equilibrate messages. Unless genetic code is frozen, mutate it according to the Ardell and Sella models, update code to most fit mutant if it exists, and equilibrate messages to the new mutant genetic code. If no more fit mutant code exists, set the "frozen" attribute to True. ''' if (not(self.frozen)): self.evolve_code_unless_frozen() if (not(self.equilibrated)): self.equilibrate_messages() if not(self.observables.show_frozen_results_only): self.print_observables()
[docs] def evolve_until_frozen(self): ''' Iteratively evlove genetic code and messages. Until genetic code is frozen, mutate it according to the Ardell and Sella models, update code to most fit mutant if it exists, and equilibrate messages to the new mutant genetic code. Once no more fit mutant code exists, set the "frozen" attribute to True. ''' while (not(self.frozen)): self.evolve_one_step() if self.observables.show_frozen_results_only: self.print_observables()
[docs] def print_initial_observables(self): if (self.observables.show_initial_parameters): print '# Delta:{} Epsilon:{}'.format(self.delta,self.epsilon) if (self.observables.show_matrix_parameters): precision = self.observables.print_precision mm = self.codons.get_mutation_matrix() print "# Mutation Matrix:\n",mm dm = self.aas.get_distance_matrix() print "# Distance matrix:\n",dm.round(precision) fm = site_types.get_fitness_matrix() print "# Fitness matrix:\n",fm.round(precision) msm = self.get_mutation_selection_matrix(0) print "# Iteration Matrix for site-type 0:\n",msm.round(precision)
[docs] def print_observables_header(self): if self.observables.show_code_evolution_statistics or self.observables.show_all: print '# RBE = Reassignments Before Explicit' print '# RAE = Reassignments After Explicit' print '# NAA = Number encoded Amino Acids' print '# NER = Normalized Encoded Range (Ardell and Sella, 2001)' if self.observables.show_fitness_statistics or self.observables.show_all: print '# NFCM = Number Fitter Code Mutants' print '# MMF = Maximum Mutant Fitness' print '# GR = Growth Rate' print '# GRFL = Growth Rate from Lambda' print '#' precision = self.observables.print_precision width = precision + 8 print '#{:>{width}s}'.format('STEPS',width=(width-1)), if self.observables.show_code_evolution_statistics or self.observables.show_all: obs = ['RBE','RAE','NAA','NER'] print ('{:>{width}s}'*(len(obs))).format(*obs,width=width), if self.observables.show_fitness_statistics or self.observables.show_all: obs = ['NFCM','MMF','GR','GRFL'] print ('{:>{width}s}'*(len(obs))).format(*obs,width=width), print
[docs] def print_observables(self): if self.observables.show_codes or self.observables.show_all: print '{}'.format(self.code) precision = self.observables.print_precision width = precision + 8 print '{:{width}d}'.format(self.code.num_mutations,width=width), #if self.observables.show_codes_single_line: # print '{}\t'.format(self.code.as_string), if self.observables.show_code_evolution_statistics or self.observables.show_all: obs = [self.code.num_reassignments_before_explicit(),self.code.num_reassignments_after_explicit(),self.code.num_encoded_amino_acids()] print ('{:>{width}d}'*(len(obs))).format(*obs,width=width), ner = self.code.normalized_encoded_range() if ner.__class__.__name__ == 'str': type = 's' else: type = 'f' print '{:>{width}.{precision}{type}}'.format(self.code.normalized_encoded_range(),width=width-1,precision=precision,type=type), if self.observables.show_fitness_statistics or self.observables.show_all: print '{:>{width}d}'.format(self.num_fitter_code_mutants,width=width), obs = [self.max_mutant_fitness,self.growth_rate(),self.growth_rate_from_lambda()] print ('{:>{width}.{precision}f}'*(len(obs))).format(*obs,width=width,precision=precision), if self.observables.show_messages: print self.messages().round(precision), print "\n"
[docs]class ArdellSellaEvolverPowerMethod(ArdellSellaEvolverAbstractBase): def __init__(self, initial_code, site_types, num_processes, observables, delta=10**-32, epsilon=10**-12): ArdellSellaEvolverAbstractBase.__init__(self, initial_code, site_types, delta, epsilon, observables)
[docs] def compute_eigensystem(self, alpha, max_time = 60, numpy_type = numpy.float64): msm = self.get_mutation_selection_matrix(alpha).astype(numpy_type) eigenvec = numpy.ones(self.num_codons).astype(numpy_type) start = time.time() error = 1 while ( ( (time.time() - start) < max_time ) and ( math.fabs(error) > self.delta )): eigenvec_old = eigenvec eigenvec = numpy.dot( msm, eigenvec_old ) eigenvec /= eigenvec.sum() error = numpy.linalg.norm(eigenvec - eigenvec_old) if ( math.fabs(error) > self.delta ): print 'Thread time-out occured. Eigenvalue did not converge to specified accuracy %f for site-type %d within time limit.' % (self.delta,alpha) self.eigenvalues[alpha] = self.get_eigenvalue(msm,eigenvec) self.eigenmatrix[alpha] = eigenvec
[docs] def equilibrate_messages(self): for alpha in xrange(self.num_site_types): self.compute_eigensystem(alpha) self.equilibrated = True super(ArdellSellaEvolverPowerMethod, self).equilibrate_messages()
[docs]class ArdellSellaEvolverPowerMethodProcessChild(multiprocessing.Process): """ Finds the eigensystem (message equilibrium and growth rate) for an established genetic code """ def __init__(self, in_queue, out_queue, num_codons, delta, max_time = 60): multiprocessing.Process.__init__(self) self.in_queue = in_queue # holds site type index and (DA: code?) matrix and computeS self.out_queue = out_queue # holds site type index leading eigenvalue and corresponding eigenvector self.delta = delta # to what degree of precision user wants to calculate leading eigenvalue self.max_time = max_time # in seconds. Each power fucntion has a time limit default is 1 hour self.num_codons = num_codons
[docs] def run(self): while True: lamda = 987 err = self.delta + 1 alpha, msm = self.in_queue.get() # alpha indexes the site-type eigenvec = numpy.ones(self.num_codons) start = time.time() convergence = True while ( ( (time.time() - start) < self.max_time ) and ( math.fabs(err) > self.delta ) ): eigenvec_old = eigenvec eigenvec = numpy.dot( msm, eigenvec_old ) eigenvec /= eigenvec.sum() error = numpy.linalg.norm(eigenvec - eigenvec_old) if ( math.fabs(error) > self.delta ): convergence = False # print 'Thread time-out occured. Eigenvalue did not converge to specified accuracy %f for site-type %d within time limit.' % (self.delta,alpha) self.out_queue.put((alpha,msm,eigenvec,convergence)) self.in_queue.task_done()
[docs]class ArdellSellaEvolverPowerMulticore (ArdellSellaEvolverAbstractBase): in_queue = multiprocessing.JoinableQueue() out_queue = multiprocessing.JoinableQueue() def __init__(self, initial_code, site_types, num_processes, observables, delta=10**-32, epsilon=10**-12): ArdellSellaEvolverAbstractBase.__init__(self, initial_code, site_types, delta, epsilon, observables) self.num_processes = num_processes for i in range(self.num_processes): child = ArdellSellaEvolverPowerMethodProcessChild(ArdellSellaEvolverPowerMulticore.in_queue,ArdellSellaEvolverPowerMulticore.out_queue,self.num_codons,self.delta) child.daemon = True child.start()
[docs] def equilibrate_messages(self): for alpha in range(self.num_site_types): msm = self.get_mutation_selection_matrix(alpha) ArdellSellaEvolverPowerMulticore.in_queue.put((alpha,msm)) ArdellSellaEvolverPowerMulticore.in_queue.join() while(not(ArdellSellaEvolverPowerMulticore.out_queue.empty())): alpha,msm,eigenvec,convergence = ArdellSellaEvolverPowerMulticore.out_queue.get() if ( not convergence ): print 'Thread time-out occured. Eigenvalue did not converge to specified accuracy %f for site-type %d within time limit.' % (self.delta,alpha) self.eigenvalues[alpha] = self.get_eigenvalue(msm,eigenvec) self.eigenmatrix[alpha] = eigenvec self.equilibrated = True super(ArdellSellaEvolverPowerMulticore, self).equilibrate_messages()
[docs]class ArdellSellaEvolverNumpyProcessChild(multiprocessing.Process): """ Finds the eigensystem (message equilibrium and growth rate) for an established genetic code """ def __init__(self, in_queue, out_queue): multiprocessing.Process.__init__(self) self.in_queue = in_queue # holds site type index and (DA: code?) matrix and computeS self.out_queue = out_queue # holds site type index leading eigenvalue and corresponding eigenvector #print "initializing {}".format(self.name)
[docs] def run(self): while True: #print "running {} and inqueue emptiness is {}".format(self.name,self.in_queue.empty()) (alpha, msm) = self.in_queue.get() # alpha indexes the site-type #print "processing" (eigval,eigmat) = numpy.linalg.eig(msm) maxi = numpy.argmax(eigval) eigenvec = eigmat[:,maxi] eigenvec /= eigenvec.sum() eigenval = eigval[maxi] self.out_queue.put((alpha,eigenvec,eigenval)) self.in_queue.task_done()
in_queue = multiprocessing.JoinableQueue() out_queue = multiprocessing.JoinableQueue()
[docs]class ArdellSellaEvolverNumpyMulticore (ArdellSellaEvolverAbstractBase): def __init__(self, initial_code, site_types, num_processes, observables, delta=10**-32, epsilon=10**-12): ArdellSellaEvolverAbstractBase.__init__(self, initial_code, site_types, delta, epsilon, observables) self.num_processes = num_processes for i in range(self.num_processes): # child = ArdellSellaEvolverNumpyProcessChild(ArdellSellaEvolverNumpyMulticore.in_queue,ArdellSellaEvolverNumpyMulticore.out_queue) child = ArdellSellaEvolverNumpyProcessChild(in_queue,out_queue) child.daemon = True child.start()
[docs] def equilibrate_messages(self): for alpha in range(self.num_site_types): msm = self.get_mutation_selection_matrix(alpha).astype(numpy.float64) in_queue.put((alpha,msm)) # ArdellSellaEvolverPowerMulticore.in_queue.put((alpha,msm)) in_queue.join() # ArdellSellaEvolverNumpyMulticore.in_queue.join() # print "inqueue emptiness is {}".format(ArdellSellaEvolverPowerMulticore.in_queue.empty()) #print "inqueue emptiness is {}".format(in_queue.empty()) # while(not(ArdellSellaEvolverNumpyMulticore.out_queue.empty())): while(not(out_queue.empty())): # (alpha,eigenvec,eigenval) = ArdellSellaEvolverNumpyMulticore.out_queue.get() (alpha,eigenvec,eigenval) = out_queue.get() self.eigenvalues[alpha] = eigenval self.eigenmatrix[alpha] = eigenvec self.equilibrated = True super(ArdellSellaEvolverNumpyMulticore, self).equilibrate_messages()
[docs]class ArdellSellaEvolverNumpy(ArdellSellaEvolverAbstractBase): def __init__(self, initial_code, site_types, num_processes, observables, delta=10**-32, epsilon=10**-12): ArdellSellaEvolverAbstractBase.__init__(self, initial_code, site_types, delta, epsilon, observables)
[docs] def compute_eigensystem(self, alpha, max_time = 60, numpy_type = numpy.float64): msm = self.get_mutation_selection_matrix(alpha).astype(numpy_type) (eigval,eigmat) = numpy.linalg.eig(msm) maxi = numpy.argmax(eigval) eigenvec = eigmat[:,maxi] eigenvec /= eigenvec.sum() eigenval = eigval[maxi] self.eigenvalues[alpha] = eigenval # self.get_eigenvalue(msm,eigenvec) self.eigenmatrix[alpha] = eigenvec
[docs] def equilibrate_messages(self): for alpha in xrange(self.num_site_types): self.compute_eigensystem(alpha) self.equilibrated = True super(ArdellSellaEvolverNumpy, self).equilibrate_messages()
[docs]class ArdellSellaEvolverPerturbative(ArdellSellaEvolverAbstractBase): def __init__(self, initial_code, site_types, num_processes, observables, delta=10**-32, epsilon=10**-12): ArdellSellaEvolverAbstractBase.__init__(self, initial_code, site_types, delta, epsilon, observables) self.max_order = max_order
[docs] def choose(self,n, k): """ A fast way to calculate binomial coefficients by Andrew Dalke (contrib). """ if 0 <= k <= n: ntok = 1 ktok = 1 for t in xrange(1, min(k, n - k) + 1): ntok *= n ktok *= t n -= 1 return ntok // ktok else: return 0
[docs] def compute_eigensystem(self, alpha, max_time = 60, numpy_type = numpy.float64): import pdb # pdb.set_trace() codons = self.codons mu = codons.mu n = self.num_codons dm = numpy.matrix(codons.get_derivative_matrix()) sm = numpy.matrix(self.get_selection_matrix(alpha)) wp = numpy.matrix(numpy.diag(sm) - sm[0][0]).transpose() wp = wp[1:n] maxord = self.max_order lamb = numpy.matrix(numpy.zeros((1,maxord+1))) vmat = numpy.matrix(numpy.zeros((n,maxord+1))) lamb[0,0] = sm[0,0] vmat[0,0] = 1 eone = numpy.matrix(numpy.zeros((n,1))) eone[0] = 1 for j in xrange(1,maxord): tempvec = -j * dm * sm * vmat[:,(j-1)] temp = - eone.T * tempvec for m in xrange(1,(j-1)): temp = temp - self.choose(j,m) * lamb[0,m] * vmat[1,(j-m)]; lamb[0,j] = temp for m in xrange(1,j): tempvec = tempvec + self.choose(j,m) * lamb[0,m] * vmat[:,(j-m)] vmat[0,j] = 1; vmat[1:n,j] = tempvec[1:n] / wp; ordvec = (((numpy.ones(maxord+1) * mu) ** (numpy.matrix((range(maxord+1))))) / numpy.matrix([math.factorial(i) for i in xrange((maxord+1))])).T lpert = lamb * ordvec vpert = vmat * ordvec vpert /= vpert.sum() (lpert, vpert) = codons.post_process_perturbative_solution(lpert,vpert) self.eigenvalues[alpha] = lpert self.eigenmatrix[alpha] = vpert
[docs] def equilibrate_messages(self): for alpha in xrange(self.num_site_types): self.compute_eigensystem(alpha) self.equilibrated = True super(ArdellSellaEvolverNumpy, self).equilibrate_messages()
[docs]class eigensystem_CUDA_implementation: def __init__(self, parent, max_time = 60, delta = 10**(-32)): self.verbose = False self.debug_mode = False self.starttime=time.time() self.max_time=max_time self.parent = parent self.delta = delta self.num_site_types = self.parent.num_site_types # create super matrix; dimension (num_site_types)^2 by num_site_types self.alpha1msm = self.parent.get_mutation_selection_matrix(0) self.msm_dim=len(self.alpha1msm) self.supermatrix = numpy.zeros((self.msm_dim*self.num_site_types,self.msm_dim)).astype(numpy.float32) self.supermatrix[(0*self.msm_dim):((0+1)*self.msm_dim)]=self.alpha1msm # fill super matrix # While this 'for' loop accomplishes the same goal as Python's list comprehension, # NumPy's list comprehension capability is unknown. for alpha in range(1,self.num_site_types): self.supermatrix[(alpha*self.msm_dim):((alpha+1)*self.msm_dim)]=self.parent.get_mutation_selection_matrix(alpha) #print 'supermatrix: ',self.supermatrix # eigenvector of ones; dimension num_site_types*msm_dim self.eigenvec=numpy.ones(self.num_site_types*self.msm_dim).astype(numpy.float32) self.eigenvec_old=numpy.copy(self.eigenvec) ## length num_site_types*msm_dim eigenvec will be rearranged into a (num_site_types)by (msm_dim) # eigen matrix -- one of the outputs of this class -- later self.eigenmatrix = numpy.zeros((self.num_site_types,self.msm_dim)).astype(numpy.float32) self.loop_power=numpy.int32(8) # Maximum power of 2 of CUDA kernel loop count; # to avoid 'time-out' errors, kernel calls can be kept short by keeping this number low # (11 should be fine). The time difference between two kernel calls of 2^10 loops each # and one kernel call of 2^11 loops is negligable. Of course, the time difference between # running 2^11 kernel calls of one loop and one kernel call of 2^11 loops would be significant. self.power_max=numpy.int32(11) self.loop_total=0 # memory allocation self.cuda_supermatrix=cuda.mem_alloc(self.supermatrix.nbytes) self.cuda_loop_power=cuda.mem_alloc(8) self.cuda_power_max=cuda.mem_alloc(8) self.cuda_eigenvec=cuda.mem_alloc(self.eigenvec.nbytes) ## sums to normalize by self.cuda_vsum=cuda.mem_alloc(4*self.num_site_types) ## one float for each eigenvector self.source_module = SourceModule(""" #include <stdio.h> __global__ void eigensystem(float* vector, const float* mat, const int* loop_power, const int* power_max, float* vsum){ //dimension of individual eigenmatrix const int dim = blockDim.x; // one eigenmatrix per block const int b = blockIdx.x; const int t = threadIdx.x; const int block_pos_mat = b*__powf(dim,2); const int i = block_pos_mat+(t*dim); // i -- row of supermatrix const int thread_pos_evec = b*dim; int j; //const double loop_count = loop; //const int loop_count = 3000; //unsigned long loop_count = pow(2, loop_power); unsigned int loop_count = 2; if(*loop_power<*power_max){ loop_count = loop_count << (*loop_power); }else{ loop_count = loop_count << (*power_max); } //printf(\"block.thread %d.%d; #loops: %u\\n\", blockIdx.x, threadIdx.x, loop_count); unsigned int l; __shared__ float product[64]; __shared__ float e_vec_sum; // sum of all dot products on block; used for normalization for(l=0;l<loop_count;l++){ //__shared__ float product[dim]; //does not work product[t] = 0.0; e_vec_sum = 0.0; /* While CUDAs printf statement can print output from multiple threads and blocks in one kernel call, this 'if' condition restricts the printing to one thread of one block. This is still useful because, so far, the bugs needing to be fixed have been shared by all CUDA blocks and threads.Fixing one has fixed all. if(blockIdx.x == 0 && threadIdx.x == 0){ printf("block.thread %d.%d;\\t\\t loop_power: %u power_max: %u loop_count: %u l: %u\\n", blockIdx.x, threadIdx.x, *loop_power, *power_max, loop_count, l); } */ for(j=0;j<dim;j++){ //e_vec_sum += mat[i+j]*vector[thread_pos_evec+j]; //product[t] += mat[i+j]*vector[thread_pos_evec+j]; float out = mat[i+j]*vector[thread_pos_evec+j]; atomicAdd(&e_vec_sum, out); atomicAdd(&product[t], out); } __syncthreads; vector[thread_pos_evec+t]=product[t]; // Normalize... vector[thread_pos_evec+t]=vector[thread_pos_evec+t]/e_vec_sum; } } """) ## getting reference to kernel self.cuda_eigensystem = self.source_module.get_function("eigensystem") # moving data into global memory; # only done once; subsequent loops kernel calls pick up where the last left off cuda.memcpy_htod(self.cuda_eigenvec,self.eigenvec) cuda.memcpy_htod(self.cuda_supermatrix, self.supermatrix) cuda.memcpy_htod(self.cuda_power_max,self.power_max)
[docs] def calculate(self): # move power of 2 request onto GPU cuda.memcpy_htod(self.cuda_loop_power, self.loop_power) # calling kernel if(self.verbose): print "================================== " print(""""Calling CUDA; %d by %d supermatrix;\neigenvector of length %d;\n%d loops run so far; \n%d loop request...""" % (self.msm_dim*self.num_site_types, self.msm_dim, self.msm_dim*self.num_site_types, self.loop_total, 2**self.loop_power)) print "Blocks: %d by %d\nThreads/block: %d by %d by %d" % (self.num_site_types,1, self.msm_dim,1,1) print "================================== " self.cuda_eigensystem(self.cuda_eigenvec, self.cuda_supermatrix, self.cuda_loop_power, self.cuda_power_max, self.cuda_vsum, grid=(self.num_site_types,1), block=(self.msm_dim,1,1)) ## just called kernel for self. loops; add this to self.loop_total; logging purposes only self.loop_total+=2**self.loop_power # copy cuda_eigenvec back to main memory cuda.memcpy_dtoh(self.eigenvec,self.cuda_eigenvec)
[docs] def get_eigenvalue(self,alpha): ## use self.subeigenvecrix of self.supermatrix, and subvector of self.eigenvec; ## direct translation of original 'evolvers' get_eigenvalue( msm, eigenvec) self.top = (numpy.dot(self.supermatrix[(alpha*self.msm_dim):((alpha+1)*self.msm_dim)], self.eigenvec[(alpha*self.msm_dim):((alpha+1)*self.msm_dim)])* self.eigenvec[(alpha*self.msm_dim):((alpha+1)*self.msm_dim)]) self.bottom = (self.eigenvec[(alpha*self.msm_dim):((alpha+1)*self.msm_dim)] * self.eigenvec[(alpha*self.msm_dim):((alpha+1)*self.msm_dim)]) self.lamda = self.top.sum()/self.bottom.sum() #print 'self.lamda =',self.lamda return self.lamda
[docs] def get_eigenvalues(self): ## compute all eigenvalues and put them in an array self.eigenvalues = numpy.zeros(self.num_site_types).astype(numpy.float32) for alpha in xrange(self.num_site_types): self.eigenvalues[alpha]=self.get_eigenvalue(alpha) #print 'alpha:',alpha,'\teigenvalues:',self.eigenvalues[alpha] return self.eigenvalues
[docs] def get_eigenmatrix(self): ## rearrange eigenvector of length (num_site_types * msm_dim) into eigenmatrix for alpha in xrange(self.num_site_types): self.eigenmatrix[alpha]=self.eigenvec[(alpha*self.msm_dim):((alpha+1)*self.msm_dim)] #print 'alpha:',alpha,'\teigenmatrix:',self.eigenmatrix[alpha] return self.eigenmatrix
[docs] def error_check(self): self.errorarray = numpy.zeros(self.num_site_types).astype(numpy.float32) for alpha in range(0,self.num_site_types): self.errorarray[alpha]=(numpy.linalg.norm(self.eigenvec[ (alpha*self.msm_dim):((alpha+1)*self.msm_dim)] - self.eigenvec_old[(alpha*self.msm_dim):((alpha+1)*self.msm_dim)])) if(self.verbose): print 'error, length %d: ' % len(self.errorarray), self.errorarray if((time.time()-self.starttime)>self.max_time): if(self.verbose): print 'time out...' return True elif(numpy.abs(max(self.errorarray))>self.delta): # Error exceeds threshold if(self.verbose): print 'error exceeds delta (threshold) of', self.parent.delta ## increase loop request if(self.loop_power<self.power_max): self.loop_power+=1 ## need to run more loops; copy eigenvec to eigenvec_old self.eigenvec_old=numpy.copy(self.eigenvec) return False else: if(self.verbose): print '-=-=-=-=-=-=-=-=-' print 'error below delta!' print '-=-=-=-=-=-=-=-=-' return True
[docs] def done(self): if(self.verbose): print 'CUDA loop total (for all site types):\t',self.loop_total print 'final maximum error: ',numpy.abs(max(self.errorarray)) self.parent.eigenvalues = self.get_eigenvalues() self.parent.eigenmatrix = self.get_eigenmatrix()
[docs]class ArdellSellaEvolverPowerCUDA (ArdellSellaEvolverAbstractBase): def __init__(self, initial_code, site_types, num_processes, observables, delta=10**-32, epsilon=10**-12): ArdellSellaEvolverAbstractBase.__init__(self, initial_code, site_types, delta, epsilon, observables = cmcpy.observables.Observables()) #from pycuda.compiler import SourceModule #import pycuda.driver as self.cuda #import pycuda.autoinit #SourceModule = __import__(pycuda.compiler.SourceModule) #cuda = __import__(pycuda.driver) #pycuda = __import__(pycuda.autoinit)
[docs] def equilibrate_messages(self): super_matrix_eigen_system = eigensystem_CUDA_implementation(parent=self, delta=self.delta) # All function calls have been moved out of the eigensystem_CUDA_implementation class super_matrix_eigen_system.calculate() converged = super_matrix_eigen_system.error_check() while(not converged): #super_matrix_eigen_system.reload() super_matrix_eigen_system.calculate() converged = super_matrix_eigen_system.error_check() super_matrix_eigen_system.done() del super_matrix_eigen_system ## CUDA eigensystem solver done; eigenvalues and eigenmatrix have been inserted into this object ## by child object (CUDA eigensystem solver) self.equilibrated = True super(ArdellSellaEvolverPowerCUDA, self).equilibrate_messages()