1
2
3
4
5
6
7
8
9
10
11
12
13 from numpy.core.records import recarray
14 try:
15 import psyco
16 psyco.full()
17 except:
18 pass
19 import sys
20 import os
21 import copy
22 import cPickle as CP
23 import like
24 import pylab as P
25 from scipy.stats.kde import gaussian_kde
26 from scipy.linalg import LinAlgError
27 from scipy import stats
28 import numpy
29 from numpy import array, nan_to_num, zeros, product, exp, ones,mean, var,sqrt,floor
30 from time import time
31 from numpy.random import normal, randint, random, seed
32 import PlotMeld as PM
33 try:
34 from BIP.Viz.realtime import RTplot
35 Viz=True
36 except:
37 Viz=False
38 print r"""Please install Gnuplot-py to enable realtime visualization.
39 http://gnuplot-py.sourceforge.net/
40 """
41 import lhs
42
43 from multiprocessing import Pool
44 if Viz:
45 dtplot = RTplot();phiplot = RTplot();thplot = RTplot()
46
47 __docformat__ = "restructuredtext en"
48
50 """
51 Fit a model to data generating
52 Bayesian posterior distributions of input and
53 outputs of the model.
54 """
55 - def __init__(self, K,L,model, ntheta, nphi,inits,tf,phinames,thetanames,wl=None,nw=1,verbose=False):
56 """
57 Initialize the model fitter.
58
59 :Parameters:
60 - `K`: Number of samples from the priors. On MCMC also the number of samples of the posterior.
61 - `L`: Number of samples of the posteriors. Only used on SIR and ABC methods.
62 - `model`: Callable (function) returning the output of the model, from a set of parameter values received as argument.
63 - `ntheta`: Number of parameters included in the inference.
64 - `nphi`: Number of outputs of the model.
65 - `inits`: inits initial values for the model's variables.
66 - `tf`: Length of the simulation, in units of time.
67 - `phinames`: List of names (strings) with names of the model's variables
68 - `thetanames`: List of names (strings) with names of parameters included on the inference.
69 - `wl`: window lenght length of the inference window.
70 - `nw`: Number of windows to analyze on iterative inference mode
71 - `verbose`: Verbose output if True.
72 """
73 self.Me = Meld(K=K,L=L,model=model,ntheta=3,nphi=5,verbose=verbose)
74 self.inits = inits
75 self.tf = tf
76 self.model = model
77 self.nphi = nphi
78 self.ntheta = ntheta
79 self.phinames = phinames
80 self.thetanames = thetanames
81 self.wl = wl
82 self.nw = nw
83 self.done_running = False
84 self.prior_set = False
85
87 """
88 Set the prior distributions for Phi and Theta
89
90 :Parameters:
91 - `pdists`: distributions for the output variables. For example: [scipy.stats.uniform,scipy.stats.norm]
92 - `ppars`: paramenters for the distributions in pdists. For example: [(0,1),(0,1)]
93 - `tdists`: same as pdists, but for input parameters (Theta).
94 - `tpars`: same as ppars, but for tdists.
95 """
96 self.pdists = pdists
97 self.ppars = ppars
98 self.tdists = tdists
99 self.tpars = tpars
100 self.prior_set = True
101
103 """
104 """
105 if prior['theta'] and prior['phi']:
106 self.Me.setThetaFromData(names_theta,prior['theta'],[(0,.9),(0,.0001),(0,1)])
107 self.Me.setPhiFromData(names_phi,prior['phi'],[(0,1)]*self.nphi)
108 else:
109 print "++++>"
110 self.Me.setTheta(self.thetanames,[stats.uniform]*nt,[(0.001,.2),(0,.0001),(0.1,.5)])
111 self.Me.setPhi(self.phinames,[stats.uniform]*np,[(0,1)]*self.nphi,[(0,1)]*self.nphi)
113 """
114 """
115 self._init_priors(prior)
116 succ=0
117 att = 1
118 if method == "SIR":
119 while not succ:
120 print 'attempt #',att
121 succ = self.Me.sir(data=data,variance=1e-6,nopool=True,t=tf)
122 att += 1
123 pt,series = self.Me.getPosteriors(t=tf)
124 elif method == "MCMC":
125 while not succ:
126 print 'attempt #',att
127 succ = self.Me.mcmc_run(data,t=tf,likvariance=.5e-7,burnin=1000)
128 pt,series = self.Me.getPosteriors(t=tf)
129
130 elif method == "ABC":
131 Me.abcRun(data=data,fitfun=fitInc,t=105)
132 pt,series = Me.getPosteriors(t=tf)
133
134 - def run(self, data,method,monitor=False):
135 """
136 Fit the model against data
137
138 :Parameters:
139 - `data`: dictionary with variable names and observed series, as Key and value respectively.
140 - `method`: Inference method: "ABC", "SIR" or "MCMC"
141 """
142 if not self.prior_set: return
143 if monitor:
144 self._monitor_setup()
145 start = time.time()
146 d = data
147 prior = {'theta':[],'phi':[]}
148 os.system('rm wres_*')
149 wl = self.wl
150 for w in range(self.nw):
151 print '==> Window # %s of %s!'%(w,nw)
152 self.tf=wl
153 d2 = {}
154 for k,v in d.items():
155 d2[k] = v[w*wl:w*wl+wl]
156 print v.shape
157 if w==0:
158 self.inits[2] = d2['I'][0]
159 self.inits[0] += 1-sum(self.inits)
160 pt,pp,series,predseries,att = self.do_inference(data=d2, prior=prior,predlen=wl, method=method)
161 f = open('wres_%s'%w,'w')
162
163 cPickle.dump((pt,pp,series,d,predseries, att*K),f)
164 f.close()
165 prior = {'theta':[],'phi':[]}
166 for n in pt.dtype.names:
167 prior['theta'].append(pt[n])
168
169 for n in pp.dtype.names:
170
171 prior['phi'].append(pp[n])
172 if monitor:
173 self._monitor_plot(series,prior)
174 print "time: %s seconds"%(time.time()-start)
175 self.done_running = True
176
177
179 """
180 Sets up realtime plotting of inference
181 """
182 self.hst = RTplot()
183 self.hsp = RTplot()
184 self.ser = RTplot()
186 """
187 Plots real time data
188 """
189 self.ser.plotlines(d2[r'I'],style='points', names=['Obs. I'])
190 i5 = array([stats.scoreatpercentile(t,2.5) for t in series[r'I'].T])
191 i95 = array([stats.scoreatpercentile(t,97.5) for t in series[r'I'].T])
192 diff = abs(series.I[:,-1]-(d2['I'][-1]))
193 initind = diff.tolist().index(min(diff))
194 self.ser.plotlines(data=series[r'I'][initind], names=["Best run's I"])
195 self.ser.plotlines(data=i5, names=['2.5%'])
196 self.ser.plotlines(data=i95, names=['97.5%'],title='Week %s'%(w+1))
197 self.hst.plothist(data=array(prior['theta']),names=list(self.thetanames),title='Week %s'%(w+1))
198 self.ser.clearFig()
199 self.hst.clearFig()
200 self.hsp.clearFig()
201
203 """
204 Plot the final results of the inference
205 """
206 if not self.done_running:
207 return
208 pt,pp,series,predseries = self._read_results()
209 PM.plot_par_series(range(len(pt)),pt)
210 PM.plot_par_violin(range(len(pt)),pt)
211 P.figure()
212 PM.predNewCases(obs,predseries,nc,fig,names,ws)
213 plot_series2(range(nc*ws),obs,series,names=names)
214 plot_series2(range(nc*ws),obs,predseries,names=names,tit='Predicted vs. Observed series',lag=True)
215 P.show()
216
218 """
219 read results from disk
220 """
221 pt,pp,series,predseries = [],[],[],[]
222 for w in range(self.nw):
223 fn = 'wres_%s'%w
224 print fn
225 f = open(fn,'r')
226 a,b,c,obs,pred, samples = cPickle.load(f)
227 f.close()
228
229 pt.append(a)
230 pp.append(b)
231 series.append(c)
232 predseries.append(pred)
233 return pt,pp,series,predseries
234
235
237 """
238 Bayesian Melding class
239 """
240 - def __init__(self, K, L, model, ntheta, nphi, alpha = 0.5, verbose = False, viz=False ):
241 """
242 Initializes the Melding class.
243
244 :Parameters:
245 - `K`: Number of replicates of the model run. Also determines the prior sample size.
246 - `L`: Number of samples from the Posterior distributions. Usually 10% of K.
247 - `model`: Callable taking theta as argument and returning phi = M(theta).
248 - `ntheta`: Number of inputs to the model (parameters).
249 - `nphi`: Number of outputs of the model (State-variables)
250 - `verbose`: Boolean: whether to show more information about the computations
251 - `viz`: Boolean. Wether to show graphical outputs of the fitting process
252 """
253 self.K = K
254 self.L = L
255 self.verbose = verbose
256 self.model = model
257 self.likelist = []
258 self.q1theta = recarray(K,formats=['f8']*ntheta)
259 self.post_theta = recarray(L,formats=['f8']*ntheta)
260 self.q2phi = recarray(K,formats=['f8']*nphi)
261 self.phi = recarray(K,formats=['f8']*nphi)
262 self.q2type = []
263 self.post_phi = recarray(L,formats=['f8']*nphi)
264 self.ntheta = ntheta
265 self.nphi = nphi
266 self.thetapars = []
267 self.phipars = []
268 self.alpha = alpha
269 self.done_running = False
270 self.theta_dists = {}
271 self.phi_dists = {}
272 self.proposal_variance = 0.0000001
273 self.adaptscalefactor = 1
274 self.salt_band = 0.1
275 if Viz:
276 self.viz = viz
277 else:
278 self.viz = False
279
280
281 - def setPhi(self, names, dists=[stats.norm], pars=[(0, 1)], limits=[(-5,5)]):
282 """
283 Setup the models Outputs, or Phi, and generate the samples from prior distributions
284 needed for the melding replicates.
285
286 :Parameters:
287 - `names`: list of string with the names of the variables.
288 - `dists`: is a list of RNG from scipy.stats
289 - `pars`: is a list of tuples of variables for each prior distribution, respectively.
290 - `limits`: lower and upper limits on the support of variables.
291 """
292 if len(names) != self.nphi:
293 raise ValueError("Number of names(%s) does not match the number of output variables(%s)."%(len(names),self.nphi))
294 self.q2phi.dtype.names = names
295 self.phi.dtype.names = names
296 self.post_phi.dtype.names = names
297 self.plimits = limits
298 self.phipars = pars
299 for n,d,p in zip(names,dists,pars):
300 self.q2phi[n] = lhs.lhs(d,p,self.K).ravel()
301 self.q2type.append(d.name)
302 self.phi_dists[n]=d(*p)
303
304
305
306 - def setTheta(self, names, dists=[stats.norm], pars=[(0, 1)]):
307 """
308 Setup the models inputs and generate the samples from prior distributions
309 needed for the dists the melding replicates.
310
311 :Parameters:
312 - `names`: list of string with the names of the parameters.
313 - `dists`: is a list of RNG from scipy.stats
314 - `pars`: is a list of tuples of parameters for each prior distribution, respectivelydists
315 """
316 self.q1theta.dtype.names = names
317 self.post_theta.dtype.names = names
318 self.thetapars = pars
319 if os.path.exists('q1theta'):
320 self.q1theta = CP.load(open('q1theta','r'))
321 else:
322 for n,d,p in zip(names,dists,pars):
323 self.q1theta[n] = lhs.lhs(d,p,self.K).ravel()
324 self.theta_dists[n]=d(*p).rvs
325
327 """
328 Adds a few extra uniformly distributed data
329 points beyond the dataset range.
330 This is done by adding from a uniform dist.
331
332 :Parameters:
333 - `dataset`: vector of data
334 - `band`: Fraction of range to extend [0,1[
335
336 :Returns:
337 Salted dataset.
338 """
339 dmax = max(dataset)
340 dmin = min(dataset)
341 drange = dmax-dmin
342 hb = drange*band/2.
343 d = numpy.concatenate((dataset,stats.uniform(dmin-hb,dmax-dmin+hb).rvs(self.K*.05)))
344 return d
345
347 """
348 Setup the model inputs and set the prior distributions from the vectors
349 in data.
350 This method is to be used when the prior distributions are available in
351 the form of a sample from an empirical distribution such as a bayesian
352 posterior.
353 In order to expand the samples provided, K samples are generated from a
354 kernel density estimate of the original sample.
355
356 :Parameters:
357 - `names`: list of string with the names of the parameters.
358 - `data`: list of vectors. Samples of a proposed distribution
359 - `limits`: List of (min,max) tuples for each theta to make sure samples are not generated outside these limits.
360 """
361 self.q1theta.dtype.names = names
362 self.post_theta.dtype.names = names
363 tlimits = dict(zip(names,limits))
364 class Proposal:
365 def __init__(self,name, dist):
366 self.name = name
367 self.dist = dist
368 def __call__(self):
369 smp = -numpy.inf
370 while not (smp>=tlimits[self.name][0] and smp<=tlimits[self.name][1]):
371 smp = self.dist.resample(1)[0][0]
372 return smp
373
374 if os.path.exists('q1theta'):
375 self.q1theta = CP.load(open('q1theta','r'))
376 else:
377 for n,d,lim in zip(names,data,limits):
378 smp = []
379
380
381 salted = self.add_salt(d,self.salt_band)
382
383 dist = gaussian_kde(salted)
384 while len(smp)<self.K:
385 smp += [x for x in dist.resample(self.K)[0] if x >= lim[0] and x <= lim[1]]
386
387 self.q1theta[n] = array(smp[:self.K])
388 self.theta_dists[n] = Proposal(copy.deepcopy(n),copy.deepcopy(dist))
389
391 """
392 Setup the model outputs and set their prior distributions from the
393 vectors in data.
394 This method is to be used when the prior distributions are available in
395 the form of a sample from an empirical distribution such as a bayesian
396 posterior.
397 In order to expand the samples provided, K samples are generated from a
398 kernel density estimate of the original sample.
399
400 :Parameters:
401 - `names`: list of string with the names of the variables.
402 - `data`: list of vectors. Samples of the proposed distribution.
403 - `limits`: list of tuples (ll,ul),lower and upper limits on the support of variables.
404 """
405 self.q2phi.dtype.names = names
406 self.phi.dtype.names = names
407 self.post_phi.dtype.names = names
408 self.limits = limits
409 for n,d in zip(names,data):
410 i = 0
411 smp = []
412 while len(smp)<self.K:
413 try:
414 smp += [x for x in gaussian_kde(d).resample(self.K)[0] if x >= limits[i][0] and x <= limits[i][1]]
415 except:
416
417
418
419 smp = ones(self.K)*d[0]
420 self.q2phi[n] = array(smp[:self.K])
421 self.q2type.append('empirical')
422 i += 1
423
424
425
426 - def run(self,*args):
427 """
428 Runs the model through the Melding inference.model
429 model is a callable which return the output of the deterministic model,
430 i.e. the model itself.
431 The model is run self.K times to obtain phi = M(theta).
432 """
433
434 for i in xrange(self.K):
435 theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names]
436 r = self.po.apply_async(self.model, theta)
437 self.phi[i]= r.get()[-1]
438
439 self.done_running = True
440
441 - def getPosteriors(self,t):
442 """
443 Updates the posteriors of the model's output for the last t time steps.
444 Returns two record arrays:
445 - The posteriors of the Theta
446 - the posterior of Phi last t values of time-series. self.L by `t` arrays.
447
448 :Parameters:
449 - `t`: length of the posterior time-series to return.
450 """
451 if not self.done_running:
452 return
453 if t > 1:
454 self.post_phi = recarray((self.L,t),formats=['f8']*self.nphi)
455 self.post_phi.dtype.names = self.phi.dtype.names
456 def cb(r):
457 '''
458 callback function for the asynchronous model runs.
459 r: tuple with results of simulation (results, run#)
460 '''
461 if t == 1:
462 self.post_phi[r[1]] = (r[0][-1],)
463
464 else:
465 self.post_phi[r[1]]= [tuple(l) for l in r[0][-t:]]
466 po = Pool()
467
468 pti = lhs.lhs(stats.randint,(0,self.L),siz=(self.ntheta,self.L))
469 for i in xrange(self.L):
470 theta = [self.post_theta[n][pti[j,i]] for j,n in enumerate(self.post_theta.dtype.names)]
471 po.apply_async(enumRun, (self.model,theta,i), callback=cb)
472
473
474
475
476
477 if i%100 == 0 and self.verbose:
478 print "==> L = %s\r"%i
479
480 po.close()
481 po.join()
482 return self.post_theta, self.post_phi
483
484 - def filtM(self,cond,x,limits):
485 '''
486 Multiple condition filtering.
487 Remove values in x[i], if corresponding values in
488 cond[i] are less than limits[i][0] or greater than
489 limits[i][1].
490
491 :Parameters:
492 - `cond`: is an array of conditions.
493 - `limits`: is a list of tuples (ll,ul) with length equal to number of lines in `cond` and `x`.
494 - `x`: array to be filtered.
495 '''
496
497 names = []
498 if isinstance(cond, recarray):
499 names = list(cond.dtype.names)
500 cond = [cond[v] for v in cond.dtype.names]
501 x = [x[v] for v in x.dtype.names]
502
503 cond = array(cond)
504 cnd = ones(cond.shape[1],int)
505 for i,j in zip(cond,limits):
506 ll = j[0]
507 ul = j[1]
508
509 cnd = cnd & less(i,ul) & greater(i,ll)
510 f = compress(cnd,x, axis=1)
511
512 if names:
513 r = recarray((1,f.shape[1]),formats=['f8']*len(names),names=names)
514 for i,n in enumerate(names):
515 r[n]=f[i]
516 f=r
517
518 return f
519
521 '''
522 Calculates a basic fitness calculation between a model-
523 generated time series and a observed time series.
524 it uses a normalized RMS variation.
525
526 :Parameters:
527 - `s1`: model-generated time series. record array.
528 - `s2`: observed time series. dictionary with keys matching names of s1
529
530 :Return:
531 Root mean square deviation between ´s1´ and ´s2´.
532 '''
533 fit = []
534 for k in s2.keys():
535 if s2[k] == [] or (not s2[k].any()):
536 continue
537 e = numpy.sqrt(mean((s1[k]-s2[k])**2.))
538 fit.append(e)
539
540 return mean(fit)
541
542
544 """
545 Returns the probability associated with each phi[i]
546 on the pooled pdf of phi and q2phi.
547
548 :Parameters:
549 - `phi`: prior of Phi induced by the model and q1theta.
550 """
551
552
553 phidens = gaussian_kde(array([phi[n][:,-1] for n in phi.dtype.names]))
554
555 q2dens = gaussian_kde(array([self.q2phi[n] for n in self.q2phi.dtype.names]))
556
557
558 lastp = array([list(phi[i,-1]) for i in xrange(self.K)])
559
560 qtilphi = (phidens.evaluate(lastp.T)**(1-self.alpha))*q2dens.evaluate(lastp.T)**self.alpha
561 return qtilphi/sum(qtilphi)
562
563 - def abcRun(self,fitfun=None, data={}, t=1,nopool=False,savetemp=False):
564 """
565 Runs the model for inference through Approximate Bayes Computation
566 techniques. This method should be used as an alternative to the sir.
567
568 :Parameters:
569 - `fitfun`: Callable which will return the goodness of fit of the model to data as a number between 0-1, with 1 meaning perfect fit
570 - `t`: number of time steps to retain at the end of the of the model run for fitting purposes.
571 - `data`: dict containing observed time series (lists of length t) of the state variables. This dict must have as many items the number of state variables, with labels matching variables names. Unorbserved variables must have an empty list as value.
572 - `savetemp`: Should temp results be saved. Useful for long runs. Alows for resuming the simulation from last sa
573 """
574 seed()
575 if not fitfun:
576 fitfun = self.basicfit
577 if savetemp:
578 CP.dump(self.q1theta,open('q1theta','w'))
579
580 phi = self.runModel(savetemp,t)
581
582 print "==> Done Running the K replicates\n"
583
584 if nopool:
585 qtilphi = ones(self.K)
586 else:
587 t0 = time()
588 qtilphi = self.logPooling(phi)
589 print "==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0)
590 qtilphi = nan_to_num(qtilphi)
591
592 if sum(qtilphi)==0:
593 print 'Pooled prior on ouputs is null, please check your priors, and try again.'
594 return 0
595
596
597 w = [fitfun(phi[i],data) for i in xrange(phi.shape[0])]
598 w /=sum(w)
599 w = 1-w
600
601
602
603
604 w = nan_to_num(w)
605 w = array(w)*qtilphi
606 w /=sum(w)
607 w = nan_to_num(w)
608 print 'max(w): %s\nmean(w): %s\nvar(w): %s'%(max(w), mean(w), var(w))
609 if sum(w) == 0.0:
610 print 'Resampling weights are all zero, please check your model or data.'
611 return 0
612 t0 = time()
613 j = 0
614 while j < self.L:
615 i=randint(0,w.size)
616 if random()<= w[i]:
617 self.post_theta[j] = self.q1theta[i]
618 j+=1
619 print "==> Done Resampling (L=%s) priors (took %s seconds)"%(self.L,(time()-t0))
620
621 self.done_running = True
622 return 1
623
625 """
626 Importance sampling
627
628 :Returns:
629 returns a sample of size n
630 """
631
632 print "Starting importance Sampling"
633 w /=sum(w)
634 w = nan_to_num(w)
635 j=0
636 k=0
637 smp = copy.deepcopy(data[:n])
638 while j < n:
639 i = randint(0,w.size)
640 if random() <= w[i]:
641 smp[j] = data[j]
642 j += 1
643
644 k+=1
645 print "Done imp samp."
646 return smp
647
648 - def mcmc_run(self, data, t=1, likvariance=10,burnin=100, nopool=False, method="MH" ):
649 """
650 MCMC based fitting
651
652 :Parameters:
653 - `data`: observed time series on the model's output
654 - `t`: length of the observed time series
655 - `likvariance`: variance of the Normal likelihood function
656 - `nopool`: True if no priors on the outputs are available. Leads to faster calculations
657 - `method`: Step method. defaults to Metropolis hastings
658 """
659 self.phi = recarray((self.K,t),formats=['f8']*self.nphi, names = self.phi.dtype.names)
660 if method == "MH":
661 self.mh(self.K,t,data,like.Normal,likvariance,burnin)
662 else:
663 sys.exit("Invalid MCMC method!\nTry 'MH'.")
664 self.done_running = 1
665 return 1
666
667
668 - def mh(self, n,t, data, likfun,variance, burnin):
669 """
670 Simple Metropolis hastings.
671 Assumes symmetric proposal distributions.
672
673 :Parameters:
674 - `n`: number of samples to obtain
675 - `t`: Length of time-series
676 - `data`: Observation dictionary {'variable':[time-series]}
677 - `likfun`: likelihood function to use
678 - `variance`: variance of the likelihood function
679 - `burnin`: number of samples to discard at the beginning of the chain.
680 """
681 def tune(ar):
682 if ar<0.05:
683 self.adaptscalefactor *= 0.5
684 elif ar <0.2:
685 self.adaptscalefactor *= 0.9
686 elif ar >0.9:
687 self.adaptscalefactor *= 10
688 elif ar >0.75:
689 self.adaptscalefactor *= 2
690 elif ar >0.5:
691 self.adaptscalefactor *= 1.1
692 ptheta = recarray((self.K),formats=['f8']*self.ntheta, names = self.post_theta.dtype.names)
693 i=0;j=0;rej=0
694 last_lik = None
695 liklist = []
696 while j < n:
697
698 pvar = self.proposal_variance*self.adaptscalefactor
699 theta = [self.theta_dists[dist]() for dist in self.q1theta.dtype.names]
700 prop = self.model(*theta)
701 if t == 1:
702 prop=[(v,) for v in prop]
703
704 lik=0
705 for k in xrange(self.nphi):
706 if self.q2phi.dtype.names[k] not in data:
707 continue
708 obs = data[self.q2phi.dtype.names[k]]
709 for p in xrange(t):
710 lik += likfun(obs[p],prop[p][k],1./variance)
711
712 if last_lik == None:
713 last_lik = lik
714 continue
715
716 if not (lik-last_lik) > numpy.log(1):
717 if not numpy.log(random())< lik-last_lik:
718 rej +=1
719 i +=1
720
721 continue
722 if i >= burnin:
723 self.phi[j] = prop[0] if t==1 else [tuple(point) for point in prop]
724 ptheta[j] = tuple(theta)
725 liklist.append(lik)
726 j += 1
727 last_lik = lik
728 i+=1
729 if i%n ==0:
730 print "==>",n,"\r"
731
732 self.post_theta = self.imp_sample(self.L,ptheta,liklist)
733 ar = (i-rej)/float(i)
734 if ar > 0.9:
735 if self.salt_band < 0:
736 self.salt_band = 0.05
737 elif self.salt_band < 30:
738 self.salt_band *= 10
739 elif ar > 0.7:
740 self.salt_band *= 2
741 elif ar < 0.4:
742 self.salt_band = 0.001
743 print "Total steps(i): ",i,"rej:",rej, "j:",j
744 print ">>> Salt-Band: %s"%self.salt_band
745 print ">>> Acceptance rate: %s"%ar
746
747 return 1
748
749
750
751
752 - def sir(self, data={}, t=1,variance=0.1, nopool=False,savetemp=False):
753 """
754 Run the model output through the Sampling-Importance-Resampling algorithm.
755 Returns 1 if successful or 0 if not.
756
757 :Parameters:
758 - `data`: observed time series on the model's output
759 - `t`: length of the observed time series
760 - `variance`: variance of the Normal likelihood function
761 - `nopool`: True if no priors on the outputs are available. Leads to faster calculations
762 - `savetemp`: Boolean. create a temp file?
763 """
764 seed()
765 phi = self.runModel(savetemp,t)
766
767 if nopool:
768 qtilphi = ones(self.K)
769 else:
770 t0 = time()
771 qtilphi = self.logPooling(phi)
772 print "==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0)
773 qtilphi = nan_to_num(qtilphi)
774 print 'max(qtilphi): ', max(qtilphi)
775 if sum(qtilphi)==0:
776 print 'Pooled prior on ouputs is null, please check your priors, and try again.'
777 return 0
778
779
780 lik = zeros(self.K)
781 t0=time()
782
783 for i in xrange(self.K):
784 l=1
785 for n in data.keys():
786 if isinstance(data[n],list) and data[n] == []:
787 continue
788 elif isinstance(data[n],numpy.ndarray) and (not data[n].any()):
789 continue
790 p = phi[n]
791
792
793 l *= product([exp(like.Normal(data[n][m], j,1./(variance))) for m,j in enumerate(p[i])])
794
795
796 lik[i]=l
797
798
799 if self.viz:
800 dtplot.clearFig();phiplot.clearFig();thplot.clearFig()
801 dtplot.gp.xlabel('observed')
802 dtplot.gp.ylabel('simulated')
803 obs = [];sim =[]
804 for n in data.keys():
805 obs.append(data[n])
806 sim.append(phi[n].mean(axis=0).tolist())
807 dtplot.scatter(array(obs),array(sim),names=data.keys(),title='fit')
808 phiplot.plotlines(array(sim),names=data.keys(),title='Model Output')
809 thplot.plothist(self.q1theta, title='Input parameters',names=self.q1theta.dtype.names)
810 print "==> Done Calculating Likelihoods (took %s seconds)"%(time()-t0)
811 lr = nan_to_num(max(lik)/min(lik))
812 print '==> Likelihood (min,mean,max,sum): ',min(lik),mean(lik),max(lik), sum(lik)
813 print "==> Likelihood ratio of best run/worst run: %s"%(lr,)
814
815 w = nan_to_num(qtilphi*lik)
816 w = nan_to_num(w/sum(w))
817
818 if not sum(w) == 0.0:
819 j = 0
820 t0 = time()
821 maxw = 0;minw = max(w)
822 while j < self.L:
823 i=randint(0,w.size)
824 if random()*max(w)<= w[i]:
825 self.post_theta[j] = self.q1theta[i]
826 maxw = max(maxw,w[i])
827 minw = min(minw,w[i])
828 j+=1
829 if not j%100 and self.verbose:
830 print j, "of %s"%self.L
831 self.done_running = True
832 print "==> Done Resampling (L=%s) priors (took %s seconds)"%(self.L,(time()-t0))
833 wr = maxw/minw
834 print "==> Likelihood ratio of best/worst retained runs: %s"%(wr,)
835 if wr == 1:
836 print "==> Flat likelihood, trying again..."
837 return 0
838 print "==> Improvement: %s percent"%(100-100*wr/lr,)
839 else:
840 print 'Resampling weights are all zero, please check your model or data, and try again.\n'
841 print '==> Likelihood (min,mean,max): ',min(lik),mean(lik),max(lik)
842 print '==> RMS deviation of outputs: %s'%(self.basicfit(phi, data),)
843 return 0
844 return 1
845
846 - def runModel(self,savetemp,t=1, k=None):
847 '''
848 Handles running the model k times keeping a temporary savefile for
849 resuming calculation in case of interruption.
850
851 :Parameters:
852 - `savetemp`: Boolean. create a temp file?
853 '''
854 if savetemp:
855 CP.dump(self.q1theta,open('q1theta','w'))
856 if not k:
857 k = self.K
858
859
860
861 if os.path.exists('phi.temp'):
862 self.phi,j = CP.load(open('phi.temp','r'))
863 else:
864 j=0
865 self.phi = recarray((k,t),formats=['f8']*self.nphi, names = self.phi.dtype.names)
866 def cb(r):
867 '''
868 callback function for the asynchronous model runs
869 '''
870 if t == 1:
871 self.phi[r[1]] = (r[0][-1],)
872 else:
873 self.phi[r[1]] = [tuple(l) for l in r[0][-t:]]
874
875 po = Pool()
876 t0=time()
877 for i in xrange(j,k):
878 theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names]
879 r = po.apply_async(enumRun,(self.model,theta,i),callback=cb)
880
881
882
883
884
885 if i%100 == 0 and self.verbose:
886 print "==> K = %s"%i
887 if savetemp:
888 CP.dump((self.phi,i),open('phi.temp','w'))
889 if savetemp:
890 os.unlink('phi.temp')
891 os.unlink('q1theta')
892 po.close()
893 po.join()
894 print "==> Done Running the K (%s) replicates (took %s seconds)\n"%(k,(time()-t0))
895
896 return self.phi
897
898
900 """
901 Returns model results plus run number.
902
903 :Parameters:
904 - `model`: model callable
905 - `theta`: model input list
906 - `k`: run number
907
908 :Return:
909 - res: result list
910 - `k`: run number
911 """
912 res =model(*theta)
913 return (res,k)
914
915
916
918 """
919 Model (r,p0, n=1)
920 Simulates the Population dynamic Model (PDM) Pt = rP0
921 for n time steps.
922 P0 is the initial population size.
923 Example model for testing purposes.
924 """
925
926 Pt = zeros(n, float)
927 P = p0
928 for i in xrange(n):
929 Pt[i] = r*P
930 P = Pt[i]
931
932 return Pt
933
934
936 '''
937 Plots a record array
938 as a panel of histograms
939 '''
940 nv = len(arr.dtype.names)
941 fs = (numpy.ceil(numpy.sqrt(nv)),numpy.floor(numpy.sqrt(nv))+1)
942 P.figure()
943 for i,n in enumerate(arr.dtype.names):
944 P.subplot(floor(sqrt(nv)),floor(sqrt(nv))+1,i+1)
945 P.hist(arr[n],bins=50, normed=1, label=n)
946 P.legend()
947
948
950 start = time()
951
952 Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)])
953 Me.setPhi(['p'],[stats.uniform],[(6,9)],[(6,9)])
954
955
956 Me.sir(data ={'p':[7.5]} )
957 pt,pp = Me.getPosteriors(1)
958 end = time()
959 print end-start, ' seconds'
960 plotRaHist(pt)
961 plotRaHist(pp)
962 P.show()
963
964
966 start = time()
967
968 Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)])
969 Me.setPhi(['p'],[stats.uniform],[(6,9)],[(6,9)])
970
971
972 Me.mcmc_run(data ={'p':[7.5]},burnin=1000)
973 pt,pp = Me.getPosteriors(1)
974 end = time()
975 print end-start, ' seconds'
976 plotRaHist(pt)
977 plotRaHist(pp)
978 P.show()
979
980
981 if __name__ == '__main__':
982 Me = Meld(K=5000,L=1000,model=model, ntheta=2,nphi=1,verbose=False,viz=False)
983 mh_test()
984
985