Package BIP :: Package Bayes :: Package general :: Module bvariables
[hide private]
[frames] | no frames]

Source Code for Module BIP.Bayes.general.bvariables

  1  """ 
  2  This module implements classes to represent an arbitrary Bayesian random variable. 
  3   
  4  This is experimental code! Do not use for serious applications! 
  5   
  6  """ 
  7  # copyright 2007 Flavio Codeco Coelho 
  8  # Licensed under GPL v3 
  9  from BIP.Bayes import like 
 10  from numpy import arange 
 11  from numpy import array 
 12  from numpy import compress 
 13  from numpy import exp 
 14  from numpy import greater 
 15  from numpy import less 
 16  from numpy import ones 
 17  from numpy import searchsorted 
 18  from numpy import sqrt 
 19  import pylab as P 
 20  from scipy import stats 
 21  import sys 
 22   
 23  from BIP.Viz.asciihist import Histogram 
 24   
 25  __docformat__ = "restructuredtext en" 
 26  ## Conjugate prior list: distribution types which have supported conjugate prior  
 27  discrete_conjugate_priors = { 
 28      'Bernoulli':'Beta', 
 29      'Binomial':'Beta', 
 30      'Negative Binomial':'Beta', 
 31      'Poisson':'Gamma', 
 32      'Multinomial':'Dirichlet', 
 33      'Geometric':'Beta' 
 34      } 
 35  continuous_conjugate_priors = { 
 36      'Uniform':'Pareto', 
 37      'Exponential':'Gamma', 
 38      'Normal':'Normal', 
 39      'Pareto':'Gamma', 
 40      'Gamma':'Gamma', 
 41      'Inverse Gamma':'Gamma' 
 42      } 
 43   
 44   
def BayesVar(priortype, pars, range, resolution=1024):
    """
    Factory function for continuous and discrete variables.

    :Parameters:
        - `priortype`: scipy.stats distribution object used as the prior
          (an instance of `stats.rv_continuous` or `stats.rv_discrete`,
          e.g. `stats.norm`).
        - `pars`: sequence of parameters passed to `priortype`.
        - `range`: two-element sequence with the bounds of the support.
        - `resolution`: number of steps the support is divided into.

    :Return:
        A continuous or discrete Bayesian variable, depending on the kind
        of `priortype`.

    :Raises TypeError: if `priortype` is neither a continuous nor a
        discrete scipy.stats distribution.
    """
    if isinstance(priortype, stats.rv_continuous):
        return __BayesC(priortype, pars, range, resolution)
    # The original tested the undefined name `disttype` here, so every
    # non-continuous prior failed with a NameError.
    if isinstance(priortype, stats.rv_discrete):
        return __BayesD(priortype, pars, range, resolution)
    raise TypeError(
        'priortype must be a scipy.stats continuous or discrete '
        'distribution, got %r' % (priortype,))
53 54
class _BayesVar(object):
    """
    Bayesian random variate.

    Holds a prior (a scipy.stats distribution), a bounded support split
    into evenly spaced steps, and, once data has been added, a normalized
    likelihood over that support from which posterior samples are drawn.
    """

    def __init__(self, disttype, pars, rang, resolution=1024):
        """
        Initializes random variable.

        :Parameters:
            - `disttype`: must be a valid RNG class from scipy.stats
            - `pars`: are the parameters of the distribution.
            - `rang`: range of the variable support.
            - `resolution`: resolution of the support.
        """
        self.distn = disttype.name
        self._flavorize(disttype(*pars), disttype)
        self.pars = pars
        self.rang = rang
        # "* 1." forces float division so the step is fractional on Python 2.
        self.res = (rang[1] - rang[0]) * 1. / resolution
        self.likelihood = None
        self.data = None
        self.posterior = array([])

    def __str__(self):
        """
        :Return:
            ascii histogram of the variable
        """
        if self.posterior.any():
            d = self.posterior
        else:
            d = self.get_posterior_sample(200000)
        name = self.distn + str(self.pars)
        h = Histogram(d, bins=10)
        return name + '\n' + h.vertical()

    def _support(self):
        """
        Returns the grid of support points: from `rang[0]` up to (but not
        including) `rang[1]`, in steps of `res`.
        """
        return arange(self.rang[0], self.rang[1], self.res)

    def _flavorize(self, pt, ptbase):
        """
        Add methods from distribution type

        :Parameters:
            - `pt`: frozen distribution whose methods are copied onto self.
            - `ptbase`: the unfrozen distribution object; decides whether
              `pdf` maps to the density or to the probability mass function.

        :Raises TypeError: if `ptbase` is neither continuous nor discrete.
        """
        if isinstance(ptbase, stats.rv_continuous):
            density = pt.pdf
        elif isinstance(ptbase, stats.rv_discrete):
            density = pt.pmf
        else:
            # Raise instead of sys.exit(): library code must not terminate
            # the interpreter of whoever imported it.
            raise TypeError('Invalid distribution object')
        self.cdf = pt.cdf
        self.isf = pt.isf
        self.pdf = density
        self.ppf = pt.ppf
        self.rvs = pt.rvs

    def _update(self, model):
        """
        Calculate likelihood function

        Evaluates the likelihood of the stored data at every support point
        and stores it, normalized to sum to one, in `self.likelihood`.
        Does nothing when no data has been added.

        :Parameters:
            - `model`: name of the likelihood family (a key accepted by
              `_likelihood`).
        """
        # "is None": comparing an array with "!= None" is element-wise and
        # its truth value is ambiguous.
        if self.data is None:
            return
        d = self.data
        likefun = self._likelihood(model)  # returns log-likelihood function
        variance = d.var()  # loop-invariant
        loglik = array([likefun((d, i, variance)) for i in self._support()])
        if loglik.size:
            # Shifting by the maximum leaves the normalized result unchanged
            # but keeps exp() from underflowing to all zeros on large data.
            loglik = loglik - loglik.max()
        lik = exp(loglik)
        self.likelihood = lik / lik.sum()

    def add_data(self, data, model):
        """
        Updates variable with information from dataset

        :Parameters:
            - `data`: sequence of numbers
            - `model`: probabilistic model underlying data (a frozen
              scipy.stats distribution; only `model.dist.name` is used)
        """
        self.data = array(data)
        self._update(model.dist.name)

    def get_prior_sample(self, n):
        """
        Returns a sample from the prior distribution

        :Parameters:
            - `n`: Sample size.
        """
        return self.rvs(size=n)

    def get_prior_dist(self):
        """
        Returns the prior PDF evaluated on the support grid.
        """
        return self.pdf(self._support())

    def get_posterior_sample(self, n):
        """
        Return a sample of the posterior distribution.
        Uses SIR algorithm.

        Candidates are drawn from a kernel density estimate of the previous
        posterior sample when one is available, otherwise from the prior.
        Candidates outside the open support interval are dropped and the
        rest are accepted with probability equal to their normalized
        prior-times-likelihood weight, so fewer than `n` values are
        returned. The result is also stored in `self.posterior`.

        :Parameters:
            - `n`: Sample size (number of candidates drawn).

        :Return:
            1-d array of accepted samples; an empty array when no data has
            been added.
        """
        # Use last posterior as prior. A KDE cannot be fitted to fewer than
        # two distinct values, so fall back to the prior in that case.
        if self.posterior.size > 1 and self.posterior.std() > 0:
            kde = stats.gaussian_kde(self.posterior)
            s = kde.resample(n)
        else:
            s = self.get_prior_sample(n)
        if self.data is None:
            return array([])
        m = self.rang[0]
        M = self.rang[1]
        supp = self._support()
        flat = s.ravel()
        # removing out-of-range samples
        s = compress(less(flat, M) & greater(flat, m), flat)
        # Uniform 0-1 samples
        d = stats.uniform.rvs(loc=0, scale=1, size=len(s))
        w = self.pdf(supp) * self.likelihood
        w = w / w.sum()  # normalizing weights
        sx = searchsorted(supp, s)
        # Every kept sample is > supp[0], so sx >= 1 and sx - 1 is the grid
        # point at or below the sample.
        w = w[sx - 1]
        post = compress(d < w, s)
        self.posterior = post
        return post

    def _likelihood(self, dname):
        """
        Defines parametric family of the likelihood function.
        Returns likelihood function.

        Each function takes one tuple `x` = (data, parameter value, data
        variance), as built by `_update`.

        :Parameters:
            - `dname`: must be a string.
        :Return:
            lambda function to calculate the likelihood.
        :Raises KeyError: if `dname` is not a supported family.
        """
        # NOTE(review): the 'expon' entry returns a plain likelihood (not a
        # log-likelihood) and never reads x[1]; _update still applies exp()
        # to it. Confirm the intended formula before relying on it.
        like_funs = {
            'norm': lambda x: like.Normal(x[0], x[1], 1. / x[2]),
            'expon': lambda x: (1. / x[2]) ** x[0].size * exp(-(1. / x[2]) * sum(x[0])),
            'beta': lambda x: like.Beta(x[0], x[1], x[2]),
            'uniform': lambda x: like.Uniform(x[0], x[1] - 2 * sqrt(x[2]), x[1] + 2 * sqrt(x[2])),
        }
        return like_funs[dname]
        # TODO: expand for more distribution types

    def _post_from_conjugate(self, dname, *pars):
        """
        Returns posterior distribution function using conjugate prior theory

        Only the Bernoulli likelihood with a Beta prior is implemented.

        :Parameters:
            - `dname`: name of the likelihood family (e.g. 'bernoulli').
            - `pars`: parameters of the conjugate prior; for 'bernoulli'
              the two Beta shape parameters (a, b).

        :Return:
            Frozen scipy.stats distribution of the posterior, or None when
            there is no data or the family is not supported yet.

        :Raises ValueError: if 'bernoulli' is not given exactly two prior
            parameters.
        """
        # "not self.data" is ambiguous for arrays; test for absence or
        # emptiness explicitly.
        if self.data is None or self.data.size == 0:
            return None
        if dname == 'bernoulli':
            if len(pars) != 2:
                raise ValueError(
                    'bernoulli needs the two Beta prior parameters (a, b)')
            # Beta(a, b) prior with k successes in n trials gives a
            # Beta(a + k, b + n - k) posterior.
            successes = self.data.sum()
            failures = self.data.size - successes
            return stats.beta(pars[0] + successes, pars[1] + failures)
        # TODO: finish this for the remaining conjugate families
        return None
class __BayesC(_BayesVar, stats.rv_continuous):
    """
    Continuous flavour of the Bayesian variable.
    """

    def __init__(self, priortype, pars, range, resolution=512):
        """
        Delegates all the set-up to `_BayesVar.__init__`.

        :Parameters:
            - `priortype`: scipy.stats distribution used as prior.
            - `pars`: parameters of the prior.
            - `range`: range of the variable support.
            - `resolution`: resolution of the support (defaults to 512).
        """
        _BayesVar.__init__(
            self,
            disttype=priortype,
            pars=pars,
            rang=range,
            resolution=resolution,
        )
208
class __BayesD(_BayesVar, stats.rv_discrete):
    """
    Discrete flavour of the Bayesian variable.
    """

    def __init__(self, priortype, pars, range, resolution=512):
        """
        Delegates all the set-up to `_BayesVar.__init__`.

        :Parameters:
            - `priortype`: scipy.stats distribution used as prior.
            - `pars`: parameters of the prior.
            - `range`: range of the variable support.
            - `resolution`: resolution of the support (defaults to 512).
        """
        _BayesVar.__init__(
            self,
            disttype=priortype,
            pars=pars,
            rang=range,
            resolution=resolution,
        )
if __name__ == "__main__":
    # Demo: normal prior on (0, 5) updated with uniformly distributed data,
    # then likelihood, prior and posterior sample are plotted together.
    bv = BayesVar(stats.norm, (3, 1), range=(0, 5), resolution=1000)
    data = stats.uniform(1, 3).rvs(500)
    bv.add_data(data, stats.uniform(1, 3))
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(bv)
    p = bv.get_posterior_sample(200000)
    print(bv)
    support = arange(bv.rang[0], bv.rang[1], bv.res)
    # Likelihood is rescaled to a maximum of 1 for display only.
    P.plot(support, bv.likelihood / max(bv.likelihood), 'ro', lw=2)
    P.plot(support, bv.get_prior_dist(), 'g+', lw=2)
    print(p)
    # `normed` was removed from matplotlib; `density` is its replacement.
    P.hist(p, density=True)
    P.legend(['Likelihood', 'Prior', 'Posterior'])
    P.title('Bayesian inference')
    P.savefig('bayesvar.png', dpi=400)
    P.show()