Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3Created on Sat Oct 01 20:20:16 2011 

4 

5Author: Josef Perktold 

6License: BSD-3 

7 

8TODO: 

9check orientation, size and alpha should be increasing for interp1d, 

10but what is alpha? can be either sf or cdf probability 

11change it to use one consistent notation 

12 

13check: instead of bound checking I could use the fill-value of the 

14interpolators 

15""" 

16import numpy as np 

17from scipy.interpolate import interp1d, interp2d, Rbf 

18 

19from statsmodels.tools.decorators import cache_readonly 

20 

21 

22class TableDist(object): 

23 """ 

24 Distribution, critical values and p-values from tables 

25 

26 currently only 1 extra parameter, e.g. sample size 

27 

28 Parameters 

29 ---------- 

30 alpha : array_like, 1d 

31 probabiliy in the table, could be either sf (right tail) or cdf (left 

32 tail) 

33 size : array_like, 1d 

34 The sample sizes for the table 

35 crit_table : array_like, 2d 

36 The sample sizes in the table 

37 array with critical values for sample size in rows and probability in 

38 columns 

39 asymptotic : callable, optional 

40 Callable function with the form fn(nobs) that returns len(alpha) 

41 critical values where the critical value in position i corresponds to 

42 alpha[i] 

43 min_nobs : int, optional 

44 Minimum number of observations to use the asymptotic distribution. If 

45 not provided, uses max(size). 

46 max_nobs : int, optional 

47 Maximum number of observations to use the tabular distribution. If not 

48 provided, uses max(size) 

49 

50 Notes 

51 ----- 

52 size and alpha must be sorted and increasing. 

53 

54 If both min_nobs and max_nobs are provided, then 

55 the critical values from the tabular distribution and the asymptotic 

56 distribution are linearly blended using the formula 

57 :math:`w cv_a + (1-w) cv_t` where the weight is 

58 :math:`w = (n - a_{min}) / (a_{max} - a_{min})`. This ensures the 

59 transition between the tabular and the asymptotic critical values is 

60 continuous. If these are not provided, then the asymptotic critical value 

61 is used for nobs > max(size). 

62 """ 

63 

64 def __init__(self, alpha, size, crit_table, asymptotic=None, 

65 min_nobs=None, max_nobs=None): 

66 self.alpha = np.asarray(alpha) 

67 if self.alpha.ndim != 1: 

68 raise ValueError('alpha is not 1d') 

69 elif (np.diff(self.alpha) <= 0).any(): 

70 raise ValueError('alpha is not sorted') 

71 self.size = np.asarray(size) 

72 if self.size.ndim != 1: 

73 raise ValueError('size is not 1d') 

74 elif (np.diff(self.size) <= 0).any(): 

75 raise ValueError('size is not sorted') 

76 if self.size.ndim == 1: 

77 if (np.diff(alpha) <= 0).any(): 

78 raise ValueError('alpha is not sorted') 

79 self.crit_table = np.asarray(crit_table) 

80 if self.crit_table.shape != (self.size.shape[0], self.alpha.shape[0]): 

81 raise ValueError('crit_table must have shape' 

82 '(len(size), len(alpha))') 

83 

84 self.n_alpha = len(alpha) 

85 self.signcrit = np.sign(np.diff(self.crit_table, 1).mean()) 

86 if self.signcrit > 0: # increasing 

87 self.critv_bounds = self.crit_table[:, [0, 1]] 

88 else: 

89 self.critv_bounds = self.crit_table[:, [1, 0]] 

90 self.asymptotic = None 

91 max_size = self.max_size = max(size) 

92 

93 if asymptotic is not None: 

94 try: 

95 cv = asymptotic(self.max_size + 1) 

96 except Exception as exc: 

97 raise type(exc)('Calling asymptotic(self.size+1) failed. The ' 

98 'error message was:' 

99 '\n\n{err_msg}'.format(err_msg=exc.args[0])) 

100 if len(cv) != len(alpha): 

101 raise ValueError('asymptotic does not return len(alpha) ' 

102 'values') 

103 self.asymptotic = asymptotic 

104 

105 self.min_nobs = max_size if min_nobs is None else min_nobs 

106 self.max_nobs = max_size if max_nobs is None else max_nobs 

107 if self.min_nobs > max_size: 

108 raise ValueError('min_nobs > max(size)') 

109 if self.max_nobs > max_size: 

110 raise ValueError('max_nobs > max(size)') 

111 

112 @cache_readonly 

113 def polyn(self): 

114 polyn = [interp1d(self.size, self.crit_table[:, i]) 

115 for i in range(self.n_alpha)] 

116 return polyn 

117 

118 @cache_readonly 

119 def poly2d(self): 

120 # check for monotonicity ? 

121 # fix this, interp needs increasing 

122 poly2d = interp2d(self.size, self.alpha, self.crit_table) 

123 return poly2d 

124 

125 @cache_readonly 

126 def polyrbf(self): 

127 xs, xa = np.meshgrid(self.size.astype(float), self.alpha) 

128 polyrbf = Rbf(xs.ravel(), xa.ravel(), self.crit_table.T.ravel(), 

129 function='linear') 

130 return polyrbf 

131 

132 def _critvals(self, n): 

133 """ 

134 Rows of the table, linearly interpolated for given sample size 

135 

136 Parameters 

137 ---------- 

138 n : float 

139 sample size, second parameter of the table 

140 

141 Returns 

142 ------- 

143 critv : ndarray, 1d 

144 critical values (ppf) corresponding to a row of the table 

145 

146 Notes 

147 ----- 

148 This is used in two step interpolation, or if we want to know the 

149 critical values for all alphas for any sample size that we can obtain 

150 through interpolation 

151 """ 

152 if n > self.max_size: 

153 if self.asymptotic is not None: 

154 cv = self.asymptotic(n) 

155 else: 

156 raise ValueError('n is above max(size) and no asymptotic ' 

157 'distribtuion is provided') 

158 else: 

159 cv = ([p(n) for p in self.polyn]) 

160 if n > self.min_nobs: 

161 w = (n - self.min_nobs) / (self.max_nobs - self.min_nobs) 

162 w = min(1.0, w) 

163 a_cv = self.asymptotic(n) 

164 cv = w * a_cv + (1 - w) * cv 

165 

166 return cv 

167 

168 def prob(self, x, n): 

169 """ 

170 Find pvalues by interpolation, either cdf(x) 

171 

172 Returns extreme probabilities, 0.001 and 0.2, for out of range 

173 

174 Parameters 

175 ---------- 

176 x : array_like 

177 observed value, assumed to follow the distribution in the table 

178 n : float 

179 sample size, second parameter of the table 

180 

181 Returns 

182 ------- 

183 prob : array_like 

184 This is the probability for each value of x, the p-value in 

185 underlying distribution is for a statistical test. 

186 """ 

187 critv = self._critvals(n) 

188 alpha = self.alpha 

189 

190 if self.signcrit < 1: 

191 # reverse if critv is decreasing 

192 critv, alpha = critv[::-1], alpha[::-1] 

193 

194 # now critv is increasing 

195 if np.size(x) == 1: 

196 if x < critv[0]: 

197 return alpha[0] 

198 elif x > critv[-1]: 

199 return alpha[-1] 

200 return interp1d(critv, alpha)(x)[()] 

201 else: 

202 # vectorized 

203 cond_low = (x < critv[0]) 

204 cond_high = (x > critv[-1]) 

205 cond_interior = ~np.logical_or(cond_low, cond_high) 

206 

207 probs = np.nan * np.ones(x.shape) # mistake if nan left 

208 probs[cond_low] = alpha[0] 

209 probs[cond_low] = alpha[-1] 

210 probs[cond_interior] = interp1d(critv, alpha)(x[cond_interior]) 

211 

212 return probs 

213 

214 def crit(self, prob, n): 

215 """ 

216 Returns interpolated quantiles, similar to ppf or isf 

217 

218 use two sequential 1d interpolation, first by n then by prob 

219 

220 Parameters 

221 ---------- 

222 prob : array_like 

223 probabilities corresponding to the definition of table columns 

224 n : int or float 

225 sample size, second parameter of the table 

226 

227 Returns 

228 ------- 

229 ppf : array_like 

230 critical values with same shape as prob 

231 """ 

232 prob = np.asarray(prob) 

233 alpha = self.alpha 

234 critv = self._critvals(n) 

235 

236 # vectorized 

237 cond_ilow = (prob > alpha[0]) 

238 cond_ihigh = (prob < alpha[-1]) 

239 cond_interior = np.logical_or(cond_ilow, cond_ihigh) 

240 

241 # scalar 

242 if prob.size == 1: 

243 if cond_interior: 

244 return interp1d(alpha, critv)(prob) 

245 else: 

246 return np.nan 

247 

248 # vectorized 

249 quantile = np.nan * np.ones(prob.shape) # nans for outside 

250 quantile[cond_interior] = interp1d(alpha, critv)(prob[cond_interior]) 

251 return quantile 

252 

253 def crit3(self, prob, n): 

254 """ 

255 Returns interpolated quantiles, similar to ppf or isf 

256 

257 uses Rbf to interpolate critical values as function of `prob` and `n` 

258 

259 Parameters 

260 ---------- 

261 prob : array_like 

262 probabilities corresponding to the definition of table columns 

263 n : int or float 

264 sample size, second parameter of the table 

265 

266 Returns 

267 ------- 

268 ppf : array_like 

269 critical values with same shape as prob, returns nan for arguments 

270 that are outside of the table bounds 

271 """ 

272 prob = np.asarray(prob) 

273 alpha = self.alpha 

274 

275 # vectorized 

276 cond_ilow = (prob > alpha[0]) 

277 cond_ihigh = (prob < alpha[-1]) 

278 cond_interior = np.logical_or(cond_ilow, cond_ihigh) 

279 

280 # scalar 

281 if prob.size == 1: 

282 if cond_interior: 

283 return self.polyrbf(n, prob) 

284 else: 

285 return np.nan 

286 

287 # vectorized 

288 quantile = np.nan * np.ones(prob.shape) # nans for outside 

289 

290 quantile[cond_interior] = self.polyrbf(n, prob[cond_interior]) 

291 return quantile