Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/genmod/_prediction.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Created on Fri Dec 19 11:29:18 2014
5Author: Josef Perktold
6License: BSD-3
8"""
10import numpy as np
11from scipy import stats
14# this is similar to ContrastResults after t_test, partially copied and adjusted
class PredictionResults(object):
    """Predicted means together with their variance, tests and intervals.

    Parameters
    ----------
    predicted_mean : ndarray
        Predicted values.
    var_pred_mean : ndarray
        Variance of the predicted values.
    var_resid : optional
        Residual variance. Only stored on the instance; no working method
        of this class currently reads it.
    df : optional
        Degrees of freedom, passed to the distribution when ``dist`` is
        ``'t'``.
    dist : {None, 'norm', 't'} or distribution object, optional
        ``None`` and ``'norm'`` select ``scipy.stats.norm``, ``'t'`` selects
        ``scipy.stats.t`` with ``df``. Any other value is used as is and
        must provide ``sf``, ``cdf`` and ``ppf``.
    row_labels : optional
        Index used for the frame returned by `summary_frame`.
    linpred : optional
        Prediction results for the linear predictor. Its ``conf_int`` is
        used by the ``'endpoint'`` method of `conf_int`.
    link : optional
        Object with an ``inverse`` method mapping the linear predictor to
        the mean. ``None`` is treated as the identity link.
    """

    def __init__(self, predicted_mean, var_pred_mean, var_resid=None,
                 df=None, dist=None, row_labels=None, linpred=None, link=None):
        # TODO: is var_resid used? drop from arguments?
        self.predicted_mean = predicted_mean
        self.var_pred_mean = var_pred_mean
        self.df = df
        self.var_resid = var_resid
        self.row_labels = row_labels
        self.linpred = linpred
        self.link = link

        if dist is None or dist == 'norm':
            self.dist = stats.norm
            self.dist_args = ()
        elif dist == 't':
            self.dist = stats.t
            self.dist_args = (self.df,)
        else:
            # user supplied distribution object, used without extra args
            self.dist = dist
            self.dist_args = ()

    @property
    def se_obs(self):
        """Standard error for a new observation; not implemented.

        Raises
        ------
        NotImplementedError
            Always.
        """
        # The disabled formula was sqrt(var_pred_mean + var_resid); it used
        # to sit unreachable after this raise.
        raise NotImplementedError

    @property
    def se_mean(self):
        """Standard error of the predicted mean."""
        return np.sqrt(self.var_pred_mean)

    @property
    def tvalues(self):
        """Predicted mean divided by its standard error."""
        return self.predicted_mean / self.se_mean

    def t_test(self, value=0, alternative='two-sided'):
        """z- or t-test for hypothesis that mean is equal to value

        Parameters
        ----------
        value : array_like
            value under the null hypothesis
        alternative : str
            'two-sided' (also '2-sided', '2s'), 'larger' (also 'l') or
            'smaller' (also 's')

        Returns
        -------
        stat : ndarray
            test statistic
        pvalue : ndarray
            p-value of the hypothesis test, the distribution is given by
            the attribute of the instance, specified in `__init__`. Default
            if not specified is the normal distribution.

        Raises
        ------
        ValueError
            If `alternative` is not one of the accepted strings.
        """
        # assumes symmetric distribution
        stat = (self.predicted_mean - value) / self.se_mean

        if alternative in ['two-sided', '2-sided', '2s']:
            pvalue = self.dist.sf(np.abs(stat), *self.dist_args) * 2
        elif alternative in ['larger', 'l']:
            pvalue = self.dist.sf(stat, *self.dist_args)
        elif alternative in ['smaller', 's']:
            pvalue = self.dist.cdf(stat, *self.dist_args)
        else:
            raise ValueError('invalid alternative')
        return stat, pvalue

    def conf_int(self, method='endpoint', alpha=0.05, **kwds):
        """Confidence interval for the predicted mean.

        Parameters
        ----------
        method : {'endpoint', 'delta'}
            'endpoint' transforms the confidence interval of the linear
            predictor (``linpred``) with the inverse link. 'delta' builds a
            symmetric interval around the predicted mean from ``se_mean``.
            If the link is linear, or ``link`` is None, the symmetric
            interval is used regardless of `method`.
        alpha : float, optional
            The significance level for the confidence interval.
            ie., The default `alpha` = .05 returns a 95% confidence interval.
        kwds : extra keyword arguments
            currently ignored, only for compatibility, consistent signature

        Returns
        -------
        ci : ndarray
            The array has the lower and the upper limit of the confidence
            interval in the columns.

        Raises
        ------
        ValueError
            If the link is not linear and `method` is neither 'endpoint'
            nor 'delta'.
        """
        link = self.link
        if link is None:
            # no transformation available: behave like the identity link
            is_linear = True
        else:
            # a link is considered linear if its inverse is the identity
            # on a small grid
            grid = np.linspace(0, 1, 6)
            is_linear = bool(np.all(link.inverse(grid) == grid))

        if method == 'endpoint' and not is_linear:
            ci_linear = self.linpred.conf_int(alpha=alpha, obs=False)
            return link.inverse(ci_linear)

        if method == 'delta' or is_linear:
            se = self.se_mean
            q = self.dist.ppf(1 - alpha / 2., *self.dist_args)
            lower = self.predicted_mean - q * se
            upper = self.predicted_mean + q * se
            # if we want to stack at a new last axis, for lower.ndim > 1
            # np.concatenate((lower[..., None], upper[..., None]), axis=-1)
            return np.column_stack((lower, upper))

        # previously this path fell through to an unbound local variable
        raise ValueError("method must be 'endpoint' or 'delta'")

    def summary_frame(self, what='all', alpha=0.05):
        """Return mean, standard error and confidence limits as a DataFrame.

        Parameters
        ----------
        what : str
            Currently ignored.
        alpha : float, optional
            Significance level passed to `conf_int`.

        Returns
        -------
        res : pandas.DataFrame
            Columns ``mean``, ``mean_se``, ``mean_ci_lower`` and
            ``mean_ci_upper``, indexed by ``row_labels``. The underlying
            ordered mapping is also stored in ``self.table``.
        """
        # TODO: finish and cleanup
        import pandas as pd
        from collections import OrderedDict

        ci_mean = self.conf_int(alpha=alpha)
        to_include = OrderedDict()
        to_include['mean'] = self.predicted_mean
        to_include['mean_se'] = self.se_mean
        to_include['mean_ci_lower'] = ci_mean[:, 0]
        to_include['mean_ci_upper'] = ci_mean[:, 1]

        self.table = to_include
        # pass the keys explicitly as columns to fix the column order
        res = pd.DataFrame(to_include, index=self.row_labels,
                           columns=to_include.keys())
        return res
def get_prediction_glm(self, exog=None, transform=True, weights=None,
                       row_labels=None, linpred=None, link=None,
                       pred_kwds=None):
    """
    compute prediction results

    Parameters
    ----------
    exog : array_like, optional
        The values for which you want to predict. If None, the exog of the
        model is used.
    transform : bool, optional
        If the model was fit via a formula, do you want to pass
        exog through the formula. Default is True. E.g., if you fit
        a model y ~ log(x1) + log(x2), and transform is True, then
        you can pass a data structure that contains x1 and x2 in
        their original form. Otherwise, you'd need to log the data
        first.
    weights : array_like, optional
        Weights interpreted as in WLS, used for the variance of the predicted
        residual. Must be a scalar or one-dimensional with one entry per
        row of exog.
    row_labels : optional
        Labels for the rows of the prediction. Defaults to the index of
        `exog` if it has one, or to the row labels of the model data when
        `exog` is None.
    linpred : PredictionResults
        Prediction results for the linear predictor. Its ``predicted_mean``
        is used to evaluate the derivative of the inverse link. Required.
    link : optional
        Link stored on the returned results. Defaults to
        ``self.model.family.link``.
    pred_kwds : dict, optional
        Extra keyword arguments for ``self.model.predict``. The key
        ``'linear'`` is always overridden with False. The dict passed in is
        not modified.

    Returns
    -------
    prediction_results : PredictionResults
        The prediction results instance contains prediction and prediction
        variance and can on demand calculate confidence intervals and summary
        tables for the prediction of the mean.

    Raises
    ------
    ValueError
        If `weights` has the wrong shape or `linpred` is None.
    """

    # prepare exog and row_labels, based on base Results.predict
    if transform and hasattr(self.model, 'formula') and exog is not None:
        from patsy import dmatrix
        exog = dmatrix(self.model.data.design_info,
                       exog)

    if exog is not None:
        if row_labels is None:
            row_labels = getattr(exog, 'index', None)
            # e.g. a list has a callable ``index`` method, not labels
            if callable(row_labels):
                row_labels = None

        exog = np.asarray(exog)
        if exog.ndim == 1 and (self.model.exog.ndim == 1 or
                               self.model.exog.shape[1] == 1):
            exog = exog[:, None]
        exog = np.atleast_2d(exog)  # needed in count model shape[1]
    else:
        exog = self.model.exog
        if weights is None:
            weights = getattr(self.model, 'weights', None)

        if row_labels is None:
            row_labels = getattr(self.model.data, 'row_labels', None)

    # need to handle other arrays, TODO: is delegating to model possible ?
    if weights is not None:
        weights = np.asarray(weights)
        # non-scalar weights need one entry per predicted row
        if (weights.size > 1 and
                (weights.ndim != 1 or weights.shape[0] != exog.shape[0])):
            raise ValueError('weights has wrong shape')

    if linpred is None:
        # fail early with a clear message instead of an AttributeError below
        raise ValueError('linpred is required to compute the variance of '
                         'the predicted mean')

    # copy so that the caller's dict is not modified
    pred_kwds = {} if pred_kwds is None else dict(pred_kwds)
    pred_kwds['linear'] = False
    predicted_mean = self.model.predict(self.params, exog, **pred_kwds)

    covb = self.cov_params()

    family_link = self.model.family.link
    if link is None:
        link = family_link

    # delta method: derivative of the inverse link times the variance of
    # the linear predictor, exog_i' covb exog_i for each row
    link_deriv = family_link.inverse_deriv(linpred.predicted_mean)
    var_pred_mean = link_deriv**2 * (exog * np.dot(covb, exog.T).T).sum(1)
    var_resid = self.scale  # self.mse_resid / weights

    # TODO: check that we have correct scale, Refactor scale #???
    # special case for now:
    if self.cov_type == 'fixed scale':
        var_resid = self.cov_kwds['scale']

    if weights is not None:
        # out-of-place so that an array valued scale is never modified
        var_resid = var_resid / weights

    dist = 't' if self.use_t else 'norm'
    return PredictionResults(predicted_mean, var_pred_mean, var_resid,
                             df=self.df_resid, dist=dist,
                             row_labels=row_labels, linpred=linpred,
                             link=link)
def params_transform_univariate(params, cov_params, link=None, transform=None,
                                row_labels=None):
    """
    results for univariate, nonlinear, monotonically transformed parameters

    Each parameter is mapped through the inverse of `link`, and the variance
    of the transformed value is obtained from the diagonal of `cov_params`
    scaled by the squared derivative of the inverse link. A typical use is
    calculating rates with `exp(params)` in the case of Poisson or other
    models with exponential mean function.

    Parameters
    ----------
    params : array_like
        Parameter estimates. If it has an ``index`` attribute, that is used
        as default for `row_labels`.
    cov_params : array_like
        Covariance of the parameter estimates, only the diagonal is used.
    link : optional
        Object providing ``inverse`` and ``inverse_deriv``. If both `link`
        and `transform` are None, the log link is used.
    transform : optional
        NOTE(review): only consulted when choosing the default link; if it
        is given without `link`, `link` stays None and the calls on it
        fail - confirm the intended semantics.
    row_labels : optional
        Labels for the rows of the results.

    Returns
    -------
    res : PredictionResults
        Results for the transformed parameters. Results for the
        untransformed parameters are attached as ``linpred``.
    """
    from statsmodels.genmod.families import links

    use_default_link = link is None and transform is None
    if use_default_link:
        link = links.Log()

    if row_labels is None and hasattr(params, 'index'):
        row_labels = params.index

    param_values = np.asarray(params)

    transformed = link.inverse(param_values)
    deriv = link.inverse_deriv(param_values)
    # TODO: do we want covariance also, or just var/se
    var_params = np.diag(cov_params)
    var_transformed = deriv**2 * var_params

    normal = stats.norm

    # TODO: need ci for linear prediction, method of `lin_pred
    untransformed = PredictionResults(
        param_values,
        var_params,
        dist=normal,
        row_labels=row_labels,
        link=links.identity(),
    )

    return PredictionResults(
        transformed,
        var_transformed,
        dist=normal,
        row_labels=row_labels,
        linpred=untransformed,
        link=link,
    )