Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/gam/smooth_basis.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Spline and other smoother classes for Generalized Additive Models
5Author: Luca Puggini
6Author: Josef Perktold
8Created on Fri Jun 5 16:32:00 2015
9"""
11# import useful only for development
12from abc import ABCMeta, abstractmethod
13from statsmodels.compat.python import with_metaclass
15import numpy as np
16import pandas as pd
17from patsy import dmatrix
18from patsy.mgcv_cubic_splines import _get_all_sorted_knots
20from statsmodels.tools.linalg import transf_constraints
23# Obtain b splines from patsy
25def _equally_spaced_knots(x, df):
26 n_knots = df - 2
27 x_min = x.min()
28 x_max = x.max()
29 knots = np.linspace(x_min, x_max, n_knots)
30 return knots
33def _R_compat_quantile(x, probs):
34 # return np.percentile(x, 100 * np.asarray(probs))
35 probs = np.asarray(probs)
36 quantiles = np.asarray([np.percentile(x, 100 * prob)
37 for prob in probs.ravel(order="C")])
38 return quantiles.reshape(probs.shape, order="C")
41# FIXME: is this copy/pasted? If so, why do we need it? If not, get
42# rid of the try/except for scipy import
43# from patsy splines.py
44def _eval_bspline_basis(x, knots, degree, deriv='all', include_intercept=True):
45 try:
46 from scipy.interpolate import splev
47 except ImportError:
48 raise ImportError("spline functionality requires scipy")
49 # 'knots' are assumed to be already pre-processed. E.g. usually you
50 # want to include duplicate copies of boundary knots; you should do
51 # that *before* calling this constructor.
52 knots = np.atleast_1d(np.asarray(knots, dtype=float))
53 assert knots.ndim == 1
54 knots.sort()
55 degree = int(degree)
56 x = np.atleast_1d(x)
57 if x.ndim == 2 and x.shape[1] == 1:
58 x = x[:, 0]
59 assert x.ndim == 1
60 # XX FIXME: when points fall outside of the boundaries, splev and R seem
61 # to handle them differently. I do not know why yet. So until we understand
62 # this and decide what to do with it, I'm going to play it safe and
63 # disallow such points.
64 if np.min(x) < np.min(knots) or np.max(x) > np.max(knots):
65 raise NotImplementedError("some data points fall outside the "
66 "outermost knots, and I'm not sure how "
67 "to handle them. (Patches accepted!)")
68 # Thanks to Charles Harris for explaining splev. It's not well
69 # documented, but basically it computes an arbitrary b-spline basis
70 # given knots and degree on some specificed points (or derivatives
71 # thereof, but we do not use that functionality), and then returns some
72 # linear combination of these basis functions. To get out the basis
73 # functions themselves, we use linear combinations like [1, 0, 0], [0,
74 # 1, 0], [0, 0, 1].
75 # NB: This probably makes it rather inefficient (though I have not checked
76 # to be sure -- maybe the fortran code actually skips computing the basis
77 # function for coefficients that are zero).
78 # Note: the order of a spline is the same as its degree + 1.
79 # Note: there are (len(knots) - order) basis functions.
81 k_const = 1 - int(include_intercept)
82 n_bases = len(knots) - (degree + 1) - k_const
83 if deriv in ['all', 0]:
84 basis = np.empty((x.shape[0], n_bases), dtype=float)
85 ret = basis
86 if deriv in ['all', 1]:
87 der1_basis = np.empty((x.shape[0], n_bases), dtype=float)
88 ret = der1_basis
89 if deriv in ['all', 2]:
90 der2_basis = np.empty((x.shape[0], n_bases), dtype=float)
91 ret = der2_basis
93 for i in range(n_bases):
94 coefs = np.zeros((n_bases + k_const,))
95 # we are skipping the first column of the basis to drop constant
96 coefs[i + k_const] = 1
97 ii = i
98 if deriv in ['all', 0]:
99 basis[:, ii] = splev(x, (knots, coefs, degree))
100 if deriv in ['all', 1]:
101 der1_basis[:, ii] = splev(x, (knots, coefs, degree), der=1)
102 if deriv in ['all', 2]:
103 der2_basis[:, ii] = splev(x, (knots, coefs, degree), der=2)
105 if deriv == 'all':
106 return basis, der1_basis, der2_basis
107 else:
108 return ret
111def compute_all_knots(x, df, degree):
112 order = degree + 1
113 n_inner_knots = df - order
114 lower_bound = np.min(x)
115 upper_bound = np.max(x)
116 knot_quantiles = np.linspace(0, 1, n_inner_knots + 2)[1:-1]
117 inner_knots = _R_compat_quantile(x, knot_quantiles)
118 all_knots = np.concatenate(([lower_bound, upper_bound] * order,
119 inner_knots))
120 return all_knots, lower_bound, upper_bound, inner_knots
123def make_bsplines_basis(x, df, degree):
124 ''' make a spline basis for x '''
126 all_knots, _, _, _ = compute_all_knots(x, df, degree)
127 basis, der_basis, der2_basis = _eval_bspline_basis(x, all_knots, degree)
128 return basis, der_basis, der2_basis
131def get_knots_bsplines(x=None, df=None, knots=None, degree=3,
132 spacing='quantile', lower_bound=None,
133 upper_bound=None, all_knots=None):
134 """knots for use in B-splines
136 There are two main options for the knot placement
138 - quantile spacing with multiplicity of boundary knots
139 - equal spacing extended to boundary or exterior knots
141 The first corresponds to splines as used by patsy. the second is the
142 knot spacing for P-Splines.
143 """
144 # based on patsy memorize_finish
145 if all_knots is not None:
146 return all_knots
148 x_min = x.min()
149 x_max = x.max()
151 if degree < 0:
152 raise ValueError("degree must be greater than 0 (not %r)"
153 % (degree,))
154 if int(degree) != degree:
155 raise ValueError("degree must be an integer (not %r)"
156 % (degree,))
158 # These are guaranteed to all be 1d vectors by the code above
159 # x = np.concatenate(tmp["xs"])
160 if df is None and knots is None:
161 raise ValueError("must specify either df or knots")
162 order = degree + 1
163 if df is not None:
164 n_inner_knots = df - order
165 if n_inner_knots < 0:
166 raise ValueError("df=%r is too small for degree=%r; must be >= %s"
167 % (df, degree,
168 # We know that n_inner_knots is negative;
169 # if df were that much larger, it would
170 # have been zero, and things would work.
171 df - n_inner_knots))
172 if knots is not None:
173 if len(knots) != n_inner_knots:
174 raise ValueError("df=%s with degree=%r implies %s knots, "
175 "but %s knots were provided"
176 % (df, degree,
177 n_inner_knots, len(knots)))
178 elif spacing == 'quantile':
179 # Need to compute inner knots
180 knot_quantiles = np.linspace(0, 1, n_inner_knots + 2)[1:-1]
181 inner_knots = _R_compat_quantile(x, knot_quantiles)
182 elif spacing == 'equal':
183 # Need to compute inner knots
184 grid = np.linspace(0, 1, n_inner_knots + 2)[1:-1]
185 inner_knots = x_min + grid * (x_max - x_min)
186 diff_knots = inner_knots[1] - inner_knots[0]
187 else:
188 raise ValueError("incorrect option for spacing")
189 if knots is not None:
190 inner_knots = knots
191 if lower_bound is None:
192 lower_bound = np.min(x)
193 if upper_bound is None:
194 upper_bound = np.max(x)
196 if lower_bound > upper_bound:
197 raise ValueError("lower_bound > upper_bound (%r > %r)"
198 % (lower_bound, upper_bound))
199 inner_knots = np.asarray(inner_knots)
200 if inner_knots.ndim > 1:
201 raise ValueError("knots must be 1 dimensional")
202 if np.any(inner_knots < lower_bound):
203 raise ValueError("some knot values (%s) fall below lower bound "
204 "(%r)"
205 % (inner_knots[inner_knots < lower_bound],
206 lower_bound))
207 if np.any(inner_knots > upper_bound):
208 raise ValueError("some knot values (%s) fall above upper bound "
209 "(%r)"
210 % (inner_knots[inner_knots > upper_bound],
211 upper_bound))
213 if spacing == "equal":
214 diffs = np.arange(1, order + 1) * diff_knots
215 lower_knots = inner_knots[0] - diffs[::-1]
216 upper_knots = inner_knots[-1] + diffs
217 all_knots = np.concatenate((lower_knots, inner_knots, upper_knots))
218 else:
219 all_knots = np.concatenate(([lower_bound, upper_bound] * order,
220 inner_knots))
221 all_knots.sort()
223 return all_knots
226def _get_integration_points(knots, k_points=3):
227 """add points to each subinterval defined by knots
229 inserts k_points between each two consecutive knots
230 """
231 k_points = k_points + 1
232 knots = np.unique(knots)
233 dxi = np.arange(k_points) / k_points
234 dxk = np.diff(knots)
235 dx = dxk[:, None] * dxi
236 x = np.concatenate(((knots[:-1, None] + dx).ravel(), [knots[-1]]))
237 return x
240def get_covder2(smoother, k_points=4, integration_points=None,
241 skip_ctransf=False, deriv=2):
242 """
243 Approximate integral of cross product of second derivative of smoother
245 This uses scipy.integrate simps to compute an approximation to the
246 integral of the smoother derivative cross-product at knots plus k_points
247 in between knots.
248 """
249 from scipy.integrate import simps
250 knots = smoother.knots
251 x = _get_integration_points(knots, k_points=3)
252 if integration_points is None:
253 d2 = smoother.transform(x, deriv=deriv, skip_ctransf=skip_ctransf)
254 else:
255 x = integration_points
256 covd2 = simps(d2[:, :, None] * d2[:, None, :], x, axis=0)
257 return covd2
260# TODO: this function should be deleted
261def make_poly_basis(x, degree, intercept=True):
262 '''
263 given a vector x returns poly=(1, x, x^2, ..., x^degree)
264 and its first and second derivative
265 '''
267 if intercept:
268 start = 0
269 else:
270 start = 1
272 nobs = len(x)
273 basis = np.zeros(shape=(nobs, degree + 1 - start))
274 der_basis = np.zeros(shape=(nobs, degree + 1 - start))
275 der2_basis = np.zeros(shape=(nobs, degree + 1 - start))
277 for i in range(start, degree + 1):
278 basis[:, i - start] = x ** i
279 der_basis[:, i - start] = i * x ** (i - 1)
280 der2_basis[:, i - start] = i * (i - 1) * x ** (i - 2)
282 return basis, der_basis, der2_basis
285# TODO: try to include other kinds of splines from patsy
286# x = np.linspace(0, 1, 30)
287# df = 10
288# degree = 3
289# from patsy.mgcv_cubic_splines import cc, cr, te
290# all_knots, lower, upper, inner = compute_all_knots(x, df, degree)
291# result = cc(x, df=df, knots=all_knots, lower_bound=lower, upper_bound=upper,
292# constraints=None)
293#
294# import matplotlib.pyplot as plt
295#
296# result = np.array(result)
297# print(result.shape)
298# plt.plot(result.T)
299# plt.show()
301class UnivariateGamSmoother(with_metaclass(ABCMeta)):
302 """Base Class for single smooth component
303 """
304 def __init__(self, x, constraints=None, variable_name='x'):
305 self.x = x
306 self.constraints = constraints
307 self.variable_name = variable_name
308 self.nobs, self.k_variables = len(x), 1
310 base4 = self._smooth_basis_for_single_variable()
311 if constraints == 'center':
312 constraints = base4[0].mean(0)[None, :]
314 if constraints is not None and not isinstance(constraints, str):
315 ctransf = transf_constraints(constraints)
316 self.ctransf = ctransf
317 else:
318 # subclasses might set ctransf directly
319 # only used if constraints is None
320 if not hasattr(self, 'ctransf'):
321 self.ctransf = None
323 self.basis, self.der_basis, self.der2_basis, self.cov_der2 = base4
324 if self.ctransf is not None:
325 ctransf = self.ctransf
326 # transform attributes that are not None
327 if base4[0] is not None:
328 self.basis = base4[0].dot(ctransf)
329 if base4[1] is not None:
330 self.der_basis = base4[1].dot(ctransf)
331 if base4[2] is not None:
332 self.der2_basis = base4[2].dot(ctransf)
333 if base4[3] is not None:
334 self.cov_der2 = ctransf.T.dot(base4[3]).dot(ctransf)
336 self.dim_basis = self.basis.shape[1]
337 self.col_names = [self.variable_name + "_s" + str(i)
338 for i in range(self.dim_basis)]
340 @abstractmethod
341 def _smooth_basis_for_single_variable(self):
342 return
345class UnivariateGenericSmoother(UnivariateGamSmoother):
346 """Generic single smooth component
347 """
348 def __init__(self, x, basis, der_basis, der2_basis, cov_der2,
349 variable_name='x'):
350 self.basis = basis
351 self.der_basis = der_basis
352 self.der2_basis = der2_basis
353 self.cov_der2 = cov_der2
355 super(UnivariateGenericSmoother, self).__init__(
356 x, variable_name=variable_name)
358 def _smooth_basis_for_single_variable(self):
359 return self.basis, self.der_basis, self.der2_basis, self.cov_der2
362class UnivariatePolynomialSmoother(UnivariateGamSmoother):
363 """polynomial single smooth component
364 """
365 def __init__(self, x, degree, variable_name='x'):
366 self.degree = degree
367 super(UnivariatePolynomialSmoother, self).__init__(
368 x, variable_name=variable_name)
370 def _smooth_basis_for_single_variable(self):
371 # TODO: unclear description
372 """
373 given a vector x returns poly=(1, x, x^2, ..., x^degree)
374 and its first and second derivative
375 """
377 basis = np.zeros(shape=(self.nobs, self.degree))
378 der_basis = np.zeros(shape=(self.nobs, self.degree))
379 der2_basis = np.zeros(shape=(self.nobs, self.degree))
380 for i in range(self.degree):
381 dg = i + 1
382 basis[:, i] = self.x ** dg
383 der_basis[:, i] = dg * self.x ** (dg - 1)
384 if dg > 1:
385 der2_basis[:, i] = dg * (dg - 1) * self.x ** (dg - 2)
386 else:
387 der2_basis[:, i] = 0
389 cov_der2 = np.dot(der2_basis.T, der2_basis)
391 return basis, der_basis, der2_basis, cov_der2
394class UnivariateBSplines(UnivariateGamSmoother):
395 """B-Spline single smooth component
397 This creates and holds the B-Spline basis function for one
398 component.
400 Parameters
401 ----------
402 x : ndarray, 1-D
403 underlying explanatory variable for smooth terms.
404 df : int
405 numer of basis functions or degrees of freedom
406 degree : int
407 degree of the spline
408 include_intercept : bool
409 If False, then the basis functions are transformed so that they
410 do not include a constant. This avoids perfect collinearity if
411 a constant or several components are included in the model.
412 constraints : {None, str, array}
413 Constraints are used to transform the basis functions to satisfy
414 those constraints.
415 `constraints = 'center'` applies a linear transform to remove the
416 constant and center the basis functions.
417 variable_name : {None, str}
418 The name for the underlying explanatory variable, x, used in for
419 creating the column and parameter names for the basis functions.
420 covder2_kwds : {None, dict}
421 options for computing the penalty matrix from the second derivative
422 of the spline.
423 knot_kwds : {None, list[dict]}
424 option for the knot selection.
425 By default knots are selected in the same way as in patsy, however the
426 number of knots is independent of keeping or removing the constant.
427 Interior knot selection is based on quantiles of the data and is the
428 same in patsy and mgcv. Boundary points are at the limits of the data
429 range.
430 The available options use with `get_knots_bsplines` are
432 - knots : None or array
433 interior knots
434 - spacing : 'quantile' or 'equal'
435 - lower_bound : None or float
436 location of lower boundary knots, all boundary knots are at the same
437 point
438 - upper_bound : None or float
439 location of upper boundary knots, all boundary knots are at the same
440 point
441 - all_knots : None or array
442 If all knots are provided, then those will be taken as given and
443 all other options will be ignored.
444 """
445 def __init__(self, x, df, degree=3, include_intercept=False,
446 constraints=None, variable_name='x',
447 covder2_kwds=None, **knot_kwds):
448 self.degree = degree
449 self.df = df
450 self.include_intercept = include_intercept
451 self.knots = get_knots_bsplines(x, degree=degree, df=df, **knot_kwds)
452 self.covder2_kwds = (covder2_kwds if covder2_kwds is not None
453 else {})
454 super(UnivariateBSplines, self).__init__(
455 x, constraints=constraints, variable_name=variable_name)
457 def _smooth_basis_for_single_variable(self):
458 basis, der_basis, der2_basis = _eval_bspline_basis(
459 self.x, self.knots, self.degree,
460 include_intercept=self.include_intercept)
461 # cov_der2 = np.dot(der2_basis.T, der2_basis)
463 cov_der2 = get_covder2(self, skip_ctransf=True,
464 **self.covder2_kwds)
466 return basis, der_basis, der2_basis, cov_der2
468 def transform(self, x_new, deriv=0, skip_ctransf=False):
469 """create the spline basis for new observations
471 The main use of this stateful transformation is for prediction
472 using the same specification of the spline basis.
474 Parameters
475 ----------
476 x_new : ndarray
477 observations of the underlying explanatory variable
478 deriv : int
479 which derivative of the spline basis to compute
480 This is an options for internal computation.
481 skip_ctransf : bool
482 whether to skip the constraint transform
483 This is an options for internal computation.
485 Returns
486 -------
487 basis : ndarray
488 design matrix for the spline basis for given ``x_new``
489 """
491 if x_new is None:
492 x_new = self.x
493 exog = _eval_bspline_basis(x_new, self.knots, self.degree,
494 deriv=deriv,
495 include_intercept=self.include_intercept)
497 # ctransf does not exist yet when cov_der2 is computed
498 ctransf = getattr(self, 'ctransf', None)
499 if ctransf is not None and not skip_ctransf:
500 exog = exog.dot(self.ctransf)
501 return exog
504class UnivariateCubicSplines(UnivariateGamSmoother):
505 """Cubic Spline single smooth component
507 Cubic splines as described in the wood's book in chapter 3
508 """
510 def __init__(self, x, df, constraints=None, transform='domain',
511 variable_name='x'):
513 self.degree = 3
514 self.df = df
515 self.transform_data_method = transform
517 self.x = x = self.transform_data(x, initialize=True)
518 self.knots = _equally_spaced_knots(x, df)
519 super(UnivariateCubicSplines, self).__init__(
520 x, constraints=constraints, variable_name=variable_name)
522 def transform_data(self, x, initialize=False):
523 tm = self.transform_data_method
524 if tm is None:
525 return x
527 if initialize is True:
528 if tm == 'domain':
529 self.domain_low = x.min(0)
530 self.domain_upp = x.max(0)
531 elif isinstance(tm, tuple):
532 self.domain_low = tm[0]
533 self.domain_upp = tm[1]
534 self.transform_data_method = 'domain'
535 else:
536 raise ValueError("transform should be None, 'domain' "
537 "or a tuple")
538 self.domain_diff = self.domain_upp - self.domain_low
540 if self.transform_data_method == 'domain':
541 x = (x - self.domain_low) / self.domain_diff
542 return x
543 else:
544 raise ValueError("incorrect transform_data_method")
546 def _smooth_basis_for_single_variable(self):
548 basis = self._splines_x()[:, :-1]
549 # demean except for constant, does not affect derivatives
550 if not self.constraints == 'none':
551 self.transf_mean = basis[:, 1:].mean(0)
552 basis[:, 1:] -= self.transf_mean
553 else:
554 self.transf_mean = np.zeros(basis.shape[1])
555 s = self._splines_s()[:-1, :-1]
556 if not self.constraints == 'none':
557 ctransf = np.diag(1/np.max(np.abs(basis), axis=0))
558 else:
559 ctransf = np.eye(basis.shape[1])
560 # use np.eye to avoid rescaling
561 # ctransf = np.eye(basis.shape[1])
563 if self.constraints == 'no-const':
564 ctransf = ctransf[1:]
566 self.ctransf = ctransf
568 return basis, None, None, s
570 def _rk(self, x, z):
571 p1 = ((z - 1 / 2) ** 2 - 1 / 12) * ((x - 1 / 2) ** 2 - 1 / 12) / 4
572 p2 = ((np.abs(z - x) - 1 / 2) ** 4 -
573 1 / 2 * (np.abs(z - x) - 1 / 2) ** 2 +
574 7 / 240) / 24.
575 return p1 - p2
577 def _splines_x(self, x=None):
578 if x is None:
579 x = self.x
580 n_columns = len(self.knots) + 2
581 nobs = x.shape[0]
582 basis = np.ones(shape=(nobs, n_columns))
583 basis[:, 1] = x
584 # for loop equivalent to outer(x, xk, fun=rk)
585 for i, xi in enumerate(x):
586 for j, xkj in enumerate(self.knots):
587 s_ij = self._rk(xi, xkj)
588 basis[i, j + 2] = s_ij
589 return basis
591 def _splines_s(self):
592 q = len(self.knots) + 2
593 s = np.zeros(shape=(q, q))
594 for i, x1 in enumerate(self.knots):
595 for j, x2 in enumerate(self.knots):
596 s[i + 2, j + 2] = self._rk(x1, x2)
597 return s
599 def transform(self, x_new):
600 x_new = self.transform_data(x_new, initialize=False)
601 exog = self._splines_x(x_new)
602 exog[:, 1:] -= self.transf_mean
603 if self.ctransf is not None:
604 exog = exog.dot(self.ctransf)
605 return exog
608class UnivariateCubicCyclicSplines(UnivariateGamSmoother):
609 """cyclic cubic regression spline single smooth component
611 This creates and holds the Cyclic CubicSpline basis function for one
612 component.
614 Parameters
615 ----------
616 x : ndarray, 1-D
617 underlying explanatory variable for smooth terms.
618 df : int
619 numer of basis functions or degrees of freedom
620 degree : int
621 degree of the spline
622 include_intercept : bool
623 If False, then the basis functions are transformed so that they
624 do not include a constant. This avoids perfect collinearity if
625 a constant or several components are included in the model.
626 constraints : {None, str, array}
627 Constraints are used to transform the basis functions to satisfy
628 those constraints.
629 `constraints = 'center'` applies a linear transform to remove the
630 constant and center the basis functions.
631 variable_name : None or str
632 The name for the underlying explanatory variable, x, used in for
633 creating the column and parameter names for the basis functions.
634 """
635 def __init__(self, x, df, constraints=None, variable_name='x'):
636 self.degree = 3
637 self.df = df
638 self.x = x
639 self.knots = _equally_spaced_knots(x, df)
640 super(UnivariateCubicCyclicSplines, self).__init__(
641 x, constraints=constraints, variable_name=variable_name)
643 def _smooth_basis_for_single_variable(self):
644 basis = dmatrix("cc(x, df=" + str(self.df) + ") - 1", {"x": self.x})
645 self.design_info = basis.design_info
646 n_inner_knots = self.df - 2 + 1 # +n_constraints
647 # TODO: from CubicRegressionSplines class
648 all_knots = _get_all_sorted_knots(self.x, n_inner_knots=n_inner_knots,
649 inner_knots=None,
650 lower_bound=None, upper_bound=None)
652 b, d = self._get_b_and_d(all_knots)
653 s = self._get_s(b, d)
655 return basis, None, None, s
657 def _get_b_and_d(self, knots):
658 """Returns mapping of cyclic cubic spline values to 2nd derivatives.
660 .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006,
661 pp 146-147
663 Parameters
664 ----------
665 knots : ndarray
666 The 1-d array knots used for cubic spline parametrization,
667 must be sorted in ascending order.
669 Returns
670 -------
671 b, d: ndarrays
672 arrays for mapping cyclic cubic spline values at knots to
673 second derivatives.
674 penalty matrix is equal to ``s = d.T.dot(b^-1).dot(d)``
675 """
676 h = knots[1:] - knots[:-1]
677 n = knots.size - 1
679 # b and d are defined such that the penalty matrix is equivalent to:
680 # s = d.T.dot(b^-1).dot(d)
681 # reference in particular to pag 146 of Wood's book
682 b = np.zeros((n, n)) # the b matrix on page 146 of Wood's book
683 d = np.zeros((n, n)) # the d matrix on page 146 of Wood's book
685 b[0, 0] = (h[n - 1] + h[0]) / 3.
686 b[0, n - 1] = h[n - 1] / 6.
687 b[n - 1, 0] = h[n - 1] / 6.
689 d[0, 0] = -1. / h[0] - 1. / h[n - 1]
690 d[0, n - 1] = 1. / h[n - 1]
691 d[n - 1, 0] = 1. / h[n - 1]
693 for i in range(1, n):
694 b[i, i] = (h[i - 1] + h[i]) / 3.
695 b[i, i - 1] = h[i - 1] / 6.
696 b[i - 1, i] = h[i - 1] / 6.
698 d[i, i] = -1. / h[i - 1] - 1. / h[i]
699 d[i, i - 1] = 1. / h[i - 1]
700 d[i - 1, i] = 1. / h[i - 1]
702 return b, d
704 def _get_s(self, b, d):
705 return d.T.dot(np.linalg.inv(b)).dot(d)
707 def transform(self, x_new):
708 exog = dmatrix(self.design_info, {"x": x_new})
709 if self.ctransf is not None:
710 exog = exog.dot(self.ctransf)
711 return exog
714class AdditiveGamSmoother(with_metaclass(ABCMeta)):
715 """Base class for additive smooth components
716 """
717 def __init__(self, x, variable_names=None, include_intercept=False,
718 **kwargs):
720 # get pandas names before using asarray
721 if isinstance(x, pd.DataFrame):
722 data_names = x.columns.tolist()
723 elif isinstance(x, pd.Series):
724 data_names = [x.name]
725 else:
726 data_names = None
728 x = np.asarray(x)
730 if x.ndim == 1:
731 self.x = x.copy()
732 self.x.shape = (len(x), 1)
733 else:
734 self.x = x
736 self.nobs, self.k_variables = self.x.shape
737 if isinstance(include_intercept, bool):
738 self.include_intercept = [include_intercept] * self.k_variables
739 else:
740 self.include_intercept = include_intercept
742 if variable_names is None:
743 if data_names is not None:
744 self.variable_names = data_names
745 else:
746 self.variable_names = ['x' + str(i)
747 for i in range(self.k_variables)]
748 else:
749 self.variable_names = variable_names
751 self.smoothers = self._make_smoothers_list()
752 self.basis = np.hstack(list(smoother.basis
753 for smoother in self.smoothers))
754 self.dim_basis = self.basis.shape[1]
755 self.penalty_matrices = [smoother.cov_der2
756 for smoother in self.smoothers]
757 self.col_names = []
758 for smoother in self.smoothers:
759 self.col_names.extend(smoother.col_names)
761 self.mask = []
762 last_column = 0
763 for smoother in self.smoothers:
764 mask = np.array([False] * self.dim_basis)
765 mask[last_column:smoother.dim_basis + last_column] = True
766 last_column = last_column + smoother.dim_basis
767 self.mask.append(mask)
769 @abstractmethod
770 def _make_smoothers_list(self):
771 pass
773 def transform(self, x_new):
774 """create the spline basis for new observations
776 The main use of this stateful transformation is for prediction
777 using the same specification of the spline basis.
779 Parameters
780 ----------
781 x_new: ndarray
782 observations of the underlying explanatory variable
784 Returns
785 -------
786 basis : ndarray
787 design matrix for the spline basis for given ``x_new``.
788 """
789 exog = np.hstack(list(self.smoothers[i].transform(x_new[:, i])
790 for i in range(self.k_variables)))
791 return exog
794class GenericSmoothers(AdditiveGamSmoother):
795 """generic class for additive smooth components for GAM
796 """
797 def __init__(self, x, smoothers):
798 self.smoothers = smoothers
799 super(GenericSmoothers, self).__init__(x, variable_names=None)
801 def _make_smoothers_list(self):
802 return self.smoothers
805class PolynomialSmoother(AdditiveGamSmoother):
806 """additive polynomial components for GAM
807 """
808 def __init__(self, x, degrees, variable_names=None):
809 self.degrees = degrees
810 super(PolynomialSmoother, self).__init__(x,
811 variable_names=variable_names)
813 def _make_smoothers_list(self):
814 smoothers = []
815 for v in range(self.k_variables):
816 uv_smoother = UnivariatePolynomialSmoother(
817 self.x[:, v],
818 degree=self.degrees[v],
819 variable_name=self.variable_names[v])
820 smoothers.append(uv_smoother)
821 return smoothers
824class BSplines(AdditiveGamSmoother):
825 """additive smooth components using B-Splines
827 This creates and holds the B-Spline basis function for several
828 components.
830 Parameters
831 ----------
832 x : array_like, 1-D or 2-D
833 underlying explanatory variable for smooth terms.
834 If 2-dimensional, then observations should be in rows and
835 explanatory variables in columns.
836 df : int
837 numer of basis functions or degrees of freedom
838 degree : int
839 degree of the spline
840 include_intercept : bool
841 If False, then the basis functions are transformed so that they
842 do not include a constant. This avoids perfect collinearity if
843 a constant or several components are included in the model.
844 constraints : {None, str, array}
845 Constraints are used to transform the basis functions to satisfy
846 those constraints.
847 `constraints = 'center'` applies a linear transform to remove the
848 constant and center the basis functions.
849 variable_names : {list[str], None}
850 The names for the underlying explanatory variables, x used in for
851 creating the column and parameter names for the basis functions.
852 If ``x`` is a pandas object, then the names will be taken from it.
853 knot_kwds : None or list of dict
854 option for the knot selection.
855 By default knots are selected in the same way as in patsy, however the
856 number of knots is independent of keeping or removing the constant.
857 Interior knot selection is based on quantiles of the data and is the
858 same in patsy and mgcv. Boundary points are at the limits of the data
859 range.
860 The available options use with `get_knots_bsplines` are
862 - knots : None or array
863 interior knots
864 - spacing : 'quantile' or 'equal'
865 - lower_bound : None or float
866 location of lower boundary knots, all boundary knots are at the same
867 point
868 - upper_bound : None or float
869 location of upper boundary knots, all boundary knots are at the same
870 point
871 - all_knots : None or array
872 If all knots are provided, then those will be taken as given and
873 all other options will be ignored.
876 Attributes
877 ----------
878 smoothers : list of univariate smooth component instances
879 basis : design matrix, array of spline bases columns for all components
880 penalty_matrices : list of penalty matrices, one for each smooth term
881 dim_basis : number of columns in the basis
882 k_variables : number of smooth components
883 col_names : created names for the basis columns
885 There are additional attributes about the specification of the splines
886 and some attributes mainly for internal use.
888 Notes
889 -----
890 A constant in the spline basis function can be removed in two different
891 ways.
892 The first is by dropping one basis column and normalizing the
893 remaining columns. This is obtained by the default
894 ``include_intercept=False, constraints=None``
895 The second option is by using the centering transform which is a linear
896 transformation of all basis functions. As a consequence of the
897 transformation, the B-spline basis functions do not have locally bounded
898 support anymore. This is obtained ``constraints='center'``. In this case
899 ``include_intercept`` will be automatically set to True to avoid
900 dropping an additional column.
901 """
902 def __init__(self, x, df, degree, include_intercept=False,
903 constraints=None, variable_names=None, knot_kwds=None):
904 self.degrees = degree
905 self.dfs = df
906 self.knot_kwds = knot_kwds
907 # TODO: move attaching constraints to super call
908 self.constraints = constraints
909 if constraints == 'center':
910 include_intercept = True
912 super(BSplines, self).__init__(x, include_intercept=include_intercept,
913 variable_names=variable_names)
915 def _make_smoothers_list(self):
916 smoothers = []
917 for v in range(self.k_variables):
918 kwds = self.knot_kwds[v] if self.knot_kwds else {}
919 uv_smoother = UnivariateBSplines(
920 self.x[:, v],
921 df=self.dfs[v], degree=self.degrees[v],
922 include_intercept=self.include_intercept[v],
923 constraints=self.constraints,
924 variable_name=self.variable_names[v], **kwds)
925 smoothers.append(uv_smoother)
927 return smoothers
930class CubicSplines(AdditiveGamSmoother):
931 """additive smooth components using cubic splines as in Wood 2006.
933 Note, these splines do NOT use the same spline basis as
934 ``Cubic Regression Splines``.
935 """
936 def __init__(self, x, df, constraints='center', transform='domain',
937 variable_names=None):
938 self.dfs = df
939 self.constraints = constraints
940 self.transform = transform
941 super(CubicSplines, self).__init__(x, constraints=constraints,
942 variable_names=variable_names)
944 def _make_smoothers_list(self):
945 smoothers = []
946 for v in range(self.k_variables):
947 uv_smoother = UnivariateCubicSplines(
948 self.x[:, v], df=self.dfs[v],
949 constraints=self.constraints,
950 transform=self.transform,
951 variable_name=self.variable_names[v])
952 smoothers.append(uv_smoother)
954 return smoothers
957class CyclicCubicSplines(AdditiveGamSmoother):
958 """additive smooth components using cyclic cubic regression splines
960 This spline basis is the same as in patsy.
962 Parameters
963 ----------
964 x : array_like, 1-D or 2-D
965 underlying explanatory variable for smooth terms.
966 If 2-dimensional, then observations should be in rows and
967 explanatory variables in columns.
968 df : int
969 numer of basis functions or degrees of freedom
970 constraints : {None, str, array}
971 Constraints are used to transform the basis functions to satisfy
972 those constraints.
973 variable_names : {list[str], None}
974 The names for the underlying explanatory variables, x used in for
975 creating the column and parameter names for the basis functions.
976 If ``x`` is a pandas object, then the names will be taken from it.
977 """
978 def __init__(self, x, df, constraints=None, variable_names=None):
979 self.dfs = df
980 # TODO: move attaching constraints to super call
981 self.constraints = constraints
982 super(CyclicCubicSplines, self).__init__(x,
983 variable_names=variable_names)
985 def _make_smoothers_list(self):
986 smoothers = []
987 for v in range(self.k_variables):
988 uv_smoother = UnivariateCubicCyclicSplines(
989 self.x[:, v],
990 df=self.dfs[v], constraints=self.constraints,
991 variable_name=self.variable_names[v])
992 smoothers.append(uv_smoother)
994 return smoothers
996# class CubicRegressionSplines(BaseCubicSplines):
997# # TODO: this class is still not tested
998#
999# def __init__(self, x, df=10):
1000# import warnings
1001# warnings.warn("This class is still not tested and it is probably"
1002# " not working properly. "
1003# "I suggest to use another smoother", Warning)
1004#
1005# super(CubicRegressionSplines, self).__init__(x, df)
1006#
1007# self.basis = dmatrix("cc(x, df=" + str(df) + ") - 1", {"x": x})
1008# n_inner_knots = df - 2 + 1 # +n_constraints
1009# # TODO: ACcording to CubicRegressionSplines class this should be
1010# # n_inner_knots = df - 2
1011# all_knots = _get_all_sorted_knots(x, n_inner_knots=n_inner_knots,
1012# inner_knots=None,
1013# lower_bound=None, upper_bound=None)
1014#
1015# b, d = self._get_b_and_d(all_knots)
1016# self.s = self._get_s(b, d)
1017#
1018# self.dim_basis = self.basis.shape[1]
1019#
1020# def _get_b_and_d(self, knots):
1021#
1022# h = knots[1:] - knots[:-1]
1023# n = knots.size - 1
1024#
1025# # b and d are defined such that the penalty matrix is equivalent to:
1026# # s = d.T.dot(b^-1).dot(d)
1027# # reference in particular to pag 146 of Wood's book
1028# b = np.zeros((n, n)) # the b matrix on page 146 of Wood's book
1029# d = np.zeros((n, n)) # the d matrix on page 146 of Wood's book
1030#
1031# for i in range(n-2):
1032# d[i, i] = 1/h[i]
1033# d[i, i+1] = -1/h[i] - 1/h[i+1]
1034# d[i, i+2] = 1/h[i+1]
1035#
1036# b[i, i] = (h[i] + h[i+1])/3
1037#
1038# for i in range(n-3):
1039# b[i, i+1] = h[i+1]/6
1040# b[i+1, i] = h[i+1]/6
1041#
1042# return b, d
1043#
1044# def _get_s(self, b, d):
1045#
1046# return d.T.dot(np.linalg.pinv(b)).dot(d)