Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of Patsy 

2# Copyright (C) 2014 GDF Suez, http://www.gdfsuez.com/ 

3# See file LICENSE.txt for license information. 

4 

5# R package 'mgcv' compatible cubic spline basis functions 

6 

7# These are made available in the patsy.* namespace 

8__all__ = ["cr", "cc", "te"] 

9 

10import numpy as np 

11 

12from patsy.util import (have_pandas, atleast_2d_column_default, 

13 no_pickling, assert_no_pickling, safe_string_eq) 

14from patsy.state import stateful_transform 

15 

16if have_pandas: 

17 import pandas 

18 

19 

20def _get_natural_f(knots): 

21 """Returns mapping of natural cubic spline values to 2nd derivatives. 

22 

23 .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, pp 145-146 

24 

25 :param knots: The 1-d array knots used for cubic spline parametrization, 

26 must be sorted in ascending order. 

27 :return: A 2-d array mapping natural cubic spline values at 

28 knots to second derivatives. 

29 

30 :raise ImportError: if scipy is not found, required for 

31 ``linalg.solve_banded()`` 

32 """ 

33 try: 

34 from scipy import linalg 

35 except ImportError: # pragma: no cover 

36 raise ImportError("Cubic spline functionality requires scipy.") 

37 

38 h = knots[1:] - knots[:-1] 

39 diag = (h[:-1] + h[1:]) / 3. 

40 ul_diag = h[1:-1] / 6. 

41 banded_b = np.array([np.r_[0., ul_diag], diag, np.r_[ul_diag, 0.]]) 

42 d = np.zeros((knots.size - 2, knots.size)) 

43 for i in range(knots.size - 2): 

44 d[i, i] = 1. / h[i] 

45 d[i, i + 2] = 1. / h[i + 1] 

46 d[i, i + 1] = - d[i, i] - d[i, i + 2] 

47 

48 fm = linalg.solve_banded((1, 1), banded_b, d) 

49 

50 return np.vstack([np.zeros(knots.size), fm, np.zeros(knots.size)]) 

51 

52 

53# Cyclic Cubic Regression Splines 

54 

55 

56def _map_cyclic(x, lbound, ubound): 

57 """Maps values into the interval [lbound, ubound] in a cyclic fashion. 

58 

59 :param x: The 1-d array values to be mapped. 

60 :param lbound: The lower bound of the interval. 

61 :param ubound: The upper bound of the interval. 

62 :return: A new 1-d array containing mapped x values. 

63 

64 :raise ValueError: if lbound >= ubound. 

65 """ 

66 if lbound >= ubound: 

67 raise ValueError("Invalid argument: lbound (%r) should be " 

68 "less than ubound (%r)." 

69 % (lbound, ubound)) 

70 

71 x = np.copy(x) 

72 x[x > ubound] = lbound + (x[x > ubound] - ubound) % (ubound - lbound) 

73 x[x < lbound] = ubound - (lbound - x[x < lbound]) % (ubound - lbound) 

74 

75 return x 

76 

77 

def test__map_cyclic():
    """Checks cyclic mapping results and that the input is not modified."""
    values = np.array([1.5, 2.6, 0.1, 4.4, 10.7])
    snapshot = values.copy()

    mapped_values = _map_cyclic(values, 2.1, 3.6)

    # The function must work on a copy of its argument.
    assert np.allclose(values, snapshot)
    assert np.allclose(mapped_values, np.array([3.0, 2.6, 3.1, 2.9, 3.2]))

85 

86 

def test__map_cyclic_errors():
    """Checks that invalid bounds (lbound >= ubound) are rejected."""
    from nose.tools import assert_raises
    x = np.linspace(0.2, 5.7, 10)
    # First pair: lbound > ubound; second pair: lbound == ubound.
    for lbound, ubound in ((4.5, 3.6), (4.5, 4.5)):
        assert_raises(ValueError, _map_cyclic, x, lbound, ubound)

92 

93 

94def _get_cyclic_f(knots): 

95 """Returns mapping of cyclic cubic spline values to 2nd derivatives. 

96 

97 .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, pp 146-147 

98 

99 :param knots: The 1-d array knots used for cubic spline parametrization, 

100 must be sorted in ascending order. 

101 :return: A 2-d array mapping cyclic cubic spline values at 

102 knots to second derivatives. 

103 """ 

104 h = knots[1:] - knots[:-1] 

105 n = knots.size - 1 

106 b = np.zeros((n, n)) 

107 d = np.zeros((n, n)) 

108 

109 b[0, 0] = (h[n - 1] + h[0]) / 3. 

110 b[0, n - 1] = h[n - 1] / 6. 

111 b[n - 1, 0] = h[n - 1] / 6. 

112 

113 d[0, 0] = -1. / h[0] - 1. / h[n - 1] 

114 d[0, n - 1] = 1. / h[n - 1] 

115 d[n - 1, 0] = 1. / h[n - 1] 

116 

117 for i in range(1, n): 

118 b[i, i] = (h[i - 1] + h[i]) / 3. 

119 b[i, i - 1] = h[i - 1] / 6. 

120 b[i - 1, i] = h[i - 1] / 6. 

121 

122 d[i, i] = -1. / h[i - 1] - 1. / h[i] 

123 d[i, i - 1] = 1. / h[i - 1] 

124 d[i - 1, i] = 1. / h[i - 1] 

125 

126 return np.linalg.solve(b, d) 

127 

128 

129# Tensor Product 

130 

131 

132def _row_tensor_product(dms): 

133 """Computes row-wise tensor product of given arguments. 

134 

135 .. note:: Custom algorithm to precisely match what is done in 'mgcv', 

136 in particular look out for order of result columns! 

137 For reference implementation see 'mgcv' source code, 

138 file 'mat.c', mgcv_tensor_mm(), l.62 

139 

140 :param dms: A sequence of 2-d arrays (marginal design matrices). 

141 :return: The 2-d array row-wise tensor product of given arguments. 

142 

143 :raise ValueError: if argument sequence is empty, does not contain only 

144 2-d arrays or if the arrays number of rows does not match. 

145 """ 

146 if len(dms) == 0: 

147 raise ValueError("Tensor product arrays sequence should not be empty.") 

148 for dm in dms: 

149 if dm.ndim != 2: 

150 raise ValueError("Tensor product arguments should be 2-d arrays.") 

151 

152 tp_nrows = dms[0].shape[0] 

153 tp_ncols = 1 

154 for dm in dms: 

155 if dm.shape[0] != tp_nrows: 

156 raise ValueError("Tensor product arguments should have " 

157 "same number of rows.") 

158 tp_ncols *= dm.shape[1] 

159 tp = np.zeros((tp_nrows, tp_ncols)) 

160 tp[:, -dms[-1].shape[1]:] = dms[-1] 

161 filled_tp_ncols = dms[-1].shape[1] 

162 for dm in dms[-2::-1]: 

163 p = - filled_tp_ncols * dm.shape[1] 

164 for j in range(dm.shape[1]): 

165 xj = dm[:, j] 

166 for t in range(-filled_tp_ncols, 0): 

167 tp[:, p] = tp[:, t] * xj 

168 p += 1 

169 filled_tp_ncols *= dm.shape[1] 

170 

171 return tp 

172 

173 

def test__row_tensor_product_errors():
    """Checks that invalid argument sequences raise ``ValueError``."""
    from nose.tools import assert_raises
    invalid_inputs = [
        # Empty sequence
        [],
        # 1-d arrays instead of 2-d arrays
        [np.arange(1, 5)],
        [np.arange(1, 5), np.arange(1, 5)],
        # Mismatching number of rows
        [np.arange(1, 13).reshape((3, 4)),
         np.arange(1, 13).reshape((4, 3))],
    ]
    for dms in invalid_inputs:
        assert_raises(ValueError, _row_tensor_product, dms)

183 

184 

def test__row_tensor_product():
    """Checks row-wise tensor product values and result column ordering."""
    base = np.arange(1, 17).reshape((4, 4))
    unit_col = np.ones(4).reshape((4, 1))

    # A single matrix, or a product with a column of ones, is left unchanged.
    assert np.array_equal(_row_tensor_product([base]), base)
    assert np.array_equal(_row_tensor_product([unit_col, base]), base)
    assert np.array_equal(_row_tensor_product([base, unit_col]), base)

    # A product with a constant column scales the matrix.
    double_col = 2 * unit_col
    assert np.array_equal(_row_tensor_product([double_col, base]), 2 * base)
    assert np.array_equal(_row_tensor_product([base, double_col]), 2 * base)

    # General case: the argument order determines the column order.
    left = np.array([[1, 2], [1, 2]])
    right = np.arange(1, 7).reshape((2, 3))
    assert np.array_equal(_row_tensor_product([left, right]),
                          np.array([[1, 2, 3, 2, 4, 6],
                                    [4, 5, 6, 8, 10, 12]]))
    assert np.array_equal(_row_tensor_product([right, left]),
                          np.array([[1, 2, 2, 4, 3, 6],
                                    [4, 8, 5, 10, 6, 12]]))

213 

214 

215# Common code 

216 

217 

218def _find_knots_lower_bounds(x, knots): 

219 """Finds knots lower bounds for given values. 

220 

221 Returns an array of indices ``I`` such that 

222 ``0 <= I[i] <= knots.size - 2`` for all ``i`` 

223 and 

224 ``knots[I[i]] < x[i] <= knots[I[i] + 1]`` if 

225 ``np.min(knots) < x[i] <= np.max(knots)``, 

226 ``I[i] = 0`` if ``x[i] <= np.min(knots)`` 

227 ``I[i] = knots.size - 2`` if ``np.max(knots) < x[i]`` 

228  

229 :param x: The 1-d array values whose knots lower bounds are to be found. 

230 :param knots: The 1-d array knots used for cubic spline parametrization, 

231 must be sorted in ascending order. 

232 :return: An array of knots lower bounds indices. 

233 """ 

234 lb = np.searchsorted(knots, x) - 1 

235 

236 # I[i] = 0 for x[i] <= np.min(knots) 

237 lb[lb == -1] = 0 

238 

239 # I[i] = knots.size - 2 for x[i] > np.max(knots) 

240 lb[lb == knots.size - 1] = knots.size - 2 

241 

242 return lb 

243 

244 

def _compute_base_functions(x, knots):
    """Computes base functions used for building cubic splines basis.

    .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, p. 146
     and for the special treatment of ``x`` values outside ``knots`` range
     see 'mgcv' source code, file 'mgcv.c', function 'crspl()', l.249

    :param x: The 1-d array values for which base functions should be computed.
    :param knots: The 1-d array knots used for cubic spline parametrization,
     must be sorted in ascending order.
    :return: 4 arrays corresponding to the 4 base functions ajm, ajp, cjm, cjp
     + the 1-d array of knots lower bounds indices corresponding to
     the given ``x`` values.
    """
    j = _find_knots_lower_bounds(x, knots)

    # Width of the knot interval each value falls in, and the distances of
    # the value to both ends of that interval.
    hj = np.diff(knots)[j]
    to_upper = knots[j + 1] - x
    from_lower = x - knots[j]

    ajm = to_upper / hj
    ajp = from_lower / hj

    # The cubic term is dropped for values above the last knot.
    cjm_cubic = to_upper * to_upper * to_upper / (6. * hj)
    cjm_cubic[x > np.max(knots)] = 0.
    cjm_linear = hj * to_upper / 6.
    cjm = cjm_cubic - cjm_linear

    # The cubic term is dropped for values below the first knot.
    cjp_cubic = from_lower * from_lower * from_lower / (6. * hj)
    cjp_cubic[x < np.min(knots)] = 0.
    cjp_linear = hj * from_lower / 6.
    cjp = cjp_cubic - cjp_linear

    return ajm, ajp, cjm, cjp, j

280 

281 

282def _absorb_constraints(design_matrix, constraints): 

283 """Absorb model parameters constraints into the design matrix. 

284 

285 :param design_matrix: The (2-d array) initial design matrix. 

286 :param constraints: The 2-d array defining initial model parameters 

287 (``betas``) constraints (``np.dot(constraints, betas) = 0``). 

288 :return: The new design matrix with absorbed parameters constraints. 

289 

290 :raise ImportError: if scipy is not found, used for ``scipy.linalg.qr()`` 

291 which is cleaner than numpy's version requiring a call like 

292 ``qr(..., mode='complete')`` to get a full QR decomposition. 

293 """ 

294 try: 

295 from scipy import linalg 

296 except ImportError: # pragma: no cover 

297 raise ImportError("Cubic spline functionality requires scipy.") 

298 

299 m = constraints.shape[0] 

300 q, r = linalg.qr(np.transpose(constraints)) 

301 

302 return np.dot(design_matrix, q[:, m:]) 

303 

304 

def _get_free_crs_dmatrix(x, knots, cyclic=False):
    """Builds an unconstrained cubic regression spline design matrix.

    Returns design matrix with dimensions ``len(x) x n``
    for a cubic regression spline smoother
    where
     - ``n = len(knots)`` for natural CRS
     - ``n = len(knots) - 1`` for cyclic CRS

    .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, p. 145

    :param x: The 1-d array values.
    :param knots: The 1-d array knots used for cubic spline parametrization,
     must be sorted in ascending order.
    :param cyclic: Indicates whether used cubic regression splines should
     be cyclic or not. Default is ``False``.
    :return: The (2-d array) design matrix.
    """
    if cyclic:
        # The last knot coincides with the first one in the cyclic case.
        n_basis = knots.size - 1
        x = _map_cyclic(x, min(knots), max(knots))
    else:
        n_basis = knots.size

    ajm, ajp, cjm, cjp, lower = _compute_base_functions(x, knots)

    upper = lower + 1
    if cyclic:
        # Wrap the upper index of the last interval back to the first knot.
        upper[upper == n_basis] = 0

    eye = np.identity(n_basis)
    f = _get_cyclic_f(knots) if cyclic else _get_natural_f(knots)

    # Assembled in transposed form so that the per-observation weights
    # broadcast along the last axis.
    transposed = ajm * eye[lower, :].T + ajp * eye[upper, :].T + \
        cjm * f[lower, :].T + cjp * f[upper, :].T

    return transposed.T

345 

346 

def _get_crs_dmatrix(x, knots, constraints=None, cyclic=False):
    """Builds a cubic regression spline design matrix.

    Returns design matrix with dimensions len(x) x n
    where:
     - ``n = len(knots) - nrows(constraints)`` for natural CRS
     - ``n = len(knots) - nrows(constraints) - 1`` for cyclic CRS
    for a cubic regression spline smoother

    :param x: The 1-d array values.
    :param knots: The 1-d array knots used for cubic spline parametrization,
     must be sorted in ascending order.
    :param constraints: The 2-d array defining model parameters (``betas``)
     constraints (``np.dot(constraints, betas) = 0``).
    :param cyclic: Indicates whether used cubic regression splines should
     be cyclic or not. Default is ``False``.
    :return: The (2-d array) design matrix.
    """
    free_dm = _get_free_crs_dmatrix(x, knots, cyclic)
    if constraints is None:
        return free_dm

    return _absorb_constraints(free_dm, constraints)

370 

371 

def _get_te_dmatrix(design_matrices, constraints=None):
    """Builds tensor product design matrix, given the marginal design matrices.

    :param design_matrices: A sequence of 2-d arrays (marginal design matrices).
    :param constraints: The 2-d array defining model parameters (``betas``)
     constraints (``np.dot(constraints, betas) = 0``).
    :return: The (2-d array) design matrix.
    """
    product = _row_tensor_product(design_matrices)
    if constraints is None:
        return product

    return _absorb_constraints(product, constraints)

385 

386 

387# Stateful Transforms 

388 

389 

390def _get_all_sorted_knots(x, n_inner_knots=None, inner_knots=None, 

391 lower_bound=None, upper_bound=None): 

392 """Gets all knots locations with lower and upper exterior knots included. 

393 

394 If needed, inner knots are computed as equally spaced quantiles of the 

395 input data falling between given lower and upper bounds. 

396 

397 :param x: The 1-d array data values. 

398 :param n_inner_knots: Number of inner knots to compute. 

399 :param inner_knots: Provided inner knots if any. 

400 :param lower_bound: The lower exterior knot location. If unspecified, the 

401 minimum of ``x`` values is used. 

402 :param upper_bound: The upper exterior knot location. If unspecified, the 

403 maximum of ``x`` values is used. 

404 :return: The array of ``n_inner_knots + 2`` distinct knots. 

405 

406 :raise ValueError: for various invalid parameters sets or if unable to 

407 compute ``n_inner_knots + 2`` distinct knots. 

408 """ 

409 if lower_bound is None and x.size == 0: 

410 raise ValueError("Cannot set lower exterior knot location: empty " 

411 "input data and lower_bound not specified.") 

412 elif lower_bound is None and x.size != 0: 

413 lower_bound = np.min(x) 

414 

415 if upper_bound is None and x.size == 0: 

416 raise ValueError("Cannot set upper exterior knot location: empty " 

417 "input data and upper_bound not specified.") 

418 elif upper_bound is None and x.size != 0: 

419 upper_bound = np.max(x) 

420 

421 if upper_bound < lower_bound: 

422 raise ValueError("lower_bound > upper_bound (%r > %r)" 

423 % (lower_bound, upper_bound)) 

424 

425 if inner_knots is None and n_inner_knots is not None: 

426 if n_inner_knots < 0: 

427 raise ValueError("Invalid requested number of inner knots: %r" 

428 % (n_inner_knots,)) 

429 

430 x = x[(lower_bound <= x) & (x <= upper_bound)] 

431 x = np.unique(x) 

432 

433 if x.size != 0: 

434 inner_knots_q = np.linspace(0, 100, n_inner_knots + 2)[1:-1] 

435 # .tolist() is necessary to work around a bug in numpy 1.8 

436 inner_knots = np.asarray(np.percentile(x, inner_knots_q.tolist())) 

437 elif n_inner_knots == 0: 

438 inner_knots = np.array([]) 

439 else: 

440 raise ValueError("No data values between lower_bound(=%r) and " 

441 "upper_bound(=%r): cannot compute requested " 

442 "%r inner knot(s)." 

443 % (lower_bound, upper_bound, n_inner_knots)) 

444 elif inner_knots is not None: 

445 inner_knots = np.unique(inner_knots) 

446 if n_inner_knots is not None and n_inner_knots != inner_knots.size: 

447 raise ValueError("Needed number of inner knots=%r does not match " 

448 "provided number of inner knots=%r." 

449 % (n_inner_knots, inner_knots.size)) 

450 n_inner_knots = inner_knots.size 

451 if np.any(inner_knots < lower_bound): 

452 raise ValueError("Some knot values (%s) fall below lower bound " 

453 "(%r)." 

454 % (inner_knots[inner_knots < lower_bound], 

455 lower_bound)) 

456 if np.any(inner_knots > upper_bound): 

457 raise ValueError("Some knot values (%s) fall above upper bound " 

458 "(%r)." 

459 % (inner_knots[inner_knots > upper_bound], 

460 upper_bound)) 

461 else: 

462 raise ValueError("Must specify either 'n_inner_knots' or 'inner_knots'.") 

463 

464 all_knots = np.concatenate(([lower_bound, upper_bound], inner_knots)) 

465 all_knots = np.unique(all_knots) 

466 if all_knots.size != n_inner_knots + 2: 

467 raise ValueError("Unable to compute n_inner_knots(=%r) + 2 distinct " 

468 "knots: %r data value(s) found between " 

469 "lower_bound(=%r) and upper_bound(=%r)." 

470 % (n_inner_knots, x.size, lower_bound, upper_bound)) 

471 

472 return all_knots 

473 

474 

def test__get_all_sorted_knots():
    """Checks knots computation on valid and invalid parameter sets."""
    from nose.tools import assert_raises
    empty = np.array([])
    x = np.arange(6) * 2

    # (positional arguments, keyword arguments) expected to be rejected.
    invalid_calls = [
        ((empty, -1), {}),
        ((empty, 0), {}),
        ((empty, 0), {"lower_bound": 1}),
        ((empty, 0), {"upper_bound": 5}),
        ((empty, 0), {"lower_bound": 3, "upper_bound": 1}),
        ((empty, 0), {"lower_bound": 1, "upper_bound": 1}),
        ((x, -2), {}),
        ((x, 2), {"lower_bound": 1, "upper_bound": 3}),
        ((x, 1), {"lower_bound": 1.3, "upper_bound": 1.4}),
        ((x, 1), {"lower_bound": 2, "upper_bound": 3}),
        ((x, 1), {"inner_knots": [2, 3]}),
        ((x,), {"lower_bound": 2, "upper_bound": 3}),
        ((x,), {"inner_knots": [3, 7], "lower_bound": 4}),
        ((x,), {"inner_knots": [3, 7], "upper_bound": 6}),
    ]
    for args, kwargs in invalid_calls:
        assert_raises(ValueError, _get_all_sorted_knots, *args, **kwargs)

    # (positional arguments, keyword arguments, expected knots).
    valid_calls = [
        ((empty, 0), {"lower_bound": 1, "upper_bound": 5}, [1, 5]),
        ((x, 0), {}, [0, 10]),
        ((x, 0), {"lower_bound": 3, "upper_bound": 8}, [3, 8]),
        ((x, 2), {"lower_bound": 1, "upper_bound": 9}, [1, 4, 6, 9]),
        ((x, 1), {"lower_bound": 1, "upper_bound": 3}, [1, 2, 3]),
        ((x,), {"inner_knots": [3, 7]}, [0, 3, 7, 10]),
        ((x,), {"inner_knots": [3, 7], "lower_bound": 2}, [2, 3, 7, 10]),
    ]
    for args, kwargs, expected in valid_calls:
        assert np.array_equal(_get_all_sorted_knots(*args, **kwargs),
                              expected)

527 

528 

529def _get_centering_constraint_from_dmatrix(design_matrix): 

530 """ Computes the centering constraint from the given design matrix. 

531 

532 We want to ensure that if ``b`` is the array of parameters, our 

533 model is centered, ie ``np.mean(np.dot(design_matrix, b))`` is zero. 

534 We can rewrite this as ``np.dot(c, b)`` being zero with ``c`` a 1-row 

535 constraint matrix containing the mean of each column of ``design_matrix``. 

536 

537 :param design_matrix: The 2-d array design matrix. 

538 :return: A 2-d array (1 x ncols(design_matrix)) defining the 

539 centering constraint. 

540 """ 

541 return design_matrix.mean(axis=0).reshape((1, design_matrix.shape[1])) 

542 

543 

class CubicRegressionSpline(object):
    """Base class for cubic regression spline stateful transforms

    This class contains all the functionality for the following stateful
    transforms:
     - ``cr(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None)``
       for natural cubic regression spline
     - ``cc(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None)``
       for cyclic cubic regression spline
    """
    common_doc = """
    :arg df: The number of degrees of freedom to use for this spline. The
      return value will have this many columns. You must specify at least one
      of ``df`` and ``knots``.
    :arg knots: The interior knots to use for the spline. If unspecified, then
      equally spaced quantiles of the input data are used. You must specify at
      least one of ``df`` and ``knots``.
    :arg lower_bound: The lower exterior knot location.
    :arg upper_bound: The upper exterior knot location.
    :arg constraints: Either a 2-d array defining general linear constraints
     (that is ``np.dot(constraints, betas)`` is zero, where ``betas`` denotes
     the array of *initial* parameters, corresponding to the *initial*
     unconstrained design matrix), or the string
     ``'center'`` indicating that we should apply a centering constraint
     (this constraint will be computed from the input data, remembered and
     re-used for prediction from the fitted model).
     The constraints are absorbed in the resulting design matrix which means
     that the model is actually rewritten in terms of
     *unconstrained* parameters. For more details see :ref:`spline-regression`.

    This is a stateful transforms (for details see
    :ref:`stateful-transforms`). If ``knots``, ``lower_bound``, or
    ``upper_bound`` are not specified, they will be calculated from the data
    and then the chosen values will be remembered and re-used for prediction
    from the fitted model.

    Using this function requires scipy be installed.

    .. versionadded:: 0.3.0
    """

    def __init__(self, name, cyclic):
        # ``name`` is only used in error messages; ``cyclic`` selects the
        # cyclic variant of the spline.
        self._name = name
        self._cyclic = cyclic
        # Scratch storage filled by memorize_chunk(), dropped by
        # memorize_finish().
        self._tmp = {}
        # State computed by memorize_finish() and re-used by transform().
        self._all_knots = None
        self._constraints = None

    def _as_1d(self, x):
        # Accepts 1-d input or a 2-d column vector; anything else is an error.
        x = np.atleast_1d(x)
        if x.ndim == 2 and x.shape[1] == 1:
            x = x[:, 0]
        if x.ndim > 1:
            raise ValueError("Input to %r must be 1-d, "
                             "or a 2-d column vector."
                             % (self._name,))
        return x

    def memorize_chunk(self, x, df=None, knots=None,
                       lower_bound=None, upper_bound=None,
                       constraints=None):
        # The arguments of the latest chunk are the ones kept.
        self._tmp["args"] = {"df": df,
                             "knots": knots,
                             "lower_bound": lower_bound,
                             "upper_bound": upper_bound,
                             "constraints": constraints,
                             }

        chunk = self._as_1d(x)
        self._tmp.setdefault("xs", []).append(chunk)

    def memorize_finish(self):
        args = self._tmp["args"]
        xs = self._tmp["xs"]
        # Guards against invalid subsequent memorize_chunk() calls.
        del self._tmp

        x = np.concatenate(xs)
        df = args["df"]
        if df is None and args["knots"] is None:
            raise ValueError("Must specify either 'df' or 'knots'.")

        constraints = args["constraints"]
        centering = False
        n_constraints = 0
        if constraints is not None:
            centering = safe_string_eq(constraints, "center")
            if centering:
                # Here we collect only number of constraints,
                # actual centering constraint will be computed after all_knots
                n_constraints = 1
            else:
                constraints = np.atleast_2d(constraints)
                if constraints.ndim != 2:
                    raise ValueError("Constraints must be 2-d array or "
                                     "1-d vector.")
                n_constraints = constraints.shape[0]

        n_inner_knots = None
        if df is not None:
            if not self._cyclic and n_constraints == 0:
                min_df = 2
            else:
                min_df = 1
            if df < min_df:
                raise ValueError("'df'=%r must be greater than or equal to %r."
                                 % (df, min_df))
            n_inner_knots = df - 2 + n_constraints
            if self._cyclic:
                n_inner_knots += 1

        self._all_knots = _get_all_sorted_knots(
            x,
            n_inner_knots=n_inner_knots,
            inner_knots=args["knots"],
            lower_bound=args["lower_bound"],
            upper_bound=args["upper_bound"])

        if constraints is None:
            return

        if centering:
            # Now we can compute centering constraints
            free_dm = _get_free_crs_dmatrix(x, self._all_knots,
                                            cyclic=self._cyclic)
            constraints = _get_centering_constraint_from_dmatrix(free_dm)

        df_before_constraints = self._all_knots.size
        if self._cyclic:
            df_before_constraints -= 1
        if constraints.shape[1] != df_before_constraints:
            raise ValueError("Constraints array should have %r columns but"
                             " %r found."
                             % (df_before_constraints, constraints.shape[1]))
        self._constraints = constraints

    def transform(self, x, df=None, knots=None,
                  lower_bound=None, upper_bound=None,
                  constraints=None):
        # Only ``x`` is used here: knots and constraints come from the
        # memorized state.
        x_orig = x
        x = self._as_1d(x)
        dm = _get_crs_dmatrix(x, self._all_knots,
                              self._constraints, cyclic=self._cyclic)
        # Pandas input yields a DataFrame carrying the input's index.
        if have_pandas and isinstance(x_orig, (pandas.Series,
                                               pandas.DataFrame)):
            dm = pandas.DataFrame(dm)
            dm.index = x_orig.index
        return dm

    __getstate__ = no_pickling

689 

690 

class CR(CubicRegressionSpline):
    """cr(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None)

    Generates a natural cubic spline basis for ``x``
    (with the option of absorbing centering or more general parameters
    constraints), allowing non-linear fits. The usual usage is something like::

      y ~ 1 + cr(x, df=5, constraints='center')

    to fit ``y`` as a smooth function of ``x``, with 5 degrees of freedom
    given to the smooth, and centering constraint absorbed in
    the resulting design matrix. Note that in this example, due to the centering
    constraint, 6 knots will get computed from the input data ``x``
    to achieve 5 degrees of freedom.


    .. note:: This function reproduce the cubic regression splines 'cr' and 'cs'
      as implemented in the R package 'mgcv' (GAM modelling).

    """

    # Under python -OO, __doc__ will be defined but set to None
    if __doc__:
        __doc__ = __doc__ + CubicRegressionSpline.common_doc

    def __init__(self):
        # Natural (non-cyclic) variant, reported as 'cr' in error messages.
        super(CR, self).__init__(name='cr', cyclic=False)

cr = stateful_transform(CR)

720 

721 

class CC(CubicRegressionSpline):
    """cc(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None)

    Generates a cyclic cubic spline basis for ``x``
    (with the option of absorbing centering or more general parameters
    constraints), allowing non-linear fits. The usual usage is something like::

      y ~ 1 + cc(x, df=7, constraints='center')

    to fit ``y`` as a smooth function of ``x``, with 7 degrees of freedom
    given to the smooth, and centering constraint absorbed in
    the resulting design matrix. Note that in this example, due to the centering
    and cyclic constraints, 9 knots will get computed from the input data ``x``
    to achieve 7 degrees of freedom.

    .. note:: This function reproduce the cubic regression splines 'cc'
      as implemented in the R package 'mgcv' (GAM modelling).

    """

    # Under python -OO, __doc__ will be defined but set to None
    if __doc__:
        __doc__ = __doc__ + CubicRegressionSpline.common_doc

    def __init__(self):
        # Cyclic variant, reported as 'cc' in error messages.
        super(CC, self).__init__(name='cc', cyclic=True)

cc = stateful_transform(CC)

750 

751 

def test_crs_errors():
    """Checks that invalid ``cr``/``cc`` arguments raise ``ValueError``."""
    from nose.tools import assert_raises
    square = np.arange(16).reshape((4, 4))
    data = np.arange(50)

    # Invalid 'x' shape
    assert_raises(ValueError, cr, square, df=4)
    assert_raises(ValueError, CR().transform, square, df=4)
    # Should provide at least 'df' or 'knots'
    assert_raises(ValueError, cr, data)
    # Invalid constraints shape
    assert_raises(ValueError, cr, data, df=4,
                  constraints=np.arange(27).reshape((3, 3, 3)))
    # Invalid nb of columns in constraints
    # (should have df + 1 = 5, but 6 provided)
    assert_raises(ValueError, cr, data, df=4,
                  constraints=np.arange(6))
    # Too small 'df' for natural cubic spline
    assert_raises(ValueError, cr, data, df=1)
    # Too small 'df' for cyclic cubic spline
    assert_raises(ValueError, cc, data, df=0)

771 

772 

def test_crs_compat():
    """Checks ``CR``/``CC`` against the reference outputs recorded from R."""
    from patsy.test_state import check_stateful
    from patsy.test_splines_crs_data import (R_crs_test_x,
                                             R_crs_test_data,
                                             R_crs_num_tests)
    begin_marker = "--BEGIN TEST CASE--"
    end_marker = "--END TEST CASE--"

    lines = R_crs_test_data.split("\n")
    tests_ran = 0
    cursor = lines.index(begin_marker)
    while lines[cursor] == begin_marker:
        first = cursor + 1
        stop_idx = lines.index(end_marker, first)
        # Each line of a test case is a "key=value" pair.
        test_data = dict(line.split("=", 1) for line in lines[first:stop_idx])

        # Translate the R output into Python calling conventions
        adjust_df = 0
        r_spline_type = test_data["spline_type"]
        if r_spline_type in ("cr", "cs"):
            spline_type = CR
        elif r_spline_type == "cc":
            spline_type = CC
            adjust_df += 1
        else:
            raise ValueError("Unrecognized spline type %r"
                             % (r_spline_type,))

        kwargs = {}
        if test_data["absorb_cons"] == "TRUE":
            kwargs["constraints"] = "center"
            adjust_df += 1

        if test_data["knots"] != "None":
            # The recorded knots include both exterior knots.
            all_knots = np.asarray(eval(test_data["knots"]))
            all_knots.sort()
            kwargs["lower_bound"] = all_knots[0]
            kwargs["knots"] = all_knots[1:-1]
            kwargs["upper_bound"] = all_knots[-1]
        else:
            kwargs["df"] = eval(test_data["nb_knots"]) - adjust_df
        output = np.asarray(eval(test_data["output"]))

        # Do the actual test
        check_stateful(spline_type, False, R_crs_test_x, output, **kwargs)
        tests_ran += 1

        # Set up for the next one
        cursor = stop_idx + 1
    assert tests_ran == R_crs_num_tests

test_crs_compat.slow = True

822 

def test_crs_with_specific_constraint():
    """Compares an explicit constraint against the memorized 'center' one.

    The design matrix built from knots and a centering constraint hard coded
    from R must match the one obtained by incrementally fitting
    ``cr(x, df=4, constraints='center')`` and predicting on the same values.
    """
    from patsy.highlevel import incr_dbuilder, build_design_matrices, dmatrix
    x = (-1.5)**np.arange(20)
    # Hard coded R values for smooth: s(x, bs="cr", k=5)
    # R> knots <- smooth$xp
    knots_R = np.array([-2216.837820053100585937,
                        -50.456909179687500000,
                        -0.250000000000000000,
                        33.637939453125000000,
                        1477.891880035400390625])
    # R> centering.constraint <- t(qr.X(attr(smooth, "qrc")))
    centering_constraint_R = np.array([[0.064910676323168478574,
                                        1.4519875239407085132,
                                        -2.1947446912471946234,
                                        1.6129783104357671153,
                                        0.064868180547550072235]])
    # values for which we want a prediction
    new_x = np.array([-3000., -200., 300., 2000.])

    # NOTE: the formula below refers to the local names ``new_x``,
    # ``knots_R`` and ``centering_constraint_R``; they must not be renamed.
    explicit_formula = ("cr(new_x, knots=knots_R[1:-1], "
                        "lower_bound=knots_R[0], upper_bound=knots_R[-1], "
                        "constraints=centering_constraint_R)")
    result1 = dmatrix(explicit_formula)

    # Same model, but fitted incrementally from the data in two chunks.
    data_chunked = [{"x": x[:10]}, {"x": x[10:]}]
    builder = incr_dbuilder("cr(x, df=4, constraints='center')",
                            lambda: iter(data_chunked))
    new_data = {"x": new_x}
    result2 = build_design_matrices([builder], new_data)[0]

    assert np.allclose(result1, result2, rtol=1e-12, atol=0.)

852 

853 

class TE(object):
    """te(s1, .., sn, constraints=None)

    Generates a smooth of several covariates as the tensor product of the
    bases of the marginal univariate smooths ``s1, .., sn``. Each marginal
    smooth must turn its univariate input into a smooth function basis, i.e.
    a 2-d array whose ``(i, j)`` element is the value of the ``j`` th basis
    function at the ``i`` th data point.
    The dimension of the resulting basis is the product of the dimensions of
    the marginal bases. Typical usage looks like::

      y ~ 1 + te(cr(x1, df=5), cc(x2, df=6), constraints='center')

    which fits ``y`` as a smooth function of both ``x1`` and ``x2``, using a
    natural cubic spline as marginal smooth for ``x1`` and a cyclic cubic
    spline for ``x2`` (with the centering constraint absorbed in the
    resulting design matrix).

    :arg constraints: Either a 2-d array defining general linear constraints
     (that is ``np.dot(constraints, betas)`` is zero, where ``betas`` denotes
     the array of *initial* parameters, corresponding to the *initial*
     unconstrained design matrix), or the string
     ``'center'`` indicating that we should apply a centering constraint
     (this constraint will be computed from the input data, remembered and
     re-used for prediction from the fitted model).
     The constraints are absorbed in the resulting design matrix which means
     that the model is actually rewritten in terms of
     *unconstrained* parameters. For more details see :ref:`spline-regression`.

    Using this function requires scipy be installed.

    .. note:: This function reproduces the tensor product smooth 'te' as
      implemented in the R package 'mgcv' (GAM modelling).
      See also 'Generalized Additive Models', Simon N. Wood, 2006, pp 158-163

    .. versionadded:: 0.3.0
    """
    def __init__(self):
        # Scratch state filled in by memorize_chunk() and discarded by
        # memorize_finish().
        self._tmp = {}
        # Constraint matrix applied by transform(); None means no constraint.
        self._constraints = None

    @staticmethod
    def _as_2d_columns(args):
        """Return every marginal basis in *args* as a 2-d array.

        :raise ValueError: if an argument is still not 2-d after being
          passed through ``atleast_2d_column_default``.
        """
        columns = []
        for marginal in args:
            marginal = atleast_2d_column_default(marginal)
            if marginal.ndim != 2:
                raise ValueError("Each tensor product argument must be "
                                 "a 2-d array or 1-d vector.")
            columns.append(marginal)
        return columns

    def memorize_chunk(self, *args, **kwargs):
        """Accumulate the statistics needed for a centering constraint.

        The ``constraints`` keyword seen on the first chunk is remembered.
        When it is the string ``'center'``, the row count and column sums
        of the row tensor product of this chunk are added to the totals.
        """
        state = self._tmp
        constraints = state.setdefault("constraints",
                                       kwargs.get("constraints"))
        if not safe_string_eq(constraints, "center"):
            # Nothing has to be learned from the data in this case.
            return

        product = _row_tensor_product(self._as_2d_columns(args))
        state["count"] = state.get("count", 0) + product.shape[0]

        chunk_sum = np.atleast_2d(product.sum(axis=0))
        running_sum = state.setdefault("sum", np.zeros(chunk_sum.shape))
        running_sum += chunk_sum

    def memorize_finish(self):
        """Turn the memorized state into the final constraint matrix.

        :raise ValueError: if explicit constraints are not 2-d once passed
          through ``np.atleast_2d``.
        """
        state = self._tmp
        constraints = state["constraints"]
        # Guards against invalid subsequent memorize_chunk() calls.
        del self._tmp

        if constraints is None:
            self._constraints = None
            return

        if safe_string_eq(constraints, "center"):
            # Centering constraint: column means of the tensor product.
            constraints = np.atleast_2d(state["sum"] / state["count"])
        else:
            constraints = np.atleast_2d(constraints)
            if constraints.ndim != 2:
                raise ValueError("Constraints must be 2-d array or "
                                 "1-d vector.")

        self._constraints = constraints

    def transform(self, *args, **kwargs):
        """Build the tensor product design matrix for the given smooths.

        The constraints stored by memorize_finish() (if any) are forwarded
        to ``_get_te_dmatrix``.

        :raise ValueError: if a marginal smooth is neither 1-d nor 2-d.
        """
        return _get_te_dmatrix(self._as_2d_columns(args), self._constraints)

    __getstate__ = no_pickling

944 

# Public ``te`` callable (listed in ``__all__``): the TE class wrapped by
# ``stateful_transform``.
te = stateful_transform(TE)

946 

947 

def test_te_errors():
    """3-d inputs and 3-d constraints passed to te() raise ValueError."""
    from nose.tools import assert_raises

    values = np.arange(27)
    cube = values.reshape((3, 3, 3))

    # Invalid input shape, without and with a centering constraint
    for extra_kwargs in ({}, {"constraints": 'center'}):
        assert_raises(ValueError, te, cube, **extra_kwargs)

    # Invalid constraints shape
    bad_constraints = np.arange(8).reshape((2, 2, 2))
    assert_raises(ValueError, te, values, constraints=bad_constraints)

957 

958 

def test_te_1smooth():
    """A tensor product of a single smooth equals that smooth."""
    from patsy.splines import bs

    x = (-1.5)**np.arange(20)

    # Tensor product of 1 smooth covariate should be the same
    # as the smooth alone
    for smooth, df in ((cr, 6), (cc, 5), (bs, 4)):
        assert np.allclose(smooth(x, df=df), te(smooth(x, df=df)))

    # Adding centering constraint to tensor product
    centered_alone = cr(x, df=3, constraints='center')
    centered_product = te(cr(x, df=4), constraints='center')
    assert np.allclose(centered_alone, centered_product)

    # Adding specific constraint
    explicit_constraint = np.arange(1, 5)
    constrained_alone = cr(x, df=3, constraints=explicit_constraint)
    constrained_product = te(cr(x, df=4), constraints=explicit_constraint)
    assert np.allclose(constrained_alone, constrained_product)

974 

975 

def test_te_2smooths():
    """Check a two-smooth tensor product against hard coded R results.

    The design matrices are built incrementally from two data chunks and
    evaluated on two new points, once without constraint and once with the
    centering constraint.
    """
    from patsy.highlevel import incr_dbuilder, build_design_matrices

    exponents = np.arange(20)
    x1 = (-1.5)**exponents
    x2 = (1.6)**exponents

    # Hard coded R results for smooth: te(x1, x2, bs=c("cs", "cc"), k=c(5,7))
    # Without centering constraint:
    dmatrix_R_nocons = np.array([
        [-4.4303024184609255207e-06, 7.9884438387230142235e-06,
         9.7987758194797719025e-06, -7.2894213245475212959e-08,
         1.5907686862964493897e-09, -3.2565884983072595159e-11,
         0.0170749607855874667439, -3.0788499835965849050e-02,
         -3.7765754357352458725e-02, 2.8094376299826799787e-04,
         -6.1310290747349201414e-06, 1.2551314933193442915e-07,
         -0.26012671685838206770, 4.6904420337437874311e-01,
         0.5753384627946153129230, -4.2800085814700449330e-03,
         9.3402525733484874533e-05, -1.9121170389937518131e-06,
         -0.0904312240489447832781, 1.6305991924427923334e-01,
         2.0001237112941641638e-01, -1.4879148887003382663e-03,
         3.2470731316462736135e-05, -6.6473404365914134499e-07,
         2.0447857920168824846e-05, -3.6870296695050991799e-05,
         -4.5225801045409022233e-05, 3.3643990293641665710e-07,
         -7.3421200200015877329e-09, 1.5030635073660743297e-10],
        [-9.4006130602653794302e-04, 7.8681398069163730347e-04,
         2.4573006857381437217e-04, -1.4524712230452725106e-04,
         7.8216741353106329551e-05, -3.1304283003914264551e-04,
         3.6231183382798337611064, -3.0324832476174168328e+00,
         -9.4707559178211142559e-01, 5.5980126937492580286e-01,
         -3.0145747744342332730e-01, 1.2065077148806895302e+00,
         -35.17561267504181188315, 2.9441339255948005160e+01,
         9.1948319320782125885216, -5.4349184288245195873e+00,
         2.9267472035096449012e+00, -1.1713569391233907169e+01,
         34.0275626863976370373166, -2.8480442582712722555e+01,
         -8.8947340548151565542e+00, 5.2575353623762932642e+00,
         -2.8312249982592527786e+00, 1.1331265795534763541e+01,
         7.9462158845078978420e-01, -6.6508361863670617531e-01,
         -2.0771242914526857892e-01, 1.2277550230353953542e-01,
         -6.6115593588420035198e-02, 2.6461103043402139923e-01],
    ])
    # With centering constraint:
    dmatrix_R_cons = np.array([
        [0.00329998606323867252343, 1.6537431155796576600e-04,
         -1.2392262709790753433e-04, 6.5405304166706783407e-05,
         -6.6764045799537624095e-05, -0.1386431081763726258504,
         0.124297283800864313830, -3.5487293655619825405e-02,
         -3.0527115315785902268e-03, 5.2009247643311604277e-04,
         -0.00384203992301702674378, -0.058901915802819435064,
         0.266422358491648914036, 0.5739281693874087597607,
         -1.3171008503525844392e-03, 8.2573456631878912413e-04,
         6.6730833453016958831e-03, -0.1467677784718444955470,
         0.220757650934837484913, 0.1983127687880171796664,
         -1.6269930328365173316e-03, -1.7785892412241208812e-03,
         -3.2702835436351201243e-03, -4.3252183044300757109e-02,
         4.3403766976235179376e-02, 3.5973406402893762387e-05,
         -5.4035858568225075046e-04, 2.9565209382794241247e-04,
         -2.2769990750264097637e-04],
        [0.41547954838956052681098, 1.9843570584107707994e-02,
         -1.5746590234791378593e-02, 8.3171184312221431434e-03,
         -8.7233014052017516377e-03, -15.9926770785086258541696,
         16.503663226274017716833, -6.6005803955894726265e-01,
         1.3986092022708346283e-01, -2.3516913533670955050e-01,
         0.72251037497207359905360, -9.827337059999853963177,
         3.917078117294827688255, 9.0171773596973618936090,
         -5.0616811270787671617e+00, 3.0189990249009683865e+00,
         -1.0872720629943064097e+01, 26.9308504460453121964747,
         -21.212262927009287949431, -9.1088328555582247503253,
         5.2400156972500298025e+00, -3.0593641098325474736e+00,
         1.0919392118399086300e+01, -4.6564290223265718538e+00,
         4.8071307441606982991e+00, -1.9748377005689798924e-01,
         5.4664183716965096538e-02, -2.8871392916916285148e-02,
         2.3592766838010845176e-01],
    ])

    # Points at which the fitted bases are evaluated.
    new_x1 = np.array([11.390625, 656.84083557128906250])
    new_x2 = np.array([16.777216000000006346, 1844.6744073709567147])
    new_data = {"x1": new_x1, "x2": new_x2}

    # Training data, memorized in two chunks.
    training_chunks = [{"x1": x1[:10], "x2": x2[:10]},
                       {"x1": x1[10:], "x2": x2[10:]}]

    cases = (
        ("te(cr(x1, df=5), cc(x2, df=6)) - 1", dmatrix_R_nocons),
        ("te(cr(x1, df=5), cc(x2, df=6), "
         "constraints='center') - 1", dmatrix_R_cons),
    )
    for formula, expected in cases:
        builder = incr_dbuilder(formula, lambda: iter(training_chunks))
        actual = build_design_matrices([builder], new_data)[0]
        assert np.allclose(actual, expected, rtol=1e-12, atol=0.)

1061 

1062 

def test_te_3smooths():
    """Check a three-smooth tensor product against hard coded R results.

    The design matrix is built incrementally from two data chunks and
    evaluated on a single new point.
    """
    from patsy.highlevel import incr_dbuilder, build_design_matrices

    exponents = np.arange(20)
    x1 = (-1.5)**exponents
    x2 = (1.6)**exponents
    x3 = (-1.2)**exponents

    # Hard coded R results for smooth: te(x1, x2, x3, bs=c("cr", "cs", "cc"), k=c(3,3,4))
    design_matrix_R = np.array([[
        7.2077663709837084334e-05, 2.0648333344343273131e-03,
        -4.7934014082310591768e-04, 2.3923430783992746568e-04,
        6.8534265421922660466e-03, -1.5909867344112936776e-03,
        -6.8057712777151204314e-09, -1.9496724335203412851e-07,
        4.5260614658693259131e-08, 0.0101479754187435277507,
        0.290712501531622591333, -0.067487370093906928759,
        0.03368233306025386619709, 0.9649092451763204847381,
        -0.2239985793289433757547, -9.5819975394704535133e-07,
        -2.7449874082511405643e-05, 6.3723431275833230217e-06,
        -1.5205851762850489204e-04, -0.00435607204539782688624,
        0.00101123909269346416370, -5.0470024059694933508e-04,
        -1.4458319360584082416e-02, 3.3564223914790921634e-03,
        1.4357783514933466209e-08, 4.1131230514870551983e-07,
        -9.5483976834512651038e-08,
    ]])

    # Single point at which the fitted bases are evaluated.
    new_data = {"x1": -38.443359375000000000,
                "x2": 68.719476736000032702,
                "x3": -5.1597803519999985156}

    # Training data, memorized in two chunks.
    head, tail = slice(None, 10), slice(10, None)
    training_chunks = [{"x1": x1[head], "x2": x2[head], "x3": x3[head]},
                       {"x1": x1[tail], "x2": x2[tail], "x3": x3[tail]}]

    def chunk_iterator():
        return iter(training_chunks)

    builder = incr_dbuilder(
        "te(cr(x1, df=3), cr(x2, df=3), cc(x3, df=3)) - 1",
        chunk_iterator)
    design_matrix = build_design_matrices([builder], new_data)[0]
    assert np.allclose(design_matrix, design_matrix_R, rtol=1e-12, atol=0.)