Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import builtins 

2import numpy as np 

3from numpy.testing import suppress_warnings 

4from operator import index 

5from collections import namedtuple 

6 

7__all__ = ['binned_statistic', 

8 'binned_statistic_2d', 

9 'binned_statistic_dd'] 

10 

11 

12BinnedStatisticResult = namedtuple('BinnedStatisticResult', 

13 ('statistic', 'bin_edges', 'binnumber')) 

14 

15 

16def binned_statistic(x, values, statistic='mean', 

17 bins=10, range=None): 

18 """ 

19 Compute a binned statistic for one or more sets of data. 

20 

21 This is a generalization of a histogram function. A histogram divides 

22 the space into bins, and returns the count of the number of points in 

23 each bin. This function allows the computation of the sum, mean, median, 

24 or other statistic of the values (or set of values) within each bin. 

25 

26 Parameters 

27 ---------- 

28 x : (N,) array_like 

29 A sequence of values to be binned. 

30 values : (N,) array_like or list of (N,) array_like 

31 The data on which the statistic will be computed. This must be 

32 the same shape as `x`, or a set of sequences - each the same shape as 

33 `x`. If `values` is a set of sequences, the statistic will be computed 

34 on each independently. 

35 statistic : string or callable, optional 

36 The statistic to compute (default is 'mean'). 

37 The following statistics are available: 

38 

39 * 'mean' : compute the mean of values for points within each bin. 

40 Empty bins will be represented by NaN. 

41 * 'std' : compute the standard deviation within each bin. This 

42 is implicitly calculated with ddof=0. 

43 * 'median' : compute the median of values for points within each 

44 bin. Empty bins will be represented by NaN. 

45 * 'count' : compute the count of points within each bin. This is 

46 identical to an unweighted histogram. `values` array is not 

47 referenced. 

48 * 'sum' : compute the sum of values for points within each bin. 

49 This is identical to a weighted histogram. 

50 * 'min' : compute the minimum of values for points within each bin. 

51 Empty bins will be represented by NaN. 

52 * 'max' : compute the maximum of values for point within each bin. 

53 Empty bins will be represented by NaN. 

54 * function : a user-defined function which takes a 1D array of 

55 values, and outputs a single numerical statistic. This function 

56 will be called on the values in each bin. Empty bins will be 

57 represented by function([]), or NaN if this returns an error. 

58 

59 bins : int or sequence of scalars, optional 

60 If `bins` is an int, it defines the number of equal-width bins in the 

61 given range (10 by default). If `bins` is a sequence, it defines the 

62 bin edges, including the rightmost edge, allowing for non-uniform bin 

63 widths. Values in `x` that are smaller than lowest bin edge are 

64 assigned to bin number 0, values beyond the highest bin are assigned to 

65 ``bins[-1]``. If the bin edges are specified, the number of bins will 

66 be, (nx = len(bins)-1). 

67 range : (float, float) or [(float, float)], optional 

68 The lower and upper range of the bins. If not provided, range 

69 is simply ``(x.min(), x.max())``. Values outside the range are 

70 ignored. 

71 

72 Returns 

73 ------- 

74 statistic : array 

75 The values of the selected statistic in each bin. 

76 bin_edges : array of dtype float 

77 Return the bin edges ``(length(statistic)+1)``. 

78 binnumber: 1-D ndarray of ints 

79 Indices of the bins (corresponding to `bin_edges`) in which each value 

80 of `x` belongs. Same length as `values`. A binnumber of `i` means the 

81 corresponding value is between (bin_edges[i-1], bin_edges[i]). 

82 

83 See Also 

84 -------- 

85 numpy.digitize, numpy.histogram, binned_statistic_2d, binned_statistic_dd 

86 

87 Notes 

88 ----- 

89 All but the last (righthand-most) bin is half-open. In other words, if 

90 `bins` is ``[1, 2, 3, 4]``, then the first bin is ``[1, 2)`` (including 1, 

91 but excluding 2) and the second ``[2, 3)``. The last bin, however, is 

92 ``[3, 4]``, which *includes* 4. 

93 

94 .. versionadded:: 0.11.0 

95 

96 Examples 

97 -------- 

98 >>> from scipy import stats 

99 >>> import matplotlib.pyplot as plt 

100 

101 First some basic examples: 

102 

103 Create two evenly spaced bins in the range of the given sample, and sum the 

104 corresponding values in each of those bins: 

105 

106 >>> values = [1.0, 1.0, 2.0, 1.5, 3.0] 

107 >>> stats.binned_statistic([1, 1, 2, 5, 7], values, 'sum', bins=2) 

108 BinnedStatisticResult(statistic=array([4. , 4.5]), 

109 bin_edges=array([1., 4., 7.]), binnumber=array([1, 1, 1, 2, 2])) 

110 

111 Multiple arrays of values can also be passed. The statistic is calculated 

112 on each set independently: 

113 

114 >>> values = [[1.0, 1.0, 2.0, 1.5, 3.0], [2.0, 2.0, 4.0, 3.0, 6.0]] 

115 >>> stats.binned_statistic([1, 1, 2, 5, 7], values, 'sum', bins=2) 

116 BinnedStatisticResult(statistic=array([[4. , 4.5], 

117 [8. , 9. ]]), bin_edges=array([1., 4., 7.]), 

118 binnumber=array([1, 1, 1, 2, 2])) 

119 

120 >>> stats.binned_statistic([1, 2, 1, 2, 4], np.arange(5), statistic='mean', 

121 ... bins=3) 

122 BinnedStatisticResult(statistic=array([1., 2., 4.]), 

123 bin_edges=array([1., 2., 3., 4.]), 

124 binnumber=array([1, 2, 1, 2, 3])) 

125 

126 As a second example, we now generate some random data of sailing boat speed 

127 as a function of wind speed, and then determine how fast our boat is for 

128 certain wind speeds: 

129 

130 >>> windspeed = 8 * np.random.rand(500) 

131 >>> boatspeed = .3 * windspeed**.5 + .2 * np.random.rand(500) 

132 >>> bin_means, bin_edges, binnumber = stats.binned_statistic(windspeed, 

133 ... boatspeed, statistic='median', bins=[1,2,3,4,5,6,7]) 

134 >>> plt.figure() 

135 >>> plt.plot(windspeed, boatspeed, 'b.', label='raw data') 

136 >>> plt.hlines(bin_means, bin_edges[:-1], bin_edges[1:], colors='g', lw=5, 

137 ... label='binned statistic of data') 

138 >>> plt.legend() 

139 

140 Now we can use ``binnumber`` to select all datapoints with a windspeed 

141 below 1: 

142 

143 >>> low_boatspeed = boatspeed[binnumber == 0] 

144 

145 As a final example, we will use ``bin_edges`` and ``binnumber`` to make a 

146 plot of a distribution that shows the mean and distribution around that 

147 mean per bin, on top of a regular histogram and the probability 

148 distribution function: 

149 

150 >>> x = np.linspace(0, 5, num=500) 

151 >>> x_pdf = stats.maxwell.pdf(x) 

152 >>> samples = stats.maxwell.rvs(size=10000) 

153 

154 >>> bin_means, bin_edges, binnumber = stats.binned_statistic(x, x_pdf, 

155 ... statistic='mean', bins=25) 

156 >>> bin_width = (bin_edges[1] - bin_edges[0]) 

157 >>> bin_centers = bin_edges[1:] - bin_width/2 

158 

159 >>> plt.figure() 

160 >>> plt.hist(samples, bins=50, density=True, histtype='stepfilled', 

161 ... alpha=0.2, label='histogram of data') 

162 >>> plt.plot(x, x_pdf, 'r-', label='analytical pdf') 

163 >>> plt.hlines(bin_means, bin_edges[:-1], bin_edges[1:], colors='g', lw=2, 

164 ... label='binned statistic of data') 

165 >>> plt.plot((binnumber - 0.5) * bin_width, x_pdf, 'g.', alpha=0.5) 

166 >>> plt.legend(fontsize=10) 

167 >>> plt.show() 

168 

169 """ 

170 try: 

171 N = len(bins) 

172 except TypeError: 

173 N = 1 

174 

175 if N != 1: 

176 bins = [np.asarray(bins, float)] 

177 

178 if range is not None: 

179 if len(range) == 2: 

180 range = [range] 

181 

182 medians, edges, binnumbers = binned_statistic_dd( 

183 [x], values, statistic, bins, range) 

184 

185 return BinnedStatisticResult(medians, edges[0], binnumbers) 

186 

187 

188BinnedStatistic2dResult = namedtuple('BinnedStatistic2dResult', 

189 ('statistic', 'x_edge', 'y_edge', 

190 'binnumber')) 

191 

192 

193def binned_statistic_2d(x, y, values, statistic='mean', 

194 bins=10, range=None, expand_binnumbers=False): 

195 """ 

196 Compute a bidimensional binned statistic for one or more sets of data. 

197 

198 This is a generalization of a histogram2d function. A histogram divides 

199 the space into bins, and returns the count of the number of points in 

200 each bin. This function allows the computation of the sum, mean, median, 

201 or other statistic of the values (or set of values) within each bin. 

202 

203 Parameters 

204 ---------- 

205 x : (N,) array_like 

206 A sequence of values to be binned along the first dimension. 

207 y : (N,) array_like 

208 A sequence of values to be binned along the second dimension. 

209 values : (N,) array_like or list of (N,) array_like 

210 The data on which the statistic will be computed. This must be 

211 the same shape as `x`, or a list of sequences - each with the same 

212 shape as `x`. If `values` is such a list, the statistic will be 

213 computed on each independently. 

214 statistic : string or callable, optional 

215 The statistic to compute (default is 'mean'). 

216 The following statistics are available: 

217 

218 * 'mean' : compute the mean of values for points within each bin. 

219 Empty bins will be represented by NaN. 

220 * 'std' : compute the standard deviation within each bin. This 

221 is implicitly calculated with ddof=0. 

222 * 'median' : compute the median of values for points within each 

223 bin. Empty bins will be represented by NaN. 

224 * 'count' : compute the count of points within each bin. This is 

225 identical to an unweighted histogram. `values` array is not 

226 referenced. 

227 * 'sum' : compute the sum of values for points within each bin. 

228 This is identical to a weighted histogram. 

229 * 'min' : compute the minimum of values for points within each bin. 

230 Empty bins will be represented by NaN. 

231 * 'max' : compute the maximum of values for point within each bin. 

232 Empty bins will be represented by NaN. 

233 * function : a user-defined function which takes a 1D array of 

234 values, and outputs a single numerical statistic. This function 

235 will be called on the values in each bin. Empty bins will be 

236 represented by function([]), or NaN if this returns an error. 

237 

238 bins : int or [int, int] or array_like or [array, array], optional 

239 The bin specification: 

240 

241 * the number of bins for the two dimensions (nx = ny = bins), 

242 * the number of bins in each dimension (nx, ny = bins), 

243 * the bin edges for the two dimensions (x_edge = y_edge = bins), 

244 * the bin edges in each dimension (x_edge, y_edge = bins). 

245 

246 If the bin edges are specified, the number of bins will be, 

247 (nx = len(x_edge)-1, ny = len(y_edge)-1). 

248 

249 range : (2,2) array_like, optional 

250 The leftmost and rightmost edges of the bins along each dimension 

251 (if not specified explicitly in the `bins` parameters): 

252 [[xmin, xmax], [ymin, ymax]]. All values outside of this range will be 

253 considered outliers and not tallied in the histogram. 

254 expand_binnumbers : bool, optional 

255 'False' (default): the returned `binnumber` is a shape (N,) array of 

256 linearized bin indices. 

257 'True': the returned `binnumber` is 'unraveled' into a shape (2,N) 

258 ndarray, where each row gives the bin numbers in the corresponding 

259 dimension. 

260 See the `binnumber` returned value, and the `Examples` section. 

261 

262 .. versionadded:: 0.17.0 

263 

264 Returns 

265 ------- 

266 statistic : (nx, ny) ndarray 

267 The values of the selected statistic in each two-dimensional bin. 

268 x_edge : (nx + 1) ndarray 

269 The bin edges along the first dimension. 

270 y_edge : (ny + 1) ndarray 

271 The bin edges along the second dimension. 

272 binnumber : (N,) array of ints or (2,N) ndarray of ints 

273 This assigns to each element of `sample` an integer that represents the 

274 bin in which this observation falls. The representation depends on the 

275 `expand_binnumbers` argument. See `Notes` for details. 

276 

277 

278 See Also 

279 -------- 

280 numpy.digitize, numpy.histogram2d, binned_statistic, binned_statistic_dd 

281 

282 Notes 

283 ----- 

284 Binedges: 

285 All but the last (righthand-most) bin is half-open. In other words, if 

286 `bins` is ``[1, 2, 3, 4]``, then the first bin is ``[1, 2)`` (including 1, 

287 but excluding 2) and the second ``[2, 3)``. The last bin, however, is 

288 ``[3, 4]``, which *includes* 4. 

289 

290 `binnumber`: 

291 This returned argument assigns to each element of `sample` an integer that 

292 represents the bin in which it belongs. The representation depends on the 

293 `expand_binnumbers` argument. If 'False' (default): The returned 

294 `binnumber` is a shape (N,) array of linearized indices mapping each 

295 element of `sample` to its corresponding bin (using row-major ordering). 

296 If 'True': The returned `binnumber` is a shape (2,N) ndarray where 

297 each row indicates bin placements for each dimension respectively. In each 

298 dimension, a binnumber of `i` means the corresponding value is between 

299 (D_edge[i-1], D_edge[i]), where 'D' is either 'x' or 'y'. 

300 

301 .. versionadded:: 0.11.0 

302 

303 Examples 

304 -------- 

305 >>> from scipy import stats 

306 

307 Calculate the counts with explicit bin-edges: 

308 

309 >>> x = [0.1, 0.1, 0.1, 0.6] 

310 >>> y = [2.1, 2.6, 2.1, 2.1] 

311 >>> binx = [0.0, 0.5, 1.0] 

312 >>> biny = [2.0, 2.5, 3.0] 

313 >>> ret = stats.binned_statistic_2d(x, y, None, 'count', bins=[binx, biny]) 

314 >>> ret.statistic 

315 array([[2., 1.], 

316 [1., 0.]]) 

317 

318 The bin in which each sample is placed is given by the `binnumber` 

319 returned parameter. By default, these are the linearized bin indices: 

320 

321 >>> ret.binnumber 

322 array([5, 6, 5, 9]) 

323 

324 The bin indices can also be expanded into separate entries for each 

325 dimension using the `expand_binnumbers` parameter: 

326 

327 >>> ret = stats.binned_statistic_2d(x, y, None, 'count', bins=[binx, biny], 

328 ... expand_binnumbers=True) 

329 >>> ret.binnumber 

330 array([[1, 1, 1, 2], 

331 [1, 2, 1, 1]]) 

332 

333 Which shows that the first three elements belong in the xbin 1, and the 

334 fourth into xbin 2; and so on for y. 

335 

336 """ 

337 

338 # This code is based on np.histogram2d 

339 try: 

340 N = len(bins) 

341 except TypeError: 

342 N = 1 

343 

344 if N != 1 and N != 2: 

345 xedges = yedges = np.asarray(bins, float) 

346 bins = [xedges, yedges] 

347 

348 medians, edges, binnumbers = binned_statistic_dd( 

349 [x, y], values, statistic, bins, range, 

350 expand_binnumbers=expand_binnumbers) 

351 

352 return BinnedStatistic2dResult(medians, edges[0], edges[1], binnumbers) 

353 

354 

355BinnedStatisticddResult = namedtuple('BinnedStatisticddResult', 

356 ('statistic', 'bin_edges', 

357 'binnumber')) 

358 

359 

360def binned_statistic_dd(sample, values, statistic='mean', 

361 bins=10, range=None, expand_binnumbers=False, 

362 binned_statistic_result=None): 

363 """ 

364 Compute a multidimensional binned statistic for a set of data. 

365 

366 This is a generalization of a histogramdd function. A histogram divides 

367 the space into bins, and returns the count of the number of points in 

368 each bin. This function allows the computation of the sum, mean, median, 

369 or other statistic of the values within each bin. 

370 

371 Parameters 

372 ---------- 

373 sample : array_like 

374 Data to histogram passed as a sequence of N arrays of length D, or 

375 as an (N,D) array. 

376 values : (N,) array_like or list of (N,) array_like 

377 The data on which the statistic will be computed. This must be 

378 the same shape as `sample`, or a list of sequences - each with the 

379 same shape as `sample`. If `values` is such a list, the statistic 

380 will be computed on each independently. 

381 statistic : string or callable, optional 

382 The statistic to compute (default is 'mean'). 

383 The following statistics are available: 

384 

385 * 'mean' : compute the mean of values for points within each bin. 

386 Empty bins will be represented by NaN. 

387 * 'median' : compute the median of values for points within each 

388 bin. Empty bins will be represented by NaN. 

389 * 'count' : compute the count of points within each bin. This is 

390 identical to an unweighted histogram. `values` array is not 

391 referenced. 

392 * 'sum' : compute the sum of values for points within each bin. 

393 This is identical to a weighted histogram. 

394 * 'std' : compute the standard deviation within each bin. This 

395 is implicitly calculated with ddof=0. If the number of values 

396 within a given bin is 0 or 1, the computed standard deviation value 

397 will be 0 for the bin. 

398 * 'min' : compute the minimum of values for points within each bin. 

399 Empty bins will be represented by NaN. 

400 * 'max' : compute the maximum of values for point within each bin. 

401 Empty bins will be represented by NaN. 

402 * function : a user-defined function which takes a 1D array of 

403 values, and outputs a single numerical statistic. This function 

404 will be called on the values in each bin. Empty bins will be 

405 represented by function([]), or NaN if this returns an error. 

406 

407 bins : sequence or positive int, optional 

408 The bin specification must be in one of the following forms: 

409 

410 * A sequence of arrays describing the bin edges along each dimension. 

411 * The number of bins for each dimension (nx, ny, ... = bins). 

412 * The number of bins for all dimensions (nx = ny = ... = bins). 

413 range : sequence, optional 

414 A sequence of lower and upper bin edges to be used if the edges are 

415 not given explicitly in `bins`. Defaults to the minimum and maximum 

416 values along each dimension. 

417 expand_binnumbers : bool, optional 

418 'False' (default): the returned `binnumber` is a shape (N,) array of 

419 linearized bin indices. 

420 'True': the returned `binnumber` is 'unraveled' into a shape (D,N) 

421 ndarray, where each row gives the bin numbers in the corresponding 

422 dimension. 

423 See the `binnumber` returned value, and the `Examples` section of 

424 `binned_statistic_2d`. 

425 binned_statistic_result : binnedStatisticddResult 

426 Result of a previous call to the function in order to reuse bin edges 

427 and bin numbers with new values and/or a different statistic. 

428 To reuse bin numbers, `expand_binnumbers` must have been set to False 

429 (the default) 

430 

431 .. versionadded:: 0.17.0 

432 

433 Returns 

434 ------- 

435 statistic : ndarray, shape(nx1, nx2, nx3,...) 

436 The values of the selected statistic in each two-dimensional bin. 

437 bin_edges : list of ndarrays 

438 A list of D arrays describing the (nxi + 1) bin edges for each 

439 dimension. 

440 binnumber : (N,) array of ints or (D,N) ndarray of ints 

441 This assigns to each element of `sample` an integer that represents the 

442 bin in which this observation falls. The representation depends on the 

443 `expand_binnumbers` argument. See `Notes` for details. 

444 

445 

446 See Also 

447 -------- 

448 numpy.digitize, numpy.histogramdd, binned_statistic, binned_statistic_2d 

449 

450 Notes 

451 ----- 

452 Binedges: 

453 All but the last (righthand-most) bin is half-open in each dimension. In 

454 other words, if `bins` is ``[1, 2, 3, 4]``, then the first bin is 

455 ``[1, 2)`` (including 1, but excluding 2) and the second ``[2, 3)``. The 

456 last bin, however, is ``[3, 4]``, which *includes* 4. 

457 

458 `binnumber`: 

459 This returned argument assigns to each element of `sample` an integer that 

460 represents the bin in which it belongs. The representation depends on the 

461 `expand_binnumbers` argument. If 'False' (default): The returned 

462 `binnumber` is a shape (N,) array of linearized indices mapping each 

463 element of `sample` to its corresponding bin (using row-major ordering). 

464 If 'True': The returned `binnumber` is a shape (D,N) ndarray where 

465 each row indicates bin placements for each dimension respectively. In each 

466 dimension, a binnumber of `i` means the corresponding value is between 

467 (bin_edges[D][i-1], bin_edges[D][i]), for each dimension 'D'. 

468 

469 .. versionadded:: 0.11.0 

470 

471 Examples 

472 -------- 

473 >>> from scipy import stats 

474 >>> import matplotlib.pyplot as plt 

475 >>> from mpl_toolkits.mplot3d import Axes3D 

476 

477 Take an array of 600 (x, y) coordinates as an example. 

478 `binned_statistic_dd` can handle arrays of higher dimension `D`. But a plot 

479 of dimension `D+1` is required. 

480 

481 >>> mu = np.array([0., 1.]) 

482 >>> sigma = np.array([[1., -0.5],[-0.5, 1.5]]) 

483 >>> multinormal = stats.multivariate_normal(mu, sigma) 

484 >>> data = multinormal.rvs(size=600, random_state=235412) 

485 >>> data.shape 

486 (600, 2) 

487 

488 Create bins and count how many arrays fall in each bin: 

489 

490 >>> N = 60 

491 >>> x = np.linspace(-3, 3, N) 

492 >>> y = np.linspace(-3, 4, N) 

493 >>> ret = stats.binned_statistic_dd(data, np.arange(600), bins=[x, y], 

494 ... statistic='count') 

495 >>> bincounts = ret.statistic 

496 

497 Set the volume and the location of bars: 

498 

499 >>> dx = x[1] - x[0] 

500 >>> dy = y[1] - y[0] 

501 >>> x, y = np.meshgrid(x[:-1]+dx/2, y[:-1]+dy/2) 

502 >>> z = 0 

503 

504 >>> bincounts = bincounts.ravel() 

505 >>> x = x.ravel() 

506 >>> y = y.ravel() 

507 

508 >>> fig = plt.figure() 

509 >>> ax = fig.add_subplot(111, projection='3d') 

510 >>> with np.errstate(divide='ignore'): # silence random axes3d warning 

511 ... ax.bar3d(x, y, z, dx, dy, bincounts) 

512 

513 Reuse bin numbers and bin edges with new values: 

514 

515 >>> ret2 = stats.binned_statistic_dd(data, -np.arange(600), 

516 ... binned_statistic_result=ret, 

517 ... statistic='mean') 

518 """ 

519 known_stats = ['mean', 'median', 'count', 'sum', 'std', 'min', 'max'] 

520 if not callable(statistic) and statistic not in known_stats: 

521 raise ValueError('invalid statistic %r' % (statistic,)) 

522 

523 try: 

524 bins = index(bins) 

525 except TypeError: 

526 # bins is not an integer 

527 pass 

528 # If bins was an integer-like object, now it is an actual Python int. 

529 

530 # NOTE: for _bin_edges(), see e.g. gh-11365 

531 if isinstance(bins, int) and not np.isfinite(sample).all(): 

532 raise ValueError('%r contains non-finite values.' % (sample,)) 

533 

534 # `Ndim` is the number of dimensions (e.g. `2` for `binned_statistic_2d`) 

535 # `Dlen` is the length of elements along each dimension. 

536 # This code is based on np.histogramdd 

537 try: 

538 # `sample` is an ND-array. 

539 Dlen, Ndim = sample.shape 

540 except (AttributeError, ValueError): 

541 # `sample` is a sequence of 1D arrays. 

542 sample = np.atleast_2d(sample).T 

543 Dlen, Ndim = sample.shape 

544 

545 # Store initial shape of `values` to preserve it in the output 

546 values = np.asarray(values) 

547 input_shape = list(values.shape) 

548 # Make sure that `values` is 2D to iterate over rows 

549 values = np.atleast_2d(values) 

550 Vdim, Vlen = values.shape 

551 

552 # Make sure `values` match `sample` 

553 if(statistic != 'count' and Vlen != Dlen): 

554 raise AttributeError('The number of `values` elements must match the ' 

555 'length of each `sample` dimension.') 

556 

557 try: 

558 M = len(bins) 

559 if M != Ndim: 

560 raise AttributeError('The dimension of bins must be equal ' 

561 'to the dimension of the sample x.') 

562 except TypeError: 

563 bins = Ndim * [bins] 

564 

565 if binned_statistic_result is None: 

566 nbin, edges, dedges = _bin_edges(sample, bins, range) 

567 binnumbers = _bin_numbers(sample, nbin, edges, dedges) 

568 else: 

569 edges = binned_statistic_result.bin_edges 

570 nbin = np.array([len(edges[i]) + 1 for i in builtins.range(Ndim)]) 

571 # +1 for outlier bins 

572 dedges = [np.diff(edges[i]) for i in builtins.range(Ndim)] 

573 binnumbers = binned_statistic_result.binnumber 

574 

575 result = np.empty([Vdim, nbin.prod()], float) 

576 

577 if statistic == 'mean': 

578 result.fill(np.nan) 

579 flatcount = np.bincount(binnumbers, None) 

580 a = flatcount.nonzero() 

581 for vv in builtins.range(Vdim): 

582 flatsum = np.bincount(binnumbers, values[vv]) 

583 result[vv, a] = flatsum[a] / flatcount[a] 

584 elif statistic == 'std': 

585 result.fill(0) 

586 flatcount = np.bincount(binnumbers, None) 

587 a = flatcount.nonzero() 

588 for vv in builtins.range(Vdim): 

589 for i in np.unique(binnumbers): 

590 # NOTE: take std dev by bin, np.std() is 2-pass and stable 

591 binned_data = values[vv, binnumbers == i] 

592 # calc std only when binned data is 2 or more for speed up. 

593 if len(binned_data) >= 2: 

594 result[vv, i] = np.std(binned_data) 

595 elif statistic == 'count': 

596 result.fill(0) 

597 flatcount = np.bincount(binnumbers, None) 

598 a = np.arange(len(flatcount)) 

599 result[:, a] = flatcount[np.newaxis, :] 

600 elif statistic == 'sum': 

601 result.fill(0) 

602 for vv in builtins.range(Vdim): 

603 flatsum = np.bincount(binnumbers, values[vv]) 

604 a = np.arange(len(flatsum)) 

605 result[vv, a] = flatsum 

606 elif statistic == 'median': 

607 result.fill(np.nan) 

608 for i in np.unique(binnumbers): 

609 for vv in builtins.range(Vdim): 

610 result[vv, i] = np.median(values[vv, binnumbers == i]) 

611 elif statistic == 'min': 

612 result.fill(np.nan) 

613 for i in np.unique(binnumbers): 

614 for vv in builtins.range(Vdim): 

615 result[vv, i] = np.min(values[vv, binnumbers == i]) 

616 elif statistic == 'max': 

617 result.fill(np.nan) 

618 for i in np.unique(binnumbers): 

619 for vv in builtins.range(Vdim): 

620 result[vv, i] = np.max(values[vv, binnumbers == i]) 

621 elif callable(statistic): 

622 with np.errstate(invalid='ignore'), suppress_warnings() as sup: 

623 sup.filter(RuntimeWarning) 

624 try: 

625 null = statistic([]) 

626 except Exception: 

627 null = np.nan 

628 result.fill(null) 

629 for i in np.unique(binnumbers): 

630 for vv in builtins.range(Vdim): 

631 result[vv, i] = statistic(values[vv, binnumbers == i]) 

632 

633 # Shape into a proper matrix 

634 result = result.reshape(np.append(Vdim, nbin)) 

635 

636 # Remove outliers (indices 0 and -1 for each bin-dimension). 

637 core = tuple([slice(None)] + Ndim * [slice(1, -1)]) 

638 result = result[core] 

639 

640 # Unravel binnumbers into an ndarray, each row the bins for each dimension 

641 if(expand_binnumbers and Ndim > 1): 

642 binnumbers = np.asarray(np.unravel_index(binnumbers, nbin)) 

643 

644 if np.any(result.shape[1:] != nbin - 2): 

645 raise RuntimeError('Internal Shape Error') 

646 

647 # Reshape to have output (`result`) match input (`values`) shape 

648 result = result.reshape(input_shape[:-1] + list(nbin-2)) 

649 

650 return BinnedStatisticddResult(result, edges, binnumbers) 

651 

652 

653def _bin_edges(sample, bins=None, range=None): 

654 """ Create edge arrays 

655 """ 

656 Dlen, Ndim = sample.shape 

657 

658 nbin = np.empty(Ndim, int) # Number of bins in each dimension 

659 edges = Ndim * [None] # Bin edges for each dim (will be 2D array) 

660 dedges = Ndim * [None] # Spacing between edges (will be 2D array) 

661 

662 # Select range for each dimension 

663 # Used only if number of bins is given. 

664 if range is None: 

665 smin = np.atleast_1d(np.array(sample.min(axis=0), float)) 

666 smax = np.atleast_1d(np.array(sample.max(axis=0), float)) 

667 else: 

668 smin = np.zeros(Ndim) 

669 smax = np.zeros(Ndim) 

670 for i in builtins.range(Ndim): 

671 smin[i], smax[i] = range[i] 

672 

673 # Make sure the bins have a finite width. 

674 for i in builtins.range(len(smin)): 

675 if smin[i] == smax[i]: 

676 smin[i] = smin[i] - .5 

677 smax[i] = smax[i] + .5 

678 

679 # Create edge arrays 

680 for i in builtins.range(Ndim): 

681 if np.isscalar(bins[i]): 

682 nbin[i] = bins[i] + 2 # +2 for outlier bins 

683 edges[i] = np.linspace(smin[i], smax[i], nbin[i] - 1) 

684 else: 

685 edges[i] = np.asarray(bins[i], float) 

686 nbin[i] = len(edges[i]) + 1 # +1 for outlier bins 

687 dedges[i] = np.diff(edges[i]) 

688 

689 nbin = np.asarray(nbin) 

690 

691 return nbin, edges, dedges 

692 

693 

694def _bin_numbers(sample, nbin, edges, dedges): 

695 """Compute the bin number each sample falls into, in each dimension 

696 """ 

697 Dlen, Ndim = sample.shape 

698 

699 sampBin = [ 

700 np.digitize(sample[:, i], edges[i]) 

701 for i in range(Ndim) 

702 ] 

703 

704 # Using `digitize`, values that fall on an edge are put in the right bin. 

705 # For the rightmost bin, we want values equal to the right 

706 # edge to be counted in the last bin, and not as an outlier. 

707 for i in range(Ndim): 

708 # Find the rounding precision 

709 dedges_min = dedges[i].min() 

710 if dedges_min == 0: 

711 raise ValueError('The smallest edge difference is numerically 0.') 

712 decimal = int(-np.log10(dedges_min)) + 6 

713 # Find which points are on the rightmost edge. 

714 on_edge = np.where(np.around(sample[:, i], decimal) == 

715 np.around(edges[i][-1], decimal))[0] 

716 # Shift these points one bin to the left. 

717 sampBin[i][on_edge] -= 1 

718 

719 # Compute the sample indices in the flattened statistic matrix. 

720 binnumbers = np.ravel_multi_index(sampBin, nbin) 

721 

722 return binnumbers