Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from contextlib import contextmanager 

2 

3from pandas.plotting._core import _get_plot_backend 

4 

5 

def table(ax, data, rowLabels=None, colLabels=None, **kwargs):
    """
    Helper function to convert DataFrame and Series to matplotlib.table.

    Parameters
    ----------
    ax : Matplotlib axes object
    data : DataFrame or Series
        Data for table contents.
    rowLabels : optional
        Row labels, forwarded unchanged to the plotting backend.
    colLabels : optional
        Column labels, forwarded unchanged to the plotting backend.
    **kwargs
        Keyword arguments to be passed to matplotlib.table.table.
        If `rowLabels` or `colLabels` is not specified, data index or column
        name will be used.

    Returns
    -------
    matplotlib table object
    """
    plot_backend = _get_plot_backend("matplotlib")
    # Forward the caller's labels. Passing literal ``None`` here (as the
    # previous implementation did) silently discarded user-supplied labels.
    return plot_backend.table(
        ax=ax, data=data, rowLabels=rowLabels, colLabels=colLabels, **kwargs
    )

28 

29 

def register():
    """
    Register pandas formatters and converters with matplotlib.

    Calling this modifies the global ``matplotlib.units.registry``
    dictionary. pandas adds custom converters for

    * pd.Timestamp
    * pd.Period
    * np.datetime64
    * datetime.datetime
    * datetime.date
    * datetime.time

    See Also
    --------
    deregister_matplotlib_converters : Remove pandas' formatters and converters.
    """
    # Resolve the backend and delegate in one step; nothing is returned.
    _get_plot_backend("matplotlib").register()

50 

51 

def deregister():
    """
    Remove pandas' formatters and converters.

    Undoes the work of :func:`register` by removing the custom converters it
    added. The aim is to put the registry back into the state it had before
    pandas registered its own units: converters for pandas' own types, such
    as Timestamp and Period, are removed completely, while converters for
    types that pandas overwrites, such as ``datetime.datetime``, are restored
    to their original value.

    See Also
    --------
    register_matplotlib_converters : Register pandas formatters and converters
        with matplotlib.
    """
    # Resolve the backend and delegate in one step; nothing is returned.
    _get_plot_backend("matplotlib").deregister()

69 

70 

def scatter_matrix(
    frame,
    alpha=0.5,
    figsize=None,
    ax=None,
    grid=False,
    diagonal="hist",
    marker=".",
    density_kwds=None,
    hist_kwds=None,
    range_padding=0.05,
    **kwargs,
):
    """
    Draw a matrix of scatter plots.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float, float), optional
        A tuple (width, height) in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Setting this to True will show the grid.
    diagonal : {'hist', 'kde'}
        Plot drawn on the diagonal: 'kde' for a Kernel Density Estimation
        or 'hist' for a histogram.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Keyword arguments to be passed to kernel density estimate plot.
    hist_kwds : keywords
        Keyword arguments to be passed to hist function.
    range_padding : float, default 0.05
        Relative extension of axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min).
    **kwargs
        Keyword arguments to be passed to scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
    >>> scatter_matrix(df, alpha=0.2)
    """
    # Gather the named options first, then append the pass-through keywords;
    # names cannot collide because the named parameters are bound separately.
    options = {
        "frame": frame,
        "alpha": alpha,
        "figsize": figsize,
        "ax": ax,
        "grid": grid,
        "diagonal": diagonal,
        "marker": marker,
        "density_kwds": density_kwds,
        "hist_kwds": hist_kwds,
        "range_padding": range_padding,
    }
    options.update(kwargs)
    return _get_plot_backend("matplotlib").scatter_matrix(**options)

136 

137 

def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allows projecting an N-dimensional data set into a 2D space where
    the influence of each dimension can be interpreted as a balance between
    the influence of all dimensions.

    More info available at the `original article
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.889>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Pandas object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    **kwds
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    See Also
    --------
    plotting.andrews_curves : Plot clustering visualization.

    Examples
    --------
    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6,
        ...                     6.7, 4.6],
        ...     'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2,
        ...                    3.3, 3.6],
        ...     'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4,
        ...                     5.7, 1.0],
        ...     'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2,
        ...                    2.1, 0.2],
        ...     'Category': ['virginica', 'virginica', 'setosa',
        ...                  'virginica', 'virginica', 'versicolor',
        ...                  'versicolor', 'setosa', 'virginica',
        ...                  'setosa']
        ... })
        >>> rad_viz = pd.plotting.radviz(df, 'Category')  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    draw = backend.radviz
    return draw(
        frame=frame, class_column=class_column, ax=ax, color=color,
        colormap=colormap, **kwds
    )

209 

210 

def andrews_curves(
    frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs
):
    """
    Generate a matplotlib plot of Andrews curves, for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Here the x coefficients correspond to the values of each dimension and t
    is linearly spaced between -pi and +pi. Each row of frame then
    corresponds to a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0).
    class_column : Name of the column containing class names
    ax : matplotlib axes object, default None
    samples : Number of points to plot in each curve
    color : list or tuple, optional
        Colors to use for the different classes.
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`
    """
    # Named options first, pass-through keywords last; the two sets cannot
    # overlap because the named parameters are bound separately.
    options = dict(
        frame=frame,
        class_column=class_column,
        ax=ax,
        samples=samples,
        color=color,
        colormap=colormap,
    )
    options.update(kwargs)
    return _get_plot_backend("matplotlib").andrews_curves(**options)

256 

257 

def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
    """
    Bootstrap plot on mean, median and mid-range statistics.

    The bootstrap plot is used to estimate the uncertainty of a statistic
    by relying on random sampling with replacement [1]_. This function will
    generate bootstrapping plots for mean, median and mid-range statistics
    for the given number of samples of the given size.

    Parameters
    ----------
    series : pandas.Series
        Pandas Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        less than or equal to the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.figure.Figure
        Matplotlib figure.

    See Also
    --------
    DataFrame.plot : Basic plotting for DataFrame objects.
    Series.plot : Basic plotting for Series objects.

    References
    ----------
    .. [1] "Bootstrapping (statistics)" in
       https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> s = pd.Series(np.random.uniform(size=100))
        >>> fig = pd.plotting.bootstrap_plot(s)  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    draw = backend.bootstrap_plot
    return draw(series=series, fig=fig, size=size, samples=samples, **kwds)

308 

309 

def parallel_coordinates(
    frame,
    class_column,
    cols=None,
    ax=None,
    color=None,
    use_columns=False,
    xticks=None,
    colormap=None,
    axvlines=True,
    axvlines_kwds=None,
    sort_labels=False,
    **kwargs,
):
    """
    Parallel coordinates plotting.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names.
    cols : list, optional
        A list of column names to use.
    ax : matplotlib.axis, optional
        Matplotlib axis object.
    color : list or tuple, optional
        Colors to use for the different classes.
    use_columns : bool, optional
        If true, columns will be used as xticks.
    xticks : list or tuple, optional
        A list of values to use for xticks.
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick.
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines.
    sort_labels : bool, default False
        Sort class_column labels, useful when assigning colors.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    Examples
    --------
    >>> from matplotlib import pyplot as plt
    >>> df = pd.read_csv(
    ...     'https://raw.github.com/pandas-dev/pandas/master'
    ...     '/pandas/tests/data/csv/iris.csv'
    ... )
    >>> pd.plotting.parallel_coordinates(
    ...     df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
    ... )
    >>> plt.show()
    """
    # Named options first, pass-through keywords last; the two sets cannot
    # overlap because the named parameters are bound separately.
    options = {
        "frame": frame,
        "class_column": class_column,
        "cols": cols,
        "ax": ax,
        "color": color,
        "use_columns": use_columns,
        "xticks": xticks,
        "colormap": colormap,
        "axvlines": axvlines,
        "axvlines_kwds": axvlines_kwds,
        "sort_labels": sort_labels,
    }
    options.update(kwargs)
    return _get_plot_backend("matplotlib").parallel_coordinates(**options)

382 

383 

def lag_plot(series, lag=1, ax=None, **kwds):
    """
    Lag plot for time series.

    Parameters
    ----------
    series : Time series
    lag : lag of the scatter plot, default 1
    ax : Matplotlib axis object, optional
    **kwds
        Matplotlib scatter method keyword arguments.

    Returns
    -------
    :class:`matplotlib.axes.Axes`
    """
    # ``kwds`` can never contain ``series``, ``lag`` or ``ax`` (they are bound
    # as named parameters), so merging them into one mapping is safe.
    call_args = dict(series=series, lag=lag, ax=ax, **kwds)
    return _get_plot_backend("matplotlib").lag_plot(**call_args)

402 

403 

def autocorrelation_plot(series, ax=None, **kwargs):
    """
    Autocorrelation plot for time series.

    Parameters
    ----------
    series : Time series
    ax : Matplotlib axis object, optional
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`
    """
    # ``kwargs`` can never contain ``series`` or ``ax`` (they are bound as
    # named parameters), so merging them into one mapping is safe.
    call_args = dict(series=series, ax=ax, **kwargs)
    return _get_plot_backend("matplotlib").autocorrelation_plot(**call_args)

421 

422 

423class _Options(dict): 

424 """ 

425 Stores pandas plotting options. 

426 

427 Allows for parameter aliasing so you can just use parameter names that are 

428 the same as the plot function parameters, but is stored in a canonical 

429 format that makes it easy to breakdown into groups later. 

430 """ 

431 

432 # alias so the names are same as plotting method parameter names 

433 _ALIASES = {"x_compat": "xaxis.compat"} 

434 _DEFAULT_KEYS = ["xaxis.compat"] 

435 

436 def __init__(self, deprecated=False): 

437 self._deprecated = deprecated 

438 super().__setitem__("xaxis.compat", False) 

439 

440 def __getitem__(self, key): 

441 key = self._get_canonical_key(key) 

442 if key not in self: 

443 raise ValueError(f"{key} is not a valid pandas plotting option") 

444 return super().__getitem__(key) 

445 

446 def __setitem__(self, key, value): 

447 key = self._get_canonical_key(key) 

448 return super().__setitem__(key, value) 

449 

450 def __delitem__(self, key): 

451 key = self._get_canonical_key(key) 

452 if key in self._DEFAULT_KEYS: 

453 raise ValueError(f"Cannot remove default parameter {key}") 

454 return super().__delitem__(key) 

455 

456 def __contains__(self, key) -> bool: 

457 key = self._get_canonical_key(key) 

458 return super().__contains__(key) 

459 

460 def reset(self): 

461 """ 

462 Reset the option store to its initial state 

463 

464 Returns 

465 ------- 

466 None 

467 """ 

468 self.__init__() 

469 

470 def _get_canonical_key(self, key): 

471 return self._ALIASES.get(key, key) 

472 

473 @contextmanager 

474 def use(self, key, value): 

475 """ 

476 Temporarily set a parameter value using the with statement. 

477 Aliasing allowed. 

478 """ 

479 old_value = self[key] 

480 try: 

481 self[key] = value 

482 yield self 

483 finally: 

484 self[key] = old_value 

485 

486 

487plot_params = _Options()