Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1'''correlation plots 

2 

3Author: Josef Perktold 

4License: BSD-3 

5 

6example for usage with different options in 

7statsmodels/sandbox/examples/thirdparty/ex_ratereturn.py 

8 

9''' 

10import numpy as np 

11 

12from . import utils 

13 

14 

def plot_corr(dcorr, xnames=None, ynames=None, title=None, normcolor=False,
              ax=None, cmap='RdYlBu_r'):
    """Plot correlation of many variables in a tight color grid.

    Parameters
    ----------
    dcorr : ndarray
        Correlation matrix, square 2-D array.
    xnames : list[str], optional
        Labels for the horizontal axis.  If not given (None), then the
        matplotlib defaults (integers) are used.  If it is an empty list, [],
        then no ticks and labels are added.
    ynames : list[str], optional
        Labels for the vertical axis.  Works the same way as `xnames`.
        If not given, the same names as for `xnames` are re-used.
    title : str, optional
        The figure title. If None, the default ('Correlation Matrix') is used.
        If ``title=''``, then no title is added.
    normcolor : bool or tuple of scalars, optional
        If False (default), then the color coding range corresponds to the
        range of `dcorr`.  If True, then the color range is normalized to
        (-1, 1).  If this is a tuple of two numbers, then they define the range
        for the color bar.
    ax : AxesSubplot, optional
        If `ax` is None, then a figure is created. If an axis instance is
        given, then only the main plot but not the colorbar is created.
    cmap : str or Matplotlib Colormap instance, optional
        The colormap for the plot.  Can be any valid Matplotlib Colormap
        instance or name.

    Returns
    -------
    Figure
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm
    >>> import statsmodels.graphics.api as smg

    >>> hie_data = sm.datasets.randhie.load_pandas()
    >>> corr_matrix = np.corrcoef(hie_data.data.T)
    >>> smg.plot_corr(corr_matrix, xnames=hie_data.names)
    >>> plt.show()

    .. plot:: plots/graphics_correlation_plot_corr.py
    """
    # The colorbar is only drawn when this function owns the figure; for a
    # caller-supplied axis the caller is responsible for any colorbar.
    create_colorbar = ax is None

    fig, ax = utils.create_mpl_ax(ax)

    nvars = dcorr.shape[0]

    if ynames is None:
        ynames = xnames
    if title is None:
        title = 'Correlation Matrix'

    if isinstance(normcolor, tuple):
        vmin, vmax = normcolor
    elif normcolor:
        vmin, vmax = -1.0, 1.0
    else:
        # Let matplotlib scale the colors to the data range.
        vmin, vmax = None, None

    axim = ax.imshow(dcorr, cmap=cmap, interpolation='nearest',
                     extent=(0, nvars, 0, nvars), vmin=vmin, vmax=vmax)

    # Cell centers carry the labels; cell borders (minor ticks) carry the
    # white separator grid drawn at the end of the function.
    label_pos = np.arange(0, nvars) + 0.5
    border_pos = label_pos[:-1] + 0.5

    # NOTE: the empty-sequence case must be tested explicitly.  An empty list
    # is "not None", so a plain ``is not None`` / ``== []`` chain never
    # reaches the "no ticks" branch.
    if ynames is not None:
        if len(ynames) == 0:
            ax.set_yticks([])
        else:
            ax.set_yticks(label_pos)
            # With the default image origin the first row is drawn at the
            # top while tick positions increase upwards, hence the reversal.
            ax.set_yticklabels(ynames[::-1], fontsize='small',
                               horizontalalignment='right')
        ax.set_yticks(border_pos, minor=True)

    if xnames is not None:
        if len(xnames) == 0:
            ax.set_xticks([])
        else:
            ax.set_xticks(label_pos)
            ax.set_xticklabels(xnames, fontsize='small', rotation=45,
                               horizontalalignment='right')
        ax.set_xticks(border_pos, minor=True)

    if title != '':
        ax.set_title(title)

    if create_colorbar:
        fig.colorbar(axim, use_gridspec=True)
        fig.tight_layout()

    # Minor ticks only exist to position the grid lines, so hide the marks.
    ax.tick_params(which='minor', length=0)
    ax.tick_params(direction='out', top=False, right=False)
    try:
        ax.grid(True, which='minor', linestyle='-', color='w', lw=1)
    except AttributeError:
        # Seems to fail for axes created with AxesGrid. MPL bug?
        pass

    return fig

122 

123 

def plot_corr_grid(dcorrs, titles=None, ncols=None, normcolor=False, xnames=None,
                   ynames=None, fig=None, cmap='RdYlBu_r'):
    """
    Create a grid of correlation plots.

    The individual correlation plots are assumed to all have the same
    variables, axis labels can be specified only once.

    Parameters
    ----------
    dcorrs : list or iterable of ndarrays
        List of correlation matrices.  Must contain at least one matrix.
    titles : list[str], optional
        List of titles for the subplots.  By default no titles are shown.
    ncols : int, optional
        Number of columns in the subplot grid.  If not given, the number of
        columns is determined automatically.
    normcolor : bool or tuple, optional
        If False (default), then the color coding range corresponds to the
        range of `dcorr`.  If True, then the color range is normalized to
        (-1, 1).  If this is a tuple of two numbers, then they define the range
        for the color bar.
    xnames : list[str], optional
        Labels for the horizontal axis.  If not given (None), then the
        matplotlib defaults (integers) are used.  If it is an empty list, [],
        then no ticks and labels are added.
    ynames : list[str], optional
        Labels for the vertical axis.  Works the same way as `xnames`.
        If not given, the same names as for `xnames` are re-used.
    fig : Figure, optional
        Figure to which the subplots and the colorbar are added.  It is
        forwarded to ``utils.create_mpl_fig``; if None, a new figure is
        created.
    cmap : str or Matplotlib Colormap instance, optional
        The colormap for the plot.  Can be any valid Matplotlib Colormap
        instance or name.

    Returns
    -------
    Figure
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Raises
    ------
    ValueError
        If `dcorrs` is empty.

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm

    In this example we just reuse the same correlation matrix several times.
    Of course in reality one would show a different correlation (measuring
    another type of correlation, for example Pearson (linear) and Spearman,
    Kendall (nonlinear) correlations) for the same variables.

    >>> hie_data = sm.datasets.randhie.load_pandas()
    >>> corr_matrix = np.corrcoef(hie_data.data.T)
    >>> sm.graphics.plot_corr_grid([corr_matrix] * 8, xnames=hie_data.names)
    >>> plt.show()

    .. plot:: plots/graphics_correlation_plot_corr_grid.py
    """
    # Materialize once so that generic iterables (not only lists) can be
    # counted and then iterated.
    dcorrs = list(dcorrs)
    n_plots = len(dcorrs)
    if n_plots == 0:
        raise ValueError("dcorrs must contain at least one correlation matrix")

    if ynames is None:
        ynames = xnames

    if not titles:
        titles = [''] * n_plots

    if ncols is not None:
        nrows = int(np.ceil(n_plots / float(ncols)))
    elif n_plots < 4:
        nrows, ncols = 1, n_plots
    else:
        # Determine number of rows and columns, square if possible, otherwise
        # prefer a wide (more columns) over a high layout.
        nrows = int(np.sqrt(n_plots))
        ncols = int(np.ceil(n_plots / float(nrows)))

    # Create a figure with the correct size
    aspect = min(ncols / float(nrows), 1.8)
    vsize = np.sqrt(nrows) * 5
    fig = utils.create_mpl_fig(fig, figsize=(vsize * aspect + 1, vsize))

    first_ax = None
    for i, c in enumerate(dcorrs):
        ax = fig.add_subplot(nrows, ncols, i + 1)
        if first_ax is None:
            first_ax = ax
        # Ensure to only plot labels on bottom row and left column.
        in_bottom_row = nrows * ncols - (i + 1) < ncols
        # ``i % ncols == 0`` also holds for a single-column grid, where the
        # equivalent-looking ``(i + 1) % ncols == 1`` is never true.
        in_left_column = i % ncols == 0
        _xnames = xnames if in_bottom_row else []
        _ynames = ynames if in_left_column else []
        plot_corr(c, xnames=_xnames, ynames=_ynames, title=titles[i],
                  normcolor=normcolor, ax=ax, cmap=cmap)

    # Adjust figure margins and add a colorbar
    fig.subplots_adjust(bottom=0.1, left=0.09, right=0.9, top=0.9)
    cax = fig.add_axes([0.92, 0.1, 0.025, 0.8])
    # Use the first subplot created here rather than ``fig.axes[0]``, which
    # may be an unrelated axis when a pre-populated figure was passed in.
    fig.colorbar(first_ax.images[0], cax=cax)

    return fig

220 return fig