Source code for satlas2.plotting

"""
Functions for the generation of plots related to the fitting results.

.. moduleauthor:: Wouter Gins <wouter.gins@kuleuven.be>
"""
import copy

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import tqdm
import uncertainties as u
from scipy import optimize
from scipy.stats import chi2

from .overwrite import SATLASHDFBackend

inv_color_list = [
    '#7acfff', '#fff466', '#00c48f', '#ff8626', '#ff9cd3', '#0093e6'
]
color_list = list(reversed(inv_color_list))
cmap = mpl.colors.ListedColormap(color_list)
cmap.set_over(color_list[-1])
cmap.set_under(color_list[0])
invcmap = mpl.colors.ListedColormap(inv_color_list)
invcmap.set_over(inv_color_list[-1])
invcmap.set_under(inv_color_list[0])

__all__ = [
    'generateChisquareMap', 'generateCorrelationPlot', 'generateWalkPlot'
]


def _make_axes_grid(no_variables,
                    padding=0,
                    cbar_size=0.5,
                    axis_padding=0.5,
                    cbar=True):
    """Makes a triangular grid of axes, with a colorbar axis next to it.

    Parameters
    ----------
    no_variables: int
        Number of variables for which to generate a figure.
    padding: float
        Padding around the figure (in cm).
    cbar_size: float
        Width of the colorbar (in cm).
    axis_padding: float
        Padding between axes (in cm).
    cbar: bool
        If True, an axis for the colorbar is created to the right of the grid.

    Returns
    -------
    fig, axes, cbar: tuple
        Tuple containing the figure, a 2D-array of axes and the colorbar axis."""

    # Convert to inches.
    padding, cbar_size, axis_padding = (padding * 0.393700787,
                                        cbar_size * 0.393700787,
                                        axis_padding * 0.393700787)
    if not cbar:
        cbar_size = 0

    # Generate the figure; the total padding around the grid is fixed to
    # 1 inch here, overriding the converted value from above.
    fig = plt.figure()
    padding = 1

    axis_size_left = (fig.get_figwidth() - padding - 0 *
                      (no_variables + 1) * padding) / no_variables
    axis_size_up = (fig.get_figheight() - padding - 0 *
                    (no_variables + 1) * padding) / no_variables

    cbar_size = cbar_size / fig.get_figwidth()
    left_padding = padding * 0.5 / fig.get_figwidth()
    left_axis_padding = axis_padding / fig.get_figwidth()
    up_padding = padding * 0.5 / fig.get_figheight()
    up_axis_padding = 0 * axis_padding / fig.get_figheight()
    axis_size_left = axis_size_left / fig.get_figwidth()
    axis_size_up = axis_size_up / fig.get_figheight()

    # Pre-allocate a 2D-array to hold the axes.
    axes = np.array([[None for _ in range(no_variables)]
                     for _ in range(no_variables)],
                    dtype='object')

    for i, I in zip(range(no_variables), reversed(range(no_variables))):
        for j in reversed(range(no_variables)):
            # Only create axes on the lower triangle.
            if I + j < no_variables:
                # Share the x-axis with the plot on the diagonal,
                # directly above the plot.
                sharex = axes[j, j] if i != j else None
                # Share the y-axis among the 2D maps along one row,
                # but not the plot on the diagonal!
                sharey = axes[i, i - 1] if (i != j and i - 1 != j) else None
                # Determine the place and size of the axes
                left_edge = j * axis_size_left + left_padding
                bottom_edge = I * axis_size_up + up_padding
                if j > 0:
                    left_edge += j * left_axis_padding
                if I > 0:
                    bottom_edge += I * up_axis_padding

                a = plt.axes(
                    [left_edge, bottom_edge, axis_size_left, axis_size_up],
                    sharex=sharex,
                    sharey=sharey)
                plt.setp(a.xaxis.get_majorticklabels(), rotation=45)
                plt.setp(a.yaxis.get_majorticklabels(), rotation=45)
            else:
                a = None
            if i == j:
                a.yaxis.tick_right()
                a.yaxis.set_label_position('right')
            axes[i, j] = a

    axes = np.array(axes)
    for a in axes[:-1, :].flatten():
        if a is not None:
            plt.setp(a.get_xticklabels(), visible=False)
    for a in axes[:, 1:].flatten():
        if a is not None:
            plt.setp(a.get_yticklabels(), visible=False)
    left_edge = no_variables * (axis_size_left +
                                left_axis_padding) + left_padding
    bottom_edge = up_padding
    width = cbar_size

    height = axis_size_up * len(axes) + up_padding * (len(axes) - 1)

    cbar_width = axis_size_left * 0.1
    if cbar:
        cbar = plt.axes([
            1 - cbar_width - padding * 0.5 / fig.get_figwidth(),
            padding * 0.5 / fig.get_figheight() + axis_size_up * 1.5,
            cbar_width, axis_size_up * (no_variables - 1) - axis_size_up * 0.5
        ])
        plt.setp(cbar.get_xticklabels(), visible=False)
        plt.setp(cbar.get_yticklabels(), visible=False)
    else:
        cbar = None
    return fig, axes, cbar
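

# Example (editor's sketch, not part of the original satlas2 module): a minimal
# illustration of how the triangular grid returned by _make_axes_grid is indexed.
# The diagonal axes hold 1D panels, the lower triangle holds 2D maps, and the
# returned cbar axis receives the colorbar. The dummy data and panel contents
# are illustrative assumptions.
def _example_axes_grid(no_variables=3):
    fig, axes, cbar = _make_axes_grid(no_variables, axis_padding=0, cbar=True)
    rng = np.random.default_rng(0)
    # Diagonal panels: 1D histograms.
    for i in range(no_variables):
        axes[i, i].hist(rng.normal(size=500), bins=30, histtype='step', color='k')
    # Lower-triangle panels: 2D density maps, sharing the colorbar axis.
    for i, j in zip(*np.tril_indices_from(axes, -1)):
        h = axes[i, j].hexbin(rng.normal(size=500), rng.normal(size=500),
                              gridsize=20, cmap=invcmap)
    plt.colorbar(h, cax=cbar, orientation='vertical')
    return fig, axes, cbar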


def generateChisquareMap(fitter,
                         filter=None,
                         method='chisquare',
                         resolution_diag=15,
                         resolution_map=15,
                         fit_kws=None,
                         source=False,
                         model=True):
    """Generates a correlation map for either the chisquare or the MLE method.

    On the diagonal, the change in chisquare or loglikelihood is drawn as a
    function of one parameter that is kept fixed while the others are refitted
    to the data. Dashed vertical lines mark the best-fit value and the
    uncertainty estimated by the fitting routine. On the off-diagonal, two
    parameters are fixed and the model is again fitted to the data; the change
    in chisquare/loglikelihood is mapped to 1, 2 and 3 sigma contour maps.

    Parameters
    ----------
    fitter: :class:`.Fitter`
        Fitter instance for which the chisquare map must be created.

    Other parameters
    ----------------
    filter: list of strings
        Only the parameters matching the names given in this list will be used
        to generate the maps.
    method: str
        Either 'chisquare' or 'llh', selecting which statistic is mapped.
    resolution_diag: int
        Number of points for the line plot on each diagonal.
    resolution_map: int
        Number of points along each dimension for the meshgrids.
    fit_kws: dictionary
        Dictionary of keywords to pass on to the fitting routine.
    source: boolean
        If True, the source name is prepended to the parameter labels.
    model: boolean
        If True, the model name is prepended to the parameter labels.

    Returns
    -------
    fig, axes, cbar: tuple
        Tuple containing the figure, the array of axes and the colorbar axis."""
    # Avoid mutating a shared default dictionary: 'llh_selected' is added below.
    fit_kws = {} if fit_kws is None else fit_kws
    title = '{}\n${}_{{-{}}}^{{+{}}}$'
    title_e = '{}\n$({}_{{-{}}}^{{+{}}})e{}$'
    try:
        orig_value = fitter.chisqr
    except AttributeError:
        fitter.fit(**fit_kws)
        orig_value = fitter.chisqr
    if method.lower().startswith('llh'):
        orig_value = fitter.llh_result
    result = copy.deepcopy(fitter.result)
    orig_params = copy.deepcopy(fitter.lmpars)
    ranges = {}

    param_names = []
    no_params = 0
    for p in orig_params:
        if orig_params[p].vary and (filter is None
                                    or any([f in p for f in filter])):
            no_params += 1
            param_names.append(p)
    fig, axes, cbar = _make_axes_grid(no_params,
                                      axis_padding=0,
                                      cbar=no_params > 1)

    split_names = [name.split('___') for name in param_names]
    sources = [name[0] for name in split_names]
    models = [name[1] for name in split_names]
    var_names = [name[2] for name in split_names]
    to_be_combined = [var_names]
    if model:
        to_be_combined.insert(0, models)
    if source:
        to_be_combined.insert(0, sources)
    var_names = [' '.join(tbc) for tbc in zip(*to_be_combined)]

    # Make the plots on the diagonal: plot the chisquare/likelihood
    # for the best fitting values while setting one parameter to
    # a fixed value.
    saved_params = copy.deepcopy(fitter.lmpars)
    for i in range(no_params):
        params = copy.deepcopy(saved_params)
        ranges[param_names[i]] = {}

        # Set the y-ticklabels.
        ax = axes[i, i]
        ax.set_title(param_names[i])
        if i == no_params - 1:
            if method.lower().startswith('chisquare'):
                ax.set_ylabel(r'$\Delta\chi^2$')
            else:
                ax.set_ylabel(r'$\Delta\mathcal{L}$')
                fit_kws['llh_selected'] = True

        # Select starting point to determine error widths.
        value = orig_params[param_names[i]].value
        stderr = orig_params[param_names[i]].stderr
        stderr = stderr if stderr is not None else 0.01 * np.abs(value)
        stderr = stderr if stderr != 0 else 0.01 * np.abs(value)

        right = value + stderr
        left = value - stderr
        params[param_names[i]].vary = False

        ranges[param_names[i]]['left_val'] = 3 * left - 2 * value
        ranges[param_names[i]]['right_val'] = 3 * right - 2 * value
        value_range = np.linspace(3 * left - 2 * value, 3 * right - 2 * value,
                                  resolution_diag)
        chisquare = np.zeros(len(value_range))
        # Calculate the new value, and store it in the array.
        # with tqdm.tqdm(value_range, desc=param_names[i], leave=True) as pbar:
        for j, v in enumerate(value_range):
            params[param_names[i]].value = v
            fitter.lmpars = params
            fitter.fit(prepFit=False, **fit_kws)
            if fitter.llh_result is not None:
                chisquare[j] = fitter.llh_result - orig_value
            else:
                chisquare[j] = fitter.chisqr - orig_value
            # pbar.update(1)
        # Plot the result.
        ax.plot(value_range, chisquare, color='k')

        c = '#0093e6'
        ax.axvline(right, ls="dashed", color=c)
        ax.axvline(left, ls="dashed", color=c)
        ax.axvline(value, ls="dashed", color=c)
        up = '{:.2ug}'.format(u.ufloat(value, stderr))
        down = '{:.2ug}'.format(u.ufloat(value, stderr))
        val = up.split('+/-')[0].split('(')[-1]
        r = up.split('+/-')[1].split(')')[0]
        l = down.split('+/-')[1].split(')')[0]
        if 'e' in up or 'e' in down:
            ex = up.split('e')[-1]
            ax.set_title(title_e.format(var_names[i], val, l, r, ex))
        else:
            ax.set_title(title.format(var_names[i], val, l, r))
        # Restore the parameters.
        fitter.lmpars = orig_params

    for i, j in zip(*np.tril_indices_from(axes, -1)):
        params = copy.deepcopy(orig_params)
        ax = axes[i, j]
        x_name = param_names[j]
        y_name = param_names[i]
        if j == 0:
            ax.set_ylabel(var_names[i])
        if i == no_params - 1:
            ax.set_xlabel(var_names[j])
        right = ranges[x_name]['right_val']
        left = ranges[x_name]['left_val']
        x_range = np.linspace(left, right, resolution_map)

        right = ranges[y_name]['right_val']
        left = ranges[y_name]['left_val']
        y_range = np.linspace(left, right, resolution_map)

        X, Y = np.meshgrid(x_range, y_range)
        Z = np.zeros(X.shape)
        i_indices, j_indices = np.indices(Z.shape)

        params[param_names[i]].vary = False
        params[param_names[j]].vary = False

        for k, l in zip(i_indices.flatten(), j_indices.flatten()):
            x = X[k, l]
            y = Y[k, l]
            params[param_names[j]].value = x
            params[param_names[i]].value = y
            fitter.lmpars = params
            fitter.fit(prepFit=False, **fit_kws)
            if fitter.llh_result is not None:
                Z[k, l] = (fitter.llh_result - orig_value) * 2
            else:
                Z[k, l] = fitter.chisqr - orig_value
        Z = -Z
        bounds = []
        for bound in [0.997300204, 0.954499736, 0.682689492]:
            # Convert the 3, 2 and 1 sigma coverage to a delta-chisquare boundary.
            chifunc = lambda x: chi2.cdf(x, 1) - bound
            bounds.append(-optimize.root(chifunc, 1).x[0])
        bounds.append(0)
        bounds = np.array(bounds)
        norm = mpl.colors.BoundaryNorm(bounds, invcmap.N)
        contourset = ax.contourf(X, Y, Z, bounds, cmap=invcmap, norm=norm)
        fitter.lmpars = copy.deepcopy(orig_params)
    try:
        cbar = plt.colorbar(contourset, cax=cbar, orientation='vertical')
        cbar.ax.yaxis.set_ticks([-7.5, -4.5, -1.5])
        cbar.ax.set_yticklabels([r'3$\sigma$', r'2$\sigma$', r'1$\sigma$'])
    except:
        pass
    for a in axes.flatten():
        if a is not None:
            for label in a.get_xticklabels()[::2]:
                label.set_visible(False)
            for label in a.get_yticklabels()[::2]:
                label.set_visible(False)
    fitter.result = result
    fitter.updateInfo()
    return fig, axes, cbar
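

# Example (editor's sketch, not part of the original module): a typical call of
# generateChisquareMap. The Fitter instance is assumed to have been built and
# fitted elsewhere with satlas2; the 'centroid' filter, the reduced resolutions
# and the output file name are illustrative assumptions.
def _example_chisquare_map(fitter):
    fig, axes, cbar = generateChisquareMap(fitter,
                                           filter=['centroid'],
                                           method='chisquare',
                                           resolution_diag=10,
                                           resolution_map=10)
    fig.savefig('chisquare_map.png', dpi=200)
    return fig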


def generateCorrelationPlot(filename,
                            filter=None,
                            bins=None,
                            burnin=0,
                            source=True,
                            binreduction=1,
                            bin2dreduction=1,
                            model=True):
    """Given the random walk data, creates a triangle plot: distribution of
    a single parameter on the diagonal axes, 2D contour plots with 1, 2 and
    3 sigma contours on the off-diagonal. The 1-sigma limits based on the
    percentile method are also indicated, as well as added to the title.

    Parameters
    ----------
    filename: string
        Filename for the h5 file containing the data from the walk.
    filter: list of str, optional
        If supplied, only this list of columns is used for the plot.
    bins: int or list of int, optional
        If supplied, use this number of bins for the plotting.
    burnin: int, optional
        Number of initial steps of the walk to discard.
    source: boolean, optional
        If True, the source name is added to the parameter labels.
    binreduction: int, optional
        Factor by which the automatically determined number of bins is
        reduced for the 1D histograms.
    bin2dreduction: int, optional
        Additional reduction factor for the number of bins in the 2D histograms.
    model: boolean, optional
        If True, the model name is added to the parameter labels.

    Returns
    -------
    fig, axes, cbar: tuple
        Tuple containing the figure, the array of axes and the colorbar axis."""
    reader = SATLASHDFBackend(filename)
    var_names = list(reader.labels)
    split_names = [name.split('___') for name in var_names]
    sources = [name[0] + '\n' for name in split_names]
    models = [name[1] for name in split_names]
    var_names = [name[2] for name in split_names]
    to_be_combined = [var_names]
    if model:
        to_be_combined.insert(0, models)
    if source:
        to_be_combined.insert(0, sources)
    var_names = [' '.join(tbc) for tbc in zip(*to_be_combined)]
    full_names = list(reader.labels)

    data = reader.get_chain(flat=False)
    dataset_length = data.shape[0]
    first, last = int(burnin), int(dataset_length)
    data = data[first:last, :, :]
    data = data.reshape(-1, data.shape[-1])

    if filter is not None:
        filter = [(c, n) for f in filter
                  for (c, n) in zip(var_names, full_names) if f in c]
    else:
        filter = list(zip(var_names, full_names))

    with tqdm.tqdm(total=len(filter) + (len(filter)**2 - len(filter)) / 2,
                   leave=True) as pbar:
        fig, axes, cbar = _make_axes_grid(len(filter), axis_padding=0)
        metadata = {}
        if not isinstance(bins, list):
            bins = [bins for _ in filter]
        for i, val in enumerate(filter):
            name, full_name = val
            pbar.set_description(name)
            ax = axes[i, i]
            bin_index = i
            i = full_names.index(full_name)
            x = data[:, i]

            if bins[bin_index] is None:
                # Scott's rule for the bin width.
                width = 3.5 * np.std(x) / x.size**(1 / 3)
                bins[bin_index] = int(
                    min(int(np.ptp(x) / width), 1000) / binreduction)
            try:
                n, b, p = ax.hist(x,
                                  int(bins[bin_index]),
                                  histtype='step',
                                  color='k')
            except TypeError:
                bins[bin_index] = 50
                n, b, p = ax.hist(x,
                                  int(bins[bin_index]),
                                  histtype='step',
                                  color='k')
            # center = n.argmax()
            # q50 = (b[center] + b[center+1])/2
            q = [15.87, 50, 84.13]
            q16, q50, q84 = np.percentile(x, q)
            metadata[full_name] = {
                'bins': bins[bin_index],
                'min': x.min(),
                'max': x.max()
            }

            title = '{}\n${}_{{-{}}}^{{+{}}}$'
            title_e = '{}\n$({}_{{-{}}}^{{+{}}})e{}$'
            up = '{:.2ug}'.format(u.ufloat(q50, np.abs(q84 - q50)))
            down = '{:.2ug}'.format(u.ufloat(q50, np.abs(q50 - q16)))
            param_val = up.split('+/-')[0].split('(')[-1]
            r = up.split('+/-')[1].split(')')[0]
            l = down.split('+/-')[1].split(')')[0]
            if 'e' in up or 'e' in down:
                ex = up.split('e')[-1]
                ax.set_title(title_e.format(name, param_val, l, r, ex))
            else:
                ax.set_title(title.format(name, param_val, l, r))

            qvalues = [q16, q50, q84]
            c = '#0093e6'
            for q in qvalues:
                ax.axvline(q, ls="dashed", color=c)
            ax.set_yticks([])
            ax.set_yticklabels([])
            pbar.update(1)

        for i, j in zip(*np.tril_indices_from(axes, -1)):
            x_name, x_fullname = filter[j]
            y_name, y_fullname = filter[i]
            pbar.set_description(', '.join([x_name, y_name]))
            ax = axes[i, j]
            if j == 0:
                ax.set_ylabel(y_name)
            if i == len(filter) - 1:
                ax.set_xlabel(x_name)
            j = full_names.index(x_fullname)
            i = full_names.index(y_fullname)
            x = data[:, j]
            y = data[:, i]
            x_min, x_max, x_bins = (metadata[x_fullname]['min'],
                                    metadata[x_fullname]['max'],
                                    metadata[x_fullname]['bins'])
            y_min, y_max, y_bins = (metadata[y_fullname]['min'],
                                    metadata[y_fullname]['max'],
                                    metadata[y_fullname]['bins'])
            X = np.linspace(x_min, x_max, int(x_bins / bin2dreduction) + 1)
            Y = np.linspace(y_min, y_max, int(y_bins / bin2dreduction) + 1)
            H, X, Y = np.histogram2d(x.flatten(),
                                     y.flatten(),
                                     bins=(X, Y),
                                     weights=None)
            X1, Y1 = 0.5 * (X[1:] + X[:-1]), 0.5 * (Y[1:] + Y[:-1])
            X, Y = X[:-1], Y[:-1]
            H = (H - H.min()) / (H.max() - H.min())
            Hflat = H.flatten()
            inds = np.argsort(Hflat)[::-1]
            Hflat = Hflat[inds]
            sm = np.cumsum(Hflat)
            sm /= sm[-1]
            levels = 1.0 - np.exp(-0.5 * np.arange(1, 3.1, 1)**2)
            V = np.empty(len(levels))
            for i, v0 in enumerate(levels):
                try:
                    V[i] = Hflat[sm <= v0][-1]
                except:
                    V[i] = Hflat[0]

            bounds = np.unique(np.concatenate([[H.max()], V])[::-1])
            norm = mpl.colors.BoundaryNorm(bounds, invcmap.N)

            contourset = ax.contourf(X1, Y1, H.T, bounds, cmap=invcmap, norm=norm)
            pbar.update(1)
        try:
            cbar = plt.colorbar(contourset, cax=cbar, orientation='vertical')
            ticks = cbar.ax.get_yticks()
            dfticks = (ticks[1:] - ticks[:-1]) / 2
            ticks = ticks[:-1] + dfticks
            cbar.ax.yaxis.set_ticks(ticks)
            # cbar.ax.yaxis.set_ticks([0, 1 / 6, 0.5, 5 / 6, 1.0])
            cbar.ax.set_yticklabels([r'3$\sigma$', r'2$\sigma$', r'1$\sigma$'])
        except:
            cbar = None
    return fig, axes, cbar
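

# Example (editor's sketch, not part of the original module): building a
# correlation (triangle) plot from a random-walk file. The file name 'walk.h5'
# is an assumed output of a previous satlas2 MCMC fit; the burnin, filter and
# bin-reduction values are illustrative.
def _example_correlation_plot(filename='walk.h5'):
    fig, axes, cbar = generateCorrelationPlot(filename,
                                              filter=['centroid'],
                                              burnin=200,
                                              binreduction=2,
                                              bin2dreduction=2)
    fig.savefig('correlation.png', dpi=200)
    return fig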


def generateWalkPlot(filename, filter=None, burnin=0, source=False, model=True):
    """Given the random walk data, plots the random walk for the selected
    parameters.

    Parameters
    ----------
    filename: string
        Filename for the h5 file containing the data from the walk.
    filter: list of str, optional
        If supplied, only this list of parameters is used for the plot.
    burnin: int, optional
        Number of initial steps of the walk to discard.
    source: boolean, optional
        If True, the source name is added to the parameter labels.
    model: boolean, optional
        If True, the model name is added to the parameter labels.

    Returns
    -------
    fig, axes: tuple
        Tuple containing the figure and the array of axes."""
    reader = SATLASHDFBackend(filename)
    var_names = reader.labels
    split_names = [name.split('___') for name in var_names]
    sources = [name[0] for name in split_names]
    models = [name[1] for name in split_names]
    var_names = [name[2] for name in split_names]
    to_be_combined = [var_names]
    if model:
        to_be_combined.insert(0, models)
    if source:
        to_be_combined.insert(0, sources)
    var_names = [' '.join(tbc) for tbc in zip(*to_be_combined)]
    full_names = list(reader.labels)

    data = reader.get_chain(flat=False)
    dataset_length = data.shape[0]
    first, last = int(burnin), dataset_length
    data = data[first:last, :, :]

    if filter is not None:
        filter = [(c, n) for f in filter
                  for (c, n) in zip(var_names, full_names) if f in c]
    else:
        filter = list(zip(var_names, full_names))
    with tqdm.tqdm(total=len(filter), leave=True) as pbar:
        fig, axes = plt.subplots(len(filter), 1, sharex=True)
        if len(filter) == 1:
            axes = [axes]
        for i, (val, ax) in enumerate(zip(filter, axes)):
            name, full_name = val
            pbar.set_description(name)
            i = full_names.index(full_name)
            # Plot every walker in grey, with the median indicated in black.
            x = data[:, :, i]
            q50 = np.percentile(x, 50.0)
            ax.plot(range(first, last), x, alpha=0.3, color='gray')
            ax.set_ylabel(name)
            ax.axhline(q50, color='k')
            pbar.update(1)
        ax.set_xlabel('Step')
        pbar.close()
    return fig, axes
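

# Example (editor's sketch, not part of the original module): plotting the walk
# of every parameter in an assumed 'walk.h5' file, discarding the first 200
# steps as burn-in. The file name, burn-in and output name are illustrative.
def _example_walk_plot(filename='walk.h5'):
    fig, axes = generateWalkPlot(filename, burnin=200)
    fig.savefig('walk.png', dpi=200)
    return fig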