mgplot.summary_plot
summary_plot.py: Produce a summary plot for the data in a given DataFrame. The data is normalised to z-scores and scaled.
"""
summary_plot.py:
Produce a summary plot for the data in a given DataFrame.
The data is normalised to z-scores and scaled.
"""

# --- imports
# system imports
from typing import Any

# from collections.abc import Sequence

# analytic third-party imports
from numpy import ndarray, array
from matplotlib.pyplot import Axes, subplots
from pandas import DataFrame, Period

# local imports
from mgplot.settings import DataT
from mgplot.finalise_plot import make_legend
from mgplot.utilities import constrain_data, check_clean_timeseries
from mgplot.kw_type_checking import (
    report_kwargs,
    ExpectedTypeDict,
    validate_expected,
    validate_kwargs,
)


# --- constants
# The two supported values of the "plot_type" keyword argument.
ZSCORES = "zscores"
ZSCALED = "zscaled"

# Keyword arguments accepted by summary_plot() and their expected types.
SUMMARY_KW_TYPES: ExpectedTypeDict = {
    "verbose": bool,
    "middle": float,
    "plot_type": str,
    "plot_from": (int, Period, type(None)),
    "legend": (type(None), bool, dict, (str, object)),
}
validate_expected(SUMMARY_KW_TYPES, "summary_plot")


# --- functions
def _calc_quantiles(middle: float) -> ndarray:
    """Return the lower and upper quantiles that bound the middle of the data.

    For example, middle=0.8 yields array([0.1, 0.9])."""

    tail = (1 - middle) / 2.0  # proportion left out at each end
    return array([tail, 1 - tail])


def _calculate_z(
    original: DataFrame,  # only contains the data points of interest
    middle: float,  # middle proportion of data to highlight (eg. 0.8)
    verbose: bool = False,  # print the summary data
) -> tuple[DataFrame, DataFrame]:
    """Calculate z-scores and scaled z-scores for each column.

    Return the pair (z_scores, z_scaled), where z_scaled is z_scores
    rescaled column-by-column to span -1 to +1. The middle quantiles
    are only used for the optional verbose printout; they are not
    returned."""

    # calculate z-scores and scaled scores
    z_scores: DataFrame = (original - original.mean()) / original.std()
    z_scaled: DataFrame = (
        # scale z-scores between -1 and +1
        (((z_scores - z_scores.min()) / (z_scores.max() - z_scores.min())) - 0.5)
        * 2
    )

    if verbose:
        q_middle = _calc_quantiles(middle)
        frame = DataFrame(
            {
                "count": original.count(),
                "mean": original.mean(),
                "median": original.median(),
                "min shaded": original.quantile(q=q_middle[0]),
                "max shaded": original.quantile(q=q_middle[1]),
                "z-scores": z_scores.iloc[-1],
                "scaled": z_scaled.iloc[-1],
            }
        )
        print(frame)

    return DataFrame(z_scores), DataFrame(z_scaled)  # syntactic sugar for type hinting


def _plot_middle_bars(
    adjusted: DataFrame,
    middle: float,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Plot the middle (typically 80%) of the data as a horizontal bar.

    One bar is drawn per column of adjusted, on a newly created figure.
    Note: also stores the x-axis limits in kwargs["xlim"], which is why
    kwargs must be passed as a dictionary.
    Return the matplotlib Axes object."""

    q = _calc_quantiles(middle)
    # after the transpose: one row per data column, one column per quantile
    lo_hi: DataFrame = adjusted.quantile(q=q).T  # get the middle section of data
    span = 1.15  # the x-axis always reaches at least -span and +span
    space = 0.2  # extra room added beyond the most extreme value at each side
    low = min(adjusted.iloc[-1].min(), lo_hi.min().min(), -span) - space
    high = max(adjusted.iloc[-1].max(), lo_hi.max().max(), span) + space
    kwargs["xlim"] = (low, high)  # remember the x-axis limits
    _fig, ax = subplots()
    ax.barh(
        y=lo_hi.index,
        width=lo_hi[q[1]] - lo_hi[q[0]],
        left=lo_hi[q[0]],
        color="#bbbbbb",
        label=f"Middle {middle*100:0.0f}% of prints",
    )
    return ax


def _plot_latest_datapoint(
    ax: Axes,
    original: DataFrame,
    adjusted: DataFrame,
    f_size: int,
) -> None:
    """Add the latest datapoints to the summary plot.

    The marker for each column is positioned using the last row of
    adjusted and is annotated with the matching value from original,
    printed to one decimal place at font size f_size."""

    ax.scatter(adjusted.iloc[-1], adjusted.columns, color="darkorange", label="Latest")
    row = adjusted.index[-1]
    # NOTE(review): the text is placed at the integer position col_num, which
    # presumably lines up with the bar drawn for that column label - confirm
    # for frames whose column labels are not strings.
    for col_num, col_name in enumerate(original.columns):
        ax.text(
            x=adjusted.at[row, col_name],
            y=col_num,
            s=f"{original.at[row, col_name]:.1f}",
            ha="center",
            va="center",
            size=f_size,  # honour the caller's font size
        )


def _label_extremes(
    ax: Axes,
    data: tuple[DataFrame, DataFrame],
    plot_type: str,
    f_size: int,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> None:
    """Label the extremes in the scaled plots.

    data is the pair (original, adjusted). For the ZSCALED plot type this
    draws dashed guide lines at -1 and +1, marks each column's median,
    and writes each column's minimum and maximum (from original) at the
    left and right x-axis limits held in kwargs["xlim"]. Other plot types
    are left untouched."""

    original, adjusted = data
    low, high = kwargs["xlim"]
    if plot_type == ZSCALED:
        ax.axvline(-1, color="#555555", linewidth=0.5, linestyle="--")
        ax.axvline(1, color="#555555", linewidth=0.5, linestyle="--")
        ax.scatter(
            adjusted.median(),
            adjusted.columns,
            color="darkorchid",
            marker="x",
            s=5,
            label="Median",
        )
        for col_num, col_name in enumerate(original.columns):
            ax.text(
                low,
                col_num,
                f" {original[col_name].min():.1f}",
                ha="left",
                va="center",
                size=f_size,
            )
            ax.text(
                high,
                col_num,
                f"{original[col_name].max():.1f} ",
                ha="right",
                va="center",
                size=f_size,
            )


def _horizontal_bar_plot(
    original: DataFrame,
    adjusted: DataFrame,
    middle: float,
    plot_type: str,
    kwargs: dict[str, Any],  # must be a dictionary, not a splat
) -> Axes:
    """Plot horizontal bars for the middle of the data, then add the
    latest datapoints and (for scaled plots) the extreme-value labels.
    Return the matplotlib Axes object."""

    # kwargs is a dictionary, not a splat
    # so that the x-axis limits calculated in _plot_middle_bars()
    # are visible to _label_extremes() and to the caller.

    ax = _plot_middle_bars(adjusted, middle, kwargs)
    f_size = 10
    _plot_latest_datapoint(ax, original, adjusted, f_size)
    _label_extremes(
        ax, data=(original, adjusted), plot_type=plot_type, f_size=f_size, kwargs=kwargs
    )

    return ax


# public
def summary_plot(
    data: DataT,  # summary data
    **kwargs,
) -> Axes:
    """Plot a summary of historical data for a given DataFrame.

    Args:
    - data: DataFrame containing the summary data. The column names are
      used as labels for the plot.
    - kwargs: additional arguments for the plot, including:
        - plot_from: int | Period | None
        - verbose: if True, print the summary data.
        - middle: proportion of data to highlight (default is 0.8).
        - plot_type: "zscores" (the default) to plot z-scores; any
          other value (normally "zscaled") plots the scaled z-scores.
        - legend: legend settings handed to make_legend(); by default
          the legend is placed below the x-axis label.

    Raises TypeError if data is not a pandas DataFrame.

    Returns Axes.
    """

    # --- check the kwargs
    me = "summary_plot"
    report_kwargs(called_from=me, **kwargs)
    validate_kwargs(SUMMARY_KW_TYPES, me, **kwargs)

    # --- check the data
    data = check_clean_timeseries(data, me)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    df = DataFrame(data)  # syntactic sugar for type hinting

    # --- optional arguments
    verbose = kwargs.pop("verbose", False)
    middle = float(kwargs.pop("middle", 0.8))
    plot_type = kwargs.pop("plot_type", ZSCORES)
    kwargs.setdefault(
        "legend",
        {
            # put the legend below the x-axis label
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        },
    )

    # get the data, calculate z-scores and scaled scores based on the start period
    subset, kwargs = constrain_data(df, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # plot as required by the plot_type argument
    adjusted = z_scores if plot_type == ZSCORES else z_scaled
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargs)
    ax.tick_params(axis="y", labelsize="small")
    make_legend(ax, kwargs["legend"])
    ax.set_xlim(kwargs.get("xlim"))  # provide space for the labels

    return ax
ZSCORES =
'zscores'
ZSCALED =
'zscaled'
SUMMARY_KW_TYPES: mgplot.kw_type_checking.ExpectedTypeDict =
{'verbose': <class 'bool'>, 'middle': <class 'float'>, 'plot_type': <class 'str'>, 'plot_from': (<class 'int'>, <class 'pandas._libs.tslibs.period.Period'>, <class 'NoneType'>), 'legend': (<class 'NoneType'>, <class 'bool'>, <class 'dict'>, (<class 'str'>, <class 'object'>))}
def
summary_plot(data: ~DataT, **kwargs) -> matplotlib.axes._axes.Axes:
def summary_plot(
    data: DataT,  # summary data
    **kwargs,
) -> Axes:
    """Draw a one-chart summary of the history held in a DataFrame.

    Args:
    - data: DataFrame of summary data; its column names become the
      labels on the plot.
    - kwargs: optional settings, including:
        - plot_from: int | Period | None
        - verbose: when True, the summary data is printed.
        - middle: proportion of the data to highlight (default 0.8).
        - plot_type: "zscores" (the default) plots z-scores; any
          other value (normally "zscaled") plots scaled z-scores.
        - legend: legend settings passed on to make_legend().

    Raises TypeError when data is not a pandas DataFrame.

    Returns Axes.
    """

    caller = "summary_plot"

    # --- keyword arguments: report them, then type-check them
    report_kwargs(called_from=caller, **kwargs)
    validate_kwargs(SUMMARY_KW_TYPES, caller, **kwargs)

    # --- the data must be a DataFrame
    data = check_clean_timeseries(data, caller)
    if not isinstance(data, DataFrame):
        raise TypeError("data must be a pandas DataFrame for summary_plot()")
    frame = DataFrame(data)  # keeps the type checker happy

    # --- pull out the arguments used here, applying defaults
    verbose = kwargs.pop("verbose", False)
    middle = float(kwargs.pop("middle", 0.8))
    plot_type = kwargs.pop("plot_type", ZSCORES)
    if "legend" not in kwargs:
        # default legend position: below the x-axis label
        kwargs["legend"] = {
            "loc": "upper center",
            "fontsize": "xx-small",
            "bbox_to_anchor": (0.5, -0.125),
            "ncol": 4,
        }

    # --- restrict the data, then derive both sets of scores from it
    subset, kwargs = constrain_data(frame, **kwargs)
    z_scores, z_scaled = _calculate_z(subset, middle, verbose=verbose)

    # --- choose the scores that match the requested plot type
    if plot_type == ZSCORES:
        adjusted = z_scores
    else:
        adjusted = z_scaled

    # --- draw the chart and finish it off
    ax = _horizontal_bar_plot(subset, adjusted, middle, plot_type, kwargs)
    ax.tick_params(axis="y", labelsize="small")
    make_legend(ax, kwargs["legend"])
    x_limits = kwargs.get("xlim")
    ax.set_xlim(x_limits)  # leaves room for the labels

    return ax
Plot a summary of historical data for a given DataFrame.
Args:
- data: DataFrame containing the summary data. The column names are used as labels for the plot.
- kwargs: additional arguments for the plot, including:
- plot_from: int | Period | None
- verbose: if True, print the summary data.
- middle: proportion of data to highlight (default is 0.8).
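- plot_type: the type of plot to generate: "zscores" (the default) plots z-scores; any other value (normally "zscaled") plots the scaled z-scores.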
Returns Axes.