mgplot.run_plot

run_plot.py This code contains a function to plot a series and highlight the 'runs' in it.

  1"""
  2run_plot.py
  3This code contains a function to plot and highlight
  4the 'runs' in a series.
  5"""
  6
  7# --- imports
  8from collections.abc import Sequence
  9from pandas import Series, concat, period_range
 10from matplotlib.pyplot import Axes
 11from matplotlib import patheffects as pe
 12
 13from mgplot.settings import DataT
 14from mgplot.line_plot import line_plot, LINE_KW_TYPES
 15from mgplot.kw_type_checking import (
 16    limit_kwargs,
 17    ExpectedTypeDict,
 18    validate_kwargs,
 19    validate_expected,
 20    report_kwargs,
 21)
 22from mgplot.utilities import constrain_data, check_clean_timeseries
 23
 24
 25# --- constants
 26THRESHOLD = "threshold"
 27ROUND = "round"
 28HIGHLIGHT = "highlight"
 29DIRECTION = "direction"
 30
 31RUN_KW_TYPES: ExpectedTypeDict = {
 32    THRESHOLD: float,
 33    ROUND: int,
 34    HIGHLIGHT: (str, Sequence, (str,)),  # colors for highlighting the runs
 35    DIRECTION: str,  # "up", "down" or "both"
 36}
 37RUN_KW_TYPES |= LINE_KW_TYPES
 38validate_expected(RUN_KW_TYPES, "run_highlight_plot")
 39
 40# --- functions
 41
 42
 43def _identify_runs(
 44    series: Series,
 45    threshold: float,
 46    up: bool,  # False means down
 47) -> tuple[Series, Series]:
 48    """Identify monotonic increasing/decreasing runs."""
 49
 50    diffed = series.diff()
 51    change_points = concat(
 52        [diffed[diffed.gt(threshold)], diffed[diffed.lt(-threshold)]]
 53    ).sort_index()
 54    if series.index[0] not in change_points.index:
 55        starting_point = Series([0], index=[series.index[0]])
 56        change_points = concat([change_points, starting_point]).sort_index()
 57    facing = change_points > 0 if up else change_points < 0
 58    cycles = (facing & ~facing.shift().astype(bool)).cumsum()
 59    return cycles[facing], change_points
 60
 61
 62def _plot_runs(
 63    axes: Axes,
 64    series: Series,
 65    up: bool,
 66    **kwargs,
 67) -> None:
 68    """Highlight the runs of a series."""
 69
 70    threshold = kwargs[THRESHOLD]
 71    match kwargs.get(HIGHLIGHT):  # make sure highlight is a color string
 72        case str():
 73            highlight = kwargs.get(HIGHLIGHT)
 74        case Sequence():
 75            highlight = kwargs[HIGHLIGHT][0] if up else kwargs[HIGHLIGHT][1]
 76        case _:
 77            raise ValueError(
 78                f"Invalid type for highlight: {type(kwargs.get(HIGHLIGHT))}. "
 79                "Expected str or Sequence."
 80            )
 81
 82    # highlight the runs
 83    stretches, change_points = _identify_runs(series, threshold, up=up)
 84    for k in range(1, stretches.max() + 1):
 85        stretch = stretches[stretches == k]
 86        axes.axvspan(
 87            stretch.index.min(),
 88            stretch.index.max(),
 89            color=highlight,
 90            zorder=-1,
 91        )
 92        space_above = series.max() - series[stretch.index].max()
 93        space_below = series[stretch.index].min() - series.min()
 94        y_pos, vert_align = (
 95            (series.max(), "top")
 96            if space_above > space_below
 97            else (series.min(), "bottom")
 98        )
 99        text = axes.text(
100            x=stretch.index.min(),
101            y=y_pos,
102            s=(
103                change_points[stretch.index].sum().round(kwargs["round"]).astype(str)
104                + " pp"
105            ),
106            va=vert_align,
107            ha="left",
108            fontsize="x-small",
109            rotation=90,
110        )
111        text.set_path_effects([pe.withStroke(linewidth=5, foreground="w")])
112
113
def run_plot(data: DataT, **kwargs) -> Axes:
    """Plot a series of percentage rates, highlighting the runs.

    Arguments
     - data - ordered pandas Series of percentages, with PeriodIndex
     - **kwargs
        - threshold - float - used to ignore micro noise near zero
          (for example, threshold=0.01); defaults to 0.1
        - round - int - rounding for highlight text; defaults to 2
        - highlight - str or Sequence[str] - color(s) for highlighting the
          runs, two colors can be specified in a list if direction is "both"
        - direction - str - whether the highlight is for an upward
          or downward or both runs. Options are "up", "down" or "both".
          Defaults to "up".
        - in addition the **kwargs for line_plot are accepted.

    Return
     - matplotlib Axes object

    Raises
     - TypeError if the checked data is not a pandas Series
     - ValueError if direction is not "up", "down" or "both"; this is
       raised before anything is plotted."""

    # --- check the kwargs
    me = "run_plot"
    report_kwargs(called_from=me, **kwargs)
    validate_kwargs(RUN_KW_TYPES, me, **kwargs)

    # --- check the data
    series = check_clean_timeseries(data, me)
    if not isinstance(series, Series):
        raise TypeError("series must be a pandas Series for run_plot()")
    series, kwargs = constrain_data(series, **kwargs)

    # --- default arguments - in **kwargs
    kwargs.setdefault(THRESHOLD, 0.1)
    kwargs.setdefault(ROUND, 2)
    direction = kwargs.setdefault(DIRECTION, "up")

    # default (highlight, line color) for each valid direction
    direction_defaults = {
        "up": ("gold", "#dd0000"),
        "down": ("skyblue", "navy"),
        "both": (("gold", "skyblue"), "navy"),
    }
    # reject a bad direction before drawing anything on the axes
    if not isinstance(direction, str) or direction not in direction_defaults:
        raise ValueError(
            f"Invalid value for direction: {kwargs[DIRECTION]}. "
            "Expected 'up', 'down', or 'both'."
        )
    default_highlight, default_color = direction_defaults[direction]
    kwargs.setdefault(HIGHLIGHT, default_highlight)
    kwargs.setdefault("color", default_color)

    # defaults for line_plot
    kwargs.setdefault("width", 2)
    kwargs.setdefault("drawstyle", "steps-post")

    # plot the line - passing on only the line_plot keyword arguments
    lp_kwargs = limit_kwargs(LINE_KW_TYPES, **kwargs)
    axes = line_plot(series, **lp_kwargs)

    # plot the runs - upward runs first, then downward runs
    if direction in ("up", "both"):
        _plot_runs(axes, series, up=True, **kwargs)
    if direction in ("down", "both"):
        _plot_runs(axes, series, up=False, **kwargs)
    return axes
183
184
185# test ---
if __name__ == "__main__":
    # Smoke test: a steadily rising quarterly series (1, 2, ..., N_PERIODS),
    # plotted with one keyword argument that run_plot() does not expect.
    N_PERIODS = 25
    periods = period_range(start="2020Q1", periods=N_PERIODS, freq="Q")
    dataset = Series(range(1, N_PERIODS + 1), index=periods)

    ax = run_plot(data=dataset, junk="should generate a warning")
THRESHOLD = 'threshold'
ROUND = 'round'
HIGHLIGHT = 'highlight'
DIRECTION = 'direction'
RUN_KW_TYPES: mgplot.kw_type_checking.ExpectedTypeDict = {'threshold': <class 'float'>, 'round': <class 'int'>, 'highlight': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'direction': <class 'str'>, 'ax': (<class 'matplotlib.axes._axes.Axes'>, <class 'NoneType'>), 'style': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'width': (<class 'float'>, <class 'int'>, <class 'collections.abc.Sequence'>, (<class 'float'>, <class 'int'>)), 'color': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,)), 'alpha': (<class 'float'>, <class 'collections.abc.Sequence'>, (<class 'float'>,)), 'drawstyle': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,), <class 'NoneType'>), 'marker': (<class 'str'>, <class 'collections.abc.Sequence'>, (<class 'str'>,), <class 'NoneType'>), 'markersize': (<class 'float'>, <class 'collections.abc.Sequence'>, (<class 'float'>,), <class 'int'>, <class 'NoneType'>), 'dropna': (<class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,)), 'annotate': (<class 'bool'>, <class 'collections.abc.Sequence'>, (<class 'bool'>,)), 'rounding': (<class 'collections.abc.Sequence'>, (<class 'bool'>, <class 'int'>), <class 'int'>, <class 'bool'>, <class 'NoneType'>), 'fontsize': (<class 'collections.abc.Sequence'>, (<class 'str'>, <class 'int'>), <class 'str'>, <class 'int'>, <class 'NoneType'>), 'plot_from': (<class 'int'>, <class 'pandas._libs.tslibs.period.Period'>, <class 'NoneType'>), 'legend': (<class 'dict'>, (<class 'str'>, <class 'object'>), <class 'bool'>, <class 'NoneType'>)}
def run_plot(data: ~DataT, **kwargs) -> matplotlib.axes._axes.Axes:
def run_plot(data: DataT, **kwargs) -> Axes:
    """Plot a series of percentage rates, highlighting the runs.

    Arguments
     - data - ordered pandas Series of percentages, with PeriodIndex
     - **kwargs
        - threshold - float - used to ignore micro noise near zero
          (for example, threshold=0.01); defaults to 0.1
        - round - int - rounding for highlight text; defaults to 2
        - highlight - str or Sequence[str] - color(s) for highlighting the
          runs, two colors can be specified in a list if direction is "both"
        - direction - str - whether the highlight is for an upward
          or downward or both runs. Options are "up", "down" or "both".
          Defaults to "up".
        - in addition the **kwargs for line_plot are accepted.

    Return
     - matplotlib Axes object

    Raises
     - TypeError if the checked data is not a pandas Series
     - ValueError if direction is not "up", "down" or "both"; this is
       raised before anything is plotted."""

    # --- check the kwargs
    me = "run_plot"
    report_kwargs(called_from=me, **kwargs)
    validate_kwargs(RUN_KW_TYPES, me, **kwargs)

    # --- check the data
    series = check_clean_timeseries(data, me)
    if not isinstance(series, Series):
        raise TypeError("series must be a pandas Series for run_plot()")
    series, kwargs = constrain_data(series, **kwargs)

    # --- default arguments - in **kwargs
    kwargs.setdefault(THRESHOLD, 0.1)
    kwargs.setdefault(ROUND, 2)
    direction = kwargs.setdefault(DIRECTION, "up")

    # default (highlight, line color) for each valid direction
    direction_defaults = {
        "up": ("gold", "#dd0000"),
        "down": ("skyblue", "navy"),
        "both": (("gold", "skyblue"), "navy"),
    }
    # reject a bad direction before drawing anything on the axes
    if not isinstance(direction, str) or direction not in direction_defaults:
        raise ValueError(
            f"Invalid value for direction: {kwargs[DIRECTION]}. "
            "Expected 'up', 'down', or 'both'."
        )
    default_highlight, default_color = direction_defaults[direction]
    kwargs.setdefault(HIGHLIGHT, default_highlight)
    kwargs.setdefault("color", default_color)

    # defaults for line_plot
    kwargs.setdefault("width", 2)
    kwargs.setdefault("drawstyle", "steps-post")

    # plot the line - passing on only the line_plot keyword arguments
    lp_kwargs = limit_kwargs(LINE_KW_TYPES, **kwargs)
    axes = line_plot(series, **lp_kwargs)

    # plot the runs - upward runs first, then downward runs
    if direction in ("up", "both"):
        _plot_runs(axes, series, up=True, **kwargs)
    if direction in ("down", "both"):
        _plot_runs(axes, series, up=False, **kwargs)
    return axes

Plot a series of percentage rates, highlighting the increasing runs.

Arguments

  • data - ordered pandas Series of percentages, with PeriodIndex
  • **kwargs
    • threshold - float - used to ignore micro noise near zero (for example, threshold=0.01)
    • round - int - rounding for highlight text
    • highlight - str or Sequence[str] - color(s) for highlighting the runs, two colors can be specified in a list if direction is "both"
    • direction - str - whether the highlight is for an upward or downward or both runs. Options are "up", "down" or "both".
    • in addition the **kwargs for line_plot are accepted.

Return

  • matplotlib Axes object