# Source code for skchange.change_scores._cusum

"""The CUSUM test statistic for a change in the mean."""

__author__ = ["Tveten"]

import numpy as np

from ..base import BaseIntervalScorer
from ..utils.numba import njit
from ..utils.numba.stats import col_cumsum


@njit
def cusum_score(
    starts: np.ndarray,
    ends: np.ndarray,
    splits: np.ndarray,
    sums: np.ndarray,
) -> np.ndarray:
    """
    Compute the CUSUM statistic for a change in the mean at given splits.

    For every triple ``(start, split, end)`` the segment sum over ``start:split`` and
    the segment sum over ``split:end`` are each scaled by a weight depending on the
    segment lengths, and the absolute value of their difference is returned.

    Parameters
    ----------
    starts : `np.ndarray`
        Start indices of the intervals to test for a change in the mean.
    ends : `np.ndarray`
        End indices of the intervals to test for a change in the mean.
    splits : `np.ndarray`
        Split indices of the intervals to test for a change in the mean.
    sums : `np.ndarray`
        Cumulative sum of the input data, with a row of 0-entries as the first row.

    Returns
    -------
    `np.ndarray`
        CUSUM scores for the intervals and splits. One row per interval; the weights
        are reshaped to column vectors so they broadcast across the columns of
        ``sums``.
    """
    # Segment lengths on each side of the split, and of the whole interval.
    left_len = splits - starts
    right_len = ends - splits
    total_len = ends - starts

    # Column-vector weights: sqrt(right / (total * left)) and its mirror image.
    left_scale = np.sqrt(right_len / (total_len * left_len)).reshape(-1, 1)
    right_scale = np.sqrt(left_len / (total_len * right_len)).reshape(-1, 1)

    # Differences of cumulative sums give the segment sums on each side.
    at_split = sums[splits]
    left_term = left_scale * (at_split - sums[starts])
    right_term = right_scale * (sums[ends] - at_split)

    return np.abs(left_term - right_term)


class CUSUM(BaseIntervalScorer):
    """CUSUM change score for a change in the mean.

    The classical CUSUM test statistic for a change in the mean is calculated as the
    weighted difference between the mean before and after a split point within an
    interval. See e.g. Section 4 of [2]_, the idea goes back to [1]_.

    References
    ----------
    .. [1] Page, E. S. (1954). Continuous inspection schemes. Biometrika, 41(1/2),
       100-115.

    .. [2] Wang, D., Yu, Y., & Rinaldo, A. (2020). Univariate mean change point
       detection: Penalization, cusum and optimality. Electronic Journal of
       Statistics, 14(1) 1917-1961.
    """

    _tags = {
        "authors": ["Tveten"],
        "maintainers": "Tveten",
        "task": "change_score",
    }

    def __init__(self):
        super().__init__()

    @property
    def min_size(self) -> int:
        """Minimum size of the interval to evaluate."""
        return 1

    def _fit(self, X: np.ndarray, y=None) -> "CUSUM":
        """Fit the change score evaluator.

        Precomputes the column-wise cumulative sums of `X` and stores them in
        ``self._sums`` for use by `_evaluate`.

        Parameters
        ----------
        X : np.ndarray
            Data to evaluate. Must be a 2D array.
        y : None
            Ignored. Included for API consistency by convention.

        Returns
        -------
        self :
            Reference to self.
        """
        # `cusum_score` documents its `sums` input as having a leading row of zeros;
        # presumably `init_zero=True` is what provides that row -- confirm against
        # `col_cumsum`.
        self._sums = col_cumsum(X, init_zero=True)
        return self

    def _evaluate(self, cuts: np.ndarray) -> np.ndarray:
        """Evaluate the change score for a split within an interval.

        Parameters
        ----------
        cuts : np.ndarray
            A 2D array with three columns of integer locations.
            The first column is the ``start``, the second is the ``split``, and the
            third is the ``end`` of the interval to evaluate.
            The difference between subsets ``X[start:split]`` and ``X[split:end]`` is
            evaluated for each row in `cuts`.

        Returns
        -------
        scores : np.ndarray
            A 2D array of change scores. One row for each cut. The number of
            columns is 1 if the change score is inherently multivariate. The number
            of columns is equal to the number of columns in the input data if the
            score is univariate. In this case, each column represents the univariate
            score for the corresponding input data column.
        """
        starts = cuts[:, 0]
        splits = cuts[:, 1]
        ends = cuts[:, 2]
        # Note the argument order of `cusum_score`: (starts, ends, splits, sums).
        return cusum_score(starts, ends, splits, self._sums)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.
            There are currently no reserved values for interval scorers.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test
            instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        # CUSUM does not have any parameters to set
        params = [{}]
        return params