# Source code for skchange.change_detectors.base

"""Base classes for change point detectors.

    classes:
        BaseChangeDetector

When inheriting from these classes, the methods listed below remain to be implemented
in order to obtain a fully functional change point detector.

Needs to be implemented:
    _fit(self, X, y=None)
    _predict(self, X)

Optional to implement:
    _transform_scores(self, X)
    _update(self, X, y=None)
"""

import numpy as np
import pandas as pd

from ..base import BaseDetector


class BaseChangeDetector(BaseDetector):
    """Base class for change detectors.

    Changepoint detectors detect points in time where a change in the data occurs.
    Data between two change points is a segment where the data is considered to be
    homogeneous, i.e., of the same distribution. A change point is defined as the
    location of the first element of a segment.
    """

    _tags = {
        "authors": ["Tveten"],
        "maintainers": ["Tveten"],
        "task": "change_point_detection",
    }

    @staticmethod
    def sparse_to_dense(
        y_sparse: pd.DataFrame, index: pd.Index, columns: pd.Index = None
    ) -> pd.DataFrame:
        """Convert the sparse output from the `predict` method to a dense format.

        Parameters
        ----------
        y_sparse : pd.DataFrame
            The sparse output from a change point detector's `predict` method.
            Must contain an ``"ilocs"`` column of integer change point locations.
        index : array-like
            Indices that are to be annotated according to `y_sparse`.
        columns : array-like
            Not used. Only for API compatibility.

        Returns
        -------
        pd.DataFrame with the input data index and one column:
        * ``"labels"`` - integer labels ``0, ..., K`` for each segment between two
        change points.
        """
        changepoints = y_sparse["ilocs"].to_list()
        n = len(index)

        # Pad with the start and end of the data so that consecutive pairs of
        # boundaries delimit the segments [start, end).
        boundaries = [0, *changepoints, n]
        segment_labels = np.zeros(n, dtype="int64")
        for label, (start, end) in enumerate(zip(boundaries[:-1], boundaries[1:])):
            segment_labels[start:end] = label

        return pd.DataFrame(
            segment_labels, index=index, columns=["labels"], dtype="int64"
        )

    @staticmethod
    def dense_to_sparse(y_dense: pd.DataFrame) -> pd.DataFrame:
        """Convert the dense output from the `transform` method to a sparse format.

        Parameters
        ----------
        y_dense : pd.DataFrame
            The dense output from a change point detector's `transform` method.
            Must contain a ``"labels"`` column of segment labels.

        Returns
        -------
        pd.DataFrame :
            A `pd.DataFrame` with a range index and one column:
            * ``"ilocs"`` - integer locations of the change points.
        """
        # A change point is any row whose label differs from the previous row.
        # The first row has no predecessor (diff is NaN) and is never flagged.
        is_changepoint = y_dense["labels"].diff().abs() > 0

        # Report positions rather than index labels: the two only coincide for
        # a zero-based RangeIndex, and `sparse_to_dense` treats "ilocs" as
        # positions.
        changepoints = np.flatnonzero(is_changepoint.to_numpy())
        return BaseChangeDetector._format_sparse_output_ilocs(changepoints)

    @staticmethod
    def _format_sparse_output_ilocs(changepoints) -> pd.DataFrame:
        """Format the sparse output of change point detectors.

        Can be reused by subclasses to format the output of the `_predict` method.

        Parameters
        ----------
        changepoints : array-like
            Integer change point locations.

        Returns
        -------
        pd.DataFrame :
            A `pd.DataFrame` with a range index and one column:
            * ``"ilocs"`` - integer locations of the change points.
        """
        return pd.DataFrame(changepoints, columns=["ilocs"], dtype="int64")

    def _format_sparse_output(self, changepoints) -> pd.DataFrame:
        """Format the sparse output of change point detectors.

        Instance-level convenience wrapper around `_format_sparse_output_ilocs`.
        Can be reused by subclasses to format the output of the `_predict` method.

        Parameters
        ----------
        changepoints : array-like
            Integer change point locations.

        Returns
        -------
        pd.DataFrame :
            A `pd.DataFrame` with a range index and one column:
            * ``"ilocs"`` - integer locations of the change points.
        """
        return self._format_sparse_output_ilocs(changepoints)