# Source code for skchange.penalties._linear_penalties

"""Linear penalties for change and anomaly detection."""

import numpy as np

from ..utils.validation.parameters import check_larger_than


def make_linear_penalty(intercept: float, slope: float, p: int) -> np.ndarray:
    """Build a penalty vector that grows linearly in the number of variables.

    Element ``i`` of the result equals ``intercept + slope * (i + 1)``, i.e. the
    penalty applied when ``i + 1`` of the `p` variables/columns are affected by
    a change or anomaly.

    Parameters
    ----------
    intercept : float
        Constant term of the penalty. Checked by ``check_larger_than`` against
        a lower bound of ``0.0``.
    slope : float
        Increment added per affected variable. Checked by ``check_larger_than``
        against a lower bound of ``0.0``.
    p : int
        Number of variables/columns in the data being analysed. Checked by
        ``check_larger_than`` against a lower bound of ``1``.

    Returns
    -------
    np.ndarray
        Penalty values of shape ``(p,)``, non-decreasing in the number of
        affected variables.
    """
    # Validate in the order intercept -> slope -> p, so the first offending
    # argument is the one reported.
    bounds = (
        (0.0, intercept, "intercept"),
        (0.0, slope, "slope"),
        (1, p, "p"),
    )
    for lower_bound, value, label in bounds:
        check_larger_than(lower_bound, value, label)

    # 1, 2, ..., p: the possible numbers of affected variables.
    n_affected = np.arange(1, p + 1)
    return intercept + slope * n_affected
def make_linear_chi2_penalty(n_params_per_variable: int, n: int, p: int) -> np.ndarray:
    """Build the linear chi-square penalty vector.

    This is one piece of the default penalty of the `MVCAPA` algorithm,
    referred to as "penalty regime 2" in the MVCAPA article [1]_. It targets
    sparse anomalies, i.e. anomalies that only affect a few variables.

    The result is ``2 * log(n)`` plus twice the running sum of a constant
    per-variable term ``2 * log(n_params_per_variable * p)``.

    Parameters
    ----------
    n_params_per_variable : int
        Number of model parameters per variable and segment. Checked by
        ``check_larger_than`` against a lower bound of ``1``.
    n : int
        Sample size. Checked by ``check_larger_than`` against a lower bound
        of ``1``.
    p : int
        Number of variables/columns in the data being analysed. Checked by
        ``check_larger_than`` against a lower bound of ``1``.

    Returns
    -------
    np.ndarray
        Non-decreasing penalty values of shape ``(p,)``. Element ``i`` is the
        penalty for ``i + 1`` variables being affected by a change or anomaly.

    References
    ----------
    .. [1] Fisch, A. T., Eckley, I. A., & Fearnhead, P. (2022). Subset
       multivariate segment and point anomaly detection. Journal of
       Computational and Graphical Statistics, 31(2), 574-585.
    """
    # Validate in the order n_params_per_variable -> n -> p.
    bounds = (
        (1, n_params_per_variable, "n_params_per_variable"),
        (1, n, "n"),
        (1, p, "p"),
    )
    for lower_bound, value, label in bounds:
        check_larger_than(lower_bound, value, label)

    log_n = np.log(n)
    per_variable_term = 2 * np.log(n_params_per_variable * p)
    # Running sum of the constant term: k * per_variable_term for k = 1..p.
    cumulative_terms = np.cumsum(np.full(p, per_variable_term))
    return 2 * log_n + 2 * cumulative_terms