# Source code for skchange.penalties._constant_penalties
"""Constant penalties for change and anomaly detection."""
import numpy as np
from ..utils.validation.parameters import check_larger_than
# [docs]
def make_bic_penalty(n_params: int, n: int, additional_cpts: int = 1) -> float:
    """Build the constant Bayesian Information Criterion (BIC) penalty.

    The returned value is ``(n_params + additional_cpts) * log(n)``: the
    per-segment parameter count (model parameters plus change point
    parameters) scaled by the natural logarithm of the sample size.

    Parameters
    ----------
    n_params : int
        Number of model parameters per segment, counted across all variables.
        Validated with ``check_larger_than`` against a lower bound of 1.
    n : int
        Sample size. Validated with ``check_larger_than`` against a lower
        bound of 1.
    additional_cpts : int, optional, default=1
        Number of additional change point parameters per segment. For change
        detection, this is 1. Validated with ``check_larger_than`` against a
        lower bound of 0.

    Returns
    -------
    float
        The BIC penalty value.
    """
    # Validate in the same order as the arguments appear in the signature so
    # that the first offending argument is the one reported.
    bounds = (
        (1, n_params, "n_params"),
        (1, n, "n"),
        (0, additional_cpts, "additional_cpts"),
    )
    for lower_bound, value, arg_name in bounds:
        check_larger_than(lower_bound, value, arg_name)

    params_per_segment = n_params + additional_cpts
    log_sample_size = np.log(n)
    return params_per_segment * log_sample_size
# [docs]
def make_chi2_penalty(n_params: int, n: int) -> float:
    """Build the chi-square penalty.

    This is the default penalty for the `CAPA` algorithm, referred to as
    "penalty regime 1" in the MVCAPA article [1]_. It is derived from a
    probability bound on the chi-squared distribution.

    With ``psi = log(n)``, the returned value is
    ``n_params + 2 * sqrt(n_params * psi) + 2 * psi``.

    Parameters
    ----------
    n_params : int
        Total number of model parameters per segment across all variables.
        Validated with ``check_larger_than`` against a lower bound of 1.
    n : int
        Sample size. Validated with ``check_larger_than`` against a lower
        bound of 1.

    Returns
    -------
    float
        The chi-square penalty value.

    References
    ----------
    .. [1] Fisch, A. T., Eckley, I. A., & Fearnhead, P. (2022). Subset
       multivariate segment and point anomaly detection. Journal of
       Computational and Graphical Statistics, 31(2), 574-585.
    """
    check_larger_than(1, n_params, "n_params")
    check_larger_than(1, n, "n")

    log_n = np.log(n)
    # The three terms of the penalty, summed left to right.
    linear_term = n_params
    cross_term = 2 * np.sqrt(n_params * log_n)
    log_term = 2 * log_n
    return linear_term + cross_term + log_term