# -*- coding: utf-8 -*-
"""Complexity module.
This module contains predefined complexity functions for some of the most popular algorithms in the scikit-learn library:
* **linearModels_complexity**: Any algorithm from `sklearn.linear_model'. Returns: 10^9·nFeatures + (sum of the squared coefs).
* **svm_complexity**: Any algorithm from `sklearn.svm'. Returns: 10^9·nFeatures + (number of support vectors).
* **knn_complexity**: Any algorithm from `sklearn.neighbors'. Returns: 10^9·nFeatures + 1/(number of neighbors)
* **mlp_complexity**: Any algorithm from `sklearn.neural_network'. Returns: 10^9·nFeatures + (sum of the ANN squared weights).
* **randomForest_complexity**: Any algorithm from `sklearn.ensemble.RandomForestRegressor' or 'sklearn.ensemble.RandomForestClassifier'. Returns: 10^9·nFeatures + (the average of tree leaves).
* **xgboost_complexity**: XGboost sklearn model. Returns: 10^9·nFeatures + (the average of tree leaves * number of trees) (Experimental)
* **decision_tree_complexity**: Any algorithm from 'sklearn.tree'. Return: 10^9·nFeatures + (number of leaves) (Experimental)
Otherwise:
* **generic_complexity**: Any algorithm. Returns: the number of input features (nFeatures).
Other complexity functions can be defined with the following interface.
.. highlight:: python
.. code-block:: python

    def complexity(model, nFeatures, **kwargs):
        # ... compute a scalar complexity value from the fitted model ...
        return complexity
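
For example, a custom complexity function for a bagging ensemble could be sketched as follows
(illustrative only; it merely assumes the fitted model exposes an ``estimators_`` list, as
scikit-learn ensembles do):

.. code-block:: python

    def bagging_complexity(model, nFeatures, **kwargs):
        # more base estimators -> more complex model
        int_comp = min(1E09-1, len(model.estimators_))
        return nFeatures*1E09 + int_comp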
"""
import numpy as np
def generic_complexity(model, nFeatures, **kwargs):
    r"""
    Generic complexity function.

    Parameters
    ----------
    model : model
        The model from which the internal complexity is calculated.
    nFeatures : int
        The number of the selected features.
    **kwargs :
        Other arguments.

    Returns
    -------
    int
        nFeatures.
    """
    return nFeatures


def linearModels_complexity(model, nFeatures, **kwargs):
    r"""
    Complexity function for linear models.

    Parameters
    ----------
    model : model
        The model from which the internal complexity is calculated.
    nFeatures : int
        The number of the selected features.
    **kwargs :
        Other arguments.

    Returns
    -------
    int
        10^9·nFeatures + (sum of the model squared coefs).
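
    Examples
    --------
    A minimal usage sketch (``Ridge`` and the toy data are illustrative only;
    any fitted linear model exposing ``coef_`` works the same way):

    >>> import numpy as np
    >>> from sklearn.linear_model import Ridge
    >>> X, y = np.ones((20, 3)), np.ones(20)
    >>> model = Ridge().fit(X, y)
    >>> comp = linearModels_complexity(model, nFeatures=X.shape[1])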
"""
int_comp = np.min((1E09-1,np.sum(model.coef_**2))) # Internal Complexity Sum of squared weigths
return nFeatures*1E09 + int_comp
def kernel_ridge_complexity(model, nFeatures, **kwargs):
    r"""Complexity function for kernel ridge models: 10^9·nFeatures + (sum of the squared dual coefs)."""
    weights_l2 = np.sum(model.dual_coef_ ** 2)
    int_comp = np.min((1E09 - 1, weights_l2))  # Internal complexity: sum of squared dual coefficients
    return nFeatures * 1E09 + int_comp


def svm_complexity(model, nFeatures, **kwargs):
    r"""
    Complexity function for SVM models.

    Parameters
    ----------
    model : model
        The model from which the internal complexity is calculated.
    nFeatures : int
        The number of the selected features.
    **kwargs :
        Other arguments.

    Returns
    -------
    int
        10^9·nFeatures + (number of support vectors)
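
    Examples
    --------
    A minimal usage sketch (``SVC`` and the toy data are illustrative only;
    any fitted ``sklearn.svm`` estimator exposing ``n_support_`` works):

    >>> import numpy as np
    >>> from sklearn.svm import SVC
    >>> X, y = np.random.rand(20, 3), np.array([0, 1] * 10)
    >>> model = SVC().fit(X, y)
    >>> comp = svm_complexity(model, nFeatures=X.shape[1])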
"""
int_comp = np.min((1E09-1,np.sum(model.n_support_))) # Internal Complexity
return nFeatures*1E09 + int_comp
def knn_complexity(model, nFeatures, **kwargs):
    r"""
    Complexity function for KNN models.

    Parameters
    ----------
    model : model
        The model from which the internal complexity is calculated.
    nFeatures : int
        The number of the selected features.
    **kwargs :
        Other arguments.

    Returns
    -------
    int
        10^9·nFeatures + 10^6/(number of neighbors)
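
    Examples
    --------
    A minimal usage sketch (``KNeighborsClassifier`` and the toy data are
    illustrative only):

    >>> import numpy as np
    >>> from sklearn.neighbors import KNeighborsClassifier
    >>> X, y = np.random.rand(20, 3), np.array([0, 1] * 10)
    >>> model = KNeighborsClassifier(n_neighbors=5).fit(X, y)
    >>> comp = knn_complexity(model, nFeatures=X.shape[1])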
"""
int_comp = 1E06 * 1/model.n_neighbors # More k less flexible
return nFeatures*1E09 + int_comp
def mlp_complexity(model, nFeatures, **kwargs):
    r"""
    Complexity function for MLP models.

    Parameters
    ----------
    model : model
        The model from which the internal complexity is calculated.
    nFeatures : int
        The number of the selected features.
    **kwargs :
        Other arguments.

    Returns
    -------
    int
        10^9·nFeatures + (sum of the ANN squared weights)
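
    Examples
    --------
    A minimal usage sketch (``MLPRegressor`` and the toy data are illustrative
    only; any fitted ``sklearn.neural_network`` model works):

    >>> import numpy as np
    >>> from sklearn.neural_network import MLPRegressor
    >>> X, y = np.random.rand(20, 3), np.random.rand(20)
    >>> model = MLPRegressor(max_iter=50).fit(X, y)
    >>> comp = mlp_complexity(model, nFeatures=X.shape[1])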
"""
weights = [np.concatenate(model.intercepts_)]
for wm in model.coefs_:
weights.append(wm.flatten())
weights = np.concatenate(weights)
int_comp = np.min((1E09-1,np.sum(weights**2)))
return nFeatures*1E09 + int_comp
def randomForest_complexity(model, nFeatures, **kwargs):
    r"""
    Complexity function for RandomForest models.

    Parameters
    ----------
    model : model
        The model from which the internal complexity is calculated.
    nFeatures : int
        The number of the selected features.
    **kwargs :
        Other arguments.

    Returns
    -------
    int
        10^9·nFeatures + (average number of leaves per tree)
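
    Examples
    --------
    A minimal usage sketch (``RandomForestRegressor`` and the toy data are
    illustrative only):

    >>> import numpy as np
    >>> from sklearn.ensemble import RandomForestRegressor
    >>> X, y = np.random.rand(20, 3), np.random.rand(20)
    >>> model = RandomForestRegressor(n_estimators=10).fit(X, y)
    >>> comp = randomForest_complexity(model, nFeatures=X.shape[1])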
"""
num_leaves = [tree.get_n_leaves() for tree in model.estimators_]
int_comp = np.min((1E09-1,np.mean(num_leaves))) # More leaves more complex
return nFeatures*1E09 + int_comp
def xgboost_complexity(model, nFeatures, **kwargs):
    r"""
    Complexity function for XGBoost models.

    Parameters
    ----------
    model : model
        The model from which the internal complexity is calculated.
    nFeatures : int
        The number of the selected features.
    **kwargs :
        Other arguments.

    Returns
    -------
    int
        10^9·nFeatures + (average number of leaves per tree * number of trees) (experimental)
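
    Examples
    --------
    A minimal usage sketch (``XGBRegressor`` and the toy data are illustrative
    only; the xgboost package must be installed):

    >>> import numpy as np
    >>> from xgboost import XGBRegressor
    >>> X, y = np.random.rand(20, 3), np.random.rand(20)
    >>> model = XGBRegressor(n_estimators=10).fit(X, y)
    >>> comp = xgboost_complexity(model, nFeatures=X.shape[1])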
"""
df_model = model.get_booster().trees_to_dataframe()
df_model = df_model[df_model.Feature=='Leaf']
mean_leaves = df_model.groupby('Tree')['Feature'].count().mean()
num_trees = df_model.Tree.nunique()
int_comp = np.min((1E09-1,num_trees*mean_leaves))
return nFeatures*1E09 + int_comp
def decision_tree_complexity(model, nFeatures, **kwargs):
    r"""
    Complexity function for Decision Tree models.

    Parameters
    ----------
    model : model
        The model from which the internal complexity is calculated.
    nFeatures : int
        The number of the selected features.
    **kwargs :
        Other arguments.

    Returns
    -------
    int
        10^9·nFeatures + (number of leaves)
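
    Examples
    --------
    A minimal usage sketch (``DecisionTreeRegressor`` and the toy data are
    illustrative only):

    >>> import numpy as np
    >>> from sklearn.tree import DecisionTreeRegressor
    >>> X, y = np.random.rand(20, 3), np.random.rand(20)
    >>> model = DecisionTreeRegressor().fit(X, y)
    >>> comp = decision_tree_complexity(model, nFeatures=X.shape[1])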
"""
num_leaves = model.get_n_leaves()
int_comp = np.min((1E09-1,num_leaves)) # More leaves more complex
return nFeatures*1E09 + int_comp