Source code for hybparsimony.util.population

# -*- coding: utf-8 -*-

import numpy as np

class Population:
    r"""Gene matrix of an HYB population plus the codecs to decode and mutate it.

    Each chromosome (row) holds one gene per tunable hyperparameter followed by
    one gene per input feature. Hyperparameters declared as constants are kept
    aside in ``const`` and never become genes.

    Parameters
    ----------
    params : dict
        Hyperparameters of the model and their search space.

        .. code-block::

            {
                "<< hyperparameter name >>": {
                    "range": [<< minimum value >>, << maximum value >>],
                    "type": Population.FLOAT/Population.INTEGER/Population.POWER
                },
                "<< hyperparameter name >>": {
                    "range": [<< category >>, << category >>, ...],
                    "type": Population.CATEGORICAL
                },
                "<< hyperparameter name >>": {
                    "value": << constant value >>,
                    "type": Population.CONSTANT
                }
            }

    columns : int or sequence of str
        Number of features in the dataset (names ``col_0``, ``col_1``, ... are
        generated) or the feature names themselves.
    population : numpy.ndarray, optional
        Matrix (at least 2-D) with one chromosome per row. Default ``None``,
        in which case no gene matrix is stored until ``population`` is assigned.

    Attributes
    ----------
    population : numpy.ndarray
        Decoded view of the gene matrix (see the ``population`` property).
    _min : numpy.ndarray
        Lower bound of every gene (tunable hyperparameters, then features).
    _max : numpy.ndarray
        Upper bound of every gene (tunable hyperparameters, then features).
    _params : dict
        Search-space entries of the tunable (non-constant) hyperparameters.
    const : dict
        Name -> value of the constant hyperparameters.
    colsnames : list of str
        Feature names.
    random_gen : list of callable
        One generator per gene, called as ``random_gen[j](j, **kwargs)``.
        Feature generators require the keyword ``feat_mut_thres``.

    Raises
    ------
    TypeError
        If ``params`` is not a dict, ``columns`` is a bare string, or
        ``population`` is not a numpy array with at least two dimensions.
    ValueError
        If a hyperparameter declares an unknown ``"type"``.
    """

    INTEGER = 0
    FLOAT = 1
    CATEGORICAL = 2
    CONSTANT = 3
    POWER = 4

    def __init__(self, params, columns, population=None):
        if not isinstance(params, dict):
            raise TypeError("params must be of type dict !!!")

        # Type tags are compared with ==/!= (not `is`): identity on integers is
        # a CPython implementation detail and fails for e.g. numpy integers.
        self._params = {
            name: spec
            for name, spec in params.items()
            if spec["type"] != Population.CONSTANT
        }
        self.const = {
            name: spec["value"]
            for name, spec in params.items()
            if spec["type"] == Population.CONSTANT
        }

        # Categorical genes are indices into their "range" list, hence the
        # [0, len(range)] bounds; every other gene uses its declared range.
        lower = [
            0 if spec["type"] == Population.CATEGORICAL else spec["range"][0]
            for spec in self._params.values()
        ]
        upper = [
            len(spec["range"])
            if spec["type"] == Population.CATEGORICAL
            else spec["range"][1]
            for spec in self._params.values()
        ]

        self.colsnames = self._resolve_column_names(columns)
        n_features = len(self.colsnames)

        # Feature genes live in [0, 1].
        self._min = np.concatenate((np.array(lower), np.zeros(n_features)), axis=0)
        self._max = np.concatenate((np.array(upper), np.ones(n_features)), axis=0)

        # Gene positions of the numeric and the categorical hyperparameters.
        self._pos_n, self._pos_c = [], []
        for position, spec in enumerate(self._params.values()):
            if spec["type"] == Population.CATEGORICAL:
                self._pos_c.append(position)
            else:
                self._pos_n.append(position)

        self._transformers, self.random_gen = self._build_codecs()

        if population is not None:
            if not isinstance(population, np.ndarray) or population.ndim < 2:
                raise TypeError("Population is not a numpy matrix")
            self.population = population

    @staticmethod
    def _resolve_column_names(columns):
        """Return the feature names described by ``columns`` as a list."""
        if isinstance(columns, str):
            # A str is iterable; splitting it into characters is never wanted.
            raise TypeError("columns must be an int or a sequence of names, not a str")
        if not hasattr(columns, "__iter__"):
            return [f"col_{i}" for i in range(columns)]
        if isinstance(columns, list):
            return columns
        if hasattr(columns, "tolist"):
            return columns.tolist()
        return list(columns)

    def _build_codecs(self):
        """Build the gene decoder and the per-gene random generators.

        Returns
        -------
        decode : callable
            Maps a raw chromosome (1-D) or gene matrix (2-D) to decoded values.
        generators : list of callable
            One random-value generator per gene, in gene order.
        """

        def random_feature_gene(threshold):
            # First draw decides whether the feature should end up selected.
            if np.random.uniform(low=0, high=1) <= threshold:
                return np.random.uniform(low=0.5, high=1)  # decodes to True
            return np.random.uniform(low=0, high=0.5)  # decodes to False

        # Every generator keeps the historical ``(y, x=..., **kwargs)``
        # signature; ``y`` is the gene index and ``x`` is unused.
        def draw_integer(y, x=None, **kwargs):
            return np.random.randint(low=self._min[y], high=self._max[y])

        def draw_float(y, x=None, **kwargs):
            return np.random.uniform(low=self._min[y], high=self._max[y])

        def draw_power(y, x=None, **kwargs):
            # NOTE(review): this returns 10**k while the decoder below also
            # applies 10**x to the stored gene. The original author left a
            # TODO doubting this; behaviour is kept unchanged - please confirm.
            return pow(10, np.random.randint(low=self._min[y], high=self._max[y]))

        def draw_feature(y, x=None, **kwargs):
            return random_feature_gene(kwargs["feat_mut_thres"])

        def is_selected(gene):
            return gene > 0.5

        def categorical_decoder(categories):
            def pick(gene):
                if isinstance(gene, str):
                    return gene
                return categories[int(np.trunc(gene))]

            return np.vectorize(pick, otypes=[str])

        decoders, generators = [], []
        for name, spec in self._params.items():
            kind = spec["type"]
            if kind == Population.INTEGER:
                decoders.append(np.vectorize(lambda v: int(v), otypes=[int]))
                generators.append(draw_integer)
            elif kind == Population.FLOAT:
                decoders.append(np.vectorize(lambda v: float(v), otypes=[float]))
                generators.append(draw_float)
            elif kind == Population.POWER:
                decoders.append(np.vectorize(lambda v: pow(10, v), otypes=[float]))
                generators.append(draw_power)
            elif kind == Population.CATEGORICAL:
                decoders.append(categorical_decoder(spec["range"]))
                generators.append(draw_integer)
            else:
                # Skipping silently would misalign decoders with gene columns.
                raise ValueError(
                    f"Unknown hyperparameter type for '{name}': {kind!r}"
                )

        n_features = len(self.colsnames)
        decoders.extend([is_selected] * n_features)
        generators.extend([draw_feature] * n_features)

        def decode(genes):
            if len(genes.shape) > 1:
                decoded_columns = [
                    decoder(genes[:, col])
                    for decoder, col in zip(decoders, range(genes.shape[1]))
                ]
                return np.array(decoded_columns, dtype=object).T
            # Avoid 0-dimensional numpy arrays: some algorithms type-check
            # their hyperparameters and expect e.g. a plain integer.
            n_params = len(self.paramsnames)
            return [
                decoder(gene).item() if idx < n_params else decoder(gene)
                for idx, (decoder, gene) in enumerate(zip(decoders, genes))
            ]

        return decode, generators

    @property
    def population(self):
        """Decoded gene matrix (hyperparameter values and boolean feature flags)."""
        return self._transformers(self._pop)

    @population.setter
    def population(self, population):
        # Genes are stored as an object matrix.
        self._pop = np.apply_along_axis(
            lambda row: row.astype(object), 1, population.astype(object)
        )

    @property
    def paramsnames(self):
        """Names of the tunable hyperparameters, in gene order."""
        return list(self._params.keys())

    def __getitem__(self, key):
        return self._pop[key]

    def __setitem__(self, key, newvalue):
        self._pop[key] = newvalue

    def getChromosome(self, key):
        r"""
        This method returns a chromosome from the population.

        Parameters
        ----------
        key : int
            Chromosome row index.

        Returns
        -------
        Chromosome
            A `Chromosome` object built from the decoded row.
        """
        decoded = self._transformers(self._pop[key, :])
        n_params = len(self.paramsnames)
        return Chromosome(
            decoded[:n_params],
            self.paramsnames,
            self.const,
            decoded[n_params:],
            self.colsnames,
        )

    def update_to_feat_thres(self, popSize, feat_thres):
        r"""
        Update the feature genes in place to satisfy ``feat_thres``.

        For every feature gene of the first ``popSize`` chromosomes a uniform
        number ``p`` in [0, 1) is drawn. If ``p <= feat_thres`` a gene below
        0.5 is raised by 0.5; otherwise a gene of at least 0.5 is lowered by 0.5.

        Parameters
        ----------
        popSize : int
            Number of chromosomes (rows, starting at 0) to update.
        feat_thres : float
            Threshold compared against each random draw.
        """
        first = len(self._params)
        last = first + len(self.colsnames)
        for row in range(popSize):
            for col in range(first, last):
                p = np.random.uniform(low=0, high=1)
                gene = self._pop[row, col]
                if p <= feat_thres and gene < 0.5:
                    # NOTE(review): a gene of exactly 0.0 becomes 0.5, which the
                    # decoder (`> 0.5`) still reads as unselected.
                    self._pop[row, col] = gene + 0.5
                elif p > feat_thres and gene >= 0.5:
                    self._pop[row, col] = gene - 0.5
class Chromosome:
    r"""
    One individual: its hyperparameter values, the constant values and the
    feature selection.

    Parameters
    ----------
    params : sequence
        The algorithm hyperparameter values, in the same order as `name_params`.
    name_params : list of str
        The names of the hyperparameters.
    const : dict
        Mapping with the constants to include in the chromosome.
    cols : sequence
        The feature-selection values, in the same order as `name_cols`.
    name_cols : list of str
        The names of the input features.

    Attributes
    ----------
    params : dict
        A dictionary with the parameter values (hyperparameters and constants).
        On a name clash the constant wins.
    columns : sequence
        The feature-selection values exactly as passed in `cols`.
    """

    def __init__(self, params, name_params, const, cols, name_cols):
        self._params, self.name_params = params, name_params
        self.const = const
        self._cols, self.name_cols = cols, name_cols

    @property
    def params(self):
        # Hyperparameters first, then constants (constants override).
        merged = dict(zip(self.name_params, self._params))
        merged.update(self.const)
        return merged

    @property
    def columns(self):
        return self._cols