import pandas as pd
from rocelib.tasks.Task import Task
[docs]
class ClassificationTask(Task):
"""
A specific task type for classification problems that extends the base Task class.
This class provides methods for training the model and retrieving positive instances
from the training data.
Attributes:
model: The model to be trained and used for predictions.
_training_data: The dataset used for training the model.
"""
[docs]
def train(self, **kwargs):
"""
Trains the model using the training data.
This method uses the features (X) and labels (y) from the training data to fit the model.
"""
self.model.train(self._training_data.X, self._training_data.y, **kwargs)
[docs]
def get_random_positive_instance(self, neg_value, column_name="target") -> pd.Series:
"""
Retrieves a random positive instance from the training data that does not have the specified negative value.
This method continues to sample from the training data until a positive instance
is found whose predicted label is not equal to the negative value.
@param neg_value: The value considered negative in the target variable.
@param column_name: The name of the target column used to identify positive instances.
@return: A Pandas Series representing a random positive instance.
"""
# Get a random positive instance from the training data
pos_instance = self._training_data.get_random_positive_instance(neg_value, column_name=column_name)
# Loop until a positive instance whose prediction is positive is found
while self.model.predict_single(pos_instance) == neg_value:
pos_instance = self._training_data.get_random_positive_instance(neg_value, column_name=column_name)
return pos_instance