src.general_method

 1import pandas as pd
 2from utils.utils import load_dataset, generate_hash_functions, display_results, generate_error_table
 3from individual_method import IndividualMethod
 4from scripts.parameter_fitting import PrivacyUtilityOptimizer
 5from tabulate import tabulate
 6
 7def run_general_method(df):
 8        """
 9        Executes the general method for optimizing privacy and utility trade-offs.
10
11        Steps:
12        1. Selects the error metric to optimize (MSE, LP, or Percentage Error).
13        2. Identifies the user with the most data in the dataset.
14        3. Calculates k and m values using the IndividualMethod class.
15        4. Executes no-privacy and private algorithms.
16        5. Optimizes privacy-utility trade-off for each user.
17
18        Args:
19                df (pd.DataFrame): The dataset containing user data with frequency values.
20        """
21        # Step 1: Set value for error metric
22        metric = input("Enter the metric to optimize: \n1. MSE\n2. LP\n3. Porcentual Error \nSelect (1, 2 or 3):  ")
23        if metric == "1":
24                Lp = float(input("Enter the MSE to reach: "))
25                p = 2
26        elif metric == "2":
27                Lp = float(input("Enter the Lp to reach: "))
28                p = float(input("Enter the type of error (p): "))
29        elif metric == "3":
30                Lp = float(input("Enter the Porcentual Error to reach: "))
31                p = 1
32
33        # Step 2: Set the user with more data
34        df = df.explode("values", ignore_index=True).rename(columns={"values": "value"})
35        user_counts = df["user"].value_counts() # Count the number of times each user appears in the dataset
36        max_user = user_counts.idxmax() # Get the user with more data
37        df_user = df[df["user"] == max_user] # Get the data of the user with more data
38        print(df_user.head())
39
40        # Step 3: Set k and m
41        individual = IndividualMethod(df_user)
42        k, m = individual.calculate_k_m()
43        individual.execute_no_privacy()
44        individual.execute_private_algorithms()
45        algorithm = individual.select_algorithm()
46
47        # Step 4: Execute utility error
48        headers = ["Element", "Real Frequency", "Real Percentage", "Estimated Frequency", "Estimated Percentage", "Estimation Difference", "Percentage Error"]
49        results = []
50        for user in df["user"].unique():
51                print(f"Processing user {user}")
52                df_user_specific = df[df["user"] == user]
53
54                optimizer = PrivacyUtilityOptimizer(df_user_specific, k, m, algorithm)
55                e, _, _, data_table = optimizer.utility_error(Lp, p, metric)
56                
57                data_table = pd.DataFrame(data_table, columns=headers)
58                results.append({"e": e, "Porcentual Error Table": data_table})
59        
60        results_df = pd.DataFrame(results)
61
62        for index, result in results_df.iterrows():
63                print(f"\nUser: {df['user'].unique()[index]}, e:{result["e"]}, k:{k}, m:{m}")  # Imprimir el usuario
64                print(tabulate(result["Porcentual Error Table"], headers='keys', tablefmt='pretty'))
def run_general_method(df, read_input=input):
    """
    Run the general privacy/utility optimization over every user in ``df``.

    Steps:
    1. Prompt for the error metric to optimize (MSE, Lp, or percentage error)
       and for the target value to reach.
    2. Explode the ``values`` column and find the user with the most rows.
    3. On that user's data, compute ``k`` and ``m`` with ``IndividualMethod``,
       run its no-privacy and private algorithms, and select an algorithm.
    4. For every user, call ``PrivacyUtilityOptimizer.utility_error`` with the
       shared ``k``, ``m`` and algorithm.
    5. Print each user's ``e`` value together with its error table.

    Args:
        df (pd.DataFrame): Dataset with a ``user`` column and a list-like
            ``values`` column (it is exploded to one row per value).
        read_input (callable): Function that takes a prompt string and returns
            the answer as a string. Defaults to the builtin ``input``; pass a
            different callable to drive the prompts non-interactively.

    Raises:
        ValueError: If the metric selection is not "1", "2" or "3", or if a
            numeric answer cannot be parsed as a float.
    """
    # Step 1: Set value for error metric
    metric = read_input("Enter the metric to optimize: \n1. MSE\n2. LP\n3. Porcentual Error \nSelect (1, 2 or 3):  ").strip()
    if metric == "1":
        Lp = float(read_input("Enter the MSE to reach: "))
        p = 2
    elif metric == "2":
        Lp = float(read_input("Enter the Lp to reach: "))
        p = float(read_input("Enter the type of error (p): "))
    elif metric == "3":
        Lp = float(read_input("Enter the Porcentual Error to reach: "))
        p = 1
    else:
        # Fail before any expensive work; otherwise Lp and p stay unbound and
        # the function only crashes later, inside the per-user loop.
        raise ValueError(f"Invalid metric selection {metric!r}; expected 1, 2 or 3.")

    # Step 2: Find the user with the most data
    df = df.explode("values", ignore_index=True).rename(columns={"values": "value"})
    user_counts = df["user"].value_counts()  # Number of rows per user
    max_user = user_counts.idxmax()  # User with the most rows
    df_user = df[df["user"] == max_user]  # Rows of that user only
    print(df_user.head())

    # Step 3: Set k and m (and the algorithm) from the largest user's data
    individual = IndividualMethod(df_user)
    k, m = individual.calculate_k_m()
    individual.execute_no_privacy()
    individual.execute_private_algorithms()
    algorithm = individual.select_algorithm()

    # Step 4: Execute utility error for every user with the shared parameters
    headers = ["Element", "Real Frequency", "Real Percentage", "Estimated Frequency", "Estimated Percentage", "Estimation Difference", "Percentage Error"]
    users = df["user"].unique()  # Computed once; also used to label the output
    results = []
    for user in users:
        print(f"Processing user {user}")
        df_user_specific = df[df["user"] == user]

        optimizer = PrivacyUtilityOptimizer(df_user_specific, k, m, algorithm)
        e, _, _, data_table = optimizer.utility_error(Lp, p, metric)

        data_table = pd.DataFrame(data_table, columns=headers)
        results.append({"e": e, "Porcentual Error Table": data_table})

    results_df = pd.DataFrame(results)

    # results_df rows are in the same order as `users`, so the positional
    # index from iterrows() identifies the user.
    for index, result in results_df.iterrows():
        # Single quotes inside the f-string keep this valid before Python 3.12.
        print(f"\nUser: {users[index]}, e:{result['e']}, k:{k}, m:{m}")  # Print the user
        print(tabulate(result["Porcentual Error Table"], headers='keys', tablefmt='pretty'))

Executes the general method for optimizing privacy and utility trade-offs.

Steps:

  1. Prompts for the error metric to optimize (MSE, Lp, or percentage error) and for the target value to reach.
  2. Identifies the user with the most data in the dataset.
  3. Calculates k and m values using the IndividualMethod class.
  4. Executes no-privacy and private algorithms.
  5. Optimizes privacy-utility trade-off for each user.

Args:

  df (pd.DataFrame): The dataset containing user data with frequency values. It must provide a `user` column and a list-like `values` column.