src.general_method
import pandas as pd
from utils.utils import load_dataset, generate_hash_functions, display_results, generate_error_table
from individual_method import IndividualMethod
from scripts.parameter_fitting import PrivacyUtilityOptimizer
from tabulate import tabulate


def run_general_method(df):
    """
    Executes the general method for optimizing privacy and utility trade-offs.

    The function is interactive: it prompts on standard input for the error
    metric and its target value, and prints every result to standard output.

    Steps:
    1. Selects the error metric to optimize (MSE, LP, or Percentage Error).
    2. Identifies the user with the most data in the dataset.
    3. Calculates k and m values using the IndividualMethod class, executes
       the no-privacy and private algorithms, and selects an algorithm.
    4. Runs PrivacyUtilityOptimizer.utility_error for each user with the
       shared k, m and algorithm, and prints one error table per user.

    Args:
        df (pd.DataFrame): The dataset containing user data. It must have a
            "user" column and a "values" column of list-like entries; the
            latter is exploded into one row per value and renamed "value".

    Returns:
        None. Results are only printed.

    Raises:
        ValueError: If the selected metric is not "1", "2" or "3", or if a
            numeric answer cannot be parsed as a float.
    """
    # Step 1: Set value for error metric
    metric = input("Enter the metric to optimize: \n1. MSE\n2. LP\n3. Porcentual Error \nSelect (1, 2 or 3): ").strip()
    if metric == "1":
        Lp = float(input("Enter the MSE to reach: "))
        p = 2
    elif metric == "2":
        Lp = float(input("Enter the Lp to reach: "))
        p = float(input("Enter the type of error (p): "))
    elif metric == "3":
        Lp = float(input("Enter the Porcentual Error to reach: "))
        p = 1
    else:
        # Fail before any work is done; otherwise Lp and p stay unbound and
        # the function crashes much later with an UnboundLocalError.
        raise ValueError(f"Invalid metric option {metric!r}; expected '1', '2' or '3'.")

    # Step 2: Set the user with more data
    df = df.explode("values", ignore_index=True).rename(columns={"values": "value"})
    user_counts = df["user"].value_counts()  # Number of rows per user
    max_user = user_counts.idxmax()  # User with the most rows
    df_user = df[df["user"] == max_user]
    print(df_user.head())

    # Step 3: Set k and m from the user with the most data
    individual = IndividualMethod(df_user)
    k, m = individual.calculate_k_m()
    individual.execute_no_privacy()
    individual.execute_private_algorithms()
    algorithm = individual.select_algorithm()

    # Step 4: Execute utility error for every user
    headers = ["Element", "Real Frequency", "Real Percentage", "Estimated Frequency", "Estimated Percentage", "Estimation Difference", "Percentage Error"]
    results = []
    for user in df["user"].unique():
        print(f"Processing user {user}")
        df_user_specific = df[df["user"] == user]

        optimizer = PrivacyUtilityOptimizer(df_user_specific, k, m, algorithm)
        # NOTE(review): `e` is presumably the fitted privacy parameter; confirm
        # against PrivacyUtilityOptimizer.utility_error.
        e, _, _, data_table = optimizer.utility_error(Lp, p, metric)

        # Keep the user next to its result so the report loop does not have
        # to recompute the unique users to recover the label.
        results.append((user, e, pd.DataFrame(data_table, columns=headers)))

    for user, e, error_table in results:
        # Print the user together with the parameters used
        print(f"\nUser: {user}, e:{e}, k:{k}, m:{m}")
        print(tabulate(error_table, headers='keys', tablefmt='pretty'))
def
run_general_method(df):
def run_general_method(df):
    """
    Executes the general method for optimizing privacy and utility trade-offs.

    The function is interactive: it prompts on standard input for the error
    metric and its target value, and prints every result to standard output.

    Steps:
    1. Selects the error metric to optimize (MSE, LP, or Percentage Error).
    2. Identifies the user with the most data in the dataset.
    3. Calculates k and m values using the IndividualMethod class, executes
       the no-privacy and private algorithms, and selects an algorithm.
    4. Runs PrivacyUtilityOptimizer.utility_error for each user with the
       shared k, m and algorithm, and prints one error table per user.

    Args:
        df (pd.DataFrame): The dataset containing user data. It must have a
            "user" column and a "values" column of list-like entries; the
            latter is exploded into one row per value and renamed "value".

    Returns:
        None. Results are only printed.

    Raises:
        ValueError: If the selected metric is not "1", "2" or "3", or if a
            numeric answer cannot be parsed as a float.
    """
    # Step 1: Set value for error metric
    metric = input("Enter the metric to optimize: \n1. MSE\n2. LP\n3. Porcentual Error \nSelect (1, 2 or 3): ").strip()
    if metric == "1":
        Lp = float(input("Enter the MSE to reach: "))
        p = 2
    elif metric == "2":
        Lp = float(input("Enter the Lp to reach: "))
        p = float(input("Enter the type of error (p): "))
    elif metric == "3":
        Lp = float(input("Enter the Porcentual Error to reach: "))
        p = 1
    else:
        # Fail before any work is done; otherwise Lp and p stay unbound and
        # the function crashes much later with an UnboundLocalError.
        raise ValueError(f"Invalid metric option {metric!r}; expected '1', '2' or '3'.")

    # Step 2: Set the user with more data
    df = df.explode("values", ignore_index=True).rename(columns={"values": "value"})
    user_counts = df["user"].value_counts()  # Number of rows per user
    max_user = user_counts.idxmax()  # User with the most rows
    df_user = df[df["user"] == max_user]
    print(df_user.head())

    # Step 3: Set k and m from the user with the most data
    individual = IndividualMethod(df_user)
    k, m = individual.calculate_k_m()
    individual.execute_no_privacy()
    individual.execute_private_algorithms()
    algorithm = individual.select_algorithm()

    # Step 4: Execute utility error for every user
    headers = ["Element", "Real Frequency", "Real Percentage", "Estimated Frequency", "Estimated Percentage", "Estimation Difference", "Percentage Error"]
    results = []
    for user in df["user"].unique():
        print(f"Processing user {user}")
        df_user_specific = df[df["user"] == user]

        optimizer = PrivacyUtilityOptimizer(df_user_specific, k, m, algorithm)
        # NOTE(review): `e` is presumably the fitted privacy parameter; confirm
        # against PrivacyUtilityOptimizer.utility_error.
        e, _, _, data_table = optimizer.utility_error(Lp, p, metric)

        # Keep the user next to its result so the report loop does not have
        # to recompute the unique users to recover the label.
        results.append((user, e, pd.DataFrame(data_table, columns=headers)))

    for user, e, error_table in results:
        # Print the user together with the parameters used
        print(f"\nUser: {user}, e:{e}, k:{k}, m:{m}")
        print(tabulate(error_table, headers='keys', tablefmt='pretty'))
Executes the general method for optimizing privacy and utility trade-offs.
Steps:
- Selects the error metric to optimize (MSE, LP, or Percentage Error).
- Identifies the user with the most data in the dataset.
- Calculates k and m values using the IndividualMethod class.
- Executes no-privacy and private algorithms.
- Optimizes privacy-utility trade-off for each user.
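Args: df (pd.DataFrame): The dataset containing user data. It must have a "user" column and a "values" column of list-like entries, which is expanded to one row per value before processing.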