src.main.general_method

 1from main.individual_method import IndividualMethod
 2from scripts.preprocess import run_data_processor
 3from scripts.parameter_fitting import PrivacyUtilityOptimizer
 4
 5import pandas as pd
 6from tabulate import tabulate
 7from colorama import Fore, Style
 8import ast
 9
def _prompt_metric_target():
    """Ask which error metric to optimize and the target value to reach.

    The menu is shown again until one of the three listed options is
    entered, so the caller always receives a complete triple.

    Returns:
        tuple: ``(metric, Lp, p)`` where ``metric`` is the menu choice as a
        string (``"1"``, ``"2"`` or ``"3"``), ``Lp`` is the target error
        entered by the user (float) and ``p`` is 2 for MSE, the
        user-supplied value for LP, or 1 for percentage error.

    Raises:
        ValueError: If a numeric answer cannot be parsed as a float.
    """
    print("📊 Selection of the Optimization Metric")
    while True:
        metric = input(f"\nEnter the metric to optimize: \n1. {Fore.CYAN}MSE{Style.RESET_ALL}\n2. {Fore.CYAN}LP{Style.RESET_ALL}\n3. {Fore.CYAN}Porcentual Error{Style.RESET_ALL} \nSelect:  ").strip()
        if metric == "1":
            Lp = float(input("⭢ Enter the MSE to reach: "))
            return metric, Lp, 2
        if metric == "2":
            Lp = float(input("⭢ Enter the Lp to reach: "))
            p = float(input("⭢ Enter the type of error ρ: "))
            return metric, Lp, p
        if metric == "3":
            Lp = float(input(f"⭢ Enter the {Fore.CYAN}Porcentual Error{Style.RESET_ALL} to reach: "))
            return metric, Lp, 1
        # Previously an unknown option fell through silently and the run
        # crashed much later with a NameError on Lp / p.
        print("Invalid option, please enter 1, 2 or 3.")


def run_general_method(initial_epsilon=150):
    """
    Executes the general method for optimizing privacy and utility trade-offs.

    Steps:
    1. Loads the preprocessed dataset through run_data_processor().
    2. Selects the error metric to optimize (MSE, LP, or Percentage Error).
    3. Identifies the user with the most data in the dataset.
    4. Calculates k and m with IndividualMethod, runs the no-privacy and
       private algorithms for that user and selects an algorithm, repeating
       with a new ϵ for as long as the operator asks.
    5. Runs PrivacyUtilityOptimizer.utility_error for every user and prints
       one result table per user.

    The function is interactive: it reads its choices from standard input
    and writes its results to standard output.

    Args:
        initial_epsilon (float): Value of ϵ used for the first run of the
            private algorithms. Defaults to 150.

    Raises:
        ValueError: If a numeric prompt receives a non-numeric answer, or if
            the dataset has no rows after the value lists are expanded.
    """
    # Preprocess the dataset
    df = run_data_processor()

    # The original message interpolated `latest_file`, a name that is only
    # assigned in code that is no longer executed, so it raised NameError.
    print(f"Processing {Style.BRIGHT}dataset{Style.RESET_ALL}")

    # Step 1: Set value for error metric
    metric, Lp, p = _prompt_metric_target()

    # Step 2: Set the user with more data
    # Cells in "values" may be stored as the string form of a list; parse
    # those, then expand to one row per individual value.
    df['values'] = df['values'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    df = df.explode("values", ignore_index=True).rename(columns={"values": "value"})
    if df.empty:
        raise ValueError("The preprocessed dataset contains no records.")

    max_user = df["user"].value_counts().idxmax()  # user with the most rows
    df_user = df[df["user"] == max_user]

    # Step 3: Set k and m
    e = initial_epsilon
    while True:
        individual = IndividualMethod(df_user)
        k, m = individual.calculate_k_m()
        individual.execute_no_privacy()
        individual.execute_private_algorithms(e)
        algorithm = individual.select_algorithm()

        print(f"\n Do you want to test with another value of ϵ? (yes/no): ")
        if input().strip().lower() in ("no", "n"):
            break
        # Convert to a number: the first run passes a numeric ϵ, so handing
        # over the raw input() string would change the argument type.
        e = float(input("⭢ Enter the value of ϵ: "))

    # Step 4: Execute utility error
    headers = ["Element", "Real Frequency", "Real Percentage", "Estimated Frequency", "Estimated Percentage", "Estimation Difference", "Percentage Error"]
    results = []
    for user in df["user"].unique():
        print(f"Processing user {user}")
        df_user_specific = df[df["user"] == user]

        optimizer = PrivacyUtilityOptimizer(df_user_specific, k, m, algorithm)
        user_epsilon, _, _, data_table = optimizer.utility_error(Lp, p, metric)

        # Keep the user label next to its result instead of recovering it
        # later by position.
        results.append((user, user_epsilon, pd.DataFrame(data_table, columns=headers)))

    for user, user_epsilon, table in results:
        print(f"\nUser: {user}, ϵ:{user_epsilon}, k:{k}, m:{m}")
        print(tabulate(table, headers='keys', tablefmt='fancy_grid'))

# Script entry point: start the interactive workflow only when this file is run directly, not when it is imported.
91if __name__ == "__main__":
92    run_general_method()
def _prompt_metric_target():
    """Ask which error metric to optimize and the target value to reach.

    The menu is shown again until one of the three listed options is
    entered, so the caller always receives a complete triple.

    Returns:
        tuple: ``(metric, Lp, p)`` where ``metric`` is the menu choice as a
        string (``"1"``, ``"2"`` or ``"3"``), ``Lp`` is the target error
        entered by the user (float) and ``p`` is 2 for MSE, the
        user-supplied value for LP, or 1 for percentage error.

    Raises:
        ValueError: If a numeric answer cannot be parsed as a float.
    """
    print("📊 Selection of the Optimization Metric")
    while True:
        metric = input(f"\nEnter the metric to optimize: \n1. {Fore.CYAN}MSE{Style.RESET_ALL}\n2. {Fore.CYAN}LP{Style.RESET_ALL}\n3. {Fore.CYAN}Porcentual Error{Style.RESET_ALL} \nSelect:  ").strip()
        if metric == "1":
            Lp = float(input("⭢ Enter the MSE to reach: "))
            return metric, Lp, 2
        if metric == "2":
            Lp = float(input("⭢ Enter the Lp to reach: "))
            p = float(input("⭢ Enter the type of error ρ: "))
            return metric, Lp, p
        if metric == "3":
            Lp = float(input(f"⭢ Enter the {Fore.CYAN}Porcentual Error{Style.RESET_ALL} to reach: "))
            return metric, Lp, 1
        # Previously an unknown option fell through silently and the run
        # crashed much later with a NameError on Lp / p.
        print("Invalid option, please enter 1, 2 or 3.")


def run_general_method(initial_epsilon=150):
    """
    Executes the general method for optimizing privacy and utility trade-offs.

    Steps:
    1. Loads the preprocessed dataset through run_data_processor().
    2. Selects the error metric to optimize (MSE, LP, or Percentage Error).
    3. Identifies the user with the most data in the dataset.
    4. Calculates k and m with IndividualMethod, runs the no-privacy and
       private algorithms for that user and selects an algorithm, repeating
       with a new ϵ for as long as the operator asks.
    5. Runs PrivacyUtilityOptimizer.utility_error for every user and prints
       one result table per user.

    The function is interactive: it reads its choices from standard input
    and writes its results to standard output.

    Args:
        initial_epsilon (float): Value of ϵ used for the first run of the
            private algorithms. Defaults to 150.

    Raises:
        ValueError: If a numeric prompt receives a non-numeric answer, or if
            the dataset has no rows after the value lists are expanded.
    """
    # Preprocess the dataset
    df = run_data_processor()

    # The original message interpolated `latest_file`, a name that is only
    # assigned in code that is no longer executed, so it raised NameError.
    print(f"Processing {Style.BRIGHT}dataset{Style.RESET_ALL}")

    # Step 1: Set value for error metric
    metric, Lp, p = _prompt_metric_target()

    # Step 2: Set the user with more data
    # Cells in "values" may be stored as the string form of a list; parse
    # those, then expand to one row per individual value.
    df['values'] = df['values'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    df = df.explode("values", ignore_index=True).rename(columns={"values": "value"})
    if df.empty:
        raise ValueError("The preprocessed dataset contains no records.")

    max_user = df["user"].value_counts().idxmax()  # user with the most rows
    df_user = df[df["user"] == max_user]

    # Step 3: Set k and m
    e = initial_epsilon
    while True:
        individual = IndividualMethod(df_user)
        k, m = individual.calculate_k_m()
        individual.execute_no_privacy()
        individual.execute_private_algorithms(e)
        algorithm = individual.select_algorithm()

        print(f"\n Do you want to test with another value of ϵ? (yes/no): ")
        if input().strip().lower() in ("no", "n"):
            break
        # Convert to a number: the first run passes a numeric ϵ, so handing
        # over the raw input() string would change the argument type.
        e = float(input("⭢ Enter the value of ϵ: "))

    # Step 4: Execute utility error
    headers = ["Element", "Real Frequency", "Real Percentage", "Estimated Frequency", "Estimated Percentage", "Estimation Difference", "Percentage Error"]
    results = []
    for user in df["user"].unique():
        print(f"Processing user {user}")
        df_user_specific = df[df["user"] == user]

        optimizer = PrivacyUtilityOptimizer(df_user_specific, k, m, algorithm)
        user_epsilon, _, _, data_table = optimizer.utility_error(Lp, p, metric)

        # Keep the user label next to its result instead of recovering it
        # later by position.
        results.append((user, user_epsilon, pd.DataFrame(data_table, columns=headers)))

    for user, user_epsilon, table in results:
        print(f"\nUser: {user}, ϵ:{user_epsilon}, k:{k}, m:{m}")
        print(tabulate(table, headers='keys', tablefmt='fancy_grid'))

Executes the general method for optimizing privacy and utility trade-offs.

Steps:

  1. Selects the error metric to optimize (MSE, LP, or Percentage Error).
  2. Identifies the user with the most data in the dataset.
  3. Calculates k and m values using the IndividualMethod class.
  4. Executes no-privacy and private algorithms.
  5. Optimizes privacy-utility trade-off for each user.

Args: None. The dataset (a pd.DataFrame containing user data with frequency values) is not passed in by the caller; the function obtains it itself by calling run_data_processor().