src.individual_method

  1import os
  2import math
  3import pandas as pd
  4import numpy as np
  5from tabulate import tabulate
  6from colorama import Fore, Style
  7from rich.progress import Progress
  8
  9# Importing CMeS functions
 10from private_count_mean.private_cms_server import run_private_cms_server
 11from private_count_mean.private_cms_client import run_private_cms_client
 12from private_count_mean.cms_client_mean import run_cms_client_mean
 13
 14# Importing data preprocessing functions
 15from scripts.preprocess import run_data_processor
 16from scripts.parameter_fitting import run_parameter_fitting
 17
 18# Importing HCMS functions
 19from private_hadamard_count_mean.private_hcms_client import run_private_hcms_client
 20from private_hadamard_count_mean.private_hcms_server import run_private_hcms_server
 21
 22
 23class IndividualMethod:
 24    """
 25    This class represents the execution of various algorithms for private frequency estimation.
 26    It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.
 27    """
 28    def __init__(self, df=None,  k=None, m=None, algorithm=None):
 29        """
 30        Initializes the IndividualMethod instance.
 31
 32        :param df: The input dataset as a pandas DataFrame.
 33        :param k: The number of hash functions for the sketching algorithm.
 34        :param m: The number of bins in the sketching algorithm.
 35        :param algorithm: The selected algorithm for execution.
 36        """
 37        self.df = df
 38        self.k = k
 39        self.m = m
 40        self.algorithm = algorithm
 41    
 42    def preprocess_data(self):
 43        """Step 1: Data preprocessing by loading and filtering the dataset."""
 44        self.df = run_data_processor()
 45    
 46    def calculate_k_m(self):
 47        """
 48        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.
 49        
 50        :return: The computed values of k and m.
 51        """
 52        print("\nπŸ“‚ Calculating k and m ... ")
 53        f = float(input("β†’ Enter the failure probability ΞΆ: "))
 54        E = float(input("β†’ Enter the overestimation factor Ξ·: "))
 55
 56        self.k = int(1 / f)
 57        self.m = int(2.71828 / E )
 58
 59        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
 60        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
 61        return self.k, self.m
 62        
 63    def execute_no_privacy(self):
 64        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection."""
 65        headers=[
 66            "Element", "Real Frequency", "Real Percentage", 
 67            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 68            "Percentage Error"
 69        ]
 70        
 71        print("\nπŸ“Š Calculing CMeS without privacy")
 72        data_table = run_cms_client_mean(self.k, self.m, self.df)
 73        print(tabulate(data_table, headers=headers, tablefmt="fancy_grid"))
 74
 75    def execute_private_algorithms(self):
 76        """Step 4: Execute privacy-preserving algorithms (CMeS and HCMS)."""
 77        print("\nπŸ” Searching parameters k and m ...")
 78        e = 150   
 79        k_values = [self.k, 16, 128, 1024, 32768]
 80        m_values = [self.m, 16, 1024, 256, 256]
 81
 82        results = {"CMeS": [], "HCMS": []}
 83
 84        headers=[
 85            "Element", "Real Frequency", "Real Percentage", 
 86            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 87            "Percentage Error"
 88        ]
 89         
 90        for k, m in zip(k_values, m_values):
 91            for algorithm, client in zip(["CMeS", "HCMS"], [run_private_cms_client, run_private_hcms_client]):
 92                
 93                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ξ΅: {e}")
 94                if algorithm == "HCMS":
 95                    if math.log2(m).is_integer() == False:
 96                        m = 2 ** math.ceil(math.log2(m))
 97                        print(f"{Fore.RED}Adjusting m to a power of 2 β†’ m = {m}{Style.RESET_ALL}")
 98
 99                _, data_table, _, _,_ = client(k, m, e, self.df)
100
101                data_dicts = [dict(zip(headers, row)) for row in data_table]
102
103                for data_dict in data_dicts:
104                    results[algorithm].append([
105                        k, m, 
106                        data_dict.get("Element", ""),
107                        data_dict.get("Real Frequency", ""),
108                        data_dict.get("Real Percentage", ""),
109                        data_dict.get("Estimated Frequency", ""),
110                        data_dict.get("Estimated Percentage", ""),
111                        data_dict.get("Estimation Difference", ""),
112                        data_dict.get("Percentage Error", ""),
113                    ])
114        
115
116        for algo, table in results.items():
117            print(f"\n πŸ”Results for {Fore.CYAN}{algo}{Style.RESET_ALL}")
118            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))
119    
120    def select_algorithm(self):
121        """Step 5: Choose an algorithm and specify k and m values."""
122        print(f"\nπŸ” Selecting an parameters and algorithm ...")
123        self.k = int(input("β†’ Enter the value of k: "))
124        self.m = int(input("β†’ Enter the value of m: "))
125        self.algorithm = input("β†’ Enter the algorithm to execute:\n  1. Count-Mean Sketch\n  2. Hadamard Count-Mean Sketch\nSelect: ")
126        return self.algorithm
127    
128    def execute_algorithms(self):
129        """Step 6: Perform parameter fitting and execute the selected server algorithm."""
130        print("\nπŸ”„ Executing personalized privacy ...")
131        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)
132
133
134        print("\nβš™οΈ Running server ...")
135        if self.algorithm == '1':
136            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
137        elif self.algorithm == '2':
138            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)
139
140        print("\nProcess done and results saved.")
141
def _confirm(prompt):
    """Ask a yes/no question; return True only for "yes" or "y" (case-insensitive, surrounding spaces ignored)."""
    return input(prompt).strip().lower() in ("yes", "y")


def main(step=1):
    """
    Main function to run the step-by-step execution of the method.

    :param step: Step to start from (1 to 4). Step 1 loads the data and falls
        through to step 2; steps 2 and 3 repeat or advance depending on the
        user's confirmation; step 4 runs the final execution and ends the loop.
        NOTE(review): starting at a step above 1 skips preprocessing, so the
        freshly created experiment has no dataset loaded.
    :raises ValueError: If ``step`` is not 1, 2, 3 or 4. Such a value matched
        no branch before, which made the loop spin forever.
    """
    if step not in (1, 2, 3, 4):
        raise ValueError(f"step must be 1, 2, 3 or 4, got {step!r}")

    experiment = IndividualMethod()
    while True:
        if step == 1:
            # Step 1: Data preprocessing
            experiment.preprocess_data()
            step = 2

        # Deliberately "if", not "elif": step 1 continues straight into step 2.
        if step == 2:
            # Step 2: Calculate k and m
            experiment.calculate_k_m()

            # Step 3: Execute no privacy algorithms
            experiment.execute_no_privacy()

            step = 3 if _confirm("Are you satisfied with the results? (yes/no): ") else 2

        elif step == 3:
            # Step 4: Execute private algorithms
            experiment.execute_private_algorithms()

            # Step 5: Choose an algorithm, k and m
            experiment.select_algorithm()

            # A negative answer restarts from the parameter calculation.
            step = 4 if _confirm("Are you satisfied with the results? (yes/no): ") else 2

        elif step == 4:
            # Step 6: Parameter fitting and execute server
            experiment.execute_algorithms()
            break
178    
179
180if __name__ == "__main__":
181    main()
class IndividualMethod:
 24class IndividualMethod:
 25    """
 26    This class represents the execution of various algorithms for private frequency estimation.
 27    It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.
 28    """
 29    def __init__(self, df=None,  k=None, m=None, algorithm=None):
 30        """
 31        Initializes the IndividualMethod instance.
 32
 33        :param df: The input dataset as a pandas DataFrame.
 34        :param k: The number of hash functions for the sketching algorithm.
 35        :param m: The number of bins in the sketching algorithm.
 36        :param algorithm: The selected algorithm for execution.
 37        """
 38        self.df = df
 39        self.k = k
 40        self.m = m
 41        self.algorithm = algorithm
 42    
 43    def preprocess_data(self):
 44        """Step 1: Data preprocessing by loading and filtering the dataset."""
 45        self.df = run_data_processor()
 46    
 47    def calculate_k_m(self):
 48        """
 49        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.
 50        
 51        :return: The computed values of k and m.
 52        """
 53        print("\nπŸ“‚ Calculating k and m ... ")
 54        f = float(input("β†’ Enter the failure probability ΞΆ: "))
 55        E = float(input("β†’ Enter the overestimation factor Ξ·: "))
 56
 57        self.k = int(1 / f)
 58        self.m = int(2.71828 / E )
 59
 60        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
 61        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
 62        return self.k, self.m
 63        
 64    def execute_no_privacy(self):
 65        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection."""
 66        headers=[
 67            "Element", "Real Frequency", "Real Percentage", 
 68            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 69            "Percentage Error"
 70        ]
 71        
 72        print("\nπŸ“Š Calculing CMeS without privacy")
 73        data_table = run_cms_client_mean(self.k, self.m, self.df)
 74        print(tabulate(data_table, headers=headers, tablefmt="fancy_grid"))
 75
 76    def execute_private_algorithms(self):
 77        """Step 4: Execute privacy-preserving algorithms (CMeS and HCMS)."""
 78        print("\nπŸ” Searching parameters k and m ...")
 79        e = 150   
 80        k_values = [self.k, 16, 128, 1024, 32768]
 81        m_values = [self.m, 16, 1024, 256, 256]
 82
 83        results = {"CMeS": [], "HCMS": []}
 84
 85        headers=[
 86            "Element", "Real Frequency", "Real Percentage", 
 87            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 88            "Percentage Error"
 89        ]
 90         
 91        for k, m in zip(k_values, m_values):
 92            for algorithm, client in zip(["CMeS", "HCMS"], [run_private_cms_client, run_private_hcms_client]):
 93                
 94                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ξ΅: {e}")
 95                if algorithm == "HCMS":
 96                    if math.log2(m).is_integer() == False:
 97                        m = 2 ** math.ceil(math.log2(m))
 98                        print(f"{Fore.RED}Adjusting m to a power of 2 β†’ m = {m}{Style.RESET_ALL}")
 99
100                _, data_table, _, _,_ = client(k, m, e, self.df)
101
102                data_dicts = [dict(zip(headers, row)) for row in data_table]
103
104                for data_dict in data_dicts:
105                    results[algorithm].append([
106                        k, m, 
107                        data_dict.get("Element", ""),
108                        data_dict.get("Real Frequency", ""),
109                        data_dict.get("Real Percentage", ""),
110                        data_dict.get("Estimated Frequency", ""),
111                        data_dict.get("Estimated Percentage", ""),
112                        data_dict.get("Estimation Difference", ""),
113                        data_dict.get("Percentage Error", ""),
114                    ])
115        
116
117        for algo, table in results.items():
118            print(f"\n πŸ”Results for {Fore.CYAN}{algo}{Style.RESET_ALL}")
119            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))
120    
121    def select_algorithm(self):
122        """Step 5: Choose an algorithm and specify k and m values."""
123        print(f"\nπŸ” Selecting an parameters and algorithm ...")
124        self.k = int(input("β†’ Enter the value of k: "))
125        self.m = int(input("β†’ Enter the value of m: "))
126        self.algorithm = input("β†’ Enter the algorithm to execute:\n  1. Count-Mean Sketch\n  2. Hadamard Count-Mean Sketch\nSelect: ")
127        return self.algorithm
128    
129    def execute_algorithms(self):
130        """Step 6: Perform parameter fitting and execute the selected server algorithm."""
131        print("\nπŸ”„ Executing personalized privacy ...")
132        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)
133
134
135        print("\nβš™οΈ Running server ...")
136        if self.algorithm == '1':
137            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
138        elif self.algorithm == '2':
139            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)
140
141        print("\nProcess done and results saved.")

This class represents the execution of various algorithms for private frequency estimation. It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.

IndividualMethod(df=None, k=None, m=None, algorithm=None)
29    def __init__(self, df=None,  k=None, m=None, algorithm=None):
30        """
31        Initializes the IndividualMethod instance.
32
33        :param df: The input dataset as a pandas DataFrame.
34        :param k: The number of hash functions for the sketching algorithm.
35        :param m: The number of bins in the sketching algorithm.
36        :param algorithm: The selected algorithm for execution.
37        """
38        self.df = df
39        self.k = k
40        self.m = m
41        self.algorithm = algorithm

Initializes the IndividualMethod instance.

Parameters
  • df: The input dataset as a pandas DataFrame.
  • k: The number of hash functions for the sketching algorithm.
  • m: The number of bins in the sketching algorithm.
  • algorithm: The selected algorithm for execution.
df
k
m
algorithm
def preprocess_data(self):
43    def preprocess_data(self):
44        """Step 1: Data preprocessing by loading and filtering the dataset."""
45        self.df = run_data_processor()

Step 1: Data preprocessing by loading and filtering the dataset.

def calculate_k_m(self):
47    def calculate_k_m(self):
48        """
49        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.
50        
51        :return: The computed values of k and m.
52        """
53        print("\nπŸ“‚ Calculating k and m ... ")
54        f = float(input("β†’ Enter the failure probability ΞΆ: "))
55        E = float(input("β†’ Enter the overestimation factor Ξ·: "))
56
57        self.k = int(1 / f)
58        self.m = int(2.71828 / E )
59
60        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
61        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
62        return self.k, self.m

Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.

Returns

The computed values of k and m.

def execute_no_privacy(self):
64    def execute_no_privacy(self):
65        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection."""
66        headers=[
67            "Element", "Real Frequency", "Real Percentage", 
68            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
69            "Percentage Error"
70        ]
71        
72        print("\nπŸ“Š Calculing CMeS without privacy")
73        data_table = run_cms_client_mean(self.k, self.m, self.df)
74        print(tabulate(data_table, headers=headers, tablefmt="fancy_grid"))

Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection.

def execute_private_algorithms(self):
 76    def execute_private_algorithms(self):
 77        """Step 4: Execute privacy-preserving algorithms (CMeS and HCMS)."""
 78        print("\nπŸ” Searching parameters k and m ...")
 79        e = 150   
 80        k_values = [self.k, 16, 128, 1024, 32768]
 81        m_values = [self.m, 16, 1024, 256, 256]
 82
 83        results = {"CMeS": [], "HCMS": []}
 84
 85        headers=[
 86            "Element", "Real Frequency", "Real Percentage", 
 87            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 88            "Percentage Error"
 89        ]
 90         
 91        for k, m in zip(k_values, m_values):
 92            for algorithm, client in zip(["CMeS", "HCMS"], [run_private_cms_client, run_private_hcms_client]):
 93                
 94                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ξ΅: {e}")
 95                if algorithm == "HCMS":
 96                    if math.log2(m).is_integer() == False:
 97                        m = 2 ** math.ceil(math.log2(m))
 98                        print(f"{Fore.RED}Adjusting m to a power of 2 β†’ m = {m}{Style.RESET_ALL}")
 99
100                _, data_table, _, _,_ = client(k, m, e, self.df)
101
102                data_dicts = [dict(zip(headers, row)) for row in data_table]
103
104                for data_dict in data_dicts:
105                    results[algorithm].append([
106                        k, m, 
107                        data_dict.get("Element", ""),
108                        data_dict.get("Real Frequency", ""),
109                        data_dict.get("Real Percentage", ""),
110                        data_dict.get("Estimated Frequency", ""),
111                        data_dict.get("Estimated Percentage", ""),
112                        data_dict.get("Estimation Difference", ""),
113                        data_dict.get("Percentage Error", ""),
114                    ])
115        
116
117        for algo, table in results.items():
118            print(f"\n πŸ”Results for {Fore.CYAN}{algo}{Style.RESET_ALL}")
119            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))

Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).

def select_algorithm(self):
121    def select_algorithm(self):
122        """Step 5: Choose an algorithm and specify k and m values."""
123        print(f"\nπŸ” Selecting an parameters and algorithm ...")
124        self.k = int(input("β†’ Enter the value of k: "))
125        self.m = int(input("β†’ Enter the value of m: "))
126        self.algorithm = input("β†’ Enter the algorithm to execute:\n  1. Count-Mean Sketch\n  2. Hadamard Count-Mean Sketch\nSelect: ")
127        return self.algorithm

Step 5: Choose an algorithm and specify k and m values.

def execute_algorithms(self):
129    def execute_algorithms(self):
130        """Step 6: Perform parameter fitting and execute the selected server algorithm."""
131        print("\nπŸ”„ Executing personalized privacy ...")
132        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)
133
134
135        print("\nβš™οΈ Running server ...")
136        if self.algorithm == '1':
137            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
138        elif self.algorithm == '2':
139            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)
140
141        print("\nProcess done and results saved.")

Step 6: Perform parameter fitting and execute the selected server algorithm.

def main(step=1):
143def main(step=1):
144    """Main function to run the step-by-step execution of the method."""
145    experiment = IndividualMethod()
146    while True:
147        if step == 1:
148            # Step 1: Data preprocessing
149            experiment.preprocess_data()
150            step = 2
151    
152        if step == 2:
153            #Step 2: Calculate k and m
154            experiment.calculate_k_m()
155
156            # Step 3: Execute no privacy algorithms
157            experiment.execute_no_privacy()
158
159            if input("Are you satisfied with the results? (yes/no): ") == 'yes':
160                step = 3
161            else:
162                step = 2
163                
164        elif step == 3:
165            # Step 4: Execute private algorithms
166            experiment.execute_private_algorithms()
167
168            # Step 5: Choose an algorithm, k and m
169            experiment.select_algorithm()
170            if input("Are you satisfied with the results? (yes/no): ") == 'yes':
171                step = 4
172            else:
173                step = 2
174
175        elif step == 4:
176            # Step 6: Parameter fitting and execute server
177            experiment.execute_algorithms()
178            break

Main function to run the step-by-step execution of the method.