src.main.individual_method

  1import math
  2from tabulate import tabulate
  3from colorama import Fore, Style
  4
  5# Importing CMeS functions
  6from count_mean.private_cms_server import run_private_cms_server
  7from count_mean.private_cms_client import run_private_cms_client
  8from count_mean.cms_client_mean import run_cms_client_mean
  9
 10# Importing data preprocessing functions
 11from scripts.preprocess import run_data_processor
 12from scripts.parameter_fitting import run_parameter_fitting
 13
 14# Importing HCMS functions
 15from hadamard_count_mean.private_hcms_client import run_private_hcms_client
 16from hadamard_count_mean.private_hcms_server import run_private_hcms_server
 17
 18
 19class IndividualMethod:
 20    """
 21    This class represents the execution of various algorithms for private frequency estimation.
 22    It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.
 23    """
 24    def __init__(self, df=None,  k=None, m=None, algorithm=None):
 25        """
 26        Initializes the IndividualMethod instance.
 27
 28        :param df: The input dataset as a pandas DataFrame.
 29        :param k: The number of hash functions for the sketching algorithm.
 30        :param m: The number of bins in the sketching algorithm.
 31        :param algorithm: The selected algorithm for execution.
 32        """
 33        self.df = df
 34        self.k = k
 35        self.m = m
 36        self.algorithm = algorithm
 37    
 38    def preprocess_data(self):
 39        """Step 1: Data preprocessing by loading and filtering the dataset."""
 40        self.df = run_data_processor()
 41    
 42    def calculate_k_m(self):
 43        """
 44        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.
 45        
 46        :return: The computed values of k and m.
 47        """
 48        print("\nπŸ“‚ Calculating k and m ... ")
 49        f = float(input("β†’ Enter the failure probability Ξ΄: "))
 50        E = float(input("β†’ Enter the overestimation factor Ξ΅: "))
 51
 52        self.k = int(1 / f)
 53        self.m = int(2.71828 / E )
 54
 55        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
 56        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
 57        return self.k, self.m
 58        
 59    def execute_no_privacy(self):
 60        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection."""
 61        headers=[
 62            "Element", "Real Frequency", "Real Percentage", 
 63            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 64            "Percentage Error"
 65        ]
 66        
 67        print("\nπŸ“Š Calculing CMeS without privacy")
 68        data_table = run_cms_client_mean(self.k, self.m, self.df)
 69        print(tabulate(data_table, headers=headers, tablefmt="fancy_grid"))
 70
 71    def execute_private_algorithms(self, e=150):
 72        """Step 4: Execute privacy-preserving algorithms (CMeS and HCMS)."""
 73        print("\nπŸ” Searching parameters k and m ...")  
 74        k_values = [self.k, 16, 128, 1024, 32768]
 75        m_values = [self.m, 16, 1024, 256, 256]
 76
 77        results = {"PCMeS": [], "PHCMS": []}
 78
 79        headers=[
 80            "Element", "Real Frequency", "Real Percentage", 
 81            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 82            "Percentage Error"
 83        ]
 84         
 85        for k, m in zip(k_values, m_values):
 86            for algorithm, client in zip(["PCMeS", "PHCMS"], [run_private_cms_client, run_private_hcms_client]):
 87                
 88                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ο΅: {e}")
 89                if algorithm == "PHCMS":
 90                    if math.log2(m).is_integer() == False:
 91                        m = 2 ** math.ceil(math.log2(m))
 92                        print(f"{Fore.RED}Adjusting m to a power of 2 β†’ m = {m}{Style.RESET_ALL}")
 93
 94                _, data_table, _, _,_ = client(k, m, e, self.df)
 95
 96                data_dicts = [dict(zip(headers, row)) for row in data_table]
 97
 98                for data_dict in data_dicts:
 99                    results[algorithm].append([
100                        k, m, 
101                        data_dict.get("Element", ""),
102                        data_dict.get("Real Frequency", ""),
103                        data_dict.get("Real Percentage", ""),
104                        data_dict.get("Estimated Frequency", ""),
105                        data_dict.get("Estimated Percentage", ""),
106                        data_dict.get("Estimation Difference", ""),
107                        data_dict.get("Percentage Error", ""),
108                    ])
109        
110
111        for algo, table in results.items():
112            print(f"\n πŸ”Results for {Fore.CYAN}{algo}{Style.RESET_ALL}")
113            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))
114    
115    def select_algorithm(self):
116        """Step 5: Choose an algorithm and specify k and m values."""
117        print(f"\nπŸ” Selecting an parameters and algorithm ...")
118        self.k = int(input("β†’ Enter the value of k: "))
119        self.m = int(input("β†’ Enter the value of m: "))
120        self.algorithm = input("β†’ Enter the algorithm to execute:\n  1. Count-Mean Sketch\n  2. Hadamard Count-Mean Sketch\nSelect: ")
121        return self.algorithm
122    
123    def execute_algorithms(self):
124        """Step 6: Perform parameter fitting and execute the selected server algorithm."""
125        print("\nπŸ”„ Executing personalized privacy ...")
126        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)
127
128
129        print("\nβš™οΈ Running server ...")
130        if self.algorithm == '1':
131            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
132        elif self.algorithm == '2':
133            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)
134
135        print("\nProcess done and results saved.")
136
def main(step=1):
    """
    Main function to run the step-by-step execution of the method.

    The workflow loops over four stages until the last one completes:
    1 loads the data, 2 computes k and m and runs the non-private sketch,
    3 runs the private algorithms and selects one, 4 fits parameters and
    runs the server.

    :param step: Stage to start from (1 to 4). Starting later than 1 skips
        the earlier stages, so whatever they would have stored on the
        experiment keeps its initial value of None.
    :raises ValueError: If ``step`` is not 1, 2, 3 or 4.
    """
    # No branch below handles any other value, so the loop would spin forever.
    if step not in (1, 2, 3, 4):
        raise ValueError(f"step must be 1, 2, 3 or 4, got {step!r}")

    def confirm(prompt):
        """Return True when the answer is 'yes' or 'y', ignoring case and surrounding spaces."""
        return input(prompt).strip().lower() in ("yes", "y")

    experiment = IndividualMethod()
    while True:
        if step == 1:
            # Step 1: Data preprocessing
            experiment.preprocess_data()
            step = 2

        if step == 2:
            # Step 2: Calculate k and m
            experiment.calculate_k_m()

            # Step 3: Execute no privacy algorithms
            experiment.execute_no_privacy()

            # Any other answer repeats this stage with new parameters.
            if confirm("Are you satisfied with the results? (yes/no): "):
                step = 3

        elif step == 3:
            # Step 4: Execute private algorithms
            experiment.execute_private_algorithms()

            if confirm("\nDo you want to change ϵ value? (yes/no): "):
                e1 = float(input("→ Enter the new value of ϵ: "))
                experiment.execute_private_algorithms(e1)

            # Step 5: Choose an algorithm, k and m
            experiment.select_algorithm()
            step = 4 if confirm("Are you satisfied with the results? (yes/no): ") else 2

        elif step == 4:
            # Step 6: Parameter fitting and execute server
            experiment.execute_algorithms()
            break
177    
178
if __name__ == "__main__":
    # Executed as a script: launch the interactive workflow from its first step.
    main()
class IndividualMethod:
 20class IndividualMethod:
 21    """
 22    This class represents the execution of various algorithms for private frequency estimation.
 23    It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.
 24    """
 25    def __init__(self, df=None,  k=None, m=None, algorithm=None):
 26        """
 27        Initializes the IndividualMethod instance.
 28
 29        :param df: The input dataset as a pandas DataFrame.
 30        :param k: The number of hash functions for the sketching algorithm.
 31        :param m: The number of bins in the sketching algorithm.
 32        :param algorithm: The selected algorithm for execution.
 33        """
 34        self.df = df
 35        self.k = k
 36        self.m = m
 37        self.algorithm = algorithm
 38    
 39    def preprocess_data(self):
 40        """Step 1: Data preprocessing by loading and filtering the dataset."""
 41        self.df = run_data_processor()
 42    
 43    def calculate_k_m(self):
 44        """
 45        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.
 46        
 47        :return: The computed values of k and m.
 48        """
 49        print("\nπŸ“‚ Calculating k and m ... ")
 50        f = float(input("β†’ Enter the failure probability Ξ΄: "))
 51        E = float(input("β†’ Enter the overestimation factor Ξ΅: "))
 52
 53        self.k = int(1 / f)
 54        self.m = int(2.71828 / E )
 55
 56        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
 57        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
 58        return self.k, self.m
 59        
 60    def execute_no_privacy(self):
 61        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection."""
 62        headers=[
 63            "Element", "Real Frequency", "Real Percentage", 
 64            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 65            "Percentage Error"
 66        ]
 67        
 68        print("\nπŸ“Š Calculing CMeS without privacy")
 69        data_table = run_cms_client_mean(self.k, self.m, self.df)
 70        print(tabulate(data_table, headers=headers, tablefmt="fancy_grid"))
 71
 72    def execute_private_algorithms(self, e=150):
 73        """Step 4: Execute privacy-preserving algorithms (CMeS and HCMS)."""
 74        print("\nπŸ” Searching parameters k and m ...")  
 75        k_values = [self.k, 16, 128, 1024, 32768]
 76        m_values = [self.m, 16, 1024, 256, 256]
 77
 78        results = {"PCMeS": [], "PHCMS": []}
 79
 80        headers=[
 81            "Element", "Real Frequency", "Real Percentage", 
 82            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 83            "Percentage Error"
 84        ]
 85         
 86        for k, m in zip(k_values, m_values):
 87            for algorithm, client in zip(["PCMeS", "PHCMS"], [run_private_cms_client, run_private_hcms_client]):
 88                
 89                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ο΅: {e}")
 90                if algorithm == "PHCMS":
 91                    if math.log2(m).is_integer() == False:
 92                        m = 2 ** math.ceil(math.log2(m))
 93                        print(f"{Fore.RED}Adjusting m to a power of 2 β†’ m = {m}{Style.RESET_ALL}")
 94
 95                _, data_table, _, _,_ = client(k, m, e, self.df)
 96
 97                data_dicts = [dict(zip(headers, row)) for row in data_table]
 98
 99                for data_dict in data_dicts:
100                    results[algorithm].append([
101                        k, m, 
102                        data_dict.get("Element", ""),
103                        data_dict.get("Real Frequency", ""),
104                        data_dict.get("Real Percentage", ""),
105                        data_dict.get("Estimated Frequency", ""),
106                        data_dict.get("Estimated Percentage", ""),
107                        data_dict.get("Estimation Difference", ""),
108                        data_dict.get("Percentage Error", ""),
109                    ])
110        
111
112        for algo, table in results.items():
113            print(f"\n πŸ”Results for {Fore.CYAN}{algo}{Style.RESET_ALL}")
114            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))
115    
116    def select_algorithm(self):
117        """Step 5: Choose an algorithm and specify k and m values."""
118        print(f"\nπŸ” Selecting an parameters and algorithm ...")
119        self.k = int(input("β†’ Enter the value of k: "))
120        self.m = int(input("β†’ Enter the value of m: "))
121        self.algorithm = input("β†’ Enter the algorithm to execute:\n  1. Count-Mean Sketch\n  2. Hadamard Count-Mean Sketch\nSelect: ")
122        return self.algorithm
123    
124    def execute_algorithms(self):
125        """Step 6: Perform parameter fitting and execute the selected server algorithm."""
126        print("\nπŸ”„ Executing personalized privacy ...")
127        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)
128
129
130        print("\nβš™οΈ Running server ...")
131        if self.algorithm == '1':
132            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
133        elif self.algorithm == '2':
134            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)
135
136        print("\nProcess done and results saved.")

This class represents the execution of various algorithms for private frequency estimation. It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.

IndividualMethod(df=None, k=None, m=None, algorithm=None)
25    def __init__(self, df=None,  k=None, m=None, algorithm=None):
26        """
27        Initializes the IndividualMethod instance.
28
29        :param df: The input dataset as a pandas DataFrame.
30        :param k: The number of hash functions for the sketching algorithm.
31        :param m: The number of bins in the sketching algorithm.
32        :param algorithm: The selected algorithm for execution.
33        """
34        self.df = df
35        self.k = k
36        self.m = m
37        self.algorithm = algorithm

Initializes the IndividualMethod instance.

Parameters
  • df: The input dataset as a pandas DataFrame.
  • k: The number of hash functions for the sketching algorithm.
  • m: The number of bins in the sketching algorithm.
  • algorithm: The selected algorithm for execution.
df
k
m
algorithm
def preprocess_data(self):
39    def preprocess_data(self):
40        """Step 1: Data preprocessing by loading and filtering the dataset."""
41        self.df = run_data_processor()

Step 1: Data preprocessing by loading and filtering the dataset.

def calculate_k_m(self):
43    def calculate_k_m(self):
44        """
45        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.
46        
47        :return: The computed values of k and m.
48        """
49        print("\nπŸ“‚ Calculating k and m ... ")
50        f = float(input("β†’ Enter the failure probability Ξ΄: "))
51        E = float(input("β†’ Enter the overestimation factor Ξ΅: "))
52
53        self.k = int(1 / f)
54        self.m = int(2.71828 / E )
55
56        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
57        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
58        return self.k, self.m

Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.

Returns

The computed values of k and m.

def execute_no_privacy(self):
60    def execute_no_privacy(self):
61        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection."""
62        headers=[
63            "Element", "Real Frequency", "Real Percentage", 
64            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
65            "Percentage Error"
66        ]
67        
68        print("\nπŸ“Š Calculing CMeS without privacy")
69        data_table = run_cms_client_mean(self.k, self.m, self.df)
70        print(tabulate(data_table, headers=headers, tablefmt="fancy_grid"))

Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection.

def execute_private_algorithms(self, e=150):
 72    def execute_private_algorithms(self, e=150):
 73        """Step 4: Execute privacy-preserving algorithms (CMeS and HCMS)."""
 74        print("\nπŸ” Searching parameters k and m ...")  
 75        k_values = [self.k, 16, 128, 1024, 32768]
 76        m_values = [self.m, 16, 1024, 256, 256]
 77
 78        results = {"PCMeS": [], "PHCMS": []}
 79
 80        headers=[
 81            "Element", "Real Frequency", "Real Percentage", 
 82            "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
 83            "Percentage Error"
 84        ]
 85         
 86        for k, m in zip(k_values, m_values):
 87            for algorithm, client in zip(["PCMeS", "PHCMS"], [run_private_cms_client, run_private_hcms_client]):
 88                
 89                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ο΅: {e}")
 90                if algorithm == "PHCMS":
 91                    if math.log2(m).is_integer() == False:
 92                        m = 2 ** math.ceil(math.log2(m))
 93                        print(f"{Fore.RED}Adjusting m to a power of 2 β†’ m = {m}{Style.RESET_ALL}")
 94
 95                _, data_table, _, _,_ = client(k, m, e, self.df)
 96
 97                data_dicts = [dict(zip(headers, row)) for row in data_table]
 98
 99                for data_dict in data_dicts:
100                    results[algorithm].append([
101                        k, m, 
102                        data_dict.get("Element", ""),
103                        data_dict.get("Real Frequency", ""),
104                        data_dict.get("Real Percentage", ""),
105                        data_dict.get("Estimated Frequency", ""),
106                        data_dict.get("Estimated Percentage", ""),
107                        data_dict.get("Estimation Difference", ""),
108                        data_dict.get("Percentage Error", ""),
109                    ])
110        
111
112        for algo, table in results.items():
113            print(f"\n πŸ”Results for {Fore.CYAN}{algo}{Style.RESET_ALL}")
114            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))

Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).

def select_algorithm(self):
116    def select_algorithm(self):
117        """Step 5: Choose an algorithm and specify k and m values."""
118        print(f"\nπŸ” Selecting an parameters and algorithm ...")
119        self.k = int(input("β†’ Enter the value of k: "))
120        self.m = int(input("β†’ Enter the value of m: "))
121        self.algorithm = input("β†’ Enter the algorithm to execute:\n  1. Count-Mean Sketch\n  2. Hadamard Count-Mean Sketch\nSelect: ")
122        return self.algorithm

Step 5: Choose an algorithm and specify k and m values.

def execute_algorithms(self):
124    def execute_algorithms(self):
125        """Step 6: Perform parameter fitting and execute the selected server algorithm."""
126        print("\nπŸ”„ Executing personalized privacy ...")
127        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)
128
129
130        print("\nβš™οΈ Running server ...")
131        if self.algorithm == '1':
132            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
133        elif self.algorithm == '2':
134            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)
135
136        print("\nProcess done and results saved.")

Step 6: Perform parameter fitting and execute the selected server algorithm.

def main(step=1):
def main(step=1):
    """
    Main function to run the step-by-step execution of the method.

    The workflow loops over four stages until the last one completes:
    1 loads the data, 2 computes k and m and runs the non-private sketch,
    3 runs the private algorithms and selects one, 4 fits parameters and
    runs the server.

    :param step: Stage to start from (1 to 4). Starting later than 1 skips
        the earlier stages, so whatever they would have stored on the
        experiment keeps its initial value of None.
    :raises ValueError: If ``step`` is not 1, 2, 3 or 4.
    """
    # No branch below handles any other value, so the loop would spin forever.
    if step not in (1, 2, 3, 4):
        raise ValueError(f"step must be 1, 2, 3 or 4, got {step!r}")

    def confirm(prompt):
        """Return True when the answer is 'yes' or 'y', ignoring case and surrounding spaces."""
        return input(prompt).strip().lower() in ("yes", "y")

    experiment = IndividualMethod()
    while True:
        if step == 1:
            # Step 1: Data preprocessing
            experiment.preprocess_data()
            step = 2

        if step == 2:
            # Step 2: Calculate k and m
            experiment.calculate_k_m()

            # Step 3: Execute no privacy algorithms
            experiment.execute_no_privacy()

            # Any other answer repeats this stage with new parameters.
            if confirm("Are you satisfied with the results? (yes/no): "):
                step = 3

        elif step == 3:
            # Step 4: Execute private algorithms
            experiment.execute_private_algorithms()

            if confirm("\nDo you want to change ϵ value? (yes/no): "):
                e1 = float(input("→ Enter the new value of ϵ: "))
                experiment.execute_private_algorithms(e1)

            # Step 5: Choose an algorithm, k and m
            experiment.select_algorithm()
            step = 4 if confirm("Are you satisfied with the results? (yes/no): ") else 2

        elif step == 4:
            # Step 6: Parameter fitting and execute server
            experiment.execute_algorithms()
            break

Main function to run the step-by-step execution of the method.