src.individual_method
import os
import math
import pandas as pd
import numpy as np
from tabulate import tabulate
from colorama import Fore, Style
from rich.progress import Progress

# Importing CMeS functions
from private_count_mean.private_cms_server import run_private_cms_server
from private_count_mean.private_cms_client import run_private_cms_client
from private_count_mean.cms_client_mean import run_cms_client_mean

# Importing data preprocessing functions
from scripts.preprocess import run_data_processor
from scripts.parameter_fitting import run_parameter_fitting

# Importing HCMS functions
from private_hadamard_count_mean.private_hcms_client import run_private_hcms_client
from private_hadamard_count_mean.private_hcms_server import run_private_hcms_server


class IndividualMethod:
    """
    This class represents the execution of various algorithms for private frequency estimation.
    It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.
    """
    def __init__(self, df=None, k=None, m=None, algorithm=None):
        """
        Initializes the IndividualMethod instance.

        :param df: The input dataset as a pandas DataFrame.
        :param k: The number of hash functions for the sketching algorithm.
        :param m: The number of bins in the sketching algorithm.
        :param algorithm: The selected algorithm for execution.
        """
        self.df = df
        self.k = k
        self.m = m
        self.algorithm = algorithm

    def preprocess_data(self):
        """Step 1: Data preprocessing by loading and filtering the dataset."""
        self.df = run_data_processor()

    def calculate_k_m(self):
        """
        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.

        Both values are read from standard input. k is 1 / (failure probability)
        and m is e / (overestimation factor), each rounded UP so the sketch is
        never smaller than the requested bounds need and never has zero size.

        :return: The computed values of k and m.
        :raises ValueError: If an answer is not a number, or is not a positive finite number.
        """
        print("\nπ Calculating k and m ... ")
        f = float(input("β Enter the failure probability ΞΆ: "))
        E = float(input("β Enter the overestimation factor Ξ·: "))

        # Reject zero, negative, NaN and infinite answers up front instead of
        # failing with ZeroDivisionError or producing an empty/negative sketch.
        for label, value in (("failure probability", f), ("overestimation factor", E)):
            if not (math.isfinite(value) and value > 0):
                raise ValueError(f"The {label} must be a positive finite number, got {value}.")

        self.k = math.ceil(1 / f)
        self.m = math.ceil(math.e / E)

        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
        return self.k, self.m

    def execute_no_privacy(self):
        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection."""
        headers = [
            "Element", "Real Frequency", "Real Percentage",
            "Estimated Frequency", "Estimated Percentage", "Estimation Difference",
            "Percentage Error"
        ]

        print("\nπ Calculing CMeS without privacy")
        data_table = run_cms_client_mean(self.k, self.m, self.df)
        print(tabulate(data_table, headers=headers, tablefmt="fancy_grid"))

    def execute_private_algorithms(self):
        """
        Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).

        Runs both private clients over a fixed grid of (k, m) pairs, the first
        pair being the instance's own k and m, with a fixed privacy parameter
        of 150, and prints one result table per algorithm.
        """
        print("\nπ Searching parameters k and m ...")
        e = 150
        k_values = [self.k, 16, 128, 1024, 32768]
        m_values = [self.m, 16, 1024, 256, 256]
        clients = (("CMeS", run_private_cms_client), ("HCMS", run_private_hcms_client))

        headers = [
            "Element", "Real Frequency", "Real Percentage",
            "Estimated Frequency", "Estimated Percentage", "Estimation Difference",
            "Percentage Error"
        ]
        results = {name: [] for name, _ in clients}

        for k, base_m in zip(k_values, m_values):
            for algorithm, client in clients:
                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {base_m} and Ξ΅: {e}")

                # Work on a per-run copy so the HCMS adjustment never leaks
                # into another run of the same grid point.
                m = base_m
                if algorithm == "HCMS":
                    # HCMS is run with m rounded up to a power of two. Integer
                    # bit arithmetic avoids math.log2, which raises for m <= 0.
                    width = max(1, math.ceil(base_m))
                    m = 1 << (width - 1).bit_length()
                    if m != base_m:
                        print(f"{Fore.RED}Adjusting m to a power of 2 β m = {m}{Style.RESET_ALL}")

                # The client returns five values; only the second (the table
                # of result rows) is used here.
                _, data_table, _, _, _ = client(k, m, e, self.df)

                for row in data_table:
                    # Keep one cell per header: extra cells are dropped and
                    # missing ones are shown as empty strings.
                    cells = list(row)[:len(headers)]
                    cells += [""] * (len(headers) - len(cells))
                    results[algorithm].append([k, m, *cells])

        for algo, table in results.items():
            print(f"\n πResults for {Fore.CYAN}{algo}{Style.RESET_ALL}")
            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))

    def select_algorithm(self):
        """
        Step 5: Choose an algorithm and specify k and m values.

        k, m and the menu option are read from standard input. The menu is
        shown again until the answer is '1' or '2'.

        :return: The selected option, '1' (Count-Mean Sketch) or '2' (Hadamard Count-Mean Sketch).
        :raises ValueError: If k or m is not an integer.
        """
        print("\nπ Selecting an parameters and algorithm ...")
        self.k = int(input("β Enter the value of k: "))
        self.m = int(input("β Enter the value of m: "))

        menu = "β Enter the algorithm to execute:\n 1. Count-Mean Sketch\n 2. Hadamard Count-Mean Sketch\nSelect: "
        choice = input(menu).strip()
        # Any other answer would be ignored later by execute_algorithms, so
        # insist on a valid option here.
        while choice not in ("1", "2"):
            print(f"Invalid option {choice!r}. Please enter 1 or 2.")
            choice = input(menu).strip()

        self.algorithm = choice
        return self.algorithm

    def execute_algorithms(self):
        """
        Step 6: Perform parameter fitting and execute the selected server algorithm.

        :raises ValueError: If the selected algorithm is neither '1' nor '2'.
        """
        # Fail before doing any work: an unknown option used to run no server
        # at all while still reporting that results had been saved.
        if self.algorithm not in ("1", "2"):
            raise ValueError(f"Unknown algorithm {self.algorithm!r}; expected '1' (CMeS) or '2' (HCMS).")

        print("\nπ Executing personalized privacy ...")
        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)

        print("\nβοΈ Running server ...")
        if self.algorithm == '1':
            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
        else:
            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)

        print("\nProcess done and results saved.")


def main(step=1):
    """
    Main function to run the step-by-step execution of the method.

    :param step: Stage to start from: 1 preprocesses the data, 2 computes k and m
        and runs the non-private sketch, 3 runs the private algorithms and asks
        for the final parameters, 4 runs parameter fitting and the server.
    :raises ValueError: If step is not 1, 2, 3 or 4.
    """
    # Any other value matches no branch below and would loop forever.
    if step not in (1, 2, 3, 4):
        raise ValueError(f"step must be 1, 2, 3 or 4, got {step!r}")

    def satisfied():
        # Accept the answer regardless of case or surrounding spaces.
        answer = input("Are you satisfied with the results? (yes/no): ")
        return answer.strip().lower() in ("yes", "y")

    experiment = IndividualMethod()
    while True:
        if step == 1:
            # Step 1: Data preprocessing
            experiment.preprocess_data()
            step = 2

        if step == 2:
            # Step 2: Calculate k and m
            experiment.calculate_k_m()

            # Step 3: Execute no privacy algorithms
            experiment.execute_no_privacy()

            # Stay on this stage until the user accepts the results.
            if satisfied():
                step = 3

        elif step == 3:
            # Step 4: Execute private algorithms
            experiment.execute_private_algorithms()

            # Step 5: Choose an algorithm, k and m
            experiment.select_algorithm()
            step = 4 if satisfied() else 2

        elif step == 4:
            # Step 6: Parameter fitting and execute server
            experiment.execute_algorithms()
            break


if __name__ == "__main__":
    main()
class IndividualMethod:
    """
    This class represents the execution of various algorithms for private frequency estimation.
    It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.
    """
    def __init__(self, df=None, k=None, m=None, algorithm=None):
        """
        Initializes the IndividualMethod instance.

        :param df: The input dataset as a pandas DataFrame.
        :param k: The number of hash functions for the sketching algorithm.
        :param m: The number of bins in the sketching algorithm.
        :param algorithm: The selected algorithm for execution.
        """
        self.df = df
        self.k = k
        self.m = m
        self.algorithm = algorithm

    def preprocess_data(self):
        """Step 1: Data preprocessing by loading and filtering the dataset."""
        self.df = run_data_processor()

    def calculate_k_m(self):
        """
        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.

        Both values are read from standard input. k is 1 / (failure probability)
        and m is e / (overestimation factor), each rounded UP so the sketch is
        never smaller than the requested bounds need and never has zero size.

        :return: The computed values of k and m.
        :raises ValueError: If an answer is not a number, or is not a positive finite number.
        """
        print("\nπ Calculating k and m ... ")
        f = float(input("β Enter the failure probability ΞΆ: "))
        E = float(input("β Enter the overestimation factor Ξ·: "))

        # Reject zero, negative, NaN and infinite answers up front instead of
        # failing with ZeroDivisionError or producing an empty/negative sketch.
        for label, value in (("failure probability", f), ("overestimation factor", E)):
            if not (math.isfinite(value) and value > 0):
                raise ValueError(f"The {label} must be a positive finite number, got {value}.")

        self.k = math.ceil(1 / f)
        self.m = math.ceil(math.e / E)

        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
        return self.k, self.m

    def execute_no_privacy(self):
        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection."""
        headers = [
            "Element", "Real Frequency", "Real Percentage",
            "Estimated Frequency", "Estimated Percentage", "Estimation Difference",
            "Percentage Error"
        ]

        print("\nπ Calculing CMeS without privacy")
        data_table = run_cms_client_mean(self.k, self.m, self.df)
        print(tabulate(data_table, headers=headers, tablefmt="fancy_grid"))

    def execute_private_algorithms(self):
        """
        Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).

        Runs both private clients over a fixed grid of (k, m) pairs, the first
        pair being the instance's own k and m, with a fixed privacy parameter
        of 150, and prints one result table per algorithm.
        """
        print("\nπ Searching parameters k and m ...")
        e = 150
        k_values = [self.k, 16, 128, 1024, 32768]
        m_values = [self.m, 16, 1024, 256, 256]
        clients = (("CMeS", run_private_cms_client), ("HCMS", run_private_hcms_client))

        headers = [
            "Element", "Real Frequency", "Real Percentage",
            "Estimated Frequency", "Estimated Percentage", "Estimation Difference",
            "Percentage Error"
        ]
        results = {name: [] for name, _ in clients}

        for k, base_m in zip(k_values, m_values):
            for algorithm, client in clients:
                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {base_m} and Ξ΅: {e}")

                # Work on a per-run copy so the HCMS adjustment never leaks
                # into another run of the same grid point.
                m = base_m
                if algorithm == "HCMS":
                    # HCMS is run with m rounded up to a power of two. Integer
                    # bit arithmetic avoids math.log2, which raises for m <= 0.
                    width = max(1, math.ceil(base_m))
                    m = 1 << (width - 1).bit_length()
                    if m != base_m:
                        print(f"{Fore.RED}Adjusting m to a power of 2 β m = {m}{Style.RESET_ALL}")

                # The client returns five values; only the second (the table
                # of result rows) is used here.
                _, data_table, _, _, _ = client(k, m, e, self.df)

                for row in data_table:
                    # Keep one cell per header: extra cells are dropped and
                    # missing ones are shown as empty strings.
                    cells = list(row)[:len(headers)]
                    cells += [""] * (len(headers) - len(cells))
                    results[algorithm].append([k, m, *cells])

        for algo, table in results.items():
            print(f"\n πResults for {Fore.CYAN}{algo}{Style.RESET_ALL}")
            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))

    def select_algorithm(self):
        """
        Step 5: Choose an algorithm and specify k and m values.

        k, m and the menu option are read from standard input. The menu is
        shown again until the answer is '1' or '2'.

        :return: The selected option, '1' (Count-Mean Sketch) or '2' (Hadamard Count-Mean Sketch).
        :raises ValueError: If k or m is not an integer.
        """
        print("\nπ Selecting an parameters and algorithm ...")
        self.k = int(input("β Enter the value of k: "))
        self.m = int(input("β Enter the value of m: "))

        menu = "β Enter the algorithm to execute:\n 1. Count-Mean Sketch\n 2. Hadamard Count-Mean Sketch\nSelect: "
        choice = input(menu).strip()
        # Any other answer would be ignored later by execute_algorithms, so
        # insist on a valid option here.
        while choice not in ("1", "2"):
            print(f"Invalid option {choice!r}. Please enter 1 or 2.")
            choice = input(menu).strip()

        self.algorithm = choice
        return self.algorithm

    def execute_algorithms(self):
        """
        Step 6: Perform parameter fitting and execute the selected server algorithm.

        :raises ValueError: If the selected algorithm is neither '1' nor '2'.
        """
        # Fail before doing any work: an unknown option used to run no server
        # at all while still reporting that results had been saved.
        if self.algorithm not in ("1", "2"):
            raise ValueError(f"Unknown algorithm {self.algorithm!r}; expected '1' (CMeS) or '2' (HCMS).")

        print("\nπ Executing personalized privacy ...")
        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)

        print("\nβοΈ Running server ...")
        if self.algorithm == '1':
            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
        else:
            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)

        print("\nProcess done and results saved.")
This class represents the execution of various algorithms for private frequency estimation. It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.
def __init__(self, df=None, k=None, m=None, algorithm=None):
    """
    Set up the IndividualMethod instance with its experiment state.

    :param df: The input dataset as a pandas DataFrame.
    :param k: The number of hash functions for the sketching algorithm.
    :param m: The number of bins in the sketching algorithm.
    :param algorithm: The selected algorithm for execution.
    """
    # Every argument is stored exactly as given; nothing is validated or copied.
    self.df, self.k, self.m, self.algorithm = df, k, m, algorithm
Initializes the IndividualMethod instance.
Parameters
- df: The input dataset as a pandas DataFrame.
- k: The number of hash functions for the sketching algorithm.
- m: The number of bins in the sketching algorithm.
- algorithm: The selected algorithm for execution.
def preprocess_data(self):
    """Step 1: Load and filter the dataset, keeping the result on the instance."""
    # run_data_processor takes no arguments; whatever it returns replaces
    # the dataset currently held in self.df.
    processed = run_data_processor()
    self.df = processed
Step 1: Data preprocessing by loading and filtering the dataset.
def calculate_k_m(self):
    """
    Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.

    Both values are read from standard input. k is 1 / (failure probability)
    and m is e / (overestimation factor), each rounded UP so the sketch is
    never smaller than the requested bounds need and never has zero size.

    :return: The computed values of k and m.
    :raises ValueError: If an answer is not a number, or is not a positive finite number.
    """
    print("\nπ Calculating k and m ... ")
    f = float(input("β Enter the failure probability ΞΆ: "))
    E = float(input("β Enter the overestimation factor Ξ·: "))

    # Reject zero, negative, NaN and infinite answers up front instead of
    # failing with ZeroDivisionError or producing an empty/negative sketch.
    for label, value in (("failure probability", f), ("overestimation factor", E)):
        if not (math.isfinite(value) and value > 0):
            raise ValueError(f"The {label} must be a positive finite number, got {value}.")

    self.k = math.ceil(1 / f)
    self.m = math.ceil(math.e / E)

    print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
    print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
    return self.k, self.m
Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.
Returns
The computed values of k and m.
def execute_no_privacy(self):
    """Step 3: Run the Count-Mean Sketch (CMeS) with no privacy protection and print its table."""
    print("\nπ Calculing CMeS without privacy")

    # Column titles, in the order the table is rendered.
    column_names = [
        "Element",
        "Real Frequency",
        "Real Percentage",
        "Estimated Frequency",
        "Estimated Percentage",
        "Estimation Difference",
        "Percentage Error",
    ]

    rows = run_cms_client_mean(self.k, self.m, self.df)
    rendered = tabulate(rows, headers=column_names, tablefmt="fancy_grid")
    print(rendered)
Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection.
def execute_private_algorithms(self):
    """
    Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).

    Runs both private clients over a fixed grid of (k, m) pairs, the first
    pair being the instance's own k and m, with a fixed privacy parameter
    of 150, and prints one result table per algorithm.
    """
    print("\nπ Searching parameters k and m ...")
    e = 150
    k_values = [self.k, 16, 128, 1024, 32768]
    m_values = [self.m, 16, 1024, 256, 256]
    clients = (("CMeS", run_private_cms_client), ("HCMS", run_private_hcms_client))

    headers = [
        "Element", "Real Frequency", "Real Percentage",
        "Estimated Frequency", "Estimated Percentage", "Estimation Difference",
        "Percentage Error"
    ]
    results = {name: [] for name, _ in clients}

    for k, base_m in zip(k_values, m_values):
        for algorithm, client in clients:
            print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {base_m} and Ξ΅: {e}")

            # Work on a per-run copy so the HCMS adjustment never leaks
            # into another run of the same grid point.
            m = base_m
            if algorithm == "HCMS":
                # HCMS is run with m rounded up to a power of two. Integer
                # bit arithmetic avoids math.log2, which raises for m <= 0.
                width = max(1, math.ceil(base_m))
                m = 1 << (width - 1).bit_length()
                if m != base_m:
                    print(f"{Fore.RED}Adjusting m to a power of 2 β m = {m}{Style.RESET_ALL}")

            # The client returns five values; only the second (the table
            # of result rows) is used here.
            _, data_table, _, _, _ = client(k, m, e, self.df)

            for row in data_table:
                # Keep one cell per header: extra cells are dropped and
                # missing ones are shown as empty strings.
                cells = list(row)[:len(headers)]
                cells += [""] * (len(headers) - len(cells))
                results[algorithm].append([k, m, *cells])

    for algo, table in results.items():
        print(f"\n πResults for {Fore.CYAN}{algo}{Style.RESET_ALL}")
        print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))
Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).
def select_algorithm(self):
    """
    Step 5: Choose an algorithm and specify k and m values.

    k, m and the menu option are read from standard input. The menu is
    shown again until the answer is '1' or '2'.

    :return: The selected option, '1' (Count-Mean Sketch) or '2' (Hadamard Count-Mean Sketch).
    :raises ValueError: If k or m is not an integer.
    """
    print("\nπ Selecting an parameters and algorithm ...")
    self.k = int(input("β Enter the value of k: "))
    self.m = int(input("β Enter the value of m: "))

    menu = "β Enter the algorithm to execute:\n 1. Count-Mean Sketch\n 2. Hadamard Count-Mean Sketch\nSelect: "
    choice = input(menu).strip()
    # Any other answer would be ignored later by execute_algorithms, so
    # insist on a valid option here.
    while choice not in ("1", "2"):
        print(f"Invalid option {choice!r}. Please enter 1 or 2.")
        choice = input(menu).strip()

    self.algorithm = choice
    return self.algorithm
Step 5: Choose an algorithm and specify k and m values.
def execute_algorithms(self):
    """
    Step 6: Perform parameter fitting and execute the selected server algorithm.

    :raises ValueError: If the selected algorithm is neither '1' nor '2'.
    """
    # Fail before doing any work: an unknown option used to run no server
    # at all while still reporting that results had been saved.
    if self.algorithm not in ("1", "2"):
        raise ValueError(f"Unknown algorithm {self.algorithm!r}; expected '1' (CMeS) or '2' (HCMS).")

    print("\nπ Executing personalized privacy ...")
    e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)

    print("\nβοΈ Running server ...")
    if self.algorithm == '1':
        run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
    else:
        run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)

    print("\nProcess done and results saved.")
Step 6: Perform parameter fitting and execute the selected server algorithm.
def main(step=1):
    """
    Main function to run the step-by-step execution of the method.

    :param step: Stage to start from: 1 preprocesses the data, 2 computes k and m
        and runs the non-private sketch, 3 runs the private algorithms and asks
        for the final parameters, 4 runs parameter fitting and the server.
    :raises ValueError: If step is not 1, 2, 3 or 4.
    """
    # Any other value matches no branch below and would loop forever.
    if step not in (1, 2, 3, 4):
        raise ValueError(f"step must be 1, 2, 3 or 4, got {step!r}")

    def satisfied():
        # Accept the answer regardless of case or surrounding spaces.
        answer = input("Are you satisfied with the results? (yes/no): ")
        return answer.strip().lower() in ("yes", "y")

    experiment = IndividualMethod()
    while True:
        if step == 1:
            # Step 1: Data preprocessing
            experiment.preprocess_data()
            step = 2

        if step == 2:
            # Step 2: Calculate k and m
            experiment.calculate_k_m()

            # Step 3: Execute no privacy algorithms
            experiment.execute_no_privacy()

            # Stay on this stage until the user accepts the results.
            if satisfied():
                step = 3

        elif step == 3:
            # Step 4: Execute private algorithms
            experiment.execute_private_algorithms()

            # Step 5: Choose an algorithm, k and m
            experiment.select_algorithm()
            step = 4 if satisfied() else 2

        elif step == 4:
            # Step 6: Parameter fitting and execute server
            experiment.execute_algorithms()
            break
Main function to run the step-by-step execution of the method.