src.main.individual_method
"""Interactive, step-by-step driver for private frequency estimation experiments."""
import math

from tabulate import tabulate
from colorama import Fore, Style

# Importing CMeS functions
from count_mean.private_cms_server import run_private_cms_server
from count_mean.private_cms_client import run_private_cms_client
from count_mean.cms_client_mean import run_cms_client_mean

# Importing data preprocessing functions
from scripts.preprocess import run_data_processor
from scripts.parameter_fitting import run_parameter_fitting

# Importing HCMS functions
from hadamard_count_mean.private_hcms_client import run_private_hcms_client
from hadamard_count_mean.private_hcms_server import run_private_hcms_server


class IndividualMethod:
    """
    This class represents the execution of various algorithms for private frequency estimation.
    It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.

    State is kept on the instance (``df``, ``k``, ``m``, ``algorithm``) and each
    method performs one step of the interactive workflow driven by ``main``.
    """

    # Column titles shared by every result table printed by this class.
    _HEADERS = (
        "Element", "Real Frequency", "Real Percentage",
        "Estimated Frequency", "Estimated Percentage", "Estimation Difference",
        "Percentage Error",
    )

    # Menu answers understood by select_algorithm and execute_algorithms.
    _ALGORITHM_CHOICES = ("1", "2")

    def __init__(self, df=None, k=None, m=None, algorithm=None):
        """
        Initializes the IndividualMethod instance.

        :param df: The input dataset as a pandas DataFrame.
        :param k: The number of hash functions for the sketching algorithm.
        :param m: The number of bins in the sketching algorithm.
        :param algorithm: The selected algorithm for execution.
        """
        self.df = df
        self.k = k
        self.m = m
        self.algorithm = algorithm

    def preprocess_data(self):
        """Step 1: Run the data processor and store what it returns in ``self.df``."""
        self.df = run_data_processor()

    def calculate_k_m(self):
        """
        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.

        Reads both values from standard input, then stores ``k = int(1 / delta)``
        and ``m = int(e / epsilon)`` (at least 1) on the instance.

        :return: The computed values of k and m.
        :raises ValueError: If an answer is not a number, the failure probability
            is outside (0, 1], or the overestimation factor is not positive.
        """
        print("\nπ Calculating k and m ... ")
        f = float(input("β Enter the failure probability Ξ΄: "))
        E = float(input("β Enter the overestimation factor Ξ΅: "))

        # Reject values that would divide by zero or give a zero/negative sketch size.
        # The negated comparisons also reject NaN.
        if not 0 < f <= 1:
            raise ValueError(f"failure probability must be in (0, 1], got {f}")
        if not E > 0:
            raise ValueError(f"overestimation factor must be positive, got {E}")

        self.k = int(1 / f)
        # math.e replaces the hand-typed 2.71828; clamp so a large factor never yields 0 bins.
        self.m = max(1, int(math.e / E))

        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
        return self.k, self.m

    def execute_no_privacy(self):
        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection and print its table."""
        print("\nπ Calculing CMeS without privacy")
        data_table = run_cms_client_mean(self.k, self.m, self.df)
        print(tabulate(data_table, headers=list(self._HEADERS), tablefmt="fancy_grid"))

    def execute_private_algorithms(self, e=150):
        """
        Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).

        Runs both private clients over five (k, m) pairs -- the instance's own
        ``self.k``/``self.m`` followed by four fixed pairs -- and prints one
        results table per algorithm.

        :param e: Value forwarded unchanged to each client and shown in the
            progress message (default 150).
        """
        print("\nπ Searching parameters k and m ...")
        k_values = [self.k, 16, 128, 1024, 32768]
        m_values = [self.m, 16, 1024, 256, 256]

        headers = list(self._HEADERS)
        clients = (
            ("PCMeS", run_private_cms_client),
            ("PHCMS", run_private_hcms_client),
        )
        results = {algorithm: [] for algorithm, _ in clients}

        for k, m in zip(k_values, m_values):
            for algorithm, client in clients:
                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ο΅: {e}")

                # Adjust a copy so the loop variable is never rebound and the
                # rounding cannot leak into another algorithm's run.
                bins = m
                if algorithm == "PHCMS" and not math.log2(bins).is_integer():
                    # The Hadamard variant is run with m rounded up to a power of two.
                    bins = 2 ** math.ceil(math.log2(bins))
                    print(f"{Fore.RED}Adjusting m to a power of 2 β m = {bins}{Style.RESET_ALL}")

                # Only the second of the five values returned by the client is used.
                _, data_table, _, _, _ = client(k, bins, e, self.df)

                for row in data_table:
                    # Missing trailing columns become "", extra ones are dropped.
                    record = dict(zip(headers, row))
                    results[algorithm].append([k, bins] + [record.get(title, "") for title in headers])

        for algo, table in results.items():
            print(f"\n πResults for {Fore.CYAN}{algo}{Style.RESET_ALL}")
            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))

    def select_algorithm(self):
        """
        Step 5: Choose an algorithm and specify k and m values.

        The menu is shown again until the answer (ignoring surrounding
        whitespace) is ``1`` or ``2``.

        :return: The selected option, ``"1"`` or ``"2"``.
        :raises ValueError: If k or m is not an integer.
        """
        print("\nπ Selecting an parameters and algorithm ...")
        self.k = int(input("β Enter the value of k: "))
        self.m = int(input("β Enter the value of m: "))

        menu = "β Enter the algorithm to execute:\n 1. Count-Mean Sketch\n 2. Hadamard Count-Mean Sketch\nSelect: "
        choice = input(menu).strip()
        while choice not in self._ALGORITHM_CHOICES:
            print("Invalid option; please enter 1 or 2.")
            choice = input(menu).strip()

        self.algorithm = choice
        return self.algorithm

    def execute_algorithms(self):
        """
        Step 6: Perform parameter fitting and execute the selected server algorithm.

        :raises ValueError: If ``self.algorithm`` is neither ``'1'`` nor ``'2'``.
        """
        # Fail before the fitting step instead of skipping both servers and
        # still reporting success.
        if self.algorithm not in self._ALGORITHM_CHOICES:
            raise ValueError(
                f"Unknown algorithm {self.algorithm!r}; expected '1' (Count-Mean Sketch) "
                "or '2' (Hadamard Count-Mean Sketch)."
            )

        print("\nπ Executing personalized privacy ...")
        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)

        print("\nβοΈ Running server ...")
        if self.algorithm == '1':
            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
        else:
            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)

        print("\nProcess done and results saved.")


def _answered_yes(prompt):
    """Ask a yes/no question; True when the reply is "yes" or "y" in any letter case."""
    return input(prompt).strip().lower() in ("yes", "y")


def main(step=1):
    """
    Main function to run the step-by-step execution of the method.

    :param step: Stage to start from: 1 preprocessing, 2 parameter calculation
        and non-private run, 3 private runs and algorithm selection, 4 parameter
        fitting and server execution.
    :raises ValueError: If ``step`` is not 1, 2, 3 or 4.
    """
    # Any other value matches no branch below and would loop forever.
    if step not in (1, 2, 3, 4):
        raise ValueError(f"step must be 1, 2, 3 or 4, got {step!r}")

    experiment = IndividualMethod()
    while True:
        if step == 1:
            # Step 1: Data preprocessing
            experiment.preprocess_data()
            step = 2

        # Deliberately a fresh ``if``: step 1 continues into step 2 in the same pass.
        if step == 2:
            # Step 2: Calculate k and m
            experiment.calculate_k_m()

            # Step 3: Execute no privacy algorithms
            experiment.execute_no_privacy()

            step = 3 if _answered_yes("Are you satisfied with the results? (yes/no): ") else 2

        elif step == 3:
            # Step 4: Execute private algorithms
            experiment.execute_private_algorithms()

            if _answered_yes("\nDo you want to change Ο΅ value? (yes/no): "):
                e1 = float(input("β Enter the new value of Ο΅: "))
                experiment.execute_private_algorithms(e1)

            # Step 5: Choose an algorithm, k and m
            experiment.select_algorithm()
            step = 4 if _answered_yes("Are you satisfied with the results? (yes/no): ") else 2

        elif step == 4:
            # Step 6: Parameter fitting and execute server
            experiment.execute_algorithms()
            break


if __name__ == "__main__":
    main()
class IndividualMethod:
    """
    This class represents the execution of various algorithms for private frequency estimation.
    It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.

    State is kept on the instance (``df``, ``k``, ``m``, ``algorithm``) and each
    method performs one step of an interactive workflow.
    """

    # Column titles shared by every result table printed by this class.
    _HEADERS = (
        "Element", "Real Frequency", "Real Percentage",
        "Estimated Frequency", "Estimated Percentage", "Estimation Difference",
        "Percentage Error",
    )

    # Menu answers understood by select_algorithm and execute_algorithms.
    _ALGORITHM_CHOICES = ("1", "2")

    def __init__(self, df=None, k=None, m=None, algorithm=None):
        """
        Initializes the IndividualMethod instance.

        :param df: The input dataset as a pandas DataFrame.
        :param k: The number of hash functions for the sketching algorithm.
        :param m: The number of bins in the sketching algorithm.
        :param algorithm: The selected algorithm for execution.
        """
        self.df = df
        self.k = k
        self.m = m
        self.algorithm = algorithm

    def preprocess_data(self):
        """Step 1: Run the data processor and store what it returns in ``self.df``."""
        self.df = run_data_processor()

    def calculate_k_m(self):
        """
        Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.

        Reads both values from standard input, then stores ``k = int(1 / delta)``
        and ``m = int(e / epsilon)`` (at least 1) on the instance.

        :return: The computed values of k and m.
        :raises ValueError: If an answer is not a number, the failure probability
            is outside (0, 1], or the overestimation factor is not positive.
        """
        print("\nπ Calculating k and m ... ")
        f = float(input("β Enter the failure probability Ξ΄: "))
        E = float(input("β Enter the overestimation factor Ξ΅: "))

        # Reject values that would divide by zero or give a zero/negative sketch size.
        # The negated comparisons also reject NaN.
        if not 0 < f <= 1:
            raise ValueError(f"failure probability must be in (0, 1], got {f}")
        if not E > 0:
            raise ValueError(f"overestimation factor must be positive, got {E}")

        self.k = int(1 / f)
        # math.e replaces the hand-typed 2.71828; clamp so a large factor never yields 0 bins.
        self.m = max(1, int(math.e / E))

        print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
        print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
        return self.k, self.m

    def execute_no_privacy(self):
        """Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection and print its table."""
        print("\nπ Calculing CMeS without privacy")
        data_table = run_cms_client_mean(self.k, self.m, self.df)
        print(tabulate(data_table, headers=list(self._HEADERS), tablefmt="fancy_grid"))

    def execute_private_algorithms(self, e=150):
        """
        Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).

        Runs both private clients over five (k, m) pairs -- the instance's own
        ``self.k``/``self.m`` followed by four fixed pairs -- and prints one
        results table per algorithm.

        :param e: Value forwarded unchanged to each client and shown in the
            progress message (default 150).
        """
        print("\nπ Searching parameters k and m ...")
        k_values = [self.k, 16, 128, 1024, 32768]
        m_values = [self.m, 16, 1024, 256, 256]

        headers = list(self._HEADERS)
        clients = (
            ("PCMeS", run_private_cms_client),
            ("PHCMS", run_private_hcms_client),
        )
        results = {algorithm: [] for algorithm, _ in clients}

        for k, m in zip(k_values, m_values):
            for algorithm, client in clients:
                print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ο΅: {e}")

                # Adjust a copy so the loop variable is never rebound and the
                # rounding cannot leak into another algorithm's run.
                bins = m
                if algorithm == "PHCMS" and not math.log2(bins).is_integer():
                    # The Hadamard variant is run with m rounded up to a power of two.
                    bins = 2 ** math.ceil(math.log2(bins))
                    print(f"{Fore.RED}Adjusting m to a power of 2 β m = {bins}{Style.RESET_ALL}")

                # Only the second of the five values returned by the client is used.
                _, data_table, _, _, _ = client(k, bins, e, self.df)

                for row in data_table:
                    # Missing trailing columns become "", extra ones are dropped.
                    record = dict(zip(headers, row))
                    results[algorithm].append([k, bins] + [record.get(title, "") for title in headers])

        for algo, table in results.items():
            print(f"\n πResults for {Fore.CYAN}{algo}{Style.RESET_ALL}")
            print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))

    def select_algorithm(self):
        """
        Step 5: Choose an algorithm and specify k and m values.

        The menu is shown again until the answer (ignoring surrounding
        whitespace) is ``1`` or ``2``.

        :return: The selected option, ``"1"`` or ``"2"``.
        :raises ValueError: If k or m is not an integer.
        """
        print("\nπ Selecting an parameters and algorithm ...")
        self.k = int(input("β Enter the value of k: "))
        self.m = int(input("β Enter the value of m: "))

        menu = "β Enter the algorithm to execute:\n 1. Count-Mean Sketch\n 2. Hadamard Count-Mean Sketch\nSelect: "
        choice = input(menu).strip()
        while choice not in self._ALGORITHM_CHOICES:
            print("Invalid option; please enter 1 or 2.")
            choice = input(menu).strip()

        self.algorithm = choice
        return self.algorithm

    def execute_algorithms(self):
        """
        Step 6: Perform parameter fitting and execute the selected server algorithm.

        :raises ValueError: If ``self.algorithm`` is neither ``'1'`` nor ``'2'``.
        """
        # Fail before the fitting step instead of skipping both servers and
        # still reporting success.
        if self.algorithm not in self._ALGORITHM_CHOICES:
            raise ValueError(
                f"Unknown algorithm {self.algorithm!r}; expected '1' (Count-Mean Sketch) "
                "or '2' (Hadamard Count-Mean Sketch)."
            )

        print("\nπ Executing personalized privacy ...")
        e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)

        print("\nβοΈ Running server ...")
        if self.algorithm == '1':
            run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
        else:
            run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)

        print("\nProcess done and results saved.")
This class represents the execution of various algorithms for private frequency estimation. It includes preprocessing data, computing parameters, and executing different privacy-preserving algorithms.
25 def __init__(self, df=None, k=None, m=None, algorithm=None): 26 """ 27 Initializes the IndividualMethod instance. 28 29 :param df: The input dataset as a pandas DataFrame. 30 :param k: The number of hash functions for the sketching algorithm. 31 :param m: The number of bins in the sketching algorithm. 32 :param algorithm: The selected algorithm for execution. 33 """ 34 self.df = df 35 self.k = k 36 self.m = m 37 self.algorithm = algorithm
Initializes the IndividualMethod instance.
Parameters
- df: The input dataset as a pandas DataFrame.
- k: The number of hash functions for the sketching algorithm.
- m: The number of bins in the sketching algorithm.
- algorithm: The selected algorithm for execution.
def preprocess_data(self):
    """Step 1: Run the data processor and keep whatever it returns as ``self.df``."""
    processed = run_data_processor()
    self.df = processed
Step 1: Data preprocessing by loading and filtering the dataset.
def calculate_k_m(self):
    """
    Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.

    Reads both values from standard input, then stores ``k = int(1 / delta)``
    and ``m = int(e / epsilon)`` (at least 1) on the instance.

    :return: The computed values of k and m.
    :raises ValueError: If an answer is not a number, the failure probability
        is outside (0, 1], or the overestimation factor is not positive.
    """
    print("\nπ Calculating k and m ... ")
    f = float(input("β Enter the failure probability Ξ΄: "))
    E = float(input("β Enter the overestimation factor Ξ΅: "))

    # Reject values that would divide by zero or give a zero/negative sketch size.
    # The negated comparisons also reject NaN.
    if not 0 < f <= 1:
        raise ValueError(f"failure probability must be in (0, 1], got {f}")
    if not E > 0:
        raise ValueError(f"overestimation factor must be positive, got {E}")

    self.k = int(1 / f)
    # math.e replaces the hand-typed 2.71828; clamp so a large factor never yields 0 bins.
    self.m = max(1, int(math.e / E))

    print(f"{Fore.GREEN}Calculated k = {self.k} and m = {self.m}{Style.RESET_ALL}")
    print(f"{Fore.GREEN}Space complexity: {self.k*self.m}{Style.RESET_ALL}")
    return self.k, self.m
Step 2: Calculate k and m values based on user input for failure probability and overestimation factor.
Returns
The computed values of k and m.
def execute_no_privacy(self):
    """Step 3: Run the non-private Count-Mean Sketch (CMeS) and print its results as a table."""
    print("\nπ Calculing CMeS without privacy")
    rows = run_cms_client_mean(self.k, self.m, self.df)

    # Column titles, in the order they are handed to tabulate.
    column_titles = ["Element", "Real Frequency", "Real Percentage"]
    column_titles += ["Estimated Frequency", "Estimated Percentage", "Estimation Difference"]
    column_titles.append("Percentage Error")

    rendered = tabulate(rows, headers=column_titles, tablefmt="fancy_grid")
    print(rendered)
Step 3: Execute Count-Mean Sketch (CMeS) without privacy protection.
def execute_private_algorithms(self, e=150):
    """
    Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).

    Runs both private clients over five (k, m) pairs -- the instance's own
    ``self.k``/``self.m`` followed by four fixed pairs -- and prints one
    results table per algorithm.

    :param e: Value forwarded unchanged to each client and shown in the
        progress message (default 150).
    """
    print("\nπ Searching parameters k and m ...")
    k_values = [self.k, 16, 128, 1024, 32768]
    m_values = [self.m, 16, 1024, 256, 256]

    headers = [
        "Element", "Real Frequency", "Real Percentage",
        "Estimated Frequency", "Estimated Percentage", "Estimation Difference",
        "Percentage Error",
    ]
    clients = (
        ("PCMeS", run_private_cms_client),
        ("PHCMS", run_private_hcms_client),
    )
    results = {algorithm: [] for algorithm, _ in clients}

    for k, m in zip(k_values, m_values):
        for algorithm, client in clients:
            print(f"\nRunning {Fore.GREEN}{algorithm}{Style.RESET_ALL} with k: {k}, m: {m} and Ο΅: {e}")

            # Adjust a copy so the loop variable is never rebound and the
            # rounding cannot leak into another algorithm's run.
            bins = m
            if algorithm == "PHCMS" and not math.log2(bins).is_integer():
                # The Hadamard variant is run with m rounded up to a power of two.
                bins = 2 ** math.ceil(math.log2(bins))
                print(f"{Fore.RED}Adjusting m to a power of 2 β m = {bins}{Style.RESET_ALL}")

            # Only the second of the five values returned by the client is used.
            _, data_table, _, _, _ = client(k, bins, e, self.df)

            for row in data_table:
                # Missing trailing columns become "", extra ones are dropped.
                record = dict(zip(headers, row))
                results[algorithm].append([k, bins] + [record.get(title, "") for title in headers])

    for algo, table in results.items():
        print(f"\n πResults for {Fore.CYAN}{algo}{Style.RESET_ALL}")
        print(tabulate(table, headers=["k", "m"] + headers, tablefmt="fancy_grid"))
Step 4: Execute privacy-preserving algorithms (CMeS and HCMS).
def select_algorithm(self):
    """
    Step 5: Choose an algorithm and specify k and m values.

    The menu is shown again until the answer (ignoring surrounding
    whitespace) is ``1`` or ``2``, so later steps never receive an option
    they do not handle.

    :return: The selected option, ``"1"`` or ``"2"``.
    :raises ValueError: If k or m is not an integer.
    """
    print("\nπ Selecting an parameters and algorithm ...")
    self.k = int(input("β Enter the value of k: "))
    self.m = int(input("β Enter the value of m: "))

    menu = "β Enter the algorithm to execute:\n 1. Count-Mean Sketch\n 2. Hadamard Count-Mean Sketch\nSelect: "
    choice = input(menu).strip()
    while choice not in ("1", "2"):
        print("Invalid option; please enter 1 or 2.")
        choice = input(menu).strip()

    self.algorithm = choice
    return self.algorithm
Step 5: Choose an algorithm and specify k and m values.
def execute_algorithms(self):
    """
    Step 6: Perform parameter fitting and execute the selected server algorithm.

    ``'1'`` runs the Count-Mean Sketch server and ``'2'`` the Hadamard
    Count-Mean Sketch server, each with the values returned by the fitting step.

    :raises ValueError: If ``self.algorithm`` is neither ``'1'`` nor ``'2'``.
    """
    # Fail before the fitting step instead of skipping both servers and
    # still reporting success.
    if self.algorithm not in ('1', '2'):
        raise ValueError(
            f"Unknown algorithm {self.algorithm!r}; expected '1' (Count-Mean Sketch) "
            "or '2' (Hadamard Count-Mean Sketch)."
        )

    print("\nπ Executing personalized privacy ...")
    e, result, privatized_data = run_parameter_fitting(self.df, self.k, self.m, self.algorithm)

    print("\nβοΈ Running server ...")
    if self.algorithm == '1':
        run_private_cms_server(self.k, self.m, e, self.df, result, privatized_data)
    else:
        run_private_hcms_server(self.k, self.m, e, self.df, result, privatized_data)

    print("\nProcess done and results saved.")
Step 6: Perform parameter fitting and execute the selected server algorithm.
def _answered_yes(prompt):
    """Ask a yes/no question; True when the reply is "yes" or "y" in any letter case."""
    return input(prompt).strip().lower() in ("yes", "y")


def main(step=1):
    """
    Main function to run the step-by-step execution of the method.

    :param step: Stage to start from: 1 preprocessing, 2 parameter calculation
        and non-private run, 3 private runs and algorithm selection, 4 parameter
        fitting and server execution.
    :raises ValueError: If ``step`` is not 1, 2, 3 or 4.
    """
    # Any other value matches no branch below and would loop forever.
    if step not in (1, 2, 3, 4):
        raise ValueError(f"step must be 1, 2, 3 or 4, got {step!r}")

    experiment = IndividualMethod()
    while True:
        if step == 1:
            # Step 1: Data preprocessing
            experiment.preprocess_data()
            step = 2

        # Deliberately a fresh ``if``: step 1 continues into step 2 in the same pass.
        if step == 2:
            # Step 2: Calculate k and m
            experiment.calculate_k_m()

            # Step 3: Execute no privacy algorithms
            experiment.execute_no_privacy()

            step = 3 if _answered_yes("Are you satisfied with the results? (yes/no): ") else 2

        elif step == 3:
            # Step 4: Execute private algorithms
            experiment.execute_private_algorithms()

            if _answered_yes("\nDo you want to change Ο΅ value? (yes/no): "):
                e1 = float(input("β Enter the new value of Ο΅: "))
                experiment.execute_private_algorithms(e1)

            # Step 5: Choose an algorithm, k and m
            experiment.select_algorithm()
            step = 4 if _answered_yes("Are you satisfied with the results? (yes/no): ") else 2

        elif step == 4:
            # Step 6: Parameter fitting and execute server
            experiment.execute_algorithms()
            break
Main function to run the step-by-step execution of the method.