src.private_count_min.private_cmins_server
"""Server side of the private Count-Min sketch frequency estimator."""
import os
import importlib.util
import numpy as np
import pandas as pd
import random
import argparse
from sympy import primerange
from progress.bar import Bar
import pickle

from utils.utils import load_dataset, display_results


class privateCMinSServer:
    """Aggregate privatized reports in a sketch and estimate frequencies.

    The server keeps a ``k x m`` matrix ``M``. Every report adds a debiased
    vector to one row of ``M``; a frequency estimate takes the minimum of
    the ``k`` cells an element hashes to and rescales it.
    """

    def __init__(self, epsilon, k, m, dataset, domain, H):
        """Store the parameters and allocate an all-zero sketch matrix.

        Args:
            epsilon: Value used to build the debiasing constant in
                ``update_sketch_matrix`` (presumably the privacy budget).
            k: Number of sketch rows; ``H`` is indexed from 0 to ``k - 1``.
            m: Number of sketch columns.
            dataset: Sized collection; only its length is used (as ``N``).
            domain: Collection of elements that can be queried.
            H: Indexable collection of callables; ``H[i](d)`` must return a
                valid column index of row ``i`` for element ``d``.
        """
        self.epsilon = epsilon
        self.k = k
        self.m = m
        self.dataset = dataset
        self.domain = domain
        self.N = len(dataset)
        self.H = H

        # Creation of the sketch matrix
        self.M = np.zeros((self.k, self.m))

    def update_sketch_matrix(self, v, j):
        """Add the debiased form of report vector ``v`` to row ``j``.

        Args:
            v: NumPy array with at least ``m`` entries (only the first
                ``m`` are used).
            j: Index of the sketch row the report belongs to.
        """
        exp_half = np.exp(self.epsilon / 2)
        c_e = (exp_half + 1) / (exp_half - 1)
        x = self.k * ((c_e / 2) * v + (1 / 2) * np.ones_like(v))
        # One vectorized row update instead of a Python loop over columns.
        self.M[j, :] += x[:self.m]

    def execute_server(self, privatized_data):
        """Ingest every report, then estimate the frequency of each element.

        Args:
            privatized_data: Sized iterable of records where ``record[0]``
                is the report vector and ``record[1]`` the row index.

        Returns:
            dict mapping each element of ``domain`` to its estimate.
        """
        update_bar = Bar('Update sketch matrix', max=len(privatized_data), suffix='%(percent)d%%')
        for data in privatized_data:
            self.update_sketch_matrix(data[0], data[1])
            update_bar.next()
        update_bar.finish()

        # The estimation pass gets its own bar: advancing the finished
        # update bar would push it past its maximum and finish it twice.
        estimate_bar = Bar('Estimate frequencies', max=len(self.domain), suffix='%(percent)d%%')
        F_estimated = {}
        for x in self.domain:
            F_estimated[x] = self.estimate_server(x)
            estimate_bar.next()
        estimate_bar.finish()
        return F_estimated

    def estimate_server(self, d):
        """Return the frequency estimate of element ``d``.

        The smallest of the ``k`` cells ``M[i, H[i](d)]`` is shifted by
        ``N / m`` and scaled by ``m / (m - 1)``.
        """
        minimum = min(self.M[i, self.H[i](d)] for i in range(self.k))
        f_estimated = (self.m / (self.m - 1)) * (minimum - (self.N / self.m))
        return f_estimated

    def query_server(self, query_element):
        """Return the estimate for ``query_element``.

        Returns the string ``"Element not in the domain"`` instead of a
        number when the element is not part of ``domain``.
        """
        if query_element not in self.domain:
            return "Element not in the domain"
        estimation = self.estimate_server(query_element)
        return estimation


def run_private_cmins_server(k, m, e, d, H):
    """Run the server on stored privatized data and answer console queries.

    Args:
        k: Number of sketch rows.
        m: Number of sketch columns.
        e: Epsilon value forwarded to the server.
        d: Dataset name used to build the dataset and pickle file names.
        H: Hash functions forwarded to the server.
    """
    dataset, df, domain = load_dataset(f"{d}_filtered")

    # Initialize the server Count-Min Sketch
    # (the original referenced an undefined name, ``privateCMSServer``).
    server = privateCMinSServer(e, k, m, dataset, domain, H)

    # Obtain the privatized data
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "../../data/privatized")

    fav_output_file = os.path.join(output_dir, f"{d}_private_fav.pkl")
    default_output_file = os.path.join(output_dir, f"{d}_private.pkl")

    output_file = fav_output_file if os.path.exists(fav_output_file) else default_output_file
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # produced by a trusted step of this project.
    with open(output_file, 'rb') as f:
        privatized_data = pickle.load(f)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    os.system('cls' if os.name == 'nt' else 'clear>/dev/null')
    display_results(df, f_estimated)

    # Query the server
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        print(f"The estimated frequency of {query} is {estimation}")
class
privateCMinSServer:
class privateCMinSServer:
    """Aggregate privatized reports in a sketch and estimate frequencies.

    The server keeps a ``k x m`` matrix ``M``. Every report adds a debiased
    vector to one row of ``M``; a frequency estimate takes the minimum of
    the ``k`` cells an element hashes to and rescales it.
    """

    def __init__(self, epsilon, k, m, dataset, domain, H):
        """Store the parameters and allocate an all-zero sketch matrix.

        Args:
            epsilon: Value used to build the debiasing constant in
                ``update_sketch_matrix`` (presumably the privacy budget).
            k: Number of sketch rows; ``H`` is indexed from 0 to ``k - 1``.
            m: Number of sketch columns.
            dataset: Sized collection; only its length is used (as ``N``).
            domain: Collection of elements that can be queried.
            H: Indexable collection of callables; ``H[i](d)`` must return a
                valid column index of row ``i`` for element ``d``.
        """
        self.epsilon = epsilon
        self.k = k
        self.m = m
        self.dataset = dataset
        self.domain = domain
        self.N = len(dataset)
        self.H = H

        # Creation of the sketch matrix
        self.M = np.zeros((self.k, self.m))

    def update_sketch_matrix(self, v, j):
        """Add the debiased form of report vector ``v`` to row ``j``.

        Args:
            v: NumPy array with at least ``m`` entries (only the first
                ``m`` are used).
            j: Index of the sketch row the report belongs to.
        """
        exp_half = np.exp(self.epsilon / 2)
        c_e = (exp_half + 1) / (exp_half - 1)
        x = self.k * ((c_e / 2) * v + (1 / 2) * np.ones_like(v))
        # One vectorized row update instead of a Python loop over columns.
        self.M[j, :] += x[:self.m]

    def execute_server(self, privatized_data):
        """Ingest every report, then estimate the frequency of each element.

        Args:
            privatized_data: Sized iterable of records where ``record[0]``
                is the report vector and ``record[1]`` the row index.

        Returns:
            dict mapping each element of ``domain`` to its estimate.
        """
        update_bar = Bar('Update sketch matrix', max=len(privatized_data), suffix='%(percent)d%%')
        for data in privatized_data:
            self.update_sketch_matrix(data[0], data[1])
            update_bar.next()
        update_bar.finish()

        # The estimation pass gets its own bar: advancing the finished
        # update bar would push it past its maximum and finish it twice.
        estimate_bar = Bar('Estimate frequencies', max=len(self.domain), suffix='%(percent)d%%')
        F_estimated = {}
        for x in self.domain:
            F_estimated[x] = self.estimate_server(x)
            estimate_bar.next()
        estimate_bar.finish()
        return F_estimated

    def estimate_server(self, d):
        """Return the frequency estimate of element ``d``.

        The smallest of the ``k`` cells ``M[i, H[i](d)]`` is shifted by
        ``N / m`` and scaled by ``m / (m - 1)``.
        """
        minimum = min(self.M[i, self.H[i](d)] for i in range(self.k))
        f_estimated = (self.m / (self.m - 1)) * (minimum - (self.N / self.m))
        return f_estimated

    def query_server(self, query_element):
        """Return the estimate for ``query_element``.

        Returns the string ``"Element not in the domain"`` instead of a
        number when the element is not part of ``domain``.
        """
        if query_element not in self.domain:
            return "Element not in the domain"
        estimation = self.estimate_server(query_element)
        return estimation
def
execute_server(self, privatized_data):
def execute_server(self, privatized_data):
    """Ingest every report, then estimate the frequency of each element.

    Args:
        privatized_data: Sized iterable of records where ``record[0]`` is
            the report vector and ``record[1]`` the sketch row index.

    Returns:
        dict mapping each element of ``self.domain`` to the value returned
        by ``self.estimate_server`` for it.
    """
    update_bar = Bar('Update sketch matrix', max=len(privatized_data), suffix='%(percent)d%%')
    for data in privatized_data:
        self.update_sketch_matrix(data[0], data[1])
        update_bar.next()
    update_bar.finish()

    # The estimation pass gets its own bar: advancing the finished update
    # bar would push it past its maximum and finish it a second time.
    estimate_bar = Bar('Estimate frequencies', max=len(self.domain), suffix='%(percent)d%%')
    F_estimated = {}
    for x in self.domain:
        F_estimated[x] = self.estimate_server(x)
        estimate_bar.next()
    estimate_bar.finish()
    return F_estimated
def
run_private_cmins_server(k, m, e, d, H):
def run_private_cmins_server(k, m, e, d, H):
    """Run the server on stored privatized data and answer console queries.

    Loads the ``{d}_filtered`` dataset, reads the privatized reports from
    ``data/privatized`` (preferring ``{d}_private_fav.pkl`` when it exists),
    displays the estimated frequencies and then serves interactive queries
    until the user types ``exit``.

    Args:
        k: Number of sketch rows.
        m: Number of sketch columns.
        e: Epsilon value forwarded to the server.
        d: Dataset name used to build the dataset and pickle file names.
        H: Hash functions forwarded to the server.
    """
    dataset, df, domain = load_dataset(f"{d}_filtered")

    # Initialize the server Count-Min Sketch
    # (the original referenced an undefined name, ``privateCMSServer``).
    server = privateCMinSServer(e, k, m, dataset, domain, H)

    # Obtain the privatized data
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "../../data/privatized")

    fav_output_file = os.path.join(output_dir, f"{d}_private_fav.pkl")
    default_output_file = os.path.join(output_dir, f"{d}_private.pkl")

    output_file = fav_output_file if os.path.exists(fav_output_file) else default_output_file
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # produced by a trusted step of this project.
    with open(output_file, 'rb') as f:
        privatized_data = pickle.load(f)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    os.system('cls' if os.name == 'nt' else 'clear>/dev/null')
    display_results(df, f_estimated)

    # Query the server
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        print(f"The estimated frequency of {query} is {estimation}")