src.hadamard_count_mean.private_hcms_server

  1import os
  2import numpy as np
  3from rich.progress import Progress
  4
  5from utils.utils import display_results
  6
  7class privateHCMSServer:
  8    """
  9    A private Hadamard Count-Min Sketch (HCMS) server implementation.
 10    """
 11    def __init__(self, epsilon, k, m, df, hashes):
 12        """
 13        Initializes the private HCMS server.
 14        
 15        :param epsilon: Privacy parameter
 16        :param k: Number of hash functions
 17        :param m: Number of columns in the sketch matrix
 18        :param df: Dataframe containing the dataset
 19        :param hashes: List of hash functions
 20        """
 21        self.epsilon = epsilon
 22        self.k = k
 23        self.m = m
 24        self.dataset = self.df['value'].tolist()
 25        self.domain = self.df['value'].unique().tolist()
 26        self.H = self.hadamard_matrix(self.m)
 27        self.N = len(self.dataset)
 28        self.hashes = hashes
 29
 30        # Creation of the sketch matrix
 31        self.M = np.zeros((self.k, self.m))
 32
 33    def update_sketch_matrix(self, w, j, l):
 34        """
 35        Updates the sketch matrix with a new data point.
 36        
 37        :param w: Weight of the data point
 38        :param j: Hash function index
 39        :param l: Hash value
 40        """
 41        c_e = (np.exp(self.epsilon/2)+1) / ((np.exp(self.epsilon/2))-1)
 42        x = self.k * c_e * w
 43        self.M[j,l] =  self.M[j,l] + x
 44
 45    def traspose_M(self):
 46        """
 47        Applies the Hadamard transformation to the sketch matrix.
 48        """
 49        self.M = self.M @ np.transpose(self.H)
 50
 51    def estimate_server(self,d):
 52        """
 53        Estimates the frequency of an element in the dataset.
 54        
 55        :param d: Element to estimate
 56        :return: Estimated frequency
 57        """
 58        return (self.m / (self.m-1)) * (1/self.k * np.sum([self.M[i,self.hashes[i](d)] for i in range(self.k)]) - self.N/self.m)
 59
 60    def execute_server(self, privatized_data):
 61        """
 62        Processes the privatized data and estimates frequencies.
 63        
 64        :param privatized_data: List of privatized data points
 65        :return: Dictionary of estimated frequencies
 66        """
 67        with Progress() as progress:
 68            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
 69            for data in privatized_data:
 70                self.update_sketch_matrix(data[0],data[1],data[2])
 71                progress.update(task, advance=1)
 72
 73            # Transpose the matrix
 74            self.traspose_M()
 75
 76            # Estimate the frequencies
 77            F_estimated = {}
 78            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
 79            for x in self.domain:
 80                F_estimated[x] = self.estimate_server(x)
 81                progress.update(task, advance=1)
 82        return F_estimated
 83
 84    def query_server(self, query_element):
 85        """
 86        Queries the estimated frequency of an element.
 87        
 88        :param query_element: Element to query
 89        :return: Estimated frequency or a message if the element is not in the domain
 90        """
 91        if query_element not in self.domain:
 92            return "Element not in the domain"
 93        estimation = self.estimate_server(query_element)
 94        return estimation
 95
 96def run_private_hcms_server(k, m, e, df, hashes, privatized_data):
 97    """
 98    Runs the private HCMS server pipeline.
 99    
100    :param k: Number of hash functions
101    :param m: Number of columns in the sketch matrix
102    :param e: Privacy parameter
103    :param df: Dataframe containing the dataset
104    :param hashes: List of hash functions
105    :param privatized_data: List of privatized data points
106    """
107    # Initialize the server
108    server = privateHCMSServer(e, k, m, df, hashes)
109
110    # Save the privatized data
111    privatized_data_save = pd.DataFrame(privatized_data)
112    privatized_data_file = os.path.join(os.path.join('..', 'data', 'private'), 'privatized_data.csv')
113    privatized_data_save.to_csv(privatized_data_file, index=False)
114    
115    # Execute the server
116    f_estimated = server.execute_server(privatized_data)
117
118    # Show the results
119    display_results(df, f_estimated)
120
121    # Query the server
122    while True:
123        query = input("Enter an element to query the server or 'exit' to finish: ")
124        if query.lower() == 'exit':
125            break
126        estimation = server.query_server(query)
127        print(f"The estimated frequency of {query} is {estimation:.2f}")
128
129
130  
class privateHCMSServer:
 8class privateHCMSServer:
 9    """
10    A private Hadamard Count-Min Sketch (HCMS) server implementation.
11    """
12    def __init__(self, epsilon, k, m, df, hashes):
13        """
14        Initializes the private HCMS server.
15        
16        :param epsilon: Privacy parameter
17        :param k: Number of hash functions
18        :param m: Number of columns in the sketch matrix
19        :param df: Dataframe containing the dataset
20        :param hashes: List of hash functions
21        """
22        self.epsilon = epsilon
23        self.k = k
24        self.m = m
25        self.dataset = self.df['value'].tolist()
26        self.domain = self.df['value'].unique().tolist()
27        self.H = self.hadamard_matrix(self.m)
28        self.N = len(self.dataset)
29        self.hashes = hashes
30
31        # Creation of the sketch matrix
32        self.M = np.zeros((self.k, self.m))
33
34    def update_sketch_matrix(self, w, j, l):
35        """
36        Updates the sketch matrix with a new data point.
37        
38        :param w: Weight of the data point
39        :param j: Hash function index
40        :param l: Hash value
41        """
42        c_e = (np.exp(self.epsilon/2)+1) / ((np.exp(self.epsilon/2))-1)
43        x = self.k * c_e * w
44        self.M[j,l] =  self.M[j,l] + x
45
46    def traspose_M(self):
47        """
48        Applies the Hadamard transformation to the sketch matrix.
49        """
50        self.M = self.M @ np.transpose(self.H)
51
52    def estimate_server(self,d):
53        """
54        Estimates the frequency of an element in the dataset.
55        
56        :param d: Element to estimate
57        :return: Estimated frequency
58        """
59        return (self.m / (self.m-1)) * (1/self.k * np.sum([self.M[i,self.hashes[i](d)] for i in range(self.k)]) - self.N/self.m)
60
61    def execute_server(self, privatized_data):
62        """
63        Processes the privatized data and estimates frequencies.
64        
65        :param privatized_data: List of privatized data points
66        :return: Dictionary of estimated frequencies
67        """
68        with Progress() as progress:
69            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
70            for data in privatized_data:
71                self.update_sketch_matrix(data[0],data[1],data[2])
72                progress.update(task, advance=1)
73
74            # Transpose the matrix
75            self.traspose_M()
76
77            # Estimate the frequencies
78            F_estimated = {}
79            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
80            for x in self.domain:
81                F_estimated[x] = self.estimate_server(x)
82                progress.update(task, advance=1)
83        return F_estimated
84
85    def query_server(self, query_element):
86        """
87        Queries the estimated frequency of an element.
88        
89        :param query_element: Element to query
90        :return: Estimated frequency or a message if the element is not in the domain
91        """
92        if query_element not in self.domain:
93            return "Element not in the domain"
94        estimation = self.estimate_server(query_element)
95        return estimation

A private Hadamard Count-Mean Sketch (HCMS) server implementation.

privateHCMSServer(epsilon, k, m, df, hashes)
12    def __init__(self, epsilon, k, m, df, hashes):
13        """
14        Initializes the private HCMS server.
15        
16        :param epsilon: Privacy parameter
17        :param k: Number of hash functions
18        :param m: Number of columns in the sketch matrix
19        :param df: Dataframe containing the dataset
20        :param hashes: List of hash functions
21        """
22        self.epsilon = epsilon
23        self.k = k
24        self.m = m
25        self.dataset = self.df['value'].tolist()
26        self.domain = self.df['value'].unique().tolist()
27        self.H = self.hadamard_matrix(self.m)
28        self.N = len(self.dataset)
29        self.hashes = hashes
30
31        # Creation of the sketch matrix
32        self.M = np.zeros((self.k, self.m))

Initializes the private HCMS server.

Parameters
  • epsilon: Privacy parameter
  • k: Number of hash functions
  • m: Number of columns in the sketch matrix
  • df: Dataframe containing the dataset
  • hashes: List of hash functions
epsilon
k
m
dataset
domain
H
N
hashes
M
def update_sketch_matrix(self, w, j, l):
34    def update_sketch_matrix(self, w, j, l):
35        """
36        Updates the sketch matrix with a new data point.
37        
38        :param w: Weight of the data point
39        :param j: Hash function index
40        :param l: Hash value
41        """
42        c_e = (np.exp(self.epsilon/2)+1) / ((np.exp(self.epsilon/2))-1)
43        x = self.k * c_e * w
44        self.M[j,l] =  self.M[j,l] + x

Updates the sketch matrix with a new data point.

Parameters
  • w: Weight of the data point
  • j: Hash function index
  • l: Hash value
def traspose_M(self):
46    def traspose_M(self):
47        """
48        Applies the Hadamard transformation to the sketch matrix.
49        """
50        self.M = self.M @ np.transpose(self.H)

Applies the Hadamard transformation to the sketch matrix.

def estimate_server(self, d):
52    def estimate_server(self,d):
53        """
54        Estimates the frequency of an element in the dataset.
55        
56        :param d: Element to estimate
57        :return: Estimated frequency
58        """
59        return (self.m / (self.m-1)) * (1/self.k * np.sum([self.M[i,self.hashes[i](d)] for i in range(self.k)]) - self.N/self.m)

Estimates the frequency of an element in the dataset.

Parameters
  • d: Element to estimate
Returns

Estimated frequency

def execute_server(self, privatized_data):
61    def execute_server(self, privatized_data):
62        """
63        Processes the privatized data and estimates frequencies.
64        
65        :param privatized_data: List of privatized data points
66        :return: Dictionary of estimated frequencies
67        """
68        with Progress() as progress:
69            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
70            for data in privatized_data:
71                self.update_sketch_matrix(data[0],data[1],data[2])
72                progress.update(task, advance=1)
73
74            # Transpose the matrix
75            self.traspose_M()
76
77            # Estimate the frequencies
78            F_estimated = {}
79            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
80            for x in self.domain:
81                F_estimated[x] = self.estimate_server(x)
82                progress.update(task, advance=1)
83        return F_estimated

Processes the privatized data and estimates frequencies.

Parameters
  • privatized_data: List of privatized data points
Returns

Dictionary of estimated frequencies

def query_server(self, query_element):
85    def query_server(self, query_element):
86        """
87        Queries the estimated frequency of an element.
88        
89        :param query_element: Element to query
90        :return: Estimated frequency or a message if the element is not in the domain
91        """
92        if query_element not in self.domain:
93            return "Element not in the domain"
94        estimation = self.estimate_server(query_element)
95        return estimation

Queries the estimated frequency of an element.

Parameters
  • query_element: Element to query
Returns

Estimated frequency or a message if the element is not in the domain

def run_private_hcms_server(k, m, e, df, hashes, privatized_data):
 97def run_private_hcms_server(k, m, e, df, hashes, privatized_data):
 98    """
 99    Runs the private HCMS server pipeline.
100    
101    :param k: Number of hash functions
102    :param m: Number of columns in the sketch matrix
103    :param e: Privacy parameter
104    :param df: Dataframe containing the dataset
105    :param hashes: List of hash functions
106    :param privatized_data: List of privatized data points
107    """
108    # Initialize the server
109    server = privateHCMSServer(e, k, m, df, hashes)
110
111    # Save the privatized data
112    privatized_data_save = pd.DataFrame(privatized_data)
113    privatized_data_file = os.path.join(os.path.join('..', 'data', 'private'), 'privatized_data.csv')
114    privatized_data_save.to_csv(privatized_data_file, index=False)
115    
116    # Execute the server
117    f_estimated = server.execute_server(privatized_data)
118
119    # Show the results
120    display_results(df, f_estimated)
121
122    # Query the server
123    while True:
124        query = input("Enter an element to query the server or 'exit' to finish: ")
125        if query.lower() == 'exit':
126            break
127        estimation = server.query_server(query)
128        print(f"The estimated frequency of {query} is {estimation:.2f}")

Runs the private HCMS server pipeline.

Parameters
  • k: Number of hash functions
  • m: Number of columns in the sketch matrix
  • e: Privacy parameter
  • df: Dataframe containing the dataset
  • hashes: List of hash functions
  • privatized_data: List of privatized data points