src.count_mean.private_cms_server

  1import numpy as np
  2import pandas as pd
  3import os
  4from rich.progress import Progress
  5
  6from utils.utils import display_results
  7
  8class privateCMSServer:
  9    """
 10    This class represents the server side of the Private Count-Mean Sketch (PCMS).
 11    It is responsible for updating the sketch matrix and providing frequency estimations.
 12
 13    Attributes:
 14        df (pandas.DataFrame): The dataset containing the values.
 15        epsilon (float): The privacy parameter epsilon.
 16        k (int): The number of hash functions.
 17        m (int): The size of the sketch.
 18        dataset (list): The list of values in the dataset.
 19        domain (list): The unique values in the dataset.
 20        N (int): The size of the dataset.
 21        H (list): The list of hash functions.
 22        M (numpy.ndarray): The sketch matrix.
 23    """
 24    def __init__(self, epsilon, k, m, df, H):
 25        """
 26        Initializes the privateCMSServer class with the given parameters.
 27
 28        Args:
 29            epsilon (float): The privacy parameter epsilon.
 30            k (int): The number of hash functions.
 31            m (int): The size of the sketch.
 32            df (pandas.DataFrame): The dataset containing the values.
 33            H (list): The list of hash functions.
 34        """
 35        self.df = df
 36        self.epsilon = epsilon
 37        self.k = k
 38        self.m = m
 39        self.dataset = self.df['value'].tolist()
 40        self.domain = self.df['value'].unique().tolist()
 41        self.N = len(self.dataset)
 42        self.H = H
 43
 44        # Creation of the sketch matrix
 45        self.M = np.zeros((self.k, self.m))
 46    
 47    def update_sketch_matrix(self,v,j):
 48        """
 49        Updates the sketch matrix based on the given privatized data.
 50
 51        Args:
 52            v (numpy.ndarray): The privatized vector.
 53            j (int): The index of the hash function used.
 54        """
 55        c_e = (np.exp(self.epsilon/2)+1) / ((np.exp(self.epsilon/2))-1)
 56        x = self.k * ((c_e/2) * v + (1/2) * np.ones_like(v))
 57        for i in range (self.m):
 58            self.M[j,i] += x[i]
 59
 60    def execute_server(self,privatized_data):
 61        """
 62        Executes the server-side operations, including updating the sketch matrix
 63        and estimating the frequencies.
 64
 65        Args:
 66            privatized_data (list): The privatized data from the client.
 67
 68        Returns:
 69            dict: A dictionary containing the estimated frequencies for each element.
 70        """
 71        with Progress() as progress:
 72            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
 73
 74            for data in privatized_data:
 75                self.update_sketch_matrix(data[0],data[1])
 76                progress.update(task, advance=1)
 77
 78            F_estimated = {}
 79            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
 80            for x in self.domain:
 81                F_estimated[x] = self.estimate_server(x)
 82                progress.update(task, advance=1)
 83                
 84        return F_estimated
 85
 86    def estimate_server(self,d):
 87        """
 88        Estimates the frequency of an element based on the current sketch matrix.
 89
 90        Args:
 91            d (any): The element whose frequency is to be estimated.
 92
 93        Returns:
 94            float: The estimated frequency of the element.
 95        """
 96        sum_aux = 0
 97        for i in range(self.k):
 98            selected_hash = self.H[i]
 99            sum_aux += self.M[i, selected_hash(d)]
100        
101        f_estimated = (self.m/(self.m-1))*((sum_aux/self.k)-(self.N/self.m))
102        return f_estimated
103    
104    def query_server(self, query_element):
105        """
106        Queries the server for the estimated frequency of an element.
107
108        Args:
109            query_element (any): The element to query.
110
111        Returns:
112            float or str: The estimated frequency of the element, or a message if the element is not in the domain.
113        """
114        if query_element not in self.domain:
115            return "Element not in the domain"
116        estimation = self.estimate_server(query_element)
117        return estimation
118
119    
def run_private_cms_server(k, m, e, df, H, privatized_data):
    """
    Runs the server-side operations for the Private Count-Mean Sketch, including
    estimating frequencies and querying the server.

    The privatized data is first saved to ``../data/private/privatized_data.csv``
    (relative to the current working directory); the directory is created if it
    does not exist. The estimated frequencies are then displayed and an
    interactive query loop runs until the user types ``exit``.

    Args:
        k (int): The number of hash functions.
        m (int): The size of the sketch.
        e (float): The privacy parameter epsilon.
        df (pandas.DataFrame): The dataset containing the values.
        H (list): The list of hash functions.
        privatized_data (list): The privatized data from the client.
    """
    # Initialize the server Count-Mean Sketch
    server = privateCMSServer(e, k, m, df, H)

    # Save the privatized data; make sure the target directory exists so that
    # to_csv does not fail on a fresh checkout.
    output_dir = os.path.join('..', 'data', 'private')
    os.makedirs(output_dir, exist_ok=True)
    privatized_data_file = os.path.join(output_dir, 'privatized_data.csv')
    pd.DataFrame(privatized_data).to_csv(privatized_data_file, index=False)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    display_results(df, f_estimated)

    # Query the server
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        if isinstance(estimation, str):
            # query_server answers with a message string for unknown elements;
            # formatting that string with ':.2f' would raise ValueError.
            print(estimation)
            continue
        print(f"The estimated frequency of {query} is {estimation:.2f}")
class privateCMSServer:
 10class privateCMSServer:
 11    """
 12    This class represents the server side of the Private Count-Mean Sketch (PCMS).
 13    It is responsible for updating the sketch matrix and providing frequency estimations.
 14
 15    Attributes:
 16        df (pandas.DataFrame): The dataset containing the values.
 17        epsilon (float): The privacy parameter epsilon.
 18        k (int): The number of hash functions.
 19        m (int): The size of the sketch.
 20        dataset (list): The list of values in the dataset.
 21        domain (list): The unique values in the dataset.
 22        N (int): The size of the dataset.
 23        H (list): The list of hash functions.
 24        M (numpy.ndarray): The sketch matrix.
 25    """
 26    def __init__(self, epsilon, k, m, df, H):
 27        """
 28        Initializes the privateCMSServer class with the given parameters.
 29
 30        Args:
 31            epsilon (float): The privacy parameter epsilon.
 32            k (int): The number of hash functions.
 33            m (int): The size of the sketch.
 34            df (pandas.DataFrame): The dataset containing the values.
 35            H (list): The list of hash functions.
 36        """
 37        self.df = df
 38        self.epsilon = epsilon
 39        self.k = k
 40        self.m = m
 41        self.dataset = self.df['value'].tolist()
 42        self.domain = self.df['value'].unique().tolist()
 43        self.N = len(self.dataset)
 44        self.H = H
 45
 46        # Creation of the sketch matrix
 47        self.M = np.zeros((self.k, self.m))
 48    
 49    def update_sketch_matrix(self,v,j):
 50        """
 51        Updates the sketch matrix based on the given privatized data.
 52
 53        Args:
 54            v (numpy.ndarray): The privatized vector.
 55            j (int): The index of the hash function used.
 56        """
 57        c_e = (np.exp(self.epsilon/2)+1) / ((np.exp(self.epsilon/2))-1)
 58        x = self.k * ((c_e/2) * v + (1/2) * np.ones_like(v))
 59        for i in range (self.m):
 60            self.M[j,i] += x[i]
 61
 62    def execute_server(self,privatized_data):
 63        """
 64        Executes the server-side operations, including updating the sketch matrix
 65        and estimating the frequencies.
 66
 67        Args:
 68            privatized_data (list): The privatized data from the client.
 69
 70        Returns:
 71            dict: A dictionary containing the estimated frequencies for each element.
 72        """
 73        with Progress() as progress:
 74            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
 75
 76            for data in privatized_data:
 77                self.update_sketch_matrix(data[0],data[1])
 78                progress.update(task, advance=1)
 79
 80            F_estimated = {}
 81            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
 82            for x in self.domain:
 83                F_estimated[x] = self.estimate_server(x)
 84                progress.update(task, advance=1)
 85                
 86        return F_estimated
 87
 88    def estimate_server(self,d):
 89        """
 90        Estimates the frequency of an element based on the current sketch matrix.
 91
 92        Args:
 93            d (any): The element whose frequency is to be estimated.
 94
 95        Returns:
 96            float: The estimated frequency of the element.
 97        """
 98        sum_aux = 0
 99        for i in range(self.k):
100            selected_hash = self.H[i]
101            sum_aux += self.M[i, selected_hash(d)]
102        
103        f_estimated = (self.m/(self.m-1))*((sum_aux/self.k)-(self.N/self.m))
104        return f_estimated
105    
106    def query_server(self, query_element):
107        """
108        Queries the server for the estimated frequency of an element.
109
110        Args:
111            query_element (any): The element to query.
112
113        Returns:
114            float or str: The estimated frequency of the element, or a message if the element is not in the domain.
115        """
116        if query_element not in self.domain:
117            return "Element not in the domain"
118        estimation = self.estimate_server(query_element)
119        return estimation

This class represents the server side of the Private Count-Mean Sketch (PCMS). It is responsible for updating the sketch matrix and providing frequency estimations.

Attributes: df (pandas.DataFrame): The dataset containing the values. epsilon (float): The privacy parameter epsilon. k (int): The number of hash functions. m (int): The size of the sketch. dataset (list): The list of values in the dataset. domain (list): The unique values in the dataset. N (int): The size of the dataset. H (list): The list of hash functions. M (numpy.ndarray): The sketch matrix.

privateCMSServer(epsilon, k, m, df, H)
26    def __init__(self, epsilon, k, m, df, H):
27        """
28        Initializes the privateCMSServer class with the given parameters.
29
30        Args:
31            epsilon (float): The privacy parameter epsilon.
32            k (int): The number of hash functions.
33            m (int): The size of the sketch.
34            df (pandas.DataFrame): The dataset containing the values.
35            H (list): The list of hash functions.
36        """
37        self.df = df
38        self.epsilon = epsilon
39        self.k = k
40        self.m = m
41        self.dataset = self.df['value'].tolist()
42        self.domain = self.df['value'].unique().tolist()
43        self.N = len(self.dataset)
44        self.H = H
45
46        # Creation of the sketch matrix
47        self.M = np.zeros((self.k, self.m))

Initializes the privateCMSServer class with the given parameters.

Args: epsilon (float): The privacy parameter epsilon. k (int): The number of hash functions. m (int): The size of the sketch. df (pandas.DataFrame): The dataset containing the values. H (list): The list of hash functions.

df
epsilon
k
m
dataset
domain
N
H
M
def update_sketch_matrix(self, v, j):
49    def update_sketch_matrix(self,v,j):
50        """
51        Updates the sketch matrix based on the given privatized data.
52
53        Args:
54            v (numpy.ndarray): The privatized vector.
55            j (int): The index of the hash function used.
56        """
57        c_e = (np.exp(self.epsilon/2)+1) / ((np.exp(self.epsilon/2))-1)
58        x = self.k * ((c_e/2) * v + (1/2) * np.ones_like(v))
59        for i in range (self.m):
60            self.M[j,i] += x[i]

Updates the sketch matrix based on the given privatized data.

Args: v (numpy.ndarray): The privatized vector. j (int): The index of the hash function used.

def execute_server(self, privatized_data):
62    def execute_server(self,privatized_data):
63        """
64        Executes the server-side operations, including updating the sketch matrix
65        and estimating the frequencies.
66
67        Args:
68            privatized_data (list): The privatized data from the client.
69
70        Returns:
71            dict: A dictionary containing the estimated frequencies for each element.
72        """
73        with Progress() as progress:
74            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
75
76            for data in privatized_data:
77                self.update_sketch_matrix(data[0],data[1])
78                progress.update(task, advance=1)
79
80            F_estimated = {}
81            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
82            for x in self.domain:
83                F_estimated[x] = self.estimate_server(x)
84                progress.update(task, advance=1)
85                
86        return F_estimated

Executes the server-side operations, including updating the sketch matrix and estimating the frequencies.

Args: privatized_data (list): The privatized data from the client.

Returns: dict: A dictionary containing the estimated frequencies for each element.

def estimate_server(self, d):
 88    def estimate_server(self,d):
 89        """
 90        Estimates the frequency of an element based on the current sketch matrix.
 91
 92        Args:
 93            d (any): The element whose frequency is to be estimated.
 94
 95        Returns:
 96            float: The estimated frequency of the element.
 97        """
 98        sum_aux = 0
 99        for i in range(self.k):
100            selected_hash = self.H[i]
101            sum_aux += self.M[i, selected_hash(d)]
102        
103        f_estimated = (self.m/(self.m-1))*((sum_aux/self.k)-(self.N/self.m))
104        return f_estimated

Estimates the frequency of an element based on the current sketch matrix.

Args: d (any): The element whose frequency is to be estimated.

Returns: float: The estimated frequency of the element.

def query_server(self, query_element):
106    def query_server(self, query_element):
107        """
108        Queries the server for the estimated frequency of an element.
109
110        Args:
111            query_element (any): The element to query.
112
113        Returns:
114            float or str: The estimated frequency of the element, or a message if the element is not in the domain.
115        """
116        if query_element not in self.domain:
117            return "Element not in the domain"
118        estimation = self.estimate_server(query_element)
119        return estimation

Queries the server for the estimated frequency of an element.

Args: query_element (any): The element to query.

Returns: float or str: The estimated frequency of the element, or a message if the element is not in the domain.

def run_private_cms_server(k, m, e, df, H, privatized_data):
def run_private_cms_server(k, m, e, df, H, privatized_data):
    """
    Runs the server-side operations for the Private Count-Mean Sketch, including
    estimating frequencies and querying the server.

    The privatized data is first saved to ``../data/private/privatized_data.csv``
    (relative to the current working directory); the directory is created if it
    does not exist. The estimated frequencies are then displayed and an
    interactive query loop runs until the user types ``exit``.

    Args:
        k (int): The number of hash functions.
        m (int): The size of the sketch.
        e (float): The privacy parameter epsilon.
        df (pandas.DataFrame): The dataset containing the values.
        H (list): The list of hash functions.
        privatized_data (list): The privatized data from the client.
    """
    # Initialize the server Count-Mean Sketch
    server = privateCMSServer(e, k, m, df, H)

    # Save the privatized data; make sure the target directory exists so that
    # to_csv does not fail on a fresh checkout.
    output_dir = os.path.join('..', 'data', 'private')
    os.makedirs(output_dir, exist_ok=True)
    privatized_data_file = os.path.join(output_dir, 'privatized_data.csv')
    pd.DataFrame(privatized_data).to_csv(privatized_data_file, index=False)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    display_results(df, f_estimated)

    # Query the server
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        if isinstance(estimation, str):
            # query_server answers with a message string for unknown elements;
            # formatting that string with ':.2f' would raise ValueError.
            print(estimation)
            continue
        print(f"The estimated frequency of {query} is {estimation:.2f}")

Runs the server-side operations for the Private Count-Mean Sketch, including estimating frequencies and querying the server.

Args: k (int): The number of hash functions. m (int): The size of the sketch. e (float): The privacy parameter epsilon. df (pandas.DataFrame): The dataset containing the values. H (list): The list of hash functions. privatized_data (list): The privatized data from the client.