src.test.test_doubleDB

 1import os
 2import sys
 3import pandas as pd
 4
 5sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 6
 7from private_count_mean.private_cms_client import run_private_cms_client
 8from scripts.preprocess import run_data_processor
 9
def doubleDB(file_name):
    """
    Load an Excel dataset and return it concatenated with itself.

    Args:
        file_name (str): Name of the Excel file, including its extension
            (e.g. 'dataOviedo.xlsx'). It is looked up under '../../data/raw',
            resolved relative to the current working directory.

    Returns:
        pandas.DataFrame: Every row of the dataset followed by the same rows
        again, re-indexed from 0. Nothing is written to disk.
    """
    excel_file = os.path.join('..', '..', 'data', 'raw', file_name)
    df = pd.read_excel(excel_file)

    # ignore_index=True renumbers the rows so the second copy does not repeat
    # the index labels of the first copy.
    return pd.concat([df, df], ignore_index=True)
29
def run_double_test():
    """
    Run the Private Count Mean Sketch client on 'dataOviedo.xlsx' twice.

    The first run uses the dataset as read from '../../data/raw'; the second
    uses the result of doubleDB on the same file. A banner line is printed
    before each run so the two outputs can be told apart.
    """
    dataset_name = 'dataOviedo.xlsx'
    client_args = (1024, 256, 50)  # identical client settings for both runs

    original = pd.read_excel(os.path.join('..', '..', 'data', 'raw', dataset_name))
    print(" ========= Original DB ===========")
    run_private_cms_client(*client_args, original)

    # Second pass: same file, every row duplicated.
    doubled = doubleDB(dataset_name)
    print(" ========= Double DB ===========")
    run_private_cms_client(*client_args, doubled)
51
# Script entry point: run the comparison only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run_double_test()
def doubleDB(file_name):
def doubleDB(file_name):
    """
    Load an Excel dataset and return it concatenated with itself.

    Args:
        file_name (str): Name of the Excel file, including its extension
            (e.g. 'dataOviedo.xlsx'). It is looked up under '../../data/raw',
            resolved relative to the current working directory.

    Returns:
        pandas.DataFrame: Every row of the dataset followed by the same rows
        again, re-indexed from 0. Nothing is written to disk.
    """
    excel_file = os.path.join('..', '..', 'data', 'raw', file_name)
    df = pd.read_excel(excel_file)

    # ignore_index=True renumbers the rows so the second copy does not repeat
    # the index labels of the first copy.
    return pd.concat([df, df], ignore_index=True)

Doubles the size of a given dataset by concatenating it with itself.

Args: file_name (str): The name of the Excel dataset file, including its extension (e.g. 'dataOviedo.xlsx'); it is looked up under '../../data/raw'.

Returns: pandas.DataFrame: The dataset concatenated with itself, re-indexed from 0.

The doubled dataset is only returned in memory; no '_doubled' file is written to disk.

def run_double_test():
def run_double_test():
    """
    Run the Private Count Mean Sketch client on 'dataOviedo.xlsx' twice.

    The first run uses the dataset as read from '../../data/raw'; the second
    uses the result of doubleDB on the same file. A banner line is printed
    before each run so the two outputs can be told apart.
    """
    dataset_name = 'dataOviedo.xlsx'
    client_args = (1024, 256, 50)  # identical client settings for both runs

    original = pd.read_excel(os.path.join('..', '..', 'data', 'raw', dataset_name))
    print(" ========= Original DB ===========")
    run_private_cms_client(*client_args, original)

    # Second pass: same file, every row duplicated.
    doubled = doubleDB(dataset_name)
    print(" ========= Double DB ===========")
    run_private_cms_client(*client_args, doubled)

Runs a test by evaluating the original and doubled versions of a dataset using the Private Count Mean Sketch (PrivateCMS).

The test runs the PrivateCMS client on both the original and doubled datasets, and displays the error table after each run.

The dataset used in this test is 'dataOviedo'.