src.test.test_doubleDB
import os
import sys
import pandas as pd

# Make the parent directory of this file importable so the project packages
# below resolve when the script is run directly.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from private_count_mean.private_cms_client import run_private_cms_client
from scripts.preprocess import run_data_processor


def doubleDB(file_name):
    """
    Doubles the size of a given dataset by concatenating it with itself.

    Args:
        file_name (str): The name of the Excel file, including its extension
            (e.g. 'dataOviedo.xlsx'). It is looked up in '../../data/raw',
            resolved relative to the current working directory.

    Returns:
        pandas.DataFrame: The rows of the dataset followed by the same rows
        again, with a freshly numbered index.

    Nothing is written to disk; the doubled dataset only exists in memory.
    """
    # Load the dataset
    excel_file = os.path.join('..', '..', 'data', 'raw', file_name)
    df = pd.read_excel(excel_file)

    # ignore_index=True renumbers the rows instead of repeating the
    # original index labels in the second half.
    df_doubled = pd.concat([df, df], ignore_index=True)

    return df_doubled


def run_double_test():
    """
    Runs a test by evaluating the original and doubled versions of a dataset
    using the Private Count Mean Sketch (PrivateCMS).

    The test runs the PrivateCMS client on both the original and doubled
    datasets, printing a banner before each run. Any report shown after the
    banner is produced by run_private_cms_client itself.

    The dataset used in this test is 'dataOviedo'.
    """
    excel_file = os.path.join('..', '..', 'data', 'raw', 'dataOviedo.xlsx')
    df = pd.read_excel(excel_file)

    print(" ========= Original DB ===========")
    run_private_cms_client(1024, 256, 50, df)

    # Build the doubled dataset (this re-reads the same Excel file)
    df = doubleDB('dataOviedo.xlsx')

    print(" ========= Double DB ===========")
    run_private_cms_client(1024, 256, 50, df)


if __name__ == '__main__':
    run_double_test()
def
doubleDB(file_name):
def doubleDB(file_name: str) -> pd.DataFrame:
    """
    Doubles the size of a given dataset by concatenating it with itself.

    Args:
        file_name (str): The name of the Excel file, including its extension
            (e.g. 'dataOviedo.xlsx'). It is looked up in '../../data/raw',
            resolved relative to the current working directory.

    Returns:
        pandas.DataFrame: The rows of the dataset followed by the same rows
        again, with a freshly numbered index.

    Nothing is written to disk; the doubled dataset only exists in memory.
    """
    # Load the dataset
    excel_file = os.path.join('..', '..', 'data', 'raw', file_name)
    df = pd.read_excel(excel_file)

    # ignore_index=True renumbers the rows instead of repeating the
    # original index labels in the second half.
    df_doubled = pd.concat([df, df], ignore_index=True)

    return df_doubled
Doubles the size of a given dataset by concatenating it with itself.
Args: file_name (str): The name of the dataset file, including its extension (e.g. 'dataOviedo.xlsx'), as found in the raw data directory.
Returns: pandas.DataFrame: The dataset concatenated with itself, with a freshly numbered index.
Note: the doubled dataset is only returned in memory; the function does not save a CSV file with '_doubled' added to the original file name.
def
run_double_test():
def run_double_test():
    """
    Compare the Private Count Mean Sketch (PrivateCMS) client on the
    'dataOviedo' dataset and on a doubled copy of that dataset.

    The raw Excel file is loaded and passed to the client first; the doubled
    version is then built with doubleDB and passed to the client with the
    same arguments. A banner is printed ahead of each run; anything shown
    after a banner comes from run_private_cms_client itself.
    """
    dataset_file = 'dataOviedo.xlsx'
    raw_dir = os.path.join('..', '..', 'data', 'raw')
    # Positional arguments forwarded unchanged to the client on both runs.
    client_args = (1024, 256, 50)

    original = pd.read_excel(os.path.join(raw_dir, dataset_file))
    print(" ========= Original DB ===========")
    run_private_cms_client(*client_args, original)

    # doubleDB loads the same file again and stacks it on top of itself.
    doubled = doubleDB(dataset_file)
    print(" ========= Double DB ===========")
    run_private_cms_client(*client_args, doubled)
Runs a test by evaluating the original and doubled versions of a dataset using the Private Count Mean Sketch (PrivateCMS).
The test runs the PrivateCMS client on both the original and doubled datasets, and displays the error table after each run.
The dataset used in this test is 'dataOviedo'.