src.test.test_private_cms_client

 1import os
 2import sys
 3
 4sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 5from private_count_mean.private_cms_client import run_private_cms_client
 6from tabulate import tabulate
 7
 8def test_algoritmos():
 9    """
10    Runs a test for various parameter combinations using the Private Count Mean Sketch algorithm.
11
12    This function tests the algorithm by passing different values of k (number of hash functions) and 
13    m (number of counters) to the `run_private_cms_client` function. The real and estimated frequencies
14    for different elements are compared, and the results are displayed in a tabulated format.
15    
16    The test data used is based on the 'dataOviedo' dataset, and the error tolerance (e) is set to 50.
17
18    The results include:
19        - Real frequency
20        - Real percentage
21        - Estimated frequency
22        - Estimated percentage
23        - Estimation difference
24        - Percentage error
25    
26    The results are printed in a tabular format using the `tabulate` library.
27    """
28    excel_file = os.path.join(os.path.join('..', '..', 'data', 'raw'), 'dataOviedo.xlsx') 
29    df = pd.read_excel(excel_file)
30    
31    e = 50
32    k = [16, 128, 128, 1024, 32768]
33    m = [16, 16, 1024, 256, 256]
34
35    general_table = []
36
37    headers=[
38        "Element", "Real Frequency", "Real Percentage", 
39        "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
40        "Percentage Error"
41    ]
42
43    for i in range(len(k)):
44        _, data_table, _ = run_private_cms_client(k[i], m[i], e, df)
45
46        data_dicts = [dict(zip(headers, row)) for row in data_table]
47
48        for data_dict in data_dicts:
49            general_table.append([
50                k[i], m[i], 
51                data_dict.get("Element", ""),
52                data_dict.get("Real Frequency", ""),
53                data_dict.get("Real Percentage", ""),
54                data_dict.get("Estimated Frequency", ""),
55                data_dict.get("Estimated Percentage", ""),
56                data_dict.get("Estimation Difference", ""),
57                data_dict.get("Percentage Error", ""),
58            ])
59            
60
61    headers=[
62        "k", "m", "Element", "Real Frequency", "Real Percentage", 
63        "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
64        "Percentage Error"
65    ]
66
67    print(tabulate(general_table, headers=headers, tablefmt="grid"))
68    
# Run the parameter sweep directly when this file is executed as a script.
if __name__ == '__main__':
    test_algoritmos()
def test_algoritmos():
 9def test_algoritmos():
10    """
11    Runs a test for various parameter combinations using the Private Count Mean Sketch algorithm.
12
13    This function tests the algorithm by passing different values of k (number of hash functions) and 
14    m (number of counters) to the `run_private_cms_client` function. The real and estimated frequencies
15    for different elements are compared, and the results are displayed in a tabulated format.
16    
17    The test data used is based on the 'dataOviedo' dataset, and the error tolerance (e) is set to 50.
18
19    The results include:
20        - Real frequency
21        - Real percentage
22        - Estimated frequency
23        - Estimated percentage
24        - Estimation difference
25        - Percentage error
26    
27    The results are printed in a tabular format using the `tabulate` library.
28    """
29    excel_file = os.path.join(os.path.join('..', '..', 'data', 'raw'), 'dataOviedo.xlsx') 
30    df = pd.read_excel(excel_file)
31    
32    e = 50
33    k = [16, 128, 128, 1024, 32768]
34    m = [16, 16, 1024, 256, 256]
35
36    general_table = []
37
38    headers=[
39        "Element", "Real Frequency", "Real Percentage", 
40        "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
41        "Percentage Error"
42    ]
43
44    for i in range(len(k)):
45        _, data_table, _ = run_private_cms_client(k[i], m[i], e, df)
46
47        data_dicts = [dict(zip(headers, row)) for row in data_table]
48
49        for data_dict in data_dicts:
50            general_table.append([
51                k[i], m[i], 
52                data_dict.get("Element", ""),
53                data_dict.get("Real Frequency", ""),
54                data_dict.get("Real Percentage", ""),
55                data_dict.get("Estimated Frequency", ""),
56                data_dict.get("Estimated Percentage", ""),
57                data_dict.get("Estimation Difference", ""),
58                data_dict.get("Percentage Error", ""),
59            ])
60            
61
62    headers=[
63        "k", "m", "Element", "Real Frequency", "Real Percentage", 
64        "Estimated Frequency", "Estimated Percentage", "Estimation Difference", 
65        "Percentage Error"
66    ]
67
68    print(tabulate(general_table, headers=headers, tablefmt="grid"))

Runs a test for various parameter combinations using the Private Count Mean Sketch algorithm.

This function tests the algorithm by passing different values of k (number of hash functions) and m (number of counters) to the run_private_cms_client function. The real and estimated frequencies for different elements are compared, and the results are displayed in a tabulated format.

The test data used is based on the 'dataOviedo' dataset, and the error tolerance (e) is set to 50.

The results include the real frequency, real percentage, estimated frequency, estimated percentage, estimation difference, and percentage error for each element.

The results are printed in a tabular format using the tabulate library.