src.test.test_compare_methods

 1import os
 2import sys
 3from tabulate import tabulate
 4
 5sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 6
 7from private_count_mean.private_cms_client import run_private_cms_client
 8from private_count_sketch.private_cs_client import run_private_cs_client
 9from private_hadamard_count_mean.private_hcms_client import run_private_hcms_client
10
def run_distribution_test():
    """
    Run the three private sketching clients over a fixed list of (k, m) settings.

    The purpose of this test is to compare the accuracy of three different private estimation techniques,
    one per client imported by this module:

    1. **Private Count-Mean Sketch (CMS)** -- ``run_private_cms_client``
    2. **Private Count Sketch (CS)** -- ``run_private_cs_client``
    3. **Private Hadamard Count-Mean Sketch (HCMS)** -- ``run_private_hcms_client``

    All parameters are hard-coded in the body:

    - `k`: A list of values determining different sketch sizes (passed as the first client argument).
    - `m`: A list of values controlling the memory allocation for each method (second client argument).
    - `e`: Privacy parameter (presumably epsilon, controlling differential privacy strength).
    - `filename`: The input dataset name (`dataOviedo`).

    `k` and `m` are consumed pairwise: the i-th run uses `k[i]` together with `m[i]`. This is
    not a full cross product of the two lists. For every pair, a header line is printed, then a
    banner followed by one client call for each method. The values returned by the clients are
    discarded; any error-table output is presumably printed by the clients themselves
    (NOTE(review): confirm against the client implementations).
    """
    k = [16, 128, 128, 1024, 32768]
    m = [16, 16, 1024, 256, 256]
    e = 2

    filename = "dataOviedo"

    # Banner text paired with the client it announces; order matches the original output order.
    methods = (
        (" \n========= CMS ==========", run_private_cms_client),
        (" \n========= CS ===========", run_private_cs_client),
        (" \n========= HCMS ===========", run_private_hcms_client),
    )

    # Walk both parameter lists in lockstep instead of indexing them by position.
    for k_value, m_value in zip(k, m):
        print(f"\n================== k: {k_value}, m: {m_value} ==================")
        for banner, run_client in methods:
            print(banner)
            run_client(k_value, m_value, e, filename)
42
43
# Script entry point: run the comparison only on direct execution, not on import.
if __name__ == "__main__":
    run_distribution_test()
def run_distribution_test():
def run_distribution_test():
    """
    Run the three private sketching clients over a fixed list of (k, m) settings.

    The purpose of this test is to compare the accuracy of three different private estimation techniques,
    one per client imported by this module:

    1. **Private Count-Mean Sketch (CMS)** -- ``run_private_cms_client``
    2. **Private Count Sketch (CS)** -- ``run_private_cs_client``
    3. **Private Hadamard Count-Mean Sketch (HCMS)** -- ``run_private_hcms_client``

    All parameters are hard-coded in the body:

    - `k`: A list of values determining different sketch sizes (passed as the first client argument).
    - `m`: A list of values controlling the memory allocation for each method (second client argument).
    - `e`: Privacy parameter (presumably epsilon, controlling differential privacy strength).
    - `filename`: The input dataset name (`dataOviedo`).

    `k` and `m` are consumed pairwise: the i-th run uses `k[i]` together with `m[i]`. This is
    not a full cross product of the two lists. For every pair, a header line is printed, then a
    banner followed by one client call for each method. The values returned by the clients are
    discarded; any error-table output is presumably printed by the clients themselves
    (NOTE(review): confirm against the client implementations).
    """
    k = [16, 128, 128, 1024, 32768]
    m = [16, 16, 1024, 256, 256]
    e = 2

    filename = "dataOviedo"

    # Banner text paired with the client it announces; order matches the original output order.
    methods = (
        (" \n========= CMS ==========", run_private_cms_client),
        (" \n========= CS ===========", run_private_cs_client),
        (" \n========= HCMS ===========", run_private_hcms_client),
    )

    # Walk both parameter lists in lockstep instead of indexing them by position.
    for k_value, m_value in zip(k, m):
        print(f"\n================== k: {k_value}, m: {m_value} ==================")
        for banner, run_client in methods:
            print(banner)
            run_client(k_value, m_value, e, filename)

This script performs a distribution test using different private count-sketch methods.

The purpose of this test is to compare the accuracy of three different private estimation techniques:

  1. Private Count-Mean Sketch (CMS)
  2. Private Count Sketch (CS)
  3. Private Hadamard Count-Mean Sketch (HCMS)

These methods are used to estimate frequency distributions while preserving privacy. The test runs each method with different parameters and prints the corresponding error tables.

  • k: A list of values determining different sketch sizes.
  • m: A list of values controlling the memory allocation for each method.
  • e: Privacy parameter (presumably epsilon, controlling differential privacy strength).
  • filename: The input dataset file (dataOviedo).

For each (k, m) pair — the i-th value of k taken together with the i-th value of m, not every combination of the two lists — the script runs the three private sketching methods and prints their respective error tables.