Coverage for tests\unit\maze_dataset\processing\test_collect_gen_metadata.py: 100%
17 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-23 12:49 -0700
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-23 12:49 -0700
1from zanj import ZANJ
3from maze_dataset import MazeDataset, MazeDatasetConfig
4from maze_dataset.dataset.maze_dataset import SERIALIZE_MINIMAL_THRESHOLD
def test_remove_duplicates():
    """Smoke-test the ``remove_duplicates`` filter on a small dataset.

    Builds a dataset from a config with ``grid_n=5`` and ``n_mazes=10``
    (``load_local=False``, ``save_local=True``, base path
    ``tests/_temp/test_collect/``), then applies
    ``filter_by.remove_duplicates``. The test contains no assertions: it
    passes as long as none of the calls raise, and only prints the dataset
    sizes before and after filtering.
    """
    # Serializer instance handed to `from_config` via its `zanj` argument.
    zanj_serializer = ZANJ(external_list_threshold=1000)
    config = MazeDatasetConfig(name="test_collect", grid_n=5, n_mazes=10)

    dataset = MazeDataset.from_config(
        config,
        load_local=False,
        save_local=True,
        local_base_path="tests/_temp/test_collect/",
        verbose=True,
        zanj=zanj_serializer,
    )
    print(f"Generated {len(dataset)} mazes")

    # Thresholds passed straight to the filter; their exact semantics are
    # defined by maze_dataset, not by this test.
    dedup_kwargs = {
        "minimum_difference_connection_list": 0,
        "minimum_difference_solution": 1,
    }
    dataset = dataset.filter_by.remove_duplicates(**dedup_kwargs)
    print(f"After removing duplicates, we have {len(dataset)} mazes")
def test_remove_duplicates_large():
    """Smoke-test ``remove_duplicates`` on a dataset just above the threshold.

    Same flow as the small test, but ``n_mazes`` is
    ``SERIALIZE_MINIMAL_THRESHOLD + 1``.
    NOTE(review): presumably this crosses the size at which the dataset
    changes how it is serialized -- confirm against maze_dataset.

    Before filtering, the dataset's ``generation_metadata_collected`` and
    the first maze's ``generation_meta`` are printed for inspection. The
    test contains no assertions: it passes as long as nothing raises.
    """
    # Serializer instance handed to `from_config` via its `zanj` argument.
    zanj_serializer = ZANJ(external_list_threshold=1000)
    maze_count = SERIALIZE_MINIMAL_THRESHOLD + 1
    config = MazeDatasetConfig(name="test_collect", grid_n=5, n_mazes=maze_count)

    # The local name `dataset` is kept on purpose: the `{expr = }` f-strings
    # below echo the expression text, so renaming it would change the output.
    dataset = MazeDataset.from_config(
        config,
        load_local=False,
        save_local=True,
        local_base_path="tests/_temp/test_collect/",
        verbose=True,
        zanj=zanj_serializer,
    )
    print(f"Generated {len(dataset)} mazes")

    print(f"\t{dataset.generation_metadata_collected = }")
    print(f"\t{dataset.mazes[0].generation_meta = }")

    # Thresholds passed straight to the filter; their exact semantics are
    # defined by maze_dataset, not by this test.
    dedup_kwargs = {
        "minimum_difference_connection_list": 0,
        "minimum_difference_solution": 1,
    }
    dataset = dataset.filter_by.remove_duplicates(**dedup_kwargs)
    print(f"After removing duplicates, we have {len(dataset)} mazes")