Coverage for tests\unit\maze_dataset\processing\test_collect_gen_metadata.py: 100%

17 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-02-23 12:49 -0700

1from zanj import ZANJ 

2 

3from maze_dataset import MazeDataset, MazeDatasetConfig 

4from maze_dataset.dataset.maze_dataset import SERIALIZE_MINIMAL_THRESHOLD 

5 

6 

def test_remove_duplicates():
    """Smoke-test `filter_by.remove_duplicates` on a small generated dataset.

    Generates a fresh dataset (`n_mazes=10`, `grid_n=5`) with local loading
    disabled, then runs the duplicate-removal filter on it. There are no
    assertions: the test passes as long as nothing raises. The maze counts
    before and after filtering are printed for manual inspection.
    """
    config: MazeDatasetConfig = MazeDatasetConfig(
        name="test_collect", grid_n=5, n_mazes=10
    )

    # load_local=False asks for a fresh build rather than a cached copy;
    # save_local=True with this base path presumably writes the result
    # under tests/_temp/ -- confirm against MazeDataset.from_config.
    build_options = dict(
        load_local=False,
        save_local=True,
        local_base_path="tests/_temp/test_collect/",
        verbose=True,
        zanj=ZANJ(external_list_threshold=1000),
    )
    generated: MazeDataset = MazeDataset.from_config(config, **build_options)
    print(f"Generated {len(generated)} mazes")

    deduplicated = generated.filter_by.remove_duplicates(
        minimum_difference_connection_list=0,
        minimum_difference_solution=1,
    )
    print(f"After removing duplicates, we have {len(deduplicated)} mazes")

28 

29 

def test_remove_duplicates_large():
    """Smoke-test `filter_by.remove_duplicates` on a larger dataset.

    Same flow as the small variant, but requests
    `SERIALIZE_MINIMAL_THRESHOLD + 1` mazes -- presumably so the dataset
    crosses the threshold at which the library changes how it serializes
    (confirm against `maze_dataset.dataset.maze_dataset`). It also prints the
    dataset-level `generation_metadata_collected` and the first maze's
    `generation_meta` before filtering. There are no assertions: the test
    passes as long as nothing raises.
    """
    config: MazeDatasetConfig = MazeDatasetConfig(
        name="test_collect",
        grid_n=5,
        n_mazes=SERIALIZE_MINIMAL_THRESHOLD + 1,
    )
    serializer = ZANJ(external_list_threshold=1000)

    dataset: MazeDataset = MazeDataset.from_config(
        config,
        load_local=False,
        save_local=True,
        local_base_path="tests/_temp/test_collect/",
        verbose=True,
        zanj=serializer,
    )
    print(f"Generated {len(dataset)} mazes")

    # The `=` specifier echoes the expression text, so the local must stay
    # named `dataset` for the printed output to remain unchanged.
    print(f"\t{dataset.generation_metadata_collected = }")
    print(f"\t{dataset.mazes[0].generation_meta = }")

    dataset = dataset.filter_by.remove_duplicates(
        minimum_difference_connection_list=0,
        minimum_difference_solution=1,
    )
    print(f"After removing duplicates, we have {len(dataset)} mazes")