Coverage for maze_dataset/tokenization/__init__.py: 100% (5 statements)


1"""turning a maze into text 

2 

3- `MazeTokenizerModular` is the new recommended way to do this as of 1.0.0 

4- legacy `TokenizationMode` enum and `MazeTokenizer` class for supporting existing code 

5- a whole lot of helper classes and functions 

6 

7""" 

8 

9from maze_dataset.tokenization.maze_tokenizer_legacy import ( 

10 MazeTokenizer, 

11 TokenizationMode, 

12 get_tokens_up_to_path_start, 

13) 

14from maze_dataset.tokenization.modular.element_base import _TokenizerElement 

15from maze_dataset.tokenization.modular.elements import ( 

16 AdjListTokenizers, 

17 CoordTokenizers, 

18 EdgeGroupings, 

19 EdgePermuters, 

20 EdgeSubsets, 

21 PathTokenizers, 

22 PromptSequencers, 

23 StepSizes, 

24 StepTokenizers, 

25 TargetTokenizers, 

26) 

27from maze_dataset.tokenization.modular.maze_tokenizer_modular import ( 

28 MazeTokenizerModular, 

29) 

30 

31# we don't sort alphabetically on purpose, we sort by the type 

32__all__ = [ 

33 # submodules 

34 "modular", 

35 "common", 

36 "maze_tokenizer_legacy", 

37 "maze_tokenizer", 

38 # legacy tokenizer 

39 "MazeTokenizer", 

40 "TokenizationMode", 

41 # MMT 

42 "MazeTokenizerModular", 

43 # element base 

44 "_TokenizerElement", 

45 # elements 

46 "PromptSequencers", 

47 "CoordTokenizers", 

48 "AdjListTokenizers", 

49 "EdgeGroupings", 

50 "EdgePermuters", 

51 "EdgeSubsets", 

52 "TargetTokenizers", 

53 "StepSizes", 

54 "StepTokenizers", 

55 "PathTokenizers", 

56 # helpers 

57 "get_tokens_up_to_path_start", 

58]
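
For orientation, a minimal usage sketch of the tokenizers exported above (not part of this file). It assumes maze_dataset's dataset-generation API (MazeDataset, MazeDatasetConfig, LatticeMazeGenerators) and that a solved maze's as_tokens method accepts a tokenizer instance; treat the exact constructor arguments and import paths as assumptions rather than a definitive reference.

# sketch only, not part of maze_dataset/tokenization/__init__.py
# assumes the dataset-generation API and the as_tokens signature described above
from maze_dataset import MazeDataset, MazeDatasetConfig
from maze_dataset.generation import LatticeMazeGenerators
from maze_dataset.tokenization import (
    MazeTokenizer,
    MazeTokenizerModular,
    TokenizationMode,
)

# build a tiny dataset containing a single solved maze
cfg = MazeDatasetConfig(
    name="demo",
    grid_n=5,
    n_mazes=1,
    maze_ctor=LatticeMazeGenerators.gen_dfs,
)
dataset = MazeDataset.from_config(cfg)
maze = dataset[0]

# recommended path as of 1.0.0: the modular tokenizer with default elements
tokenizer = MazeTokenizerModular()
tokens = maze.as_tokens(tokenizer)  # list of string tokens for this maze
print(tokens[:10])

# legacy path, kept only to support existing code
# (tokenization_mode is assumed to be a constructor field of MazeTokenizer)
legacy_tokenizer = MazeTokenizer(tokenization_mode=TokenizationMode.AOTP_UT_uniform)
legacy_tokens = maze.as_tokens(legacy_tokenizer)

The default MazeTokenizerModular() presumably composes the element classes re-exported here (PromptSequencers, CoordTokenizers, AdjListTokenizers, and so on, all built on _TokenizerElement), which is why this __init__.py exposes them alongside the tokenizer itself.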