Coverage for tests\unit\maze_dataset\tokenization\test_vocab.py: 100%
21 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-23 12:49 -0700
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-23 12:49 -0700
1import pytest
3from maze_dataset.constants import (
4 SPECIAL_TOKENS,
5 VOCAB,
6 VOCAB_LIST,
7 VOCAB_TOKEN_TO_INDEX,
8)
def test_special_tokens_base():
    """Exercise the mapping-style accessors of ``SPECIAL_TOKENS``."""
    known_key = "ADJLIST_START"
    known_token = "<ADJLIST_START>"
    missing_key = "NON_EXISTENT_KEY"

    # item access: a known key resolves, an unknown key raises KeyError
    assert SPECIAL_TOKENS[known_key] == known_token
    with pytest.raises(KeyError):
        SPECIAL_TOKENS[missing_key]

    # size of the container
    assert len(SPECIAL_TOKENS) == 11

    # membership is checked against keys
    assert known_key in SPECIAL_TOKENS
    assert missing_key not in SPECIAL_TOKENS

    # the values / items / keys views each expose the known entry
    assert known_token in SPECIAL_TOKENS.values()
    assert (known_key, known_token) in SPECIAL_TOKENS.items()
    # `.keys()` is called explicitly on purpose: the method itself is under test
    assert known_key in SPECIAL_TOKENS.keys()
def test_vocab():
    """Spot-check the vocabulary size and a few pinned token/index pairs."""
    assert len(VOCAB) == 4096
    assert VOCAB.CTT_10 == "10"

    # index -> token lookups through the list form
    for index, token in ((0, "<ADJLIST_START>"), (706, "&")):
        assert VOCAB_LIST[index] == token

    # token -> index lookups through the reverse mapping
    pinned_indices = (
        ("<UNK>", 19),
        ("0", 320),
        ("-1", 703),
        ("(0,0)", 1596),
    )
    for token, index in pinned_indices:
        assert VOCAB_TOKEN_TO_INDEX[token] == index