Coverage for src/topsim/grammap.py: 47%

15 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-04 17:54 -0700

1from collections import Counter 

2from collections.abc import Iterable 

3from itertools import count 

4 

5from .localtyping import GramMap, RawStringSet, StringSet 

6 

7 

def create_gram_map(
    s_raw_str_sets: Iterable[RawStringSet],
) -> GramMap:
    """Build a gram-to-integer-id mapping ordered by ascending frequency.

    Every gram occurrence across ``s_raw_str_sets`` is tallied. The distinct
    grams are then ranked by ``(count, gram)`` and numbered 0, 1, 2, ... in
    that order, so the least frequent gram receives id 0 and grams with the
    same count are ordered by comparing the grams themselves.

    Args:
        s_raw_str_sets: Iterable of gram collections. Each collection is
            passed to ``Counter.update``; the iterable is consumed once.

    Returns:
        A dict mapping each distinct gram to its rank, with keys inserted
        in rank order. An empty input yields an empty dict.
    """
    gram_freqs: Counter = Counter()

    for s_raw_str_set in s_raw_str_sets:
        gram_freqs.update(s_raw_str_set)

    # Sort on (count, gram) so the ordering is fully deterministic even
    # when several grams share the same count.
    ranked = sorted(gram_freqs.items(), key=lambda item: (item[1], item[0]))

    # enumerate supplies the consecutive ids directly, with no external
    # counter mutated as a side effect of building the dict.
    return {gram: rank for rank, (gram, _) in enumerate(ranked)}

25 

26 

def update_gram_map(
    gram_map: GramMap,
    r_raw_str_set: RawStringSet,
) -> None:
    """Register, in place, the grams of ``r_raw_str_set`` missing from ``gram_map``.

    Grams already present keep their ids. Each gram not yet present is
    assigned the next id from the sequence -1, -2, -3, ... in the order the
    grams are iterated. A gram that repeats within ``r_raw_str_set`` is
    assigned an id only on its first occurrence.

    Args:
        gram_map: Mapping from gram to integer id; mutated by this call.
        r_raw_str_set: Grams to look up and, where unknown, add.
    """
    # NOTE(review): numbering restarts at -1 on every call, so grams added
    # by separate calls on the same gram_map can end up sharing an id --
    # confirm that callers are fine with that.
    next_id = -1

    for gram in r_raw_str_set:
        if gram in gram_map:
            continue
        gram_map[gram] = next_id
        next_id -= 1

38 

39 

def apply_gram_map(
    gram_map: GramMap,
    r_raw_str_set: RawStringSet,
) -> StringSet:
    """Translate grams into their ids and return the ids in ascending order.

    Args:
        gram_map: Mapping from gram to integer id.
        r_raw_str_set: Grams to translate; each one is looked up in
            ``gram_map``.

    Returns:
        A new list of the looked-up ids, sorted ascending, with one entry
        per gram yielded by ``r_raw_str_set``.

    Raises:
        KeyError: If a gram is not a key of ``gram_map``.
    """
    gram_ids = [gram_map[gram] for gram in r_raw_str_set]
    gram_ids.sort()
    return gram_ids