Coverage for src/topsim/grammap.py: 47%
15 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-04 17:54 -0700
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-04 17:54 -0700
1from collections import Counter
2from collections.abc import Iterable
3from itertools import count
5from .localtyping import GramMap, RawStringSet, StringSet
def create_gram_map(
    s_raw_str_sets: Iterable[RawStringSet],
) -> GramMap:
    """Build a gram -> integer id mapping ordered by ascending frequency.

    Every gram yielded by the collections in ``s_raw_str_sets`` is counted,
    then the distinct grams are numbered consecutively from 0. The gram with
    the lowest count receives id 0; grams with equal counts are ordered by
    the gram value itself, so the numbering does not depend on the order in
    which the collections were supplied.

    Args:
        s_raw_str_sets: Iterable of gram collections. It is consumed once,
            so a generator is acceptable.

    Returns:
        A dict mapping each distinct gram to its id in ``0..n-1``. Empty
        when no grams were seen.
    """
    gram_freqs: Counter = Counter()

    for s_raw_str_set in s_raw_str_sets:
        gram_freqs.update(s_raw_str_set)

    # Sort on (count, gram): count is the primary key, the gram breaks ties.
    ranked = sorted(
        gram_freqs.items(),
        key=lambda item: (item[1], item[0]),
    )

    return {gram: gram_id for gram_id, (gram, _) in enumerate(ranked)}
def update_gram_map(
    gram_map: GramMap,
    r_raw_str_set: RawStringSet,
) -> None:
    """Add the grams of ``r_raw_str_set`` that are missing from ``gram_map``.

    Missing grams are given negative ids -1, -2, ... in the order they are
    encountered while iterating ``r_raw_str_set``. Grams already present keep
    their existing id. ``gram_map`` is modified in place.

    Note: numbering restarts at -1 on every call, so ids handed out by
    separate calls on the same ``gram_map`` can coincide.

    Args:
        gram_map: Mapping to extend; mutated in place.
        r_raw_str_set: Grams that must all have an id after the call.
    """
    gram_mapper = count(start=-1, step=-1)

    # The generator is consumed lazily by dict.update, so the membership
    # test sees grams inserted earlier in this same call and a repeated
    # gram does not consume a second id.
    gram_map.update(
        (gram, next(gram_mapper))
        for gram in r_raw_str_set
        if gram not in gram_map
    )
def apply_gram_map(
    gram_map: GramMap,
    r_raw_str_set: RawStringSet,
) -> StringSet:
    """Translate grams to their ids and return the ids in ascending order.

    Args:
        gram_map: Mapping from gram to id; it must contain every gram in
            ``r_raw_str_set``.
        r_raw_str_set: Grams to translate.

    Returns:
        The ids of the grams in ``r_raw_str_set``, sorted ascending.

    Raises:
        KeyError: If a gram in ``r_raw_str_set`` is not a key of
            ``gram_map``.
    """
    return sorted(gram_map[gram] for gram in r_raw_str_set)