Coverage for src/topsim/best.py: 18%

38 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-04 17:54 -0700

1from collections import defaultdict 

2from collections.abc import Callable 

3from heapq import heappop, heappush 

4 

5from extratools_core.set import add_to_set 

6 

7from .localtyping import Index, Output, StringSet 

8from .setsimilarity import check_sim 

9 

10 

def find_best(
    r_str: StringSet,
    s_strs: list[StringSet],
    s_index: Index,
    *,
    k: int,
    tie: bool,
    upbound_func: Callable[[int, int, int, int, int], float],
) -> Output:
    """Collect the ``k`` most similar entries of ``s_strs`` for ``r_str``.

    Candidates are discovered by looking up each item of ``r_str`` in
    ``s_index`` and are scored with ``check_sim``. Indices into ``s_strs``
    are grouped by the similarity value they obtained.

    Args:
        r_str: Query item sequence. It is iterated in order and its length
            feeds ``upbound_func``.
        s_strs: Candidate item sequences, addressed by the indices found in
            ``s_index``.
        s_index: Lookup from an item to ``(ln, p)`` pairs, where ``ln``
            indexes ``s_strs`` and ``p[0] + 1`` is handed to ``check_sim``
            (presumably the first position of the item inside that
            candidate -- confirm against the index builder).
        k: Number of results to keep. A non-positive ``k`` yields ``[]``.
        tie: If true, every candidate tied at the lowest retained
            similarity is kept even when that exceeds ``k``; if false, the
            lowest group is truncated so exactly ``k`` candidates remain.
        upbound_func: Takes five integers and returns a float that is
            compared against the current worst retained similarity
            (presumably an upper bound on the similarity still reachable).

    Returns:
        ``(similarity, indices)`` pairs sorted by similarity, highest first.
    """
    # With nothing to keep, the eviction logic below would empty the heap and
    # then read ``sim_heap[0]``; bail out before that can happen.
    if k <= 0:
        return []

    r_len = len(r_str)

    # Lowest similarity currently worth keeping; stays 0.0 until k candidates
    # have been collected.
    worst_sim: float = 0.0
    # Number of candidate indices currently stored across all of sim_map.
    total_num: int = 0

    # Min-heap of the distinct similarity values that are keys of sim_map.
    sim_heap: list[float] = []
    sim_map: dict[float, list[int]] = defaultdict(list)

    # Candidate indices that were already handed to check_sim.
    ln_set: set[int] = set()
    for i, item in enumerate(r_str):
        # Stop scanning once the bound for this position drops below the
        # worst similarity we would still accept.
        if upbound_func(r_len, i + 1, r_len - i, 1, 1) < worst_sim:
            break

        for ln, p in s_index[item]:
            # A falsy result is treated as "already seen" -- presumably
            # add_to_set reports whether ln was newly inserted.
            if not add_to_set(ln_set, ln):
                continue

            curr_sim: float | None = check_sim(
                worst_sim,
                upbound_func,
                r_str,
                s_strs[ln],
                i + 1,
                p[0] + 1,
                1,
            )
            # None means check_sim rejected the candidate.
            if curr_sim is None:
                continue

            # Push each distinct similarity once so the heap mirrors the
            # keys of sim_map.
            if curr_sim not in sim_map:
                heappush(sim_heap, curr_sim)
            sim_map[curr_sim].append(ln)
            total_num += 1

            if total_num > k:
                curr_worst_sim: float = sim_heap[0]
                curr_worst_num: int = len(sim_map[curr_worst_sim])

                if total_num - curr_worst_num >= k:
                    # The whole lowest group is surplus: drop it.
                    del sim_map[curr_worst_sim]
                    total_num -= curr_worst_num
                    heappop(sim_heap)
                elif not tie:
                    # Only part of the lowest group is surplus: trim its
                    # tail so that exactly k candidates remain.
                    del sim_map[curr_worst_sim][curr_worst_num - (total_num - k):]
                    total_num = k

            # Once k candidates are held, tighten the acceptance threshold.
            if total_num >= k:
                worst_sim = sim_heap[0]

    return sorted(sim_map.items(), key=lambda x: x[0], reverse=True)