Coverage for src/abcd_graph/graph/core/build.py: 98%

102 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2024-11-17 17:02 +0100

1# Copyright (c) 2024 Jordan Barrett & Aleksander Wojnarowicz 

2# 

3# Permission is hereby granted, free of charge, to any person obtaining a copy 

4# of this software and associated documentation files (the "Software"), to deal 

5# in the Software without restriction, including without limitation the rights 

6# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 

7# copies of the Software, and to permit persons to whom the Software is 

8# furnished to do so, subject to the following conditions: 

9# 

10# The above copyright notice and this permission notice shall be included in all 

11# copies or substantial portions of the Software. 

12# 

13# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 

14# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 

15# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 

16# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 

17# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 

18# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 

19# SOFTWARE. 

20 

21__all__ = [ 

22 "build_communities", 

23 "build_degrees", 

24 "assign_degrees", 

25 "split_degrees", 

26 "build_community_sizes", 

27 "add_outliers", 

28] 

29 

30from typing import Any 

31 

32import numpy as np 

33from numpy.typing import NDArray 

34 

35from abcd_graph.graph.core.constants import OUTLIER_COMMUNITY_ID 

36from abcd_graph.graph.core.utils import ( 

37 powerlaw_distribution, 

38 rand_round, 

39) 

40 

41 

def build_degrees(n: int, gamma: float, min_degree: int, max_degree: int) -> NDArray[np.int64]:
    """Draw ``n`` vertex degrees and return them in descending order.

    Degrees are sampled (with replacement) from the integers
    ``min_degree..max_degree`` inclusive, weighted by
    ``powerlaw_distribution(candidates, gamma)``.

    If the sampled degrees add up to an odd number, the first (largest)
    entry is increased by one so the total is even. As a consequence the
    largest returned degree can be ``max_degree + 1``.

    Args:
        n: Number of degrees to draw.
        gamma: Parameter forwarded to ``powerlaw_distribution``
            (presumably the power-law exponent -- confirm against the helper).
        min_degree: Smallest degree that can be drawn.
        max_degree: Largest degree that can be drawn.

    Returns:
        Array of ``n`` degrees sorted from largest to smallest.
    """
    candidates = np.arange(min_degree, max_degree + 1)
    weights = powerlaw_distribution(candidates, gamma)

    sampled = np.random.choice(candidates, size=n, p=weights)
    degrees = np.sort(sampled)[::-1]

    # An odd degree sum is fixed up by bumping the largest degree.
    total_is_odd = degrees.sum() % 2 == 1
    if total_is_odd:
        degrees[0] += 1

    return degrees

53 

54 

def build_community_sizes(n: int, beta: float, min_community_size: int, max_community_size: int) -> NDArray[np.int64]:
    """Draw community sizes that add up to exactly ``n``.

    Sizes are sampled from ``min_community_size..max_community_size``
    inclusive, weighted by ``powerlaw_distribution(avail, beta)``, and taken
    in draw order until their running total reaches ``n``. Any overshoot is
    removed from the last community taken; if that would shrink it below
    ``min_community_size`` the community is dropped instead and its vertices
    are handed out one at a time, round-robin, to the communities taken before
    it. That redistribution can push a community above ``max_community_size``.

    Args:
        n: Total number of vertices the sizes must sum to.
        beta: Parameter forwarded to ``powerlaw_distribution``
            (presumably the power-law exponent -- confirm against the helper).
        min_community_size: Smallest size that can be drawn.
        max_community_size: Largest size that can be drawn.

    Returns:
        ``int64`` array of community sizes sorted from largest to smallest.

    Raises:
        ValueError: If the only community drawn has to be dropped, which
            happens when ``0 < n < min_community_size``. (This case used to
            surface as a bare ``ZeroDivisionError``.)
    """
    # ceil(n / min) draws always suffice: every draw is >= min_community_size.
    max_community_number = int(np.ceil(n / min_community_size))
    avail = np.arange(min_community_size, max_community_size + 1)

    probabilities = powerlaw_distribution(avail, beta)

    big_list: NDArray[np.int64] = np.random.choice(avail, size=max_community_number, p=probabilities)

    # Keep a running total rather than re-summing the whole array each step.
    total = 0
    index = 0
    while total < n:
        total += int(big_list[index])
        index += 1

    # astype() copies, so the in-place edits below never touch ``big_list``.
    community_sizes: NDArray[np.int64] = big_list[:index].astype(np.int64)

    excess = total - n
    if excess > 0:
        if (community_sizes[-1] - excess) >= min_community_size:
            community_sizes[-1] -= excess
        else:
            # Vertices of the dropped community that still need a home.
            leftover = int(community_sizes[-1]) - excess
            community_sizes = community_sizes[:-1]
            if len(community_sizes) == 0:
                raise ValueError(
                    f"cannot build communities: n={n} is smaller than "
                    f"min_community_size={min_community_size}"
                )
            # Same result as adding one vertex at a time in round-robin order:
            # everyone gets ``share`` and the first ``extra`` get one more.
            share, extra = divmod(leftover, len(community_sizes))
            community_sizes += share
            community_sizes[:extra] += 1

    return np.sort(community_sizes)[::-1]

80 

81 

def build_communities(community_sizes: NDArray[np.int64]) -> dict[int, list[int]]:
    """Partition consecutive vertex ids into communities of the given sizes.

    Community ``k`` receives the next ``community_sizes[k]`` vertex ids, with
    numbering starting at 0 and continuing without gaps.

    Args:
        community_sizes: Size of each community, in the order the communities
            should be numbered.

    Returns:
        Mapping from community index to the list of vertex ids it contains.
    """
    communities: dict[int, list[int]] = {}
    start = 0
    for community_id, size in enumerate(community_sizes):
        stop = start + size
        communities[community_id] = list(range(start, stop))
        start = stop
    return communities

89 

90 

def assign_degrees(
    degrees: NDArray[np.int64],
    communities: dict[int, list[int]],
    community_sizes: NDArray[np.int64],
    xi: float,
) -> dict[int, Any]:
    """Assign every degree in ``degrees`` to a distinct vertex id.

    Degrees are handled in the order given. Each time the current degree is
    lower than the previous one (and some community is still locked), the
    size threshold ``d * (1 - xi * phi) + 1`` is recomputed and every further
    community whose size meets it is unlocked, which raises the upper bound
    used for sampling. A not-yet-used vertex id is then drawn at random below
    that bound and given the degree.

    As soon as the bound equals ``len(degrees) - 1`` the rest of the degrees
    are dealt out in one go by ``assign_remaining_degrees``.

    Args:
        degrees: Vertex degrees (presumably sorted in descending order, as
            ``build_degrees`` returns them -- confirm against the caller).
        communities: Mapping from community index to its vertex ids.
        community_sizes: Size of each community, indexed like ``communities``.
        xi: Mixing parameter used in the threshold formula.

    Returns:
        Mapping from vertex id to its assigned degree.
    """
    vertex_count = len(degrees)
    community_count = len(community_sizes)
    phi = 1 - np.sum(community_sizes**2) / (vertex_count**2)

    assigned: dict[int, Any] = {}
    taken: set[int] = set()
    bound = 0  # sampling bound: last vertex id of the newest unlocked community
    unlocked = 0  # number of communities unlocked so far

    # Start above the first degree so the first iteration triggers an unlock.
    last_degree = degrees[0] + 1

    for position, degree in enumerate(degrees):
        if lock_needs_update(degree, last_degree, unlocked, community_count):
            unlocked, bound = update_lock(
                calculate_threshold(degree, xi, phi),
                unlocked,
                bound,
                community_sizes,
                communities,
            )
        last_degree = degree

        # NOTE(review): choose_new_vertex draws from range(bound), so the
        # vertex whose id equals ``bound`` is never picked here and only gets
        # a degree in the final shuffle -- confirm this is intended.
        vertex = choose_new_vertex(bound, taken)
        taken.add(vertex)
        assigned[vertex] = degree

        # Every community is unlocked: no more constraints, finish by shuffle.
        if bound == vertex_count - 1:
            return assign_remaining_degrees(position, degrees, taken, assigned)

    return assigned

121 

122 

def lock_needs_update(degree: int, previous_degree: int, lock: int, num_communities: int) -> bool:
    """Tell whether the community lock should be advanced for this degree.

    Args:
        degree: Degree currently being assigned.
        previous_degree: Degree handled in the previous step.
        lock: Number of communities unlocked so far.
        num_communities: Total number of communities.

    Returns:
        True when the degree is strictly lower than the previous one and at
        least one community is still locked.
    """
    degree_dropped = degree < previous_degree
    communities_left = lock < num_communities
    return degree_dropped and communities_left

125 

126 

def calculate_threshold(d: int, xi: float, phi: float) -> float:
    """Return the community-size threshold ``d * (1 - xi * phi) + 1``.

    ``update_lock`` unlocks a community when its size is at least this value.

    Args:
        d: Vertex degree.
        xi: Mixing parameter.
        phi: Correction factor computed by the caller from the community sizes.

    Returns:
        The threshold as a float.
    """
    scale = 1 - xi * phi
    return d * scale + 1

129 

130 

def update_lock(
    threshold: float,
    lock: int,
    avail: int,
    community_sizes: NDArray[np.int64],
    communities: dict[int, list[int]],
) -> tuple[int, int]:
    """Unlock consecutive communities whose size meets ``threshold``.

    Starting at index ``lock``, each community with
    ``community_sizes[lock] >= threshold`` is unlocked: ``avail`` becomes the
    last vertex id of that community and ``lock`` moves to the next index.
    Scanning stops at the first community that is too small or once every
    community has been unlocked.

    Args:
        threshold: Minimum size a community needs in order to be unlocked.
        lock: Index of the first community that is still locked; must be a
            valid index into ``community_sizes``.
        avail: Current sampling bound, returned unchanged if nothing unlocks.
        community_sizes: Size of each community.
        communities: Mapping from community index to its vertex ids.

    Returns:
        The updated ``(lock, avail)`` pair.
    """
    community_count = len(community_sizes)

    fits = community_sizes[lock] >= threshold
    while fits:
        avail = communities[lock][-1]
        lock += 1
        # Only look at the next size if there is a next community.
        fits = lock != community_count and community_sizes[lock] >= threshold

    return lock, avail

144 

145 

def choose_new_vertex(avail: int, already_chosen: set[int]) -> int:
    """Draw a random vertex id that has not been used yet.

    Ids are drawn uniformly from ``0 .. avail - 1`` (``np.random.choice``
    with an integer argument) and redrawn until one is found that is not in
    ``already_chosen``.

    The loop does not terminate if every id below ``avail`` is already taken,
    so the caller must guarantee that a free id exists.

    Args:
        avail: Exclusive upper bound of the ids to draw from.
        already_chosen: Ids that must not be returned.

    Returns:
        A vertex id below ``avail`` that is not in ``already_chosen``.
    """
    while True:
        candidate = np.random.choice(avail)
        if candidate not in already_chosen:
            return candidate

152 

153 

def assign_remaining_degrees(
    degree_index: int,
    degrees: NDArray[np.int64],
    already_chosen: set[int],
    deg: dict[int, Any],
) -> dict[int, Any]:
    """Hand out the degrees after ``degree_index`` to the unused vertices.

    The vertex ids in ``0 .. len(degrees) - 1`` that are not in
    ``already_chosen`` are shuffled and paired, in order, with
    ``degrees[degree_index + 1:]``.

    Args:
        degree_index: Index of the last degree that has already been assigned.
        degrees: All vertex degrees.
        already_chosen: Vertex ids that already have a degree.
        deg: Vertex-to-degree mapping built so far; updated in place.

    Returns:
        The same ``deg`` mapping, now including the remaining vertices.
    """
    free_vertices = set(range(len(degrees))) - already_chosen
    labels: NDArray[np.int64] = np.array(list(free_vertices))
    np.random.shuffle(labels)

    leftover_degrees: NDArray[np.int64] = degrees[degree_index + 1 :]  # noqa: E203

    for label, degree in zip(labels, leftover_degrees):
        deg[label] = degree
    return deg

168 

169 

def split_degrees(
    degrees: dict[int, int],
    communities: dict[int, list[int]],
    xi: float,
) -> tuple[dict[int, int], dict[int, int]]:
    """Split every vertex degree into two parts, ``deg_c`` and ``deg_b``.

    ``deg_c[v]`` starts as ``rand_round((1 - xi) * degrees[v])``. For every
    community whose ``deg_c`` values add up to an odd number, the member with
    the largest ``deg_c`` is adjusted by one to make the sum even: up by one
    normally, down by one if going up would exceed that vertex's full degree.
    ``deg_b[v]`` is whatever is left, ``degrees[v] - deg_c[v]``.

    (``deg_c`` / ``deg_b`` are presumably the within-community and background
    parts of the degree -- confirm against the callers.)

    Args:
        degrees: Mapping from vertex id to its total degree.
        communities: Mapping from community index to its vertex ids.
        xi: Mixing parameter; ``1 - xi`` is the share that goes to ``deg_c``.

    Returns:
        The pair ``(deg_c, deg_b)``.
    """
    share = 1 - xi
    deg_c = {}
    for vertex, degree in degrees.items():
        deg_c[vertex] = rand_round(share * degree)

    for members in communities.values():
        community_total = sum(deg_c[v] for v in members)
        if community_total % 2 != 0:
            top = _get_v_max(deg_c, members)
            # Prefer +1; fall back to -1 when +1 would exceed the full degree.
            if deg_c[top] + 1 > degrees[top]:
                deg_c[top] -= 1
            else:
                deg_c[top] += 1

    deg_b = {vertex: degree - deg_c[vertex] for vertex, degree in degrees.items()}
    return deg_c, deg_b

187 

188 

def add_outliers(
    *,
    vcount: int,
    num_outliers: int,
    gamma: float,
    min_degree: int,
    max_degree: int,
    communities: dict[int, list[int]],
    deg_b: dict[int, int],
    deg_c: dict[int, int],
) -> tuple[dict[int, list[int]], dict[int, int], dict[int, int]]:
    """Extend the community and degree mappings with outlier vertices.

    The outliers are the last ``num_outliers`` vertex ids, i.e.
    ``vcount - num_outliers .. vcount - 1``. They are grouped under
    ``OUTLIER_COMMUNITY_ID``, get degrees drawn with ``build_degrees`` stored
    entirely in ``deg_b``, and get ``0`` in ``deg_c``.

    The input mappings are not modified; merged copies are returned.

    Args:
        vcount: Total number of vertices, outliers included.
        num_outliers: Number of outlier vertices.
        gamma: Forwarded to ``build_degrees``.
        min_degree: Forwarded to ``build_degrees``.
        max_degree: Forwarded to ``build_degrees``.
        communities: Mapping from community index to its vertex ids.
        deg_b: Existing ``deg_b`` mapping of the regular vertices.
        deg_c: Existing ``deg_c`` mapping of the regular vertices.

    Returns:
        The tuple ``(communities, deg_b, deg_c)`` including the outliers.
    """
    first_outlier = vcount - num_outliers
    outlier_degrees = build_degrees(num_outliers, gamma, min_degree, max_degree)

    outlier_community = {OUTLIER_COMMUNITY_ID: list(range(first_outlier, vcount))}
    outlier_deg_b = {first_outlier + offset: outlier_degrees[offset] for offset in range(num_outliers)}
    outlier_deg_c = dict.fromkeys((first_outlier + offset for offset in range(num_outliers)), 0)

    return (
        communities | outlier_community,
        deg_b | outlier_deg_b,
        deg_c | outlier_deg_c,
    )

207 

208 

def _get_v_max(deg_c: dict[int, int], community: list[int]) -> int:
    """Return the vertex of ``community`` with the largest ``deg_c`` value.

    When several vertices share the largest value, the one that appears
    first in ``community`` is returned.

    Args:
        deg_c: Mapping from vertex id to its value.
        community: Vertex ids to choose from; must not be empty.

    Returns:
        The vertex id with the largest ``deg_c`` value.
    """
    return max(community, key=lambda vertex: deg_c[vertex])