Coverage for src/abcd_graph/graph/core/abcd_objects/graph_impl.py: 98%

205 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2024-12-04 21:31 +0100

1__all__ = ["GraphImpl"] 

2 

3from typing import Optional 

4 

5import numpy as np 

6from numpy.typing import NDArray 

7 

8from abcd_graph.graph.core.abcd_objects import ( 

9 BackgroundGraph, 

10 Community, 

11 Edge, 

12) 

13from abcd_graph.graph.core.abcd_objects.abstract import AbstractGraph 

14from abcd_graph.graph.core.abcd_objects.utils import ( 

15 build_recycle_list, 

16 choose_other_edge, 

17 rewire_edge, 

18) 

19from abcd_graph.graph.core.constants import OUTLIER_COMMUNITY_ID 

20from abcd_graph.models import Model 

21from abcd_graph.params import ABCDParams 

22 

23 

class GraphImpl(AbstractGraph):
    """Graph assembled from per-community edge sets plus a background edge set.

    The building methods (``build_communities``, ``build_background_edges``,
    ``combine_edges``, ``rewire_graph``) each return ``self`` so they can be
    chained. ``build_background_edges`` replaces the adjacency dict, so it has
    to run before ``combine_edges`` merges the community edges into it.
    """

    def __init__(self, deg_b: dict[int, int], deg_c: dict[int, int], params: ABCDParams) -> None:
        """Store the degree sequences and the generation parameters.

        Args:
            deg_b: Per-vertex degrees handed to the model when the background
                edges are generated.
            deg_c: Per-vertex degrees handed to the model when the edges
                inside each community are generated.
            params: Generation parameters; this class reads ``gamma``,
                ``beta``, ``xi``, ``vcount``, ``num_outliers`` and the
                degree / community-size bounds from it.
        """
        self.deg_b = deg_b
        self.deg_c = deg_c

        self._params = params

        self.communities: list[Community] = []
        self.background_graph: Optional[BackgroundGraph] = None

        # Maps each edge to its multiplicity; filled by build_background_edges / combine_edges.
        self._adj_dict: dict[Edge, int] = {}

    # ------------------------------------------------------------------ #
    # Shared helpers for the "expected" / "actual" statistics below.
    # ------------------------------------------------------------------ #

    @staticmethod
    def _truncated_power_law_mean(exponent: float, low: int, high: int) -> float:
        """Return the mean of the weights ``k ** -exponent`` over ``k`` in ``[low, high]``."""
        support = range(low, high + 1)
        bottom: float = sum(k ** (-exponent) for k in support)
        top: float = sum(k ** (1 - exponent) for k in support)
        return top / bottom

    @staticmethod
    def _truncated_power_law_cdf(exponent: float, low: int, high: int) -> dict[int, float]:
        """Return the CDF of the weights ``k ** -exponent`` over ``k`` in ``[low, high]``.

        The same weights feed both the partial sums and the normaliser, so the
        last entry is exactly ``1.0``. An empty range yields an empty dict.
        """
        weights = [k ** (-exponent) for k in range(low, high + 1)]
        bottom = sum(weights)

        cdf: dict[int, float] = {}
        for count, value in enumerate(range(low, high + 1), start=1):
            # Partial sums are recomputed with sum() (rather than accumulated)
            # so each entry is rounded the same way as the normaliser.
            cdf[value] = sum(weights[:count]) / bottom
        return cdf

    @staticmethod
    def _empirical_cdf(values: list[int], total: int) -> dict[int, float]:
        """Return the empirical CDF of ``values`` where each item weighs ``1 / total``.

        Keys are the distinct values in ascending order. An empty ``values``
        yields an empty dict.
        """
        if not values:
            return {}

        step = 1 / total
        cdf: dict[int, float] = {}
        running = 0.0
        for value in sorted(values):
            running += step
            # Repeated values overwrite the entry, leaving the cumulative share at the last repeat.
            cdf[value] = running
        return cdf

    def _regular_communities(self) -> list[Community]:
        """Return the communities whose id is not ``OUTLIER_COMMUNITY_ID``."""
        return [c for c in self.communities if c.community_id != OUTLIER_COMMUNITY_ID]

    # ------------------------------------------------------------------ #
    # Degree statistics.
    # ------------------------------------------------------------------ #

    @property
    def average_degree(self) -> float:
        """Sum of all ``deg_b`` and ``deg_c`` entries divided by the number of ``deg_b`` entries."""
        return (sum(self.deg_b.values()) + sum(self.deg_c.values())) / len(self.deg_b)

    @property
    def expected_average_degree(self) -> float:
        """Mean of the power law with exponent ``gamma`` truncated to ``[min_degree, max_degree]``."""
        return self._truncated_power_law_mean(
            self._params.gamma, self._params.min_degree, self._params.max_degree
        )

    @property
    def actual_degree_cdf(self) -> dict[int, float]:
        """Empirical CDF of the per-vertex total degree; see ``_calc_actual_degree_cdf``."""
        return self._calc_actual_degree_cdf()

    def _calc_actual_degree_cdf(self) -> dict[int, float]:
        """Build the empirical CDF of ``deg_b[v] + deg_c[v]``, each vertex weighing ``1 / vcount``."""
        totals = [self.deg_b[v] + self.deg_c[v] for v in self.deg_b]
        return self._empirical_cdf(totals, self._params.vcount)

    @property
    def expected_degree_cdf(self) -> dict[int, float]:
        """CDF of the truncated power law with exponent ``gamma``; see ``_calc_expected_degree_cdf``."""
        return self._calc_expected_degree_cdf()

    def _calc_expected_degree_cdf(self) -> dict[int, float]:
        """Build the CDF of ``k ** -gamma`` over ``[min_degree, max_degree]``."""
        return self._truncated_power_law_cdf(
            self._params.gamma, self._params.min_degree, self._params.max_degree
        )

    # ------------------------------------------------------------------ #
    # Community-size statistics (the outlier community is excluded).
    # ------------------------------------------------------------------ #

    @property
    def actual_average_community_size(self) -> float:
        """Mean number of vertices per non-outlier community."""
        return self._calc_actual_average_community_size()

    def _calc_actual_average_community_size(self) -> float:
        """Average ``len(c.vertices)`` over the non-outlier communities.

        Raises:
            ZeroDivisionError: If there is no non-outlier community.
        """
        sizes = [len(c.vertices) for c in self._regular_communities()]
        return sum(sizes) / len(sizes)

    @property
    def expected_average_community_size(self) -> float:
        """Mean of the power law with exponent ``beta`` truncated to the community-size bounds."""
        return self._calc_expected_average_community_size()

    def _calc_expected_average_community_size(self) -> float:
        """Compute the mean of ``k ** -beta`` over ``[min_community_size, max_community_size]``."""
        return self._truncated_power_law_mean(
            self._params.beta, self._params.min_community_size, self._params.max_community_size
        )

    @property
    def actual_community_cdf(self) -> dict[int, float]:
        """Empirical CDF of the non-outlier community sizes."""
        return self._calc_actual_community_cdf()

    def _calc_actual_community_cdf(self) -> dict[int, float]:
        """Build the empirical CDF of community sizes, each community weighing ``1 / count``."""
        sizes = [len(c.vertices) for c in self._regular_communities()]
        return self._empirical_cdf(sizes, len(sizes))

    @property
    def expected_community_cdf(self) -> dict[int, float]:
        """CDF of the truncated power law with exponent ``beta``."""
        return self._calc_expected_community_cdf()

    def _calc_expected_community_cdf(self) -> dict[int, float]:
        """Build the CDF of ``k ** -beta`` over ``[min_community_size, max_community_size]``.

        The partial sums use the same ``-beta`` exponent as the normaliser;
        summing ``k ** beta`` instead would not produce a CDF (values would
        exceed 1).
        """
        return self._truncated_power_law_cdf(
            self._params.beta, self._params.min_community_size, self._params.max_community_size
        )

    # ------------------------------------------------------------------ #
    # Diagnostics and views of the final graph.
    # ------------------------------------------------------------------ #

    @property
    def num_loops(self) -> int:
        """Sum of the ``"num_loops"`` diagnostics of every community and the background graph."""
        assert self.background_graph is not None

        community_loops = sum(community.diagnostics["num_loops"] for community in self.communities)
        return community_loops + self.background_graph.diagnostics["num_loops"]

    @property
    def num_multi_edges(self) -> int:
        """Sum of the ``"num_multi_edges"`` diagnostics of every community and the background graph."""
        assert self.background_graph is not None

        community_multi_edges = sum(community.diagnostics["num_multi_edges"] for community in self.communities)
        return community_multi_edges + self.background_graph.diagnostics["num_multi_edges"]

    @property
    def xi_matrix(self) -> NDArray[np.float64]:
        """Community-by-community matrix produced by ``XiMatrixBuilder``.

        Raises:
            ValueError: If ``params.xi`` equals 0.
        """
        if self._params.xi == 0:
            raise ValueError("xi_matrix only available if xi > 0")

        return XiMatrixBuilder(self._params.xi, self.communities, self._adj_dict, self.deg_b).build()

    @property
    def degree_sequence(self) -> dict[int, int]:
        """Degree of every vertex ``0 .. len(deg_b) - 1`` counted from ``edges``.

        Each distinct edge adds one to both of its endpoints; the multiplicity
        stored in the adjacency dict is not used here.
        """
        deg = dict.fromkeys(range(len(self.deg_b)), 0)
        for first, second in self.edges:
            deg[first] += 1
            deg[second] += 1
        return deg

    @property
    def adj_dict(self) -> dict[Edge, int]:
        """The edge -> multiplicity mapping (the live dict, not a copy)."""
        return self._adj_dict

    def to_adj_matrix(self) -> NDArray[np.bool_]:
        """Return a symmetric boolean adjacency matrix of size ``len(deg_b)``; multiplicities are dropped."""
        size = len(self.deg_b)
        adj_matrix = np.zeros((size, size), dtype=bool)
        for edge in self._adj_dict:
            adj_matrix[edge.v1, edge.v2] = True
            adj_matrix[edge.v2, edge.v1] = True

        return adj_matrix

    @property
    def edges(self) -> list[tuple[int, int]]:
        """Every key of the adjacency dict as a ``(v1, v2)`` tuple."""
        return [(edge.v1, edge.v2) for edge in self._adj_dict]

    @property
    def is_proper_abcd(self) -> bool:
        """True when ``build_recycle_list`` finds nothing left to rewire in the adjacency dict."""
        return len(build_recycle_list(self._adj_dict)) == 0

    @property
    def num_communities(self) -> int:
        """Number of stored communities, minus one when ``params.num_outliers`` is non-zero.

        NOTE(review): the subtraction presumably discounts the outlier
        community - confirm against how outliers are passed to build_communities.
        """
        if self._params.num_outliers == 0:
            return len(self.communities)
        return len(self.communities) - 1

    @property
    def membership_list(self) -> list[int]:
        """Community ids, each repeated once per vertex, in the order of ``self.communities``.

        NOTE(review): entry ``i`` is the community of vertex ``i`` only if the
        communities hold consecutive vertex ranges in order - confirm with callers.
        """
        result: list[int] = []

        for community in self.communities:
            result.extend([community.community_id] * len(community.vertices))

        return result

    # ------------------------------------------------------------------ #
    # Building steps.
    # ------------------------------------------------------------------ #

    def build_communities(self, communities: dict[int, list[int]], model: Model) -> "GraphImpl":
        """Generate, rewire and store one ``Community`` per entry of ``communities``.

        Args:
            communities: Maps community id to the vertices it contains.
            model: Callable that turns a ``{vertex: degree}`` mapping into edges;
                it receives the ``deg_c`` entries of the community's vertices.

        Returns:
            ``self``, to allow chaining.
        """
        for community_id, community_vertices in communities.items():
            community_degrees = {v: self.deg_c[v] for v in community_vertices}
            community_edges = model(community_degrees)
            community_obj = Community(
                edges=[Edge(e[0], e[1]) for e in community_edges],
                vertices=community_vertices,
                deg_b=self.deg_b,
                deg_c=self.deg_c,
                community_id=community_id,
            )
            community_obj.rewire_community()

            # Internal invariant: rewiring must leave nothing in the recycle list.
            assert len(build_recycle_list(community_obj.adj_dict)) == 0

            self.communities.append(community_obj)

        return self

    def build_background_edges(self, model: Model) -> "GraphImpl":
        """Generate the background graph from ``deg_b`` and adopt its adjacency dict.

        The adjacency dict of this graph becomes the very same object as the
        background graph's, so later merges also mutate the background graph's dict.

        Returns:
            ``self``, to allow chaining.
        """
        edges = [Edge(edge[0], edge[1]) for edge in model(self.deg_b)]
        self.background_graph = BackgroundGraph(edges)
        self._adj_dict = self.background_graph.adj_dict

        return self

    def combine_edges(self) -> "GraphImpl":
        """Add every community edge (with its multiplicity) to the adjacency dict.

        Returns:
            ``self``, to allow chaining.
        """
        for community in self.communities:
            for edge, count in community.adj_dict.items():
                self._adj_dict[edge] = self._adj_dict.get(edge, 0) + count

        return self

    def rewire_graph(self) -> "GraphImpl":
        """Rewire edges until ``build_recycle_list`` reports none left.

        Returns:
            ``self``, to allow chaining.
        """
        bad_edges = build_recycle_list(self._adj_dict)

        while len(bad_edges) > 0:
            for edge in bad_edges:
                other_edge = choose_other_edge(self._adj_dict, edge)
                rewire_edge(self._adj_dict, edge, other_edge)

            # Rewiring can create new bad edges, so the list is rebuilt after each pass.
            bad_edges = build_recycle_list(self._adj_dict)

        return self

252 

253 

class XiMatrixBuilder:
    """Builds a community-by-community matrix of actual / expected edge counts.

    All three matrices are square with one row per community and are indexed
    directly by ``community_id``.

    NOTE(review): this assumes every ``community_id`` is a valid index into a
    ``len(communities)``-sized axis - confirm how the outlier id is mapped.
    """

    def __init__(
        self,
        xi: float,
        communities: "list[Community]",
        adj_matrix: "dict[Edge, int]",
        deg_b: dict[int, int],
    ) -> None:
        """Store the inputs and allocate zero-filled result matrices.

        Args:
            xi: Value used to scale the diagonal entries of the normalised matrix.
            communities: Communities to compare; each must expose ``vertices``,
                ``community_id``, ``degree_sequence`` and ``empirical_xi``.
            adj_matrix: Edge -> multiplicity mapping; only its keys are read.
            deg_b: Per-vertex degrees whose sum (minus one) normalises the
                expected matrix.
        """
        self.xi = xi
        self.communities = communities
        self._community_len = len(communities)
        self.adj_matrix = adj_matrix
        self.deg_b = deg_b

        # vertex -> community_id, filled by _build_location.
        self.location: dict[int, int] = {}

        shape = (self._community_len, self._community_len)
        self.actual_betweenness_matrix = np.zeros(shape)
        self.expected_betweenness_matrix = np.zeros(shape)
        self.normalized_betweeness_matrix = np.zeros(shape)

    @staticmethod
    def _background_volume(community: "Community") -> float:
        """Return the community's total degree, scaled by ``empirical_xi`` unless it is the outlier community."""
        total_degree = sum(community.degree_sequence.values())
        if community.community_id == OUTLIER_COMMUNITY_ID:
            return float(total_degree)
        return total_degree * community.empirical_xi

    def _build_location(self) -> None:
        """Record the community id of every vertex."""
        for community in self.communities:
            for vertex in community.vertices:
                self.location[vertex] = community.community_id

    def _build_actual_matrix(self) -> None:
        """Count edges between communities, once in each direction.

        An edge inside a single community therefore adds 2 to that diagonal entry.
        """
        for edge in self.adj_matrix:
            first = self.location[edge.v1]
            second = self.location[edge.v2]
            self.actual_betweenness_matrix[first, second] += 1
            self.actual_betweenness_matrix[second, first] += 1

    def _build_expectation_matrix(self) -> None:
        """Fill entry ``(i, j)`` with ``volume_i * volume_j / (sum(deg_b) - 1)``."""
        bottom = sum(self.deg_b.values()) - 1

        # Volumes depend on one community only, so compute them once instead of per pair.
        volumes = [(c.community_id, self._background_volume(c)) for c in self.communities]

        # The double loop visits every ordered pair, so no mirrored write is needed.
        for id_i, vol_i in volumes:
            for id_j, vol_j in volumes:
                self.expected_betweenness_matrix[id_i, id_j] = vol_i * vol_j / bottom

    def _build_normalized_matrix(self) -> None:
        """Fill the result matrix.

        The diagonal entry of a non-outlier community is
        ``(1 - empirical_xi) / (1 - xi)``; every other entry is the
        actual count divided by the expected count.
        """
        for c_i in self.communities:
            for c_j in self.communities:
                row, col = c_i.community_id, c_j.community_id
                if c_i == c_j and row != OUTLIER_COMMUNITY_ID:
                    self.normalized_betweeness_matrix[row, col] = (1 - c_i.empirical_xi) / (1 - self.xi)
                else:
                    self.normalized_betweeness_matrix[row, col] = (
                        self.actual_betweenness_matrix[row, col] / self.expected_betweenness_matrix[row, col]
                    )

    def build(self) -> NDArray[np.float64]:
        """Run all steps in order and return the normalised matrix.

        The actual matrix is accumulated in place, so calling ``build`` twice
        on the same instance doubles its counts.
        """
        self._build_location()
        self._build_actual_matrix()
        self._build_expectation_matrix()
        self._build_normalized_matrix()

        return self.normalized_betweeness_matrix