Coverage for src/abcd_graph/graph/core/abcd_objects/graph_impl.py: 98%
205 statements
« prev ^ index » next coverage.py v7.5.3, created at 2024-12-04 21:31 +0100
« prev ^ index » next coverage.py v7.5.3, created at 2024-12-04 21:31 +0100
1__all__ = ["GraphImpl"]
3from typing import Optional
5import numpy as np
6from numpy.typing import NDArray
8from abcd_graph.graph.core.abcd_objects import (
9 BackgroundGraph,
10 Community,
11 Edge,
12)
13from abcd_graph.graph.core.abcd_objects.abstract import AbstractGraph
14from abcd_graph.graph.core.abcd_objects.utils import (
15 build_recycle_list,
16 choose_other_edge,
17 rewire_edge,
18)
19from abcd_graph.graph.core.constants import OUTLIER_COMMUNITY_ID
20from abcd_graph.models import Model
21from abcd_graph.params import ABCDParams
class GraphImpl(AbstractGraph):
    """Graph assembled from per-community edge sets plus one background edge set.

    The builder methods (``build_communities``, ``build_background_edges``,
    ``combine_edges``, ``rewire_graph``) each return ``self`` so they can be
    chained.  ``build_background_edges`` *replaces* the adjacency dict, so it
    has to run before ``combine_edges`` for the merged community edges to be
    kept.

    The remaining members are read-only diagnostics comparing the generated
    graph ("actual") against the truncated power-law targets described by the
    parameters ("expected").
    """

    def __init__(self, deg_b: dict[int, int], deg_c: dict[int, int], params: ABCDParams) -> None:
        """Store the degree maps and parameters; no edges are generated here.

        Args:
            deg_b: Vertex -> degree map fed to the model in
                ``build_background_edges`` (presumably the background share of
                each vertex's degree -- confirm against the caller).
            deg_c: Vertex -> degree map fed to the model in
                ``build_communities`` (presumably the community share).
            params: Generation parameters; this class reads ``gamma``,
                ``beta``, ``xi``, ``vcount``, ``num_outliers`` and the
                min/max degree and community-size bounds.
        """
        self.deg_b = deg_b
        self.deg_c = deg_c

        self._params = params

        self.communities: list[Community] = []
        self.background_graph: Optional[BackgroundGraph] = None

        # Edge -> stored count; populated by build_background_edges / combine_edges.
        self._adj_dict: dict[Edge, int] = {}

    # ------------------------------------------------------------------ helpers

    def _regular_communities(self) -> list[Community]:
        """Return the communities whose id is not ``OUTLIER_COMMUNITY_ID``."""
        return [c for c in self.communities if c.community_id != OUTLIER_COMMUNITY_ID]

    @staticmethod
    def _power_law_mean(exponent: float, low: int, high: int) -> float:
        """Mean of a distribution with weights ``k ** -exponent`` on ``low..high`` (inclusive)."""
        support = range(low, high + 1)
        bottom: float = sum(k ** (-exponent) for k in support)
        top: float = sum(k ** (1 - exponent) for k in support)

        return top / bottom

    @staticmethod
    def _power_law_cdf(exponent: float, low: int, high: int) -> dict[int, float]:
        """CDF of a distribution with weights ``k ** -exponent`` on ``low..high`` (inclusive).

        A single running prefix sum is used instead of re-summing the prefix for
        every support point.  The prefix sums are normalised by the final running
        total, so the value at ``high`` is exactly ``1.0``.  An empty support
        (``high < low``) yields an empty dict.
        """
        prefix_sums: dict[int, float] = {}
        running = 0.0
        for k in range(low, high + 1):
            running += k ** (-exponent)
            prefix_sums[k] = running

        return {k: partial / running for k, partial in prefix_sums.items()}

    @staticmethod
    def _empirical_cdf(values: list[int], total: int) -> dict[int, float]:
        """Map each distinct value to the cumulative fraction of samples ``<=`` it.

        Every sample contributes ``1 / total``; keys are inserted in ascending
        order.  An empty ``values`` list yields an empty dict.
        """
        ordered = sorted(values)
        if not ordered:
            return {}

        step = 1 / total
        cdf: dict[int, float] = {}
        running = 0.0
        for value in ordered:
            running += step
            # Repeated values overwrite the entry, leaving the cumulative mass at the last duplicate.
            cdf[value] = running
        return cdf

    # ------------------------------------------------------------ degree metrics

    @property
    def average_degree(self) -> float:
        """Sum of all ``deg_b`` and ``deg_c`` values divided by the number of ``deg_b`` entries."""
        return (sum(self.deg_b.values()) + sum(self.deg_c.values())) / len(self.deg_b)

    @property
    def expected_average_degree(self) -> float:
        """Mean of the power law with exponent ``gamma`` truncated to ``[min_degree, max_degree]``."""
        return self._power_law_mean(self._params.gamma, self._params.min_degree, self._params.max_degree)

    @property
    def actual_degree_cdf(self) -> dict[int, float]:
        """Empirical CDF of ``deg_b[v] + deg_c[v]``; see ``_calc_actual_degree_cdf``."""
        return self._calc_actual_degree_cdf()

    def _calc_actual_degree_cdf(self) -> dict[int, float]:
        """Build the empirical degree CDF, weighting each vertex by ``1 / params.vcount``."""
        degrees = [self.deg_b[v] + self.deg_c[v] for v in self.deg_b]
        return self._empirical_cdf(degrees, self._params.vcount)

    @property
    def expected_degree_cdf(self) -> dict[int, float]:
        """CDF of the truncated power law with exponent ``gamma`` over the degree range."""
        return self._calc_expected_degree_cdf()

    def _calc_expected_degree_cdf(self) -> dict[int, float]:
        """Return ``{d: P(degree <= d)}`` for every ``d`` in ``[min_degree, max_degree]``."""
        return self._power_law_cdf(self._params.gamma, self._params.min_degree, self._params.max_degree)

    # --------------------------------------------------------- community metrics

    @property
    def actual_average_community_size(self) -> float:
        """Mean vertex count of the non-outlier communities."""
        return self._calc_actual_average_community_size()

    def _calc_actual_average_community_size(self) -> float:
        """Average ``len(c.vertices)`` over non-outlier communities.

        Raises:
            ZeroDivisionError: If there is no non-outlier community.
        """
        sizes = [len(c.vertices) for c in self._regular_communities()]  # Excluding outliers
        return sum(sizes) / len(sizes)

    @property
    def expected_average_community_size(self) -> float:
        """Mean of the power law with exponent ``beta`` truncated to the community-size range."""
        return self._calc_expected_average_community_size()

    def _calc_expected_average_community_size(self) -> float:
        """Compute the truncated power-law mean over ``[min_community_size, max_community_size]``."""
        return self._power_law_mean(
            self._params.beta,
            self._params.min_community_size,
            self._params.max_community_size,
        )

    @property
    def actual_community_cdf(self) -> dict[int, float]:
        """Empirical CDF of the non-outlier community sizes."""
        return self._calc_actual_community_cdf()

    def _calc_actual_community_cdf(self) -> dict[int, float]:
        """Build the empirical size CDF, weighting each non-outlier community equally."""
        sizes = [len(c.vertices) for c in self._regular_communities()]  # Excluding outliers
        return self._empirical_cdf(sizes, len(sizes))

    @property
    def expected_community_cdf(self) -> dict[int, float]:
        """CDF of the truncated power law with exponent ``beta`` over the community-size range."""
        return self._calc_expected_community_cdf()

    def _calc_expected_community_cdf(self) -> dict[int, float]:
        """Return ``{s: P(size <= s)}`` for every ``s`` in the community-size range.

        The numerator uses the same ``k ** -beta`` weights as the normalising
        sum; using ``k ** beta`` there would produce values that are not
        bounded by 1.
        """
        return self._power_law_cdf(
            self._params.beta,
            self._params.min_community_size,
            self._params.max_community_size,
        )

    # -------------------------------------------------------------- diagnostics

    @property
    def num_loops(self) -> int:
        """Total of the ``"num_loops"`` diagnostics of all communities and the background graph.

        Requires ``build_background_edges`` to have been called.
        """
        assert self.background_graph is not None

        return (
            sum(community.diagnostics["num_loops"] for community in self.communities)
            + self.background_graph.diagnostics["num_loops"]
        )

    @property
    def num_multi_edges(self) -> int:
        """Total of the ``"num_multi_edges"`` diagnostics of all communities and the background graph.

        Requires ``build_background_edges`` to have been called.
        """
        assert self.background_graph is not None

        return (
            sum(community.diagnostics["num_multi_edges"] for community in self.communities)
            + self.background_graph.diagnostics["num_multi_edges"]
        )

    @property
    def xi_matrix(self) -> NDArray[np.float64]:
        """Matrix produced by ``XiMatrixBuilder`` for the current communities and edges.

        Raises:
            ValueError: If ``params.xi`` equals 0.
        """
        if self._params.xi == 0:
            raise ValueError("xi_matrix only available if xi > 0")

        return XiMatrixBuilder(self._params.xi, self.communities, self._adj_dict, self.deg_b).build()

    # ------------------------------------------------------------ graph exports

    @property
    def degree_sequence(self) -> dict[int, int]:
        """Vertex -> number of adjacency-dict keys touching it.

        Every key adds one to each endpoint regardless of its stored count, and
        vertices are expected to be labelled ``0 .. len(deg_b) - 1`` (any other
        endpoint raises ``KeyError``).
        """
        deg = dict.fromkeys(range(len(self.deg_b)), 0)
        for v1, v2 in self.edges:
            deg[v1] += 1
            deg[v2] += 1
        return deg

    @property
    def adj_dict(self) -> dict[Edge, int]:
        """The live edge -> count mapping (not a copy)."""
        return self._adj_dict

    def to_adj_matrix(self) -> NDArray[np.bool_]:
        """Return a symmetric boolean ``len(deg_b) x len(deg_b)`` adjacency matrix."""
        size = len(self.deg_b)
        adj_matrix = np.zeros((size, size), dtype=bool)
        for edge in self._adj_dict:
            adj_matrix[edge.v1, edge.v2] = True
            adj_matrix[edge.v2, edge.v1] = True

        return adj_matrix

    @property
    def edges(self) -> list[tuple[int, int]]:
        """One ``(v1, v2)`` tuple per key of the adjacency dict."""
        return [(edge.v1, edge.v2) for edge in self._adj_dict]

    @property
    def is_proper_abcd(self) -> bool:
        """``True`` when ``build_recycle_list`` finds nothing to recycle in the adjacency dict."""
        return len(build_recycle_list(self._adj_dict)) == 0

    @property
    def num_communities(self) -> int:
        """Number of communities, minus one when ``params.num_outliers`` is non-zero.

        NOTE(review): this assumes exactly one entry of ``communities`` holds the
        outliers in that case -- confirm against the community builder.
        """
        return len(self.communities) if self._params.num_outliers == 0 else len(self.communities) - 1

    @property
    def membership_list(self) -> list[int]:
        """Community ids repeated once per member, concatenated in community order.

        NOTE(review): position ``i`` matches vertex ``i`` only if the vertices
        are numbered consecutively community by community -- confirm.
        """
        result: list[int] = []

        for community in self.communities:
            result.extend([community.community_id] * len(community.vertices))

        return result

    # ----------------------------------------------------------------- builders

    def build_communities(self, communities: dict[int, list[int]], model: Model) -> "GraphImpl":
        """Create, rewire and store one ``Community`` per entry of ``communities``.

        Args:
            communities: Community id -> list of member vertices.
            model: Callable turning a vertex -> degree map into edge pairs.

        Returns:
            ``self``, for chaining.
        """
        for community_id, community_vertices in communities.items():
            community_edges = model({v: self.deg_c[v] for v in community_vertices})
            community_obj = Community(
                edges=[Edge(e[0], e[1]) for e in community_edges],
                vertices=community_vertices,
                deg_b=self.deg_b,
                deg_c=self.deg_c,
                community_id=community_id,
            )
            community_obj.rewire_community()

            # Sanity check: rewiring is expected to leave nothing to recycle.
            assert len(build_recycle_list(community_obj.adj_dict)) == 0

            self.communities.append(community_obj)

        return self

    def build_background_edges(self, model: Model) -> "GraphImpl":
        """Generate the background graph from ``deg_b`` and adopt its adjacency dict.

        The adjacency dict of this graph is *replaced* by
        ``background_graph.adj_dict``; if that attribute hands out the underlying
        dict rather than a copy (not visible here), later merges and rewiring
        mutate the background graph's dict as well.

        Returns:
            ``self``, for chaining.
        """
        edges = [Edge(edge[0], edge[1]) for edge in model(self.deg_b)]
        self.background_graph = BackgroundGraph(edges)
        self._adj_dict = self.background_graph.adj_dict

        return self

    def combine_edges(self) -> "GraphImpl":
        """Add every community's edge counts into the graph-level adjacency dict.

        Returns:
            ``self``, for chaining.
        """
        for community in self.communities:
            for edge, count in community.adj_dict.items():
                self._adj_dict[edge] = self._adj_dict.get(edge, 0) + count

        return self

    def rewire_graph(self) -> "GraphImpl":
        """Rewire edges until ``build_recycle_list`` reports nothing left to fix.

        Each pass pairs every listed edge with one picked by
        ``choose_other_edge`` and hands both to ``rewire_edge``; the list is
        then rebuilt from the updated adjacency dict.

        Returns:
            ``self``, for chaining.
        """
        bad_edges = build_recycle_list(self._adj_dict)

        while len(bad_edges) > 0:
            for edge in bad_edges:
                other_edge = choose_other_edge(self._adj_dict, edge)
                rewire_edge(self._adj_dict, edge, other_edge)

            bad_edges = build_recycle_list(self._adj_dict)

        return self
class XiMatrixBuilder:
    """Builds a community-by-community matrix of actual vs. expected edge counts.

    All three matrices are indexed directly by ``community_id``.

    NOTE(review): this presumes the ids are valid indices into a
    ``len(communities)``-sized array (with ``OUTLIER_COMMUNITY_ID`` possibly
    relying on negative indexing) -- confirm against the id assignment.
    """

    def __init__(
        self,
        xi: float,
        communities: list[Community],
        adj_matrix: dict[Edge, int],
        deg_b: dict[int, int],
    ) -> None:
        """Store the inputs and allocate zeroed square matrices.

        Args:
            xi: Global ``xi`` parameter used to normalise the diagonal entries.
            communities: Communities whose pairwise edge counts are compared.
            adj_matrix: Despite the name, an edge -> count dict; only its keys
                are read.
            deg_b: Vertex -> degree map whose total sets the expectation scale.
        """
        self.xi = xi
        self.communities = communities
        self._community_len = len(communities)
        self.adj_matrix = adj_matrix
        self.deg_b = deg_b

        shape = (self._community_len, self._community_len)
        self.location: dict[int, int] = {}  # vertex -> community id, filled by _build_location
        self.actual_betweenness_matrix = np.zeros(shape)
        self.expected_betweenness_matrix = np.zeros(shape)
        self.normalized_betweeness_matrix = np.zeros(shape)

    @staticmethod
    def _cross_volume(community: Community) -> float:
        """Return the community's total degree, scaled by ``empirical_xi`` unless it is the outlier one."""
        volume = sum(community.degree_sequence.values())
        if community.community_id == OUTLIER_COMMUNITY_ID:
            return float(volume)
        return volume * community.empirical_xi

    def _build_location(self) -> None:
        """Rebuild the vertex -> community id lookup from scratch."""
        self.location = {v: c.community_id for c in self.communities for v in c.vertices}

    def _build_actual_matrix(self) -> None:
        """Count, per pair of communities, the edge keys joining them.

        Each key is counted in both orientations, so a key inside a single
        community adds 2 to that diagonal cell.  The matrix is zeroed first so
        that repeated ``build()`` calls do not accumulate.
        """
        counts = self.actual_betweenness_matrix
        counts.fill(0)
        for edge in self.adj_matrix:
            i = self.location[edge.v1]
            j = self.location[edge.v2]
            counts[i, j] += 1
            counts[j, i] += 1

    def _build_expectation_matrix(self) -> None:
        """Fill ``expected[i, j] = vol_i * vol_j / (sum(deg_b) - 1)``.

        Volumes are computed once per community rather than once per pair.  The
        double loop visits both orderings of every pair and the product is
        commutative, so no explicit mirrored write is needed.
        """
        bottom = sum(self.deg_b.values()) - 1
        volumes = [(c.community_id, self._cross_volume(c)) for c in self.communities]

        expected = self.expected_betweenness_matrix
        for id_i, vol_i in volumes:
            for id_j, vol_j in volumes:
                expected[id_i, id_j] = vol_i * vol_j / bottom

    def _build_normalized_matrix(self) -> None:
        """Fill the result: ``(1 - empirical_xi) / (1 - xi)`` on regular diagonals, actual/expected elsewhere."""
        actual = self.actual_betweenness_matrix
        expected = self.expected_betweenness_matrix
        normalized = self.normalized_betweeness_matrix

        for c_i in self.communities:
            for c_j in self.communities:
                i, j = c_i.community_id, c_j.community_id
                if c_i == c_j and i != OUTLIER_COMMUNITY_ID:
                    normalized[i, j] = (1 - c_i.empirical_xi) / (1 - self.xi)
                else:
                    normalized[i, j] = actual[i, j] / expected[i, j]

    def build(self) -> NDArray[np.float64]:
        """Run all stages in order and return the normalised matrix."""
        self._build_location()
        self._build_actual_matrix()
        self._build_expectation_matrix()
        self._build_normalized_matrix()

        return self.normalized_betweeness_matrix