Coverage for src/abcd_graph/graph/core/build.py: 98%
102 statements
« prev ^ index » next coverage.py v7.5.3, created at 2024-11-17 17:02 +0100
« prev ^ index » next coverage.py v7.5.3, created at 2024-11-17 17:02 +0100
1# Copyright (c) 2024 Jordan Barrett & Aleksander Wojnarowicz
2#
3# Permission is hereby granted, free of charge, to any person obtaining a copy
4# of this software and associated documentation files (the "Software"), to deal
5# in the Software without restriction, including without limitation the rights
6# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7# copies of the Software, and to permit persons to whom the Software is
8# furnished to do so, subject to the following conditions:
9#
10# The above copyright notice and this permission notice shall be included in all
11# copies or substantial portions of the Software.
12#
13# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19# SOFTWARE.
# Public names of this module: exactly these are exported by a star import.
__all__ = [
    "build_communities",
    "build_degrees",
    "assign_degrees",
    "split_degrees",
    "build_community_sizes",
    "add_outliers",
]
30from typing import Any
32import numpy as np
33from numpy.typing import NDArray
35from abcd_graph.graph.core.constants import OUTLIER_COMMUNITY_ID
36from abcd_graph.graph.core.utils import (
37 powerlaw_distribution,
38 rand_round,
39)
def build_degrees(n: int, gamma: float, min_degree: int, max_degree: int) -> NDArray[np.int64]:
    """Sample ``n`` vertex degrees and return them in non-increasing order.

    Degrees are drawn from the integers ``min_degree..max_degree`` (inclusive)
    using the weights returned by ``powerlaw_distribution(candidates, gamma)``.

    Args:
        n: Number of degrees to draw.
        gamma: Parameter forwarded to ``powerlaw_distribution``.
        min_degree: Smallest degree that can be drawn.
        max_degree: Largest degree that can be drawn.

    Returns:
        The ``n`` sampled degrees sorted from largest to smallest, adjusted so
        that their sum is even.
    """
    candidates = np.arange(min_degree, max_degree + 1)
    weights = powerlaw_distribution(candidates, gamma)

    sampled = np.random.choice(candidates, size=n, p=weights)
    ascending = np.sort(sampled)
    degrees = ascending[::-1]

    # An odd degree sum is repaired by bumping the largest degree by one
    # (which can lift that single entry to max_degree + 1).
    odd_total = degrees.sum() % 2 == 1
    if odd_total:
        degrees[0] += 1

    return degrees
def build_community_sizes(n: int, beta: float, min_community_size: int, max_community_size: int) -> NDArray[np.int64]:
    """Sample community sizes that add up to exactly ``n``.

    Candidate sizes are drawn from ``min_community_size..max_community_size``
    (inclusive) using the weights returned by
    ``powerlaw_distribution(avail, beta)``. Candidates are taken in order until
    their running total reaches ``n``; any overshoot is then removed.

    Args:
        n: Total number of vertices the communities must contain.
        beta: Parameter forwarded to ``powerlaw_distribution``.
        min_community_size: Smallest size that can be sampled.
        max_community_size: Largest size that can be sampled.

    Returns:
        Community sizes sorted from largest to smallest, summing to ``n``.
        Redistribution of a dropped community can push individual sizes above
        ``max_community_size``. If ``n`` is smaller than ``min_community_size``
        a single community of size ``n`` is returned.
    """
    max_community_number = int(np.ceil(n / min_community_size))
    avail = np.arange(min_community_size, max_community_size + 1)

    probabilities = powerlaw_distribution(avail, beta)

    candidates = np.random.choice(avail, size=max_community_number, p=probabilities)

    # Every candidate is >= min_community_size, so the running total reaches n
    # within max_community_number entries. Find the first position where it
    # does with one cumulative sum instead of re-summing the array per step.
    running_total = np.cumsum(candidates)
    taken = int(np.searchsorted(running_total, n, side="left")) + 1
    # astype() copies, so the in-place adjustments below never touch `candidates`.
    community_sizes: NDArray[np.int64] = candidates[:taken].astype(np.int64)

    excess = int(community_sizes.sum()) - n
    if excess > 0:
        last = int(community_sizes[-1])
        if last - excess >= min_community_size or len(community_sizes) == 1:
            # Trim the last community. With a single community there is nobody
            # else to absorb its vertices, so it is trimmed even if that takes
            # it below min_community_size (previously a ZeroDivisionError).
            community_sizes[-1] = last - excess
        else:
            # Drop the last community and hand its remaining vertices out
            # round-robin, starting from the first community.
            community_sizes = community_sizes[:-1]
            share, remainder = divmod(last - excess, len(community_sizes))
            community_sizes += share
            community_sizes[:remainder] += 1

    return np.sort(community_sizes)[::-1]
def build_communities(community_sizes: NDArray[np.int64]) -> dict[int, list[int]]:
    """Split consecutive vertex labels into communities of the given sizes.

    Labels start at 0; community ``k`` receives the next
    ``community_sizes[k]`` labels in increasing order.

    Args:
        community_sizes: Size of each community, in community-id order.

    Returns:
        Mapping from community id (position in ``community_sizes``) to the
        list of vertex labels it contains.
    """
    members: dict[int, list[int]] = {}
    start = 0
    for community_id, size in enumerate(community_sizes):
        stop = start + size
        members[community_id] = list(range(start, stop))
        start = stop
    return members
def assign_degrees(
    degrees: NDArray[np.int64],
    communities: dict[int, list[int]],
    community_sizes: NDArray[np.int64],
    xi: float,
) -> dict[int, Any]:
    """Randomly map every entry of ``degrees`` to a vertex label.

    Degrees are processed in the order given. Whenever the degree drops below
    the previous one, a size threshold is computed with
    ``calculate_threshold`` and ``update_lock`` advances past every community
    whose size meets it, which widens the range of labels that
    ``choose_new_vertex`` may draw from. Once that range bound equals
    ``len(degrees) - 1`` the remaining degrees are handed to
    ``assign_remaining_degrees``.

    Args:
        degrees: Degrees to assign, one per vertex.
        communities: Vertex labels of each community, keyed by community id.
        community_sizes: Size of each community, indexed by community id.
        xi: Parameter forwarded to ``calculate_threshold``.

    Returns:
        Mapping from vertex label to its assigned degree.
    """
    vertex_count = len(degrees)
    community_count = len(community_sizes)
    phi = 1 - np.sum(community_sizes**2) / (vertex_count**2)

    assigned: dict[int, Any] = {}
    taken: set[int] = set()
    avail = 0
    lock = 0
    # Start one above the first degree so the first iteration refreshes the lock.
    last_degree = degrees[0] + 1

    for position, degree in enumerate(degrees):
        if lock_needs_update(degree, last_degree, lock, community_count):
            lock, avail = update_lock(
                calculate_threshold(degree, xi, phi),
                lock,
                avail,
                community_sizes,
                communities,
            )
        last_degree = degree

        vertex = choose_new_vertex(avail, taken)
        taken.add(vertex)
        assigned[vertex] = degree

        # Every community is unlocked: the rest can be assigned in one go.
        if avail == vertex_count - 1:
            return assign_remaining_degrees(position, degrees, taken, assigned)

    return assigned
def lock_needs_update(degree: int, previous_degree: int, lock: int, num_communities: int) -> bool:
    """Tell whether the community lock has to be recomputed.

    Args:
        degree: Degree currently being assigned.
        previous_degree: Degree handled in the previous step.
        lock: Index of the first community that is still locked.
        num_communities: Total number of communities.

    Returns:
        True only when the degree has decreased and at least one community
        is still locked.
    """
    if degree < previous_degree:
        return lock < num_communities
    return False
def calculate_threshold(d: int, xi: float, phi: float) -> float:
    """Return ``d * (1 - xi * phi) + 1``.

    ``update_lock`` compares community sizes against this value.

    Args:
        d: Vertex degree.
        xi: First factor of the product subtracted from 1.
        phi: Second factor of the product subtracted from 1.

    Returns:
        The threshold for degree ``d``.
    """
    scaled = xi * phi
    kept_fraction = 1 - scaled
    return d * kept_fraction + 1
def update_lock(
    threshold: float,
    lock: int,
    avail: int,
    community_sizes: NDArray[np.int64],
    communities: dict[int, list[int]],
) -> tuple[int, int]:
    """Advance the lock past every community whose size meets ``threshold``.

    Starting at community ``lock``, communities are visited in index order
    while their size is at least ``threshold``. For each one visited,
    ``avail`` becomes the last vertex label of that community.

    Args:
        threshold: Minimum size a community needs to be unlocked.
        lock: Index of the first community still locked; must be a valid
            index into ``community_sizes``.
        avail: Current availability bound, returned unchanged if nothing is
            unlocked.
        community_sizes: Size of each community, indexed by community id.
        communities: Vertex labels of each community, keyed by community id.

    Returns:
        The updated ``(lock, avail)`` pair.
    """
    community_count = len(community_sizes)
    unlockable = community_sizes[lock] >= threshold
    while unlockable:
        avail = communities[lock][-1]
        lock += 1
        # Stop at the end of the list before indexing past it.
        unlockable = lock != community_count and community_sizes[lock] >= threshold
    return lock, avail
def choose_new_vertex(avail: int, already_chosen: set[int]) -> int:
    """Draw a random label from ``range(avail)`` that has not been used yet.

    Labels are drawn with ``np.random.choice(avail)`` and redrawn until one
    is found that is not in ``already_chosen``.

    Args:
        avail: Exclusive upper bound of the labels that may be drawn.
        already_chosen: Labels that must not be returned.

    Returns:
        A label in ``range(avail)`` that is not in ``already_chosen``.

    Raises:
        ValueError: If every label in ``range(avail)`` is already chosen
            (this includes ``avail <= 0``). Without this check the redraw
            loop below would never terminate.
    """
    # The range can only be exhausted once at least `avail` labels are taken,
    # so the full scan runs only in that rare case.
    if len(already_chosen) >= avail and all(label in already_chosen for label in range(avail)):
        raise ValueError(f"no unused vertex label left in range({avail})")

    v = np.random.choice(avail)
    while v in already_chosen:
        v = np.random.choice(avail)

    return v
def assign_remaining_degrees(
    degree_index: int,
    degrees: NDArray[np.int64],
    already_chosen: set[int],
    deg: dict[int, Any],
) -> dict[int, Any]:
    """Give the degrees after ``degree_index`` to the still-unused labels.

    The labels in ``range(len(degrees))`` that are not in ``already_chosen``
    are shuffled and paired, in order, with ``degrees[degree_index + 1:]``.

    Args:
        degree_index: Index of the last degree that has already been assigned.
        degrees: All degrees, one per vertex.
        already_chosen: Labels that already have a degree.
        deg: Existing label-to-degree mapping; updated in place.

    Returns:
        The same ``deg`` mapping, extended with the new assignments.
    """
    every_label = set(range(len(degrees)))
    unassigned: NDArray[np.int64] = np.array(list(every_label - already_chosen))
    leftover_degrees: NDArray[np.int64] = degrees[degree_index + 1 :]  # noqa: E203

    np.random.shuffle(unassigned)

    deg.update(zip(unassigned, leftover_degrees))
    return deg
def split_degrees(
    degrees: dict[int, int],
    communities: dict[int, list[int]],
    xi: float,
) -> tuple[dict[int, int], dict[int, int]]:
    """Split each vertex degree into two parts, ``deg_c`` and ``deg_b``.

    ``deg_c[v]`` starts as ``rand_round((1 - xi) * degrees[v])``. For every
    community whose ``deg_c`` values sum to an odd number, the vertex returned
    by ``_get_v_max`` is adjusted by one so the sum becomes even. ``deg_b[v]``
    is whatever remains of the vertex's degree.

    Args:
        degrees: Total degree of every vertex, keyed by vertex label.
        communities: Vertex labels of each community, keyed by community id.
        xi: Fraction used in ``(1 - xi) * degree`` before rounding.

    Returns:
        The pair ``(deg_c, deg_b)``; for every vertex the two values add up
        to its entry in ``degrees``.
    """
    deg_c = {vertex: rand_round((1 - xi) * degree) for vertex, degree in degrees.items()}

    for members in communities.values():
        community_total = sum(deg_c[vertex] for vertex in members)
        if community_total % 2:
            target = _get_v_max(deg_c, members)
            raised = deg_c[target] + 1
            # Prefer adding one; if that would exceed the vertex's total
            # degree, take one away from the original value instead.
            deg_c[target] = raised if not raised > degrees[target] else raised - 2

    deg_b = {vertex: degree - deg_c[vertex] for vertex, degree in degrees.items()}
    return deg_c, deg_b
def add_outliers(
    *,
    vcount: int,
    num_outliers: int,
    gamma: float,
    min_degree: int,
    max_degree: int,
    communities: dict[int, list[int]],
    deg_b: dict[int, int],
    deg_c: dict[int, int],
) -> tuple[dict[int, list[int]], dict[int, int], dict[int, int]]:
    """Append outlier vertices to the community and degree mappings.

    The last ``num_outliers`` labels below ``vcount`` become outliers. They
    are grouped under ``OUTLIER_COMMUNITY_ID``, receive degrees drawn by
    ``build_degrees`` as their ``deg_b`` value, and get ``0`` in ``deg_c``.
    The input mappings are not modified; merged copies are returned.

    Args:
        vcount: Total number of vertices, outliers included.
        num_outliers: Number of outlier vertices.
        gamma: Forwarded to ``build_degrees``.
        min_degree: Forwarded to ``build_degrees``.
        max_degree: Forwarded to ``build_degrees``.
        communities: Vertex labels of each community, keyed by community id.
        deg_b: Existing ``deg_b`` values keyed by vertex label.
        deg_c: Existing ``deg_c`` values keyed by vertex label.

    Returns:
        The extended ``(communities, deg_b, deg_c)`` triple.
    """
    first_outlier = vcount - num_outliers
    outlier_degrees = build_degrees(num_outliers, gamma, min_degree, max_degree)

    outlier_community = {OUTLIER_COMMUNITY_ID: list(range(first_outlier, vcount))}
    merged_communities = communities | outlier_community

    outlier_deg_b = {}
    for offset in range(num_outliers):
        outlier_deg_b[first_outlier + offset] = outlier_degrees[offset]
    merged_deg_b = deg_b | outlier_deg_b

    outlier_deg_c = {}
    for offset in range(num_outliers):
        outlier_deg_c[first_outlier + offset] = 0
    merged_deg_c = deg_c | outlier_deg_c

    return merged_communities, merged_deg_b, merged_deg_c
def _get_v_max(deg_c: dict[int, int], community: list[int]) -> int:
    """Return the vertex of ``community`` with the largest ``deg_c`` value.

    Ties go to the vertex that appears first in ``community``.

    Args:
        deg_c: Value per vertex label.
        community: Vertex labels to choose from; must not be empty.

    Returns:
        The label in ``community`` whose ``deg_c`` entry is largest.
    """
    local_values: dict[int, int] = {}
    for vertex in community:
        local_values[vertex] = deg_c[vertex]
    return max(local_values, key=local_values.get)