Coverage for C:\Users\reysi\PycharmProjects\preflibtools\preflibtools\instances\preflibinstance.py: 69%

446 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-20 19:24 +0100

1""" This module describes the main class to deal with PrefLib instances.. 

2""" 

3import os.path 

4 

5from .sampling import * 

6 

7from copy import deepcopy 

8from os import path 

9 

10import urllib 

11import re 

12 

13 

14class PrefLibInstance(object): 

15 """ This class provide a general template to implement specific classes representing PrefLib instances. It should 

16 mainly be used as an abstract class. 

17 

18 :ivar file_path: The path to the file the instance is taken from. 

19 :ivar file_name: The name of the file the instance is taken from. 

20 :ivar data_type: The data type of the instance. Whenever a function only applies to certain types of data 

21 (strict and complete orders for instance), we do so by checking this value. 

22 :ivar modification_type: The modification type of the file: original if it represents original data; induced 

23 or imbued it is derived from some other data; synthetic if it is synthetically generated. 

24 :ivar relates_to: The data file this instance relates to, typically the original data file for induced 

25 preferences. 

26 :ivar related_files: The data files that are to the instance. 

27 :ivar title: The title of the instance. 

28 :ivar description: A description of the instance. 

29 :ivar publication_date: Date at which the corresponding file has been added to PrefLib.com. 

30 :ivar modification_date: Last date the file has been modified on PrefLib.com. 

31 :ivar num_alternatives: The number of alternatives in the instance. 

32 :ivar alternatives_name: A dictionary mapping alternative (int) to their name (str). 

33 :ivar num_voters: The number of voters in the instance. 

34 """ 

35 

36 def __init__(self): 

37 self.file_path = "" 

38 self.file_name = "" 

39 self.data_type = "" 

40 self.modification_type = "" 

41 self.relates_to = "" 

42 self.related_files = "" 

43 self.title = "" 

44 self.description = "" 

45 self.publication_date = "" 

46 self.modification_date = "" 

47 self.num_alternatives = 0 

48 self.alternatives_name = {} 

49 self.num_voters = 0 

50 self.alt_name_pattern = re.compile(r'# ALTERNATIVE NAME (\d+): (.*)') 

51 

52 def type_validator(self, data_type): 

53 pass 

54 

55 def parse(self, lines, autocorrect=False): 

56 pass 

57 

58 def parse_lines(self, lines, autocorrect=False): 

59 """ Parses the lines provided as argument. The parser to be used is deducted from the instance's inner value of 

60 data_type. 

61 

62 :param lines: A list of string, each string being one line of the instance to parse. 

63 :type lines: list 

64 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

65 in the file. Default is False. 

66 :type autocorrect: bool 

67 """ 

68 

69 if self.type_validator(self.data_type): 

70 self.parse(lines, autocorrect=autocorrect) 

71 else: 

72 raise TypeError("File extension " + str(self.data_type) + " is not valid for an ordinal PrefLib instance." + 

73 " This file cannot be parsed.") 

74 

75 def parse_file(self, filepath, autocorrect=False): 

76 """ Parses the file whose path is provided as argument and populates the PreflibInstance object accordingly. 

77 The parser to be used is deduced from the file extension. 

78 

79 :param filepath: The path to the file to be parsed. 

80 :type filepath: str 

81 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

82 in the file. Default is False. 

83 :type autocorrect: bool 

84 """ 

85 

86 # Populating basic properties of the instance 

87 self.file_path = filepath 

88 self.file_name = path.split(filepath)[1] 

89 self.data_type = path.splitext(filepath)[1][1:] 

90 

91 # Read the file 

92 file = open(filepath, "r", encoding="utf-8") 

93 lines = file.readlines() 

94 file.close() 

95 

96 self.parse_lines(lines, autocorrect=autocorrect) 

97 

98 def parse_str(self, string, data_type, file_name="", autocorrect=False): 

99 """ Parses the string provided as argument and populates the PreflibInstance object accordingly. 

100 The parser to be used is deduced from the file extension passed as argument. 

101 

102 :param string: The string to parse. 

103 :type string: str 

104 :param data_type: The data type represented by the string. 

105 :type data_type: str 

106 :param file_name: The value to store in the file_name member of the instance. Default is the empty string. 

107 :type file_name: str 

108 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

109 in the file. Default is False. 

110 :type autocorrect: bool 

111 """ 

112 

113 self.file_path = "parsed_from_string" 

114 self.file_name = file_name 

115 self.data_type = data_type 

116 

117 self.parse_lines(string.splitlines(), autocorrect=autocorrect) 

118 

119 def parse_url(self, url, autocorrect=False): 

120 """ Parses the file located at the provided URL and populates the PreflibInstance object accordingly. 

121 The parser to be used (whether the file describes a graph or an order for instance) is deduced based 

122 on the file extension. 

123 

124 :param url: The target URL. 

125 :type url: str 

126 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

127 in the file. Default is False. 

128 :type autocorrect: bool 

129 """ 

130 

131 data = urllib.request.urlopen(url) 

132 lines = [line.decode("utf-8").strip() for line in data] 

133 data.close() 

134 

135 self.file_path = url 

136 self.file_name = url.split('/')[-1].split('.')[0] 

137 self.data_type = url.split('.')[-1] 

138 

139 self.parse_lines(lines, autocorrect=autocorrect) 

140 

141 def parse_metadata(self, line, autocorrect=False): 

142 if line.startswith("# FILE NAME"): 

143 self.file_name = line[12:].strip() 

144 elif line.startswith("# TITLE"): 

145 self.title = line[8:].strip() 

146 elif line.startswith("# DESCRIPTION"): 

147 self.description = line[14:].strip() 

148 elif line.startswith("# DATA TYPE"): 

149 self.data_type = line[12:].strip() 

150 elif line.startswith("# MODIFICATION TYPE"): 

151 self.modification_type = line[20:].strip() 

152 elif line.startswith("# RELATES TO"): 

153 self.relates_to = line[13:].strip() 

154 elif line.startswith("# RELATED FILES"): 

155 self.related_files = line[16:].strip() 

156 elif line.startswith("# PUBLICATION DATE"): 

157 self.publication_date = line[19:].strip() 

158 elif line.startswith("# MODIFICATION DATE"): 

159 self.modification_date = line[20:].strip() 

160 elif line.startswith("# NUMBER ALTERNATIVES"): 

161 self.num_alternatives = int(line[22:].strip()) 

162 elif line.startswith("# NUMBER VOTERS"): 

163 self.num_voters = int(line[16:].strip()) 

164 elif line.startswith("# ALTERNATIVE NAME"): 

165 match = re.match(self.alt_name_pattern, line) 

166 if match: 

167 alt = int(match.group(1)) 

168 alt_name = match.group(2) 

169 if autocorrect and alt_name in self.alternatives_name.values(): 

170 tmp = 1 

171 while alt_name + "__" + str(tmp) in self.alternatives_name.values(): 

172 tmp += 1 

173 self.alternatives_name[alt] = alt_name + "__" + str(tmp) 

174 else: 

175 self.alternatives_name[alt] = alt_name 

176 

177 def write(self, filepath): 

178 pass 

179 

180 def write_metadata(self, file): 

181 file.write("# FILE NAME: {}\n# TITLE: {}\n# DESCRIPTION: {}\n# DATA TYPE: {}\n# MODIFICATION TYPE: {}\n".format( 

182 self.file_name, self.title, self.description, self.data_type, self.modification_type)) 

183 file.write("# RELATES TO: {}\n# RELATED FILES: {}\n# PUBLICATION DATE: {}\n# MODIFICATION DATE: {}\n".format( 

184 self.relates_to, self.related_files, self.publication_date, self.modification_date)) 

185 

186 def __str__(self): 

187 return "PrefLib-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives) 

188 

189 

190class OrdinalInstance(PrefLibInstance): 

191 """ This is the class representing a PrefLib instance of ordinal preferences. It basically contains the data and 

192 information written within a PrefLib file. 

193 

194 :param file_path: The path to the file the instance is taken from. If a path is provided as a parameter, 

195 the file is immediately parsed and the instance populated accordingly. 

196 :type file_path: str, optional 

197 

198 :ivar num_unique_orders: The number of unique orders in the instance. 

199 :ivar multiplicity: A dictionary mapping each order to the number of voters who submitted that order. 

200 :ivar orders: The list of all the distinct orders in the instance. 

201 """ 

202 

203 def __init__(self, file_path=""): 

204 PrefLibInstance.__init__(self) 

205 self.file_path = file_path 

206 self.num_unique_orders = 0 

207 self.multiplicity = {} 

208 self.orders = [] 

209 

210 # If a filePath is given as argument, we parse it immediately 

211 if len(file_path) > 0: 

212 self.parse_file(file_path) 

213 

214 def type_validator(self, data_type): 

215 return data_type in ['soc', 'soi', 'toc', 'toi'] 

216 

217 def parse(self, lines, autocorrect=False): 

218 """ Parses the strings provided as argument, assuming that the latter describes an order. 

219 

220 :param lines: A list of string, each string being one line of the instance to parse. 

221 :type lines: list 

222 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

223 in the file. Default is False. 

224 :type autocorrect: bool 

225 """ 

226 

227 # The first few lines contain the metadata 

228 i = 0 

229 for i in range(len(lines)): 

230 line = lines[i].strip() 

231 if line.startswith('#'): 

232 if line.startswith("# NUMBER UNIQUE ORDERS"): 

233 self.num_unique_orders = int(line[23:].strip()) 

234 else: 

235 self.parse_metadata(line, autocorrect=autocorrect) 

236 else: 

237 break 

238 

239 # The rest of the lines are about the preferences 

240 order_pattern = re.compile(r'{[\d,]+?}|[\d,]+') 

241 for line in lines[i:]: 

242 # The first element indicates the multiplicity of the order 

243 multiplicity, order_str = line.strip().split(":") 

244 multiplicity = int(multiplicity) 

245 order = [] 

246 for group in re.findall(order_pattern, order_str): 

247 if group.startswith('{'): 

248 group = group[1:-1] 

249 order.append(tuple(int(alt.strip()) for alt in group.split(',') if len(alt) > 0)) 

250 else: 

251 for alt in group.split(','): 

252 if len(alt) > 0: 

253 order.append((int(alt.strip()),)) 

254 order = tuple(order) 

255 self.orders.append(order) 

256 if autocorrect and order in self.multiplicity: 

257 self.multiplicity[tuple(order)] += multiplicity 

258 else: 

259 self.multiplicity[tuple(order)] = multiplicity 

260 

261 if autocorrect: 

262 self.orders = list(set(self.orders)) 

263 self.num_alternatives = len(self.alternatives_name) 

264 self.num_unique_orders = len(self.orders) 

265 self.num_voters = sum(self.multiplicity.values()) 

266 

267 def parse_old(self, lines, autocorrect=False): 

268 """ Parses the strings provided as argument, assuming that the latter describes an order, in the old PrefLib 

269 format. 

270 

271 :param lines: A list of string, each string being one line of the instance to parse. 

272 :type lines: list 

273 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

274 in the file. Default is False. 

275 :type autocorrect: bool 

276 """ 

277 

278 # The first line gives us the number of alternatives, then comes the names of the alternatives 

279 self.num_alternatives = int(lines[0]) 

280 for i in range(1, self.num_alternatives + 1): 

281 alt_name = lines[i].split(",")[1].strip() 

282 if autocorrect and alt_name in self.alternatives_name.values(): 

283 tmp = 1 

284 while alt_name + "__" + str(tmp) in self.alternatives_name.values(): 

285 tmp += 1 

286 self.alternatives_name[i] = alt_name + "__" + str(tmp) 

287 else: 

288 self.alternatives_name[i] = alt_name 

289 

290 # We've reached the description of the preferences. We start by some numbers... 

291 self.num_voters = int(lines[self.num_alternatives + 1].split(",")[0]) 

292 self.num_unique_orders = int(lines[self.num_alternatives + 1].split(",")[2]) 

293 

294 # ... and finally comes the preferences 

295 for line in lines[self.num_alternatives + 2:]: 

296 # The first element indicates the multiplicity of the order 

297 elements = line.strip().split(",") 

298 multiplicity = int(elements[0]) 

299 

300 # Then we deal with the rest 

301 in_braces = False 

302 order = [] 

303 indif_class = [] 

304 for w in elements[1:]: 

305 # If there is something in w 

306 if w != "{}" and len(w) > 0: 

307 # If we are entering a series of ties (grouped by {}) 

308 if w.startswith("{"): 

309 # If w also ends with a }, we remove it 

310 if w.endswith("}"): 

311 w = w[:-1] 

312 in_braces = True 

313 indif_class.append(int(w[1:])) # The first element of w is {, so we go beyond that 

314 # If we finished reading a series of ties (grouped by {}) 

315 elif w.endswith("}"): 

316 in_braces = False 

317 indif_class.append(int(w[:-1])) # The first element of w is }, so we go beyond that 

318 order.append(tuple(indif_class)) 

319 indif_class = [] 

320 # Otherwise, we are just reading numbers 

321 else: 

322 # If we are facing ties, we add in the indifference class 

323 if in_braces: 

324 if int(w) not in indif_class: 

325 indif_class.append(int(w)) 

326 # Otherwise, we add the strict preference. 

327 else: 

328 if (int(w),) not in order: 

329 order.append((int(w),)) 

330 order = tuple(order) 

331 self.orders.append(order) 

332 if autocorrect and order in self.multiplicity: 

333 self.multiplicity[tuple(order)] += multiplicity 

334 else: 

335 self.multiplicity[tuple(order)] = multiplicity 

336 

337 if autocorrect: 

338 self.orders = list(set(self.orders)) 

339 

340 def write(self, filepath): 

341 """ Writes the instance into a file whose destination has been given as argument. If no file extension is 

342 provided the data type of the instance is used. 

343 

344 :param filepath: The destination where to write the instance. 

345 :type filepath: str 

346 """ 

347 if len(path.splitext(filepath)[1]) == 0: 

348 filepath += "." + str(self.data_type) 

349 file = open(filepath, "w", encoding="utf-8") 

350 # Writing metadata in the file header 

351 self.write_metadata(file) 

352 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER VOTERS: {}\n# NUMBER UNIQUE ORDERS: {}\n".format( 

353 self.num_alternatives, self.num_voters, self.num_unique_orders 

354 )) 

355 for alt, name in self.alternatives_name.items(): 

356 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name)) 

357 # Writing the actual ballots with their multiplicity 

358 orders = deepcopy(self.orders) 

359 orders.sort(key=lambda o: (-self.multiplicity[o], -len(o))) 

360 for order in orders: 

361 order_str = "" 

362 for indif_class in order: 

363 if len(indif_class) == 1: 

364 order_str += str(indif_class[0]) + "," 

365 else: 

366 order_str += "{" + ",".join((str(alt) for alt in indif_class)) + "}," 

367 file.write("{}: {}\n".format(self.multiplicity[order], order_str[:-1])) 

368 file.close() 

369 

370 def vote_map(self): 

371 """ Returns the instance described as a vote map, i.e., a dictionary whose keys are orders, mapping 

372 to the number of voters with the given order as their preferences. This format can be useful for some 

373 applications. It also ensures interoperability with the old preflibtools (vote maps were the main object). 

374 

375 :return: A vote map representing the preferences in the instance. 

376 :rtype: dict of (tuples, int) 

377 """ 

378 vote_map = {} 

379 for order in self.orders: 

380 vote_map[order] = self.multiplicity[order] 

381 return vote_map 

382 

383 def full_profile(self): 

384 """ Returns a list containing all the orders appearing in the preferences, with each order appearing as 

385 many times as their multiplicity. 

386 

387 :return: A list of preferences (lists of alternatives). 

388 :rtype: list 

389 """ 

390 res = [] 

391 for order in self.orders: 

392 res += [order] * self.multiplicity[order] 

393 return res 

394 

395 def flatten_strict(self): 

396 """ Strict orders are represented as orders with indifference classes of size 1. This is somewhat heavy when 

397 working with strict preferences. This function flattens strict preferences by removing the indifference 

398 class. 

399 

400 :return: A list of tuples of preference order and multiplicity. 

401 :rtype: list 

402 """ 

403 res = [] 

404 for order in self.orders: 

405 if len(order) != self.num_alternatives: 

406 print("WARNING: You are flattening a non-strict order.") 

407 res.append((tuple(indif_class[0] for indif_class in order), self.multiplicity[order])) 

408 return res 

409 

410 def infer_type(self): 

411 """ Loops through the orders of the instance to infer whether the preferences strict and/or complete,. 

412 

413 :return: The data type of the instance. 

414 :rtype: str  

415 """ 

416 strict = True 

417 complete = True 

418 for order in self.orders: 

419 if max(len(indif_class) for indif_class in order) != 1: 

420 strict = False 

421 if len([alt for indif_class in order for alt in indif_class]) != self.num_alternatives: 

422 complete = False 

423 if not strict and not complete: 

424 return "toi" 

425 if strict and complete: 

426 return "soc" 

427 if strict and not complete: 

428 return "soi" 

429 if not strict and complete: 

430 return "toc" 

431 

432 def recompute_cardinality_param(self): 

433 """ Recomputes the basic cardinality parameters based on the order list in the instance. Numbers that are 

434 recomputed are the number of voters, the sum of voter count and the number of unique orders. 

435 """ 

436 num_voters = 0 

437 for order in self.orders: 

438 num_voters += self.multiplicity[order] 

439 self.num_voters = num_voters 

440 self.num_unique_orders = len(set(self.orders)) 

441 

442 def append_order_list(self, orders): 

443 """ Appends a vote map to the instance. That function incorporates the new orders into the instance and 

444 updates the set of alternatives if needed. 

445 

446 :param orders: A list of tuples of tuples, each tuple representing a preference order.  

447 :type orders: list 

448 """ 

449 alternatives = set(alt for order in orders for indif_class in order for alt in indif_class) 

450 for alt in alternatives: 

451 if alt not in self.alternatives_name: 

452 self.alternatives_name[alt] = "Alternative " + str(alt) 

453 self.num_alternatives = len(self.alternatives_name) 

454 

455 self.num_voters += len(orders) 

456 

457 for order in orders: 

458 multiplicity = len([o for o in orders if o == order]) 

459 if order in self.multiplicity: 

460 self.multiplicity[order] += multiplicity 

461 else: 

462 self.multiplicity[order] = multiplicity 

463 

464 self.num_unique_orders = len(self.multiplicity) 

465 self.orders += deepcopy(orders) 

466 

467 self.data_type = self.infer_type() 

468 

469 def append_vote_map(self, vote_map): 

470 """ Appends a vote map to the instance. That function incorporates the new orders into the instance and 

471 updates the set of alternatives if needed. 

472  

473 :param vote_map: A vote map representing preferences. A vote map is a dictionary whose keys represent 

474 orders (tuples of tuples of int) that are mapped to the number of voters with the given order as  

475 their preferences. We re-map the orders to tuple of tuples to be sure we are dealing with the correct 

476 type. 

477 :type vote_map: dict of (tuple, int) 

478 """ 

479 for ballot, multiplicity in vote_map.items(): 

480 order = tuple(tuple(indif_class) for indif_class in ballot) 

481 if order not in self.orders: 

482 self.orders.append(order) 

483 self.multiplicity[order] = multiplicity 

484 else: 

485 self.multiplicity[order] += multiplicity 

486 self.num_voters += multiplicity 

487 

488 for indif_class in ballot: 

489 for alt in indif_class: 

490 if alt not in self.alternatives_name: 

491 self.alternatives_name[alt] = "Alternative " + str(alt) 

492 

493 self.num_alternatives = len(self.alternatives_name) 

494 self.num_unique_orders = len(self.multiplicity) 

495 

496 self.data_type = self.infer_type() 

497 

498 def populate_IC(self, num_voters, num_alternatives): 

499 """ Populates the instance with a random profile of strict preferences taken from the impartial culture 

500 distribution. Uses :math:`preflibtools.instances.sampling.urnModel` for sampling. 

501 

502 :param num_voters: Number of orders to sample. 

503 :type num_voters: int 

504 :param num_alternatives: Number of alternatives for the sampled orders. 

505 :type num_alternatives: int 

506 """ 

507 self.append_vote_map(generate_IC(num_voters, list(range(num_alternatives)))) 

508 

509 def populate_IC_anon(self, num_voters, num_alternatives): 

510 """ Populates the instance with a random profile of strict preferences taken from the impartial anonymous 

511 culture distribution. Uses :class:`preflibtools.instances.sampling` for sampling. 

512 

513 :param num_voters: Number of orders to sample. 

514 :type num_voters: int 

515 :param num_alternatives: Number of alternatives for the sampled orders. 

516 :type num_alternatives: int 

517 """ 

518 self.append_vote_map(generate_IC_anon(num_voters, list(range(num_alternatives)))) 

519 

520 def populate_urn(self, num_voters, num_alternatives, replace): 

521 """ Populates the instance with a random profile of strict preferences taken from the urn distribution. 

522 Uses :class:`preflibtools.instances.sampling` for sampling. 

523 

524 :param num_voters: Number of orders to sample. 

525 :type num_voters: int 

526 :param num_alternatives: Number of alternatives for the sampled orders. 

527 :type num_alternatives: int 

528 :param replace: The number of replacements for the urn model. 

529 :type replace: int 

530 """ 

531 self.append_vote_map(generate_urn(num_voters, list(range(num_alternatives)), replace)) 

532 

533 def populate_mallows(self, num_voters, num_alternatives, mixture, dispersions, references): 

534 """ Populates the instance with a random profile of strict preferences taken from a mixture of Mallows' 

535 models. Uses :class:`preflibtools.instances.sampling` for sampling. 

536 

537 :param num_voters: Number of orders to sample. 

538 :type num_voters: int 

539 :param num_alternatives: Number of alternatives for the sampled orders. 

540 :type num_alternatives: int 

541 :param mixture: A list of the weights of each element of the mixture. 

542 :type mixture: list of positive numbers 

543 :param dispersions: A list of the dispersion coefficient of each element of the mixture. 

544 :type dispersions: list of float 

545 :param references: A list of the reference orders for each element of the mixture. 

546 :type references: list of tuples of tuples of int 

547 """ 

548 self.append_vote_map(generate_mallows(num_voters, num_alternatives, mixture, dispersions, references)) 

549 

550 def populate_mallows_mix(self, num_voters, num_alternatives, num_references): 

551 """ Populates the instance with a random profile of strict preferences taken from a mixture of Mallows' 

552 models for which reference points and dispersion coefficients are independently and identically  

553 distributed. Uses :class:`preflibtools.instances.sampling` for sampling. 

554 

555 :param num_voters: Number of orders to sample. 

556 :type num_voters: int 

557 :param num_alternatives: Number of alternatives for the sampled orders. 

558 :type num_alternatives: int 

559 :param num_references: Number of element 

560 :type num_references: int 

561 """ 

562 self.append_vote_map(generate_mallows_mix(num_voters, list(range(num_alternatives)), num_references)) 

563 

564 

565class ComparisonInstance(PrefLibInstance): 

566 """ To be implemented. 

567 

568 """ 

569 

570 def __init__(self): 

571 PrefLibInstance.__init__(self) 

572 

573 

574class CategoricalInstance(PrefLibInstance): 

575 """ This is the class representing a PrefLib instance of categorical preferences. It basically contains the data and 

576 information written within a PrefLib file. 

577 """ 

578 

579 def __init__(self, file_path=""): 

580 PrefLibInstance.__init__(self) 

581 self.file_path = file_path 

582 self.num_unique_preferences = 0 

583 self.multiplicity = {} 

584 self.num_categories = 0 

585 self.categories_name = {} 

586 self.preferences = [] 

587 

588 # If a filePath is given as argument, we parse it immediately 

589 if len(file_path) > 0: 

590 self.parse_file(file_path) 

591 

592 def type_validator(self, data_type): 

593 return data_type == "cat" 

594 

595 def parse(self, lines, autocorrect=False): 

596 """ Parses the strings provided as argument, assuming that the latter describes categorical preferences. 

597 

598 :param lines: A list of string, each string being one line of the instance to parse. 

599 :type lines: list 

600 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

601 in the file. Default is False. 

602 :type autocorrect: bool 

603 """ 

604 

605 # The first few lines contain the metadata 

606 i = 0 

607 cat_name_pattern = re.compile(r'# CATEGORY NAME (\d+): (.*)') 

608 for i in range(len(lines)): 

609 line = lines[i].strip() 

610 if line.startswith('#'): 

611 if line.startswith("# NUMBER UNIQUE PREFERENCES"): 

612 self.num_unique_preferences = int(line[28:].strip()) 

613 if line.startswith("# NUMBER CATEGORIES"): 

614 self.num_categories = int(line[20:].strip()) 

615 elif line.startswith("# CATEGORY NAME"): 

616 match = re.match(cat_name_pattern, line) 

617 if match: 

618 cat = int(match.group(1)) 

619 cat_name = match.group(2) 

620 if autocorrect and cat_name in self.categories_name.values(): 

621 tmp = 1 

622 while cat_name + "__" + str(tmp) in self.categories_name.values(): 

623 tmp += 1 

624 self.categories_name[cat] = cat_name + "__" + str(tmp) 

625 else: 

626 self.categories_name[cat] = cat_name 

627 else: 

628 self.parse_metadata(line, autocorrect=autocorrect) 

629 else: 

630 break 

631 

632 # The rest of the lines are about the preferences 

633 pref_pattern = re.compile(r'{[\d,]+?}|[\d,]+|{}') 

634 for line in lines[i:]: 

635 # The first element indicates the multiplicity of the order 

636 multiplicity, pref_str = line.strip().split(":") 

637 multiplicity = int(multiplicity) 

638 pref = [] 

639 for group in re.findall(pref_pattern, pref_str): 

640 if group == '{}': 

641 pref.append(tuple()) 

642 elif group.startswith('{'): 

643 group = group[1:-1] 

644 pref.append(tuple(int(alt.strip()) for alt in group.split(',') if len(alt) > 0)) 

645 else: 

646 for alt in group.split(','): 

647 if len(alt) > 0: 

648 pref.append((int(alt.strip()),)) 

649 pref = tuple(pref) 

650 self.preferences.append(pref) 

651 if autocorrect and pref in self.multiplicity: 

652 self.multiplicity[tuple(pref)] += multiplicity 

653 else: 

654 self.multiplicity[tuple(pref)] = multiplicity 

655 

656 if autocorrect: 

657 self.preferences = list(set(self.preferences)) 

658 self.num_alternatives = len(self.alternatives_name) 

659 self.num_unique_preferences = len(self.preferences) 

660 self.num_voters = sum(self.multiplicity.values()) 

661 

662 def write(self, filepath): 

663 """ Writes the instance into a file whose destination has been given as argument. If no file extension is 

664 provided the data type of the instance is used. 

665 

666 :param filepath: The destination where to write the instance. 

667 :type filepath: str 

668 """ 

669 if len(path.splitext(filepath)[1]) == 0: 

670 filepath += "." + str(self.data_type) 

671 file = open(filepath, "w", encoding="utf-8") 

672 # Writing metadata in the file header 

673 self.write_metadata(file) 

674 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER VOTERS: {}\n# NUMBER UNIQUE PREFERENCES: {}\n".format( 

675 self.num_alternatives, self.num_voters, self.num_unique_preferences 

676 )) 

677 file.write("# NUMBER CATEGORIES: {}\n".format(self.num_categories)) 

678 for cat, name in self.categories_name.items(): 

679 file.write("# CATEGORY NAME {}: {}\n".format(cat, name)) 

680 for alt, name in self.alternatives_name.items(): 

681 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name)) 

682 # Writing the actual ballots with their multiplicity 

683 preferences = deepcopy(self.preferences) 

684 preferences.sort(key=lambda o: (-self.multiplicity[o], -len(o))) 

685 for pref in preferences: 

686 pref_str = "" 

687 for category in pref: 

688 if len(category) == 0: 

689 pref_str += '{},' 

690 elif len(category) == 1: 

691 pref_str += str(category[0]) + "," 

692 else: 

693 pref_str += "{" + ",".join((str(alt) for alt in category)) + "}," 

694 file.write("{}: {}\n".format(self.multiplicity[pref], pref_str[:-1])) 

695 file.close() 

696 

697 

698class WeightedGraph(object): 

699 """ This class is used to represent (weighted) graphs. 

700 

701 :ivar dict: The dictionary representing the graph mapping each node to its neighbourhood (set of nodes 

702 to which it is connected). A node can be of any hashable type. 

703 :ivar weight: The dictionary mapping every node to its weight. 

704 """ 

705 

706 def __init__(self): 

707 self.node_mapping = dict() 

708 self.weight = dict() 

709 

710 def neighbours(self, node): 

711 """ Returns all the neighbours of a given node. 

712 

713 :param node: The node whose neighbours we want to know. 

714 

715 :return: The set of the neighbours of the node. 

716 :rtype: set 

717 """ 

718 return self.node_mapping[node] 

719 

720 def outgoing_edges(self, node): 

721 """ Returns all the edges leaving a given node. 

722 

723 :param node: The node whose edges we want to get. 

724 

725 :return: The set of the tuples (node, neighbour, edgeWeight) representing (weighted) edges. 

726 :rtype: set of tuples 

727 """ 

728 return {(node, n, self.weight[(node, n)]) for n in self.node_mapping[node]} 

729 

730 def add_node(self, node): 

731 """ Adds a node to the graph if the node does not already exist. 

732 

733 :param node: The node to add. 

734 """ 

735 if node not in self.node_mapping: 

736 self.node_mapping[node] = set() 

737 

738 def add_edge(self, node1, node2, weight): 

739 """ Adds an edge to the graph. If the nodes do not exist in the graph, those are also added. 

740 

741 :param node1: The departure node of the edge. 

742 :param node2: The arrival node of the edge. 

743 :param weight: The weight of the edge. 

744 """ 

745 self.add_node(node1) 

746 self.add_node(node2) 

747 self.node_mapping[node1].add(node2) 

748 self.weight[(node1, node2)] = weight 

749 

750 def edges(self): 

751 """ Returns the set of all the edges of the graph. 

752 

753 :return: A set of tuples (node, neighbour, weight) representing (weighted) edges. 

754 :rtype: set of tuples 

755 """ 

756 return {(n1, n2, self.weight[(n1, n2)]) for n1 in self.node_mapping for n2 in self.node_mapping[n1]} 

757 

758 def nodes(self): 

759 """ Returns the set of all the nodes of the graph. 

760 

761 :return: The set of all the nodes of the graph. 

762 :rtype: set 

763 """ 

764 return self.node_mapping.keys() 

765 

766 def __str__(self): 

767 """ Returns the string used when printing the graph """ 

768 res = "Graph with {} vertices and {} edges :\n".format(len(self.node_mapping), len(self.edges())) 

769 for node in self.node_mapping: 

770 res += str(node) + ": " 

771 for edge in self.outgoing_edges(node): 

772 res += str(edge) + " " 

773 res = res[:-1] + "\n" 

774 return res[:-1] 

775 

776 

777class MatchingInstance(PrefLibInstance, WeightedGraph): 

778 """ This is the class representing a PrefLib instance for matching preferences. It basically contains the data and 

779 information written within a PrefLib file. 

780 

781 """ 

782 

783 def __init__(self, file_path = ""): 

784 PrefLibInstance.__init__(self) 

785 WeightedGraph.__init__(self) 

786 self.num_edges = 0 

787 self.file_path = file_path 

788 

789 # If a filePath is given as argument, we parse it immediately 

790 if len(file_path) > 0: 

791 self.parse_file(file_path) 

792 

793 def type_validator(self, data_type): 

794 return data_type == "wmd" 

795 

796 def parse(self, lines, autocorrect=False): 

797 """ Parses the strings, assuming that the latter describes a graph. 

798 

799 :param lines: A list of string, each string being one line of the instance to parse. 

800 :type lines: list 

801 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

802 in the file. Default is False. 

803 :type autocorrect: bool 

804 """ 

805 # The first few lines contain the metadata 

806 i = 0 

807 for i in range(len(lines)): 

808 line = lines[i].strip() 

809 if line.startswith('#'): 

810 if line.startswith("# NUMBER EDGES"): 

811 self.num_edges = int(line[15:].strip()) 

812 else: 

813 self.parse_metadata(line, autocorrect=autocorrect) 

814 else: 

815 break 

816 self.num_voters = self.num_alternatives 

817 

818 for line in lines[i:]: 

819 (vertex1, vertex2, weight) = line.strip().split(",") 

820 self.add_edge(int(vertex1), int(vertex2), float(weight)) 

821 self.num_edges = sum(len(edge_set) for edge_set in self.node_mapping.values()) 

822 

823 def parse_old(self, lines, autocorrect=False): 

824 """ Parses the strings, assuming that the latter describes a graph. 

825 

826 :param lines: A list of string, each string being one line of the instance to parse. 

827 :type lines: list 

828 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

829 in the file. Default is False. 

830 :type autocorrect: bool 

831 """ 

832 

833 self.num_alternatives = int(lines[0].strip().split(",")[0]) 

834 self.num_voters = int(lines[0].strip().split(",")[1]) 

835 for i in range(1, self.num_alternatives + 1): 

836 self.alternatives_name[i] = lines[i].split(",")[1].strip() 

837 

838 # Skip the lines that describe the data 

839 graph_first_line = self.num_alternatives + 1 

840 for line in lines[graph_first_line:]: 

841 (vertex1, vertex2, weight) = line.strip().split(",") 

842 weight = float(weight) 

843 vertex1 = int(vertex1) 

844 vertex2 = int(vertex2) 

845 self.add_edge(vertex1, vertex2, weight) 

846 self.num_edges = sum(len(edge_set) for edge_set in self.node_mapping.values()) 

847 

848 def write(self, filepath): 

849 """ Writes the instance into a file whose destination has been given as argument, assuming the instance 

850 represents a graph. If no file extension is provided the data type of the instance is used. 

851 

852 :param filepath: The destination where to write the instance. 

853 :type filepath: str 

854 """ 

855 

856 if len(path.splitext(filepath)[1]) == 0: 

857 filepath += "." + str(self.data_type) 

858 file = open(filepath, "w", encoding="utf-8") 

859 # Writing metadata in the file header 

860 self.write_metadata(file) 

861 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER EDGES: {}\n".format( 

862 self.num_alternatives, self.num_edges, 

863 )) 

864 for alt, name in self.alternatives_name.items(): 

865 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name)) 

866 

867 # Writing the actual graph 

868 nodes = sorted(list(self.nodes())) 

869 for n in nodes: 

870 out_edges = sorted(list(self.outgoing_edges(n)), key=lambda x: x[1]) 

871 for (vertex1, vertex2, weight) in out_edges: 

872 file.write("{},{},{}\n".format(vertex1, vertex2, weight)) 

873 file.close() 

874 

875 

876def get_parsed_instance(file_path): 

877 """ Infers from the extension of the file given as input the correct instance to use. Parses the file and return 

878 the instance. 

879 

880 :param file_path: The path to the file to be parsed. 

881 :type file_path: str 

882 

883 :return: The instance with the file already parsed. 

884 :rtype: :class:`preflibtools.instances.preflibinstance.PrefLibInstance` 

885 """ 

886 extension = os.path.splitext(file_path)[1] 

887 if extension in ["soc", "soi", "toc", "toi"]: 

888 return OrdinalInstance(file_path) 

889 elif extension == "cat": 

890 return CategoricalInstance(file_path) 

891 elif extension == "wmd": 

892 return MatchingInstance(file_path)