Coverage for C:\Users\reysi\PycharmProjects\preflibtools\preflibtools\instances\preflibinstance.py: 69%
446 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-20 19:24 +0100
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-20 19:24 +0100
1""" This module describes the main class to deal with PrefLib instances..
2"""
3import os.path
5from .sampling import *
7from copy import deepcopy
8from os import path
10import urllib
11import re
14class PrefLibInstance(object):
15 """ This class provide a general template to implement specific classes representing PrefLib instances. It should
16 mainly be used as an abstract class.
18 :ivar file_path: The path to the file the instance is taken from.
19 :ivar file_name: The name of the file the instance is taken from.
20 :ivar data_type: The data type of the instance. Whenever a function only applies to certain types of data
21 (strict and complete orders for instance), we do so by checking this value.
22 :ivar modification_type: The modification type of the file: original if it represents original data; induced
23 or imbued it is derived from some other data; synthetic if it is synthetically generated.
24 :ivar relates_to: The data file this instance relates to, typically the original data file for induced
25 preferences.
26 :ivar related_files: The data files that are to the instance.
27 :ivar title: The title of the instance.
28 :ivar description: A description of the instance.
29 :ivar publication_date: Date at which the corresponding file has been added to PrefLib.com.
30 :ivar modification_date: Last date the file has been modified on PrefLib.com.
31 :ivar num_alternatives: The number of alternatives in the instance.
32 :ivar alternatives_name: A dictionary mapping alternative (int) to their name (str).
33 :ivar num_voters: The number of voters in the instance.
34 """
36 def __init__(self):
37 self.file_path = ""
38 self.file_name = ""
39 self.data_type = ""
40 self.modification_type = ""
41 self.relates_to = ""
42 self.related_files = ""
43 self.title = ""
44 self.description = ""
45 self.publication_date = ""
46 self.modification_date = ""
47 self.num_alternatives = 0
48 self.alternatives_name = {}
49 self.num_voters = 0
50 self.alt_name_pattern = re.compile(r'# ALTERNATIVE NAME (\d+): (.*)')
52 def type_validator(self, data_type):
53 pass
55 def parse(self, lines, autocorrect=False):
56 pass
58 def parse_lines(self, lines, autocorrect=False):
59 """ Parses the lines provided as argument. The parser to be used is deducted from the instance's inner value of
60 data_type.
62 :param lines: A list of string, each string being one line of the instance to parse.
63 :type lines: list
64 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
65 in the file. Default is False.
66 :type autocorrect: bool
67 """
69 if self.type_validator(self.data_type):
70 self.parse(lines, autocorrect=autocorrect)
71 else:
72 raise TypeError("File extension " + str(self.data_type) + " is not valid for an ordinal PrefLib instance." +
73 " This file cannot be parsed.")
75 def parse_file(self, filepath, autocorrect=False):
76 """ Parses the file whose path is provided as argument and populates the PreflibInstance object accordingly.
77 The parser to be used is deduced from the file extension.
79 :param filepath: The path to the file to be parsed.
80 :type filepath: str
81 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
82 in the file. Default is False.
83 :type autocorrect: bool
84 """
86 # Populating basic properties of the instance
87 self.file_path = filepath
88 self.file_name = path.split(filepath)[1]
89 self.data_type = path.splitext(filepath)[1][1:]
91 # Read the file
92 file = open(filepath, "r", encoding="utf-8")
93 lines = file.readlines()
94 file.close()
96 self.parse_lines(lines, autocorrect=autocorrect)
98 def parse_str(self, string, data_type, file_name="", autocorrect=False):
99 """ Parses the string provided as argument and populates the PreflibInstance object accordingly.
100 The parser to be used is deduced from the file extension passed as argument.
102 :param string: The string to parse.
103 :type string: str
104 :param data_type: The data type represented by the string.
105 :type data_type: str
106 :param file_name: The value to store in the file_name member of the instance. Default is the empty string.
107 :type file_name: str
108 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
109 in the file. Default is False.
110 :type autocorrect: bool
111 """
113 self.file_path = "parsed_from_string"
114 self.file_name = file_name
115 self.data_type = data_type
117 self.parse_lines(string.splitlines(), autocorrect=autocorrect)
119 def parse_url(self, url, autocorrect=False):
120 """ Parses the file located at the provided URL and populates the PreflibInstance object accordingly.
121 The parser to be used (whether the file describes a graph or an order for instance) is deduced based
122 on the file extension.
124 :param url: The target URL.
125 :type url: str
126 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
127 in the file. Default is False.
128 :type autocorrect: bool
129 """
131 data = urllib.request.urlopen(url)
132 lines = [line.decode("utf-8").strip() for line in data]
133 data.close()
135 self.file_path = url
136 self.file_name = url.split('/')[-1].split('.')[0]
137 self.data_type = url.split('.')[-1]
139 self.parse_lines(lines, autocorrect=autocorrect)
141 def parse_metadata(self, line, autocorrect=False):
142 if line.startswith("# FILE NAME"):
143 self.file_name = line[12:].strip()
144 elif line.startswith("# TITLE"):
145 self.title = line[8:].strip()
146 elif line.startswith("# DESCRIPTION"):
147 self.description = line[14:].strip()
148 elif line.startswith("# DATA TYPE"):
149 self.data_type = line[12:].strip()
150 elif line.startswith("# MODIFICATION TYPE"):
151 self.modification_type = line[20:].strip()
152 elif line.startswith("# RELATES TO"):
153 self.relates_to = line[13:].strip()
154 elif line.startswith("# RELATED FILES"):
155 self.related_files = line[16:].strip()
156 elif line.startswith("# PUBLICATION DATE"):
157 self.publication_date = line[19:].strip()
158 elif line.startswith("# MODIFICATION DATE"):
159 self.modification_date = line[20:].strip()
160 elif line.startswith("# NUMBER ALTERNATIVES"):
161 self.num_alternatives = int(line[22:].strip())
162 elif line.startswith("# NUMBER VOTERS"):
163 self.num_voters = int(line[16:].strip())
164 elif line.startswith("# ALTERNATIVE NAME"):
165 match = re.match(self.alt_name_pattern, line)
166 if match:
167 alt = int(match.group(1))
168 alt_name = match.group(2)
169 if autocorrect and alt_name in self.alternatives_name.values():
170 tmp = 1
171 while alt_name + "__" + str(tmp) in self.alternatives_name.values():
172 tmp += 1
173 self.alternatives_name[alt] = alt_name + "__" + str(tmp)
174 else:
175 self.alternatives_name[alt] = alt_name
177 def write(self, filepath):
178 pass
180 def write_metadata(self, file):
181 file.write("# FILE NAME: {}\n# TITLE: {}\n# DESCRIPTION: {}\n# DATA TYPE: {}\n# MODIFICATION TYPE: {}\n".format(
182 self.file_name, self.title, self.description, self.data_type, self.modification_type))
183 file.write("# RELATES TO: {}\n# RELATED FILES: {}\n# PUBLICATION DATE: {}\n# MODIFICATION DATE: {}\n".format(
184 self.relates_to, self.related_files, self.publication_date, self.modification_date))
186 def __str__(self):
187 return "PrefLib-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives)
190class OrdinalInstance(PrefLibInstance):
191 """ This is the class representing a PrefLib instance of ordinal preferences. It basically contains the data and
192 information written within a PrefLib file.
194 :param file_path: The path to the file the instance is taken from. If a path is provided as a parameter,
195 the file is immediately parsed and the instance populated accordingly.
196 :type file_path: str, optional
198 :ivar num_unique_orders: The number of unique orders in the instance.
199 :ivar multiplicity: A dictionary mapping each order to the number of voters who submitted that order.
200 :ivar orders: The list of all the distinct orders in the instance.
201 """
203 def __init__(self, file_path=""):
204 PrefLibInstance.__init__(self)
205 self.file_path = file_path
206 self.num_unique_orders = 0
207 self.multiplicity = {}
208 self.orders = []
210 # If a filePath is given as argument, we parse it immediately
211 if len(file_path) > 0:
212 self.parse_file(file_path)
214 def type_validator(self, data_type):
215 return data_type in ['soc', 'soi', 'toc', 'toi']
217 def parse(self, lines, autocorrect=False):
218 """ Parses the strings provided as argument, assuming that the latter describes an order.
220 :param lines: A list of string, each string being one line of the instance to parse.
221 :type lines: list
222 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
223 in the file. Default is False.
224 :type autocorrect: bool
225 """
227 # The first few lines contain the metadata
228 i = 0
229 for i in range(len(lines)):
230 line = lines[i].strip()
231 if line.startswith('#'):
232 if line.startswith("# NUMBER UNIQUE ORDERS"):
233 self.num_unique_orders = int(line[23:].strip())
234 else:
235 self.parse_metadata(line, autocorrect=autocorrect)
236 else:
237 break
239 # The rest of the lines are about the preferences
240 order_pattern = re.compile(r'{[\d,]+?}|[\d,]+')
241 for line in lines[i:]:
242 # The first element indicates the multiplicity of the order
243 multiplicity, order_str = line.strip().split(":")
244 multiplicity = int(multiplicity)
245 order = []
246 for group in re.findall(order_pattern, order_str):
247 if group.startswith('{'):
248 group = group[1:-1]
249 order.append(tuple(int(alt.strip()) for alt in group.split(',') if len(alt) > 0))
250 else:
251 for alt in group.split(','):
252 if len(alt) > 0:
253 order.append((int(alt.strip()),))
254 order = tuple(order)
255 self.orders.append(order)
256 if autocorrect and order in self.multiplicity:
257 self.multiplicity[tuple(order)] += multiplicity
258 else:
259 self.multiplicity[tuple(order)] = multiplicity
261 if autocorrect:
262 self.orders = list(set(self.orders))
263 self.num_alternatives = len(self.alternatives_name)
264 self.num_unique_orders = len(self.orders)
265 self.num_voters = sum(self.multiplicity.values())
267 def parse_old(self, lines, autocorrect=False):
268 """ Parses the strings provided as argument, assuming that the latter describes an order, in the old PrefLib
269 format.
271 :param lines: A list of string, each string being one line of the instance to parse.
272 :type lines: list
273 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
274 in the file. Default is False.
275 :type autocorrect: bool
276 """
278 # The first line gives us the number of alternatives, then comes the names of the alternatives
279 self.num_alternatives = int(lines[0])
280 for i in range(1, self.num_alternatives + 1):
281 alt_name = lines[i].split(",")[1].strip()
282 if autocorrect and alt_name in self.alternatives_name.values():
283 tmp = 1
284 while alt_name + "__" + str(tmp) in self.alternatives_name.values():
285 tmp += 1
286 self.alternatives_name[i] = alt_name + "__" + str(tmp)
287 else:
288 self.alternatives_name[i] = alt_name
290 # We've reached the description of the preferences. We start by some numbers...
291 self.num_voters = int(lines[self.num_alternatives + 1].split(",")[0])
292 self.num_unique_orders = int(lines[self.num_alternatives + 1].split(",")[2])
294 # ... and finally comes the preferences
295 for line in lines[self.num_alternatives + 2:]:
296 # The first element indicates the multiplicity of the order
297 elements = line.strip().split(",")
298 multiplicity = int(elements[0])
300 # Then we deal with the rest
301 in_braces = False
302 order = []
303 indif_class = []
304 for w in elements[1:]:
305 # If there is something in w
306 if w != "{}" and len(w) > 0:
307 # If we are entering a series of ties (grouped by {})
308 if w.startswith("{"):
309 # If w also ends with a }, we remove it
310 if w.endswith("}"):
311 w = w[:-1]
312 in_braces = True
313 indif_class.append(int(w[1:])) # The first element of w is {, so we go beyond that
314 # If we finished reading a series of ties (grouped by {})
315 elif w.endswith("}"):
316 in_braces = False
317 indif_class.append(int(w[:-1])) # The first element of w is }, so we go beyond that
318 order.append(tuple(indif_class))
319 indif_class = []
320 # Otherwise, we are just reading numbers
321 else:
322 # If we are facing ties, we add in the indifference class
323 if in_braces:
324 if int(w) not in indif_class:
325 indif_class.append(int(w))
326 # Otherwise, we add the strict preference.
327 else:
328 if (int(w),) not in order:
329 order.append((int(w),))
330 order = tuple(order)
331 self.orders.append(order)
332 if autocorrect and order in self.multiplicity:
333 self.multiplicity[tuple(order)] += multiplicity
334 else:
335 self.multiplicity[tuple(order)] = multiplicity
337 if autocorrect:
338 self.orders = list(set(self.orders))
340 def write(self, filepath):
341 """ Writes the instance into a file whose destination has been given as argument. If no file extension is
342 provided the data type of the instance is used.
344 :param filepath: The destination where to write the instance.
345 :type filepath: str
346 """
347 if len(path.splitext(filepath)[1]) == 0:
348 filepath += "." + str(self.data_type)
349 file = open(filepath, "w", encoding="utf-8")
350 # Writing metadata in the file header
351 self.write_metadata(file)
352 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER VOTERS: {}\n# NUMBER UNIQUE ORDERS: {}\n".format(
353 self.num_alternatives, self.num_voters, self.num_unique_orders
354 ))
355 for alt, name in self.alternatives_name.items():
356 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name))
357 # Writing the actual ballots with their multiplicity
358 orders = deepcopy(self.orders)
359 orders.sort(key=lambda o: (-self.multiplicity[o], -len(o)))
360 for order in orders:
361 order_str = ""
362 for indif_class in order:
363 if len(indif_class) == 1:
364 order_str += str(indif_class[0]) + ","
365 else:
366 order_str += "{" + ",".join((str(alt) for alt in indif_class)) + "},"
367 file.write("{}: {}\n".format(self.multiplicity[order], order_str[:-1]))
368 file.close()
370 def vote_map(self):
371 """ Returns the instance described as a vote map, i.e., a dictionary whose keys are orders, mapping
372 to the number of voters with the given order as their preferences. This format can be useful for some
373 applications. It also ensures interoperability with the old preflibtools (vote maps were the main object).
375 :return: A vote map representing the preferences in the instance.
376 :rtype: dict of (tuples, int)
377 """
378 vote_map = {}
379 for order in self.orders:
380 vote_map[order] = self.multiplicity[order]
381 return vote_map
383 def full_profile(self):
384 """ Returns a list containing all the orders appearing in the preferences, with each order appearing as
385 many times as their multiplicity.
387 :return: A list of preferences (lists of alternatives).
388 :rtype: list
389 """
390 res = []
391 for order in self.orders:
392 res += [order] * self.multiplicity[order]
393 return res
395 def flatten_strict(self):
396 """ Strict orders are represented as orders with indifference classes of size 1. This is somewhat heavy when
397 working with strict preferences. This function flattens strict preferences by removing the indifference
398 class.
400 :return: A list of tuples of preference order and multiplicity.
401 :rtype: list
402 """
403 res = []
404 for order in self.orders:
405 if len(order) != self.num_alternatives:
406 print("WARNING: You are flattening a non-strict order.")
407 res.append((tuple(indif_class[0] for indif_class in order), self.multiplicity[order]))
408 return res
410 def infer_type(self):
411 """ Loops through the orders of the instance to infer whether the preferences strict and/or complete,.
413 :return: The data type of the instance.
414 :rtype: str
415 """
416 strict = True
417 complete = True
418 for order in self.orders:
419 if max(len(indif_class) for indif_class in order) != 1:
420 strict = False
421 if len([alt for indif_class in order for alt in indif_class]) != self.num_alternatives:
422 complete = False
423 if not strict and not complete:
424 return "toi"
425 if strict and complete:
426 return "soc"
427 if strict and not complete:
428 return "soi"
429 if not strict and complete:
430 return "toc"
432 def recompute_cardinality_param(self):
433 """ Recomputes the basic cardinality parameters based on the order list in the instance. Numbers that are
434 recomputed are the number of voters, the sum of voter count and the number of unique orders.
435 """
436 num_voters = 0
437 for order in self.orders:
438 num_voters += self.multiplicity[order]
439 self.num_voters = num_voters
440 self.num_unique_orders = len(set(self.orders))
442 def append_order_list(self, orders):
443 """ Appends a vote map to the instance. That function incorporates the new orders into the instance and
444 updates the set of alternatives if needed.
446 :param orders: A list of tuples of tuples, each tuple representing a preference order.
447 :type orders: list
448 """
449 alternatives = set(alt for order in orders for indif_class in order for alt in indif_class)
450 for alt in alternatives:
451 if alt not in self.alternatives_name:
452 self.alternatives_name[alt] = "Alternative " + str(alt)
453 self.num_alternatives = len(self.alternatives_name)
455 self.num_voters += len(orders)
457 for order in orders:
458 multiplicity = len([o for o in orders if o == order])
459 if order in self.multiplicity:
460 self.multiplicity[order] += multiplicity
461 else:
462 self.multiplicity[order] = multiplicity
464 self.num_unique_orders = len(self.multiplicity)
465 self.orders += deepcopy(orders)
467 self.data_type = self.infer_type()
469 def append_vote_map(self, vote_map):
470 """ Appends a vote map to the instance. That function incorporates the new orders into the instance and
471 updates the set of alternatives if needed.
473 :param vote_map: A vote map representing preferences. A vote map is a dictionary whose keys represent
474 orders (tuples of tuples of int) that are mapped to the number of voters with the given order as
475 their preferences. We re-map the orders to tuple of tuples to be sure we are dealing with the correct
476 type.
477 :type vote_map: dict of (tuple, int)
478 """
479 for ballot, multiplicity in vote_map.items():
480 order = tuple(tuple(indif_class) for indif_class in ballot)
481 if order not in self.orders:
482 self.orders.append(order)
483 self.multiplicity[order] = multiplicity
484 else:
485 self.multiplicity[order] += multiplicity
486 self.num_voters += multiplicity
488 for indif_class in ballot:
489 for alt in indif_class:
490 if alt not in self.alternatives_name:
491 self.alternatives_name[alt] = "Alternative " + str(alt)
493 self.num_alternatives = len(self.alternatives_name)
494 self.num_unique_orders = len(self.multiplicity)
496 self.data_type = self.infer_type()
498 def populate_IC(self, num_voters, num_alternatives):
499 """ Populates the instance with a random profile of strict preferences taken from the impartial culture
500 distribution. Uses :math:`preflibtools.instances.sampling.urnModel` for sampling.
502 :param num_voters: Number of orders to sample.
503 :type num_voters: int
504 :param num_alternatives: Number of alternatives for the sampled orders.
505 :type num_alternatives: int
506 """
507 self.append_vote_map(generate_IC(num_voters, list(range(num_alternatives))))
509 def populate_IC_anon(self, num_voters, num_alternatives):
510 """ Populates the instance with a random profile of strict preferences taken from the impartial anonymous
511 culture distribution. Uses :class:`preflibtools.instances.sampling` for sampling.
513 :param num_voters: Number of orders to sample.
514 :type num_voters: int
515 :param num_alternatives: Number of alternatives for the sampled orders.
516 :type num_alternatives: int
517 """
518 self.append_vote_map(generate_IC_anon(num_voters, list(range(num_alternatives))))
520 def populate_urn(self, num_voters, num_alternatives, replace):
521 """ Populates the instance with a random profile of strict preferences taken from the urn distribution.
522 Uses :class:`preflibtools.instances.sampling` for sampling.
524 :param num_voters: Number of orders to sample.
525 :type num_voters: int
526 :param num_alternatives: Number of alternatives for the sampled orders.
527 :type num_alternatives: int
528 :param replace: The number of replacements for the urn model.
529 :type replace: int
530 """
531 self.append_vote_map(generate_urn(num_voters, list(range(num_alternatives)), replace))
533 def populate_mallows(self, num_voters, num_alternatives, mixture, dispersions, references):
534 """ Populates the instance with a random profile of strict preferences taken from a mixture of Mallows'
535 models. Uses :class:`preflibtools.instances.sampling` for sampling.
537 :param num_voters: Number of orders to sample.
538 :type num_voters: int
539 :param num_alternatives: Number of alternatives for the sampled orders.
540 :type num_alternatives: int
541 :param mixture: A list of the weights of each element of the mixture.
542 :type mixture: list of positive numbers
543 :param dispersions: A list of the dispersion coefficient of each element of the mixture.
544 :type dispersions: list of float
545 :param references: A list of the reference orders for each element of the mixture.
546 :type references: list of tuples of tuples of int
547 """
548 self.append_vote_map(generate_mallows(num_voters, num_alternatives, mixture, dispersions, references))
550 def populate_mallows_mix(self, num_voters, num_alternatives, num_references):
551 """ Populates the instance with a random profile of strict preferences taken from a mixture of Mallows'
552 models for which reference points and dispersion coefficients are independently and identically
553 distributed. Uses :class:`preflibtools.instances.sampling` for sampling.
555 :param num_voters: Number of orders to sample.
556 :type num_voters: int
557 :param num_alternatives: Number of alternatives for the sampled orders.
558 :type num_alternatives: int
559 :param num_references: Number of element
560 :type num_references: int
561 """
562 self.append_vote_map(generate_mallows_mix(num_voters, list(range(num_alternatives)), num_references))
565class ComparisonInstance(PrefLibInstance):
566 """ To be implemented.
568 """
570 def __init__(self):
571 PrefLibInstance.__init__(self)
574class CategoricalInstance(PrefLibInstance):
575 """ This is the class representing a PrefLib instance of categorical preferences. It basically contains the data and
576 information written within a PrefLib file.
577 """
579 def __init__(self, file_path=""):
580 PrefLibInstance.__init__(self)
581 self.file_path = file_path
582 self.num_unique_preferences = 0
583 self.multiplicity = {}
584 self.num_categories = 0
585 self.categories_name = {}
586 self.preferences = []
588 # If a filePath is given as argument, we parse it immediately
589 if len(file_path) > 0:
590 self.parse_file(file_path)
592 def type_validator(self, data_type):
593 return data_type == "cat"
595 def parse(self, lines, autocorrect=False):
596 """ Parses the strings provided as argument, assuming that the latter describes categorical preferences.
598 :param lines: A list of string, each string being one line of the instance to parse.
599 :type lines: list
600 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
601 in the file. Default is False.
602 :type autocorrect: bool
603 """
605 # The first few lines contain the metadata
606 i = 0
607 cat_name_pattern = re.compile(r'# CATEGORY NAME (\d+): (.*)')
608 for i in range(len(lines)):
609 line = lines[i].strip()
610 if line.startswith('#'):
611 if line.startswith("# NUMBER UNIQUE PREFERENCES"):
612 self.num_unique_preferences = int(line[28:].strip())
613 if line.startswith("# NUMBER CATEGORIES"):
614 self.num_categories = int(line[20:].strip())
615 elif line.startswith("# CATEGORY NAME"):
616 match = re.match(cat_name_pattern, line)
617 if match:
618 cat = int(match.group(1))
619 cat_name = match.group(2)
620 if autocorrect and cat_name in self.categories_name.values():
621 tmp = 1
622 while cat_name + "__" + str(tmp) in self.categories_name.values():
623 tmp += 1
624 self.categories_name[cat] = cat_name + "__" + str(tmp)
625 else:
626 self.categories_name[cat] = cat_name
627 else:
628 self.parse_metadata(line, autocorrect=autocorrect)
629 else:
630 break
632 # The rest of the lines are about the preferences
633 pref_pattern = re.compile(r'{[\d,]+?}|[\d,]+|{}')
634 for line in lines[i:]:
635 # The first element indicates the multiplicity of the order
636 multiplicity, pref_str = line.strip().split(":")
637 multiplicity = int(multiplicity)
638 pref = []
639 for group in re.findall(pref_pattern, pref_str):
640 if group == '{}':
641 pref.append(tuple())
642 elif group.startswith('{'):
643 group = group[1:-1]
644 pref.append(tuple(int(alt.strip()) for alt in group.split(',') if len(alt) > 0))
645 else:
646 for alt in group.split(','):
647 if len(alt) > 0:
648 pref.append((int(alt.strip()),))
649 pref = tuple(pref)
650 self.preferences.append(pref)
651 if autocorrect and pref in self.multiplicity:
652 self.multiplicity[tuple(pref)] += multiplicity
653 else:
654 self.multiplicity[tuple(pref)] = multiplicity
656 if autocorrect:
657 self.preferences = list(set(self.preferences))
658 self.num_alternatives = len(self.alternatives_name)
659 self.num_unique_preferences = len(self.preferences)
660 self.num_voters = sum(self.multiplicity.values())
662 def write(self, filepath):
663 """ Writes the instance into a file whose destination has been given as argument. If no file extension is
664 provided the data type of the instance is used.
666 :param filepath: The destination where to write the instance.
667 :type filepath: str
668 """
669 if len(path.splitext(filepath)[1]) == 0:
670 filepath += "." + str(self.data_type)
671 file = open(filepath, "w", encoding="utf-8")
672 # Writing metadata in the file header
673 self.write_metadata(file)
674 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER VOTERS: {}\n# NUMBER UNIQUE PREFERENCES: {}\n".format(
675 self.num_alternatives, self.num_voters, self.num_unique_preferences
676 ))
677 file.write("# NUMBER CATEGORIES: {}\n".format(self.num_categories))
678 for cat, name in self.categories_name.items():
679 file.write("# CATEGORY NAME {}: {}\n".format(cat, name))
680 for alt, name in self.alternatives_name.items():
681 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name))
682 # Writing the actual ballots with their multiplicity
683 preferences = deepcopy(self.preferences)
684 preferences.sort(key=lambda o: (-self.multiplicity[o], -len(o)))
685 for pref in preferences:
686 pref_str = ""
687 for category in pref:
688 if len(category) == 0:
689 pref_str += '{},'
690 elif len(category) == 1:
691 pref_str += str(category[0]) + ","
692 else:
693 pref_str += "{" + ",".join((str(alt) for alt in category)) + "},"
694 file.write("{}: {}\n".format(self.multiplicity[pref], pref_str[:-1]))
695 file.close()
698class WeightedGraph(object):
699 """ This class is used to represent (weighted) graphs.
701 :ivar dict: The dictionary representing the graph mapping each node to its neighbourhood (set of nodes
702 to which it is connected). A node can be of any hashable type.
703 :ivar weight: The dictionary mapping every node to its weight.
704 """
706 def __init__(self):
707 self.node_mapping = dict()
708 self.weight = dict()
710 def neighbours(self, node):
711 """ Returns all the neighbours of a given node.
713 :param node: The node whose neighbours we want to know.
715 :return: The set of the neighbours of the node.
716 :rtype: set
717 """
718 return self.node_mapping[node]
720 def outgoing_edges(self, node):
721 """ Returns all the edges leaving a given node.
723 :param node: The node whose edges we want to get.
725 :return: The set of the tuples (node, neighbour, edgeWeight) representing (weighted) edges.
726 :rtype: set of tuples
727 """
728 return {(node, n, self.weight[(node, n)]) for n in self.node_mapping[node]}
730 def add_node(self, node):
731 """ Adds a node to the graph if the node does not already exist.
733 :param node: The node to add.
734 """
735 if node not in self.node_mapping:
736 self.node_mapping[node] = set()
738 def add_edge(self, node1, node2, weight):
739 """ Adds an edge to the graph. If the nodes do not exist in the graph, those are also added.
741 :param node1: The departure node of the edge.
742 :param node2: The arrival node of the edge.
743 :param weight: The weight of the edge.
744 """
745 self.add_node(node1)
746 self.add_node(node2)
747 self.node_mapping[node1].add(node2)
748 self.weight[(node1, node2)] = weight
750 def edges(self):
751 """ Returns the set of all the edges of the graph.
753 :return: A set of tuples (node, neighbour, weight) representing (weighted) edges.
754 :rtype: set of tuples
755 """
756 return {(n1, n2, self.weight[(n1, n2)]) for n1 in self.node_mapping for n2 in self.node_mapping[n1]}
758 def nodes(self):
759 """ Returns the set of all the nodes of the graph.
761 :return: The set of all the nodes of the graph.
762 :rtype: set
763 """
764 return self.node_mapping.keys()
766 def __str__(self):
767 """ Returns the string used when printing the graph """
768 res = "Graph with {} vertices and {} edges :\n".format(len(self.node_mapping), len(self.edges()))
769 for node in self.node_mapping:
770 res += str(node) + ": "
771 for edge in self.outgoing_edges(node):
772 res += str(edge) + " "
773 res = res[:-1] + "\n"
774 return res[:-1]
777class MatchingInstance(PrefLibInstance, WeightedGraph):
778 """ This is the class representing a PrefLib instance for matching preferences. It basically contains the data and
779 information written within a PrefLib file.
781 """
783 def __init__(self, file_path = ""):
784 PrefLibInstance.__init__(self)
785 WeightedGraph.__init__(self)
786 self.num_edges = 0
787 self.file_path = file_path
789 # If a filePath is given as argument, we parse it immediately
790 if len(file_path) > 0:
791 self.parse_file(file_path)
793 def type_validator(self, data_type):
794 return data_type == "wmd"
796 def parse(self, lines, autocorrect=False):
797 """ Parses the strings, assuming that the latter describes a graph.
799 :param lines: A list of string, each string being one line of the instance to parse.
800 :type lines: list
801 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
802 in the file. Default is False.
803 :type autocorrect: bool
804 """
805 # The first few lines contain the metadata
806 i = 0
807 for i in range(len(lines)):
808 line = lines[i].strip()
809 if line.startswith('#'):
810 if line.startswith("# NUMBER EDGES"):
811 self.num_edges = int(line[15:].strip())
812 else:
813 self.parse_metadata(line, autocorrect=autocorrect)
814 else:
815 break
816 self.num_voters = self.num_alternatives
818 for line in lines[i:]:
819 (vertex1, vertex2, weight) = line.strip().split(",")
820 self.add_edge(int(vertex1), int(vertex2), float(weight))
821 self.num_edges = sum(len(edge_set) for edge_set in self.node_mapping.values())
823 def parse_old(self, lines, autocorrect=False):
824 """ Parses the strings, assuming that the latter describes a graph.
826 :param lines: A list of string, each string being one line of the instance to parse.
827 :type lines: list
828 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
829 in the file. Default is False.
830 :type autocorrect: bool
831 """
833 self.num_alternatives = int(lines[0].strip().split(",")[0])
834 self.num_voters = int(lines[0].strip().split(",")[1])
835 for i in range(1, self.num_alternatives + 1):
836 self.alternatives_name[i] = lines[i].split(",")[1].strip()
838 # Skip the lines that describe the data
839 graph_first_line = self.num_alternatives + 1
840 for line in lines[graph_first_line:]:
841 (vertex1, vertex2, weight) = line.strip().split(",")
842 weight = float(weight)
843 vertex1 = int(vertex1)
844 vertex2 = int(vertex2)
845 self.add_edge(vertex1, vertex2, weight)
846 self.num_edges = sum(len(edge_set) for edge_set in self.node_mapping.values())
848 def write(self, filepath):
849 """ Writes the instance into a file whose destination has been given as argument, assuming the instance
850 represents a graph. If no file extension is provided the data type of the instance is used.
852 :param filepath: The destination where to write the instance.
853 :type filepath: str
854 """
856 if len(path.splitext(filepath)[1]) == 0:
857 filepath += "." + str(self.data_type)
858 file = open(filepath, "w", encoding="utf-8")
859 # Writing metadata in the file header
860 self.write_metadata(file)
861 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER EDGES: {}\n".format(
862 self.num_alternatives, self.num_edges,
863 ))
864 for alt, name in self.alternatives_name.items():
865 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name))
867 # Writing the actual graph
868 nodes = sorted(list(self.nodes()))
869 for n in nodes:
870 out_edges = sorted(list(self.outgoing_edges(n)), key=lambda x: x[1])
871 for (vertex1, vertex2, weight) in out_edges:
872 file.write("{},{},{}\n".format(vertex1, vertex2, weight))
873 file.close()
876def get_parsed_instance(file_path):
877 """ Infers from the extension of the file given as input the correct instance to use. Parses the file and return
878 the instance.
880 :param file_path: The path to the file to be parsed.
881 :type file_path: str
883 :return: The instance with the file already parsed.
884 :rtype: :class:`preflibtools.instances.preflibinstance.PrefLibInstance`
885 """
886 extension = os.path.splitext(file_path)[1]
887 if extension in ["soc", "soi", "toc", "toi"]:
888 return OrdinalInstance(file_path)
889 elif extension == "cat":
890 return CategoricalInstance(file_path)
891 elif extension == "wmd":
892 return MatchingInstance(file_path)