Coverage for person/person.py : 99%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# person.py
from dataclasses import dataclass, field
from typing import ClassVar, List, Optional

import gender_guesser.detector as sex
# https://gist.github.com/jhazelwo/86124774833c6ab8f973323cb9c7e251
# class QuietError(Exception):
#     pass
#
#
# def quiet_hook(kind, message, traceback):
#     if QuietError in kind.__bases__:
#         print(f'{kind.__name__} : {message}')
#     else:
#         sys.__excepthook__(kind, message, traceback)
#
#
# sys.excepthook = quiet_hook
class NotInRange(Exception):
    """Raised when a numeric value falls outside its permitted range."""
class TooManyFirstNames(Exception):
    """Raised when a first-name string holds more given names than supported.

    Args:
        message: Human-readable description of the problem.
    """

    def __init__(self, message):
        # Hand the message to Exception so that str(exc) and exc.args carry
        # it; before, the message was only printed and the exception object
        # itself was empty.
        super().__init__(message)
        # Kept for backward compatibility: constructing this exception has
        # always echoed the message to stdout.
        print(message)
class AttrDisplay:
    """
    Mark Lutz, Programming Python
    Provides an inheritable display overload method that shows instances
    with their class names and a name=value pair for each attribute stored
    on the instance itself (but not attrs inherited from its classes). Can
    be mixed into any class, and will work on any instance.
    """

    def gatherAttrs(self) -> list:
        """Return ``name=value`` strings for the instance's set attributes.

        Attributes are taken from the instance ``__dict__`` in sorted key
        order. Falsy values (``None``, ``''``, ``0``, ``False``, empty
        containers) and the placeholder values ``'unknown'`` and ``'ew'``
        are left out.
        """
        placeholders = ('unknown', 'ew', None)
        attrs = []
        for key in sorted(vars(self)):
            value = vars(self)[key]
            if value and value not in placeholders:
                attrs.append(f'{key}={getattr(self, key)}')
        return attrs

    def __str__(self) -> str:
        """Return the class name followed by one ``name=value`` per line."""
        header = f'{self.__class__.__name__}:\n'
        return header + '\n'.join(self.gatherAttrs()) + '\n'
@dataclass
class _Name_default:
    """Optional name components; every field defaults to ``None``.

    Kept separate from the required name fields so that, when both are
    combined through inheritance, the defaulted fields come after the
    required ones in the generated ``__init__``.
    """

    middle_name_1: Optional[str] = None
    middle_name_2: Optional[str] = None
    maiden_name: Optional[str] = None
    # NOTE(review): presumably a surname kept from a former marriage - confirm.
    divorcée: Optional[str] = None
@dataclass
class _Name_base:
    """Required name components: first name and last name."""

    first_name: str
    last_name: str
@dataclass
class Name(_Name_default, _Name_base, AttrDisplay):
    """A personal name whose first-name string may carry middle names.

    ``first_name`` may hold up to three whitespace-separated given names;
    after construction the first one stays in ``first_name`` and the others
    are moved to ``middle_name_1`` / ``middle_name_2``.

    Raises:
        TooManyFirstNames: If ``first_name`` holds more than three names.
    """

    def __post_init__(self):
        """Split ``first_name`` into first name and up to two middle names."""
        # split() without an argument ignores leading/trailing and repeated
        # whitespace; split(' ') produced empty-string "names" for such
        # input and could even raise TooManyFirstNames spuriously.
        given_names = self.first_name.split()
        if len(given_names) > 3:
            # Echo the offending names before raising, as before.
            print(given_names)
            raise TooManyFirstNames("There are more than three first names!")
        if not given_names:
            # Empty or whitespace-only first name: nothing to split.
            return
        self.first_name, *middle_names = given_names
        if middle_names:
            self.middle_name_1 = middle_names[0]
        if len(middle_names) == 2:
            self.middle_name_2 = middle_names[1]
@dataclass
class _Peertitle_default:
    """Optional peerage title fields plus the logic to normalise them.

    ``peer_title`` may be given as one free-form string such as
    ``'Freiherr von und zu'``; ``title()`` splits it into the title proper
    and the nobiliary preposition(s).
    """

    peer_title: Optional[str] = None
    peer_preposition: Optional[str] = None

    # Recognised vocabularies. Marked ClassVar so they are explicitly class
    # constants and never become dataclass fields.
    PEER_PREPOSITIONS: ClassVar[List[str]] = [
        'von', 'van', 'de', 'zu', 'dos', 'auf', 'der', 'und', 'vom', 'den']
    PEERTITLES: ClassVar[List[str]] = [
        'Freifrau', 'Freiherr', 'Graf', 'Gräfin', 'Herzogin',
        'Herzog', 'Baronin', 'Baron', 'Erzherzog', 'Erzherzogin',
        'Großherzog', 'Großherzogin', 'Kurfürst', 'Kurfürstin',
        'Landgraf', 'Landgräfin', 'Pfalzgraf', 'Pfalzgräfin',
        'Fürst', 'Fürstin', 'Markgraf', 'Markgräfin', 'Ritter',
        'Edler', 'Junker', 'Landmann']

    def title(self) -> None:
        """Split ``peer_title`` into title and preposition, in place.

        Words found (case-insensitively) in ``PEER_PREPOSITIONS`` are
        collected, lower-cased, into ``peer_preposition``; words found
        (case-sensitively) in ``PEERTITLES`` are collected into
        ``peer_title``. Any other word is dropped. Does nothing when
        ``peer_title`` is ``None``.
        """
        if self.peer_title is None:
            return
        prepositions = []
        titles = []
        for word in self.peer_title.split(' '):
            lowered = word.lower()
            if lowered in self.PEER_PREPOSITIONS:
                prepositions.append(lowered)
            elif word in self.PEERTITLES:
                titles.append(word)
        # Any preposition passed in separately is overwritten here, exactly
        # as before.
        self.peer_preposition = ' '.join(prepositions)
        self.peer_title = ' '.join(titles)
@dataclass
class Noble(_Peertitle_default, Name, AttrDisplay):
    """A named person with an optional peerage title."""

    def __post_init__(self):
        """Split the first names, then normalise the peerage title."""
        Name.__post_init__(self)
        self.title()
@dataclass
class _Academic_title_default:
    """Optional academic title plus the logic to normalise its spelling."""

    academic_title: Optional[str] = None

    def degree_title(self) -> None:
        """Normalise ``academic_title`` in place.

        * When an abbreviation directly follows a period (``'.D'`` or
          ``'.A'``, e.g. ``'Prof.Dr.'``), a space is inserted after every
          period.
        * A trailing ``'Dr'`` gets its period.
        * Runs of whitespace are collapsed to a single space and the ends
          are trimmed.

        Does nothing when ``academic_title`` is ``None``.
        """
        title = self.academic_title
        if title is None:
            return
        # One pass is enough: after it every period is followed by a space,
        # so neither '.D' nor '.A' can still occur.
        if '.D' in title or '.A' in title:
            title = '. '.join(title.split('.'))
        if title.endswith('Dr'):
            title += '.'
        # Collapse repeated whitespace and trim in one step. The loop form
        # `while ' ' in t: t = t.replace(' ', ' ')` never terminates once
        # the title contains a space.
        self.academic_title = ' '.join(title.split())
@dataclass
class Academic(_Academic_title_default, Name, AttrDisplay):
    """A named person with an optional academic title."""

    def __post_init__(self):
        """Split the first names, then normalise the academic title."""
        Name.__post_init__(self)
        self.degree_title()
@dataclass
class _Person_default:
    """Optional personal data (gender, birth, age, death) and their derivation.

    All fields default to the placeholder string ``'unknown'``. The methods
    expect to be mixed into a class that also provides ``first_name``.
    """

    gender: str = 'unknown'
    born: str = 'unknown'
    age: str = 'unknown'
    deceased: str = 'unknown'

    # Shared, lazily created detector. Deliberately unannotated so that it
    # stays a plain class attribute and does not become a dataclass field.
    # NOTE(review): a Detector appears to load its name dictionary on
    # construction, which is why one instance is reused - confirm.
    _detector = None

    def get_sex(self) -> None:
        """Guess ``gender`` from ``first_name`` and store it in place.

        For hyphenated names only the part before the first hyphen is
        looked up. ``gender`` is set to ``'female'`` or ``'male'`` when the
        detector's answer contains that word and is left untouched
        otherwise.
        """
        first_name = self.first_name.split('-')[0]
        if _Person_default._detector is None:
            _Person_default._detector = sex.Detector()
        guess = _Person_default._detector.get_gender(first_name)
        # 'female' must be tested first because it contains 'male'.
        if 'female' in guess:
            self.gender = 'female'
        elif 'male' in guess:
            self.gender = 'male'

    def get_age(self) -> None:
        """Derive ``age`` or ``deceased`` from ``born``, in place.

        ``born`` is either a four-character year (``'1950'``) or a year
        followed by one separator character and a year of death
        (``'1950-2010'``). In the first case ``age`` becomes the difference
        to the current year, as a string; in the second ``born`` is cut
        down to the birth year and ``deceased`` receives the remainder.
        Does nothing while ``born`` is ``'unknown'``.

        Raises:
            ValueError: If a year-only ``born`` is not an integer.
        """
        from datetime import date

        if self.born == 'unknown':
            return
        # Strip once, up front: measuring the unstripped string treated
        # e.g. '1950 ' as a lifespan and sliced at the wrong offsets.
        born = self.born.strip()
        if len(born) > 4:
            self.deceased = born[5:]
            self.born = born[:4]
        else:
            self.born = born
            self.age = str(date.today().year - int(born))
@dataclass
class Person(_Peertitle_default, _Academic_title_default, _Person_default,
             Name, AttrDisplay):
    """A named person with optional titles, gender and life dates."""

    def __post_init__(self):
        """Normalise name and academic title, then derive gender and age."""
        Name.__post_init__(self)
        # Call the mixin method directly. Going through
        # Academic.__post_init__ passed a non-Academic instance to that
        # class and repeated the name splitting.
        self.degree_title()
        self.get_sex()
        self.get_age()
@dataclass
class _Politician_default:
    """Optional political data and helpers to complete the ward details.

    ``electoral_ward`` defaults to the placeholder ``'ew'``. The methods
    expect to be mixed into a class that also provides ``last_name``.
    """

    electoral_ward: str = 'ew'
    ward_no: Optional[int] = None
    voter_count: Optional[int] = None
    minister: Optional[str] = None
    offices: List[str] = field(default_factory=list)
    party: Optional[str] = None
    parties: List[str] = field(default_factory=list)

    def renamed_wards(self):
        """Replace a ward name listed below by its mapped name, in place.

        ``'Kreis Aachen II'`` maps to ``'Aachen IV'`` for the last names
        Wirtz and Weidenhaupt and to ``'Kreis Aachen I'`` for everyone
        else. Ward names not listed are left unchanged.
        """
        wards = {
            "Kreis Aachen I": "Aachen III",
            "Hochsauerlandkreis II – Soest III": "Hochsauerlandkreis II",
            "Kreis Aachen II": ("Aachen IV"
                                if self.last_name in ["Wirtz", "Weidenhaupt"]
                                else "Kreis Aachen I"),
        }
        # A single lookup only: a mapped name is not mapped a second time.
        self.electoral_ward = wards.get(self.electoral_ward,
                                        self.electoral_ward)

    def scrape_wiki_for_ward(self):
        """Fill ``ward_no`` and ``voter_count`` from German Wikipedia.

        Fetches the ``Landtagswahlkreis_<electoral_ward>`` article and reads
        the table cells following the 'Wahlkreisnummer' and
        'Wahlberechtigte' labels in the infobox. When the page has no such
        infobox, both fields are left as they are.

        Raises:
            requests.RequestException: If the request fails or times out.
            ValueError: If a cell does not contain a parsable number.
        """
        from bs4 import BeautifulSoup
        import requests

        url = 'https://de.wikipedia.org/wiki/Landtagswahlkreis_{}'.format(
            self.electoral_ward)
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=30)
        soup = BeautifulSoup(response.text, 'lxml')
        infobox = soup.find(class_='infobox float-right toptextcells')
        if infobox is None:
            # No infobox (e.g. missing article): nothing to read, rather
            # than failing with AttributeError on None.
            return
        for cell in infobox.find_all('td'):
            if 'Wahlkreisnummer' in cell.text:
                ward_no = cell.find_next().text.strip()
                self.ward_no = int(ward_no.split(' ')[0])
            elif 'Wahlberechtigte' in cell.text:
                self.voter_count = self._parse_voter_count(
                    cell.find_next().text)

    @staticmethod
    def _parse_voter_count(text: str) -> int:
        """Convert a voter count such as ``'97.585[1]'`` to an ``int``.

        A trailing footnote marker (everything from the first ``'['``) is
        dropped, then whitespace and ``'.'`` used as thousands separators
        are removed.

        Raises:
            ValueError: If the remainder is not a plain integer.
        """
        number = text.strip().split('[')[0]
        # str.split() also splits on non-breaking and thin spaces, which a
        # test for a plain ' ' would miss.
        return int(''.join(number.split()).replace('.', ''))
@dataclass
class Politician(_Peertitle_default, _Academic_title_default, _Person_default,
                 _Politician_default, Name, AttrDisplay):
    """A person holding political data: ward, party and offices."""

    def __post_init__(self):
        """Normalise all inherited data and complete the ward details.

        For any ward other than ``'ew'`` / ``'Landesliste'`` the ward name
        is updated via ``renamed_wards()`` and its number and voter count
        are fetched via ``scrape_wiki_for_ward()`` (network access);
        otherwise the ward is set to the placeholder ``'ew'``. Finally
        ``party`` and ``minister`` are appended to ``parties`` / ``offices``
        unless already listed.
        """
        Name.__post_init__(self)
        # Call the mixin methods directly. Going through
        # Academic.__post_init__ and Noble.__post_init__ passed a foreign
        # instance to those classes and split the name three times.
        self.degree_title()
        self.title()
        self.get_sex()
        self.get_age()
        if self.electoral_ward not in ['ew', 'Landesliste']:
            self.renamed_wards()
            self.scrape_wiki_for_ward()
        else:
            self.electoral_ward = "ew"
        if self.party and self.party not in self.parties:
            self.parties.append(self.party)
        if self.minister and self.minister not in self.offices:
            self.offices.append(self.minister)
@dataclass
class _MdL_default:
    """Optional parliament-presidency flags; both default to ``False``."""

    parl_pres: bool = False
    parl_vicePres: bool = False
@dataclass
class _MdL_base:
    """Required field of a member of parliament: the legislature number."""

    legislature: int
@dataclass
class MdL(_MdL_default, Politician, _MdL_base, AttrDisplay):
    """A politician who is a member of a given legislature.

    Raises:
        NotInRange: If ``legislature`` is not between 10 and 20 inclusive.
    """

    def __post_init__(self):
        """Validate the legislature number, then run the Politician set-up."""
        # Validate first so that no network request is made for bad input.
        if int(self.legislature) not in range(10, 21):
            raise NotInRange('Number for legislature not in range')
        Politician.__post_init__(self)
265 Politician.__post_init__(self)