Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# person.py 

2import gender_guesser.detector as sex 

3from dataclasses import dataclass, field 

4from typing import List 

5 

6 

7# https://gist.github.com/jhazelwo/86124774833c6ab8f973323cb9c7e251 

8# class QuietError(Exception): 

9# pass 

10# 

11# 

12# def quiet_hook(kind, message, traceback): 

13# if QuietError in kind.__bases__: 

14# print(f'{kind.__name__} : {message}') 

15# else: 

16# sys.__excepthook__(kind, message, traceback) 

17# 

18# 

19# sys.excepthook = quiet_hook 

20 

21 

class NotInRange(Exception):
    '''Signals that a numeric value lies outside its permitted range.'''

24 

25 

class TooManyFirstNames(Exception):
    '''
    Raised when a first-name string contains more than three given names.

    The message is echoed to stdout as soon as the exception object is
    created, i.e. before it is raised or handled.
    '''

    def __init__(self, message):
        # Explicit hand-over to Exception; the args are the same ones the
        # base class records on construction anyway.
        super().__init__(message)
        print(message)

29 

30 

class AttrDisplay:
    '''
    Mark Lutz, Programming Python
    Mixin with a display overload: an instance is rendered as its class name
    followed by one name=value line per attribute stored on the instance
    itself (attributes inherited from classes are not listed). Works with
    any class it is mixed into.
    '''

    def gatherAttrs(self) -> list:
        '''
        Collect 'name=value' strings for the instance attributes, sorted by
        attribute name.

        Attributes are skipped when their value is falsy or equals one of the
        placeholder values 'unknown', 'ew' or None.
        '''
        placeholders = ['unknown', 'ew', None]
        stored = self.__dict__
        return [
            f'{name}={getattr(self, name)}'
            for name in sorted(stored)
            if stored[name] and stored[name] not in placeholders
        ]

    def __str__(self) -> str:
        '''Return the class name, a colon and the attribute lines.'''
        header = f'{self.__class__.__name__}:\n'
        lines = '\n'.join(str(attr) for attr in self.gatherAttrs())
        return header + lines + '\n'

51 

52 

53@dataclass 

54class _Name_default: 

55 middle_name_1: str = field(default=None) 

56 middle_name_2: str = field(default=None) 

57 maiden_name: str = field(default=None) 

58 divorcée: str = field(default=None) 

59 

60 

61@dataclass 

62class _Name_base: 

63 first_name: str 

64 last_name: str 

65 

66 

@dataclass
class Name(_Name_default, _Name_base, AttrDisplay):
    '''
    A person's name: mandatory first and last name plus optional parts.

    The order of the base classes matters: dataclass fields are collected
    from the bases in reverse MRO order, so the mandatory fields of
    _Name_base come before the defaulted fields of _Name_default in the
    generated __init__.
    '''

    def __post_init__(self):
        '''
        Distribute several given names over first and middle names.

        first_name may hold up to three whitespace-separated names. The
        first one stays in first_name, the second goes to middle_name_1 and
        the third to middle_name_2. A single name leaves the middle names
        untouched.

        Raises:
            TooManyFirstNames: if first_name holds more than three names.
        '''
        # split() without a separator ignores leading/trailing blanks and
        # treats runs of whitespace as one separator; split(' ') would turn
        # those into empty "names" (e.g. ' Anna' -> first_name == '').
        given_names = self.first_name.split()
        # An empty or blank first_name ends up as '' (as it did before).
        self.first_name = given_names[0] if given_names else ''
        if len(given_names) > 3:
            # Show the offending names before failing.
            print(given_names)
            raise TooManyFirstNames("There are more than three first names!")
        if len(given_names) >= 2:
            self.middle_name_1 = given_names[1]
        if len(given_names) == 3:
            self.middle_name_2 = given_names[2]

80 

81 

82@dataclass 

83class _Peertitle_default: 

84 peer_title: str = field(default=None) 

85 peer_preposition: str = field(default=None) 

86 

87 PEER_PREPOSITIONS = ['von', 'van', 'de', 'zu', 'dos', 'auf', 'der', 'und', 

88 'vom', 'den'] 

89 PEERTITLES = ['Freifrau', 'Freiherr', 'Graf', 'Gräfin', 'Herzogin', 

90 'Herzog', 'Baronin', 'Baron', 'Erzherzog', 'Erzherzogin', 

91 'Großherzog', 'Großherzogin', 'Kurfürst', 'Kurfürstin', 

92 'Landgraf', 'Landgräfin', 'Pfalzgraf', 'Pfalzgräfin', 

93 'Fürst', 'Fürstin', 'Markgraf', 'Markgräfin', 'Ritter', 

94 'Edler', 'Junker', 'Landmann'] 

95 

96 def title(self) -> str: 

97 if self.peer_title is not None: 

98 titles = self.peer_title.split(' ') 

99 peer_title = '' 

100 peer_preposition = '' 

101 for prep in titles: 

102 if prep.lower() in self.PEER_PREPOSITIONS: 

103 peer_preposition = peer_preposition + prep.lower() + ' ' 

104 elif prep in self.PEERTITLES: 

105 peer_title = peer_title + prep + ' ' 

106 self.peer_preposition = peer_preposition.strip() 

107 self.peer_title = peer_title.strip() 

108 

109 

@dataclass
class Noble(_Peertitle_default, Name, AttrDisplay):
    '''A name extended by a peer title and its preposition.'''

    def __post_init__(self):
        '''Split the given names first, then normalise the peer title.'''
        # The explicit class call is kept (rather than super()):
        # Politician.__post_init__ invokes Noble.__post_init__ on instances
        # that do not inherit from Noble, which zero-argument super() rejects.
        Name.__post_init__(self)
        self.title()

115 

116 

117@dataclass 

118class _Academic_title_default: 

119 academic_title: str = field(default=None) 

120 

121 def degree_title(self) -> str: 

122 if self.academic_title is not None: 

123 if '.D' in self.academic_title: 

124 self.academic_title =\ 

125 '. '.join(c for c in self.academic_title.split('.')) 

126 if '.A' in self.academic_title: 

127 self.academic_title =\ 

128 '. '.join(c for c in self.academic_title.split('.')) 

129 if self.academic_title.endswith('Dr'): 

130 self.academic_title = self.academic_title[:-2] + 'Dr.' 

131 while ' ' in self.academic_title: 

132 self.academic_title = self.academic_title.replace(' ', ' ') 

133 self.academic_title = self.academic_title.strip() 

134 

135 

@dataclass
class Academic(_Academic_title_default, Name, AttrDisplay):
    '''A name extended by an academic title.'''

    def __post_init__(self):
        '''Split the given names first, then tidy the academic title.'''
        # The explicit class call is kept (rather than super()): Person and
        # Politician invoke Academic.__post_init__ on instances that do not
        # inherit from Academic, which zero-argument super() rejects.
        Name.__post_init__(self)
        self.degree_title()

141 

142 

143@dataclass 

144class _Person_default: 

145 gender: str = field(default='unknown') 

146 born: str = field(default='unknown') 

147 age: str = field(default='unknown') 

148 deceased: str = field(default='unknown') 

149 

150 def get_sex(self) -> str: 

151 if '-' in self.first_name: 

152 first_name = self.first_name.split('-')[0] 

153 else: 

154 first_name = self.first_name 

155 d = sex.Detector() 

156 gender = d.get_gender(f'{first_name}') 

157 if 'female' in gender: 

158 self.gender = 'female' 

159 elif 'male' in gender: 

160 self.gender = 'male' 

161 

162 def get_age(self) -> str: 

163 from datetime import date 

164 if self.born != 'unknown': 

165 if len(self.born) > 4: 

166 self.deceased = self.born.strip()[5:] 

167 self.born = self.born[:4] 

168 else: 

169 today = date.today() 

170 self.age = str(int(today.year) - int(self.born.strip())) 

171 

172 

@dataclass
class Person(_Peertitle_default, _Academic_title_default, _Person_default,
             Name, AttrDisplay):
    '''
    A named person with optional peer title, academic title and personal
    data (gender, birth, age, death).
    '''

    def __post_init__(self):
        '''
        Normalise the name and academic title, then derive gender and age.

        NOTE(review): unlike Politician, the peer title is not normalised
        here (title() is never called) — confirm that this is intended.
        '''
        Name.__post_init__(self)
        # Academic.__post_init__ splits the names again (a no-op once
        # first_name holds a single name) and tidies the academic title.
        Academic.__post_init__(self)
        self.get_sex()
        self.get_age()

181 

182 

183@dataclass 

184class _Politician_default: 

185 electoral_ward: str = field(default='ew') 

186 ward_no: int = field(default=None) 

187 voter_count: int = field(default=None) 

188 minister: str = field(default=None) 

189 offices: List[str] = field(default_factory=lambda: []) 

190 party: str = field(default=None) 

191 parties: List[str] = field(default_factory=lambda: []) 

192 

193 def renamed_wards(self): 

194 renamed_wards = ["Kreis Aachen I", "Hochsauerlandkreis II – Soest III", 

195 "Kreis Aachen II"] 

196 wards = {"Kreis Aachen I": "Aachen III", 

197 "Hochsauerlandkreis II – Soest III": "Hochsauerlandkreis II", 

198 "Kreis Aachen II": "Aachen IV" if self.last_name in 

199 ["Wirtz", "Weidenhaupt"] else "Kreis Aachen I"} 

200 if self.electoral_ward in renamed_wards: 

201 self.electoral_ward = wards[self.electoral_ward] 

202 

203 def scrape_wiki_for_ward(self): 

204 from bs4 import BeautifulSoup 

205 import requests 

206 

207 URL_base = 'https://de.wikipedia.org/wiki/Landtagswahlkreis_{}' 

208 URL = URL_base.format(self.electoral_ward) 

209 req = requests.get(URL) 

210 bsObj = BeautifulSoup(req.text, 'lxml') 

211 table = bsObj.find(class_='infobox float-right toptextcells') 

212 for td in table.find_all('td'): 

213 if 'Wahlkreisnummer' in td.text: 

214 ward_no = td.find_next().text.strip() 

215 ward_no = ward_no.split(' ')[0] 

216 self.ward_no = int(ward_no) 

217 elif 'Wahlberechtigte' in td.text: 

218 voter_count = td.find_next().text.strip() 

219 if voter_count[-1] == ']': 

220 voter_count = voter_count[:-3] 

221 if ' ' in voter_count: 

222 voter_count = ''.join(voter_count.split(' ')) 

223 elif '.' in voter_count: 223 ↛ 225line 223 didn't jump to line 225, because the condition on line 223 was never false

224 voter_count = ''.join(voter_count.split('.')) 

225 self.voter_count = int(voter_count) 

226 

227 

@dataclass
class Politician(_Peertitle_default, _Academic_title_default, _Person_default,
                 _Politician_default, Name, AttrDisplay):
    '''A person with political data: ward, party memberships and offices.'''

    def __post_init__(self):
        '''
        Normalise name and titles, derive gender and age, resolve the
        electoral ward and register the current party and ministry.

        For a real ward the constructor updates renamed ward names and
        looks the ward up on Wikipedia (a network request). 'Landesliste'
        and the placeholder 'ew' are both stored as 'ew'.
        '''
        Name.__post_init__(self)
        Academic.__post_init__(self)
        Noble.__post_init__(self)
        _Person_default.get_sex(self)
        _Person_default.get_age(self)

        without_ward = ('ew', 'Landesliste')
        if self.electoral_ward in without_ward:
            self.electoral_ward = "ew"
        else:
            self.renamed_wards()
            self.scrape_wiki_for_ward()

        # The current party and the ministry also belong in the lists of all
        # parties / offices; avoid duplicate entries.
        current_party = self.party
        if current_party and current_party not in self.parties:
            self.parties.append(current_party)
        ministry = self.minister
        if ministry and ministry not in self.offices:
            self.offices.append(ministry)

247 

248 

249@dataclass 

250class _MdL_default: 

251 parl_pres: bool = field(default=False) 

252 parl_vicePres: bool = field(default=False) 

253 

254 

255@dataclass 

256class _MdL_base: 

257 legislature: int 

258 

259 

@dataclass
class MdL(_MdL_default, Politician, _MdL_base, AttrDisplay):
    '''A politician who is a member of parliament in a given legislature.'''

    def __post_init__(self):
        '''
        Validate the legislature number, then run the Politician set-up.

        Raises:
            NotInRange: if the legislature number is not between 10 and 20
                (both inclusive).
        '''
        legislature_no = int(self.legislature)
        if not 10 <= legislature_no <= 20:
            raise NotInRange('Number for legislature not in range')
        Politician.__post_init__(self)

265 Politician.__post_init__(self)