Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from collections.abc import Mapping 

2 

3import numpy as np 

4import pandas as pd 

5 

6 

def _right_squeeze(arr, stop_dim=0):
    """
    Drop trailing axes whose length is 0 or 1

    Parameters
    ----------
    arr : ndarray
        Array to squeeze from the right
    stop_dim : int
        Number of leading axes that are always kept; ``shape[i]`` is never
        removed for ``i < stop_dim``

    Returns
    -------
    squeezed : ndarray
        ``arr`` reshaped so that every trailing axis of length 0 or 1 at
        position ``stop_dim`` or later has been removed.
    """
    dims = arr.shape
    keep = len(dims)
    # Walk backwards from the last axis until an axis longer than 1 is found
    while keep > 0 and dims[keep - 1] <= 1:
        keep -= 1
    # Never remove any of the first ``stop_dim`` axes
    keep = max(keep, stop_dim)

    return arr.reshape(dims[:keep])

33 

34 

def array_like(obj, name, dtype=np.double, ndim=1, maxdim=None,
               shape=None, order='C', contiguous=False, optional=False):
    """
    Convert array-like to a ndarray and check conditions

    Parameters
    ----------
    obj : array_like
         An array, any object exposing the array interface, an object whose
        __array__ method returns an array, or any (nested) sequence.
    name : str
        Name of the variable to use in exceptions
    dtype : {None, numpy.dtype, str}
        Required dtype. Default is double. If None, does not change the dtype
        of obj (if present) or uses NumPy to automatically detect the dtype
    ndim : {int, None}
        Required number of dimensions of obj. If None, no check is performed.
        If the number of dimensions of obj is less than ndim, additional axes
        are inserted on the right. If it is larger, trailing singleton
        dimensions are removed before checking. See examples.
    maxdim : {int, None}
        Maximum allowed dimension.  Use ``maxdim`` instead of ``ndim`` when
        inputs are allowed to have ndim 1, 2, ..., or maxdim. When ``maxdim``
        is not None, ``ndim`` is ignored.
    shape : {tuple[int], None}
        Required shape obj.  If None, no check is performed. Partially
        restricted shapes can be checked using None. Only the leading
        ``min(len(shape), arr.ndim)`` dimensions are compared. See examples.
    order : {'C', 'F'}
        Order of the array
    contiguous : bool
        Ensure that the array's data is contiguous with order ``order``
    optional : bool
        Flag indicating whether None is allowed

    Returns
    -------
    ndarray
        The converted input, or None if ``optional`` is True and ``obj`` is
        None.

    Raises
    ------
    ValueError
        If the dimension or shape requirements are not satisfied.

    Examples
    --------
    Convert a list or pandas series to an array

    >>> import pandas as pd
    >>> x = [0, 1, 2, 3]
    >>> a = array_like(x, 'x', ndim=1)
    >>> a.shape
    (4,)

    >>> a = array_like(pd.Series(x), 'x', ndim=1)
    >>> a.shape
    (4,)

    Squeezes singleton dimensions when required

    >>> x = np.array(x).reshape((4, 1))
    >>> a = array_like(x, 'x', ndim=1)
    >>> a.shape
    (4,)

    Right-appends when required size is larger than actual

    >>> x = [0, 1, 2, 3]
    >>> a = array_like(x, 'x', ndim=2)
    >>> a.shape
    (4, 1)

    Check only the first and last dimension of the input

    >>> x = np.arange(4*10*4).reshape((4, 10, 4))
    >>> y = array_like(x, 'x', ndim=3, shape=(4, None, 4))

    Check only the first two dimensions

    >>> z = array_like(x, 'x', ndim=3, shape=(4, 10))

    Raises ValueError if constraints are not satisfied

    >>> z = array_like(x, 'x', ndim=2)
    Traceback (most recent call last):
     ...
    ValueError: x is required to have ndim 2 but has ndim 3

    >>> z = array_like(x, 'x', ndim=3, shape=(10, 4, 4))
    Traceback (most recent call last):
     ...
    ValueError: x is required to have shape (10, 4, 4) but has shape (4, 10, 4)

    >>> z = array_like(x, 'x', ndim=3, shape=(None, 4, 4))
    Traceback (most recent call last):
     ...
    ValueError: x is required to have shape (*, 4, 4) but has shape (4, 10, 4)
    """
    if optional and obj is None:
        return None
    arr = np.asarray(obj, dtype=dtype, order=order)
    if maxdim is not None:
        if arr.ndim > maxdim:
            msg = '{0} must have ndim <= {1}'.format(name, maxdim)
            raise ValueError(msg)
    elif ndim is not None:
        if arr.ndim > ndim:
            # Too many axes: try dropping trailing singleton axes
            arr = _right_squeeze(arr, stop_dim=ndim)
        elif arr.ndim < ndim:
            # Too few axes: pad with singleton axes on the right
            arr = np.reshape(arr, arr.shape + (1,) * (ndim - arr.ndim))
        if arr.ndim != ndim:
            msg = '{0} is required to have ndim {1} but has ndim {2}'
            raise ValueError(msg.format(name, ndim, arr.ndim))
    if shape is not None:
        for actual, req in zip(arr.shape, shape):
            if req is not None and actual != req:
                # Show every unrestricted axis as '*', including a trailing
                # one (the old 'None, ' replacement missed the last entry)
                req_shape = str(shape).replace('None', '*')
                msg = '{0} is required to have shape {1} but has shape {2}'
                raise ValueError(msg.format(name, req_shape, arr.shape))
    if contiguous:
        # Honor the requested memory layout; ascontiguousarray always
        # produces C order and would silently undo order='F'
        if order == 'F':
            arr = np.asfortranarray(arr, dtype=dtype)
        else:
            arr = np.ascontiguousarray(arr, dtype=dtype)
    return arr

146 

147 

class PandasWrapper(object):
    """
    Wrap array_like using the index from the original input, if pandas

    Parameters
    ----------
    pandas_obj : {Series, DataFrame}
        Object to extract the index from for wrapping. If it is neither a
        Series nor a DataFrame, ``wrap`` returns plain ndarrays.

    Notes
    -----
    ``wrap`` raises if ``pandas_obj`` is a pandas type but obj and
    ``pandas_obj`` have different numbers of elements in axis 0 (after
    accounting for trimming). It also raises if obj is not 1-d or 2-d.
    """

    def __init__(self, pandas_obj):
        self._pandas_obj = pandas_obj
        self._is_pandas = isinstance(pandas_obj, (pd.Series, pd.DataFrame))

    def wrap(self, obj, columns=None, append=None, trim_start=0, trim_end=0):
        """
        Parameters
        ----------
        obj : {array_like}
            The value to wrap like to a pandas Series or DataFrame.
        columns : {str, list[str]}
            Column names or series name, if obj is 1d.
        append : str
            String to append to the columns to create a new column name.
            Ignored for 2-d obj when no column names are available.
        trim_start : int
            The number of observations to drop from the start of the index, so
            that the index applied is index[trim_start:].
        trim_end : int
            The number of observations to drop from the end of the index , so
            that the index applied is index[:nobs - trim_end].

        Returns
        -------
        array_like
            A pandas Series or DataFrame, depending on the shape of obj. A
            ndarray if the wrapped object is not a pandas type.

        Raises
        ------
        ValueError
            If obj is not 1-d or 2-d, or if its length along axis 0 plus the
            trimmed observations differs from the length of the pandas
            object.
        """
        obj = np.asarray(obj)
        if not self._is_pandas:
            return obj

        # Check the dimension first so that a 0-d input raises the
        # documented ValueError rather than an IndexError from shape[0]
        if obj.ndim not in (1, 2):
            raise ValueError('Can only wrap 1 or 2-d array_like')
        if obj.shape[0] + trim_start + trim_end != self._pandas_obj.shape[0]:
            raise ValueError('obj must have the same number of elements in '
                             'axis 0 as orig')
        index = self._pandas_obj.index
        index = index[trim_start:index.shape[0] - trim_end]
        if obj.ndim == 1:
            if columns is None:
                name = getattr(self._pandas_obj, 'name', None)
            elif isinstance(columns, str):
                name = columns
            else:
                name = columns[0]
            if append is not None:
                # str() keeps non-string names (e.g. integer labels) from
                # raising, matching the handling of 2-d column labels below
                name = append if name is None else str(name) + '_' + append

            return pd.Series(obj, name=name, index=index)

        if columns is None:
            columns = getattr(self._pandas_obj, 'columns', None)
        # A Series source has no columns; there is nothing to append to, so
        # fall back to the default DataFrame column labels
        if append is not None and columns is not None:
            columns = [append if c is None else str(c) + '_' + append
                       for c in columns]
        return pd.DataFrame(obj, columns=columns, index=index)

221 

222 

def bool_like(value, name, optional=False, strict=False):
    """
    Validate that a value can be used as a bool and convert it

    Parameters
    ----------
    value : object
        Candidate value
    name : str
        Name of the variable, used when building exception messages
    optional : bool
        If True, None is accepted and passed through unchanged
    strict : bool
        If True, only instances of bool are accepted. If False, anything
        that ``bool()`` can convert is accepted; objects with a callable
        ``squeeze`` attribute are squeezed before conversion.

    Returns
    -------
    converted : bool
        ``value`` as a bool (or None when allowed by ``optional``)

    Raises
    ------
    TypeError
        If ``value`` is not a bool in strict mode, or cannot be converted to
        a bool otherwise.
    """
    if value is None and optional:
        return value
    suffix = ' or None' if optional else ''

    if strict:
        if not isinstance(value, bool):
            raise TypeError('{0} must be a bool{1}'.format(name, suffix))
        return value

    # Reduce array-like containers holding a single element to that element
    squeezer = getattr(value, 'squeeze', None)
    if callable(squeezer):
        value = squeezer()
    try:
        converted = bool(value)
    except Exception:
        raise TypeError('{0} must be a bool (or bool-compatible)'
                        '{1}'.format(name, suffix))
    return converted

260 

261 

def int_like(value, name, optional=False, strict=False):
    """
    Validate that a value is integer-like and convert it to int

    Parameters
    ----------
    value : object
        Candidate value
    name : str
        Name of the variable, used when building exception messages
    optional : bool
        If True, None is accepted and returned as None
    strict : bool
        If True, only int or np.integer instances (excluding bool) are
        accepted. If False, any value that equals itself floor-divided by 1
        and can be passed to ``int()`` is also accepted.

    Returns
    -------
    converted : int
        ``value`` as an int (or None when allowed by ``optional``)

    Raises
    ------
    TypeError
        If ``value`` is a bool or np.timedelta64, or is not integer-like.
    """
    if value is None and optional:
        return None
    # The exclusion test uses the value as passed in, before any squeezing
    excluded = isinstance(value, (bool, np.timedelta64))

    squeezer = getattr(value, 'squeeze', None)
    if callable(squeezer):
        value = squeezer()

    if not excluded:
        if isinstance(value, (int, np.integer)):
            return int(value)
        if not strict:
            # Best effort: any failure here simply means "not integer-like"
            try:
                if value == (value // 1):
                    return int(value)
            except Exception:
                pass

    suffix = ' or None' if optional else ''
    raise TypeError('{0} must be integer_like (int or np.integer, but not bool'
                    ' or timedelta64){1}'.format(name, suffix))

301 

302 

def float_like(value, name, optional=False, strict=False):
    """
    Validate that a value is float-like and convert it to float

    Parameters
    ----------
    value : object
        Candidate value
    name : str
        Name of the variable, used when building exception messages
    optional : bool
        If True, None is accepted and returned as None
    strict : bool
        If True, only int, np.integer, float or np.inexact instances that
        are neither bool nor complex are accepted. If False, complex values
        with a zero imaginary part are accepted, as is any other non-bool
        value that supports division by 1.0 and conversion to float.

    Returns
    -------
    converted : float
        ``value`` as a float (or None when allowed by ``optional``)

    Raises
    ------
    TypeError
        If ``value`` is not float-like under the selected rules.
    """
    if value is None and optional:
        return None
    # Both flags describe the value as passed in, before any squeezing
    bool_input = isinstance(value, bool)
    complex_input = isinstance(value, (complex, np.complexfloating))

    squeezer = getattr(value, 'squeeze', None)
    if callable(squeezer):
        value = squeezer()

    real_types = (int, np.integer, float, np.inexact)
    if isinstance(value, real_types) and not bool_input and not complex_input:
        return float(value)

    if not strict:
        if complex_input:
            # Complex input is only acceptable when it is actually real
            if np.imag(value) == 0:
                return float(np.real(value))
        elif not bool_input:
            # Best effort: any failure here simply means "not float-like"
            try:
                return float(value / 1.0)
            except Exception:
                pass

    suffix = ' or None' if optional else ''
    raise TypeError('{0} must be float_like (float or np.inexact)'
                    '{1}'.format(name, suffix))

348 

349 

def string_like(value, name, optional=False, options=None, lower=True):
    """
    Validate that a value is a string, optionally from a set of options

    Parameters
    ----------
    value : object
        Candidate value
    name : str
        Name of the variable, used when building exception messages
    optional : bool
        Only changes the wording of the exception messages. None is returned
        as None whether or not this flag is set.
        NOTE(review): the sibling validators reject None unless ``optional``
        is True; confirm whether callers rely on the behavior here.
    options : {None, iterable of str}
        If not None, the (possibly lower-cased) value must be one of these
    lower : bool
        If True, the value is converted to lower case before it is compared
        with ``options`` and returned

    Returns
    -------
    converted : {str, None}
        The validated string, lower-cased if ``lower`` is True, or None if
        ``value`` is None

    Raises
    ------
    TypeError
        If ``value`` is neither None nor a str.
    ValueError
        If ``options`` is given and the value is not one of them.
    """
    if value is None:
        return None
    if isinstance(value, str):
        result = value.lower() if lower else value
    else:
        suffix = ' or None' if optional else ''
        raise TypeError('{0} must be a string{1}'.format(name, suffix))

    if options is None or result in options:
        return result

    prefix = 'If not None, ' if optional else ''
    quoted = "'" + "', '".join(options) + "'"
    raise ValueError('{0}{1} must be one of: {2}'.format(prefix, name, quoted))

365 

366 

def dict_like(value, name, optional=False, strict=True):
    """
    Check if dict_like (dict, Mapping) or raise if not

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow dict. If False, allow any Mapping-like object.

    Returns
    -------
    converted : dict_like
        value, unchanged (or None when allowed by ``optional``)

    Raises
    ------
    TypeError
        If ``value`` is not a Mapping, or is not a dict when ``strict`` is
        True.
    """
    if optional and value is None:
        return None
    if (not isinstance(value, Mapping) or
            (strict and not isinstance(value, dict))):
        extra_text = 'If not None, ' if optional else ''
        # Only advertise Mapping support when it is actually accepted, i.e.
        # in non-strict mode
        strict_text = '' if strict else ' or dict_like (i.e., a Mapping)'
        msg = '{0}{1} must be a dict{2}'.format(extra_text, name, strict_text)
        raise TypeError(msg)
    return value