Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tools/validation/validation.py : 9%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from collections.abc import Mapping
3import numpy as np
4import pandas as pd
7def _right_squeeze(arr, stop_dim=0):
8 """
9 Remove trailing singleton dimensions
11 Parameters
12 ----------
13 arr : ndarray
14 Input array
15 stop_dim : int
16 Dimension where checking should stop so that shape[i] is not checked
17 for i < stop_dim
19 Returns
20 -------
21 squeezed : ndarray
22 Array with all trailing singleton dimensions (0 or 1) removed.
23 Singleton dimensions for dimension < stop_dim are retained.
24 """
25 last = arr.ndim
26 for s in reversed(arr.shape):
27 if s > 1:
28 break
29 last -= 1
30 last = max(last, stop_dim)
32 return arr.reshape(arr.shape[:last])
def array_like(obj, name, dtype=np.double, ndim=1, maxdim=None,
               shape=None, order='C', contiguous=False, optional=False):
    """
    Convert array-like to a ndarray and check conditions

    Parameters
    ----------
    obj : array_like
         An array, any object exposing the array interface, an object whose
        __array__ method returns an array, or any (nested) sequence.
    name : str
        Name of the variable to use in exceptions
    dtype : {None, numpy.dtype, str}
        Required dtype. Default is double. If None, does not change the dtype
        of obj (if present) or uses NumPy to automatically detect the dtype
    ndim : {int, None}
        Required number of dimensions of obj. If None, no check is performed.
        If the number of dimensions of obj is less than ndim, additional axes
        are inserted on the right. If it is larger, trailing singleton
        dimensions are removed first. See examples. Ignored when ``maxdim``
        is not None.
    maxdim : {int, None}
        Maximum allowed dimension. Use ``maxdim`` instead of ``ndim`` when
        inputs are allowed to have ndim 1, 2, ..., or maxdim.
    shape : {tuple[int], None}
        Required shape obj. If None, no check is performed. Partially
        restricted shapes can be checked using None. See examples.
    order : {'C', 'F'}
        Order of the array
    contiguous : bool
        Ensure that the array's data is contiguous with order ``order``
    optional : bool
        Flag indicating whether None is allowed

    Returns
    -------
    ndarray
        The converted input, or None if ``optional`` is True and obj is None.

    Raises
    ------
    ValueError
        If the ``maxdim``, ``ndim`` or ``shape`` requirement is not satisfied.

    Examples
    --------
    Convert a list or pandas series to an array

    >>> import pandas as pd
    >>> x = [0, 1, 2, 3]
    >>> a = array_like(x, 'x', ndim=1)
    >>> a.shape
    (4,)

    >>> a = array_like(pd.Series(x), 'x', ndim=1)
    >>> a.shape
    (4,)
    >>> type(a)
    <class 'numpy.ndarray'>

    Squeezes singleton dimensions when required

    >>> x = np.array(x).reshape((4, 1))
    >>> a = array_like(x, 'x', ndim=1)
    >>> a.shape
    (4,)

    Right-appends when required size is larger than actual

    >>> x = [0, 1, 2, 3]
    >>> a = array_like(x, 'x', ndim=2)
    >>> a.shape
    (4, 1)

    Check only the first and last dimension of the input

    >>> x = np.arange(4*10*4).reshape((4, 10, 4))
    >>> y = array_like(x, 'x', ndim=3, shape=(4, None, 4))

    Check only the first two dimensions

    >>> z = array_like(x, 'x', ndim=3, shape=(4, 10))

    Raises ValueError if constraints are not satisfied

    >>> z = array_like(x, 'x', ndim=2)
    Traceback (most recent call last):
     ...
    ValueError: x is required to have ndim 2 but has ndim 3

    >>> z = array_like(x, 'x', ndim=3, shape=(10, 4, 4))
    Traceback (most recent call last):
     ...
    ValueError: x is required to have shape (10, 4, 4) but has shape (4, 10, 4)

    >>> z = array_like(x, 'x', ndim=3, shape=(None, 4, 4))
    Traceback (most recent call last):
     ...
    ValueError: x is required to have shape (*, 4, 4) but has shape (4, 10, 4)
    """
    if optional and obj is None:
        return None
    arr = np.asarray(obj, dtype=dtype, order=order)
    if maxdim is not None:
        # maxdim takes precedence: ndim is not enforced in this branch
        if arr.ndim > maxdim:
            msg = '{0} must have ndim <= {1}'.format(name, maxdim)
            raise ValueError(msg)
    elif ndim is not None:
        if arr.ndim > ndim:
            # Too many axes: drop trailing singletons down to ndim at most
            arr = _right_squeeze(arr, stop_dim=ndim)
        elif arr.ndim < ndim:
            # Too few axes: pad with singleton axes on the right
            arr = np.reshape(arr, arr.shape + (1,) * (ndim - arr.ndim))
        if arr.ndim != ndim:
            msg = '{0} is required to have ndim {1} but has ndim {2}'
            raise ValueError(msg.format(name, ndim, arr.ndim))
    if shape is not None:
        # zip stops at the shorter of the two, so only leading axes that
        # appear in both ``arr.shape`` and ``shape`` are compared
        for actual, req in zip(arr.shape, shape):
            if req is not None and actual != req:
                # Render every unrestricted axis as '*', including a
                # trailing one such as (4, None)
                req_shape = str(shape).replace('None', '*')
                msg = '{0} is required to have shape {1} but has shape {2}'
                raise ValueError(msg.format(name, req_shape, arr.shape))
    if contiguous:
        # Honour the requested memory order; forcing C order here would
        # silently undo order='F'
        if str(order).upper() == 'F':
            arr = np.asfortranarray(arr, dtype=dtype)
        else:
            arr = np.ascontiguousarray(arr, dtype=dtype)
    return arr
class PandasWrapper(object):
    """
    Wrap array_like using the index from the original input, if pandas

    Parameters
    ----------
    pandas_obj : {Series, DataFrame}
        Object to extract the index from for wrapping

    Notes
    -----
    Raises if ``pandas_obj`` is a pandas type but obj and ``pandas_obj``
    have different numbers of elements in axis 0. Also raises if the ndim of
    obj is not 1 or 2. If ``pandas_obj`` is not a Series or DataFrame, ``wrap``
    returns its input converted to an ndarray.
    """

    def __init__(self, pandas_obj):
        self._pandas_obj = pandas_obj
        self._is_pandas = isinstance(pandas_obj, (pd.Series, pd.DataFrame))

    def wrap(self, obj, columns=None, append=None, trim_start=0, trim_end=0):
        """
        Parameters
        ----------
        obj : {array_like}
            The value to wrap like to a pandas Series or DataFrame.
        columns : {str, list[str]}
            Column names or series name, if obj is 1d.
        append : str
            String to append to the columns to create a new column name.
        trim_start : int
            The number of observations to drop from the start of the index, so
            that the index applied is index[trim_start:].
        trim_end : int
            The number of observations to drop from the end of the index , so
            that the index applied is index[:nobs - trim_end].

        Returns
        -------
        array_like
            A pandas Series or DataFrame, depending on the shape of obj.
            A plain ndarray if the wrapped object is not a pandas type.

        Raises
        ------
        ValueError
            If obj is not 1-d or 2-d, or if its length along axis 0 plus the
            trimmed observations differs from the length of the original
            pandas object.
        """
        obj = np.asarray(obj)
        if not self._is_pandas:
            return obj

        if obj.ndim == 0:
            # A scalar has no axis 0; report it like any other unsupported
            # dimensionality rather than failing on shape[0]
            raise ValueError('Can only wrap 1 or 2-d array_like')
        if obj.shape[0] + trim_start + trim_end != self._pandas_obj.shape[0]:
            raise ValueError('obj must have the same number of elements in '
                             'axis 0 as orig')
        index = self._pandas_obj.index
        index = index[trim_start:index.shape[0] - trim_end]
        if obj.ndim == 1:
            if columns is None:
                name = getattr(self._pandas_obj, 'name', None)
            elif isinstance(columns, str):
                name = columns
            else:
                name = columns[0]
            if append is not None:
                # str() keeps non-string names (e.g. integer labels) usable,
                # matching the handling of column labels in the 2-d branch
                name = append if name is None else str(name) + '_' + append

            return pd.Series(obj, name=name, index=index)
        elif obj.ndim == 2:
            if columns is None:
                columns = getattr(self._pandas_obj, 'columns', None)
            if append is not None:
                # NOTE(review): if the wrapped object has no ``columns``
                # (e.g. a Series) and none were passed, ``columns`` is None
                # here and this loop raises TypeError - confirm intended
                new = []
                for c in columns:
                    new.append(append if c is None else str(c) + '_' + append)
                columns = new
            return pd.DataFrame(obj, columns=columns, index=index)
        else:
            raise ValueError('Can only wrap 1 or 2-d array_like')
def bool_like(value, name, optional=False, strict=False):
    """
    Validate that a value is bool-like and return it as a bool

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name used in exception messages
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, only instances of bool are accepted and returned unchanged.
        If False, anything that ``bool()`` can convert is accepted.

    Returns
    -------
    converted : bool
        value converted to a bool, or None when ``optional`` is True and
        value is None

    Raises
    ------
    TypeError
        If strict and value is not a bool, or if the conversion to bool
        fails.
    """
    if optional and value is None:
        return value

    suffix = ' or None' if optional else ''

    if strict:
        if not isinstance(value, bool):
            raise TypeError('{0} must be a bool{1}'.format(name, suffix))
        return value

    # Collapse array-like containers before converting
    squeezer = getattr(value, 'squeeze', None)
    if callable(squeezer):
        value = squeezer()

    try:
        converted = bool(value)
    except Exception:
        raise TypeError('{0} must be a bool (or bool-compatible)'
                        '{1}'.format(name, suffix))
    return converted
def int_like(value, name, optional=False, strict=False):
    """
    Validate that a value is integer-like and return it as an int

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name used in exception messages
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, only int or np.integer values that are not bool are
        accepted. If False, any value that equals itself floor-divided by 1
        and can be converted with ``int()`` is also accepted.

    Returns
    -------
    converted : int
        value converted to an int, or None when ``optional`` is True and
        value is None

    Raises
    ------
    TypeError
        If value is a bool or np.timedelta64, or cannot be interpreted as an
        integer under the rules above.
    """
    if optional and value is None:
        return None

    # Classified on the raw input, before any squeezing takes place
    rejected = isinstance(value, (bool, np.timedelta64))

    squeezer = getattr(value, 'squeeze', None)
    if callable(squeezer):
        value = squeezer()

    if not rejected:
        if isinstance(value, (int, np.integer)):
            return int(value)
        if not strict:
            # Best effort: any failure here falls through to the TypeError
            try:
                if value == (value // 1):
                    return int(value)
            except Exception:
                pass

    suffix = ' or None' if optional else ''
    raise TypeError('{0} must be integer_like (int or np.integer, but not bool'
                    ' or timedelta64){1}'.format(name, suffix))
def float_like(value, name, optional=False, strict=False):
    """
    Convert to float or raise if not float_like

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow int, np.integer, float or np.inexact that are
        not bool or complex. If False, allow complex types with 0 imag part or
        any other type that is float like in the sense that it support
        multiplication by 1.0 and conversion to float.

    Returns
    -------
    converted : float
        value converted to a float, or None when ``optional`` is True and
        value is None

    Raises
    ------
    TypeError
        If value is a bool, is complex with a non-zero imaginary part, or
        cannot be converted under the rules above.
    """
    if optional and value is None:
        return None
    is_bool = isinstance(value, bool)
    if hasattr(value, 'squeeze') and callable(value.squeeze):
        value = value.squeeze()
    # Classify complex values only after squeezing: a one-element complex
    # array or Series must be treated as complex too, otherwise float() would
    # silently discard a non-zero imaginary part
    is_complex = isinstance(value, (complex, np.complexfloating)) or (
        isinstance(value, np.ndarray) and value.ndim == 0 and
        np.issubdtype(value.dtype, np.complexfloating))

    if (isinstance(value, (int, np.integer, float, np.inexact)) and
            not (is_bool or is_complex)):
        return float(value)
    elif not strict and is_complex:
        imag = np.imag(value)
        if imag == 0:
            return float(np.real(value))
    elif not strict and not is_bool:
        # Best effort: any failure here falls through to the TypeError
        try:
            return float(value / 1.0)
        except Exception:
            pass
    extra_text = ' or None' if optional else ''
    raise TypeError('{0} must be float_like (float or np.inexact)'
                    '{1}'.format(name, extra_text))
def string_like(value, name, optional=False, options=None, lower=True):
    """
    Check that a value is a str, optionally lower-casing and validating it

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name used in exception messages
    optional : bool
        Only affects the wording of the exception messages
    options : {None, collection of str}
        If not None, the (possibly lower-cased) value must be in options
    lower : bool
        If True, the value is converted to lower case before it is checked
        against ``options`` and returned

    Returns
    -------
    converted : {str, None}
        The validated string, or None if value is None

    Raises
    ------
    TypeError
        If value is neither None nor a str
    ValueError
        If ``options`` is given and the value is not one of them
    """
    # NOTE(review): None is passed through even when ``optional`` is False,
    # unlike the other *_like validators - confirm this is intended
    if value is None:
        return None

    if not isinstance(value, str):
        suffix = ' or None' if optional else ''
        raise TypeError('{0} must be a string{1}'.format(name, suffix))

    result = value.lower() if lower else value
    if options is None or result in options:
        return result

    prefix = 'If not None, ' if optional else ''
    quoted = "'" + "', '".join(options) + "'"
    raise ValueError('{0}{1} must be one of: {2}'.format(prefix, name, quoted))
def dict_like(value, name, optional=False, strict=True):
    """
    Check if dict_like (dict, Mapping) or raise if not

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow dict. If False, allow any Mapping-like object.

    Returns
    -------
    converted : dict_like
        value, unchanged, or None when ``optional`` is True and value is None

    Raises
    ------
    TypeError
        If value is not a Mapping, or if strict and value is not a dict.
    """
    if optional and value is None:
        return None
    if (not isinstance(value, Mapping) or
            (strict and not isinstance(value, dict))):
        extra_text = 'If not None, ' if optional else ''
        # General Mappings are only acceptable in non-strict mode, so only
        # mention them in the message in that case
        strict_text = '' if strict else ' or dict_like (i.e., a Mapping)'
        msg = '{0}{1} must be a dict{2}'.format(extra_text, name, strict_text)
        raise TypeError(msg)
    return value