Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/indexers.py : 13%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Low-dependency indexing utilities.
3"""
4import warnings
6import numpy as np
8from pandas._typing import Any, AnyArrayLike
10from pandas.core.dtypes.common import (
11 is_array_like,
12 is_bool_dtype,
13 is_extension_array_dtype,
14 is_integer_dtype,
15 is_list_like,
16)
17from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
19# -----------------------------------------------------------
20# Indexer Identification
def is_list_like_indexer(key) -> bool:
    """
    Tell whether ``key`` is a list-like indexer other than a NamedTuple.

    Plain tuples count as list-like indexers; instances of tuple
    *subclasses* (such as NamedTuples) do not.

    Parameters
    ----------
    key : object
        Candidate indexer.

    Returns
    -------
    bool
        True when ``key`` is list-like and is not an instance of a tuple
        subclass.
    """
    if not is_list_like(key):
        return False

    # A NamedTuple is a tuple whose concrete type is not ``tuple`` itself.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass
def is_scalar_indexer(indexer, arr_value) -> bool:
    """
    Return True if we are all scalar indexers.

    A non-tuple ``indexer`` is treated as a one-element tuple. The check is
    only performed when ``arr_value`` is one-dimensional; for any other
    dimensionality the result is False.

    Parameters
    ----------
    indexer : object
        A single indexer or a tuple of indexers.
    arr_value : object
        Object exposing an ``ndim`` attribute (e.g. a ``np.ndarray``).

    Returns
    -------
    bool
        True when ``arr_value.ndim == 1`` and every component of ``indexer``
        is an integer scalar (Python ``int`` or NumPy integer, booleans
        excluded).
    """
    if arr_value.ndim != 1:
        return False

    if not isinstance(indexer, tuple):
        indexer = (indexer,)

    # The previous implementation tested for *empty ndarrays* here (the same
    # check is_empty_indexer performs), so a genuine scalar indexer such as
    # ``0`` could never be reported as scalar. Test for integer scalars
    # instead; ``bool`` is excluded because it denotes a mask, not a position.
    return len(indexer) > 0 and all(
        isinstance(idx, (int, np.integer)) and not isinstance(idx, bool)
        for idx in indexer
    )
def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool:
    """
    Report whether ``indexer`` selects nothing.

    Parameters
    ----------
    indexer : object
        A single indexer or a tuple of indexers.
    arr_value : np.ndarray
        Only its ``ndim`` is consulted.

    Returns
    -------
    bool
        True when ``indexer`` is a zero-length list-like, or when
        ``arr_value`` is one-dimensional and at least one component of
        ``indexer`` is a zero-length ndarray; False otherwise.
    """
    # A list-like of length zero is empty regardless of arr_value.
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    if arr_value.ndim != 1:
        return False

    # Normalise to a tuple so single indexers and tuples share one code path.
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False
76# -----------------------------------------------------------
77# Indexer Validation
def check_setitem_lengths(indexer, value, values) -> None:
    """
    Ensure that a list-like ``value`` has the length its ``indexer`` selects.

    A special case is permitted: when ``indexer`` is a boolean ndarray whose
    number of True entries equals ``len(value)``, no exception is raised.
    Nothing is checked when ``value`` is not list-like, or when ``indexer``
    is neither an ndarray, a list nor a slice.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        When ``indexer`` is an ndarray or list whose length differs from
        ``len(value)`` (boolean-mask special case aside), or when ``indexer``
        is a slice into non-empty ``values`` that selects a number of
        elements different from ``len(value)``.
    """
    # Both branches below only apply to list-like values.
    if not is_list_like(value):
        return

    if isinstance(indexer, (np.ndarray, list)):
        if len(indexer) == len(value):
            return

        # boolean mask whose count of True entries matches the value is ok too
        is_matching_mask = (
            isinstance(indexer, np.ndarray)
            and indexer.dtype == np.bool_
            and len(indexer[indexer]) == len(value)
        )
        if not is_matching_mask:
            raise ValueError(
                "cannot set using a list-like indexer "
                "with a different length than the value"
            )

    elif isinstance(indexer, slice):
        # An empty target is never length-checked.
        if len(values) and len(value) != length_of_indexer(indexer, values):
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray or array-like
        Positional indices to validate. Coerced with ``np.asarray`` so that
        plain sequences are accepted as well.
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        When any index is less than -1.
    IndexError
        When any index is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices([1, 2], 3)  # OK
    >>> validate_indices([1, -2], 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)
    >>> validate_indices([1, 2, 3], 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    >>> validate_indices([-1, -1], 0)  # OK
    >>> validate_indices([0, 1], 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    # .min()/.max() below require an ndarray; without this coercion the
    # list inputs shown in the examples would fail with AttributeError.
    indices = np.asarray(indices)

    # min()/max() are undefined on empty arrays, and an empty indexer is
    # trivially in bounds.
    if len(indices):
        min_idx = indices.min()
        if min_idx < -1:
            msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
            raise ValueError(msg)

        max_idx = indices.max()
        if max_idx >= n:
            raise IndexError("indices are out-of-bounds")
169# -----------------------------------------------------------
170# Indexer Conversion
def maybe_convert_indices(indices, n: int):
    """
    Translate possibly-negative positional indices into non-negative ones.

    Negative entries are shifted by ``n``. After the shift, every entry must
    lie in ``[0, n)``; otherwise an IndexError is raised.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert. A list is converted to an
        ndarray first.
    n : int
        Number of elements in the array that we are indexing.

    Returns
    -------
    array-like
        The non-negative equivalents of the indices passed in. When no entry
        was negative, the input array itself is returned (for list input, the
        array built from it); otherwise a shifted copy is returned.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`), or was still negative.
    """
    if isinstance(indices, list):
        indices = np.array(indices)
        if not len(indices):
            # np.array([]) has a float dtype, which cannot be used for
            # indexing, so hand back an empty integer array instead.
            return np.empty(0, dtype=np.intp)

    negative = indices < 0
    if negative.any():
        # Copy before shifting so the caller's array is left untouched.
        indices = indices.copy()
        indices[negative] += n

    out_of_bounds = (indices < 0) | (indices >= n)
    if out_of_bounds.any():
        raise IndexError("indices are out-of-bounds")
    return indices
217# -----------------------------------------------------------
218# Unsorted
def length_of_indexer(indexer, target=None) -> int:
    """
    Return the length of a single non-tuple indexer which could be a slice.

    Parameters
    ----------
    indexer : object
        A slice, a Series/Index/ndarray/list, or a non-list-like object.
    target : sized object, optional
        The object being indexed. Only consulted (via ``len``) when
        ``indexer`` is a slice.

    Returns
    -------
    int
        For a slice with a ``target``: the number of elements the slice
        selects from ``target``. For a Series, Index, ndarray or list: its
        ``len``. For anything that is not a list-like indexer: 1.

    Raises
    ------
    AssertionError
        When ``indexer`` is a list-like of an unsupported type.
    ValueError
        When ``indexer`` is a slice with a step of zero.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices() resolves None / negative / out-of-range bounds for
        # either step direction, and len(range(...)) counts the selected
        # positions. The former hand-written arithmetic returned wrong or
        # negative lengths, e.g. for slice(None, None, -1), slice(3, 1) or
        # slice(10, None) on a length-5 target.
        return len(range(*indexer.indices(len(target))))

    if isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)):
        return len(indexer)

    if not is_list_like_indexer(indexer):
        # anything that is not list-like counts as a single position
        return 1

    raise AssertionError("cannot find the length of the indexer")
def deprecate_ndim_indexing(result):
    """
    Emit a DeprecationWarning when ``result`` has more than one dimension.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we currently return ndarray, which is deprecated
    (Deprecation GH#30588).

    Parameters
    ----------
    result : object
        Outcome of an indexing operation; its dimensionality is read with
        ``np.ndim``.
    """
    if np.ndim(result) <= 1:
        return

    message = (
        "Support for multi-dimensional indexing (e.g. `index[:, None]`) "
        "on an Index is deprecated and will be removed in a future "
        "version. Convert to a numpy array before indexing instead."
    )
    # stacklevel=3 attributes the warning two frames above this helper.
    warnings.warn(message, DeprecationWarning, stacklevel=3)
274# -----------------------------------------------------------
275# Public indexer validation
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray or object
        The validated indexer as a numpy array that can be used to index.
        Indexers that are not list-like, and tuples, are returned unchanged.

    Raises
    ------
    IndexError
        When the lengths of a boolean mask and `array` don't match, or when
        the indexer is an array of neither integer nor boolean dtype.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # Whatever is not list-like is returned as-is (possible valid array
    # indexers that are not array-like: integer, slice, Ellipsis, None).
    # Tuples are list-like but are passed through too, as they have a
    # specific meaning in indexing (multi-dimensional indexing).
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype
    if is_bool_dtype(dtype):
        if is_extension_array_dtype(dtype):
            # missing values in a boolean mask select nothing
            indexer = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            indexer = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(indexer) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(indexer)} instead of {len(array)}"
            )
    elif is_integer_dtype(dtype):
        try:
            indexer = np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            # Chain explicitly so the underlying conversion failure is kept
            # as the cause of the user-facing error.
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err
    else:
        raise IndexError("arrays used as indices must be of integer or boolean type")

    return indexer