Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Functions for arithmetic and comparison operations on NumPy arrays and 

3ExtensionArrays. 

4""" 

5from functools import partial 

6import operator 

7from typing import Any, Optional, Union 

8 

9import numpy as np 

10 

11from pandas._libs import Timedelta, Timestamp, lib, ops as libops 

12 

13from pandas.core.dtypes.cast import ( 

14 construct_1d_object_array_from_listlike, 

15 find_common_type, 

16 maybe_upcast_putmask, 

17) 

18from pandas.core.dtypes.common import ( 

19 ensure_object, 

20 is_bool_dtype, 

21 is_integer_dtype, 

22 is_list_like, 

23 is_object_dtype, 

24 is_scalar, 

25) 

26from pandas.core.dtypes.generic import ( 

27 ABCDatetimeArray, 

28 ABCExtensionArray, 

29 ABCIndex, 

30 ABCIndexClass, 

31 ABCSeries, 

32 ABCTimedeltaArray, 

33) 

34from pandas.core.dtypes.missing import isna, notna 

35 

36from pandas.core.ops import missing 

37from pandas.core.ops.dispatch import dispatch_to_extension_op, should_extension_dispatch 

38from pandas.core.ops.invalid import invalid_comparison 

39from pandas.core.ops.roperator import rpow 

40 

41 

def comp_method_OBJECT_ARRAY(op, x, y):
    """
    Compare an object-dtype array against a scalar or an array-like.

    Parameters
    ----------
    op : comparison operator, e.g. operator.eq
    x : np.ndarray
        Left operand. It is raveled for the comparison and the result is
        reshaped back to ``x.shape``.
    y : list, np.ndarray, Series, Index, or scalar
        Lists are converted to a 1D object array; non-object array-likes
        are cast to object dtype before comparing.

    Returns
    -------
    np.ndarray
        Comparison result with the same shape as `x`.

    Raises
    ------
    ValueError
        If `y` is array-like and its shape differs from the shape of `x`.
    """
    if isinstance(y, list):
        y = construct_1d_object_array_from_listlike(y)

    # TODO: Should the checks below be ABCIndexClass?
    if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        if not is_object_dtype(y.dtype):
            y = y.astype(np.object_)

        if isinstance(y, (ABCSeries, ABCIndex)):
            y = y.values

        # Both operands are flattened below, so insist on identical shapes
        # first; otherwise elements would be paired up in the wrong order.
        if x.shape != y.shape:
            raise ValueError("Shapes must match", x.shape, y.shape)

        # Ravel `y` as well as `x` so that 2D operands are supported.
        result = libops.vec_compare(x.ravel(), y.ravel(), op)
    else:
        result = libops.scalar_compare(x.ravel(), y, op)
    return result.reshape(x.shape)

59 

60 

def masked_arith_op(x, y, op):
    """
    If the given arithmetic operation fails, attempt it again on
    only the non-null elements of the input array(s).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
        Anything that is neither an ndarray nor a scalar is rejected.
    op : binary operator

    Returns
    -------
    np.ndarray
        Result reshaped to ``x.shape``. Positions outside the computed mask
        are handed to ``maybe_upcast_putmask`` with ``np.nan`` as the value.

    Raises
    ------
    ValueError
        If `y` is an ndarray whose raveled shape differs from that of `x`.
    TypeError
        If `y` is neither an ndarray nor a scalar.
    """
    # Validate before touching `x`, so a wrong type fails on the assertion
    # rather than on a missing/different `ravel`.
    assert isinstance(x, np.ndarray), type(x)

    # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
    # the logic valid for both Series and DataFrame ops.
    xrav = x.ravel()

    if isinstance(y, np.ndarray):
        dtype = find_common_type([x.dtype, y.dtype])
        result = np.empty(x.size, dtype=dtype)

        # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
        # we would get int64 dtype, see GH#19956
        yrav = y.ravel()
        mask = notna(xrav) & notna(yrav)

        if xrav.shape != mask.shape or yrav.shape != mask.shape:
            # FIXME: GH#5284, GH#5035, GH#19448
            # Without specifically raising here we get mismatched
            # errors in Py3 (TypeError) vs Py2 (ValueError)
            # Note: Only = an issue in DataFrame case
            # Either operand may have been silently broadcast when the mask
            # was built, so both are compared against the mask.
            raise ValueError("Cannot broadcast operands together.")

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], yrav[mask])

    else:
        if not is_scalar(y):
            raise TypeError(type(y))

        # mask is only meaningful for x
        result = np.empty(x.size, dtype=x.dtype)
        mask = notna(xrav)

        # 1 ** np.nan is 1. So we have to unmask those.
        if op is pow:
            # Use the raveled view so the condition lines up with the 1D
            # mask even when `x` is 2D.
            mask = np.where(xrav == 1, False, mask)
        elif op is rpow:
            mask = np.where(y == 1, False, mask)

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], y)

    result, _ = maybe_upcast_putmask(result, ~mask, np.nan)
    result = result.reshape(x.shape)  # 2D compat
    return result

117 

118 

def define_na_arithmetic_op(op, str_rep: str):
    """
    Build a two-argument function that applies `op` via na_arithmetic_op.

    Parameters
    ----------
    op : binary operator
    str_rep : str
        Forwarded unchanged to na_arithmetic_op.

    Returns
    -------
    function
        ``na_op(x, y)``, which returns
        ``na_arithmetic_op(x, y, op, str_rep)``.
    """

    def na_op(x, y):
        # `op` and `str_rep` are captured from the enclosing call.
        outcome = na_arithmetic_op(x, y, op, str_rep)
        return outcome

    return na_op

124 

125 

def na_arithmetic_op(left, right, op, str_rep: str):
    """
    Evaluate `op` on the passed values and return the result.

    The operation is first attempted through
    ``pandas.core.computation.expressions.evaluate``. If that raises
    TypeError, it is retried with masked_arith_op. Either result is then
    passed through ``missing.dispatch_fill_zeros``.

    Parameters
    ----------
    left : np.ndarray
    right : np.ndarray or scalar
    op : binary operator
    str_rep : str or None

    Returns
    -------
    array-like

    Raises
    ------
    TypeError : invalid operation
    """
    # Imported here rather than at module level, as in the original code.
    import pandas.core.computation.expressions as expressions

    try:
        evaluated = expressions.evaluate(op, str_rep, left, right)
    except TypeError:
        # Second attempt, restricted to the non-null entries.
        evaluated = masked_arith_op(left, right, op)

    filled = missing.dispatch_fill_zeros(op, left, right, evaluated)
    return filled

154 

155 

def arithmetic_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op, str_rep: str
):
    """
    Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame or Index.  Series is *not* excluded.
    op : {operator.add, operator.sub, ...}
        Or one of the reversed variants from roperator.
    str_rep : str

    Returns
    -------
    ndarray or ExtensionArray
        Or a 2-tuple of these in the case of divmod or rdivmod.
    """
    from pandas.core.ops import maybe_upcast_for_op

    # NB: extract_array is assumed to have been called on both operands
    # before they reach this function.
    right = maybe_upcast_for_op(right, left.shape)

    # TimedeltaArray, DatetimeArray, and Timestamp are dispatched because
    # they have a `freq` attribute which dispatch_to_extension_op handles
    # correctly. Timedelta is dispatched because numexpr will fail on it,
    # see GH#31457.
    datetimelike_types = (ABCTimedeltaArray, ABCDatetimeArray, Timestamp, Timedelta)
    if should_extension_dispatch(left, right) or isinstance(
        right, datetimelike_types
    ):
        return dispatch_to_extension_op(op, left, right)

    with np.errstate(all="ignore"):
        return na_arithmetic_op(left, right, op, str_rep)

200 

201 

def comparison_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
) -> Union[np.ndarray, ABCExtensionArray]:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If `right` is an ndarray, ExtensionArray or Index whose length
        differs from that of `left`.
    TypeError
        If the comparison produces a scalar instead of an array.
    """
    # NB: extract_array is assumed to have been called on left and right
    right = lib.item_from_zerodim(right)
    if isinstance(right, list):
        # TODO: same for tuples?
        right = np.asarray(right)

    # TODO: make this treatment consistent across ops and classes.
    #  We are not catching all listlikes here (e.g. frozenset, tuple)
    #  The ambiguous case is object-dtype.  See GH#27803
    if isinstance(right, (np.ndarray, ABCExtensionArray, ABCIndexClass)) and len(
        left
    ) != len(right):
        raise ValueError("Lengths must match to compare")

    if should_extension_dispatch(left, right):
        return dispatch_to_extension_op(op, left, right)

    if is_scalar(right) and isna(right):
        # numpy does not like comparisons vs None: only `!=` is all-True,
        # every other comparison against a null scalar is all-False.
        constructor = np.ones if op is operator.ne else np.zeros
        return constructor(left.shape, dtype=bool)

    if is_object_dtype(left.dtype):
        return comp_method_OBJECT_ARRAY(op, left, right)

    # Look up the matching dunder (e.g. ``__eq__``) on the left operand.
    op_name = f"__{op.__name__}__"
    bound_method = getattr(left, op_name)
    with np.errstate(all="ignore"):
        outcome = bound_method(right)

    if outcome is NotImplemented:
        outcome = invalid_comparison(left, right, op)
    if is_scalar(outcome):
        typ = type(right)
        raise TypeError(f"Could not compare {typ} type with Series")

    return outcome

262 

263 

def na_logical_op(x: np.ndarray, y, op):
    """
    Apply a logical operator to `x` and `y`, with a fallback on TypeError.

    ``op(x, y)`` is tried first. If it raises TypeError, an ndarray `y` is
    handled by casting both operands with ``ensure_object`` and calling
    ``libops.vec_binop``; a scalar `y` is converted with ``bool`` (unless it
    is null) and passed to ``libops.scalar_binop``.

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator, e.g. operator.and_

    Returns
    -------
    Result of ``op(x, y)`` or of the fallback helper.

    Raises
    ------
    TypeError
        If the scalar fallback fails; the underlying exception is attached
        as ``__cause__``.
    """
    try:
        # For exposition, write:
        #  yarr = isinstance(y, np.ndarray)
        #  yint = is_integer(y) or (yarr and y.dtype.kind == "i")
        #  ybool = is_bool(y) or (yarr and y.dtype.kind == "b")
        #  xint = x.dtype.kind == "i"
        #  xbool = x.dtype.kind == "b"
        # Then Cases where this goes through without raising include:
        #  (xint or xbool) and (yint or ybool)
        result = op(x, y)
    except TypeError:
        if isinstance(y, np.ndarray):
            # bool-bool dtype operations should be OK, should not get here
            assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype))
            x = ensure_object(x)
            y = ensure_object(y)
            result = libops.vec_binop(x, y, op)
        else:
            # let null fall thru
            assert lib.is_scalar(y)
            if not isna(y):
                y = bool(y)
            try:
                result = libops.scalar_binop(x, y, op)
            except (
                TypeError,
                ValueError,
                AttributeError,
                OverflowError,
                NotImplementedError,
            ) as err:
                typ = type(y).__name__
                # Chain explicitly so the original failure stays visible
                # as the direct cause of the reported TypeError.
                raise TypeError(
                    f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
                    f"and scalar of type [{typ}]"
                ) from err

    return result

303 

304 

def logical_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
) -> Union[np.ndarray, ABCExtensionArray]:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """

    def _fill_bool(values, reference=None):
        # Replace nulls with False, then cast to bool unless `reference`
        # is given and is specifically not boolean.
        if values.dtype.kind in ("c", "f", "O"):
            # dtypes that can hold NA
            na_mask = isna(values)
            if na_mask.any():
                values = values.astype(object)
                values[na_mask] = False

        if reference is None or is_bool_dtype(reference.dtype):
            values = values.astype(bool)
        return values

    left_is_int = is_integer_dtype(left.dtype)

    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        right = construct_1d_object_array_from_listlike(right)

    # NB: extract_array is assumed to have been called on left and right
    if should_extension_dispatch(left, right):
        return dispatch_to_extension_op(op, left, right)

    if isinstance(right, np.ndarray):
        right_is_int = is_integer_dtype(right.dtype)
        if not right_is_int:
            right = _fill_bool(right, left)
    else:
        # i.e. scalar
        right_is_int = lib.is_integer(right)

    outcome = na_logical_op(left, right, op)

    # For int vs int `^`, `|`, `&` are bitwise operators and return
    # integer dtypes, so the result is passed through untouched.
    # Otherwise these are boolean ops and the result is null-filled/cast.
    if left_is_int and right_is_int:
        return outcome
    return _fill_bool(outcome)

370 

371 

def get_array_op(op, str_rep: Optional[str] = None):
    """
    Return a binary array operation corresponding to the given operator op.

    The operator's ``__name__`` (with surrounding underscores stripped)
    selects which of comparison_op, logical_op or arithmetic_op is wrapped.

    Parameters
    ----------
    op : function
        Binary operator from operator or roperator module.
    str_rep : str or None, default None
        str_rep to pass to arithmetic_op; unused for comparison and
        logical operators.

    Returns
    -------
    function
        A ``functools.partial`` with `op` (and, for arithmetic, `str_rep`)
        already bound.
    """
    comparison_names = {"eq", "ne", "lt", "le", "gt", "ge"}
    logical_names = {"and", "or", "xor", "rand", "ror", "rxor"}

    stripped = op.__name__.strip("_")
    if stripped in comparison_names:
        return partial(comparison_op, op=op)
    if stripped in logical_names:
        return partial(logical_op, op=op)
    # Everything else is treated as arithmetic.
    return partial(arithmetic_op, op=op, str_rep=str_rep)