From 702ef27bc7ba2fa9985a509b7b2a3901b1b19e0f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 18 Sep 2018 13:24:23 -0700 Subject: [PATCH 1/5] WIP: Use dispatch_to_series for combine_const --- pandas/core/frame.py | 31 +++++++++++++++++++++++++++---- pandas/core/ops.py | 25 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 251bc6587872d..71c1099ab3a20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4867,10 +4867,33 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): if lib.is_scalar(other) or np.ndim(other) == 0: return ops.dispatch_to_series(self, other, func) - new_data = self._data.eval(func=func, other=other, - errors=errors, - try_cast=try_cast) - return self._constructor(new_data) + elif (np.ndim(other) == 1 and isinstance(other, np.ndarray) and + len(other) == len(self.columns)): + right = np.broadcast_to(other, self.shape) + return ops.dispatch_to_series(self, right, func) + + elif (np.ndim(other) == 1 and isinstance(other, (tuple,np.ndarray)) and + len(other) == len(self) != len(self.columns)): + # tests include at least 1 tuple in this case + right = np.array(other)[:, None] + right = np.broadcast_to(right, self.shape) + return ops.dispatch_to_series(self, right, func) + + elif np.ndim(other) == 1: + raise ValueError("Shape incompatible") + + elif np.ndim(other) == 2 and other.shape == self.shape: + return ops.dispatch_to_series(self, other, func) + + elif (np.ndim(other) == 2 and isinstance(other, np.ndarray) and + other.shape[0] == 1 and other.shape[1] == len(self.columns)): + other = np.broadcast_to(other, self.shape) + return ops.dispatch_to_series(self, other, func) + + elif np.ndim(other) > 2: + raise ValueError("Wrong number of dimensions", other.shape) + + raise ValueError(getattr(other, 'shape', type(other))) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index ca9c2528f0aef..9cdcde28c0c3c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1662,6 +1662,31 @@ def column_op(a, b): return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))} + elif np.ndim(right) == 2 and right.shape == left.shape: + # ndarray with same shape + + def column_op(a, b): + return {i: func(a.iloc[:, i], b[:, i]) + for i in range(len(a.columns))} + + elif (np.ndim(right) == 2 and + right.shape[0] == 1 and + right.shape[1] == len(left.columns)): + # operate row-by-row + + def column_op(a, b): + return {i: func(a.iloc[:, i], b[0, i]) + for i in range(len(a.columns))} + + elif (np.ndim(right) == 2 and + right.shape[1] == 1 and + right.shape[0] == len(left.index)): + # operate column-by-column + + def column_op(a, b): + return {i: func(a.iloc[:, i], b[:, 0]) + for i in range(len(a.columns))} + else: # Remaining cases have less-obvious dispatch rules raise NotImplementedError(right) From d6d62e73ddba3791ee070ce8db068246a4e7e9dc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 18 Sep 2018 13:32:56 -0700 Subject: [PATCH 2/5] whitespace fixup --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 71c1099ab3a20..7b1373841fb36 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4872,7 +4872,8 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): right = np.broadcast_to(other, self.shape) return ops.dispatch_to_series(self, right, func) - elif (np.ndim(other) == 1 and isinstance(other, (tuple,np.ndarray)) and + elif (np.ndim(other) == 1 and + isinstance(other, (tuple, np.ndarray)) and len(other) == len(self) != len(self.columns)): # tests include at least 1 tuple in this case right = np.array(other)[:, None] From 9e84535d39e48f69a129cf85a2dec0c4b4e86f57 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 18 Sep 2018 14:48:45 -0700 Subject: [PATCH 3/5] Remove unused branch --- pandas/core/ops.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 9cdcde28c0c3c..af19972231e27 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1678,15 +1678,6 @@ def column_op(a, b): return {i: func(a.iloc[:, i], b[0, i]) for i in range(len(a.columns))} - elif (np.ndim(right) == 2 and - right.shape[1] == 1 and - right.shape[0] == len(left.index)): - # operate column-by-column - - def column_op(a, b): - return {i: func(a.iloc[:, i], b[:, 0]) - for i in range(len(a.columns))} - else: # Remaining cases have less-obvious dispatch rules raise NotImplementedError(right) From 706912f89f9f1d1807ffbd4d21cd002f9eb0688b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 19 Sep 2018 17:53:03 -0700 Subject: [PATCH 4/5] numpy<1.10 compat --- pandas/core/frame.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7b1373841fb36..db7dcfc51642e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4869,7 +4869,11 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): elif (np.ndim(other) == 1 and isinstance(other, np.ndarray) and len(other) == len(self.columns)): - right = np.broadcast_to(other, self.shape) + try: + right = np.broadcast_to(other, self.shape) + except AttributeError: + # numpy < 1.10 + right = np.tile(other, self.shape) return ops.dispatch_to_series(self, right, func) elif (np.ndim(other) == 1 and @@ -4877,7 +4881,11 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): len(other) == len(self) != len(self.columns)): # tests include at least 1 tuple in this case right = np.array(other)[:, None] - right = np.broadcast_to(right, self.shape) + try: + right = np.broadcast_to(right, self.shape) + except AttributeError: + # numpy < 1.10 + right = np.tile(right, self.shape) return ops.dispatch_to_series(self, right, func) elif np.ndim(other) == 1: From 914fd806114048b6368378f0e0938d65b64eeb39 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 3 Oct 2018 14:57:53 -0700 Subject: [PATCH 5/5] remove no-longer-needed eval --- pandas/core/internals/blocks.py | 139 ------------------------------ pandas/core/internals/managers.py | 6 -- 2 files changed, 145 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0e57dd33b1c4e..ff352bbd6e40e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1313,145 +1313,6 @@ def shift(self, periods, axis=0, mgr=None): return [self.make_block(new_values)] - def eval(self, func, other, errors='raise', try_cast=False, mgr=None): - """ - evaluate the block; return result block from the result - - Parameters - ---------- - func : how to combine self, other - other : a ndarray/object - errors : str, {'raise', 'ignore'}, default 'raise' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object - - try_cast : try casting the results to the input type - - Returns - ------- - a new block, the result of the func - """ - orig_other = other - values = self.values - - other = getattr(other, 'values', other) - - # make sure that we can broadcast - is_transposed = False - if hasattr(other, 'ndim') and hasattr(values, 'ndim'): - if values.ndim != other.ndim: - is_transposed = True - else: - if values.shape == other.shape[::-1]: - is_transposed = True - elif values.shape[0] == other.shape[-1]: - is_transposed = True - else: - # this is a broadcast error heree - raise ValueError( - "cannot broadcast shape [{t_shape}] with " - "block values [{oth_shape}]".format( - t_shape=values.T.shape, oth_shape=other.shape)) - - transf = (lambda x: x.T) if is_transposed else (lambda x: x) - - # coerce/transpose the args if needed - try: - values, values_mask, other, other_mask = self._try_coerce_args( - transf(values), other) - except TypeError: - block = self.coerce_to_target_dtype(orig_other) - return block.eval(func, orig_other, - errors=errors, - try_cast=try_cast, mgr=mgr) - - # get the result, may need to transpose the other - def get_result(other): - - # avoid numpy warning of comparisons again None - if other is None: - result = not func.__name__ == 'eq' - - # avoid numpy warning of elementwise comparisons to object - elif is_numeric_v_string_like(values, other): - result = False - - # avoid numpy warning of elementwise comparisons - elif func.__name__ == 'eq': - if is_list_like(other) and not isinstance(other, np.ndarray): - other = np.asarray(other) - - # if we can broadcast, then ok - if values.shape[-1] != other.shape[-1]: - return False - result = func(values, other) - else: - result = func(values, other) - - # mask if needed - if isinstance(values_mask, np.ndarray) and values_mask.any(): - result = result.astype('float64', copy=False) - result[values_mask] = np.nan - if other_mask is True: - result = result.astype('float64', copy=False) - result[:] = np.nan - elif isinstance(other_mask, np.ndarray) and other_mask.any(): - result = result.astype('float64', copy=False) - result[other_mask.ravel()] = np.nan - - return result - - # error handler if we have an issue operating with the function - def handle_error(): - - if errors == 'raise': - # The 'detail' variable is defined in outer scope. - raise TypeError( - 'Could not operate {other!r} with block values ' - '{detail!s}'.format(other=other, detail=detail)) # noqa - else: - # return the values - result = np.empty(values.shape, dtype='O') - result.fill(np.nan) - return result - - # get the result - try: - with np.errstate(all='ignore'): - result = get_result(other) - - # if we have an invalid shape/broadcast error - # GH4576, so raise instead of allowing to pass through - except ValueError as detail: - raise - except Exception as detail: - result = handle_error() - - # technically a broadcast error in numpy can 'work' by returning a - # boolean False - if not isinstance(result, np.ndarray): - if not isinstance(result, np.ndarray): - - # differentiate between an invalid ndarray-ndarray comparison - # and an invalid type comparison - if isinstance(values, np.ndarray) and is_list_like(other): - raise ValueError( - 'Invalid broadcasting comparison [{other!r}] with ' - 'block values'.format(other=other)) - - raise TypeError('Could not compare [{other!r}] ' - 'with block values'.format(other=other)) - - # transpose if needed - result = transf(result) - - # try to cast if requested - if try_cast: - result = self._try_cast_result(result) - - result = _block_shape(result, ndim=self.ndim) - return [self.make_block(result)] - def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False, mgr=None): """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2f29f1ae2509f..1cbc09b4ca51a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -373,9 +373,6 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, align_keys = ['new', 'mask'] else: align_keys = ['mask'] - elif f == 'eval': - align_copy = False - align_keys = ['other'] elif f == 'fillna': # fillna internally does putmask, maybe it's better to do this # at mgr, not block level? @@ -511,9 +508,6 @@ def isna(self, func, **kwargs): def where(self, **kwargs): return self.apply('where', **kwargs) - def eval(self, **kwargs): - return self.apply('eval', **kwargs) - def quantile(self, **kwargs): return self.reduction('quantile', **kwargs)