From 92783f189ba400548a479493a185a40ffbdea8b6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 12 Mar 2020 12:50:16 -0700 Subject: [PATCH 1/8] Checkpoint passing --- pandas/core/arrays/interval.py | 4 ++-- pandas/core/base.py | 3 ++- pandas/core/computation/expressions.py | 7 +++++-- pandas/core/dtypes/missing.py | 3 +++ pandas/core/internals/blocks.py | 13 +++++++++---- pandas/core/strings.py | 3 ++- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 51c94d5059f8b..27b1850d6e720 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -40,7 +40,7 @@ from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs from pandas.core.arrays.categorical import Categorical import pandas.core.common as com -from pandas.core.construction import array +from pandas.core.construction import array, extract_array from pandas.core.indexers import check_array_indexer from pandas.core.indexes.base import ensure_index @@ -649,7 +649,7 @@ def fillna(self, value=None, method=None, limit=None): ) raise TypeError(msg) - value = getattr(value, "_values", value) + value = extract_array(value, extract_numpy=True) self._check_closed_matches(value, name="value") left = self.left.fillna(value=value.left) diff --git a/pandas/core/base.py b/pandas/core/base.py index 40ff0640a5bc4..bb4f7001712ca 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -32,7 +32,7 @@ from pandas.core.accessor import DirNamesMixin from pandas.core.algorithms import duplicated, unique1d, value_counts from pandas.core.arrays import ExtensionArray -from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.construction import create_series_with_explicit_dtype, extract_array import pandas.core.nanops as nanops _shared_docs: Dict[str, str] = dict() @@ -1160,6 +1160,7 @@ def _map_values(self, mapper, na_action=None): else: values = self.astype(object) values = getattr(values, "values", values) + #values = extract_array(values, extract_numpy=True) if na_action == "ignore": def map_f(values, f): diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index fdc299ccdfde8..66d978cc94277 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -15,6 +15,7 @@ from pandas.core.dtypes.generic import ABCDataFrame from pandas.core.computation.check import _NUMEXPR_INSTALLED +from pandas.core.construction import extract_array if _NUMEXPR_INSTALLED: import numexpr as ne @@ -102,8 +103,10 @@ def _evaluate_numexpr(op, op_str, a, b): # we were originally called by a reversed op method a, b = b, a - a_value = getattr(a, "values", a) - b_value = getattr(b, "values", b) + #a_value = getattr(a, "values", a) + #b_value = getattr(b, "values", b) + a_value = a + b_value = b result = ne.evaluate( f"a_value {op_str} b_value", diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 682a0722de3b7..3ce1cf3e19dbb 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -43,6 +43,8 @@ ) from pandas.core.dtypes.inference import is_list_like +#from pandas.core.construction import extract_array + isposinf_scalar = libmissing.isposinf_scalar isneginf_scalar = libmissing.isneginf_scalar @@ -229,6 +231,7 @@ def _isna_ndarraylike(obj): if not is_extension: # Avoid accessing `.values` on things like # PeriodIndex, which may be expensive. + #values = extract_array(obj, extract_numpy=True) values = getattr(obj, "values", obj) else: values = obj diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6d4ee6222933c..88f663d6fc87d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -956,8 +956,11 @@ def putmask( """ new_values = self.values if inplace else self.values.copy() - new = getattr(new, "values", new) - mask = getattr(mask, "values", mask) + # TODO: Ideally we would ensure not-Series/Index/DataFrame before here + new = extract_array(new, extract_numpy=True) + new = new.values if isinstance(new, ABCDataFrame) else new # FIXME: kludge + mask = extract_array(mask, extract_numpy=True) + mask = mask.values if isinstance(mask, ABCDataFrame) else mask # FIXME: kludge # if we are passed a scalar None, convert it here if not is_list_like(new) and isna(new) and not self.is_object: @@ -1383,8 +1386,10 @@ def where( if transpose: values = values.T - other = getattr(other, "_values", getattr(other, "values", other)) - cond = getattr(cond, "values", cond) + # TODO: ideally we would ensure not Series/Index/DataFrame before here + other = extract_array(other, extract_numpy=True) + cond = extract_array(cond, extract_numpy=True) + cond = cond.values if isinstance(cond, ABCDataFrame) else cond # kludge # If the default broadcasting would go in the wrong direction, then # explicitly reshape other instead diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 71d9e8e7a577c..78a88e2c093b0 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2073,7 +2073,8 @@ def _validate(data): # see _libs/lib.pyx for list of inferred types allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] - values = getattr(data, "values", data) # Series / Index + values = getattr(data, "values", data) # Series / Index # without this we segfault in tests.io.excel.test_readers + #values = extract_array(data, extract_numpy=True) # breaks tests.extension.test_numpy tests bc infer_dtype breaks on PandasArray/PandasDtype values = getattr(values, "categories", values) # categorical / normal # explicitly allow StringDtype From 9db84a2c2630eebc4e5fcfbe6da25178e9a6997a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Mar 2020 15:46:44 -0700 Subject: [PATCH 2/8] revert --- pandas/core/internals/blocks.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 80c329b072de2..935ff09585b17 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -933,11 +933,8 @@ def putmask( """ new_values = self.values if inplace else self.values.copy() - # TODO: Ideally we would ensure not-Series/Index/DataFrame before here - new = extract_array(new, extract_numpy=True) - new = new.values if isinstance(new, ABCDataFrame) else new # FIXME: kludge - mask = extract_array(mask, extract_numpy=True) - mask = mask.values if isinstance(mask, ABCDataFrame) else mask # FIXME: kludge + new = getattr(new, "values", new) + mask = getattr(mask, "values", mask) # if we are passed a scalar None, convert it here if not is_list_like(new) and isna(new) and not self.is_object: @@ -1337,10 +1334,8 @@ def where( if transpose: values = values.T - # TODO: ideally we would ensure not Series/Index/DataFrame before here - other = extract_array(other, extract_numpy=True) - cond = extract_array(cond, extract_numpy=True) - cond = cond.values if isinstance(cond, ABCDataFrame) else cond # kludge + other = getattr(other, "_values", getattr(other, "values", other)) + cond = getattr(cond, "values", cond) # If the default broadcasting would go in the wrong direction, then # explicitly reshape other instead From 471feb56c219db2eadaf46ca2a865ec91c02d27e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 30 Mar 2020 17:59:52 -0700 Subject: [PATCH 3/8] revert --- pandas/core/base.py | 6 ++---- pandas/core/dtypes/missing.py | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 78042fcb9157b..e3ce96a7482be 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -32,7 +32,7 @@ from pandas.core.accessor import DirNamesMixin from pandas.core.algorithms import duplicated, unique1d, value_counts from pandas.core.arrays import ExtensionArray -from pandas.core.construction import create_series_with_explicit_dtype, extract_array +from pandas.core.construction import create_series_with_explicit_dtype import pandas.core.nanops as nanops _shared_docs: Dict[str, str] = dict() @@ -1144,9 +1144,7 @@ def _map_values(self, mapper, na_action=None): raise NotImplementedError map_f = lambda values, f: values.map(f) else: - values = self.astype(object) - values = getattr(values, "values", values) - #values = extract_array(values, extract_numpy=True) + values = self.astype(object)._values if na_action == "ignore": def map_f(values, f): diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index fbfb66125e379..d461db2d05f9d 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -43,8 +43,6 @@ ) from pandas.core.dtypes.inference import is_list_like -#from pandas.core.construction import extract_array - isposinf_scalar = libmissing.isposinf_scalar isneginf_scalar = libmissing.isneginf_scalar From aa17f00edd4032299c4174501db35e005e4d2fc6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 24 Apr 2020 11:33:25 -0700 Subject: [PATCH 4/8] Avoid getattr values --- pandas/core/algorithms.py | 3 ++- pandas/core/computation/expressions.py | 3 --- pandas/core/window/rolling.py | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e6967630b97ac..fe841232c4c33 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -89,6 +89,8 @@ def _ensure_data(values, dtype=None): values : ndarray pandas_dtype : str or dtype """ + values = extract_array(values, extract_numpy=True) + # we check some simple dtypes first if is_object_dtype(dtype): return ensure_object(np.asarray(values)), "object" @@ -151,7 +153,6 @@ def _ensure_data(values, dtype=None): elif is_categorical_dtype(values) and ( is_categorical_dtype(dtype) or dtype is None ): - values = getattr(values, "values", values) values = values.codes dtype = "category" diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 55a262d57946e..d9cd2c7be0093 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -15,7 +15,6 @@ from pandas.core.dtypes.generic import ABCDataFrame from pandas.core.computation.check import _NUMEXPR_INSTALLED -from pandas.core.construction import extract_array if _NUMEXPR_INSTALLED: import numexpr as ne @@ -103,8 +102,6 @@ def _evaluate_numexpr(op, op_str, a, b): # we were originally called by a reversed op method a, b = b, a - #a_value = getattr(a, "values", a) - #b_value = getattr(b, "values", b) a_value = a b_value = b diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3b14921528890..05400f63db972 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -37,6 +37,7 @@ from pandas.core.base import DataError, PandasObject, SelectionMixin, ShallowMixin import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.core.indexes.api import Index, ensure_index from pandas.core.util.numba_ import NUMBA_FUNC_CACHE from pandas.core.window.common import ( @@ -252,7 +253,7 @@ def __iter__(self): def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: """Convert input to numpy arrays for Cython routines""" if values is None: - values = getattr(self._selected_obj, "values", self._selected_obj) + values = extract_array(self._selected_obj, extract_numpy=True) # GH #12373 : rolling functions error on float32 data # make sure the data is coerced to float64 From d99d1c99ff8dd23622754fae5c3917e9673f95fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 24 Apr 2020 11:44:14 -0700 Subject: [PATCH 5/8] revert --- pandas/core/strings.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0a779e33990f8..76b851d8ac923 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2134,8 +2134,7 @@ def _validate(data): # see _libs/lib.pyx for list of inferred types allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] - values = getattr(data, "values", data) # Series / Index # without this we segfault in tests.io.excel.test_readers - #values = extract_array(data, extract_numpy=True) # breaks tests.extension.test_numpy tests bc infer_dtype breaks on PandasArray/PandasDtype + values = getattr(data, "values", data) # Series / Index values = getattr(values, "categories", values) # categorical / normal # explicitly allow StringDtype From de64f05b41257155a12f92b46bd87328cef77fb8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 24 Apr 2020 16:45:10 -0700 Subject: [PATCH 6/8] multiindex fix --- pandas/core/algorithms.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index fe841232c4c33..eca1733b61a52 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -49,6 +49,7 @@ ABCExtensionArray, ABCIndex, ABCIndexClass, + ABCMultiIndex, ABCSeries, ) from pandas.core.dtypes.missing import isna, na_value_for_dtype @@ -89,7 +90,9 @@ def _ensure_data(values, dtype=None): values : ndarray pandas_dtype : str or dtype """ - values = extract_array(values, extract_numpy=True) + if not isinstance(values, ABCMultiIndex): + # extract_array would raise + values = extract_array(values, extract_numpy=True) # we check some simple dtypes first if is_object_dtype(dtype): From 1bf828877c7565f07fe42ddd05fc8923c530bcea Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 24 Apr 2020 18:20:58 -0700 Subject: [PATCH 7/8] const fix --- pandas/_libs/hashtable_func_helper.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 6e5509a5570e8..c63f368dfae43 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -125,7 +125,7 @@ cpdef value_count_{{dtype}}({{c_type}}[:] values, bint dropna): {{if dtype == 'object'}} def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): {{else}} -def duplicated_{{dtype}}({{c_type}}[:] values, object keep='first'): +def duplicated_{{dtype}}(const {{c_type}}[:] values, object keep='first'): {{endif}} cdef: int ret = 0 From ea4ece7544b59942a0d8cc658f4e8fcd1909b676 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 25 Apr 2020 10:33:28 -0700 Subject: [PATCH 8/8] remove unnecessary check --- pandas/core/arrays/interval.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 3afa42d8d2f24..66faca29670cb 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -39,7 +39,7 @@ from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs from pandas.core.arrays.categorical import Categorical import pandas.core.common as com -from pandas.core.construction import array, extract_array +from pandas.core.construction import array from pandas.core.indexers import check_array_indexer from pandas.core.indexes.base import ensure_index @@ -648,7 +648,6 @@ def fillna(self, value=None, method=None, limit=None): ) raise TypeError(msg) - value = extract_array(value, extract_numpy=True) self._check_closed_matches(value, name="value") left = self.left.fillna(value=value.left)