From 771b35446ac3d151743509935f615fc249a8e9c4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Nov 2019 10:22:04 -0700 Subject: [PATCH 1/5] REF: use array instead of Series --- pandas/core/algorithms.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c70e623778315..c18e767026e84 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -8,7 +8,7 @@ import numpy as np -from pandas._libs import algos, hashtable as htable, lib +from pandas._libs import Timestamp, algos, hashtable as htable, lib from pandas._libs.tslib import iNaT from pandas.util._decorators import Appender, Substitution, deprecate_kwarg @@ -1440,7 +1440,9 @@ def _take_nd_object(arr, indexer, out, axis: int, fill_value, mask_info): } -def _get_take_nd_function(ndim, arr_dtype, out_dtype, axis: int = 0, mask_info=None): +def _get_take_nd_function( + ndim: int, arr_dtype, out_dtype, axis: int = 0, mask_info=None +): if ndim <= 2: tup = (arr_dtype.name, out_dtype.name) if ndim == 1: @@ -1474,7 +1476,7 @@ def func2(arr, indexer, out, fill_value=np.nan): return func2 -def take(arr, indices, axis=0, allow_fill: bool = False, fill_value=None): +def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None): """ Take elements from an array. @@ -1570,7 +1572,7 @@ def take(arr, indices, axis=0, allow_fill: bool = False, fill_value=None): def take_nd( arr, indexer, - axis=0, + axis: int = 0, out=None, fill_value=np.nan, mask_info=None, @@ -1818,12 +1820,12 @@ def searchsorted(arr, value, side="left", sorter=None): elif not ( is_object_dtype(arr) or is_numeric_dtype(arr) or is_categorical_dtype(arr) ): - from pandas.core.series import Series - # E.g. if `arr` is an array with dtype='datetime64[ns]' # and `value` is a pd.Timestamp, we may need to convert value - value_ser = Series(value)._values + value_ser = array([value]) if is_scalar(value) else array(value) value = value_ser[0] if is_scalar(value) else value_ser + if isinstance(value, Timestamp) and value.tzinfo is None: + value = value.to_datetime64() result = arr.searchsorted(value, side=side, sorter=sorter) return result From e6a01f3260616c14660e46a0db71ffa4dc83ea18 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Nov 2019 10:30:15 -0700 Subject: [PATCH 2/5] REF: avoid Series dependency --- pandas/core/reshape/tile.py | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 2cc9f8927effb..14d0c5590f80f 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -20,6 +20,7 @@ is_scalar, is_timedelta64_dtype, ) +from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna from pandas import ( @@ -27,7 +28,6 @@ Index, Interval, IntervalIndex, - Series, to_datetime, to_timedelta, ) @@ -208,7 +208,8 @@ def cut( # NOTE: this binning code is changed a bit from histogram for var(x) == 0 # for handling the cut for datetime and timedelta objects - x_is_series, series_index, name, x = _preprocess_for_cut(x) + original = x + x = _preprocess_for_cut(x) x, dtype = _coerce_to_type(x) if not np.iterable(bins): @@ -270,9 +271,7 @@ def cut( duplicates=duplicates, ) - return _postprocess_for_cut( - fac, bins, retbins, x_is_series, series_index, name, dtype - ) + return _postprocess_for_cut(fac, bins, retbins, dtype, original) def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"): @@ -328,8 +327,8 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"): >>> pd.qcut(range(5), 4, labels=False) array([0, 0, 1, 2, 3]) """ - x_is_series, series_index, name, x = _preprocess_for_cut(x) - + original = x + x = _preprocess_for_cut(x) x, dtype = _coerce_to_type(x) if is_integer(q): @@ -347,9 +346,7 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"): duplicates=duplicates, ) - return _postprocess_for_cut( - fac, bins, retbins, x_is_series, series_index, name, dtype - ) + return _postprocess_for_cut(fac, bins, retbins, dtype, original) def _bins_to_cuts( @@ -537,13 +534,6 @@ def _preprocess_for_cut(x): input to array, strip the index information and store it separately """ - x_is_series = isinstance(x, Series) - series_index = None - name = None - - if x_is_series: - series_index = x.index - name = x.name # Check that the passed array is a Pandas or Numpy object # We don't want to strip away a Pandas data-type here (e.g. datetimetz) @@ -553,17 +543,17 @@ def _preprocess_for_cut(x): if x.ndim != 1: raise ValueError("Input array must be 1 dimensional") - return x_is_series, series_index, name, x + return x -def _postprocess_for_cut(fac, bins, retbins, x_is_series, series_index, name, dtype): +def _postprocess_for_cut(fac, bins, retbins, dtype, original): """ handles post processing for the cut method where we combine the index information if the originally passed datatype was a series """ - if x_is_series: - fac = Series(fac, index=series_index, name=name) + if isinstance(original, ABCSeries): + fac = original._constructor(fac, index=original.index, name=original.name) if not retbins: return fac From 413b925cd9e8f020d669497e2f31d4899cc47a71 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 2 Nov 2019 09:37:58 -0700 Subject: [PATCH 3/5] remove unused _isfinite check --- pandas/core/nanops.py | 13 ------------ pandas/tests/test_nanops.py | 40 ------------------------------------- 2 files changed, 53 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 070891c4acb5e..ca29a3fad07bf 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -325,19 +325,6 @@ def _get_values( return values, mask, dtype, dtype_max, fill_value -def _isfinite(values): - if is_datetime_or_timedelta_dtype(values): - return isna(values) - if ( - is_complex_dtype(values) - or is_float_dtype(values) - or is_integer_dtype(values) - or is_bool_dtype(values) - ): - return ~np.isfinite(values) - return ~np.isfinite(values.astype("float64")) - - def _na_ok_dtype(dtype): # TODO: what about datetime64tz? PeriodDtype? return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64)) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 49d1777df0751..52ad56967220f 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -704,46 +704,6 @@ def test__has_infs(self): self.check_bool(nanops._has_infs, val.astype("f4"), correct) self.check_bool(nanops._has_infs, val.astype("f2"), correct) - def test__isfinite(self): - pairs = [ - ("arr_complex", False), - ("arr_int", False), - ("arr_bool", False), - ("arr_str", False), - ("arr_utf", False), - ("arr_complex", False), - ("arr_complex_nan", True), - ("arr_nan_nanj", True), - ("arr_nan_infj", True), - ("arr_complex_nan_infj", True), - ] - pairs_float = [ - ("arr_float", False), - ("arr_nan", True), - ("arr_float_nan", True), - ("arr_nan_nan", True), - ("arr_float_inf", True), - ("arr_inf", True), - ("arr_nan_inf", True), - ("arr_float_nan_inf", True), - ("arr_nan_nan_inf", True), - ] - - func1 = lambda x: np.any(nanops._isfinite(x).ravel()) - - # TODO: unused? - # func2 = lambda x: np.any(nanops._isfinite(x).values.ravel()) - - for arr, correct in pairs: - val = getattr(self, arr) - self.check_bool(func1, val, correct) - - for arr, correct in pairs_float: - val = getattr(self, arr) - self.check_bool(func1, val, correct) - self.check_bool(func1, val.astype("f4"), correct) - self.check_bool(func1, val.astype("f2"), correct) - def test__bn_ok_dtype(self): assert nanops._bn_ok_dtype(self.arr_float.dtype, "test") assert nanops._bn_ok_dtype(self.arr_complex.dtype, "test") From 13ea61d0e37339e0d488d33c2a65c72d8c058299 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 3 Nov 2019 09:10:56 -0800 Subject: [PATCH 4/5] dont pass mask_info --- pandas/core/algorithms.py | 15 ++++----------- pandas/core/reshape/tile.py | 10 ++-------- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 95d9bd166fbdd..083c5a4cba74a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1575,7 +1575,6 @@ def take_nd( axis: int = 0, out=None, fill_value=np.nan, - mask_info=None, allow_fill: bool = True, ): """ @@ -1599,10 +1598,6 @@ def take_nd( maybe_promote to determine this type for any fill_value fill_value : any, default np.nan Fill value to replace -1 values with - mask_info : tuple of (ndarray, boolean) - If provided, value should correspond to: - (indexer != -1, (indexer != -1).any()) - If not provided, it will be computed internally if necessary allow_fill : boolean, default True If False, indexer is assumed to contain no -1 values so no filling will be done. This short-circuits computation of a mask. Result is @@ -1613,6 +1608,7 @@ def take_nd( subarray : array-like May be the same type as the input, or cast to an ndarray. """ + mask_info = None if is_extension_array_dtype(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) @@ -1634,12 +1630,9 @@ def take_nd( dtype, fill_value = maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer - if mask_info is not None: - mask, needs_masking = mask_info - else: - mask = indexer == -1 - needs_masking = mask.any() - mask_info = mask, needs_masking + mask = indexer == -1 + needs_masking = mask.any() + mask_info = mask, needs_masking if needs_masking: if out is not None and out.dtype != dtype: raise TypeError("Incompatible type for fill_value") diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 428dfb9c75f8f..95534755b8beb 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -4,6 +4,7 @@ import numpy as np from pandas._libs import Timedelta, Timestamp +from pandas._libs.interval import Interval from pandas._libs.lib import infer_dtype from pandas.core.dtypes.common import ( @@ -21,14 +22,7 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna -from pandas import ( - Categorical, - Index, - Interval, - IntervalIndex, - to_datetime, - to_timedelta, -) +from pandas import Categorical, Index, IntervalIndex, to_datetime, to_timedelta import pandas.core.algorithms as algos import pandas.core.nanops as nanops From a8a10af320f35e3c0cfb520cd567d060846b6616 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 3 Nov 2019 12:13:24 -0800 Subject: [PATCH 5/5] blackify --- pandas/core/algorithms.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 083c5a4cba74a..ceec0652b7ce2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1570,12 +1570,7 @@ def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None) def take_nd( - arr, - indexer, - axis: int = 0, - out=None, - fill_value=np.nan, - allow_fill: bool = True, + arr, indexer, axis: int = 0, out=None, fill_value=np.nan, allow_fill: bool = True ): """ Specialized Cython take which sets NaN values in one pass