From 1bb2f60f12d2ea99937bbf5e79ed7b36b94ffa74 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 10:31:26 -0400 Subject: [PATCH 1/3] BUG: cut/qcut should always return int64 bins closes #14866 --- pandas/tests/tools/test_tile.py | 4 ++-- pandas/tools/tile.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/tools/test_tile.py b/pandas/tests/tools/test_tile.py index 11b242bc06e15..cc80c1ff5db29 100644 --- a/pandas/tests/tools/test_tile.py +++ b/pandas/tests/tools/test_tile.py @@ -19,8 +19,8 @@ class TestCut(tm.TestCase): def test_simple(self): data = np.ones(5) result = cut(data, 4, labels=False) - desired = np.array([1, 1, 1, 1, 1]) - tm.assert_numpy_array_equal(result, desired, + expected = np.array([1, 1, 1, 1, 1]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) def test_bins(self): diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index ccd8c2478e8a5..4a3d452228e01 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -4,7 +4,7 @@ from pandas.types.missing import isnull from pandas.types.common import (is_float, is_integer, - is_scalar) + is_scalar, _ensure_int64) from pandas.core.api import Series from pandas.core.categorical import Categorical @@ -215,7 +215,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, bins = unique_bins side = 'left' if right else 'right' - ids = bins.searchsorted(x, side=side) + ids = _ensure_int64(bins.searchsorted(x, side=side)) if include_lowest: ids[x == bins[0]] = 1 From 4163918d82d359b42aebfee07c6a3d2c4340683d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 10:40:40 -0400 Subject: [PATCH 2/3] BUG: fix isin for 32bit platform issues --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 44 +++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e0d15c218ec85..09358f77c9c9e 100644 --- 
a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -824,6 +824,7 @@ Bug Fixes - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f9d4c9107d7cd..00a3264e6c74a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -169,33 +169,45 @@ def isin(comps, values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(comps).__name__)) - comps = np.asarray(comps) if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(values).__name__)) - if not isinstance(values, np.ndarray): - values = list(values) + + from pandas import DatetimeIndex, PeriodIndex + + if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): + values = np.array(list(values), dtype='object') + + if needs_i8_conversion(comps): + if is_period_dtype(values): + comps = PeriodIndex(comps) + values = PeriodIndex(values) + else: + comps = DatetimeIndex(comps) + values = DatetimeIndex(values) + + values = values.asi8 + comps = comps.asi8 + elif is_bool_dtype(comps): + + try: + comps = np.asarray(comps).view('uint8') + values = np.asarray(values).view('uint8') + except TypeError: + # object array conversion will fail + pass + else: + comps = np.asarray(comps) + values = np.asarray(values) # GH11232 # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d if 
(_np_version_under1p8 and compat.PY3) or len(comps) > 1000000: f = lambda x, y: np.in1d(x, np.asarray(list(y))) - else: - f = lambda x, y: lib.ismember_int64(x, set(y)) - - # may need i8 conversion for proper membership testing - if is_datetime64_dtype(comps): - from pandas.tseries.tools import to_datetime - values = to_datetime(values)._values.view('i8') - comps = comps.view('i8') - elif is_timedelta64_dtype(comps): - from pandas.tseries.timedeltas import to_timedelta - values = to_timedelta(values)._values.view('i8') - comps = comps.view('i8') elif is_int64_dtype(comps): - pass + f = lambda x, y: lib.ismember_int64(x, set(y)) else: f = lambda x, y: lib.ismember(x, set(values)) From 93c03e31e8da0873c23044aca265d738a02e4073 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 11:25:24 -0400 Subject: [PATCH 3/3] BUG: 32bit compat for .get_indexer --- pandas/tests/indexes/test_multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index f67231e78983c..0c274b2f6c4ff 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1359,7 +1359,7 @@ def test_hash_collisions(self): names=['one', 'two']) result = index.get_indexer(index.values) self.assert_numpy_array_equal(result, - np.arange(len(index), dtype='int64')) + np.arange(len(index), dtype='intp')) for i in [0, 1, len(index) - 2, len(index) - 1]: result = index.get_loc(index[i])