diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 5cbdbe6168bba..b555d17cf47bc 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -767,6 +767,42 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan` - Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`) - Bug in single row slicing on multi-type ``SparseDataFrame``s, types were previously forced to float (:issue:`13917`) +Indexer dtype Changes +^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This change only affects 64 bit python running on Windows, and only affects relatively advanced + indexing operations. + +Methods such as ``Index.get_indexer`` that return an indexer array coerce that array to a "platform int", so that it can be +directly used in 3rd party library operations like ``numpy.take``. Previously, a platform int was defined as ``np.int_`` +which corresponds to a C ``long``, but the correct type, and what is being used now, is ``np.intp``, which corresponds +to the C integer size that can hold a pointer. (:issue:`13972`) + +These types are the same on many platforms, but for 64 bit python on Windows, +``np.int_`` is 32 bits, and ``np.intp`` is 64 bits. Changing this behavior improves performance for many +operations on that platform. + +Previous behaviour: + +.. code-block:: ipython + + In [1]: i = pd.Index(['a', 'b', 'c']) + + In [2]: i.get_indexer(['b', 'b', 'c']).dtype + Out[2]: dtype('int32') + +New behaviour: + +.. code-block:: ipython + + In [1]: i = pd.Index(['a', 'b', 'c']) + + In [2]: i.get_indexer(['b', 'b', 'c']).dtype + Out[2]: dtype('int64') + + .. 
_whatsnew_0190.deprecations: Deprecations diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7920f05b5e7a1..1f863bf7247a0 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -259,7 +259,7 @@ def sort_mixed(values): new_labels = reverse_indexer.take(labels, mode='wrap') np.putmask(new_labels, mask, na_sentinel) - return ordered, new_labels + return ordered, _ensure_platform_int(new_labels) def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx index d1b6b326d7de6..af694c276b5b7 100644 --- a/pandas/hashtable.pyx +++ b/pandas/hashtable.pyx @@ -64,10 +64,10 @@ cdef class Factorizer: mask = (labels == na_sentinel) # sort on if sort: - if labels.dtype != np.int_: - labels = labels.astype(np.int_) + if labels.dtype != np.intp: + labels = labels.astype(np.intp) sorter = self.uniques.to_array().argsort() - reverse_indexer = np.empty(len(sorter), dtype=np.int_) + reverse_indexer = np.empty(len(sorter), dtype=np.intp) reverse_indexer.put(sorter, np.arange(len(sorter))) labels = reverse_indexer.take(labels, mode='clip') labels[mask] = na_sentinel @@ -100,11 +100,11 @@ cdef class Int64Factorizer: # sort on if sort: - if labels.dtype != np.int_: - labels = labels.astype(np.int_) + if labels.dtype != np.intp: + labels = labels.astype(np.intp) sorter = self.uniques.to_array().argsort() - reverse_indexer = np.empty(len(sorter), dtype=np.int_) + reverse_indexer = np.empty(len(sorter), dtype=np.intp) reverse_indexer.put(sorter, np.arange(len(sorter))) labels = reverse_indexer.take(labels) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index de7780d25b1e5..2e4fb405d3bf9 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2816,7 +2816,7 @@ def _get_leaf_sorter(labels): new_levels[level] = new_level if keep_order: # just drop missing values. o.w. 
keep order - left_indexer = np.arange(len(left)) + left_indexer = np.arange(len(left), dtype=np.intp) mask = new_lev_labels != -1 if not mask.all(): new_labels = [lab[mask] for lab in new_labels] @@ -2859,6 +2859,10 @@ def _get_leaf_sorter(labels): left_indexer, right_indexer = right_indexer, left_indexer if return_indexers: + left_indexer = (None if left_indexer is None + else _ensure_platform_int(left_indexer)) + right_indexer = (None if right_indexer is None + else _ensure_platform_int(right_indexer)) return join_index, left_indexer, right_indexer else: return join_index @@ -2902,6 +2906,8 @@ def _join_monotonic(self, other, how='left', return_indexers=False): join_index = self._wrap_joined_index(join_index, other) if return_indexers: + lidx = None if lidx is None else _ensure_platform_int(lidx) + ridx = None if ridx is None else _ensure_platform_int(ridx) return join_index, lidx, ridx else: return join_index diff --git a/pandas/src/algos_common_helper.pxi b/pandas/src/algos_common_helper.pxi index 59b3ddff46dec..a282d318c8823 100644 --- a/pandas/src/algos_common_helper.pxi +++ b/pandas/src/algos_common_helper.pxi @@ -2848,16 +2848,16 @@ def put2d_int64_float64(ndarray[int64_t, ndim=2, cast=True] values, # ensure_dtype #---------------------------------------------------------------------- -cdef int PLATFORM_INT = ( np.arange(0, dtype=np.int_)).descr.type_num +cdef int PLATFORM_INT = ( np.arange(0, dtype=np.intp)).descr.type_num cpdef ensure_platform_int(object arr): if util.is_array(arr): if ( arr).descr.type_num == PLATFORM_INT: return arr else: - return arr.astype(np.int_) + return arr.astype(np.intp) else: - return np.array(arr, dtype=np.int_) + return np.array(arr, dtype=np.intp) cpdef ensure_object(object arr): if util.is_array(arr): diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/src/algos_common_helper.pxi.in index 2327f10389cb5..8fdd86dd97dd4 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/src/algos_common_helper.pxi.in @@ 
-548,16 +548,16 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values, # ensure_dtype #---------------------------------------------------------------------- -cdef int PLATFORM_INT = ( np.arange(0, dtype=np.int_)).descr.type_num +cdef int PLATFORM_INT = ( np.arange(0, dtype=np.intp)).descr.type_num cpdef ensure_platform_int(object arr): if util.is_array(arr): if ( arr).descr.type_num == PLATFORM_INT: return arr else: - return arr.astype(np.int_) + return arr.astype(np.intp) else: - return np.array(arr, dtype=np.int_) + return np.array(arr, dtype=np.intp) cpdef ensure_object(object arr): if util.is_array(arr): @@ -600,4 +600,4 @@ cpdef ensure_{{name}}(object arr): else: return np.array(arr, dtype=np.{{dtype}}) -{{endfor}} \ No newline at end of file +{{endfor}} diff --git a/pandas/src/join.pyx b/pandas/src/join.pyx index 9281453c643ee..65c790beb5dbf 100644 --- a/pandas/src/join.pyx +++ b/pandas/src/join.pyx @@ -32,7 +32,8 @@ float64 = np.dtype(np.float64) cdef double NaN = np.NaN cdef double nan = NaN -from pandas.algos import groupsort_indexer +from pandas.algos import groupsort_indexer, ensure_platform_int +from pandas.core.algorithms import take_nd include "joins_func_helper.pxi" @@ -148,16 +149,14 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, # no multiple matches for any row on the left # this is a short-cut to avoid groupsort_indexer # otherwise, the `else` path also works in this case - if left_sorter.dtype != np.int_: - left_sorter = left_sorter.astype(np.int_) + left_sorter = ensure_platform_int(left_sorter) - rev = np.empty(len(left), dtype=np.int_) + rev = np.empty(len(left), dtype=np.intp) rev.put(left_sorter, np.arange(len(left))) else: rev, _ = groupsort_indexer(left_indexer, len(left)) - if rev.dtype != np.int_: - rev = rev.astype(np.int_) + rev = ensure_platform_int(rev) right_indexer = right_indexer.take(rev) left_indexer = left_indexer.take(rev) @@ -228,11 +227,8 @@ def 
full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, def _get_result_indexer(sorter, indexer): - if indexer.dtype != np.int_: - indexer = indexer.astype(np.int_) if len(sorter) > 0: - res = sorter.take(indexer) - np.putmask(res, indexer == -1, -1) + res = take_nd(sorter, indexer, fill_value=-1) else: # length-0 case res = np.empty(len(indexer), dtype=np.int64) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index c91585a28d867..3459a96be07f2 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1196,7 +1196,7 @@ def test_alignment_non_pandas(self): align = pd.core.ops._align_method_FRAME - for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype=np.intp)]: + for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype=np.int64)]: tm.assert_series_equal(align(df, val, 'index'), Series([1, 2, 3], index=df.index)) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 92560363be8fe..a6fde7f85084d 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -110,7 +110,7 @@ def f(): def test_reindex_base(self): idx = self.create_index() - expected = np.arange(idx.size) + expected = np.arange(idx.size, dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 88e49c4b55c8a..c1de707d7e58a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -936,10 +936,10 @@ def test_get_indexer(self): idx2 = Index([2, 4, 6]) r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, np.array([1, 3, -1])) + assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) r1 = idx2.get_indexer(idx1, method='pad') - e1 = np.array([-1, 0, 0, 1, 1]) + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) assert_almost_equal(r1, e1) r2 = idx2.get_indexer(idx1[::-1], method='pad') @@ -949,7 +949,7 @@ def 
test_get_indexer(self): assert_almost_equal(r1, rffill1) r1 = idx2.get_indexer(idx1, method='backfill') - e1 = np.array([0, 0, 1, 1, 2]) + e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) assert_almost_equal(r1, e1) rbfill1 = idx2.get_indexer(idx1, method='bfill') @@ -974,25 +974,30 @@ def test_get_indexer_nearest(self): all_methods = ['pad', 'backfill', 'nearest'] for method in all_methods: actual = idx.get_indexer([0, 5, 9], method=method) - tm.assert_numpy_array_equal(actual, np.array([0, 5, 9])) + tm.assert_numpy_array_equal(actual, np.array([0, 5, 9], + dtype=np.intp)) actual = idx.get_indexer([0, 5, 9], method=method, tolerance=0) - tm.assert_numpy_array_equal(actual, np.array([0, 5, 9])) + tm.assert_numpy_array_equal(actual, np.array([0, 5, 9], + dtype=np.intp)) for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], [0, 2, 9]]): actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - tm.assert_numpy_array_equal(actual, np.array(expected)) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, tolerance=1) - tm.assert_numpy_array_equal(actual, np.array(expected)) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1], [0, 2, -1]]): actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, tolerance=0.2) - tm.assert_numpy_array_equal(actual, np.array(expected)) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) with tm.assertRaisesRegexp(ValueError, 'limit argument'): idx.get_indexer([1, 0], method='nearest', limit=1) @@ -1003,22 +1008,24 @@ def test_get_indexer_nearest_decreasing(self): all_methods = ['pad', 'backfill', 'nearest'] for method in all_methods: actual = idx.get_indexer([0, 5, 9], method=method) - tm.assert_numpy_array_equal(actual, np.array([9, 4, 0])) + tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], + dtype=np.intp)) for method, expected in 
zip(all_methods, [[8, 7, 0], [9, 8, 1], [9, 7, 0]]): actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - tm.assert_numpy_array_equal(actual, np.array(expected)) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) def test_get_indexer_strings(self): idx = pd.Index(['b', 'c']) actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='pad') - expected = np.array([-1, 0, 1, 1]) + expected = np.array([-1, 0, 1, 1], dtype=np.intp) tm.assert_numpy_array_equal(actual, expected) actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='backfill') - expected = np.array([0, 0, 1, -1]) + expected = np.array([0, 0, 1, -1], dtype=np.intp) tm.assert_numpy_array_equal(actual, expected) with tm.assertRaises(TypeError): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index e066842c33126..901b57dcc7bfe 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -336,7 +336,7 @@ def test_reindex_base(self): # determined by cat ordering idx = self.create_index() - expected = np.array([4, 0, 1, 5, 2, 3]) + expected = np.array([4, 0, 1, 5, 2, 3], dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) @@ -403,7 +403,7 @@ def test_get_indexer(self): for indexer in [idx2, list('abf'), Index(list('abf'))]: r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, np.array([0, 1, 2, -1])) + assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp)) self.assertRaises(NotImplementedError, lambda: idx2.get_indexer(idx1, method='pad')) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 9371bef8b8f2e..3ff52380a62d8 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -552,20 +552,21 @@ def test_get_loc(self): def test_get_indexer(self): idx = pd.date_range('2000-01-01', periods=3) - tm.assert_numpy_array_equal(idx.get_indexer(idx), np.array([0, 1, 2])) 
+ exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1])) + np.array([-1, 0, 1], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2])) + np.array([0, 1, 2], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1])) + np.array([0, 1, 1], dtype=np.intp)) tm.assert_numpy_array_equal( idx.get_indexer(target, 'nearest', tolerance=pd.Timedelta('1 hour')), - np.array([0, -1, 1])) + np.array([0, -1, 1], dtype=np.intp)) with tm.assertRaises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') @@ -872,19 +873,19 @@ def test_where_other(self): def test_get_indexer(self): idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.int_)) + np.array([0, 1, 2], dtype=np.intp)) target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', '2000-01-02T01'], freq='H') tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.int_)) + np.array([-1, 0, 1], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.int_)) + np.array([0, 1, 2], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.int_)) + np.array([0, 1, 1], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', tolerance='1 hour'), - np.array([0, -1, 1], dtype=np.int_)) + np.array([0, -1, 1], dtype=np.intp)) msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' with self.assertRaisesRegexp(ValueError, msg): @@ -892,7 +893,7 @@ def test_get_indexer(self): tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', tolerance='1 day'), - 
np.array([0, 1, 1], dtype=np.int_)) + np.array([0, 1, 1], dtype=np.intp)) def test_repeat(self): # GH10183 @@ -1048,19 +1049,19 @@ def test_get_loc(self): def test_get_indexer(self): idx = pd.to_timedelta(['0 days', '1 days', '2 days']) tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.int_)) + np.array([0, 1, 2], dtype=np.intp)) target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.int_)) + np.array([-1, 0, 1], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.int_)) + np.array([0, 1, 2], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.int_)) + np.array([0, 1, 1], dtype=np.intp)) res = idx.get_indexer(target, 'nearest', tolerance=pd.Timedelta('1 hour')) - tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.int_)) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) def test_numeric_compat(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 809e1ab05ef6e..604a735e7adef 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -775,7 +775,7 @@ def test_legacy_pickle(self): self.assertTrue(obj.equals(obj2)) res = obj.get_indexer(obj) - exp = np.arange(len(obj)) + exp = np.arange(len(obj), dtype=np.intp) assert_almost_equal(res, exp) res = obj.get_indexer(obj2[::-1]) @@ -794,7 +794,7 @@ def test_legacy_v2_unpickle(self): self.assertTrue(obj.equals(obj2)) res = obj.get_indexer(obj) - exp = np.arange(len(obj)) + exp = np.arange(len(obj), dtype=np.intp) assert_almost_equal(res, exp) res = obj.get_indexer(obj2[::-1]) @@ -1039,8 +1039,8 @@ def test_get_indexer(self): major_axis = Index(lrange(4)) minor_axis = Index(lrange(2)) - major_labels = np.array([0, 0, 1, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 
0, 1, 0, 1]) + major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) @@ -1048,10 +1048,10 @@ def test_get_indexer(self): idx2 = index[[1, 3, 5]] r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, np.array([1, 3, -1])) + assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) r1 = idx2.get_indexer(idx1, method='pad') - e1 = np.array([-1, 0, 0, 1, 1]) + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) assert_almost_equal(r1, e1) r2 = idx2.get_indexer(idx1[::-1], method='pad') @@ -1061,7 +1061,7 @@ def test_get_indexer(self): assert_almost_equal(r1, rffill1) r1 = idx2.get_indexer(idx1, method='backfill') - e1 = np.array([0, 0, 1, 1, 2]) + e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) assert_almost_equal(r1, e1) r2 = idx2.get_indexer(idx1[::-1], method='backfill') @@ -1723,8 +1723,8 @@ def test_join_multi(self): jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True) exp_idx = pd.MultiIndex.from_product( [np.arange(4), [1, 2]], names=['a', 'b']) - exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.int_) - exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.int64) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) self.assert_index_equal(jidx, exp_idx) self.assert_numpy_array_equal(lidx, exp_lidx) self.assert_numpy_array_equal(ridx, exp_ridx) @@ -1737,7 +1737,7 @@ def test_join_multi(self): # keep MultiIndex jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, - 1, -1], dtype=np.int64) + 1, -1], dtype=np.intp) self.assert_index_equal(jidx, midx) self.assertIsNone(lidx) self.assert_numpy_array_equal(ridx, exp_ridx) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 
90025fa014b78..f0af43e3513bb 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -284,15 +284,15 @@ def test_equals(self): def test_get_indexer(self): idx = Float64Index([0.0, 1.0, 2.0]) tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.int_)) + np.array([0, 1, 2], dtype=np.intp)) target = [-0.1, 0.5, 1.1] tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.int_)) + np.array([-1, 0, 1], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.int_)) + np.array([0, 1, 2], dtype=np.intp)) tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.int_)) + np.array([0, 1, 1], dtype=np.intp)) def test_get_loc(self): idx = Float64Index([0.0, 1.0, 2.0]) @@ -560,19 +560,19 @@ def test_identical(self): def test_get_indexer(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target) - expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) def test_get_indexer_pad(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target, method='pad') - expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) def test_get_indexer_backfill(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target, method='backfill') - expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) def test_join_outer(self): @@ -588,9 +588,9 @@ def test_join_outer(self): eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], - 
dtype=np.int_) + dtype=np.intp) eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], - dtype=np.int_) + dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) @@ -604,9 +604,9 @@ def test_join_outer(self): self.assert_index_equal(res, noidx_res) elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], - dtype=np.int64) + dtype=np.intp) eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], - dtype=np.int64) + dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) @@ -627,8 +627,8 @@ def test_join_inner(self): ridx = ridx.take(ind) eres = Int64Index([2, 12]) - elidx = np.array([1, 6], dtype=np.int_) - eridx = np.array([4, 1], dtype=np.int_) + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([4, 1], dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) @@ -642,8 +642,8 @@ def test_join_inner(self): res2 = self.index.intersection(other_mono) self.assert_index_equal(res, res2) - elidx = np.array([1, 6], dtype=np.int64) - eridx = np.array([1, 4], dtype=np.int64) + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([1, 4], dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) @@ -658,7 +658,7 @@ def test_join_left(self): return_indexers=True) eres = self.index eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], - dtype=np.int_) + dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) @@ -669,7 +669,7 @@ def test_join_left(self): res, lidx, ridx = self.index.join(other_mono, how='left', return_indexers=True) eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], - dtype=np.int64) + dtype=np.intp) tm.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) self.assertIsNone(lidx) @@ -680,8 +680,8 @@ def test_join_left(self): idx2 = Index([1, 2, 5, 7, 9]) res, 
lidx, ridx = idx2.join(idx, how='left', return_indexers=True) eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 - eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) - elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -694,7 +694,7 @@ def test_join_right(self): res, lidx, ridx = self.index.join(other, how='right', return_indexers=True) eres = other - elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.int_) + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) tm.assertIsInstance(other, Int64Index) self.assert_index_equal(res, eres) @@ -705,7 +705,7 @@ def test_join_right(self): res, lidx, ridx = self.index.join(other_mono, how='right', return_indexers=True) eres = other_mono - elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.int64) + elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp) tm.assertIsInstance(other, Int64Index) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) @@ -716,8 +716,8 @@ def test_join_right(self): idx2 = Index([1, 2, 5, 7, 9]) res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True) eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 - elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) - eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -757,10 +757,10 @@ def test_join_non_unique(self): exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) self.assert_index_equal(joined, exp_joined) - exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.int_) + exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) 
tm.assert_numpy_array_equal(lidx, exp_lidx) - exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.int_) + exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.intp) tm.assert_numpy_array_equal(ridx, exp_ridx) def test_join_self(self): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 8a036def0be1b..a00e6f58120d6 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -376,19 +376,19 @@ def test_identical(self): def test_get_indexer(self): target = RangeIndex(10) indexer = self.index.get_indexer(target) - expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) self.assert_numpy_array_equal(indexer, expected) def test_get_indexer_pad(self): target = RangeIndex(10) indexer = self.index.get_indexer(target, method='pad') - expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) self.assert_numpy_array_equal(indexer, expected) def test_get_indexer_backfill(self): target = RangeIndex(10) indexer = self.index.get_indexer(target, method='backfill') - expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) self.assert_numpy_array_equal(indexer, expected) def test_join_outer(self): @@ -403,9 +403,9 @@ def test_join_outer(self): eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]) elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, - -1, -1, -1, -1, -1, -1, -1], dtype=np.int_) + -1, -1, -1, -1, -1, -1, -1], dtype=np.intp) eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, - 5, 4, 3, 2, 1, 0], dtype=np.int_) + 5, 4, 3, 2, 1, 0], dtype=np.intp) self.assertIsInstance(res, Int64Index) self.assertFalse(isinstance(res, RangeIndex)) @@ -441,8 +441,8 @@ def test_join_inner(self): ridx = ridx.take(ind) eres = Int64Index([16, 18]) - elidx = 
np.array([8, 9]) - eridx = np.array([9, 7]) + elidx = np.array([8, 9], dtype=np.intp) + eridx = np.array([9, 7], dtype=np.intp) self.assertIsInstance(res, Int64Index) self.assert_index_equal(res, eres) @@ -467,7 +467,7 @@ def test_join_left(self): res, lidx, ridx = self.index.join(other, how='left', return_indexers=True) eres = self.index - eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.int_) + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp) self.assertIsInstance(res, RangeIndex) self.assert_index_equal(res, eres) @@ -493,7 +493,7 @@ def test_join_right(self): return_indexers=True) eres = other elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], - dtype=np.int_) + dtype=np.intp) self.assertIsInstance(other, Int64Index) self.assert_index_equal(res, eres) @@ -545,9 +545,9 @@ def test_join_non_unique(self): res, lidx, ridx = self.index.join(other, return_indexers=True) eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) - elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int_) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp) eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], - dtype=np.int_) + dtype=np.intp) self.assert_index_equal(res, eres) self.assert_numpy_array_equal(lidx, elidx) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 66fd1861f08f9..4b168ec419ae5 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -81,7 +81,7 @@ def test_labels(self): labels = [0, 1, 1, 2, 3, 0, -1, 4] result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([3, 1, 1, 2, 0, 3, -1, 4]) + expected_labels = np.array([3, 1, 1, 2, 0, 3, -1, 4], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) tm.assert_numpy_array_equal(result_labels, expected_labels) @@ -89,20 +89,20 @@ def test_labels(self): labels = [0, 1, 1, 2, 3, 0, 99, 4] result, result_labels = algos.safe_sort(values, labels, na_sentinel=99) - 
expected_labels = np.array([3, 1, 1, 2, 0, 3, 99, 4]) + expected_labels = np.array([3, 1, 1, 2, 0, 3, 99, 4], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) tm.assert_numpy_array_equal(result_labels, expected_labels) # out of bound indices labels = [0, 101, 102, 2, 3, 0, 99, 4] result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4]) + expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) tm.assert_numpy_array_equal(result_labels, expected_labels) labels = [] result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([], dtype=np.int_) + expected_labels = np.array([], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) tm.assert_numpy_array_equal(result_labels, expected_labels) @@ -116,7 +116,7 @@ def test_mixed_integer(self): labels = [0, 1, 2, 3, 0, -1, 1] result, result_labels = algos.safe_sort(values, labels) expected = np.array([0, 1, 'a', 'b'], dtype=object) - expected_labels = np.array([3, 1, 0, 2, 3, -1, 1]) + expected_labels = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) tm.assert_numpy_array_equal(result_labels, expected_labels) @@ -155,33 +155,33 @@ def test_basic(self): labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], sort=True) - exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.int_) + exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = np.array(['a', 'b', 'c'], dtype=object) self.assert_numpy_array_equal(uniques, exp) labels, uniques = algos.factorize(list(reversed(range(5)))) - exp = np.array([0, 1, 2, 3, 4], dtype=np.int_) + exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = np.array([4, 3, 2, 1, 0], dtype=np.int64) self.assert_numpy_array_equal(uniques, exp) labels, uniques = 
algos.factorize(list(reversed(range(5))), sort=True) - exp = np.array([4, 3, 2, 1, 0], dtype=np.int_) + exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = np.array([0, 1, 2, 3, 4], dtype=np.int64) self.assert_numpy_array_equal(uniques, exp) labels, uniques = algos.factorize(list(reversed(np.arange(5.)))) - exp = np.array([0, 1, 2, 3, 4], dtype=np.int_) + exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = np.array([4., 3., 2., 1., 0.], dtype=np.float64) self.assert_numpy_array_equal(uniques, exp) labels, uniques = algos.factorize(list(reversed(np.arange(5.))), sort=True) - exp = np.array([4, 3, 2, 1, 0], dtype=np.int_) + exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = np.array([0., 1., 2., 3., 4.], dtype=np.float64) self.assert_numpy_array_equal(uniques, exp) @@ -192,13 +192,13 @@ def test_mixed(self): x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf]) labels, uniques = algos.factorize(x) - exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.int_) + exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = pd.Index(['A', 'B', 3.14, np.inf]) tm.assert_index_equal(uniques, exp) labels, uniques = algos.factorize(x, sort=True) - exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.int_) + exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = pd.Index([3.14, np.inf, 'A', 'B']) tm.assert_index_equal(uniques, exp) @@ -211,13 +211,13 @@ def test_datelike(self): x = Series([v1, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) - exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.int_) + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = pd.DatetimeIndex([v1, v2]) self.assert_index_equal(uniques, exp) labels, uniques = algos.factorize(x, sort=True) - exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.int_) + exp = np.array([1, 
1, 1, 0, 0, 1], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) exp = pd.DatetimeIndex([v2, v1]) self.assert_index_equal(uniques, exp) @@ -229,12 +229,12 @@ def test_datelike(self): # periods are not 'sorted' as they are converted back into an index labels, uniques = algos.factorize(x) - exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.int_) + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) self.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) labels, uniques = algos.factorize(x, sort=True) - exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.int_) + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) self.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) @@ -243,12 +243,12 @@ def test_datelike(self): v2 = pd.to_timedelta('1 day') x = Series([v1, v2, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) - exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.int_) + exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) self.assert_index_equal(uniques, pd.to_timedelta([v1, v2])) labels, uniques = algos.factorize(x, sort=True) - exp = np.array([1, 0, 1, 1, 0, 0, 1], dtype=np.int_) + exp = np.array([1, 0, 1, 1, 0, 0, 1], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) self.assert_index_equal(uniques, pd.to_timedelta([v2, v1])) @@ -1271,7 +1271,7 @@ def test_groupsort_indexer(): def test_ensure_platform_int(): - arr = np.arange(100) + arr = np.arange(100, dtype=np.intp) result = _algos.ensure_platform_int(arr) assert (result is arr) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 2721d8d0e5e69..52cd65af42c5e 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -748,11 +748,11 @@ def test_factorize(self): o = orig.copy() if isinstance(o, Index) and o.is_boolean(): - exp_arr = np.array([0, 1] + [0] * 8) + exp_arr = np.array([0, 1] + [0] * 8, dtype=np.intp) exp_uniques = o exp_uniques = 
Index([False, True]) else: - exp_arr = np.array(range(len(o))) + exp_arr = np.array(range(len(o)), dtype=np.intp) exp_uniques = o labels, uniques = o.factorize() @@ -782,7 +782,8 @@ def test_factorize_repeated(self): o = o.take(indexer) n = o[5:].append(o) - exp_arr = np.array([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + exp_arr = np.array([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + dtype=np.intp) labels, uniques = n.factorize(sort=True) self.assert_numpy_array_equal(labels, exp_arr) @@ -792,7 +793,8 @@ def test_factorize_repeated(self): else: self.assert_index_equal(uniques, o, check_names=False) - exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4]) + exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4], + np.intp) labels, uniques = n.factorize(sort=False) self.assert_numpy_array_equal(labels, exp_arr) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index cc588d891b398..80e68d39b144a 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2576,12 +2576,12 @@ def test_groupby_complex(self): def test_level_preserve_order(self): grouped = self.mframe.groupby(level=0) - exp_labels = np.array([0, 0, 0, 1, 1, 2, 2, 3, 3, 3]) + exp_labels = np.array([0, 0, 0, 1, 1, 2, 2, 3, 3, 3], np.intp) assert_almost_equal(grouped.grouper.labels[0], exp_labels) def test_grouping_labels(self): grouped = self.mframe.groupby(self.mframe.index.get_level_values(0)) - exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3]) + exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) assert_almost_equal(grouped.grouper.labels[0], exp_labels) def test_cython_fail_agg(self): @@ -5958,22 +5958,22 @@ def test_lexsort_indexer(self): # orders=True, na_position='last' result = _lexsort_indexer(keys, orders=True, na_position='last') exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp)) + tm.assert_numpy_array_equal(result, np.array(exp, 
dtype=np.intp)) # orders=True, na_position='first' result = _lexsort_indexer(keys, orders=True, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) # orders=False, na_position='last' result = _lexsort_indexer(keys, orders=False, na_position='last') exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) # orders=False, na_position='first' result = _lexsort_indexer(keys, orders=False, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) def test_nargsort(self): # np.argsort(items) places NaNs last diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 1572363fc6136..ca7288b048427 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -572,7 +572,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): if name in self.left: if left_has_missing is None: - left_has_missing = any(left_indexer == -1) + left_has_missing = (left_indexer == -1).any() if left_has_missing: take_right = self.right_join_keys[i] @@ -584,7 +584,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): elif name in self.right: if right_has_missing is None: - right_has_missing = any(right_indexer == -1) + right_has_missing = (right_indexer == -1).any() if right_has_missing: take_left = self.left_join_keys[i] diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index d7f1a52612819..c24d8bf061f89 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -3527,7 +3527,7 @@ def test_factorize(self): idx1 = PeriodIndex(['2014-01', 
'2014-01', '2014-02', '2014-02', '2014-03', '2014-03'], freq='M') - exp_arr = np.array([0, 0, 1, 1, 2, 2]) + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') arr, idx = idx1.factorize() @@ -3541,12 +3541,12 @@ def test_factorize(self): idx2 = pd.PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', '2014-03', '2014-01'], freq='M') - exp_arr = np.array([2, 2, 1, 0, 2, 0]) + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) arr, idx = idx2.factorize(sort=True) self.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) - exp_arr = np.array([0, 0, 1, 2, 0, 2]) + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) exp_idx = PeriodIndex(['2014-03', '2014-02', '2014-01'], freq='M') arr, idx = idx2.factorize() self.assert_numpy_array_equal(arr, exp_arr) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 0bdf8590ec487..c1cd1cea0cad7 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1799,7 +1799,7 @@ def test_factorize(self): idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day', '3 day']) - exp_arr = np.array([0, 0, 1, 1, 2, 2]) + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day']) arr, idx = idx1.factorize() @@ -1812,7 +1812,7 @@ def test_factorize(self): # freq must be preserved idx3 = timedelta_range('1 day', periods=4, freq='s') - exp_arr = np.array([0, 1, 2, 3]) + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) arr, idx = idx3.factorize() self.assert_numpy_array_equal(arr, exp_arr) self.assert_index_equal(idx, idx3) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index e5bbb923935e0..b34a377c7f889 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3800,7 +3800,7 @@ def test_factorize(self): idx1 = 
DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', '2014-03', '2014-03']) - exp_arr = np.array([0, 0, 1, 1, 2, 2]) + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) arr, idx = idx1.factorize() @@ -3822,13 +3822,13 @@ def test_factorize(self): idx2 = pd.DatetimeIndex(['2014-03', '2014-03', '2014-02', '2014-01', '2014-03', '2014-01']) - exp_arr = np.array([2, 2, 1, 0, 2, 0]) + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) arr, idx = idx2.factorize(sort=True) self.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) - exp_arr = np.array([0, 0, 1, 2, 0, 2]) + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) exp_idx = DatetimeIndex(['2014-03', '2014-02', '2014-01']) arr, idx = idx2.factorize() self.assert_numpy_array_equal(arr, exp_arr) @@ -3836,7 +3836,7 @@ def test_factorize(self): # freq must be preserved idx3 = date_range('2000-01', periods=4, freq='M', tz='Asia/Tokyo') - exp_arr = np.array([0, 1, 2, 3]) + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) arr, idx = idx3.factorize() self.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3) @@ -3847,7 +3847,7 @@ def test_factorize_tz(self): base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) idx = base.repeat(5) - exp_arr = np.arange(100).repeat(5) + exp_arr = np.arange(100, dtype=np.intp).repeat(5) for obj in [idx, pd.Series(idx)]: arr, res = obj.factorize() @@ -3861,7 +3861,7 @@ def test_factorize_dst(self): for obj in [idx, pd.Series(idx)]: arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, np.arange(12)) + self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) tm.assert_index_equal(res, idx) idx = pd.date_range('2016-06-13', freq='H', periods=12, @@ -3869,7 +3869,7 @@ def test_factorize_dst(self): for obj in [idx, pd.Series(idx)]: arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, 
np.arange(12)) + self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) tm.assert_index_equal(res, idx) def test_slice_with_negative_step(self):