Skip to content

Commit 84f38b2

Browse files
committed
adjust test for platform independence
1 parent 3ced5d5 commit 84f38b2

17 files changed

+135
-123
lines changed

doc/source/whatsnew/v0.19.0.txt

+7-5
Original file line numberDiff line numberDiff line change
@@ -777,8 +777,8 @@ Indexer dtype Changes
777777

778778
Methods such as ``Index.get_indexer`` that return an indexer array coerce that array to a "platform int", so that it can be
779779
directly used in 3rd party library operations like ``numpy.take``. Previously, a platform int was defined as ``np.int_``
780-
which corresponds to a C integer - but the correct type, and what is being used now, is ``np.intp``, which corresponds
781-
to the C integer size that can hold a pointer.
780+
which corresponds to a C integer, but the correct type, and what is being used now, is ``np.intp``, which corresponds
781+
to the C integer size that can hold a pointer. (:issue:`13972`)
782782

783783
These types are the same on many platforms, but for 64-bit Python on Windows,
784784
``np.int_`` is 32 bits, and ``np.intp`` is 64 bits. Changing this behavior improves performance for many
@@ -795,10 +795,12 @@ Previous behaviour:
795795

796796
New behaviour:
797797

798-
.. ipython :: python
798+
.. code-block:: ipython
799799

800-
i = pd.Index(['a', 'b', 'c'])
801-
i.get_indexer(['b', 'b', 'c']).dtype
800+
In [1]: i = pd.Index(['a', 'b', 'c'])
801+
802+
In [2]: i.get_indexer(['b', 'b', 'c']).dtype
803+
Out[2]: dtype('int64')
802804

803805

804806
.. _whatsnew_0190.deprecations:

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def sort_mixed(values):
259259
new_labels = reverse_indexer.take(labels, mode='wrap')
260260
np.putmask(new_labels, mask, na_sentinel)
261261

262-
return ordered, new_labels
262+
return ordered, _ensure_platform_int(new_labels)
263263

264264

265265
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):

pandas/hashtable.pyx

+6-6
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ cdef class Factorizer:
6464
mask = (labels == na_sentinel)
6565
# sort on
6666
if sort:
67-
if labels.dtype != np.int_:
68-
labels = labels.astype(np.int_)
67+
if labels.dtype != np.intp:
68+
labels = labels.astype(np.intp)
6969
sorter = self.uniques.to_array().argsort()
70-
reverse_indexer = np.empty(len(sorter), dtype=np.int_)
70+
reverse_indexer = np.empty(len(sorter), dtype=np.intp)
7171
reverse_indexer.put(sorter, np.arange(len(sorter)))
7272
labels = reverse_indexer.take(labels, mode='clip')
7373
labels[mask] = na_sentinel
@@ -100,11 +100,11 @@ cdef class Int64Factorizer:
100100

101101
# sort on
102102
if sort:
103-
if labels.dtype != np.int_:
104-
labels = labels.astype(np.int_)
103+
if labels.dtype != np.intp:
104+
labels = labels.astype(np.intp)
105105

106106
sorter = self.uniques.to_array().argsort()
107-
reverse_indexer = np.empty(len(sorter), dtype=np.int_)
107+
reverse_indexer = np.empty(len(sorter), dtype=np.intp)
108108
reverse_indexer.put(sorter, np.arange(len(sorter)))
109109

110110
labels = reverse_indexer.take(labels)

pandas/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2816,7 +2816,7 @@ def _get_leaf_sorter(labels):
28162816
new_levels[level] = new_level
28172817

28182818
if keep_order: # just drop missing values. o.w. keep order
2819-
left_indexer = np.arange(len(left))
2819+
left_indexer = np.arange(len(left), dtype=np.intp)
28202820
mask = new_lev_labels != -1
28212821
if not mask.all():
28222822
new_labels = [lab[mask] for lab in new_labels]

pandas/tests/indexes/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def f():
110110

111111
def test_reindex_base(self):
112112
idx = self.create_index()
113-
expected = np.arange(idx.size)
113+
expected = np.arange(idx.size, dtype=np.intp)
114114

115115
actual = idx.get_indexer(idx)
116116
tm.assert_numpy_array_equal(expected, actual)

pandas/tests/indexes/test_base.py

+19-12
Original file line numberDiff line numberDiff line change
@@ -936,10 +936,10 @@ def test_get_indexer(self):
936936
idx2 = Index([2, 4, 6])
937937

938938
r1 = idx1.get_indexer(idx2)
939-
assert_almost_equal(r1, np.array([1, 3, -1]))
939+
assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp))
940940

941941
r1 = idx2.get_indexer(idx1, method='pad')
942-
e1 = np.array([-1, 0, 0, 1, 1])
942+
e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
943943
assert_almost_equal(r1, e1)
944944

945945
r2 = idx2.get_indexer(idx1[::-1], method='pad')
@@ -949,7 +949,7 @@ def test_get_indexer(self):
949949
assert_almost_equal(r1, rffill1)
950950

951951
r1 = idx2.get_indexer(idx1, method='backfill')
952-
e1 = np.array([0, 0, 1, 1, 2])
952+
e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp)
953953
assert_almost_equal(r1, e1)
954954

955955
rbfill1 = idx2.get_indexer(idx1, method='bfill')
@@ -974,25 +974,30 @@ def test_get_indexer_nearest(self):
974974
all_methods = ['pad', 'backfill', 'nearest']
975975
for method in all_methods:
976976
actual = idx.get_indexer([0, 5, 9], method=method)
977-
tm.assert_numpy_array_equal(actual, np.array([0, 5, 9]))
977+
tm.assert_numpy_array_equal(actual, np.array([0, 5, 9],
978+
dtype=np.intp))
978979

979980
actual = idx.get_indexer([0, 5, 9], method=method, tolerance=0)
980-
tm.assert_numpy_array_equal(actual, np.array([0, 5, 9]))
981+
tm.assert_numpy_array_equal(actual, np.array([0, 5, 9],
982+
dtype=np.intp))
981983

982984
for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9],
983985
[0, 2, 9]]):
984986
actual = idx.get_indexer([0.2, 1.8, 8.5], method=method)
985-
tm.assert_numpy_array_equal(actual, np.array(expected))
987+
tm.assert_numpy_array_equal(actual, np.array(expected,
988+
dtype=np.intp))
986989

987990
actual = idx.get_indexer([0.2, 1.8, 8.5], method=method,
988991
tolerance=1)
989-
tm.assert_numpy_array_equal(actual, np.array(expected))
992+
tm.assert_numpy_array_equal(actual, np.array(expected,
993+
dtype=np.intp))
990994

991995
for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1],
992996
[0, 2, -1]]):
993997
actual = idx.get_indexer([0.2, 1.8, 8.5], method=method,
994998
tolerance=0.2)
995-
tm.assert_numpy_array_equal(actual, np.array(expected))
999+
tm.assert_numpy_array_equal(actual, np.array(expected,
1000+
dtype=np.intp))
9961001

9971002
with tm.assertRaisesRegexp(ValueError, 'limit argument'):
9981003
idx.get_indexer([1, 0], method='nearest', limit=1)
@@ -1003,22 +1008,24 @@ def test_get_indexer_nearest_decreasing(self):
10031008
all_methods = ['pad', 'backfill', 'nearest']
10041009
for method in all_methods:
10051010
actual = idx.get_indexer([0, 5, 9], method=method)
1006-
tm.assert_numpy_array_equal(actual, np.array([9, 4, 0]))
1011+
tm.assert_numpy_array_equal(actual, np.array([9, 4, 0],
1012+
dtype=np.intp))
10071013

10081014
for method, expected in zip(all_methods, [[8, 7, 0], [9, 8, 1],
10091015
[9, 7, 0]]):
10101016
actual = idx.get_indexer([0.2, 1.8, 8.5], method=method)
1011-
tm.assert_numpy_array_equal(actual, np.array(expected))
1017+
tm.assert_numpy_array_equal(actual, np.array(expected,
1018+
dtype=np.intp))
10121019

10131020
def test_get_indexer_strings(self):
10141021
idx = pd.Index(['b', 'c'])
10151022

10161023
actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='pad')
1017-
expected = np.array([-1, 0, 1, 1])
1024+
expected = np.array([-1, 0, 1, 1], dtype=np.intp)
10181025
tm.assert_numpy_array_equal(actual, expected)
10191026

10201027
actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='backfill')
1021-
expected = np.array([0, 0, 1, -1])
1028+
expected = np.array([0, 0, 1, -1], dtype=np.intp)
10221029
tm.assert_numpy_array_equal(actual, expected)
10231030

10241031
with tm.assertRaises(TypeError):

pandas/tests/indexes/test_category.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ def test_reindex_base(self):
336336

337337
# determined by cat ordering
338338
idx = self.create_index()
339-
expected = np.array([4, 0, 1, 5, 2, 3])
339+
expected = np.array([4, 0, 1, 5, 2, 3], dtype=np.intp)
340340

341341
actual = idx.get_indexer(idx)
342342
tm.assert_numpy_array_equal(expected, actual)
@@ -403,7 +403,7 @@ def test_get_indexer(self):
403403

404404
for indexer in [idx2, list('abf'), Index(list('abf'))]:
405405
r1 = idx1.get_indexer(idx2)
406-
assert_almost_equal(r1, np.array([0, 1, 2, -1]))
406+
assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp))
407407

408408
self.assertRaises(NotImplementedError,
409409
lambda: idx2.get_indexer(idx1, method='pad'))

pandas/tests/indexes/test_datetimelike.py

+17-16
Original file line numberDiff line numberDiff line change
@@ -552,20 +552,21 @@ def test_get_loc(self):
552552

553553
def test_get_indexer(self):
554554
idx = pd.date_range('2000-01-01', periods=3)
555-
tm.assert_numpy_array_equal(idx.get_indexer(idx), np.array([0, 1, 2]))
555+
exp = np.array([0, 1, 2], dtype=np.intp)
556+
tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)
556557

557558
target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours',
558559
'1 day 1 hour'])
559560
tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'),
560-
np.array([-1, 0, 1]))
561+
np.array([-1, 0, 1], dtype=np.intp))
561562
tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'),
562-
np.array([0, 1, 2]))
563+
np.array([0, 1, 2], dtype=np.intp))
563564
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'),
564-
np.array([0, 1, 1]))
565+
np.array([0, 1, 1], dtype=np.intp))
565566
tm.assert_numpy_array_equal(
566567
idx.get_indexer(target, 'nearest',
567568
tolerance=pd.Timedelta('1 hour')),
568-
np.array([0, -1, 1]))
569+
np.array([0, -1, 1], dtype=np.intp))
569570
with tm.assertRaises(ValueError):
570571
idx.get_indexer(idx[[0]], method='nearest', tolerance='foo')
571572

@@ -872,27 +873,27 @@ def test_where_other(self):
872873
def test_get_indexer(self):
873874
idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start')
874875
tm.assert_numpy_array_equal(idx.get_indexer(idx),
875-
np.array([0, 1, 2], dtype=np.int_))
876+
np.array([0, 1, 2], dtype=np.intp))
876877

877878
target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12',
878879
'2000-01-02T01'], freq='H')
879880
tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'),
880-
np.array([-1, 0, 1], dtype=np.int_))
881+
np.array([-1, 0, 1], dtype=np.intp))
881882
tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'),
882-
np.array([0, 1, 2], dtype=np.int_))
883+
np.array([0, 1, 2], dtype=np.intp))
883884
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'),
884-
np.array([0, 1, 1], dtype=np.int_))
885+
np.array([0, 1, 1], dtype=np.intp))
885886
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest',
886887
tolerance='1 hour'),
887-
np.array([0, -1, 1], dtype=np.int_))
888+
np.array([0, -1, 1], dtype=np.intp))
888889

889890
msg = 'Input has different freq from PeriodIndex\\(freq=H\\)'
890891
with self.assertRaisesRegexp(ValueError, msg):
891892
idx.get_indexer(target, 'nearest', tolerance='1 minute')
892893

893894
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest',
894895
tolerance='1 day'),
895-
np.array([0, 1, 1], dtype=np.int_))
896+
np.array([0, 1, 1], dtype=np.intp))
896897

897898
def test_repeat(self):
898899
# GH10183
@@ -1048,19 +1049,19 @@ def test_get_loc(self):
10481049
def test_get_indexer(self):
10491050
idx = pd.to_timedelta(['0 days', '1 days', '2 days'])
10501051
tm.assert_numpy_array_equal(idx.get_indexer(idx),
1051-
np.array([0, 1, 2], dtype=np.int_))
1052+
np.array([0, 1, 2], dtype=np.intp))
10521053

10531054
target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour'])
10541055
tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'),
1055-
np.array([-1, 0, 1], dtype=np.int_))
1056+
np.array([-1, 0, 1], dtype=np.intp))
10561057
tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'),
1057-
np.array([0, 1, 2], dtype=np.int_))
1058+
np.array([0, 1, 2], dtype=np.intp))
10581059
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'),
1059-
np.array([0, 1, 1], dtype=np.int_))
1060+
np.array([0, 1, 1], dtype=np.intp))
10601061

10611062
res = idx.get_indexer(target, 'nearest',
10621063
tolerance=pd.Timedelta('1 hour'))
1063-
tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.int_))
1064+
tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp))
10641065

10651066
def test_numeric_compat(self):
10661067

pandas/tests/indexes/test_multi.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ def test_legacy_pickle(self):
775775
self.assertTrue(obj.equals(obj2))
776776

777777
res = obj.get_indexer(obj)
778-
exp = np.arange(len(obj))
778+
exp = np.arange(len(obj), dtype=np.intp)
779779
assert_almost_equal(res, exp)
780780

781781
res = obj.get_indexer(obj2[::-1])
@@ -794,7 +794,7 @@ def test_legacy_v2_unpickle(self):
794794
self.assertTrue(obj.equals(obj2))
795795

796796
res = obj.get_indexer(obj)
797-
exp = np.arange(len(obj))
797+
exp = np.arange(len(obj), dtype=np.intp)
798798
assert_almost_equal(res, exp)
799799

800800
res = obj.get_indexer(obj2[::-1])
@@ -1039,19 +1039,19 @@ def test_get_indexer(self):
10391039
major_axis = Index(lrange(4))
10401040
minor_axis = Index(lrange(2))
10411041

1042-
major_labels = np.array([0, 0, 1, 2, 2, 3, 3])
1043-
minor_labels = np.array([0, 1, 0, 0, 1, 0, 1])
1042+
major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp)
1043+
minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp)
10441044

10451045
index = MultiIndex(levels=[major_axis, minor_axis],
10461046
labels=[major_labels, minor_labels])
10471047
idx1 = index[:5]
10481048
idx2 = index[[1, 3, 5]]
10491049

10501050
r1 = idx1.get_indexer(idx2)
1051-
assert_almost_equal(r1, np.array([1, 3, -1]))
1051+
assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp))
10521052

10531053
r1 = idx2.get_indexer(idx1, method='pad')
1054-
e1 = np.array([-1, 0, 0, 1, 1])
1054+
e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
10551055
assert_almost_equal(r1, e1)
10561056

10571057
r2 = idx2.get_indexer(idx1[::-1], method='pad')
@@ -1061,7 +1061,7 @@ def test_get_indexer(self):
10611061
assert_almost_equal(r1, rffill1)
10621062

10631063
r1 = idx2.get_indexer(idx1, method='backfill')
1064-
e1 = np.array([0, 0, 1, 1, 2])
1064+
e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp)
10651065
assert_almost_equal(r1, e1)
10661066

10671067
r2 = idx2.get_indexer(idx1[::-1], method='backfill')
@@ -1723,8 +1723,8 @@ def test_join_multi(self):
17231723
jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True)
17241724
exp_idx = pd.MultiIndex.from_product(
17251725
[np.arange(4), [1, 2]], names=['a', 'b'])
1726-
exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.int_)
1727-
exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.int64)
1726+
exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
1727+
exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
17281728
self.assert_index_equal(jidx, exp_idx)
17291729
self.assert_numpy_array_equal(lidx, exp_lidx)
17301730
self.assert_numpy_array_equal(ridx, exp_ridx)

0 commit comments

Comments
 (0)