Skip to content

Commit 71b9cbb

Browse files
committed
Fixed bug in outer_join_indexer where the special case of an empty right array resulted in bogus return data (the fill loop iterated over nright, i.e. zero times, instead of nleft).
1 parent d197833 commit 71b9cbb

File tree

4 files changed

+39
-6
lines changed

4 files changed

+39
-6
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ Bug Fixes
389389
- Bug in ``MultiIndex.get_level_values`` including ``Categorical`` raises ``AttributeError`` (:issue:`10460`)
390390
- Bug in ``pd.get_dummies`` with ``sparse=True`` not returning ``SparseDataFrame`` (:issue:`10531`)
391391
- Bug in ``Index`` subtypes (such as ``PeriodIndex``) not returning their own type for ``.drop`` and ``.insert`` methods (:issue:`10620`)
392+
- Bug in ``algos.outer_join_indexer`` when ``right`` array is empty (:issue:`10618`)
392393

393394

394395

pandas/src/generate_code.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2131,7 +2131,7 @@ def outer_join_indexer_%(name)s(ndarray[%(c_type)s] left,
21312131
rindexer[j] = j
21322132
result[j] = right[j]
21332133
elif nright == 0:
2134-
for i in range(nright):
2134+
for i in range(nleft):
21352135
lindexer[i] = i
21362136
rindexer[i] = -1
21372137
result[i] = left[i]

pandas/src/generated.pyx

+5-5
Original file line numberDiff line numberDiff line change
@@ -10345,7 +10345,7 @@ def outer_join_indexer_float64(ndarray[float64_t] left,
1034510345
rindexer[j] = j
1034610346
result[j] = right[j]
1034710347
elif nright == 0:
10348-
for i in range(nright):
10348+
for i in range(nleft):
1034910349
lindexer[i] = i
1035010350
rindexer[i] = -1
1035110351
result[i] = left[i]
@@ -10474,7 +10474,7 @@ def outer_join_indexer_float32(ndarray[float32_t] left,
1047410474
rindexer[j] = j
1047510475
result[j] = right[j]
1047610476
elif nright == 0:
10477-
for i in range(nright):
10477+
for i in range(nleft):
1047810478
lindexer[i] = i
1047910479
rindexer[i] = -1
1048010480
result[i] = left[i]
@@ -10603,7 +10603,7 @@ def outer_join_indexer_object(ndarray[object] left,
1060310603
rindexer[j] = j
1060410604
result[j] = right[j]
1060510605
elif nright == 0:
10606-
for i in range(nright):
10606+
for i in range(nleft):
1060710607
lindexer[i] = i
1060810608
rindexer[i] = -1
1060910609
result[i] = left[i]
@@ -10732,7 +10732,7 @@ def outer_join_indexer_int32(ndarray[int32_t] left,
1073210732
rindexer[j] = j
1073310733
result[j] = right[j]
1073410734
elif nright == 0:
10735-
for i in range(nright):
10735+
for i in range(nleft):
1073610736
lindexer[i] = i
1073710737
rindexer[i] = -1
1073810738
result[i] = left[i]
@@ -10861,7 +10861,7 @@ def outer_join_indexer_int64(ndarray[int64_t] left,
1086110861
rindexer[j] = j
1086210862
result[j] = right[j]
1086310863
elif nright == 0:
10864-
for i in range(nright):
10864+
for i in range(nleft):
1086510865
lindexer[i] = i
1086610866
rindexer[i] = -1
1086710867
result[i] = left[i]

pandas/tests/test_algos.py

+32
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,38 @@ def _test_vector_resize(htable, uniques, dtype, nvals):
168168
_test_vector_resize(tbl(), vect(), dtype, 0)
169169
_test_vector_resize(tbl(), vect(), dtype, 10)
170170

171+
class TestIndexer(tm.TestCase):
172+
_multiprocess_can_split_ = True
173+
174+
def test_outer_join_indexer(self):
175+
typemap = [('int32', algos.algos.outer_join_indexer_int32),
176+
('int64', algos.algos.outer_join_indexer_int64),
177+
('float32', algos.algos.outer_join_indexer_float32),
178+
('float64', algos.algos.outer_join_indexer_float64),
179+
('object', algos.algos.outer_join_indexer_object)]
180+
181+
for dtype, indexer in typemap:
182+
left = np.arange(3, dtype = dtype)
183+
right = np.arange(2,5, dtype = dtype)
184+
empty = np.array([], dtype = dtype)
185+
186+
result, lindexer, rindexer = indexer(left, right)
187+
tm.assertIsInstance(result, np.ndarray)
188+
tm.assertIsInstance(lindexer, np.ndarray)
189+
tm.assertIsInstance(rindexer, np.ndarray)
190+
tm.assert_numpy_array_equal(result, np.arange(5, dtype = dtype))
191+
tm.assert_numpy_array_equal(lindexer, np.array([0, 1, 2, -1, -1]))
192+
tm.assert_numpy_array_equal(rindexer, np.array([-1, -1, 0, 1, 2]))
193+
194+
result, lindexer, rindexer = indexer(empty, right)
195+
tm.assert_numpy_array_equal(result, right)
196+
tm.assert_numpy_array_equal(lindexer, np.array([-1, -1, -1]))
197+
tm.assert_numpy_array_equal(rindexer, np.array([0, 1, 2]))
198+
199+
result, lindexer, rindexer = indexer(left, empty)
200+
tm.assert_numpy_array_equal(result, left)
201+
tm.assert_numpy_array_equal(lindexer, np.array([0, 1, 2]))
202+
tm.assert_numpy_array_equal(rindexer, np.array([-1, -1, -1]))
171203

172204
class TestUnique(tm.TestCase):
173205
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)