From 71b9cbbc71432b593430b23ece36cf67145a273d Mon Sep 17 00:00:00 2001 From: agijsberts Date: Sun, 19 Jul 2015 01:56:35 +0200 Subject: [PATCH] Fixed bug in outer_join_indexer where the special case of an empty right array resulted in bogus return data. --- doc/source/whatsnew/v0.17.0.txt | 1 + pandas/src/generate_code.py | 2 +- pandas/src/generated.pyx | 10 +++++----- pandas/tests/test_algos.py | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 83e5ec5b1d107..610a1dbc65c6e 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -389,6 +389,7 @@ Bug Fixes - Bug in ``MultiIndex.get_level_values`` including ``Categorical`` raises ``AttributeError`` (:issue:`10460`) - Bug in ``pd.get_dummies`` with `sparse=True` not returning ``SparseDataFrame`` (:issue:`10531`) - Bug in ``Index`` subtypes (such as ``PeriodIndex``) not returning their own type for ``.drop`` and ``.insert`` methods (:issue:`10620`) +- Bug in ``algos.outer_join_indexer`` when ``right`` array is empty (:issue:`10618`) diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 48e828af826c1..29a991a9acfd3 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -2131,7 +2131,7 @@ def outer_join_indexer_%(name)s(ndarray[%(c_type)s] left, rindexer[j] = j result[j] = right[j] elif nright == 0: - for i in range(nright): + for i in range(nleft): lindexer[i] = i rindexer[i] = -1 result[i] = left[i] diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index db0e96d158f0c..d4cf7824c8911 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -10345,7 +10345,7 @@ def outer_join_indexer_float64(ndarray[float64_t] left, rindexer[j] = j result[j] = right[j] elif nright == 0: - for i in range(nright): + for i in range(nleft): lindexer[i] = i rindexer[i] = -1 result[i] = left[i] @@ -10474,7 +10474,7 @@ def
outer_join_indexer_float32(ndarray[float32_t] left, rindexer[j] = j result[j] = right[j] elif nright == 0: - for i in range(nright): + for i in range(nleft): lindexer[i] = i rindexer[i] = -1 result[i] = left[i] @@ -10603,7 +10603,7 @@ def outer_join_indexer_object(ndarray[object] left, rindexer[j] = j result[j] = right[j] elif nright == 0: - for i in range(nright): + for i in range(nleft): lindexer[i] = i rindexer[i] = -1 result[i] = left[i] @@ -10732,7 +10732,7 @@ def outer_join_indexer_int32(ndarray[int32_t] left, rindexer[j] = j result[j] = right[j] elif nright == 0: - for i in range(nright): + for i in range(nleft): lindexer[i] = i rindexer[i] = -1 result[i] = left[i] @@ -10861,7 +10861,7 @@ def outer_join_indexer_int64(ndarray[int64_t] left, rindexer[j] = j result[j] = right[j] elif nright == 0: - for i in range(nright): + for i in range(nleft): lindexer[i] = i rindexer[i] = -1 result[i] = left[i] diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index cb5687acf3a34..30dcd8631f13a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -168,6 +168,38 @@ def _test_vector_resize(htable, uniques, dtype, nvals): _test_vector_resize(tbl(), vect(), dtype, 0) _test_vector_resize(tbl(), vect(), dtype, 10) +class TestIndexer(tm.TestCase): + _multiprocess_can_split_ = True + + def test_outer_join_indexer(self): + typemap = [('int32', algos.algos.outer_join_indexer_int32), + ('int64', algos.algos.outer_join_indexer_int64), + ('float32', algos.algos.outer_join_indexer_float32), + ('float64', algos.algos.outer_join_indexer_float64), + ('object', algos.algos.outer_join_indexer_object)] + + for dtype, indexer in typemap: + left = np.arange(3, dtype = dtype) + right = np.arange(2,5, dtype = dtype) + empty = np.array([], dtype = dtype) + + result, lindexer, rindexer = indexer(left, right) + tm.assertIsInstance(result, np.ndarray) + tm.assertIsInstance(lindexer, np.ndarray) + tm.assertIsInstance(rindexer, np.ndarray) + 
tm.assert_numpy_array_equal(result, np.arange(5, dtype = dtype)) + tm.assert_numpy_array_equal(lindexer, np.array([0, 1, 2, -1, -1])) + tm.assert_numpy_array_equal(rindexer, np.array([-1, -1, 0, 1, 2])) + + result, lindexer, rindexer = indexer(empty, right) + tm.assert_numpy_array_equal(result, right) + tm.assert_numpy_array_equal(lindexer, np.array([-1, -1, -1])) + tm.assert_numpy_array_equal(rindexer, np.array([0, 1, 2])) + + result, lindexer, rindexer = indexer(left, empty) + tm.assert_numpy_array_equal(result, left) + tm.assert_numpy_array_equal(lindexer, np.array([0, 1, 2])) + tm.assert_numpy_array_equal(rindexer, np.array([-1, -1, -1])) class TestUnique(tm.TestCase): _multiprocess_can_split_ = True