Skip to content

Commit 9375460

Browse files
committed
BUG: Merge with empty dataframe may raise IndexError
1 parent 35cc80d commit 9375460

File tree

3 files changed

+100
-2
lines changed

3 files changed

+100
-2
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,8 @@ Bug Fixes
660660
- Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`)
661661
- Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`)
662662
- Bug in ``Index.take`` where an unnecessary ``freq`` attribute may be added to the result (:issue:`10791`)
663+
- Bug in ``merge`` where merging with an empty ``DataFrame`` may raise ``IndexError`` (:issue:`10824`)
664+
663665

664666
- Bug in ``read_csv`` when using the ``nrows`` or ``chunksize`` parameters if file contains only a header line (:issue:`9535`)
665667

pandas/tools/merge.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
217217
if left_indexer is not None and right_indexer is not None:
218218

219219
if name in self.left:
220+
if len(self.left) == 0:
221+
continue
222+
220223
na_indexer = (left_indexer == -1).nonzero()[0]
221224
if len(na_indexer) == 0:
222225
continue
@@ -226,6 +229,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
226229
na_indexer, com.take_1d(self.right_join_keys[i],
227230
right_na_indexer))
228231
elif name in self.right:
232+
if len(self.right) == 0:
233+
continue
234+
229235
na_indexer = (right_indexer == -1).nonzero()[0]
230236
if len(na_indexer) == 0:
231237
continue
@@ -270,9 +276,17 @@ def _get_join_info(self):
270276
sort=self.sort, how=self.how)
271277

272278
if self.right_index:
273-
join_index = self.left.index.take(left_indexer)
279+
if len(self.left) > 0:
280+
join_index = self.left.index.take(left_indexer)
281+
else:
282+
join_index = self.right.index.take(right_indexer)
283+
left_indexer = np.array([-1] * len(join_index))
274284
elif self.left_index:
275-
join_index = self.right.index.take(right_indexer)
285+
if len(self.right) > 0:
286+
join_index = self.right.index.take(right_indexer)
287+
else:
288+
join_index = self.left.index.take(left_indexer)
289+
right_indexer = np.array([-1] * len(join_index))
276290
else:
277291
join_index = Index(np.arange(len(left_indexer)))
278292

pandas/tools/tests/test_merge.py

+82
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,88 @@ def test_left_merge_empty_dataframe(self):
737737
result = merge(right, left, on='key', how='right')
738738
assert_frame_equal(result, left)
739739

740+
def test_merge_left_empty_right_empty(self):
741+
# GH 10824
742+
left = pd.DataFrame([], columns=['a', 'b', 'c'])
743+
right = pd.DataFrame([], columns=['x', 'y', 'z'])
744+
745+
exp_in = pd.DataFrame([], columns=['a', 'b', 'c', 'x', 'y', 'z'],
746+
dtype=object)
747+
748+
for kwarg in [dict(left_index=True, right_index=True),
749+
dict(left_index=True, right_on='x'),
750+
dict(left_on='a', right_index=True),
751+
dict(left_on='a', right_on='x')]:
752+
753+
result = pd.merge(left, right, how='inner', **kwarg)
754+
tm.assert_frame_equal(result, exp_in)
755+
result = pd.merge(left, right, how='left', **kwarg)
756+
tm.assert_frame_equal(result, exp_in)
757+
result = pd.merge(left, right, how='right', **kwarg)
758+
tm.assert_frame_equal(result, exp_in)
759+
result = pd.merge(left, right, how='outer', **kwarg)
760+
tm.assert_frame_equal(result, exp_in)
761+
762+
def test_merge_left_empty_right_notempty(self):
763+
# GH 10824
764+
left = pd.DataFrame([], columns=['a', 'b', 'c'])
765+
right = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
766+
columns=['x', 'y', 'z'])
767+
768+
exp_out = pd.DataFrame({'a': np.array([np.nan]*3, dtype=object),
769+
'b': np.array([np.nan]*3, dtype=object),
770+
'c': np.array([np.nan]*3, dtype=object),
771+
'x': [1, 4, 7],
772+
'y': [2, 5, 8],
773+
'z': [3, 6, 9]},
774+
columns=['a', 'b', 'c', 'x', 'y', 'z'])
775+
exp_in = exp_out[0:0] # make empty DataFrame keeping dtype
776+
777+
for kwarg in [dict(left_index=True, right_index=True),
778+
dict(left_index=True, right_on='x'),
779+
dict(left_on='a', right_index=True),
780+
dict(left_on='a', right_on='x')]:
781+
782+
result = pd.merge(left, right, how='inner', **kwarg)
783+
tm.assert_frame_equal(result, exp_in)
784+
result = pd.merge(left, right, how='left', **kwarg)
785+
tm.assert_frame_equal(result, exp_in)
786+
787+
result = pd.merge(left, right, how='right', **kwarg)
788+
tm.assert_frame_equal(result, exp_out)
789+
result = pd.merge(left, right, how='outer', **kwarg)
790+
tm.assert_frame_equal(result, exp_out)
791+
792+
def test_merge_left_notempty_right_empty(self):
793+
# GH 10824
794+
left = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
795+
columns=['a', 'b', 'c'])
796+
right = pd.DataFrame([], columns=['x', 'y', 'z'])
797+
798+
exp_out = pd.DataFrame({'a': [1, 4, 7],
799+
'b': [2, 5, 8],
800+
'c': [3, 6, 9],
801+
'x': np.array([np.nan]*3, dtype=object),
802+
'y': np.array([np.nan]*3, dtype=object),
803+
'z': np.array([np.nan]*3, dtype=object)},
804+
columns=['a', 'b', 'c', 'x', 'y', 'z'])
805+
exp_in = exp_out[0:0] # make empty DataFrame keeping dtype
806+
807+
for kwarg in [dict(left_index=True, right_index=True),
808+
dict(left_index=True, right_on='x'),
809+
dict(left_on='a', right_index=True),
810+
dict(left_on='a', right_on='x')]:
811+
812+
result = pd.merge(left, right, how='inner', **kwarg)
813+
tm.assert_frame_equal(result, exp_in)
814+
result = pd.merge(left, right, how='right', **kwarg)
815+
tm.assert_frame_equal(result, exp_in)
816+
817+
result = pd.merge(left, right, how='left', **kwarg)
818+
tm.assert_frame_equal(result, exp_out)
819+
result = pd.merge(left, right, how='outer', **kwarg)
820+
tm.assert_frame_equal(result, exp_out)
821+
740822
def test_merge_nosort(self):
741823
# #2098, anything to do?
742824

0 commit comments

Comments
 (0)