From b66893374b6eca38eac620aec666a6c61bc3563f Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Thu, 9 Nov 2017 15:46:38 +0100 Subject: [PATCH 1/3] Fix for #18178 and #18187 by changing the concat of empty RangeIndex The `_concat_rangeindex_same_dtype` now keeps track of the last non-empty RangeIndex to extract the new stop value. This fixes two issues with concatenating non-empty and empty DataFrames and Series. Two regression tests were added as well. --- pandas/core/dtypes/concat.py | 12 +++++++++--- pandas/tests/reshape/test_concat.py | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 4e15aa50e4319..5e45cc6889eea 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -572,11 +572,13 @@ def _concat_rangeindex_same_dtype(indexes): indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) """ - start = step = next = None + start = step = next = last_non_empty = None for obj in indexes: if not len(obj): continue + # Remember the last non-empty index for the stop value + last_non_empty = obj if start is None: # This is set by the first non-empty index @@ -599,8 +601,12 @@ def _concat_rangeindex_same_dtype(indexes): if step is not None: next = obj[-1] + step - if start is None: + if last_non_empty is None: + # Here all "indexes" had 0 length, i.e. were empty. + # Simply take start, stop, and step from the last "obj". 
start = obj._start step = obj._step - stop = obj._stop if next is None else next + stop = obj._stop + else: + stop = last_non_empty._stop if next is None else next return indexes[0].__class__(start, stop, step) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index c9c294e70e7b1..fd5b4611e58d6 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1983,3 +1983,21 @@ def test_concat_will_upcast(dt, pdt): pdt(np.array([5], dtype=dt, ndmin=dims))] x = pd.concat(dfs) assert x.values.dtype == 'float64' + + +def test_concat_empty_and_non_empty_frame_regression(): + # GH 18178 regression test + df1 = pd.DataFrame({'foo': [1]}) + df2 = pd.DataFrame({'foo': []}) + expected = pd.DataFrame({'foo': [1.0]}) + result = pd.concat([df1, df2]) + assert_frame_equal(result, expected) + + +def test_concat_empty_and_non_empty_series_regression(): + # GH 18187 regression test + s1 = pd.Series([1]) + s2 = pd.Series([]) + expected = s1 + result = pd.concat([s1, s2]) + tm.assert_series_equal(result, expected) From ffc3b6cd2dcacc2d6e8d88c8adcf2b911a4f6102 Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Thu, 9 Nov 2017 16:01:12 +0100 Subject: [PATCH 2/3] Added whatsnew entry --- doc/source/whatsnew/v0.21.1.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index ffabc7dfe81ac..185f08514641f 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -61,6 +61,7 @@ Bug Fixes - Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`) - Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) +- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178`, :issue:`18187`) 
Conversion ^^^^^^^^^^ From 3082d0e342bffbcb8e8d3bfa46a36fba4e4f2da0 Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Fri, 10 Nov 2017 09:27:43 +0100 Subject: [PATCH 3/3] Filtering the non empty indexes before the main loop --- pandas/core/dtypes/concat.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 5e45cc6889eea..dc4d819383dfb 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -572,13 +572,12 @@ def _concat_rangeindex_same_dtype(indexes): indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) """ - start = step = next = last_non_empty = None + start = step = next = None - for obj in indexes: - if not len(obj): - continue - # Remember the last non-empty index for the stop value - last_non_empty = obj + # Filter the empty indexes + non_empty_indexes = [obj for obj in indexes if len(obj)] + + for obj in non_empty_indexes: if start is None: # This is set by the first non-empty index @@ -601,12 +600,16 @@ def _concat_rangeindex_same_dtype(indexes): if step is not None: next = obj[-1] + step - if last_non_empty is None: + if non_empty_indexes: + # Get the stop value from "next" or alternatively + # from the last non-empty index + stop = non_empty_indexes[-1]._stop if next is None else next + else: # Here all "indexes" had 0 length, i.e. were empty. - # Simply take start, stop, and step from the last "obj". + # Simply take start, stop, and step from the last empty index. + obj = indexes[-1] start = obj._start step = obj._step stop = obj._stop - else: - stop = last_non_empty._stop if next is None else next + return indexes[0].__class__(start, stop, step)