From 9e760a068535f52b27d3115f0a564afd6614c3fe Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Sun, 7 Oct 2018 19:04:30 -0500 Subject: [PATCH 01/12] BUG-22796 Concat multicolumn tz-aware DataFrame fails --- pandas/core/internals/concat.py | 6 +++++- pandas/tests/frame/test_combine_concat.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 5a3f11525acf8..1802ce56298c6 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -18,6 +18,8 @@ from pandas.core.dtypes.cast import maybe_promote import pandas.core.dtypes.concat as _concat +from pandas.core.indexes.datetimes import DatetimeIndex + import pandas.core.algorithms as algos @@ -186,7 +188,9 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if getattr(self.block, 'is_datetimetz', False) or \ is_datetimetz(empty_dtype): - pass + missing_time = DatetimeIndex([fill_value], + dtype=empty_dtype) + return missing_time elif getattr(self.block, 'is_categorical', False): pass elif getattr(self.block, 'is_sparse', False): diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 15ca65395e4fc..155022d949f8a 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -52,6 +52,21 @@ def test_concat_multiple_tzs(self): results = pd.concat([df2, df3]).reset_index(drop=True) expected = DataFrame(dict(time=[ts2, ts3])) assert_frame_equal(results, expected) + + def test_concat_tz_NaT(self): + # GH 22796 + # Concating tz-aware multicolumn DataFrames + ts1 = Timestamp('2015-01-01', tz='UTC') + ts2 = Timestamp('2015-01-01', tz='UTC') + ts3 = Timestamp('2015-01-01', tz='UTC') + + df1 = pd.DataFrame([[ts1, ts2]]) + df2 = pd.DataFrame([[ts3]]) + + result = pd.concat([df1, df2]) + expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0]) + + assert_frame_equal(result, expected) def 
test_concat_tuple_keys(self): # GH 14438 From 5ef5dd295e9d25167c5c4795ca8b36b97372761f Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Sun, 7 Oct 2018 19:49:47 -0500 Subject: [PATCH 02/12] Fixed shape issues --- pandas/core/internals/concat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 1802ce56298c6..598cd9086fef1 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -188,8 +188,9 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if getattr(self.block, 'is_datetimetz', False) or \ is_datetimetz(empty_dtype): - missing_time = DatetimeIndex([fill_value], - dtype=empty_dtype) + missing_arr = np.full(self.shape, fill_value) + missing_time = DatetimeIndex(missing_arr[0], + dtype=empty_dtype) return missing_time elif getattr(self.block, 'is_categorical', False): pass From c640f693a1f2d1ae42d496fac7d7f278f00cdc3b Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Sun, 7 Oct 2018 20:08:17 -0500 Subject: [PATCH 03/12] Pass when block is not None --- pandas/core/internals/concat.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 598cd9086fef1..e90447d564d4c 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -188,10 +188,12 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if getattr(self.block, 'is_datetimetz', False) or \ is_datetimetz(empty_dtype): - missing_arr = np.full(self.shape, fill_value) - missing_time = DatetimeIndex(missing_arr[0], - dtype=empty_dtype) - return missing_time + if self.block is None: + missing_arr = np.full(self.shape, fill_value) + missing_time = DatetimeIndex(missing_arr[0], + dtype=empty_dtype) + return missing_time + pass elif getattr(self.block, 'is_categorical', False): pass elif getattr(self.block, 'is_sparse', False): From 5e57d77c8233b867d69066c738d454f9a6b0d6c2 
Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Sun, 7 Oct 2018 20:12:56 -0500 Subject: [PATCH 04/12] Added whatsnew entry --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 91575c311b409..0c868dff76044 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -850,6 +850,7 @@ Reshaping - Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) - Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) - Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) +- Bug in :func:`pandas.concat` when merging multicolumn DataFrames with tz-aware data (:issue`22796`) Build Changes ^^^^^^^^^^^^^ From 2e898484c74540d985000ae09a1cc6fb24c131ab Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Sun, 7 Oct 2018 20:33:26 -0500 Subject: [PATCH 05/12] Fixed whitespace issues --- pandas/tests/frame/test_combine_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 155022d949f8a..b1cda557065a3 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -52,7 +52,7 @@ def test_concat_multiple_tzs(self): results = pd.concat([df2, df3]).reset_index(drop=True) expected = DataFrame(dict(time=[ts2, ts3])) assert_frame_equal(results, expected) - + def test_concat_tz_NaT(self): # GH 22796 # Concating tz-aware multicolumn DataFrames From 53c85aa25673abc09fbc72d5e17406229240c765 Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Mon, 8 Oct 2018 01:26:08 -0500 Subject: [PATCH 06/12] Added additional test, linked to new issue --- pandas/tests/frame/test_combine_concat.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 
deletions(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index b1cda557065a3..5d283e3bcb2a4 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -17,6 +17,8 @@ import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal +import pytest + class TestDataFrameConcatCommon(TestData): @@ -53,10 +55,13 @@ def test_concat_multiple_tzs(self): expected = DataFrame(dict(time=[ts2, ts3])) assert_frame_equal(results, expected) - def test_concat_tz_NaT(self): + @pytest.mark.parametrize('t1', ['2015-01-01', + pytest.param('pd.NaT', marks=pytest.mark.xfail( + reason='GH23037 incorrect dtype when concatenating'))]) + def test_concat_tz_NaT(self, t1): # GH 22796 # Concating tz-aware multicolumn DataFrames - ts1 = Timestamp('2015-01-01', tz='UTC') + ts1 = Timestamp(t1, tz='UTC') ts2 = Timestamp('2015-01-01', tz='UTC') ts3 = Timestamp('2015-01-01', tz='UTC') From 51ef6e5acdd4e7816dd545f40b83d5e0965b7774 Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Mon, 8 Oct 2018 16:30:31 -0500 Subject: [PATCH 07/12] Fixed test, explained bug in more detail for whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/tests/frame/test_combine_concat.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c0af1dea3febc..a4a1586047692 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -883,7 +883,7 @@ Reshaping - Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) - Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) - Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) -- Bug in :func:`pandas.concat` when merging multicolumn 
DataFrames with tz-aware data (:issue`22796`) +- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with less columns (:issue`22796`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 5d283e3bcb2a4..ae6b8ce720f88 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -56,7 +56,7 @@ def test_concat_multiple_tzs(self): assert_frame_equal(results, expected) @pytest.mark.parametrize('t1', ['2015-01-01', - pytest.param('pd.NaT', marks=pytest.mark.xfail( + pytest.param(pd.NaT, marks=pytest.mark.xfail( reason='GH23037 incorrect dtype when concatenating'))]) def test_concat_tz_NaT(self, t1): # GH 22796 @@ -65,8 +65,8 @@ def test_concat_tz_NaT(self, t1): ts2 = Timestamp('2015-01-01', tz='UTC') ts3 = Timestamp('2015-01-01', tz='UTC') - df1 = pd.DataFrame([[ts1, ts2]]) - df2 = pd.DataFrame([[ts3]]) + df1 = DataFrame([[ts1, ts2]]) + df2 = DataFrame([[ts3]]) result = pd.concat([df1, df2]) expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0]) From 54c153dd2ae8fed26e910915959da0d1e732df7a Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Mon, 8 Oct 2018 16:35:42 -0500 Subject: [PATCH 08/12] Edited whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a4a1586047692..0b7225f93afb0 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -883,7 +883,7 @@ Reshaping - Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) - Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) - Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) -- Bug 
in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with less columns (:issue`22796`) +- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`) Build Changes ^^^^^^^^^^^^^ From a0570bdadf5cdec2ca7c75c87f997c48869e3336 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 9 Oct 2018 08:04:59 -0400 Subject: [PATCH 09/12] use construct_array_type --- pandas/core/dtypes/dtypes.py | 11 +++++++++++ pandas/core/internals/concat.py | 9 +++------ pandas/tests/frame/test_combine_concat.py | 9 ++++++--- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 611cae28877c3..f07fb3cd80eab 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -546,6 +546,17 @@ def __new__(cls, unit=None, tz=None): cls._cache[key] = u return u + @classmethod + def construct_array_type(cls): + """Return the array type associated with this dtype + + Returns + ------- + type + """ + from pandas import DatetimeIndex + return DatetimeIndex + @classmethod def construct_from_string(cls, string): """ attempt to construct this type from a string, raise a TypeError if diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index e90447d564d4c..17b5a8e81a792 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -18,8 +18,6 @@ from pandas.core.dtypes.cast import maybe_promote import pandas.core.dtypes.concat as _concat -from pandas.core.indexes.datetimes import DatetimeIndex - import pandas.core.algorithms as algos @@ -189,10 +187,9 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if getattr(self.block, 'is_datetimetz', False) or \ is_datetimetz(empty_dtype): if self.block is None: - missing_arr = np.full(self.shape, fill_value) - missing_time = DatetimeIndex(missing_arr[0], - 
dtype=empty_dtype) - return missing_time + array = empty_dtype.construct_array_type() + missing_arr = array([fill_value], dtype=empty_dtype) + return missing_arr.repeat(self.shape[0]) pass elif getattr(self.block, 'is_categorical', False): pass diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index ae6b8ce720f88..9cb50b9602eda 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -55,9 +55,12 @@ def test_concat_multiple_tzs(self): expected = DataFrame(dict(time=[ts2, ts3])) assert_frame_equal(results, expected) - @pytest.mark.parametrize('t1', ['2015-01-01', - pytest.param(pd.NaT, marks=pytest.mark.xfail( - reason='GH23037 incorrect dtype when concatenating'))]) + @pytest.mark.parametrize( + 't1', + [ + '2015-01-01', + pytest.param(pd.NaT, marks=pytest.mark.xfail( + reason='GH23037 incorrect dtype when concatenating'))]) def test_concat_tz_NaT(self, t1): # GH 22796 # Concating tz-aware multicolumn DataFrames From 644b3490f47a75dfe739c309ecb6567b67fa2796 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 9 Oct 2018 08:29:26 -0400 Subject: [PATCH 10/12] add strict --- pandas/tests/frame/test_combine_concat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 9cb50b9602eda..6c9a866184710 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -60,7 +60,8 @@ def test_concat_multiple_tzs(self): [ '2015-01-01', pytest.param(pd.NaT, marks=pytest.mark.xfail( - reason='GH23037 incorrect dtype when concatenating'))]) + reason='GH23037 incorrect dtype when concatenating', + strict=True))]) def test_concat_tz_NaT(self, t1): # GH 22796 # Concating tz-aware multicolumn DataFrames From ce312744201644f4ee1baead6585577b396f7c60 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 9 Oct 2018 12:10:49 -0400 Subject: [PATCH 
11/12] handle alignment case --- pandas/core/internals/concat.py | 2 +- pandas/tests/frame/test_combine_concat.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 17b5a8e81a792..6d67070000dcd 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -189,7 +189,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if self.block is None: array = empty_dtype.construct_array_type() missing_arr = array([fill_value], dtype=empty_dtype) - return missing_arr.repeat(self.shape[0]) + return missing_arr.repeat(self.shape[1]) pass elif getattr(self.block, 'is_categorical', False): pass diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 6c9a866184710..73923726d74e5 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -77,6 +77,16 @@ def test_concat_tz_NaT(self, t1): assert_frame_equal(result, expected) + def test_concat_tz_not_aligned(self): + # GH 22796 + ts = pd.to_datetime([1, 2]).tz_localize("UTC") + a = pd.DataFrame({"A": ts}) + b = pd.DataFrame({"A": ts, "B": ts}) + result = pd.concat([a, b], sort=True, ignore_index=True) + expected = pd.DataFrame({"A": list(ts) + list(ts), + "B": [pd.NaT, pd.NaT] + list(ts)}) + assert_frame_equal(result, expected) + def test_concat_tuple_keys(self): # GH 14438 df1 = pd.DataFrame(np.ones((2, 2)), columns=list('AB')) From de4c427949bddd0306c204a3362398d59337f791 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 9 Oct 2018 13:19:38 -0400 Subject: [PATCH 12/12] lint --- pandas/tests/frame/test_combine_concat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index f82ae073d3217..ece9559313ba0 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -18,8 +18,6 
@@ import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal -import pytest - class TestDataFrameConcatCommon(TestData):