diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ca4ea8e366754..a4209ba90aaee 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -892,6 +892,7 @@ Reshaping - Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) - Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) - Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) +- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 611cae28877c3..f07fb3cd80eab 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -546,6 +546,17 @@ def __new__(cls, unit=None, tz=None): cls._cache[key] = u return u + @classmethod + def construct_array_type(cls): + """Return the array type associated with this dtype + + Returns + ------- + type + """ + from pandas import DatetimeIndex + return DatetimeIndex + @classmethod def construct_from_string(cls, string): """ attempt to construct this type from a string, raise a TypeError if diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 5a3f11525acf8..6d67070000dcd 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -186,6 +186,10 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if getattr(self.block, 'is_datetimetz', False) or \ is_datetimetz(empty_dtype): + if self.block is None: + array = empty_dtype.construct_array_type() + missing_arr = array([fill_value], dtype=empty_dtype) + return missing_arr.repeat(self.shape[1]) pass elif getattr(self.block, 'is_categorical', False): pass diff --git 
a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index d1f921bc5e894..ece9559313ba0 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -54,6 +54,38 @@ def test_concat_multiple_tzs(self): expected = DataFrame(dict(time=[ts2, ts3])) assert_frame_equal(results, expected) + @pytest.mark.parametrize( + 't1', + [ + '2015-01-01', + pytest.param(pd.NaT, marks=pytest.mark.xfail( + reason='GH23037 incorrect dtype when concatenating', + strict=True))]) + def test_concat_tz_NaT(self, t1): + # GH 22796 + # Concatenating tz-aware multicolumn DataFrames + ts1 = Timestamp(t1, tz='UTC') + ts2 = Timestamp('2015-01-01', tz='UTC') + ts3 = Timestamp('2015-01-01', tz='UTC') + + df1 = DataFrame([[ts1, ts2]]) + df2 = DataFrame([[ts3]]) + + result = pd.concat([df1, df2]) + expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0]) + + assert_frame_equal(result, expected) + + def test_concat_tz_not_aligned(self): + # GH 22796 + ts = pd.to_datetime([1, 2]).tz_localize("UTC") + a = pd.DataFrame({"A": ts}) + b = pd.DataFrame({"A": ts, "B": ts}) + result = pd.concat([a, b], sort=True, ignore_index=True) + expected = pd.DataFrame({"A": list(ts) + list(ts), + "B": [pd.NaT, pd.NaT] + list(ts)}) + assert_frame_equal(result, expected) + def test_concat_tuple_keys(self): # GH 14438 df1 = pd.DataFrame(np.ones((2, 2)), columns=list('AB'))