BUG: Concatenation of TZ-aware dataframes (#12396) (#18447) #19327


Closed
wants to merge 6 commits into from
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
@@ -1370,6 +1370,8 @@ Reshaping
- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
- Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`)
- Bug in :func:`concat` which raised an error when concatenating TZ-aware DataFrames with all-``NaT`` DataFrames (:issue:`12396`)
- Bug in :func:`concat` which raised an error when concatenating empty TZ-aware :class:`Series` (:issue:`18447`)
- Bug in :func:`get_dummies`, and :func:`select_dtypes`, where duplicate column names caused incorrect behavior (:issue:`20848`)
- Bug in :func:`isna`, which cannot handle ambiguous typed lists (:issue:`20675`)
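As a quick illustration of the :issue:`18447` entry, a minimal sketch (assumes a pandas build containing this fix; the integer column labels come from the default ``RangeIndex``):

```python
import pandas as pd

# Concatenating an empty tz-aware Series with a non-empty Series
# previously raised; with the fix the empty side aligns and fills with NaT.
first = pd.Series([], dtype='datetime64[ns]').dt.tz_localize('UTC')
second = pd.Series([1, 2, 3])
result = pd.concat([first, second], axis=1)
```

Column 0 ends up all-``NaT`` but keeps the ``datetime64[ns, UTC]`` dtype rather than raising.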

8 changes: 6 additions & 2 deletions pandas/core/dtypes/concat.py
@@ -465,8 +465,12 @@ def convert_to_pydatetime(x, axis):
    if _contains_datetime:

        if 'datetime' in typs:
-           new_values = np.concatenate([x.view(np.int64) for x in
-                                        to_concat], axis=axis)
+           to_concat = [np.array(x, copy=False).view(np.int64)
+                        for x in to_concat]
+           if axis == 1:
+               to_concat = [np.atleast_2d(x) for x in to_concat]
+
+           new_values = np.concatenate(to_concat, axis=axis)
            return new_values.view(_NS_DTYPE)
        else:
            # when to_concat has different tz, len(typs) > 1.
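Standalone, the hunk's i8-view round-trip can be sketched like this (hypothetical function name; the real code returns `new_values.view(_NS_DTYPE)`):

```python
import numpy as np

def concat_datetime(to_concat, axis=0):
    # View each datetime64 chunk as int64, promote 1-D chunks to 2-D when
    # concatenating along axis=1, then view the result back as datetime64.
    to_concat = [np.array(x, copy=False).view(np.int64) for x in to_concat]
    if axis == 1:
        to_concat = [np.atleast_2d(x) for x in to_concat]
    return np.concatenate(to_concat, axis=axis).view('M8[ns]')

a = np.array(['2015-01-01', '2016-01-01'], dtype='M8[ns]')
b = np.array(['NaT'], dtype='M8[ns]')
out = concat_datetime([a, b])           # 1-D result of length 3
wide = concat_datetime([a, a], axis=1)  # 2-D result of shape (1, 4)
```

The `np.atleast_2d` promotion is what lets an all-NaT 1-D chunk participate in an axis=1 concatenation without shape errors.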
10 changes: 6 additions & 4 deletions pandas/core/indexes/base.py
@@ -2183,17 +2183,19 @@ def _assert_take_fillable(self, values, indices, allow_fill=True,
                              fill_value=None, na_value=np.nan):
        """ Internal method to handle NA filling of take """
        indices = _ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        if allow_fill and fill_value is not None:
            if (indices < -1).any():
                msg = ('When allow_fill=True and fill_value is not None, '
                       'all indices must be >= -1')
                raise ValueError(msg)
-           taken = values.take(indices)
            mask = indices == -1
-           if mask.any():
-               taken[mask] = na_value
+           if mask.all():
Contributor:
@TomAugspurger did we move the empty check into take (the new helper)?

Contributor:
Yeah, algos.take simplifies this. The only thing it doesn't do is the check on lines 2174-2178, where we raise a ValueError when

  • indices has negative values
  • allow_fill is true
  • fill_value is specified

which seems like a strange set of conditions. Going to see what triggers it.

Contributor:
can add a comment here. is this hit by the tests?

+               taken = np.full(indices.shape, fill_value=na_value)
+           else:
+               taken = values.take(indices)
+               if mask.any():
+                   taken[mask] = na_value
        else:
            taken = values.take(indices)
        return taken
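The reworked branch above can be sketched as a standalone helper (hypothetical name, NumPy only; the real method also runs the allow_fill/fill_value validation shown in the hunk):

```python
import numpy as np

def take_fillable(values, indices, na_value=np.nan):
    # -1 marks positions to be filled with na_value
    indices = np.asarray(indices, dtype=np.intp)
    mask = indices == -1
    if mask.all():
        # every position is missing: never call take on `values`, which
        # may be empty (the GH 12396 / GH 18447 failure mode)
        taken = np.full(indices.shape, fill_value=na_value)
    else:
        taken = values.take(indices)
        if mask.any():
            taken[mask] = na_value
    return taken

out = take_fillable(np.array([10.0, 20.0, 30.0]), [0, -1, 2])
empty = take_fillable(np.array([], dtype='f8'), [-1, -1])
```

Checking `mask.all()` first is the fix: the old code always called `values.take(indices)`, which fails when `values` is empty even though every requested position is a fill.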
6 changes: 4 additions & 2 deletions pandas/core/internals.py
@@ -5835,8 +5835,10 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
        if len(values) and values[0] is None:
            fill_value = None

-       if getattr(self.block, 'is_datetimetz', False):
-           pass
+       if getattr(self.block, 'is_datetimetz', False) or \
Contributor:
So I would like to fix this more generally.

Define something like this on the Block. This should just work generally:

def empty(self):
    """ return a same shape block filled with the empty value for this block """
    arr = np.full(np.prod(self.shape), self._na_value)
    arr = _block_shape(arr, self.ndim)
    return self.make_block_same_klass(arr)

If we can get this to work, then we can clean up a bunch of additional code.

Contributor Author (@paul-mannino, Jan 29, 2018):
Not sure how this simplifies. Won't you have to cast the result to the empty_dtype anyway, which doesn't necessarily match the dtype of the block?

Contributor (@TomAugspurger, Apr 27, 2018):

What would this do for non-NA holding blocks? Raise a TypeError probably?

IOW, what's more important: that you know you'll get a result, or that you know the result will be the same block type? I'd vote for same block type.

Contributor:

And should we pass through placement?

Contributor:

Actually, perhaps something like

def empty(self, dtype=None):
    ...

Then you get back a block for dtype. By default you get the same type.

This solves two problems:

  1. you can do IntBlock.empty(dtype=float) just fine. (will still raise if that type can't hold NA)
  2. As @paul-mannino indicated, we have to cast to empty_dtype anyway.

+               is_datetimetz(empty_dtype):
+           missing_arr = np.full(np.prod(self.shape), fill_value)
Contributor:
move this logic to the Block as I indicated above.

+           return DatetimeIndex(missing_arr, dtype=empty_dtype)
        elif getattr(self.block, 'is_categorical', False):
            pass
        elif getattr(self.block, 'is_sparse', False):
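In isolation, the tz-aware fill this hunk introduces behaves like the sketch below (the dtype and shape are illustrative stand-ins for `empty_dtype` and `self.shape`):

```python
import numpy as np
import pandas as pd

# Materialize the all-missing chunk as a DatetimeIndex instead of a plain
# ndarray so the timezone of the joined dtype survives.
empty_dtype = pd.DatetimeTZDtype(tz='UTC')  # stand-in for the joined dtype
shape = (3,)                                # stand-in for self.shape
missing_arr = np.full(np.prod(shape), pd.NaT)
filled = pd.DatetimeIndex(missing_arr, dtype=empty_dtype)
```

A plain `np.full` of ``NaT`` would lose the timezone; routing it through `DatetimeIndex` with the joined dtype keeps the result tz-aware.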
101 changes: 101 additions & 0 deletions pandas/tests/reshape/test_concat.py
@@ -1917,6 +1917,92 @@ def test_concat_tz_series_tzlocal(self):
tm.assert_series_equal(result, pd.Series(x + y))
assert result.dtype == 'datetime64[ns, tzlocal()]'

@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
@pytest.mark.parametrize('s', [pd.NaT, pd.Timestamp('20150101')])
def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s):
# GH 12396

# tz-naive
first = pd.DataFrame([[pd.NaT], [pd.NaT]]).apply(
lambda x: x.dt.tz_localize(tz1))
second = pd.DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2))

# we are all NaT so this is ok
if tz1 is None:
tz = tz2
elif tz2 is None:
tz = tz1
elif tz1 == tz2:
tz = tz1
else:
tz = None

result = pd.concat([first, second], axis=0)
expected = pd.DataFrame(pd.Series(
[pd.NaT, pd.NaT, s], index=[0, 1, 0]))
expected = expected.apply(lambda x: x.dt.tz_localize(tz))
assert_frame_equal(result, expected)
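The if/elif ladder above encodes the expected timezone of the result; as a standalone sketch (hypothetical helper name):

```python
def resolve_tz(tz1, tz2):
    # An all-NaT tz-naive side adopts the other side's timezone; matching
    # zones are kept; mismatched zones fall back to tz-naive.
    if tz1 is None:
        return tz2
    if tz2 is None:
        return tz1
    if tz1 == tz2:
        return tz1
    return None
```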

@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
# GH 12396

first = pd.DataFrame(pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1))
second = pd.DataFrame(pd.Series(
[pd.NaT]).dt.tz_localize(tz2), columns=[1])
expected = pd.DataFrame(
{0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1),
1: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2)}
)
result = pd.concat([first, second], axis=1)
assert_frame_equal(result, expected)

@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
# GH 12396

# tz-naive
first = pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)
second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz=tz2)],
[pd.Timestamp('2016/01/01', tz=tz2)]],
index=[2, 3])

if tz1 is None and tz2 is None:
tz = None

# we are all NaT so this is ok
elif tz1 is None:
tz = tz2
elif tz1 == tz2:
tz = tz1
else:
tz = None
expected = pd.DataFrame([pd.NaT, pd.NaT,
pd.Timestamp('2015/01/01', tz=tz),
pd.Timestamp('2016/01/01', tz=tz)])

result = pd.concat([first, second])
assert_frame_equal(result, expected)

@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_concat_NaT_dataframes(self, tz):
# GH 12396

first = pd.DataFrame([[pd.NaT], [pd.NaT]])
first = first.apply(lambda x: x.dt.tz_localize(tz))
second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz=tz)],
[pd.Timestamp('2016/01/01', tz=tz)]],
index=[2, 3])
expected = pd.DataFrame([pd.NaT, pd.NaT,
pd.Timestamp('2015/01/01', tz=tz),
pd.Timestamp('2016/01/01', tz=tz)])

result = pd.concat([first, second], axis=0)
assert_frame_equal(result, expected)

def test_concat_period_series(self):
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
@@ -1978,6 +2064,21 @@ def test_concat_empty_series(self):
columns=['x', 0])
tm.assert_frame_equal(res, exp)

@pytest.mark.parametrize('tz', [None, 'UTC'])
@pytest.mark.parametrize('values', [[], [1, 2, 3]])
def test_concat_empty_series_timelike(self, tz, values):
# GH 18447

first = Series([], dtype='M8[ns]').dt.tz_localize(tz)
second = Series(values)
expected = DataFrame(
{0: pd.Series([pd.NaT] * len(values),
dtype='M8[ns]'
).dt.tz_localize(tz),
1: values})
result = concat([first, second], axis=1)
assert_frame_equal(result, expected)

def test_default_index(self):
# is_series and ignore_index
s1 = pd.Series([1, 2, 3], name='x')