Skip to content

Commit 58e4035

Browse files
tonytao2012tm9k1
authored and committed
BUG-22796 Concat multicolumn tz-aware DataFrame (pandas-dev#23036)
1 parent 2d7b58b commit 58e4035

File tree

4 files changed

+48
-0
lines changed

4 files changed

+48
-0
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,7 @@ Reshaping
893893
- Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`)
894894
- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)
895895
- Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`)
896+
- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`)
896897

897898
Build Changes
898899
^^^^^^^^^^^^^

pandas/core/dtypes/dtypes.py

+11
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,17 @@ def __new__(cls, unit=None, tz=None):
546546
cls._cache[key] = u
547547
return u
548548

549+
@classmethod
550+
def construct_array_type(cls):
551+
"""Return the array type associated with this dtype
552+
553+
Returns
554+
-------
555+
type
556+
"""
557+
from pandas import DatetimeIndex
558+
return DatetimeIndex
559+
549560
@classmethod
550561
def construct_from_string(cls, string):
551562
""" attempt to construct this type from a string, raise a TypeError if

pandas/core/internals/concat.py

+4
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,10 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
186186

187187
if getattr(self.block, 'is_datetimetz', False) or \
188188
is_datetimetz(empty_dtype):
189+
if self.block is None:
190+
array = empty_dtype.construct_array_type()
191+
missing_arr = array([fill_value], dtype=empty_dtype)
192+
return missing_arr.repeat(self.shape[1])
189193
pass
190194
elif getattr(self.block, 'is_categorical', False):
191195
pass

pandas/tests/frame/test_combine_concat.py

+32
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,38 @@ def test_concat_multiple_tzs(self):
5454
expected = DataFrame(dict(time=[ts2, ts3]))
5555
assert_frame_equal(results, expected)
5656

57+
@pytest.mark.parametrize(
58+
't1',
59+
[
60+
'2015-01-01',
61+
pytest.param(pd.NaT, marks=pytest.mark.xfail(
62+
reason='GH23037 incorrect dtype when concatenating',
63+
strict=True))])
64+
def test_concat_tz_NaT(self, t1):
65+
# GH 22796
66+
# Concatenating tz-aware multicolumn DataFrames
67+
ts1 = Timestamp(t1, tz='UTC')
68+
ts2 = Timestamp('2015-01-01', tz='UTC')
69+
ts3 = Timestamp('2015-01-01', tz='UTC')
70+
71+
df1 = DataFrame([[ts1, ts2]])
72+
df2 = DataFrame([[ts3]])
73+
74+
result = pd.concat([df1, df2])
75+
expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0])
76+
77+
assert_frame_equal(result, expected)
78+
79+
def test_concat_tz_not_aligned(self):
80+
# GH 22796
81+
ts = pd.to_datetime([1, 2]).tz_localize("UTC")
82+
a = pd.DataFrame({"A": ts})
83+
b = pd.DataFrame({"A": ts, "B": ts})
84+
result = pd.concat([a, b], sort=True, ignore_index=True)
85+
expected = pd.DataFrame({"A": list(ts) + list(ts),
86+
"B": [pd.NaT, pd.NaT] + list(ts)})
87+
assert_frame_equal(result, expected)
88+
5789
def test_concat_tuple_keys(self):
5890
# GH 14438
5991
df1 = pd.DataFrame(np.ones((2, 2)), columns=list('AB'))

0 commit comments

Comments
 (0)