Skip to content

Commit 7fc879c

Browse files
committed
BUG: Concatenation of TZ-aware dataframes (pandas-dev#12396) (pandas-dev#18447)
1 parent 21e884f commit 7fc879c

File tree

3 files changed

+165
-6
lines changed

3 files changed

+165
-6
lines changed

pandas/core/indexes/base.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -2170,17 +2170,19 @@ def _assert_take_fillable(self, values, indices, allow_fill=True,
21702170
fill_value=None, na_value=np.nan):
21712171
""" Internal method to handle NA filling of take """
21722172
indices = _ensure_platform_int(indices)
2173-
21742173
# only fill if we are passing a non-None fill_value
21752174
if allow_fill and fill_value is not None:
21762175
if (indices < -1).any():
21772176
msg = ('When allow_fill=True and fill_value is not None, '
21782177
'all indices must be >= -1')
21792178
raise ValueError(msg)
2180-
taken = values.take(indices)
21812179
mask = indices == -1
2182-
if mask.any():
2183-
taken[mask] = na_value
2180+
if mask.all():
2181+
taken = np.full(indices.shape, fill_value=na_value)
2182+
else:
2183+
taken = values.take(indices)
2184+
if mask.any():
2185+
taken[mask] = na_value
21842186
else:
21852187
taken = values.take(indices)
21862188
return taken

pandas/core/internals.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -5815,8 +5815,10 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
58155815
if len(values) and values[0] is None:
58165816
fill_value = None
58175817

5818-
if getattr(self.block, 'is_datetimetz', False):
5819-
pass
5818+
if getattr(self.block, 'is_datetimetz', False) or \
5819+
is_datetimetz(empty_dtype):
5820+
missing_arr = np.full(np.prod(self.shape), fill_value)
5821+
return DatetimeIndex(missing_arr, dtype=empty_dtype)
58205822
elif getattr(self.block, 'is_categorical', False):
58215823
pass
58225824
elif getattr(self.block, 'is_sparse', False):

pandas/tests/reshape/test_concat.py

+155
Original file line numberDiff line numberDiff line change
@@ -1865,6 +1865,135 @@ def test_concat_tz_series_tzlocal(self):
18651865
tm.assert_series_equal(result, pd.Series(x + y))
18661866
assert result.dtype == 'datetime64[ns, tzlocal()]'
18671867

1868+
def test_concat_NaT_dataframes_all_NaT_axis_0(self):
    """Stack all-NaT frames along axis 0 (GH 12396).

    Exercises three input pairings: tz-naive with tz-naive, tz-aware with
    tz-naive, and tz-aware with tz-aware.
    """
    # the one-row frame is appended below the two-row frame, so its
    # label 0 shows up a second time in the stacked index
    stacked_index = [0, 1, 0]

    # tz-naive
    naive_pair = pd.DataFrame([[pd.NaT], [pd.NaT]])
    naive_single = pd.DataFrame([[pd.NaT]])
    stacked = pd.concat([naive_pair, naive_single], axis=0)
    wanted = pd.DataFrame([pd.NaT, pd.NaT, pd.NaT], index=stacked_index)
    assert_frame_equal(stacked, wanted)

    # one side timezone-aware
    # upcasts for mixed case
    utc_pair = pd.DataFrame(
        pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC')
    )
    stacked = pd.concat([utc_pair, naive_single], axis=0)
    utc_wanted = pd.DataFrame(
        pd.Series([pd.NaT, pd.NaT, pd.NaT]).dt.tz_localize('UTC'),
        index=stacked_index
    )
    assert_frame_equal(stacked, utc_wanted)

    # both sides timezone-aware
    # upcasts to tz-aware; same expectation as the mixed case above
    utc_single = pd.DataFrame(pd.Series([pd.NaT]).dt.tz_localize('UTC'))
    stacked = pd.concat([utc_pair, utc_single], axis=0)
    assert_frame_equal(stacked, utc_wanted)
1894+
1895+
def test_concat_NaT_dataframes_all_NaT_axis_1(self):
1896+
# GH 12396
1897+
1898+
# tz-naive
1899+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1900+
second = pd.DataFrame([[pd.NaT]], columns=[1])
1901+
expected = pd.DataFrame([[pd.NaT, pd.NaT], [pd.NaT, pd.NaT]],
1902+
columns=[0, 1])
1903+
result = pd.concat([first, second], axis=1)
1904+
assert_frame_equal(result, expected)
1905+
1906+
# one side timezone-aware
1907+
# upcasts result to tz-aware
1908+
first = pd.DataFrame(pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC'))
1909+
expected = pd.DataFrame(
1910+
{0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC'),
1911+
1: pd.Series([pd.NaT, pd.NaT])}
1912+
)
1913+
result = pd.concat([first, second], axis=1)
1914+
assert_frame_equal(result, expected)
1915+
1916+
# both sides timezone-aware
1917+
# upcasts result to tz-aware
1918+
second[1] = second[1].dt.tz_localize('UTC')
1919+
expected = pd.DataFrame(
1920+
{0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC'),
1921+
1: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC')}
1922+
)
1923+
result = pd.concat([first, second], axis=1)
1924+
assert_frame_equal(result, expected)
1925+
1926+
def test_concat_NaT_dataframes_mixed_timestamps_and_NaT(self):
1927+
# GH 12396
1928+
1929+
# tz-naive
1930+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1931+
second = pd.DataFrame([[pd.Timestamp('2015/01/01')],
1932+
[pd.Timestamp('2016/01/01')]],
1933+
index=[2, 3])
1934+
expected = pd.DataFrame([pd.NaT, pd.NaT,
1935+
pd.Timestamp('2015/01/01'),
1936+
pd.Timestamp('2016/01/01')])
1937+
1938+
result = pd.concat([first, second], axis=0)
1939+
assert_frame_equal(result, expected)
1940+
1941+
# one side timezone-aware
1942+
second = second[0].dt.tz_localize('UTC')
1943+
expected = pd.DataFrame(
1944+
pd.Series([pd.NaT, pd.NaT,
1945+
pd.Timestamp('2015/01/01'),
1946+
pd.Timestamp('2016/01/01')]).dt.tz_localize('UTC')
1947+
)
1948+
result = pd.concat([first, second], axis=0)
1949+
assert_frame_equal(result, expected)
1950+
1951+
def test_concat_NaT_series_dataframe_all_NaT(self):
1952+
# GH 12396
1953+
1954+
# tz-naive
1955+
first = pd.Series([pd.NaT, pd.NaT])
1956+
second = pd.DataFrame([[pd.Timestamp('2015/01/01')],
1957+
[pd.Timestamp('2016/01/01')]],
1958+
index=[2, 3])
1959+
1960+
expected = pd.DataFrame([pd.NaT, pd.NaT,
1961+
pd.Timestamp('2015/01/01'),
1962+
pd.Timestamp('2016/01/01')])
1963+
1964+
result = pd.concat([first, second])
1965+
assert_frame_equal(result, expected)
1966+
1967+
# one side timezone-aware
1968+
second[0] = second[0].dt.tz_localize('UTC')
1969+
result = pd.concat([first, second])
1970+
1971+
expected = pd.DataFrame(
1972+
pd.Series([pd.NaT, pd.NaT,
1973+
pd.Timestamp('2015/01/01'),
1974+
pd.Timestamp('2016/01/01')]).dt.tz_localize('UTC')
1975+
)
1976+
assert_frame_equal(result, expected)
1977+
1978+
# both sides timezone-aware
1979+
first = first.dt.tz_localize('UTC')
1980+
result = pd.concat([first, second])
1981+
assert_frame_equal(result, expected)
1982+
1983+
# mixed tz
1984+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1985+
second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz='UTC')],
1986+
[pd.Timestamp('2016/01/01', tz='US/Eastern')]],
1987+
index=[2, 3])
1988+
1989+
expected = pd.DataFrame([pd.NaT,
1990+
pd.NaT,
1991+
pd.Timestamp('2015/01/01', tz='UTC'),
1992+
pd.Timestamp('2016/01/01', tz='US/Eastern')])
1993+
1994+
result = pd.concat([first, second], axis=0)
1995+
assert_frame_equal(result, expected)
1996+
18681997
def test_concat_period_series(self):
18691998
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
18701999
y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
@@ -1926,6 +2055,32 @@ def test_concat_empty_series(self):
19262055
columns=['x', 0])
19272056
tm.assert_frame_equal(res, exp)
19282057

2058+
# GH 18447
2059+
# tz-naive
2060+
first = Series(pd.to_datetime([], utc=False))
2061+
second = Series([1, 2, 3])
2062+
expected = DataFrame([[pd.NaT, 1], [pd.NaT, 2], [pd.NaT, 3]])
2063+
result = concat([first, second], axis=1)
2064+
assert_frame_equal(result, expected)
2065+
2066+
# timezone-aware
2067+
first = Series(pd.to_datetime([], utc=True))
2068+
second = Series([1, 2, 3])
2069+
expected = DataFrame(
2070+
{0: pd.Series([pd.NaT, pd.NaT, pd.NaT]).dt.tz_localize('UTC'),
2071+
1: pd.Series([1, 2, 3])}
2072+
)
2073+
result = concat([first, second], axis=1)
2074+
assert_frame_equal(result, expected)
2075+
2076+
# both empty
2077+
first = Series(pd.to_datetime([], utc=True))
2078+
second = Series([])
2079+
result = concat([first, second], axis=1)
2080+
assert result.size == 0
2081+
assert result.dtypes[0] == 'datetime64[ns, UTC]'
2082+
assert result.dtypes[1] == 'float64'
2083+
19292084
def test_default_index(self):
19302085
# is_series and ignore_index
19312086
s1 = pd.Series([1, 2, 3], name='x')

0 commit comments

Comments
 (0)