Skip to content

Commit 78e1523

Browse files
committed
BUG: Concatenation of TZ-aware dataframes (pandas-dev#12396) (pandas-dev#18447)
1 parent 1245f06 commit 78e1523

File tree

3 files changed

+161
-3
lines changed

3 files changed

+161
-3
lines changed

pandas/core/indexes/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1833,13 +1833,14 @@ def _assert_take_fillable(self, values, indices, allow_fill=True,
18331833
fill_value=None, na_value=np.nan):
18341834
""" Internal method to handle NA filling of take """
18351835
indices = _ensure_platform_int(indices)
1836-
18371836
# only fill if we are passing a non-None fill_value
18381837
if allow_fill and fill_value is not None:
18391838
if (indices < -1).any():
18401839
msg = ('When allow_fill=True and fill_value is not None, '
18411840
'all indices must be >= -1')
18421841
raise ValueError(msg)
1842+
if values.size == 0:
1843+
return np.full(indices.shape, fill_value=na_value)
18431844
taken = values.take(indices)
18441845
mask = indices == -1
18451846
if mask.any():

pandas/core/internals.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -5598,8 +5598,10 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
55985598
if len(values) and values[0] is None:
55995599
fill_value = None
56005600

5601-
if getattr(self.block, 'is_datetimetz', False):
5602-
pass
5601+
if getattr(self.block, 'is_datetimetz', False) or \
5602+
is_datetimetz(empty_dtype):
5603+
missing_arr = np.full(np.prod(self.shape), fill_value)
5604+
return DatetimeIndex(missing_arr, dtype=empty_dtype)
56035605
elif getattr(self.block, 'is_categorical', False):
56045606
pass
56055607
elif getattr(self.block, 'is_sparse', False):

pandas/tests/reshape/test_concat.py

+155
Original file line numberDiff line numberDiff line change
@@ -1771,6 +1771,135 @@ def test_concat_tz_series_tzlocal(self):
17711771
tm.assert_series_equal(result, pd.Series(x + y))
17721772
assert result.dtype == 'datetime64[ns, tzlocal()]'
17731773

1774+
def test_concat_NaT_dataframes_all_NaT_axis_0(self):
1775+
# GH 12396
1776+
1777+
# tz-naive
1778+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1779+
second = pd.DataFrame([[pd.NaT]])
1780+
1781+
result = pd.concat([first, second], axis=0)
1782+
expected = pd.DataFrame([pd.NaT, pd.NaT, pd.NaT], index=[0, 1, 0])
1783+
assert_frame_equal(result, expected)
1784+
1785+
# one side timezone-aware
1786+
# upcasts for mixed case
1787+
first = pd.DataFrame(pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC'))
1788+
result = pd.concat([first, second], axis=0)
1789+
expected = pd.DataFrame(
1790+
pd.Series([pd.NaT, pd.NaT, pd.NaT]).dt.tz_localize('UTC'),
1791+
index=[0, 1, 0]
1792+
)
1793+
assert_frame_equal(result, expected)
1794+
1795+
# both sides timezone-aware
1796+
# upcasts to tz-aware
1797+
second = pd.DataFrame(pd.Series([pd.NaT]).dt.tz_localize('UTC'))
1798+
result = pd.concat([first, second], axis=0)
1799+
assert_frame_equal(result, expected)
1800+
1801+
def test_concat_NaT_dataframes_all_NaT_axis_1(self):
1802+
# GH 12396
1803+
1804+
# tz-naive
1805+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1806+
second = pd.DataFrame([[pd.NaT]], columns=[1])
1807+
expected = pd.DataFrame([[pd.NaT, pd.NaT], [pd.NaT, pd.NaT]],
1808+
columns=[0, 1])
1809+
result = pd.concat([first, second], axis=1)
1810+
assert_frame_equal(result, expected)
1811+
1812+
# one side timezone-aware
1813+
# no cross-column upcast: column 0 stays tz-aware, column 1 stays tz-naive
1814+
first = pd.DataFrame(pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC'))
1815+
expected = pd.DataFrame(
1816+
{0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC'),
1817+
1: pd.Series([pd.NaT, pd.NaT])}
1818+
)
1819+
result = pd.concat([first, second], axis=1)
1820+
assert_frame_equal(result, expected)
1821+
1822+
# both sides timezone-aware
1823+
# upcasts result to tz-aware
1824+
second[1] = second[1].dt.tz_localize('UTC')
1825+
expected = pd.DataFrame(
1826+
{0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC'),
1827+
1: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize('UTC')}
1828+
)
1829+
result = pd.concat([first, second], axis=1)
1830+
assert_frame_equal(result, expected)
1831+
1832+
def test_concat_NaT_dataframes_mixed_timestamps_and_NaT(self):
1833+
# GH 12396
1834+
1835+
# tz-naive
1836+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1837+
second = pd.DataFrame([[pd.Timestamp('2015/01/01')],
1838+
[pd.Timestamp('2016/01/01')]],
1839+
index=[2, 3])
1840+
expected = pd.DataFrame([pd.NaT, pd.NaT,
1841+
pd.Timestamp('2015/01/01'),
1842+
pd.Timestamp('2016/01/01')])
1843+
1844+
result = pd.concat([first, second], axis=0)
1845+
assert_frame_equal(result, expected)
1846+
1847+
# one side timezone-aware
1848+
second = second[0].dt.tz_localize('UTC')
1849+
expected = pd.DataFrame(
1850+
pd.Series([pd.NaT, pd.NaT,
1851+
pd.Timestamp('2015/01/01'),
1852+
pd.Timestamp('2016/01/01')]).dt.tz_localize('UTC')
1853+
)
1854+
result = pd.concat([first, second], axis=0)
1855+
assert_frame_equal(result, expected)
1856+
1857+
def test_concat_NaT_series_dataframe_all_NaT(self):
1858+
# GH 12396
1859+
1860+
# tz-naive
1861+
first = pd.Series([pd.NaT, pd.NaT])
1862+
second = pd.DataFrame([[pd.Timestamp('2015/01/01')],
1863+
[pd.Timestamp('2016/01/01')]],
1864+
index=[2, 3])
1865+
1866+
expected = pd.DataFrame([pd.NaT, pd.NaT,
1867+
pd.Timestamp('2015/01/01'),
1868+
pd.Timestamp('2016/01/01')])
1869+
1870+
result = pd.concat([first, second])
1871+
assert_frame_equal(result, expected)
1872+
1873+
# one side timezone-aware
1874+
second[0] = second[0].dt.tz_localize('UTC')
1875+
result = pd.concat([first, second])
1876+
1877+
expected = pd.DataFrame(
1878+
pd.Series([pd.NaT, pd.NaT,
1879+
pd.Timestamp('2015/01/01'),
1880+
pd.Timestamp('2016/01/01')]).dt.tz_localize('UTC')
1881+
)
1882+
assert_frame_equal(result, expected)
1883+
1884+
# both sides timezone-aware
1885+
first = first.dt.tz_localize('UTC')
1886+
result = pd.concat([first, second])
1887+
assert_frame_equal(result, expected)
1888+
1889+
# mixed tz
1890+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1891+
second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz='UTC')],
1892+
[pd.Timestamp('2016/01/01', tz='US/Eastern')]],
1893+
index=[2, 3])
1894+
1895+
expected = pd.DataFrame([pd.NaT,
1896+
pd.NaT,
1897+
pd.Timestamp('2015/01/01', tz='UTC'),
1898+
pd.Timestamp('2016/01/01', tz='US/Eastern')])
1899+
1900+
result = pd.concat([first, second], axis=0)
1901+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1902+
17741903
def test_concat_period_series(self):
17751904
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
17761905
y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
@@ -1832,6 +1961,32 @@ def test_concat_empty_series(self):
18321961
columns=['x', 0])
18331962
tm.assert_frame_equal(res, exp)
18341963

1964+
# GH 18447
1965+
# tz-naive
1966+
first = Series(pd.to_datetime([], utc=False))
1967+
second = Series([1, 2, 3])
1968+
expected = DataFrame([[pd.NaT, 1], [pd.NaT, 2], [pd.NaT, 3]])
1969+
result = concat([first, second], axis=1)
1970+
assert_frame_equal(result, expected)
1971+
1972+
# timezone-aware
1973+
first = Series(pd.to_datetime([], utc=True))
1974+
second = Series([1, 2, 3])
1975+
expected = DataFrame(
1976+
{0: pd.Series([pd.NaT, pd.NaT, pd.NaT]).dt.tz_localize('UTC'),
1977+
1: pd.Series([1, 2, 3])}
1978+
)
1979+
result = concat([first, second], axis=1)
1980+
assert_frame_equal(result, expected)
1981+
1982+
# both empty
1983+
first = Series(pd.to_datetime([], utc=True))
1984+
second = Series([])
1985+
result = concat([first, second], axis=1)
1986+
assert result.size == 0
1987+
assert result.dtypes[0] == 'datetime64[ns, UTC]'
1988+
assert result.dtypes[1] == 'float64'
1989+
18351990
def test_default_index(self):
18361991
# is_series and ignore_index
18371992
s1 = pd.Series([1, 2, 3], name='x')

0 commit comments

Comments
 (0)