Skip to content

Commit c17f377

Browse files
committed
BUG: Concatenation of TZ-aware dataframes (pandas-dev#12396) (pandas-dev#18447)
1 parent 1245f06 commit c17f377

File tree

3 files changed

+150
-4
lines changed

3 files changed

+150
-4
lines changed

pandas/core/indexes/base.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1833,13 +1833,14 @@ def _assert_take_fillable(self, values, indices, allow_fill=True,
18331833
fill_value=None, na_value=np.nan):
18341834
""" Internal method to handle NA filling of take """
18351835
indices = _ensure_platform_int(indices)
1836-
18371836
# only fill if we are passing a non-None fill_value
18381837
if allow_fill and fill_value is not None:
18391838
if (indices < -1).any():
18401839
msg = ('When allow_fill=True and fill_value is not None, '
18411840
'all indices must be >= -1')
18421841
raise ValueError(msg)
1842+
if values.size == 0:
1843+
return np.full(indices.shape, fill_value=na_value)
18431844
taken = values.take(indices)
18441845
mask = indices == -1
18451846
if mask.any():

pandas/core/internals.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -5598,8 +5598,10 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
55985598
if len(values) and values[0] is None:
55995599
fill_value = None
56005600

5601-
if getattr(self.block, 'is_datetimetz', False):
5602-
pass
5601+
if getattr(self.block, 'is_datetimetz', False) or \
5602+
is_datetimetz(empty_dtype):
5603+
missing_arr = np.full(np.prod(self.shape), fill_value)
5604+
return DatetimeIndex(missing_arr, dtype=empty_dtype)
56035605
elif getattr(self.block, 'is_categorical', False):
56045606
pass
56055607
elif getattr(self.block, 'is_sparse', False):

pandas/tests/reshape/test_concat.py

+144 -1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@
1111
read_csv, isna, Series, date_range,
1212
Index, Panel, MultiIndex, Timestamp,
1313
DatetimeIndex, Categorical)
14-
from pandas.core.dtypes.dtypes import CategoricalDtype
14+
from pandas.core.dtypes.dtypes import (CategoricalDtype,
15+
DatetimeTZDtype)
1516
from pandas.util import testing as tm
1617
from pandas.util.testing import (assert_frame_equal,
1718
makeCustomDataframe as mkdf)
@@ -1771,6 +1772,148 @@ def test_concat_tz_series_tzlocal(self):
17711772
tm.assert_series_equal(result, pd.Series(x + y))
17721773
assert result.dtype == 'datetime64[ns, tzlocal()]'
17731774

1775+
def test_concat_NaT_dataframes_all_NaT_axis_0(self):
1776+
# GH 12396
1777+
expected = pd.DataFrame([pd.NaT, pd.NaT, pd.NaT], index=[0, 1, 0])
1778+
1779+
# non-timezone aware
1780+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1781+
second = pd.DataFrame([[pd.NaT]])
1782+
1783+
result = pd.concat([first, second], axis=0)
1784+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1785+
1786+
# one side timezone-aware
1787+
dtype = DatetimeTZDtype('ns', tz='UTC')
1788+
first = pd.DataFrame([[pd.NaT], [pd.NaT]], dtype=dtype)
1789+
1790+
result = pd.concat([first, second], axis=0)
1791+
# upcasts for mixed case
1792+
expected = expected.apply(lambda x: x.astype(dtype))
1793+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1794+
1795+
# both sides timezone-aware
1796+
second = pd.DataFrame([[pd.NaT]], dtype=dtype)
1797+
1798+
# upcasts to tz-aware
1799+
result = pd.concat([first, second], axis=0)
1800+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1801+
1802+
def test_concat_NaT_dataframes_all_NaT_axis_1(self):
1803+
# GH 12396
1804+
expected = pd.DataFrame([[pd.NaT, pd.NaT], [pd.NaT, pd.NaT]],
1805+
columns=[0, 1])
1806+
1807+
# non-timezone aware
1808+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1809+
second = pd.DataFrame([[pd.NaT]], columns=[1])
1810+
1811+
result = pd.concat([first, second], axis=1)
1812+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1813+
1814+
# one side timezone-aware
1815+
dtype = DatetimeTZDtype('ns', tz='UTC')
1816+
first = pd.DataFrame([[pd.NaT], [pd.NaT]], dtype=dtype)
1817+
1818+
# upcasts result to tz-aware
1819+
expected.loc[:, 0] = expected.loc[:, 0].astype(dtype)
1820+
result = pd.concat([first, second], axis=1)
1821+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1822+
1823+
# both sides timezone-aware
1824+
second = pd.DataFrame([[pd.NaT]], dtype=dtype, columns=[1])
1825+
1826+
# upcasts to tz-aware
1827+
expected = expected.apply(lambda x: x.astype(dtype))
1828+
result = pd.concat([first, second], axis=1)
1829+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1830+
1831+
def test_concat_NaT_dataframes_mixed_timestamps_and_NaT(self):
1832+
# GH 12396
1833+
1834+
# non-timezone aware
1835+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1836+
second = pd.DataFrame([[pd.Timestamp('2015/01/01')],
1837+
[pd.Timestamp('2016/01/01')]])
1838+
1839+
expected = pd.DataFrame([pd.NaT, pd.NaT, second.iloc[0, 0],
1840+
second.iloc[1, 0]], index=[0, 1, 0, 1])
1841+
1842+
result = pd.concat([first, second], axis=0)
1843+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1844+
1845+
# one side timezone-aware
1846+
dtype = DatetimeTZDtype('ns', tz='UTC')
1847+
second = second.apply(lambda x: x.astype(dtype))
1848+
1849+
result = pd.concat([first, second], axis=0)
1850+
expected = expected.apply(lambda x: x.astype(dtype))
1851+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1852+
1853+
def test_concat_NaT_series_dataframe_all_NaT(self):
1854+
# GH 12396
1855+
1856+
# non-timezone aware
1857+
first = pd.Series([pd.NaT, pd.NaT])
1858+
second = pd.DataFrame([[pd.Timestamp('2015/01/01')],
1859+
[pd.Timestamp('2016/01/01')]])
1860+
1861+
expected = pd.DataFrame([pd.NaT, pd.NaT, second.iloc[0, 0],
1862+
second.iloc[1, 0]], index=[0, 1, 0, 1])
1863+
1864+
result = pd.concat([first, second])
1865+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1866+
1867+
# one side timezone-aware
1868+
dtype = DatetimeTZDtype('ns', tz='UTC')
1869+
second = second.apply(lambda x: x.astype(dtype))
1870+
1871+
result = pd.concat([first, second])
1872+
1873+
expected = expected.apply(lambda x: x.astype(dtype))
1874+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1875+
1876+
# both sides timezone-aware
1877+
first = first.astype(dtype)
1878+
result = pd.concat([first, second])
1879+
assert_frame_equal(result, expected, check_datetimelike_compat=True)
1880+
1881+
first = pd.DataFrame([[pd.NaT], [pd.NaT]])
1882+
second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz='UTC')],
1883+
[pd.Timestamp('2016/01/01', tz='US/Eastern')]])
1884+
1885+
expect = pd.DataFrame([pd.NaT, pd.NaT, second.iloc[0, 0],
1886+
second.iloc[1, 0]], index=[0, 1, 0, 1])
1887+
1888+
result = pd.concat([first, second], axis=0)
1889+
assert_frame_equal(result, expect, check_datetimelike_compat=True)
1890+
1891+
def test_concat_empty_datetime_series(self):
1892+
# GH 18447
1893+
1894+
# not timezone-aware
1895+
first = Series(pd.to_datetime([], utc=False))
1896+
second = Series([1, 2, 3])
1897+
expected = DataFrame([[pd.NaT, 1], [pd.NaT, 2], [pd.NaT, 3]])
1898+
result = concat([first, second], axis=1)
1899+
assert_frame_equal(result, expected)
1900+
1901+
# timezone-aware
1902+
first = Series(pd.to_datetime([], utc=True))
1903+
second = Series([1, 2, 3])
1904+
expected = DataFrame([[pd.NaT, 1], [pd.NaT, 2], [pd.NaT, 3]])
1905+
to_utc = lambda x: pd.to_datetime(x, utc=True)
1906+
expected[[0]] = expected[[0]].apply(to_utc)
1907+
result = concat([first, second], axis=1)
1908+
assert_frame_equal(result, expected)
1909+
1910+
first = Series(pd.to_datetime([], utc=True))
1911+
second = Series([])
1912+
result = concat([first, second], axis=1)
1913+
assert result.size == 0
1914+
assert result.dtypes[0] == 'datetime64[ns, UTC]'
1915+
assert result.dtypes[1] == 'float64'
1916+
17741917
def test_concat_period_series(self):
17751918
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
17761919
y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))

0 commit comments

Comments
 (0)