Skip to content

Commit 44a0fcf

Browse files
multiloc authored and afragner committed
handle tz-aware all NaT concatenation incl. mixed cases (GH12396)
1 parent af7bdd3 commit 44a0fcf

File tree

2 files changed

+121
-0
lines changed

2 files changed

+121
-0
lines changed

pandas/core/internals.py

+6
Original file line numberDiff line numberDiff line change
@@ -4911,6 +4911,12 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
49114911
pass
49124912
elif getattr(self.block, 'is_sparse', False):
49134913
pass
4914+
elif com.is_extension_type(empty_dtype) and \
4915+
com.is_datetimetz(empty_dtype):
4916+
num_elements = np.prod(self.shape)
4917+
# handle timezone-aware all NaT cases
4918+
return DatetimeIndex([fill_value] * num_elements,
4919+
dtype=empty_dtype)
49144920
else:
49154921
missing_arr = np.empty(self.shape, dtype=empty_dtype)
49164922
missing_arr.fill(fill_value)

pandas/tools/tests/test_merge.py

+115
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.compat import range, lrange, lzip, StringIO
1313
from pandas import compat
1414
from pandas.tseries.index import DatetimeIndex
15+
from pandas.types.dtypes import DatetimeTZDtype
1516
from pandas.tools.merge import merge, concat, ordered_merge, MergeError
1617
from pandas import Categorical, Timestamp
1718
from pandas.util.testing import (assert_frame_equal, assert_series_equal,
@@ -2522,6 +2523,120 @@ def test_concat_multiindex_with_tz(self):
25222523
result = concat([df, df])
25232524
tm.assert_frame_equal(result, expected)
25242525

2526+
def test_concat_NaT_dataframes_all_NaT_axis_0(self):
    # GH 12396
    # Row-wise concat of all-NaT frames must give three NaT rows whether
    # neither, one, or both of the inputs use a UTC-aware dtype.
    expected = pd.DataFrame([pd.NaT] * 3, index=[0, 1, 0])

    # case 1: both inputs are timezone-naive
    naive_top = pd.DataFrame([[pd.NaT], [pd.NaT]])
    naive_bottom = pd.DataFrame([[pd.NaT]])
    stacked = pd.concat([naive_top, naive_bottom], axis=0)
    assert_frame_equal(stacked, expected)

    # case 2: only the top frame is timezone-aware
    utc_dtype = DatetimeTZDtype('ns', tz='UTC')
    aware_top = pd.DataFrame([[pd.NaT], [pd.NaT]], dtype=utc_dtype)
    stacked = pd.concat([aware_top, naive_bottom], axis=0)
    # values are compared ignoring dtype; the resulting column dtype is
    # asserted separately and is expected to be the tz-aware one
    assert_frame_equal(stacked, expected, check_dtype=False)
    self.assertEqual(stacked.dtypes[0], utc_dtype)

    # case 3: both frames are timezone-aware
    aware_bottom = pd.DataFrame([[pd.NaT]], dtype=utc_dtype)
    stacked = pd.concat([aware_top, aware_bottom], axis=0)
    # the tz-aware dtype is expected to be preserved
    assert_frame_equal(stacked, expected, check_dtype=False)
    self.assertEqual(stacked.dtypes[0], utc_dtype)
2553+
2554+
def test_concat_NaT_dataframes_all_NaT_axis_1(self):
    # GH 12396
    # Column-wise concat of all-NaT frames: the values must all be NaT and
    # each result column must keep the dtype of the frame it came from.
    expect = pd.DataFrame([[pd.NaT, pd.NaT], [pd.NaT, pd.NaT]],
                          columns=[0, 0])

    # non-timezone aware
    first = pd.DataFrame([[pd.NaT], [pd.NaT]])
    second = pd.DataFrame([[pd.NaT]])

    result = pd.concat([first, second], axis=1)
    assert_frame_equal(result, expect)

    # one side timezone-aware
    dtype = DatetimeTZDtype('ns', tz='UTC')
    first = pd.DataFrame([[pd.NaT], [pd.NaT]], dtype=dtype)

    # Concatenate *before* comparing: the value check must run against
    # the mixed naive/aware result, not the stale all-naive result from
    # the previous section.
    result = pd.concat([first, second], axis=1)
    assert_frame_equal(result, expect, check_dtype=False)
    # column 0 comes from the tz-aware frame, column 1 from the naive one
    self.assertEqual(result.dtypes.iloc[0], dtype)
    self.assertEqual(result.dtypes.iloc[0], first.dtypes[0])
    self.assertEqual(result.dtypes.iloc[1], second.dtypes[0])

    # both sides timezone-aware
    second = pd.DataFrame([[pd.NaT]], dtype=dtype)

    result = pd.concat([first, second], axis=1)
    assert_frame_equal(result, expect, check_dtype=False)
    # both columns are expected to carry the tz-aware dtype
    self.assertEqual(result.dtypes.iloc[0], dtype)
    self.assertEqual(result.dtypes.iloc[0], first.dtypes[0])
    self.assertEqual(result.dtypes.iloc[1], second.dtypes[0])
2587+
def test_concat_NaT_dataframes_mixed_timestamps_and_NaT(self):
    # GH 12396
    # Row-wise concat of an all-NaT frame with a frame of real timestamps,
    # first with naive timestamps and then with UTC-aware ones.

    # case 1: both frames are timezone-naive
    all_nat = pd.DataFrame([[pd.NaT], [pd.NaT]])
    stamped = pd.DataFrame([[pd.Timestamp('2015/01/01')],
                            [pd.Timestamp('2016/01/01')]])

    # expected values: the two NaT rows followed by the two timestamps as
    # they are stored in the naive frame
    expected_values = [pd.NaT, pd.NaT, stamped.iloc[0, 0],
                       stamped.iloc[1, 0]]
    expected = pd.DataFrame(expected_values, index=[0, 1, 0, 1])

    stacked = pd.concat([all_nat, stamped], axis=0)
    assert_frame_equal(stacked, expected)
    self.assertEqual(stacked.dtypes.iloc[0], all_nat.dtypes[0])

    # case 2: the timestamp frame is converted to a timezone-aware dtype
    utc_dtype = DatetimeTZDtype('ns', tz='UTC')
    stamped = stamped.apply(lambda col: col.astype(utc_dtype))

    stacked = pd.concat([all_nat, stamped], axis=0)
    assert_frame_equal(stacked, expected, check_dtype=False)
    # the result column is expected to take the tz-aware dtype
    self.assertEqual(stacked.dtypes.iloc[0], utc_dtype)
    self.assertEqual(stacked.dtypes.iloc[0], stamped.dtypes[0])
2611+
2612+
def test_concat_NaT_series_dataframe_all_NaT(self):
    # GH 12396
    # Concat of an all-NaT Series with a DataFrame of timestamps, checked
    # for naive inputs, a tz-aware frame, and tz-aware inputs on both sides.

    # case 1: Series and DataFrame are both timezone-naive
    nat_series = pd.Series([pd.NaT, pd.NaT])
    stamped = pd.DataFrame([[pd.Timestamp('2015/01/01')],
                            [pd.Timestamp('2016/01/01')]])

    expected_values = [pd.NaT, pd.NaT, stamped.iloc[0, 0],
                       stamped.iloc[1, 0]]
    expected = pd.DataFrame(expected_values, index=[0, 1, 0, 1])

    combined = pd.concat([nat_series, stamped])
    assert_frame_equal(combined, expected)

    # case 2: only the DataFrame is timezone-aware
    utc_dtype = DatetimeTZDtype('ns', tz='UTC')
    stamped = stamped.apply(lambda col: col.astype(utc_dtype))

    combined = pd.concat([nat_series, stamped])

    # the expectation is converted to the tz-aware dtype as well, so the
    # comparison below is dtype-strict
    expected = expected.apply(lambda col: col.astype(utc_dtype))
    assert_frame_equal(combined, expected, check_dtype=True)

    # case 3: the Series is converted to the tz-aware dtype too
    nat_series = nat_series.astype(utc_dtype)
    combined = pd.concat([nat_series, stamped])
    assert_frame_equal(combined, expected, check_dtype=True)
2639+
25252640
def test_concat_keys_and_levels(self):
25262641
df = DataFrame(np.random.randn(1, 3))
25272642
df2 = DataFrame(np.random.randn(1, 4))

0 commit comments

Comments
 (0)