|
12 | 12 | from pandas.compat import range, lrange, lzip, StringIO
|
13 | 13 | from pandas import compat
|
14 | 14 | from pandas.tseries.index import DatetimeIndex
|
| 15 | +from pandas.types.dtypes import DatetimeTZDtype |
15 | 16 | from pandas.tools.merge import merge, concat, ordered_merge, MergeError
|
16 | 17 | from pandas import Categorical, Timestamp
|
17 | 18 | from pandas.util.testing import (assert_frame_equal, assert_series_equal,
|
@@ -2522,6 +2523,120 @@ def test_concat_multiindex_with_tz(self):
|
2522 | 2523 | result = concat([df, df])
|
2523 | 2524 | tm.assert_frame_equal(result, expected)
|
2524 | 2525 |
|
def test_concat_NaT_dataframes_all_NaT_axis_0(self):
    # GH 12396
    # Stacking all-NaT frames row-wise must give an all-NaT frame; whenever
    # at least one input is tz-aware, the result column is expected to
    # carry the tz-aware dtype.
    utc = DatetimeTZDtype('ns', tz='UTC')
    expected = pd.DataFrame([pd.NaT] * 3, index=[0, 1, 0])

    naive_two_rows = pd.DataFrame([[pd.NaT], [pd.NaT]])
    naive_one_row = pd.DataFrame([[pd.NaT]])
    aware_two_rows = pd.DataFrame([[pd.NaT], [pd.NaT]], dtype=utc)
    aware_one_row = pd.DataFrame([[pd.NaT]], dtype=utc)

    # non-timezone aware: dtypes must match exactly
    stacked = pd.concat([naive_two_rows, naive_one_row], axis=0)
    assert_frame_equal(stacked, expected)

    # first pair: one side timezone-aware (mixed case)
    # second pair: both sides timezone-aware
    tz_cases = (
        [aware_two_rows, naive_one_row],
        [aware_two_rows, aware_one_row],
    )
    for frames in tz_cases:
        stacked = pd.concat(frames, axis=0)
        # values compared loosely; the dtype is pinned separately below
        assert_frame_equal(stacked, expected, check_dtype=False)
        self.assertEqual(stacked.dtypes[0], utc)
| 2553 | + |
def test_concat_NaT_dataframes_all_NaT_axis_1(self):
    # GH 12396
    # Joining all-NaT frames column-wise must give an all-NaT frame with
    # duplicate column labels; each result column is expected to keep the
    # dtype of the frame it came from.
    expect = pd.DataFrame([[pd.NaT, pd.NaT], [pd.NaT, pd.NaT]],
                          columns=[0, 0])

    # non-timezone aware
    first = pd.DataFrame([[pd.NaT], [pd.NaT]])
    second = pd.DataFrame([[pd.NaT]])

    result = pd.concat([first, second], axis=1)
    assert_frame_equal(result, expect)

    # one side timezone-aware
    dtype = DatetimeTZDtype('ns', tz='UTC')
    first = pd.DataFrame([[pd.NaT], [pd.NaT]], dtype=dtype)

    # Concatenate BEFORE asserting: otherwise the frame comparison would
    # silently re-check the tz-naive result computed above instead of the
    # mixed-dtype result this section is about.
    result = pd.concat([first, second], axis=1)
    assert_frame_equal(result, expect, check_dtype=False)
    # column 0 is tz-aware (from ``first``); column 1 stays as ``second``
    self.assertEqual(result.dtypes.iloc[0], dtype)
    self.assertEqual(result.dtypes.iloc[0], first.dtypes[0])
    self.assertEqual(result.dtypes.iloc[1], second.dtypes[0])

    # both sides timezone-aware
    second = pd.DataFrame([[pd.NaT]], dtype=dtype)

    result = pd.concat([first, second], axis=1)
    assert_frame_equal(result, expect, check_dtype=False)
    # both columns carry the dtype of their (tz-aware) source frame
    self.assertEqual(result.dtypes.iloc[0], dtype)
    self.assertEqual(result.dtypes.iloc[0], first.dtypes[0])
    self.assertEqual(result.dtypes.iloc[1], second.dtypes[0])
| 2586 | + |
def test_concat_NaT_dataframes_mixed_timestamps_and_NaT(self):
    # GH 12396
    # An all-NaT frame stacked on top of a frame of real timestamps:
    # the NaT rows must not disturb the dtype of the timestamp column.
    utc = DatetimeTZDtype('ns', tz='UTC')

    def _as_utc(column):
        return column.astype(utc)

    # non-timezone aware
    nat_frame = pd.DataFrame([[pd.NaT], [pd.NaT]])
    stamp_frame = pd.DataFrame([[pd.Timestamp('2015/01/01')],
                                [pd.Timestamp('2016/01/01')]])

    stamp_a = stamp_frame.iloc[0, 0]
    stamp_b = stamp_frame.iloc[1, 0]
    expected = pd.DataFrame([pd.NaT, pd.NaT, stamp_a, stamp_b],
                            index=[0, 1, 0, 1])

    stacked = pd.concat([nat_frame, stamp_frame], axis=0)
    assert_frame_equal(stacked, expected)
    self.assertEqual(stacked.dtypes.iloc[0], nat_frame.dtypes[0])

    # one side timezone-aware
    stamp_frame = stamp_frame.apply(_as_utc)

    stacked = pd.concat([nat_frame, stamp_frame], axis=0)
    assert_frame_equal(stacked, expected, check_dtype=False)
    # result takes the tz-aware dtype of the timestamp frame
    self.assertEqual(stacked.dtypes.iloc[0], utc)
    self.assertEqual(stacked.dtypes.iloc[0], stamp_frame.dtypes[0])
| 2611 | + |
def test_concat_NaT_series_dataframe_all_NaT(self):
    # GH 12396
    # An all-NaT Series concatenated with a DataFrame of real timestamps
    # (default axis): the result is compared with strict dtype checking
    # once the frame is tz-aware.
    utc = DatetimeTZDtype('ns', tz='UTC')

    def _as_utc(column):
        return column.astype(utc)

    # non-timezone aware
    nat_series = pd.Series([pd.NaT, pd.NaT])
    stamp_frame = pd.DataFrame([[pd.Timestamp('2015/01/01')],
                                [pd.Timestamp('2016/01/01')]])

    stamp_a = stamp_frame.iloc[0, 0]
    stamp_b = stamp_frame.iloc[1, 0]
    expected = pd.DataFrame([pd.NaT, pd.NaT, stamp_a, stamp_b],
                            index=[0, 1, 0, 1])

    combined = pd.concat([nat_series, stamp_frame])
    assert_frame_equal(combined, expected)

    # one side timezone-aware: the expectation is converted as well, so
    # the dtype is checked strictly
    stamp_frame = stamp_frame.apply(_as_utc)
    expected = expected.apply(_as_utc)

    combined = pd.concat([nat_series, stamp_frame])
    assert_frame_equal(combined, expected, check_dtype=True)

    # both sides timezone-aware
    nat_series = nat_series.astype(utc)
    combined = pd.concat([nat_series, stamp_frame])
    assert_frame_equal(combined, expected, check_dtype=True)
| 2639 | + |
2525 | 2640 | def test_concat_keys_and_levels(self):
|
2526 | 2641 | df = DataFrame(np.random.randn(1, 3))
|
2527 | 2642 | df2 = DataFrame(np.random.randn(1, 4))
|
|
0 commit comments