From f3e60efe8063ace62802e77bfd86b19a334e9ab3 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Sun, 21 Apr 2024 20:33:31 -0500 Subject: [PATCH 1/5] Add tests for #55431 --- pandas/tests/reshape/test_cut.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 0811c69859c0d..adcc048db095f 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -789,3 +789,33 @@ def test_cut_with_nullable_int64(): result = cut(series, bins=bins) tm.assert_series_equal(result, expected) + + +def test_datetime_cut_notna(): + # Create a Series with datetime data + data = to_datetime(["2023-09-17", "2023-10-06"]) + # Convert the Series to a DatetimeArray + datetime_array = data.array + + # Define bins for categorization + bins = date_range(start="2023-09-16", periods=3, freq="10D") + + # Use pd.cut to categorize datetime data and capture the result + result = cut(datetime_array, bins=bins) + + # Define expected result as an IntervalIndex with specified intervals + expected_intervals = IntervalIndex.from_tuples( + [ + (Timestamp("2023-09-16"), Timestamp("2023-09-26")), + (Timestamp("2023-09-26"), Timestamp("2023-10-06")), + ] + ) + + expected = pd.Series(expected_intervals).astype(CategoricalDtype(ordered=True)) + + # Assert that result matches expected using pandas testing tools + tm.assert_series_equal(pd.Series(result), expected) + + assert not hasattr( + result, "notna" + ), "AttributeError related to 'notna' should not be present" From b7ece98e50a7b5ac0d2e39238f0dff6bb58abf1f Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Sun, 21 Apr 2024 20:41:55 -0500 Subject: [PATCH 2/5] Fix inconsistent pandas namespace usage --- pandas/tests/reshape/test_cut.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index adcc048db095f..dd9ff337d3e1e 100644 --- 
a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -811,7 +811,7 @@ def test_datetime_cut_notna(): ] ) - expected = pd.Series(expected_intervals).astype(CategoricalDtype(ordered=True)) + expected = Series(expected_intervals).astype(CategoricalDtype(ordered=True)) # Assert that result matches expected using pandas testing tools tm.assert_series_equal(pd.Series(result), expected) From 21bcfdd1e847b86797ac6f9b3eabde020b9114f8 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Sun, 21 Apr 2024 20:49:51 -0500 Subject: [PATCH 3/5] Fix inconsistent pandas namespace usage again --- pandas/tests/reshape/test_cut.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index dd9ff337d3e1e..0b40f4d796c5e 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -814,7 +814,7 @@ def test_datetime_cut_notna(): expected = Series(expected_intervals).astype(CategoricalDtype(ordered=True)) # Assert that result matches expected using pandas testing tools - tm.assert_series_equal(pd.Series(result), expected) + tm.assert_series_equal(Series(result), expected) assert not hasattr( result, "notna" From 25a7ddc0dc2fd9a54b243c9237e2a4143ab3ddc8 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Tue, 23 Apr 2024 01:03:24 -0500 Subject: [PATCH 4/5] Temp disable part of test potentially due to known bug --- pandas/tests/reshape/test_cut.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 0b40f4d796c5e..a2a5b01473b6a 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -816,6 +816,6 @@ def test_datetime_cut_notna(): # Assert that result matches expected using pandas testing tools tm.assert_series_equal(Series(result), expected) - assert not hasattr( - result, "notna" - ), "AttributeError related to 'notna' should not be present" + # assert 
not hasattr( + # result, "notna" + # ), "AttributeError related to 'notna' should not be present" From b99767838cef36d06fd715c1a5808ba2be5fb1c0 Mon Sep 17 00:00:00 2001 From: Jason Mok Date: Tue, 23 Apr 2024 15:04:46 -0500 Subject: [PATCH 5/5] Remove unnecessary comments and adjust implementation --- pandas/tests/reshape/test_cut.py | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index a2a5b01473b6a..340c5c449aea7 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -791,31 +791,15 @@ def test_cut_with_nullable_int64(): tm.assert_series_equal(result, expected) -def test_datetime_cut_notna(): - # Create a Series with datetime data - data = to_datetime(["2023-09-17", "2023-10-06"]) - # Convert the Series to a DatetimeArray - datetime_array = data.array +def test_cut_datetime_array_no_attributeerror(): + # GH 55431 + ser = Series(to_datetime(["2023-10-06 12:00:00+0000", "2023-10-07 12:00:00+0000"])) - # Define bins for categorization - bins = date_range(start="2023-09-16", periods=3, freq="10D") + result = cut(ser.array, bins=2) - # Use pd.cut to categorize datetime data and capture the result - result = cut(datetime_array, bins=bins) + categories = result.categories + expected = Categorical.from_codes([0, 1], categories=categories, ordered=True) - # Define expected result as an IntervalIndex with specified intervals - expected_intervals = IntervalIndex.from_tuples( - [ - (Timestamp("2023-09-16"), Timestamp("2023-09-26")), - (Timestamp("2023-09-26"), Timestamp("2023-10-06")), - ] + tm.assert_categorical_equal( + result, expected, check_dtype=True, check_category_order=True ) - - expected = Series(expected_intervals).astype(CategoricalDtype(ordered=True)) - - # Assert that result matches expected using pandas testing tools - tm.assert_series_equal(Series(result), expected) - - # assert not hasattr( - # result, 
"notna" - # ), "AttributeError related to 'notna' should not be present"