diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 66b26e56b2258..cd2e6a297d4d7 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -549,7 +549,7 @@ Missing ^^^^^^^ - Bug in :meth:`DataFrame.fillna` with limit and no method ignores axis='columns' or ``axis = 1`` (:issue:`40989`) - Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`) -- +- Bug in :meth:`DataFrame.max`, :meth:`DataFrame.min`, :meth:`Series.max` and :meth:`Series.min` when called on datetime columns with timezone-aware data and missing values (:issue:`27794`, :issue:`44196`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ebf3428020652..ae0343e5a3fca 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9979,7 +9979,11 @@ def _get_data() -> DataFrame: data = self._get_bool_data() return data - if numeric_only is not None or axis == 0: + if ( + numeric_only is not None + or axis == 0 + or (name in ["max", "min"] and axis == 1) + ): # For numeric_only non-None and axis non-None, we know # which blocks to use and no try/except is needed.
# For numeric_only=None only the case with axis==0 and no object diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 919d8ab14778e..4a5e1a9d1a864 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -17,7 +17,9 @@ DataFrame, Index, MultiIndex, + PeriodDtype, Series, + Timedelta, Timestamp, date_range, isna, @@ -756,7 +758,7 @@ def test_operators_timedelta64(self): # excludes numeric with tm.assert_produces_warning(FutureWarning, match="Select only valid"): result = mixed.min(axis=1) - expected = Series([1, 1, 1.0], index=[0, 1, 2]) + expected = Series([]) tm.assert_series_equal(result, expected) # works when only those columns are selected @@ -1763,3 +1765,141 @@ def test_prod_sum_min_count_mixed_object(): msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'") with pytest.raises(TypeError, match=msg): df.sum(axis=0, min_count=1, numeric_only=False) + + +def test_timezone_min_max_with_nat(): + # GH#27794 + df = pd.DataFrame( + { + "A": pd.date_range(start="2018-01-01", end="2018-01-03", tz="UTC"), + "B": pd.date_range(start="2018-01-01", end="2018-01-02", tz="UTC").insert( + 2, pd.NaT + ), + } + ) + + expected = pd.Series( + [ + pd.Timestamp("2018-01-01", tz="UTC"), + pd.Timestamp("2018-01-02", tz="UTC"), + pd.Timestamp("2018-01-03", tz="UTC"), + ], + ) + result = df.min(axis=1) + tm.assert_series_equal(result, expected) + + expected = pd.Series( + [ + pd.Timestamp("2018-01-01", tz="UTC"), + pd.Timestamp("2018-01-02", tz="UTC"), + pd.Timestamp("2018-01-03", tz="UTC"), + ], + ) + result = df.max(axis=1) + tm.assert_series_equal(result, expected) + + +def test_min_max_timestamp_timezone_nat(): + # GH#44196 + rng_with_tz = pd.date_range( + start="2021-10-01T12:00:00+02:00", end="2021-10-02T12:00:00+02:00", freq="4H" + ) + df_with_tz = DataFrame( + data={"A": rng_with_tz, "B": rng_with_tz + pd.Timedelta(minutes=20)} + ) + df_with_tz.iloc[2, 1] = pd.NaT + + result = 
df_with_tz.max(axis=1) + expected = pd.Series( + [ + pd.Timestamp("2021-10-01T12:20:00+02:00"), + pd.Timestamp("2021-10-01T16:20:00+02:00"), + pd.Timestamp("2021-10-01T20:00:00+02:00"), + pd.Timestamp("2021-10-02T00:20:00+02:00"), + pd.Timestamp("2021-10-02T04:20:00+02:00"), + pd.Timestamp("2021-10-02T08:20:00+02:00"), + pd.Timestamp("2021-10-02T12:20:00+02:00"), + ] + ) + tm.assert_series_equal(result, expected) + + +def test_timezone_min_max_both_axis(): + rng_with_tz = pd.date_range( + start="2021-10-01T12:00:00+02:00", end="2021-10-02T12:00:00+02:00", freq="4H" + ) + df_with_tz = DataFrame( + data={"A": rng_with_tz, "B": rng_with_tz + pd.Timedelta(minutes=20)} + ) + df_with_tz.iloc[2, 1] = pd.NaT + + result = df_with_tz.max(axis=1) + expected = df_with_tz.T.max(axis=0) + + tm.assert_series_equal(result, expected) + + result = df_with_tz.min(axis=1) + expected = df_with_tz.T.min(axis=0) + + tm.assert_series_equal(result, expected) + + +def test_min_max_timedelta64_nat(): + df = DataFrame( + [ + [Timedelta(minutes=20), Timedelta(days=2), Timedelta(seconds=3)], + [Timedelta(minutes=2, seconds=2), Timedelta(days=2, minutes=30), pd.NaT], + ] + ) + expected = pd.Series( + [Timedelta(minutes=2, seconds=2), Timedelta(days=2), Timedelta(seconds=3)] + ) + result = df.min(axis=0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(df.min(axis=0), df.T.min(axis=1)) + + expected = pd.Series([Timedelta(seconds=3), Timedelta(minutes=2, seconds=2)]) + result = df.min(axis=1) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(df.min(axis=1), df.T.min(axis=0)) + + expected = pd.Series( + [Timedelta(minutes=20), Timedelta(days=2, minutes=30), Timedelta(seconds=3)] + ) + result = df.max(axis=0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(df.max(axis=0), df.T.max(axis=1)) + + expected = pd.Series([Timedelta(days=2), Timedelta(days=2, minutes=30)]) + result = df.max(axis=1) + tm.assert_series_equal(result, expected) + 
tm.assert_series_equal(df.max(axis=1), df.T.max(axis=0)) + + +def test_min_max_perioddtype_nat(): + df = DataFrame( + [ + [PeriodDtype(freq="20m"), PeriodDtype(freq="1h"), PeriodDtype(freq="1d")], + [PeriodDtype(freq="25m"), PeriodDtype(freq="2h"), pd.NaT], + ] + ) + + expected = Series([]) + result = df.min(axis=0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(df.min(axis=0), df.T.min(axis=1)) + + expected = Series([]) + result = df.min(axis=1) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(df.min(axis=1), df.T.min(axis=0)) + + expected = Series([]) + result = df.max(axis=0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(df.max(axis=0), df.T.max(axis=1)) + + expected = Series([]) + result = df.max(axis=1) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(df.max(axis=1), df.T.max(axis=0)) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 80c86e0103436..a99d2f590be97 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -746,7 +746,7 @@ def test_cached_range_bug(self): assert len(rng) == 50 assert rng[0] == datetime(2010, 9, 1, 5) - def test_timezone_comparaison_bug(self): + def test_timezone_comparison_bug(self): # smoke test start = Timestamp("20130220 10:00", tz="US/Eastern") result = date_range(start, periods=2, tz="US/Eastern")