From 6fde3251e5a0d6875acd5994f280bda01c212339 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 27 Apr 2023 11:06:56 +0100 Subject: [PATCH] dont upcast where unnecessary --- doc/source/user_guide/cookbook.rst | 2 +- doc/source/user_guide/missing_data.rst | 2 +- doc/source/whatsnew/v0.15.0.rst | 2 +- doc/source/whatsnew/v0.17.0.rst | 2 +- pandas/_testing/contexts.py | 24 +++++++++++++++--------- pandas/core/generic.py | 8 ++++---- pandas/tests/groupby/test_groupby.py | 3 ++- pandas/tests/groupby/test_nunique.py | 1 + 8 files changed, 26 insertions(+), 18 deletions(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 3eee4ce7ac25c..604a5cc56ab04 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -125,7 +125,7 @@ Building criteria .. ipython:: python - df.loc[(df["BBB"] > 25) | (df["CCC"] >= 75), "AAA"] = 0.1 + df.loc[(df["BBB"] > 25) | (df["CCC"] >= 75), "AAA"] = 999 df `Select rows with data closest to certain value using argsort diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 4d645cd75ac76..a17d0eba294b2 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -123,7 +123,7 @@ the missing value type chosen: .. ipython:: python - s = pd.Series([1, 2, 3]) + s = pd.Series([1., 2., 3.]) s.loc[0] = None s diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst index 67e91751e9527..6b962cbb49c74 100644 --- a/doc/source/whatsnew/v0.15.0.rst +++ b/doc/source/whatsnew/v0.15.0.rst @@ -748,7 +748,7 @@ Other notable API changes: .. ipython:: python - s = pd.Series([1, 2, 3]) + s = pd.Series([1., 2., 3.]) s.loc[0] = None s diff --git a/doc/source/whatsnew/v0.17.0.rst b/doc/source/whatsnew/v0.17.0.rst index 7067407604d24..abbda2ffc9be2 100644 --- a/doc/source/whatsnew/v0.17.0.rst +++ b/doc/source/whatsnew/v0.17.0.rst @@ -738,7 +738,7 @@ Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to compar .. ipython:: python - s = pd.Series(range(3)) + s = pd.Series(range(3), dtype="float") s.iloc[1] = None s diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index fb5b7b967f6bf..ab00c80886794 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -205,18 +205,24 @@ def use_numexpr(use, min_elements=None) -> Generator[None, None, None]: set_option("compute.use_numexpr", olduse) -def raises_chained_assignment_error(): - if PYPY: +def raises_chained_assignment_error(extra_warnings=(), extra_match=()): + from pandas._testing import assert_produces_warning + + if PYPY and not extra_warnings: from contextlib import nullcontext return nullcontext() + elif PYPY and extra_warnings: + return assert_produces_warning( + extra_warnings, + match="|".join(extra_match), + ) else: - from pandas._testing import assert_produces_warning - + match = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment" + ) return assert_produces_warning( - ChainedAssignmentError, - match=( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment" - ), + (ChainedAssignmentError, *extra_warnings), + match="|".join((match, *extra_match)), ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2ee1e0512de74..6b3be257a1b30 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7894,7 +7894,7 @@ def asof(self, where, subset=None): Take all columns into consideration - >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], + >>> df = pd.DataFrame({'a': [10., 20., 30., 40., 50.], ... 'b': [None, None, None, None, 500]}, ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', ... '2018-02-27 09:02:00', @@ -7912,9 +7912,9 @@ def asof(self, where, subset=None): >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', ... '2018-02-27 09:04:30']), ... subset=['a']) - a b - 2018-02-27 09:03:30 30 NaN - 2018-02-27 09:04:30 40 NaN + a b + 2018-02-27 09:03:30 30.0 NaN + 2018-02-27 09:04:30 40.0 NaN """ if isinstance(where, str): where = Timestamp(where) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 514c0fe82ff5f..42e3db2c72f26 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -184,7 +184,8 @@ def f_2(grp): msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(FutureWarning, match=msg): result = df.groupby("A").apply(f_2)[["B"]] - e = expected.copy() + # Explicit cast to float to avoid implicit cast when setting nan + e = expected.copy().astype({"B": "float"}) e.loc["Pony"] = np.nan tm.assert_frame_equal(result, e) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index 282c91c82f5b1..661003d081bda 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -51,6 +51,7 @@ def check_nunique(df, keys, as_index=True): check_nunique(frame, ["jim"]) check_nunique(frame, ["jim", "joe"]) + frame = frame.astype({"julie": float}) # Explicit cast to avoid implicit cast below frame.loc[1::17, "jim"] = None frame.loc[3::37, "joe"] = None frame.loc[7::19, "julie"] = None