From 96e4dc90ae7234075c6b9716b2b7e313db3b13d0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 17:53:45 +0000 Subject: [PATCH 1/3] avoid some upcasting when its not the purpose of the test --- pandas/tests/frame/test_query_eval.py | 4 ++-- pandas/tests/frame/test_reductions.py | 8 ++++++-- pandas/tests/groupby/test_timegrouper.py | 2 +- pandas/tests/series/methods/test_replace.py | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index e81837898c927..3f5a216a921b8 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -448,7 +448,7 @@ def test_date_index_query(self): def test_date_index_query_with_NaT(self): engine, parser = self.engine, self.parser n = 10 - df = DataFrame(np.random.randn(n, 3)) + df = DataFrame(np.random.randn(n, 3)).astype({0: object}) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.iloc[0, 0] = pd.NaT @@ -808,7 +808,7 @@ def test_date_index_query(self): def test_date_index_query_with_NaT(self): engine, parser = self.engine, self.parser n = 10 - df = DataFrame(np.random.randn(n, 3)) + df = DataFrame(np.random.randn(n, 3)).astype({0: object}) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.iloc[0, 0] = pd.NaT diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index a3cd3e4afdda1..8df6eaaf85232 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -448,11 +448,15 @@ def test_var_std(self, datetime_frame): @pytest.mark.parametrize("meth", ["sem", "var", "std"]) def test_numeric_only_flag(self, meth): # GH 9201 - df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]).astype( + {"foo": object} + ) # set one entry to a number in str format df1.loc[0, "foo"] = "100" - df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]).astype( + {"foo": object} + ) # set one entry to a non-number str df2.loc[0, "foo"] = "a" diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 4a707d8875db3..710c2e43e8254 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -102,7 +102,7 @@ def test_groupby_with_timegrouper(self): index=date_range( "20130901", "20131205", freq="5D", name="Date", inclusive="left" ), - ) + ).astype({"Buyer": object}) expected.iloc[0, 0] = "CarlCarlCarl" expected.iloc[6, 0] = "CarlCarl" expected.iloc[18, 0] = "Joe" diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 59afe22e40f7a..8eed3eeecff84 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -16,7 +16,7 @@ def test_replace_explicit_none(self): expected = pd.Series([0, 0, None], dtype=object) tm.assert_series_equal(result, expected) - df = pd.DataFrame(np.zeros((3, 3))) + df = pd.DataFrame(np.zeros((3, 3))).astype({2: object}) df.iloc[2, 2] = "" result = df.replace("", None) expected = pd.DataFrame( From d4ecbc8973fbceafb7a90dc60564ebdccbbf036b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 30 Dec 2022 08:57:45 +0000 Subject: [PATCH 2/3] add comments --- pandas/tests/frame/test_query_eval.py | 2 ++ pandas/tests/frame/test_reductions.py | 12 ++++++------ pandas/tests/groupby/test_timegrouper.py | 4 +++- pandas/tests/series/methods/test_replace.py | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 3f5a216a921b8..159dab04e7da6 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -448,6 +448,7 @@ def test_date_index_query(self): def test_date_index_query_with_NaT(self): engine, parser = self.engine, self.parser n = 10 + # Cast to object to avoid implicit cast when setting entry to pd.NaT below df = DataFrame(np.random.randn(n, 3)).astype({0: object}) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) @@ -808,6 +809,7 @@ def test_date_index_query(self): def test_date_index_query_with_NaT(self): engine, parser = self.engine, self.parser n = 10 + # Cast to object to avoid implicit cast when setting entry to pd.NaT below df = DataFrame(np.random.randn(n, 3)).astype({0: object}) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 8df6eaaf85232..2e0aa5fd0cf40 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -448,15 +448,15 @@ def test_var_std(self, datetime_frame): @pytest.mark.parametrize("meth", ["sem", "var", "std"]) def test_numeric_only_flag(self, meth): # GH 9201 - df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]).astype( - {"foo": object} - ) + df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # Cast to object to avoid implicit cast when setting entry to "100" below + df1 = df1.astype({"foo": object}) # set one entry to a number in str format df1.loc[0, "foo"] = "100" - df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]).astype( - {"foo": object} - ) + df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # Cast to object to avoid implicit cast when setting entry to "a" below + df2 = df2.astype({"foo": object}) # set one entry to a non-number str df2.loc[0, "foo"] = "a" diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 710c2e43e8254..f16cf4dd27016 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -102,7 +102,9 @@ def test_groupby_with_timegrouper(self): index=date_range( "20130901", "20131205", freq="5D", name="Date", inclusive="left" ), - ).astype({"Buyer": object}) + ) + # Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl" + expected = expected.astype({"Buyer": object}) expected.iloc[0, 0] = "CarlCarlCarl" expected.iloc[6, 0] = "CarlCarl" expected.iloc[18, 0] = "Joe" diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 8eed3eeecff84..18ad275083022 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -16,6 +16,7 @@ def test_replace_explicit_none(self): expected = pd.Series([0, 0, None], dtype=object) tm.assert_series_equal(result, expected) + # Cast column 2 to object to avoid implicit cast when setting entry to "" df = pd.DataFrame(np.zeros((3, 3))).astype({2: object}) df.iloc[2, 2] = "" result = df.replace("", None) From 37a605396a8206fe1f536a9e0d881bebab813f4c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 30 Dec 2022 13:46:45 +0000 Subject: [PATCH 3/3] add one more explicit upcast --- pandas/tests/frame/methods/test_equals.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py index dddd6c6d2eaf2..beec3e965d542 100644 --- a/pandas/tests/frame/methods/test_equals.py +++ b/pandas/tests/frame/methods/test_equals.py @@ -36,7 +36,8 @@ def test_equals(self): df1["start"] = date_range("2000-1-1", periods=10, freq="T") df1["end"] = date_range("2000-1-1", periods=10, freq="D") df1["diff"] = df1["end"] - df1["start"] - df1["bool"] = np.arange(10) % 3 == 0 + # Explicitly cast to object, to avoid implicit cast when setting np.nan + df1["bool"] = (np.arange(10) % 3 == 0).astype(object) df1.loc[::2] = np.nan df2 = df1.copy() assert df1["text"].equals(df2["text"])