From 12d0b4db65fb681b7aca30209711ceb997f3d4b7 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Tue, 19 May 2020 23:24:10 +0000 Subject: [PATCH 1/5] TST: GH28813 test .diff() on Sparse dtype --- pandas/tests/frame/methods/test_diff.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 6a9248e1cba1e..4ab0e80b73d90 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -36,6 +36,14 @@ def test_diff(self, datetime_frame): ).astype("float64") tm.assert_frame_equal(result, expected) + # Result should be the same for sparse df, see GH28813 + arr = [[0, 1], [1, 0]] + normal = pd.DataFrame(arr) + sparse = pd.DataFrame(arr, dtype='Sparse[int]') + # we don't check dtype because one is sparse and the other isn't + tm.assert_frame_equal(normal.diff(), sparse.diff(), check_dtype=False) + + @pytest.mark.parametrize("tz", [None, "UTC"]) def test_diff_datetime_axis0(self, tz): # GH#18578 From 02c4a85908d8cb6906feeb539be5b694e746a1a4 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Wed, 20 May 2020 01:02:42 +0000 Subject: [PATCH 2/5] TST: GH28813 test .diff() on Sparse dtype --- pandas/tests/frame/methods/test_diff.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 4ab0e80b73d90..2640e2192d798 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -39,11 +39,10 @@ def test_diff(self, datetime_frame): # Result should be the same for sparse df, see GH28813 arr = [[0, 1], [1, 0]] normal = pd.DataFrame(arr) - sparse = pd.DataFrame(arr, dtype='Sparse[int]') + sparse = pd.DataFrame(arr, dtype="Sparse[int]") # we don't check dtype because one is sparse and the other isn't tm.assert_frame_equal(normal.diff(), sparse.diff(), check_dtype=False) - @pytest.mark.parametrize("tz", [None, "UTC"]) def 
test_diff_datetime_axis0(self, tz): # GH#18578 From 7e3256ba22eb4957c35ff582282029f0904332b7 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Wed, 20 May 2020 16:38:12 +0000 Subject: [PATCH 3/5] TST: GH28813 pull sparse diff() test into its own function --- pandas/tests/frame/methods/test_diff.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 2640e2192d798..e876e40aa2eb1 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -36,13 +36,6 @@ def test_diff(self, datetime_frame): ).astype("float64") tm.assert_frame_equal(result, expected) - # Result should be the same for sparse df, see GH28813 - arr = [[0, 1], [1, 0]] - normal = pd.DataFrame(arr) - sparse = pd.DataFrame(arr, dtype="Sparse[int]") - # we don't check dtype because one is sparse and the other isn't - tm.assert_frame_equal(normal.diff(), sparse.diff(), check_dtype=False) - @pytest.mark.parametrize("tz", [None, "UTC"]) def test_diff_datetime_axis0(self, tz): # GH#18578 @@ -165,3 +158,14 @@ def test_diff_axis1_mixed_dtypes_negative_periods(self): result = df.diff(axis=1, periods=-1) tm.assert_frame_equal(result, expected) + + def test_diff_sparse(self): + # GH#28813 .diff() should work for sparse dataframes as well + sparse_df = pd.DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]") + + result = sparse_df.diff() + expected = pd.DataFrame( + [[np.nan, np.nan], [1.0, -1.0]], dtype=pd.SparseDtype("float", 0.0) + ) + + tm.assert_frame_equal(result, expected) From a852a57b0d04a85268da3a28c1df295626a2f821 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Wed, 27 May 2020 02:17:05 +0000 Subject: [PATCH 4/5] CLN: GH29547 update formatting to f-strings --- pandas/util/_validators.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 
fbb44408f01be..9abec7fd4573b 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -295,13 +295,13 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): raise TypeError(msg) msg = ( - "Interpreting call\n\t'.{method_name}(a, b)' as " - "\n\t'.{method_name}(index=a, columns=b)'.\nUse named " - "arguments to remove any ambiguity. In the future, using " - "positional arguments for 'index' or 'columns' will raise " - "a 'TypeError'." + f"Interpreting call\n\t'.{method_name}(a, b)' as " + f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named " + f"arguments to remove any ambiguity. In the future, using " + f"positional arguments for 'index' or 'columns' will raise " + f"a 'TypeError'." ) - warnings.warn(msg.format(method_name=method_name), FutureWarning, stacklevel=4) + warnings.warn(msg, FutureWarning, stacklevel=4) out[data._get_axis_name(0)] = args[0] out[data._get_axis_name(1)] = args[1] else: @@ -370,12 +370,15 @@ def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray: ------ ValueError if percentiles are not in given interval([0, 1]). """ - msg = "percentiles should all be in the interval [0, 1]. Try {0} instead." q_arr = np.asarray(q) + msg = ( + f"percentiles should all be in the interval [0, 1]." + f"Try {q_arr / 100.0} instead." 
+ ) if q_arr.ndim == 0: if not 0 <= q_arr <= 1: - raise ValueError(msg.format(q_arr / 100.0)) + raise ValueError(msg) else: if not all(0 <= qs <= 1 for qs in q_arr): - raise ValueError(msg.format(q_arr / 100.0)) + raise ValueError(msg) return q_arr From 5ba57d00415afad1f645a12daee90194babc4521 Mon Sep 17 00:00:00 2001 From: matteosantama Date: Wed, 27 May 2020 16:38:52 +0000 Subject: [PATCH 5/5] Remove unnecessary f --- pandas/util/_validators.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 9abec7fd4573b..bb6c6de441558 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -297,9 +297,9 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): msg = ( f"Interpreting call\n\t'.{method_name}(a, b)' as " f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named " - f"arguments to remove any ambiguity. In the future, using " - f"positional arguments for 'index' or 'columns' will raise " - f"a 'TypeError'." + "arguments to remove any ambiguity. In the future, using " + "positional arguments for 'index' or 'columns' will raise " + "a 'TypeError'." ) warnings.warn(msg, FutureWarning, stacklevel=4) out[data._get_axis_name(0)] = args[0] @@ -372,7 +372,7 @@ def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray: """ q_arr = np.asarray(q) msg = ( - f"percentiles should all be in the interval [0, 1]." + "percentiles should all be in the interval [0, 1]. " f"Try {q_arr / 100.0} instead." ) if q_arr.ndim == 0: