From 183870b4cd83ed5e43129218fc320bf47acb88df Mon Sep 17 00:00:00 2001 From: Eric Leerssen Date: Tue, 23 Mar 2021 20:47:37 +0100 Subject: [PATCH 1/5] TST Add test for loc on sparse dataframes --- pandas/tests/indexing/test_loc.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 85accac5a8235..a59319d31fec2 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1163,6 +1163,25 @@ def test_loc_getitem_listlike_all_retains_sparse(self): result = df.loc[[0, 1]] tm.assert_frame_equal(result, df) + @td.skip_if_no_scipy + def test_loc_sparse_frame(self): + # GH34687 + from scipy.sparse import eye + + df = DataFrame.sparse.from_spmatrix(eye(5)) + result = df.loc[range(2)] + expected = DataFrame( + [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]], + dtype=SparseDtype("float64", 0.0), + ) + tm.assert_frame_equal(result, expected) + + result = df.loc[range(2)].loc[range(1)] + expected = DataFrame( + [[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=SparseDtype("float64", 0.0) + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_getitem_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"]) From d46710d49d1b7fa62620b3beb5e2ed3271be9011 Mon Sep 17 00:00:00 2001 From: Eric Leerssen Date: Sun, 28 Mar 2021 22:29:10 +0200 Subject: [PATCH 2/5] TST Add test for sparse series --- pandas/tests/indexing/test_loc.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index a59319d31fec2..e5f536b5e7cb7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1164,7 +1164,7 @@ def test_loc_getitem_listlike_all_retains_sparse(self): tm.assert_frame_equal(result, df) @td.skip_if_no_scipy - def test_loc_sparse_frame(self): + def test_loc_getitem_sparse_frame(self): # GH34687 from scipy.sparse import eye @@ -1182,6 +1182,17 @@ def test_loc_sparse_frame(self): ) tm.assert_frame_equal(result, expected) + ser = df[0] + ser.name = None + + result = ser.loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + + result = ser.loc[range(3)].loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_getitem_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"]) From b13e6cdc5a4f80e6cb7ad1c14bc62c03aa537890 Mon Sep 17 00:00:00 2001 From: Eric Leerssen Date: Tue, 30 Mar 2021 22:02:16 +0200 Subject: [PATCH 3/5] TST move sparse series tests --- pandas/tests/indexing/test_loc.py | 11 ----------- pandas/tests/series/indexing/test_indexing.py | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index e5f536b5e7cb7..bff011ba9e8e7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1182,17 +1182,6 @@ def test_loc_getitem_sparse_frame(self): ) tm.assert_frame_equal(result, expected) - ser = df[0] - ser.name = None - - result = ser.loc[range(2)] - expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) - tm.assert_series_equal(result, expected) - - result = ser.loc[range(3)].loc[range(2)] - expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) - tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_getitem_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"]) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 30c37113f6b8f..cb40463d0dd63 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -15,6 +15,7 @@ date_range, period_range, timedelta_range, + SparseDtype, ) import pandas._testing as tm @@ -377,3 +378,17 @@ def test_frozenset_index(): assert s[idx1] == 2 s[idx1] = 3 assert s[idx1] == 3 + + +def test_loc_getitem_sparse_series(): + # GH34687 + + s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0)) + + result = s.loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + + result = s.loc[range(3)].loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) From 362ae26feded3a692da6fdeff7a9d23354e159c4 Mon Sep 17 00:00:00 2001 From: Eric Leerssen Date: Wed, 31 Mar 2021 21:59:31 +0200 Subject: [PATCH 4/5] TST Fix failing test --- pandas/tests/series/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index cb40463d0dd63..d9f966633a9f5 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -10,12 +10,12 @@ IndexSlice, MultiIndex, Series, + SparseDtype, Timedelta, Timestamp, date_range, period_range, timedelta_range, - SparseDtype, ) import pandas._testing as tm From 6b08715a825c5b184366062fa1c02077e29edfa5 Mon Sep 17 00:00:00 2001 From: Eric Leerssen Date: Thu, 1 Apr 2021 22:01:55 +0200 Subject: [PATCH 5/5] TST Move back sparse series tests --- pandas/tests/indexing/test_loc.py | 12 ++++++++++++ pandas/tests/series/indexing/test_indexing.py | 15 --------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index bff011ba9e8e7..e6bc6e5a9ea38 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1182,6 +1182,18 @@ def test_loc_getitem_sparse_frame(self): ) tm.assert_frame_equal(result, expected) + def test_loc_getitem_sparse_series(self): + # GH34687 + s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0)) + + result = s.loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + + result = s.loc[range(3)].loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_getitem_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"]) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index d9f966633a9f5..30c37113f6b8f 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -10,7 +10,6 @@ IndexSlice, MultiIndex, Series, - SparseDtype, Timedelta, Timestamp, date_range, @@ -378,17 +377,3 @@ def test_frozenset_index(): assert s[idx1] == 2 s[idx1] = 3 assert s[idx1] == 3 - - -def test_loc_getitem_sparse_series(): - # GH34687 - - s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0)) - - result = s.loc[range(2)] - expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) - tm.assert_series_equal(result, expected) - - result = s.loc[range(3)].loc[range(2)] - expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) - tm.assert_series_equal(result, expected)