Skip to content

Commit 02a6acf

Browse files
authored
BUG: incorrect type when indexing sparse dataframe with iterable (#34908)
* TST: regression tests for indexing sparse dataframe with iterable closes #34526
1 parent 42a6d44 commit 02a6acf

File tree

4 files changed

+54
-20
lines changed

4 files changed

+54
-20
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1124,6 +1124,7 @@ Sparse
11241124
- Bug where :class:`DataFrame` containing :class:`SparseArray` filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`)
11251125
- The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. Previously it used ``fill_value``'s string representation (:issue:`34352`)
11261126
- Bug where empty :class:`DataFrame` could not be cast to :class:`SparseDtype` (:issue:`33113`)
1127+
- Bug where :class:`arrays.SparseArray` returned the incorrect type when indexing a sparse :class:`DataFrame` with an iterable (:issue:`34526`, :issue:`34540`)
11271128

11281129
ExtensionArray
11291130
^^^^^^^^^^^^^^

pandas/core/arrays/sparse/array.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -866,11 +866,8 @@ def _take_with_fill(self, indices, fill_value=None) -> np.ndarray:
866866

867867
if self.sp_index.npoints == 0:
868868
# Avoid taking from the empty self.sp_values
869-
taken = np.full(
870-
sp_indexer.shape,
871-
fill_value=fill_value,
872-
dtype=np.result_type(type(fill_value)),
873-
)
869+
_dtype = np.result_type(self.dtype.subtype, type(fill_value))
870+
taken = np.full(sp_indexer.shape, fill_value=fill_value, dtype=_dtype)
874871
else:
875872
taken = self.sp_values.take(sp_indexer)
876873

pandas/tests/frame/indexing/test_indexing.py

-15
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
notna,
2222
)
2323
import pandas._testing as tm
24-
from pandas.arrays import SparseArray
2524
import pandas.core.common as com
2625
from pandas.core.indexing import IndexingError
2726

@@ -1907,20 +1906,6 @@ def test_getitem_ix_float_duplicates(self):
19071906
expect = df.iloc[[1, -1], 0]
19081907
tm.assert_series_equal(df.loc[0.2, "a"], expect)
19091908

1910-
def test_getitem_sparse_column(self):
1911-
# https://github.com/pandas-dev/pandas/issues/23559
1912-
data = SparseArray([0, 1])
1913-
df = pd.DataFrame({"A": data})
1914-
expected = pd.Series(data, name="A")
1915-
result = df["A"]
1916-
tm.assert_series_equal(result, expected)
1917-
1918-
result = df.iloc[:, 0]
1919-
tm.assert_series_equal(result, expected)
1920-
1921-
result = df.loc[:, "A"]
1922-
tm.assert_series_equal(result, expected)
1923-
19241909
def test_setitem_with_unaligned_tz_aware_datetime_column(self):
19251910
# GH 12981
19261911
# Assignment of unaligned offset-aware datetime series.
pandas/tests/frame/indexing/test_sparse.py

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas.util._test_decorators as td
5+
6+
import pandas as pd
7+
import pandas._testing as tm
8+
from pandas.arrays import SparseArray
9+
from pandas.core.arrays.sparse import SparseDtype
10+
11+
12+
class TestSparseDataFrameIndexing:
13+
def test_getitem_sparse_column(self):
14+
# https://github.com/pandas-dev/pandas/issues/23559
15+
data = SparseArray([0, 1])
16+
df = pd.DataFrame({"A": data})
17+
expected = pd.Series(data, name="A")
18+
result = df["A"]
19+
tm.assert_series_equal(result, expected)
20+
21+
result = df.iloc[:, 0]
22+
tm.assert_series_equal(result, expected)
23+
24+
result = df.loc[:, "A"]
25+
tm.assert_series_equal(result, expected)
26+
27+
@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
28+
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
29+
@td.skip_if_no_scipy
30+
def test_locindexer_from_spmatrix(self, spmatrix_t, dtype):
31+
import scipy.sparse
32+
33+
spmatrix_t = getattr(scipy.sparse, spmatrix_t)
34+
35+
# The bug is triggered by a sparse matrix with purely sparse columns. So the
36+
# recipe below generates a rectangular matrix of dimension (5, 7) where all the
37+
# diagonal cells are ones, meaning the last two columns are purely sparse.
38+
rows, cols = 5, 7
39+
spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype)
40+
df = pd.DataFrame.sparse.from_spmatrix(spmatrix)
41+
42+
# regression test for #34526
43+
itr_idx = range(2, rows)
44+
result = df.loc[itr_idx].values
45+
expected = spmatrix.toarray()[itr_idx]
46+
tm.assert_numpy_array_equal(result, expected)
47+
48+
# regression test for #34540
49+
result = df.loc[itr_idx].dtypes.values
50+
expected = np.full(cols, SparseDtype(dtype, fill_value=0))
51+
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)