Skip to content

Commit a3a0942

Browse files
sinhrks authored and jreback committed
BUG: SparseDataFrame indexing may return normal Series
closes pandas-dev#12787
1 parent 610d3d5 commit a3a0942

File tree

5 files changed

+172
-6
lines changed

5 files changed

+172
-6
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ These changes conform sparse handling to return the correct types and work to ma
7777
- Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`)
7878
- Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`)
7979
- Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`)
80+
- Bug in ``SparseDataFrame.loc[]``, ``.iloc[]`` may result in dense ``Series``, rather than ``SparseSeries`` (:issue:`12787`)
8081
- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
8182
- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
8283
- Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`)

pandas/core/frame.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1915,8 +1915,10 @@ def _ixs(self, i, axis=0):
19151915
# if we are a copy, mark as such
19161916
copy = (isinstance(new_values, np.ndarray) and
19171917
new_values.base is None)
1918-
result = Series(new_values, index=self.columns,
1919-
name=self.index[i], dtype=new_values.dtype)
1918+
result = self._constructor_sliced(new_values,
1919+
index=self.columns,
1920+
name=self.index[i],
1921+
dtype=new_values.dtype)
19201922
result._set_is_copy(self, copy=copy)
19211923
return result
19221924

pandas/core/generic.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1752,7 +1752,6 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
17521752
new_index = self.index[loc]
17531753

17541754
if lib.isscalar(loc):
1755-
from pandas import Series
17561755
new_values = self._data.fast_xs(loc)
17571756

17581757
# may need to box a datelike-scalar
@@ -1763,9 +1762,9 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
17631762
if not is_list_like(new_values) or self.ndim == 1:
17641763
return _maybe_box_datetimelike(new_values)
17651764

1766-
result = Series(new_values, index=self.columns,
1767-
name=self.index[loc], copy=copy,
1768-
dtype=new_values.dtype)
1765+
result = self._constructor_sliced(new_values, index=self.columns,
1766+
name=self.index[loc], copy=copy,
1767+
dtype=new_values.dtype)
17691768

17701769
else:
17711770
result = self.iloc[loc]

pandas/sparse/frame.py

+2
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ def wrapper(data=None, index=None, columns=None,
136136

137137
return wrapper
138138

139+
_constructor_sliced = SparseSeries
140+
139141
def _init_dict(self, data, index, columns, dtype=None):
140142
# pre-filter out columns if we passed it
141143
if columns is not None:

pandas/sparse/tests/test_indexing.py

+162
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,165 @@ def test_iloc_slice(self):
8282
orig = pd.Series([1, np.nan, np.nan, 3, np.nan])
8383
sparse = orig.to_sparse()
8484
tm.assert_sp_series_equal(sparse.iloc[2:], orig.iloc[2:].to_sparse())
85+
86+
87+
class TestSparseDataFrameIndexing(tm.TestCase):
88+
89+
_multiprocess_can_split_ = True
90+
91+
def test_loc(self):
92+
orig = pd.DataFrame([[1, np.nan, np.nan],
93+
[2, 3, np.nan],
94+
[np.nan, np.nan, 4]],
95+
columns=list('xyz'))
96+
sparse = orig.to_sparse()
97+
98+
self.assertEqual(sparse.loc[0, 'x'], 1)
99+
self.assertTrue(np.isnan(sparse.loc[1, 'z']))
100+
self.assertEqual(sparse.loc[2, 'z'], 4)
101+
102+
tm.assert_sp_series_equal(sparse.loc[0], orig.loc[0].to_sparse())
103+
tm.assert_sp_series_equal(sparse.loc[1], orig.loc[1].to_sparse())
104+
tm.assert_sp_series_equal(sparse.loc[2, :],
105+
orig.loc[2, :].to_sparse())
106+
tm.assert_sp_series_equal(sparse.loc[2, :],
107+
orig.loc[2, :].to_sparse())
108+
tm.assert_sp_series_equal(sparse.loc[:, 'y'],
109+
orig.loc[:, 'y'].to_sparse())
110+
tm.assert_sp_series_equal(sparse.loc[:, 'y'],
111+
orig.loc[:, 'y'].to_sparse())
112+
113+
result = sparse.loc[[1, 2]]
114+
exp = orig.loc[[1, 2]].to_sparse()
115+
tm.assert_sp_frame_equal(result, exp)
116+
117+
result = sparse.loc[[1, 2], :]
118+
exp = orig.loc[[1, 2], :].to_sparse()
119+
tm.assert_sp_frame_equal(result, exp)
120+
121+
result = sparse.loc[:, ['x', 'z']]
122+
exp = orig.loc[:, ['x', 'z']].to_sparse()
123+
tm.assert_sp_frame_equal(result, exp)
124+
125+
result = sparse.loc[[0, 2], ['x', 'z']]
126+
exp = orig.loc[[0, 2], ['x', 'z']].to_sparse()
127+
tm.assert_sp_frame_equal(result, exp)
128+
129+
# exceeds the bounds
130+
result = sparse.loc[[1, 3, 4, 5]]
131+
exp = orig.loc[[1, 3, 4, 5]].to_sparse()
132+
tm.assert_sp_frame_equal(result, exp)
133+
134+
# dense array
135+
result = sparse.loc[orig.x % 2 == 1]
136+
exp = orig.loc[orig.x % 2 == 1].to_sparse()
137+
tm.assert_sp_frame_equal(result, exp)
138+
139+
# sparse array (actually it coerces to normal Series)
140+
result = sparse.loc[sparse.x % 2 == 1]
141+
exp = orig.loc[orig.x % 2 == 1].to_sparse()
142+
tm.assert_sp_frame_equal(result, exp)
143+
144+
def test_loc_index(self):
145+
orig = pd.DataFrame([[1, np.nan, np.nan],
146+
[2, 3, np.nan],
147+
[np.nan, np.nan, 4]],
148+
index=list('abc'), columns=list('xyz'))
149+
sparse = orig.to_sparse()
150+
151+
self.assertEqual(sparse.loc['a', 'x'], 1)
152+
self.assertTrue(np.isnan(sparse.loc['b', 'z']))
153+
self.assertEqual(sparse.loc['c', 'z'], 4)
154+
155+
tm.assert_sp_series_equal(sparse.loc['a'], orig.loc['a'].to_sparse())
156+
tm.assert_sp_series_equal(sparse.loc['b'], orig.loc['b'].to_sparse())
157+
tm.assert_sp_series_equal(sparse.loc['b', :],
158+
orig.loc['b', :].to_sparse())
159+
tm.assert_sp_series_equal(sparse.loc['b', :],
160+
orig.loc['b', :].to_sparse())
161+
162+
tm.assert_sp_series_equal(sparse.loc[:, 'z'],
163+
orig.loc[:, 'z'].to_sparse())
164+
tm.assert_sp_series_equal(sparse.loc[:, 'z'],
165+
orig.loc[:, 'z'].to_sparse())
166+
167+
result = sparse.loc[['a', 'b']]
168+
exp = orig.loc[['a', 'b']].to_sparse()
169+
tm.assert_sp_frame_equal(result, exp)
170+
171+
result = sparse.loc[['a', 'b'], :]
172+
exp = orig.loc[['a', 'b'], :].to_sparse()
173+
tm.assert_sp_frame_equal(result, exp)
174+
175+
result = sparse.loc[:, ['x', 'z']]
176+
exp = orig.loc[:, ['x', 'z']].to_sparse()
177+
tm.assert_sp_frame_equal(result, exp)
178+
179+
result = sparse.loc[['c', 'a'], ['x', 'z']]
180+
exp = orig.loc[['c', 'a'], ['x', 'z']].to_sparse()
181+
tm.assert_sp_frame_equal(result, exp)
182+
183+
# dense array
184+
result = sparse.loc[orig.x % 2 == 1]
185+
exp = orig.loc[orig.x % 2 == 1].to_sparse()
186+
tm.assert_sp_frame_equal(result, exp)
187+
188+
# sparse array (actually it coerces to normal Series)
189+
result = sparse.loc[sparse.x % 2 == 1]
190+
exp = orig.loc[orig.x % 2 == 1].to_sparse()
191+
tm.assert_sp_frame_equal(result, exp)
192+
193+
def test_loc_slice(self):
194+
orig = pd.DataFrame([[1, np.nan, np.nan],
195+
[2, 3, np.nan],
196+
[np.nan, np.nan, 4]],
197+
columns=list('xyz'))
198+
sparse = orig.to_sparse()
199+
tm.assert_sp_frame_equal(sparse.loc[2:], orig.loc[2:].to_sparse())
200+
201+
def test_iloc(self):
202+
orig = pd.DataFrame([[1, np.nan, np.nan],
203+
[2, 3, np.nan],
204+
[np.nan, np.nan, 4]])
205+
sparse = orig.to_sparse()
206+
207+
self.assertEqual(sparse.iloc[1, 1], 3)
208+
self.assertTrue(np.isnan(sparse.iloc[2, 0]))
209+
210+
tm.assert_sp_series_equal(sparse.iloc[0], orig.loc[0].to_sparse())
211+
tm.assert_sp_series_equal(sparse.iloc[1], orig.loc[1].to_sparse())
212+
tm.assert_sp_series_equal(sparse.iloc[2, :],
213+
orig.iloc[2, :].to_sparse())
214+
tm.assert_sp_series_equal(sparse.iloc[2, :],
215+
orig.iloc[2, :].to_sparse())
216+
tm.assert_sp_series_equal(sparse.iloc[:, 1],
217+
orig.iloc[:, 1].to_sparse())
218+
tm.assert_sp_series_equal(sparse.iloc[:, 1],
219+
orig.iloc[:, 1].to_sparse())
220+
221+
result = sparse.iloc[[1, 2]]
222+
exp = orig.iloc[[1, 2]].to_sparse()
223+
tm.assert_sp_frame_equal(result, exp)
224+
225+
result = sparse.iloc[[1, 2], :]
226+
exp = orig.iloc[[1, 2], :].to_sparse()
227+
tm.assert_sp_frame_equal(result, exp)
228+
229+
result = sparse.iloc[:, [1, 0]]
230+
exp = orig.iloc[:, [1, 0]].to_sparse()
231+
tm.assert_sp_frame_equal(result, exp)
232+
233+
result = sparse.iloc[[2], [1, 0]]
234+
exp = orig.iloc[[2], [1, 0]].to_sparse()
235+
tm.assert_sp_frame_equal(result, exp)
236+
237+
with tm.assertRaises(IndexError):
238+
sparse.iloc[[1, 3, 5]]
239+
240+
def test_iloc_slice(self):
241+
orig = pd.DataFrame([[1, np.nan, np.nan],
242+
[2, 3, np.nan],
243+
[np.nan, np.nan, 4]],
244+
columns=list('xyz'))
245+
sparse = orig.to_sparse()
246+
tm.assert_sp_frame_equal(sparse.iloc[2:], orig.iloc[2:].to_sparse())

0 commit comments

Comments
 (0)