Skip to content

Commit 3cecb52

Browse files
TomAugspurger authored and tm9k1 committed
API: DataFrame.__getitem__ returns Series for sparse column (pandas-dev#23561)
closes pandas-dev#23559
1 parent f4f56bb commit 3cecb52

File tree

5 files changed

+23
-34
lines changed

5 files changed

+23
-34
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ changes were made:
563563
- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
564564
- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer support combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
565565
- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
566+
- ``DataFrame[column]`` now returns a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when selecting a single column with sparse values (:issue:`23559`).
566567

567568
Some new warnings are issued for operations that require or are likely to materialize a large dense array:
568569

pandas/core/dtypes/concat.py

-21
Original file line numberDiff line numberDiff line change
@@ -101,27 +101,6 @@ def _get_frame_result_type(result, objs):
101101
ABCSparseDataFrame))
102102

103103

104-
def _get_sliced_frame_result_type(data, obj):
105-
"""
106-
return appropriate class of Series. When data is sparse
107-
it will return a SparseSeries, otherwise it will return
108-
the Series.
109-
110-
Parameters
111-
----------
112-
data : array-like
113-
obj : DataFrame
114-
115-
Returns
116-
-------
117-
Series or SparseSeries
118-
"""
119-
if is_sparse(data):
120-
from pandas.core.sparse.api import SparseSeries
121-
return SparseSeries
122-
return obj._constructor_sliced
123-
124-
125104
def _concat_compat(to_concat, axis=0):
126105
"""
127106
provide concatenation of an array of arrays each of which is a single

pandas/core/frame.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@
7272
is_iterator,
7373
is_sequence,
7474
is_named_tuple)
75-
from pandas.core.dtypes.concat import _get_sliced_frame_result_type
7675
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
7776
from pandas.core.dtypes.missing import isna, notna
7877

@@ -3241,7 +3240,7 @@ def _box_item_values(self, key, values):
32413240

32423241
def _box_col_values(self, values, items):
32433242
""" provide boxed values for a column """
3244-
klass = _get_sliced_frame_result_type(values, self)
3243+
klass = self._constructor_sliced
32453244
return klass(values, index=self.index, name=items, fastpath=True)
32463245

32473246
def __setitem__(self, key, value):

pandas/tests/frame/test_indexing.py

+21-6
Original file line numberDiff line numberDiff line change
@@ -2277,19 +2277,34 @@ def test_getitem_ix_float_duplicates(self):
22772277
expect = df.iloc[[1, -1], 0]
22782278
assert_series_equal(df.loc[0.2, 'a'], expect)
22792279

2280+
def test_getitem_sparse_column(self):
2281+
# https://github.com/pandas-dev/pandas/issues/23559
2282+
data = pd.SparseArray([0, 1])
2283+
df = pd.DataFrame({"A": data})
2284+
expected = pd.Series(data, name="A")
2285+
result = df['A']
2286+
tm.assert_series_equal(result, expected)
2287+
2288+
result = df.iloc[:, 0]
2289+
tm.assert_series_equal(result, expected)
2290+
2291+
result = df.loc[:, 'A']
2292+
tm.assert_series_equal(result, expected)
2293+
22802294
def test_setitem_with_sparse_value(self):
22812295
# GH8131
22822296
df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
2283-
sp_series = pd.Series([0, 0, 1]).to_sparse(fill_value=0)
2284-
df['new_column'] = sp_series
2285-
assert_series_equal(df['new_column'], sp_series, check_names=False)
2297+
sp_array = pd.SparseArray([0, 0, 1])
2298+
df['new_column'] = sp_array
2299+
assert_series_equal(df['new_column'],
2300+
pd.Series(sp_array, name='new_column'),
2301+
check_names=False)
22862302

22872303
def test_setitem_with_unaligned_sparse_value(self):
22882304
df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
2289-
sp_series = (pd.Series([0, 0, 1], index=[2, 1, 0])
2290-
.to_sparse(fill_value=0))
2305+
sp_series = pd.Series(pd.SparseArray([0, 0, 1]), index=[2, 1, 0])
22912306
df['new_column'] = sp_series
2292-
exp = pd.SparseSeries([1, 0, 0], name='new_column')
2307+
exp = pd.Series(pd.SparseArray([1, 0, 0]), name='new_column')
22932308
assert_series_equal(df['new_column'], exp)
22942309

22952310
def test_setitem_with_unaligned_tz_aware_datetime_column(self):

pandas/tests/sparse/series/test_series.py

-5
Original file line numberDiff line numberDiff line change
@@ -160,11 +160,6 @@ def test_construct_DataFrame_with_sp_series(self):
160160
df.dtypes
161161
str(df)
162162

163-
tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)
164-
165-
result = df.iloc[:, 0]
166-
tm.assert_sp_series_equal(result, self.bseries, check_names=False)
167-
168163
# blocking
169164
expected = Series({'col': 'float64:sparse'})
170165
result = df.ftypes

0 commit comments

Comments
 (0)