Skip to content

Commit 17ab4f5

Browse files
committed
API: DataFrame.__getitem__ returns Series for sparse column
Breaking API change for

```python
In [1]: import pandas as pd

In [2]: df = pd.DataFrame({"A": pd.SparseSeries([1, 0])})

In [3]: type(df['A'])
Out[3]: pandas.core.sparse.series.SparseSeries
```

Now Out[3] is a Series.

Closes pandas-dev#23559
1 parent 28a42da commit 17ab4f5

File tree

5 files changed

+23
-34
lines changed

5 files changed

+23
-34
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,7 @@ changes were made:
560560
- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
561561
- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer support combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
562562
- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
563+
- ``DataFrame[column]`` now returns a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when selecting a single column with sparse values (:issue:`23559`).
563564

564565
Some new warnings are issued for operations that require or are likely to materialize a large dense array:
565566

pandas/core/dtypes/concat.py

-21
Original file line numberDiff line numberDiff line change
@@ -101,27 +101,6 @@ def _get_frame_result_type(result, objs):
101101
ABCSparseDataFrame))
102102

103103

104-
def _get_sliced_frame_result_type(data, obj):
105-
"""
106-
return appropriate class of Series. When data is sparse
107-
it will return a SparseSeries, otherwise it will return
108-
the Series.
109-
110-
Parameters
111-
----------
112-
data : array-like
113-
obj : DataFrame
114-
115-
Returns
116-
-------
117-
Series or SparseSeries
118-
"""
119-
if is_sparse(data):
120-
from pandas.core.sparse.api import SparseSeries
121-
return SparseSeries
122-
return obj._constructor_sliced
123-
124-
125104
def _concat_compat(to_concat, axis=0):
126105
"""
127106
provide concatenation of an array of arrays each of which is a single

pandas/core/frame.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@
7272
is_iterator,
7373
is_sequence,
7474
is_named_tuple)
75-
from pandas.core.dtypes.concat import _get_sliced_frame_result_type
7675
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
7776
from pandas.core.dtypes.missing import isna, notna
7877

@@ -3225,7 +3224,7 @@ def _box_item_values(self, key, values):
32253224

32263225
def _box_col_values(self, values, items):
32273226
""" provide boxed values for a column """
3228-
klass = _get_sliced_frame_result_type(values, self)
3227+
klass = self._constructor_sliced
32293228
return klass(values, index=self.index, name=items, fastpath=True)
32303229

32313230
def __setitem__(self, key, value):

pandas/tests/frame/test_indexing.py

+21-6
Original file line numberDiff line numberDiff line change
@@ -2278,19 +2278,34 @@ def test_getitem_ix_float_duplicates(self):
22782278
expect = df.iloc[[1, -1], 0]
22792279
assert_series_equal(df.loc[0.2, 'a'], expect)
22802280

2281+
def test_getitem_sparse_column(self):
2282+
# https://github.com/pandas-dev/pandas/issues/23559
2283+
data = pd.SparseArray([0, 1])
2284+
df = pd.DataFrame({"A": data})
2285+
expected = pd.Series(data, name="A")
2286+
result = df['A']
2287+
tm.assert_series_equal(result, expected)
2288+
2289+
result = df.iloc[:, 0]
2290+
tm.assert_series_equal(result, expected)
2291+
2292+
result = df.loc[:, 'A']
2293+
tm.assert_series_equal(result, expected)
2294+
22812295
def test_setitem_with_sparse_value(self):
22822296
# GH8131
22832297
df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
2284-
sp_series = pd.Series([0, 0, 1]).to_sparse(fill_value=0)
2285-
df['new_column'] = sp_series
2286-
assert_series_equal(df['new_column'], sp_series, check_names=False)
2298+
sp_array = pd.SparseArray([0, 0, 1])
2299+
df['new_column'] = sp_array
2300+
assert_series_equal(df['new_column'],
2301+
pd.Series(sp_array, name='new_column'),
2302+
check_names=False)
22872303

22882304
def test_setitem_with_unaligned_sparse_value(self):
22892305
df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
2290-
sp_series = (pd.Series([0, 0, 1], index=[2, 1, 0])
2291-
.to_sparse(fill_value=0))
2306+
sp_series = pd.Series(pd.SparseArray([0, 0, 1]), index=[2, 1, 0])
22922307
df['new_column'] = sp_series
2293-
exp = pd.SparseSeries([1, 0, 0], name='new_column')
2308+
exp = pd.Series(pd.SparseArray([1, 0, 0]), name='new_column')
22942309
assert_series_equal(df['new_column'], exp)
22952310

22962311
def test_setitem_with_unaligned_tz_aware_datetime_column(self):

pandas/tests/sparse/series/test_series.py

-5
Original file line numberDiff line numberDiff line change
@@ -160,11 +160,6 @@ def test_construct_DataFrame_with_sp_series(self):
160160
df.dtypes
161161
str(df)
162162

163-
tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)
164-
165-
result = df.iloc[:, 0]
166-
tm.assert_sp_series_equal(result, self.bseries, check_names=False)
167-
168163
# blocking
169164
expected = Series({'col': 'float64:sparse'})
170165
result = df.ftypes

0 commit comments

Comments
 (0)