Skip to content

Commit 2919be4

Browse files
committed
Squashed commit of the following:
commit 96508ca Author: Tom Augspurger <[email protected]> Date: Wed Mar 13 08:23:21 2019 -0500 API: concat on sparse values API breaking change to `concat(List[DataFrame[Sparse]])` to return a DataFrame with sparse values, rather than a SparseDataFrame. Doing an outright break, rather than deprecation, because I have a followup PR deprecating SparseDataFrame. We return this internally in a few places (e.g. get_dummies on all-sparse data). Closes pandas-dev#25702
1 parent d6b2cdc commit 2919be4

File tree

4 files changed

+55
-2
lines changed

4 files changed

+55
-2
lines changed

doc/source/whatsnew/v0.25.0.rst

+36
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,42 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`)
6666
df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
6767
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
6868

69+
Concatenating Sparse Values
70+
^^^^^^^^^^^^^^^^^^^^^^^^^^^
71+
72+
When passed DataFrames whose values are sparse, :func:`concat` will now return a
73+
Series or DataFrame with sparse values, rather than a ``SparseDataFrame`` (:issue:`25702`).
74+
75+
.. ipython:: python
76+
77+
df = pd.DataFrame({"A": pd.SparseArray([0, 1])})
78+
79+
*Previous Behavior:*
80+
81+
.. code-block:: ipython
82+
83+
In [2]: type(pd.concat([df, df]))
84+
pandas.core.sparse.frame.SparseDataFrame
85+
86+
*New Behavior:*
87+
88+
.. ipython:: python
89+
90+
type(pd.concat([df, df]))
91+
92+
93+
This now matches the existing behavior of :func:`concat` on ``Series`` with sparse values.
94+
:func:`concat` will continue to return a ``SparseDataFrame`` when any of the values
95+
are instances of ``SparseDataFrame``.
96+
97+
This change also affects routines using :func:`concat` internally, like :func:`get_dummies`,
98+
which now returns a :class:`DataFrame` in all cases (previously a ``SparseDataFrame`` was
99+
returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwise).
100+
101+
Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
102+
cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.
103+
104+
69105
.. _whatsnew_0250.api_breaking.deps:
70106

71107
Increased minimum versions for dependencies

pandas/core/dtypes/concat.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,7 @@ def _get_frame_result_type(result, objs):
8989
"""
9090

9191
if (result.blocks and (
92-
all(is_sparse(b) for b in result.blocks) or
93-
all(isinstance(obj, ABCSparseDataFrame) for obj in objs))):
92+
any(isinstance(obj, ABCSparseDataFrame) for obj in objs))):
9493
from pandas.core.sparse.api import SparseDataFrame
9594
return SparseDataFrame
9695
else:

pandas/core/groupby/generic.py

+8
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import pandas.core.indexes.base as ibase
4141
from pandas.core.internals import BlockManager, make_block
4242
from pandas.core.series import Series
43+
from pandas.core.sparse.frame import SparseDataFrame
4344

4445
from pandas.plotting._core import boxplot_frame_groupby
4546

@@ -198,9 +199,16 @@ def aggregate(self, arg, *args, **kwargs):
198199
assert not args and not kwargs
199200
result = self._aggregate_multiple_funcs(
200201
[arg], _level=_level, _axis=self.axis)
202+
201203
result.columns = Index(
202204
result.columns.levels[0],
203205
name=self._selected_obj.columns.name)
206+
207+
if isinstance(self.obj, SparseDataFrame):
208+
# Backwards compat for groupby.agg() with sparse
209+
# values. concat no longer converts DataFrame[Sparse]
210+
# to SparseDataFrame, so we do it here.
211+
result = SparseDataFrame(result._data)
204212
except Exception:
205213
result = self._aggregate_generic(arg, *args, **kwargs)
206214

pandas/tests/reshape/test_reshape.py

+10
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,16 @@ def test_get_dummies_duplicate_columns(self, df):
577577

578578
tm.assert_frame_equal(result, expected)
579579

580+
def test_get_dummies_all_sparse(self):
581+
df = pd.DataFrame({"A": [1, 2]})
582+
result = pd.get_dummies(df, columns=['A'], sparse=True)
583+
dtype = SparseDtype('uint8', 0)
584+
expected = pd.DataFrame({
585+
'A_1': SparseArray([1, 0], dtype=dtype),
586+
'A_2': SparseArray([0, 1], dtype=dtype),
587+
})
588+
tm.assert_frame_equal(result, expected)
589+
580590

581591
class TestCategoricalReshape(object):
582592

0 commit comments

Comments
 (0)