Skip to content

Commit b253674

Browse files
committed
BUG: Fix concat(Series[sparse], axis=1)
* Preserve sparsity * Preserve fill value
1 parent ae026b2 commit b253674

File tree

3 files changed

+18
-8
lines changed

3 files changed

+18
-8
lines changed

doc/source/whatsnew/v0.24.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ changes were made:
647647
* The ``out`` and ``mode`` parameters are no longer accepted (previously, this raised if they were specified).
648648
* Passing a scalar for ``indices`` is no longer allowed.
649649

650-
- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
650+
- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
651651
- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer support combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
652652
- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
653653
- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`).
@@ -1613,6 +1613,7 @@ Sparse
16131613
- Bug in :meth:`SparseArray.unique` not returning the unique values (:issue:`19595`)
16141614
- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`)
16151615
- Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`)
1616+
- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`)
16161617

16171618
Style
16181619
^^^^^

pandas/core/dtypes/concat.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,19 @@ def _get_series_result_type(result, objs=None):
6666
return appropriate class of Series concat
6767
input is either dict or array-like
6868
"""
69+
from pandas import SparseSeries, SparseDataFrame, DataFrame
70+
6971
# concat Series with axis 1
7072
if isinstance(result, dict):
7173
# concat Series with axis 1
72-
if all(is_sparse(c) for c in compat.itervalues(result)):
73-
from pandas.core.sparse.api import SparseDataFrame
74+
if all(isinstance(c, (SparseSeries, SparseDataFrame))
75+
for c in compat.itervalues(result)):
7476
return SparseDataFrame
7577
else:
76-
from pandas.core.frame import DataFrame
7778
return DataFrame
7879

7980
# otherwise it is a SingleBlockManager (axis = 0)
8081
if result._block.is_sparse:
81-
from pandas.core.sparse.api import SparseSeries
8282
return SparseSeries
8383
else:
8484
return objs[0]._constructor

pandas/core/reshape/reshape.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111

1212
from pandas.core.dtypes.cast import maybe_promote
1313
from pandas.core.dtypes.common import (
14-
ensure_platform_int, is_bool_dtype, is_extension_array_dtype, is_list_like,
15-
is_object_dtype, needs_i8_conversion)
14+
ensure_platform_int, is_bool_dtype, is_extension_array_dtype,
15+
is_integer_dtype, is_list_like, is_object_dtype, needs_i8_conversion)
1616
from pandas.core.dtypes.missing import notna
1717

1818
from pandas import compat
@@ -910,6 +910,14 @@ def _make_col_name(prefix, prefix_sep, level):
910910
index = None
911911

912912
if sparse:
913+
914+
if is_integer_dtype(dtype):
915+
fill_value = 0
916+
elif dtype == bool:
917+
fill_value = False
918+
else:
919+
fill_value = 0.0
920+
913921
sparse_series = []
914922
N = len(data)
915923
sp_indices = [[] for _ in range(len(dummy_cols))]
@@ -927,7 +935,8 @@ def _make_col_name(prefix, prefix_sep, level):
927935
dummy_cols = dummy_cols[1:]
928936
for col, ixs in zip(dummy_cols, sp_indices):
929937
sarr = SparseArray(np.ones(len(ixs), dtype=dtype),
930-
sparse_index=IntIndex(N, ixs), fill_value=0,
938+
sparse_index=IntIndex(N, ixs),
939+
fill_value=fill_value,
931940
dtype=dtype)
932941
sparse_series.append(Series(data=sarr, index=index, name=col))
933942

0 commit comments

Comments
 (0)