Commit aa97648

hexgnu authored and harisbal committed
BUG: assign doesnt cast SparseDataFrame to DataFrame (pandas-dev#19178)
1 parent 067984a commit aa97648

4 files changed: +32 -4 lines changed

doc/source/whatsnew/v0.23.0.txt  (+1)

@@ -822,6 +822,7 @@ Sparse
 - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
 - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`)
 - Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`)
+- Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`)
 
 Reshaping
 ^^^^^^^^^
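
For context, a minimal sketch of the constructor behavior this whatsnew entry describes, mirroring the test added further down (0.23-era SparseArray; exact reprs may differ by version):

    import numpy as np
    from pandas import SparseArray

    # A scalar `data` with an explicit `index` used to be broadcast into a
    # float64 buffer, losing the scalar's own dtype.
    arr = SparseArray(1, index=[1, 2, 3], fill_value=1)
    print(arr.dtype)   # int64 after this change; float64 before it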

pandas/core/sparse/array.py  (+5 -4)

@@ -26,7 +26,8 @@
     is_scalar, is_dtype_equal)
 from pandas.core.dtypes.cast import (
     maybe_convert_platform, maybe_promote,
-    astype_nansafe, find_common_type)
+    astype_nansafe, find_common_type, infer_dtype_from_scalar,
+    construct_1d_arraylike_from_scalar)
 from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
 
 import pandas._libs.sparse as splib
@@ -162,9 +163,9 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
                 data = np.nan
             if not is_scalar(data):
                 raise Exception("must only pass scalars with an index ")
-            values = np.empty(len(index), dtype='float64')
-            values.fill(data)
-            data = values
+            dtype = infer_dtype_from_scalar(data)[0]
+            data = construct_1d_arraylike_from_scalar(
+                data, len(index), dtype)
 
         if isinstance(data, ABCSparseSeries):
             data = data.values
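
A rough illustration of what the replacement lines do. Both helpers live in the pandas-internal module pandas.core.dtypes.cast, so their exact signatures may shift between versions; this is only the intent of the new code path, not a public API:

    from pandas.core.dtypes.cast import (
        infer_dtype_from_scalar, construct_1d_arraylike_from_scalar)

    scalar = 1
    # infer_dtype_from_scalar returns a (dtype, value) pair; only the dtype
    # is needed here, hence the [0].
    dtype = infer_dtype_from_scalar(scalar)[0]
    # Broadcast the scalar into a length-3 1-D array of that dtype instead
    # of hard-coding float64 as the old code did.
    values = construct_1d_arraylike_from_scalar(scalar, 3, dtype)
    print(values)        # array([1, 1, 1])
    print(values.dtype)  # int64 on most platforms, not float64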

pandas/tests/sparse/frame/test_frame.py  (+11)

@@ -1257,3 +1257,14 @@ def test_quantile_multi(self):
 
         tm.assert_frame_equal(result, dense_expected)
         tm.assert_sp_frame_equal(result, sparse_expected)
+
+    def test_assign_with_sparse_frame(self):
+        # GH 19163
+        df = pd.DataFrame({"a": [1, 2, 3]})
+        res = df.to_sparse(fill_value=False).assign(newcol=False)
+        exp = df.assign(newcol=False).to_sparse(fill_value=False)
+
+        tm.assert_sp_frame_equal(res, exp)
+
+        for column in res.columns:
+            assert type(res[column]) is SparseSeries
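
Interactively, the behavior this test pins down looks roughly like the following (SparseDataFrame and DataFrame.to_sparse belong to the 0.23-era API and were removed in later pandas):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})
    sdf = df.to_sparse(fill_value=False)

    # assign() should keep the frame sparse and add the new column as a
    # SparseSeries rather than silently densifying the result.
    res = sdf.assign(newcol=False)
    print(type(res).__name__)            # SparseDataFrame
    print(type(res["newcol"]).__name__)  # SparseSeries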

pandas/tests/sparse/test_array.py  (+15)

@@ -113,6 +113,21 @@ def test_constructor_spindex_dtype(self):
         assert arr.dtype == np.int64
         assert arr.fill_value == 0
 
+    @pytest.mark.parametrize('scalar,dtype', [
+        (False, bool),
+        (0.0, 'float64'),
+        (1, 'int64'),
+        ('z', 'object')])
+    def test_scalar_with_index_infer_dtype(self, scalar, dtype):
+        # GH 19163
+        arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
+        exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)
+
+        tm.assert_sp_array_equal(arr, exp)
+
+        assert arr.dtype == dtype
+        assert exp.dtype == dtype
+
     def test_sparseseries_roundtrip(self):
         # GH 13999
         for kind in ['integer', 'block']:
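
To run just the new tests locally, a pytest invocation along these lines should work (the -k expressions simply match the test names added above):

    pytest pandas/tests/sparse/test_array.py -k scalar_with_index_infer_dtype
    pytest pandas/tests/sparse/frame/test_frame.py -k assign_with_sparse_frame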
