Skip to content

Commit a72ee1a

Browse files
committed
Fixed DataFrame.__setitem__ for updating to sparse.
Closes pandas-dev#22367
1 parent ec5eb9a commit a72ee1a

File tree

4 files changed

+38
-9
lines changed

4 files changed

+38
-9
lines changed

doc/source/whatsnew/v0.24.0.txt

+6
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,12 @@ Reshaping
731731
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
732732
-
733733

734+
Sparse
735+
^^^^^^
736+
737+
- Updating a boolean, datetime, or timedelta column to be Sparse now works (:issue:`22367`)
738+
739+
734740
Build Changes
735741
^^^^^^^^^^^^^
736742

pandas/core/internals/blocks.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -2298,7 +2298,8 @@ def _try_coerce_result(self, result):
22982298
return result
22992299

23002300
def should_store(self, value):
2301-
return issubclass(value.dtype.type, np.timedelta64)
2301+
return (issubclass(value.dtype.type, np.timedelta64) and
2302+
not is_extension_array_dtype(value))
23022303

23032304
def to_native_types(self, slicer=None, na_rep=None, quoting=None,
23042305
**kwargs):
@@ -2337,7 +2338,8 @@ def _can_hold_element(self, element):
23372338
return isinstance(element, (bool, np.bool_))
23382339

23392340
def should_store(self, value):
2340-
return issubclass(value.dtype.type, np.bool_)
2341+
return (issubclass(value.dtype.type, np.bool_) and not
2342+
is_extension_array_dtype(value))
23412343

23422344
def replace(self, to_replace, value, inplace=False, filter=None,
23432345
regex=False, convert=True, mgr=None):
@@ -2879,7 +2881,8 @@ def to_native_types(self, slicer=None, na_rep=None, date_format=None,
28792881

28802882
def should_store(self, value):
28812883
return (issubclass(value.dtype.type, np.datetime64) and
2882-
not is_datetimetz(value))
2884+
not is_datetimetz(value) and
2885+
not is_extension_array_dtype(value))
28832886

28842887
def set(self, locs, values, check=False):
28852888
"""

pandas/tests/reshape/test_reshape.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -247,10 +247,16 @@ def test_dataframe_dummies_prefix_str(self, df, sparse):
247247
dtype=np.uint8)
248248
expected = expected.astype({"C": np.int64})
249249
if sparse:
250-
expected.iloc[1:] = expected.iloc[1:].astype(SparseDtype("uint8"))
251-
# seemingly impossible to make expected .
252-
# raise pytest.xfail(reason="can't make expected")
253-
pass
250+
# work around astyping & assigning with duplicate columns
251+
# https://github.com/pandas-dev/pandas/issues/14427
252+
expected = pd.concat([
253+
pd.Series([1, 2, 3], name='C'),
254+
pd.Series([1, 0, 1], name='bad_a', dtype='Sparse[uint8]'),
255+
pd.Series([0, 1, 0], name='bad_b', dtype='Sparse[uint8]'),
256+
pd.Series([1, 1, 0], name='bad_b', dtype='Sparse[uint8]'),
257+
pd.Series([0, 0, 1], name='bad_c', dtype='Sparse[uint8]'),
258+
], axis=1)
259+
254260
assert_frame_equal(result, expected)
255261

256262
def test_dataframe_dummies_subset(self, df, sparse):
@@ -336,10 +342,10 @@ def test_dataframe_dummies_with_na(self, df, sparse, dtype):
336342
columns = ['A_a', 'A_b', 'A_nan', 'B_b', 'B_c', 'B_nan']
337343
expected[columns] = expected[columns].astype(e_dtype)
338344
if sparse:
339-
expected[columns] = expected[columns].apply(
345+
tmp = expected[columns].apply(
340346
lambda x: pd.SparseSeries(x)
341347
)
342-
raise pytest.xfail(reason="that apply is broken?")
348+
expected[tmp.columns] = tmp
343349
assert_frame_equal(result, expected)
344350

345351
result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype)

pandas/tests/sparse/frame/test_frame.py

+14
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,20 @@ def _check_frame(frame, orig):
582582

583583
self._check_all(_check_frame)
584584

585+
@pytest.mark.parametrize('values', [
586+
[True, False],
587+
[0, 1],
588+
[1, None],
589+
['a', 'b'],
590+
[pd.Timestamp('2017'), pd.NaT],
591+
[pd.Timedelta('10s'), pd.NaT],
592+
])
593+
def test_setitem_more(self, values):
594+
df = pd.DataFrame({"A": values})
595+
df['A'] = pd.SparseArray(values)
596+
expected = pd.DataFrame({'A': pd.SparseArray(values)})
597+
tm.assert_frame_equal(df, expected)
598+
585599
def test_setitem_corner(self):
586600
self.frame['a'] = self.frame['B']
587601
tm.assert_sp_series_equal(self.frame['a'], self.frame['B'],

0 commit comments

Comments
 (0)