Skip to content

Commit e23272e

Browse files
committed
BUG: Fix sparse where
1 parent 8601a18 commit e23272e

File tree

2 files changed

+107
-24
lines changed

2 files changed

+107
-24
lines changed

pandas/core/internals.py

+98-24
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
is_bool_dtype,
2828
is_object_dtype,
2929
is_datetimelike_v_numeric,
30+
is_complex_dtype,
3031
is_float_dtype, is_numeric_dtype,
3132
is_numeric_v_string_like, is_extension_type,
3233
is_list_like,
@@ -46,7 +47,6 @@
4647
find_common_type)
4748
from pandas.core.dtypes.missing import (
4849
isna, notna, array_equivalent,
49-
na_value_for_dtype,
5050
_isna_compat,
5151
is_null_datelike_scalar)
5252
import pandas.core.dtypes.concat as _concat
@@ -444,18 +444,14 @@ def make_a_block(nv, ref_loc):
444444
except (AttributeError, NotImplementedError):
445445
pass
446446

447-
if isinstance(self, SparseBlock):
448-
block = self.make_block_same_class(values=nv,
449-
placement=ref_loc,
450-
fastpath=True)
451-
else:
452-
block = self.make_block(values=nv,
453-
placement=ref_loc,
454-
fastpath=True)
447+
block = self.make_block(values=nv,
448+
placement=ref_loc,
449+
fastpath=True)
450+
455451
return block
456452

457453
# ndim == 1
458-
if self.ndim == 1 or isinstance(self, SparseBlock):
454+
if self.ndim == 1:
459455
if mask.any():
460456
nv = f(mask, new_values, None)
461457
else:
@@ -1392,6 +1388,11 @@ def where(self, other, cond, align=True, raise_on_error=True,
13921388
if hasattr(other, 'reindex_axis'):
13931389
other = other.values
13941390

1391+
if is_scalar(other) or is_list_like(other):
1392+
fill_value = other
1393+
else:
1394+
fill_value = None
1395+
13951396
if hasattr(cond, 'reindex_axis'):
13961397
cond = cond.values
13971398

@@ -1405,7 +1406,8 @@ def where(self, other, cond, align=True, raise_on_error=True,
14051406
raise ValueError("where must have a condition that is ndarray "
14061407
"like")
14071408
else:
1408-
cond = cond.reshape(values.shape)
1409+
if values.ndim == 1:
1410+
cond = cond.flatten()
14091411

14101412
# our where function
14111413
def func(cond, values, other):
@@ -1443,7 +1445,8 @@ def func(cond, values, other):
14431445
transpose=transpose)
14441446
return self._maybe_downcast(blocks, 'infer')
14451447

1446-
if self._can_hold_na or self.ndim == 1:
1448+
if self._can_hold_element(fill_value) or \
1449+
self.is_sparse or self.ndim == 1:
14471450

14481451
if transpose:
14491452
result = result.T
@@ -1452,13 +1455,12 @@ def func(cond, values, other):
14521455
if try_cast:
14531456
result = self._try_cast_result(result)
14541457

1455-
if isinstance(self, SparseBlock):
1456-
fill_value = na_value_for_dtype(result.dtype)
1457-
return self.make_block_same_class(result,
1458-
self.mgr_locs,
1459-
fill_value=fill_value)
1458+
if isinstance(result, np.ndarray):
1459+
ndim = result.ndim
14601460
else:
1461-
return self.make_block(result)
1461+
ndim = None
1462+
1463+
return self.make_block(result, ndim=ndim, fill_value=fill_value)
14621464

14631465
# might need to separate out blocks
14641466
axis = cond.ndim - 1
@@ -1582,9 +1584,6 @@ def _nanpercentile(values, q, axis, **kw):
15821584
if is_scalar(result):
15831585
return ax, self.make_block_scalar(result)
15841586

1585-
if isinstance(self, SparseBlock):
1586-
result = SparseArray(result.flatten())
1587-
15881587
return ax, make_block(result,
15891588
placement=np.arange(len(result)),
15901589
ndim=ndim)
@@ -1736,6 +1735,7 @@ class FloatBlock(FloatOrComplexBlock):
17361735
is_float = True
17371736
_downcast_dtype = 'int64'
17381737

1738+
@classmethod
17391739
def _can_hold_element(self, element):
17401740
if is_list_like(element):
17411741
element = np.asarray(element)
@@ -1786,6 +1786,7 @@ class ComplexBlock(FloatOrComplexBlock):
17861786
__slots__ = ()
17871787
is_complex = True
17881788

1789+
@classmethod
17891790
def _can_hold_element(self, element):
17901791
if is_list_like(element):
17911792
element = np.array(element)
@@ -1948,6 +1949,7 @@ class BoolBlock(NumericBlock):
19481949
is_bool = True
19491950
_can_hold_na = False
19501951

1952+
@classmethod
19511953
def _can_hold_element(self, element):
19521954
if is_list_like(element):
19531955
element = np.asarray(element)
@@ -2632,10 +2634,62 @@ class SparseBlock(NonConsolidatableMixIn, Block):
26322634
is_sparse = True
26332635
is_numeric = True
26342636
_box_to_block_values = False
2635-
_can_hold_na = True
26362637
_ftype = 'sparse'
26372638
_holder = SparseArray
26382639

2640+
def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs):
    """
    Initialise the block through the parent constructor, then derive the
    type flag and the NA capability from the dtype of the stored sparse
    values (``self.values.sp_values``).
    """
    super(SparseBlock, self).__init__(values, placement, ndim, fastpath,
                                      **kwargs)

    sp_dtype = self.values.sp_values.dtype

    # (dtype predicate, flag attribute to switch on, value of _can_hold_na);
    # rules are tried in order and the first match wins
    dtype_rules = (
        (is_float_dtype, 'is_float', True),
        (is_complex_dtype, 'is_complex', True),
        (is_integer_dtype, 'is_integer', False),
        (is_bool_dtype, 'is_bool', False),
        (is_object_dtype, 'is_object', True),
    )

    for matches, flag_name, holds_na in dtype_rules:
        if matches(sp_dtype):
            setattr(self, flag_name, True)
            self._can_hold_na = holds_na
            break
    else:
        # no rule matched: leave the type flags alone, NA is not supported
        self._can_hold_na = False
2664+
2665+
def _can_hold_element(self, element):
    """
    Report whether ``element`` is compatible with the dtype of our sparse
    values.

    Bool, float and complex dtypes defer to the ``_can_hold_element`` of
    BoolBlock, FloatBlock and ComplexBlock respectively.  For an integer
    dtype, a list-like must convert to an integer array whose itemsize is
    no larger than ours, and anything else must satisfy ``is_integer``.
    An object dtype accepts everything; any other dtype accepts nothing.
    """
    own_dtype = self.values.sp_values.dtype

    if is_bool_dtype(own_dtype):
        return BoolBlock._can_hold_element(element)

    if is_integer_dtype(own_dtype):
        if not is_list_like(element):
            return is_integer(element)

        arr = np.array(element)
        elem_type = arr.dtype.type
        excluded = (np.datetime64, np.timedelta64)
        return (issubclass(elem_type, np.integer) and
                not issubclass(elem_type, excluded) and
                own_dtype.itemsize >= arr.dtype.itemsize)

    if is_float_dtype(own_dtype):
        return FloatBlock._can_hold_element(element)

    if is_complex_dtype(own_dtype):
        return ComplexBlock._can_hold_element(element)

    if is_object_dtype(own_dtype):
        return True

    return False
2689+
2690+
def coerce_to_target_dtype(self, other, copy=True):
    """
    Coerce this block for ``other`` by delegating to the parent
    implementation; ``other`` and ``copy`` are forwarded unchanged.
    """
    parent = super(SparseBlock, self)
    return parent.coerce_to_target_dtype(other, copy)
2692+
26392693
@property
26402694
def shape(self):
26412695
return (len(self.mgr_locs), self.sp_index.length)
@@ -2696,6 +2750,20 @@ def copy(self, deep=True, mgr=None):
26962750
kind=self.kind, copy=deep,
26972751
placement=self.mgr_locs)
26982752

2753+
def make_block(self, values, placement=None,
               ndim=None, fill_value=None, **kwargs):
    """
    Create a new block from ``values`` via the parent ``make_block``.

    When ``fill_value`` is given and ``values`` is a SparseArray, the array
    is first densified and rebuilt around that fill value, keeping its
    ``kind`` and ``dtype``.  The parent is always called with
    ``fill_value=None``; remaining keyword arguments are passed through.
    """
    needs_refill = isinstance(values, SparseArray) and fill_value is not None
    if needs_refill:
        dense = values.to_dense()
        values = SparseArray(dense, fill_value=fill_value,
                             kind=values.kind, dtype=values.dtype)

    parent = super(SparseBlock, self)
    return parent.make_block(values, placement=placement, ndim=ndim,
                             fill_value=None, **kwargs)
2766+
26992767
def make_block_same_class(self, values, placement, sparse_index=None,
27002768
kind=None, dtype=None, fill_value=None,
27012769
copy=False, fastpath=True, **kwargs):
@@ -2726,7 +2794,7 @@ def make_block_same_class(self, values, placement, sparse_index=None,
27262794

27272795
new_values = SparseArray(values, sparse_index=sparse_index,
27282796
kind=kind or self.kind, dtype=dtype,
2729-
fill_value=fill_value, copy=copy)
2797+
fill_value=fill_value, copy=copy).flatten()
27302798
return self.make_block(new_values, fastpath=fastpath,
27312799
placement=placement)
27322800

@@ -2792,9 +2860,15 @@ def sparse_reindex(self, new_index):
27922860
return self.make_block_same_class(values, sparse_index=new_index,
27932861
placement=self.mgr_locs)
27942862

2863+
def _try_coerce_result(self, result):
2864+
""" reverse of try_coerce_args """
2865+
if isinstance(result, np.ndarray):
2866+
result = SparseArray(result.flatten(), kind=self.kind)
2867+
return result
2868+
27952869

27962870
def make_block(values, placement, klass=None, ndim=None, dtype=None,
2797-
fastpath=False):
2871+
fastpath=False, **kwargs):
27982872
if klass is None:
27992873
dtype = dtype or values.dtype
28002874
vtype = dtype.type

pandas/core/sparse/frame.py

+9
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,15 @@ def copy(self, deep=True):
334334
result._default_kind = self._default_kind
335335
return result
336336

337+
def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
          try_cast=False, raise_on_error=True):
    """
    Run the parent ``where`` and record ``other`` as the default fill
    value of the frame that holds the outcome.

    All arguments are forwarded positionally, in order, to the parent
    implementation.

    Returns
    -------
    Whatever the parent ``where`` returns: the new frame, or None when
    the parent performed the operation in place.
    """
    result = super(SparseDataFrame, self).where(cond, other,
                                                inplace, axis,
                                                level, try_cast,
                                                raise_on_error)

    # An in-place call hands back None, so blindly assigning to
    # ``result._default_fill_value`` would raise AttributeError.  In that
    # case ``self`` is the frame that was modified and must carry the
    # new fill value instead.
    # NOTE(review): ``other`` is stored as-is even when it is not a
    # scalar -- confirm that is intended for array/frame replacements.
    target = self if result is None else result
    target._default_fill_value = other
    return result
345+
337346
@property
338347
def default_fill_value(self):
339348
return self._default_fill_value

0 commit comments

Comments
 (0)