Skip to content

Commit 7b09a3c

Browse files
committed
ENH: 'replaced' series.replace with generic.replace !
CLN: cleaned up internal block action routines, now always return a list of blocks
1 parent 217bec2 commit 7b09a3c

File tree

5 files changed

+50
-154
lines changed

5 files changed

+50
-154
lines changed

pandas/core/generic.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1620,8 +1620,11 @@ def is_dictlike(x):
16201620
return self.replace(to_replace, value, inplace=inplace,
16211621
limit=limit, regex=regex)
16221622
else:
1623-
if not len(self.columns):
1624-
return self
1623+
1624+
# need a non-zero len on all axes
1625+
for a in self._AXIS_ORDERS:
1626+
if not len(self._get_axis(a)):
1627+
return self
16251628

16261629
new_data = self._data
16271630
if is_dictlike(to_replace):

pandas/core/internals.py

+43-28
Original file line numberDiff line numberDiff line change
@@ -175,16 +175,6 @@ def itemsize(self):
175175
def dtype(self):
176176
return self.values.dtype
177177

178-
def copy(self, deep=True, ref_items=None):
179-
values = self.values
180-
if deep:
181-
values = values.copy()
182-
if ref_items is None:
183-
ref_items = self.ref_items
184-
return make_block(
185-
values, self.items, ref_items, ndim=self.ndim, klass=self.__class__,
186-
fastpath=True, placement=self._ref_locs)
187-
188178
@property
189179
def ftype(self):
190180
return "%s:%s" % (self.dtype, self._ftype)
@@ -293,17 +283,23 @@ def split_block_at(self, item):
293283
def fillna(self, value, inplace=False, downcast=None):
294284
if not self._can_hold_na:
295285
if inplace:
296-
return self
286+
return [self]
297287
else:
298-
return self.copy()
288+
return [self.copy()]
299289

300290
mask = com.isnull(self.values)
301291
value = self._try_fill(value)
302292
blocks = self.putmask(mask, value, inplace=inplace)
303293

304-
if downcast:
305-
blocks = [ b.downcast() for b in blocks ]
306-
return blocks
294+
# possibly downcast the blocks
295+
if not downcast:
296+
return blocks
297+
298+
result_blocks = []
299+
for b in blocks:
300+
result_blocks.extend(b.downcast())
301+
302+
return result_blocks
307303

308304
def downcast(self, dtypes=None):
309305
""" try to downcast each item to the dict of dtypes if present """
@@ -361,14 +357,14 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
361357
"(%s [%s]) with smaller itemsize that current "
362358
"(%s [%s])" % (copy, self.dtype.name,
363359
self.itemsize, newb.dtype.name, newb.itemsize))
364-
return newb
360+
return [ newb ]
365361

366362
def convert(self, copy=True, **kwargs):
367363
""" attempt to coerce any object types to better types
368364
return a copy of the block (if copy = True)
369365
by definition we are not an ObjectBlock here! """
370366

371-
return self.copy() if copy else self
367+
return [ self.copy() ] if copy else [ self ]
372368

373369
def prepare_for_merge(self, **kwargs):
374370
""" a regular block is ok to merge as is """
@@ -428,6 +424,17 @@ def to_native_types(self, slicer=None, na_rep='', **kwargs):
428424
values[mask] = na_rep
429425
return values.tolist()
430426

427+
#### block actions ####
428+
def copy(self, deep=True, ref_items=None):
429+
values = self.values
430+
if deep:
431+
values = values.copy()
432+
if ref_items is None:
433+
ref_items = self.ref_items
434+
return make_block(
435+
values, self.items, ref_items, ndim=self.ndim, klass=self.__class__,
436+
fastpath=True, placement=self._ref_locs)
437+
431438
def replace(self, to_replace, value, inplace=False, filter=None,
432439
regex=False):
433440
""" replace the to_replace value with value, possible to create new
@@ -541,7 +548,7 @@ def create_block(v, m, n, item, reshape=True):
541548
if inplace:
542549
return [self]
543550

544-
return make_block(new_values, self.items, self.ref_items, fastpath=True)
551+
return [make_block(new_values, self.items, self.ref_items, fastpath=True)]
545552

546553
def interpolate(self, method='pad', axis=0, inplace=False,
547554
limit=None, missing=None, coerce=False):
@@ -551,20 +558,20 @@ def interpolate(self, method='pad', axis=0, inplace=False,
551558
if coerce:
552559
if not self._can_hold_na:
553560
if inplace:
554-
return self
561+
return [self]
555562
else:
556-
return self.copy()
563+
return [self.copy()]
557564

558565
values = self.values if inplace else self.values.copy()
559566
values = com.interpolate_2d(values, method, axis, limit, missing)
560-
return make_block(values, self.items, self.ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True)
567+
return [make_block(values, self.items, self.ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True)]
561568

562569
def take(self, indexer, ref_items, axis=1):
563570
if axis < 1:
564571
raise AssertionError('axis must be at least 1, got %d' % axis)
565572
new_values = com.take_nd(self.values, indexer, axis=axis,
566573
allow_fill=False)
567-
return make_block(new_values, self.items, ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True)
574+
return [make_block(new_values, self.items, ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True)]
568575

569576
def get_values(self, dtype=None):
570577
return self.values
@@ -575,7 +582,7 @@ def get_merge_length(self):
575582
def diff(self, n):
576583
""" return block for the diff of the values """
577584
new_values = com.diff(self.values, n, axis=1)
578-
return make_block(new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True)
585+
return [make_block(new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True)]
579586

580587
def shift(self, indexer, periods):
581588
""" shift the block by periods, possibly upcast """
@@ -588,7 +595,7 @@ def shift(self, indexer, periods):
588595
new_values[:, :periods] = fill_value
589596
else:
590597
new_values[:, periods:] = fill_value
591-
return make_block(new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True)
598+
return [make_block(new_values, self.items, self.ref_items, ndim=self.ndim, fastpath=True)]
592599

593600
def eval(self, func, other, raise_on_error=True, try_cast=False):
594601
"""
@@ -644,7 +651,7 @@ def eval(self, func, other, raise_on_error=True, try_cast=False):
644651
if try_cast:
645652
result = self._try_cast_result(result)
646653

647-
return make_block(result, self.items, self.ref_items, ndim=self.ndim, fastpath=True)
654+
return [make_block(result, self.items, self.ref_items, ndim=self.ndim, fastpath=True)]
648655

649656
def where(self, other, cond, raise_on_error=True, try_cast=False):
650657
"""
@@ -1054,6 +1061,14 @@ def _try_fill(self, value):
10541061
value = tslib.iNaT
10551062
return value
10561063

1064+
def fillna(self, value, inplace=False, downcast=None):
1065+
values = self.values if inplace else self.values.copy()
1066+
mask = com.isnull(self.values)
1067+
value = self._try_fill(value)
1068+
np.putmask(values,mask,value)
1069+
return [self if inplace else make_block(values, self.items,
1070+
self.ref_items, fastpath=True)]
1071+
10571072
def to_native_types(self, slicer=None, na_rep=None, **kwargs):
10581073
""" convert to our native types format, slicing if desired """
10591074

@@ -1250,7 +1265,7 @@ def fillna(self, value, inplace=False, downcast=None):
12501265
if issubclass(self.dtype.type, np.floating):
12511266
value = float(value)
12521267
values = self.values if inplace else self.values.copy()
1253-
return self.make_block(values.get_values(value), fill_value=value)
1268+
return [ self.make_block(values.get_values(value), fill_value=value) ]
12541269

12551270
def shift(self, indexer, periods):
12561271
""" shift the block by periods """
@@ -1263,15 +1278,15 @@ def shift(self, indexer, periods):
12631278
new_values[:periods] = fill_value
12641279
else:
12651280
new_values[periods:] = fill_value
1266-
return self.make_block(new_values)
1281+
return [ self.make_block(new_values) ]
12671282

12681283
def take(self, indexer, ref_items, axis=1):
12691284
""" going to take our items
12701285
along the long dimension"""
12711286
if axis < 1:
12721287
raise AssertionError('axis must be at least 1, got %d' % axis)
12731288

1274-
return self.make_block(self.values.take(indexer))
1289+
return [ self.make_block(self.values.take(indexer)) ]
12751290

12761291
def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, limit=None, mask_info=None):
12771292
"""

pandas/core/series.py

-102
Original file line numberDiff line numberDiff line change
@@ -2687,108 +2687,6 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
26872687
else:
26882688
return self._constructor(mapped, index=self.index, name=self.name)
26892689

2690-
def replace(self, to_replace, value=None, method='pad', inplace=False,
2691-
limit=None):
2692-
"""
2693-
Replace arbitrary values in a Series
2694-
2695-
Parameters
2696-
----------
2697-
to_replace : list or dict
2698-
list of values to be replaced or dict of replacement values
2699-
value : anything
2700-
if to_replace is a list then value is the replacement value
2701-
method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
2702-
Method to use for filling holes in reindexed Series
2703-
pad / ffill: propagate last valid observation forward to next valid
2704-
backfill / bfill: use NEXT valid observation to fill gap
2705-
inplace : boolean, default False
2706-
If True, fill the Series in place. Note: this will modify any other
2707-
views on this Series, for example a column in a DataFrame. Returns
2708-
a reference to the filled object, which is self if inplace=True
2709-
limit : int, default None
2710-
Maximum size gap to forward or backward fill
2711-
2712-
Notes
2713-
-----
2714-
replace does not distinguish between NaN and None
2715-
2716-
See also
2717-
--------
2718-
fillna, reindex, asfreq
2719-
2720-
Returns
2721-
-------
2722-
replaced : Series
2723-
"""
2724-
2725-
if inplace:
2726-
result = self
2727-
change = self
2728-
else:
2729-
result = self.copy()
2730-
change = None
2731-
2732-
def _rep_one(s, to_rep, v): # replace single value
2733-
mask = com.mask_missing(s.values, to_rep)
2734-
com._maybe_upcast_putmask(s.values, mask, v, change=change)
2735-
2736-
def _rep_dict(rs, to_rep): # replace {[src] -> dest}
2737-
2738-
all_src = set()
2739-
dd = {} # group by unique destination value
2740-
for s, d in to_rep.iteritems():
2741-
dd.setdefault(d, []).append(s)
2742-
all_src.add(s)
2743-
2744-
if any(d in all_src for d in dd.keys()):
2745-
# don't clobber each other at the cost of temporaries
2746-
masks = {}
2747-
for d, sset in dd.iteritems(): # now replace by each dest
2748-
masks[d] = com.mask_missing(rs.values, sset)
2749-
2750-
for d, m in masks.iteritems():
2751-
com._maybe_upcast_putmask(rs.values, m, d, change=change)
2752-
else: # if no risk of clobbering then simple
2753-
for d, sset in dd.iteritems():
2754-
_rep_one(rs, sset, d)
2755-
2756-
if np.isscalar(to_replace):
2757-
to_replace = [to_replace]
2758-
2759-
if isinstance(to_replace, dict):
2760-
_rep_dict(result, to_replace)
2761-
elif isinstance(to_replace, (list, pa.Array, Series)):
2762-
2763-
# check same length
2764-
if isinstance(value, (list, pa.Array, Series)):
2765-
vl, rl = len(value), len(to_replace)
2766-
if vl == rl:
2767-
_rep_dict(result, dict(zip(to_replace, value)))
2768-
else:
2769-
raise ValueError('Got %d to replace but %d values'
2770-
% (rl, vl))
2771-
2772-
elif value is not None: # otherwise all replaced with same value
2773-
_rep_one(result, to_replace, value)
2774-
else: # method
2775-
if method is None: # pragma: no cover
2776-
raise ValueError('must specify a fill method')
2777-
fill_f = _get_fill_func(method)
2778-
2779-
mask = com.mask_missing(result.values, to_replace)
2780-
fill_f(result.values, limit=limit, mask=mask)
2781-
2782-
if not inplace:
2783-
result = Series(result.values, index=self.index,
2784-
name=self.name)
2785-
else:
2786-
raise ValueError('Unrecognized to_replace type %s' %
2787-
type(to_replace))
2788-
2789-
if not inplace:
2790-
return result
2791-
27922690
def align(self, other, join='outer', level=None, copy=True,
27932691
fill_value=None, method=None, limit=None):
27942692
"""

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1430,7 +1430,7 @@ def get_atom_string(self, block, itemsize):
14301430

14311431
def set_atom_string(self, block, existing_col, min_itemsize, nan_rep, encoding):
14321432
# fill nan items with nan_rep
1433-
block = block.fillna(nan_rep)
1433+
block = block.fillna(nan_rep)[0]
14341434
data = block.values
14351435

14361436
# see if we have a valid string type

pandas/tests/test_series.py

+1-21
Original file line numberDiff line numberDiff line change
@@ -4338,16 +4338,6 @@ def test_replace(self):
43384338
rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
43394339
assert_series_equal(rs, rs2)
43404340

4341-
# replace with forward fill not considering np.nan missing
4342-
s2 = ser.copy()
4343-
s2[5] = np.nan
4344-
rs3 = s2.replace(['foo', 'bar'])
4345-
self.assert_(isnull(rs3[6]))
4346-
4347-
# replace with back fill considering np.nan as missing
4348-
rs4 = ser.replace([np.nan, 'foo', 'bar'], method='bfill')
4349-
assert_almost_equal(rs4[4], ser[5])
4350-
43514341
# replace inplace
43524342
ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
43534343

@@ -4369,7 +4359,7 @@ def test_replace(self):
43694359

43704360
# malformed
43714361
self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0])
4372-
self.assertRaises(ValueError, ser.replace, range(1, 3), [np.nan, 0])
4362+
self.assertRaises(TypeError, ser.replace, range(1, 3), [np.nan, 0])
43734363

43744364
ser = Series([0, 1, 2, 3, 4])
43754365
result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
@@ -4687,16 +4677,6 @@ def test_replace(self):
46874677
rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
46884678
assert_series_equal(rs, rs2)
46894679

4690-
# replace with forward fill not considering np.nan missing
4691-
s2 = ser.copy()
4692-
s2[5] = np.nan
4693-
rs3 = s2.replace(['foo', 'bar'])
4694-
self.assert_(isnull(rs3[6]))
4695-
4696-
# replace with back fill considering np.nan as missing
4697-
rs4 = ser.replace([np.nan, 'foo', 'bar'], method='bfill')
4698-
assert_almost_equal(rs4[4], ser[5])
4699-
47004680
# replace inplace
47014681
ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
47024682
self.assert_((ser[:5] == -1).all())

0 commit comments

Comments
 (0)