Skip to content

Commit 11d6e95

Browse files
committed
Merge pull request #5112 from jreback/dtype_conv
API: convert objects on fillna when object result dtype, related (GH5103)
2 parents 7a886b5 + 51573ef commit 11d6e95

File tree

6 files changed

+51
-10
lines changed

6 files changed

+51
-10
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ Bug Fixes
562562
(:issue:`5102`).
563563
- Fixed a bug where ``groupby.plot()`` and friends were duplicating figures
564564
multiple times (:issue:`5102`).
565+
- Provide automatic conversion of ``object`` dtypes on ``fillna`` (related to :issue:`5103`)
565566

566567

567568
pandas 0.12.0

pandas/core/internals.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -1177,7 +1177,7 @@ def convert(self, convert_dates=True, convert_numeric=True, copy=True, by_item=T
11771177
# attempt to create new type blocks
11781178
is_unique = self.items.is_unique
11791179
blocks = []
1180-
if by_item:
1180+
if by_item and not self._is_single_block:
11811181

11821182
for i, c in enumerate(self.items):
11831183
values = self.iget(i)
@@ -1200,6 +1200,17 @@ def convert(self, convert_dates=True, convert_numeric=True, copy=True, by_item=T
12001200

12011201
return blocks
12021202

1203+
def _maybe_downcast(self, blocks, downcast=None):
1204+
1205+
if downcast is not None:
1206+
return blocks
1207+
1208+
# split and convert the blocks
1209+
result_blocks = []
1210+
for blk in blocks:
1211+
result_blocks.extend(blk.convert(convert_dates=True,convert_numeric=False))
1212+
return result_blocks
1213+
12031214
def _can_hold_element(self, element):
12041215
return True
12051216

@@ -2050,6 +2061,8 @@ def apply(self, f, *args, **kwargs):
20502061
result_blocks.extend(applied)
20512062
else:
20522063
result_blocks.append(applied)
2064+
if len(result_blocks) == 0:
2065+
return self.make_empty(axes or self.axes)
20532066
bm = self.__class__(
20542067
result_blocks, axes or self.axes, do_integrity_check=do_integrity_check)
20552068
bm._consolidate_inplace()

pandas/core/ops.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -411,11 +411,13 @@ def na_op(x, y):
411411
result = expressions.evaluate(op, str_rep, x, y,
412412
raise_on_error=True, **eval_kwargs)
413413
except TypeError:
414-
result = pa.empty(len(x), dtype=x.dtype)
415414
if isinstance(y, (pa.Array, pd.Series)):
415+
dtype = np.find_common_type([x.dtype,y.dtype],[])
416+
result = np.empty(x.size, dtype=dtype)
416417
mask = notnull(x) & notnull(y)
417418
result[mask] = op(x[mask], y[mask])
418419
else:
420+
result = pa.empty(len(x), dtype=x.dtype)
419421
mask = notnull(x)
420422
result[mask] = op(x[mask], y)
421423

@@ -690,12 +692,14 @@ def na_op(x, y):
690692
op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
691693
except TypeError:
692694
xrav = x.ravel()
693-
result = np.empty(x.size, dtype=x.dtype)
694695
if isinstance(y, (np.ndarray, pd.Series)):
696+
dtype = np.find_common_type([x.dtype,y.dtype],[])
697+
result = np.empty(x.size, dtype=dtype)
695698
yrav = y.ravel()
696699
mask = notnull(xrav) & notnull(yrav)
697700
result[mask] = op(xrav[mask], yrav[mask])
698701
else:
702+
result = np.empty(x.size, dtype=x.dtype)
699703
mask = notnull(xrav)
700704
result[mask] = op(xrav[mask], y)
701705

@@ -855,6 +859,8 @@ def na_op(x, y):
855859
result = expressions.evaluate(op, str_rep, x, y,
856860
raise_on_error=True, **eval_kwargs)
857861
except TypeError:
862+
863+
# TODO: might need to find_common_type here?
858864
result = pa.empty(len(x), dtype=x.dtype)
859865
mask = notnull(x)
860866
result[mask] = op(x[mask], y)

pandas/io/pytables.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1664,8 +1664,9 @@ def get_atom_string(self, block, itemsize):
16641664

16651665
def set_atom_string(
16661666
self, block, existing_col, min_itemsize, nan_rep, encoding):
1667-
# fill nan items with myself
1668-
block = block.fillna(nan_rep)[0]
1667+
# fill nan items with myself, don't disturb the blocks by
1668+
# trying to downcast
1669+
block = block.fillna(nan_rep, downcast=False)[0]
16691670
data = block.values
16701671

16711672
# see if we have a valid string type

pandas/tests/test_frame.py

+23-4
Original file line numberDiff line numberDiff line change
@@ -4311,23 +4311,24 @@ def test_operators_none_as_na(self):
43114311

43124312
ops = [operator.add, operator.sub, operator.mul, operator.truediv]
43134313

4314+
# fillna now converts object dtypes, so cast the expected result back to object to match
43144315
for op in ops:
43154316
filled = df.fillna(np.nan)
43164317
result = op(df, 3)
4317-
expected = op(filled, 3)
4318+
expected = op(filled, 3).astype(object)
43184319
expected[com.isnull(expected)] = None
43194320
assert_frame_equal(result, expected)
43204321

43214322
result = op(df, df)
4322-
expected = op(filled, filled)
4323+
expected = op(filled, filled).astype(object)
43234324
expected[com.isnull(expected)] = None
43244325
assert_frame_equal(result, expected)
43254326

43264327
result = op(df, df.fillna(7))
43274328
assert_frame_equal(result, expected)
43284329

43294330
result = op(df.fillna(7), df)
4330-
assert_frame_equal(result, expected)
4331+
assert_frame_equal(result, expected, check_dtype=False)
43314332

43324333
def test_comparison_invalid(self):
43334334

@@ -6695,6 +6696,25 @@ def test_fillna(self):
66956696
df.fillna({ 2: 'foo' }, inplace=True)
66966697
assert_frame_equal(df, expected)
66976698

6699+
def test_fillna_dtype_conversion(self):
6700+
# make sure that fillna on an empty frame works
6701+
df = DataFrame(index=["A","B","C"], columns = [1,2,3,4,5])
6702+
result = df.get_dtype_counts().order()
6703+
expected = Series({ 'object' : 5 })
6704+
assert_series_equal(result, expected)
6705+
6706+
result = df.fillna(1)
6707+
expected = DataFrame(1, index=["A","B","C"], columns = [1,2,3,4,5])
6708+
result = result.get_dtype_counts().order()
6709+
expected = Series({ 'int64' : 5 })
6710+
assert_series_equal(result, expected)
6711+
6712+
# empty block
6713+
df = DataFrame(index=lrange(3),columns=['A','B'],dtype='float64')
6714+
result = df.fillna('nan')
6715+
expected = DataFrame('nan',index=lrange(3),columns=['A','B'])
6716+
assert_frame_equal(result, expected)
6717+
66986718
def test_ffill(self):
66996719
self.tsframe['A'][:5] = nan
67006720
self.tsframe['A'][-5:] = nan
@@ -10812,7 +10832,6 @@ def test_boolean_indexing_mixed(self):
1081210832
expected.loc[35,4] = 1
1081310833
assert_frame_equal(df2,expected)
1081410834

10815-
# add object, should this raise?
1081610835
df['foo'] = 'test'
1081710836
with tm.assertRaisesRegexp(TypeError, 'boolean setting on mixed-type'):
1081810837
df[df > 0.3] = 1

pandas/tests/test_panel.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -732,8 +732,9 @@ def test_logical_with_nas(self):
732732
expected = DataFrame({'a': [np.nan, True]})
733733
assert_frame_equal(result, expected)
734734

735+
# the fillna result is automatically downcast here, so the expected frame is no longer object dtype
735736
result = d['ItemA'].fillna(False) | d['ItemB']
736-
expected = DataFrame({'a': [True, True]}, dtype=object)
737+
expected = DataFrame({'a': [True, True]})
737738
assert_frame_equal(result, expected)
738739

739740
def test_neg(self):

0 commit comments

Comments
 (0)