From 12caeceb83358573416f58f103a007dafa5aaef7 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Feb 2021 21:07:44 -0800 Subject: [PATCH 1/2] REF/API: don't cast to object unless necessary --- pandas/core/internals/blocks.py | 13 ++++--------- pandas/tests/frame/methods/test_fillna.py | 11 ++++++----- pandas/tests/frame/methods/test_replace.py | 11 +++++++++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 689a067e1c211..9a06d67777c98 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -796,7 +796,6 @@ def replace( It is used in ObjectBlocks. It is here for API compatibility. """ inplace = validate_bool_kwarg(inplace, "inplace") - original_to_replace = to_replace if not self._can_hold_element(to_replace): # We cannot hold `to_replace`, so we know immediately that @@ -814,9 +813,9 @@ def replace( return [self] if inplace else [self.copy()] if not self._can_hold_element(value): - blk = self.astype(object) + blk = self.coerce_to_target_dtype(value) return blk.replace( - to_replace=original_to_replace, + to_replace=to_replace, value=value, inplace=True, regex=regex, @@ -824,7 +823,7 @@ def replace( blk = self if inplace else self.copy() putmask_inplace(blk.values, mask, value) - blocks = blk.convert(numeric=False, copy=not inplace) + blocks = blk.convert(numeric=False, copy=False) return blocks @final @@ -867,11 +866,7 @@ def _replace_regex( replace_regex(new_values, rx, value, mask) block = self.make_block(new_values) - if convert: - nbs = block.convert(numeric=False) - else: - nbs = [block] - return nbs + return [block] @final def _replace_list( diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 58016be82c405..564481d01abc8 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -265,12 +265,13 @@ def test_fillna_dtype_conversion(self): expected 
= DataFrame("nan", index=range(3), columns=["A", "B"]) tm.assert_frame_equal(result, expected) - # equiv of replace + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) object upcasting + @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0]) + def test_fillna_dtype_conversion_equiv_replace(self, val): df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]}) - for v in ["", 1, np.nan, 1.0]: - expected = df.replace(np.nan, v) - result = df.fillna(v) - tm.assert_frame_equal(result, expected) + expected = df.replace(np.nan, val) + result = df.fillna(val) + tm.assert_frame_equal(result, expected) @td.skip_array_manager_invalid_test def test_fillna_datetime_columns(self): diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9ae5bb151b685..6080c52645327 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -783,6 +783,8 @@ def test_replace_mixed(self, float_string_frame): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) + def test_replace_mixed_int_block_upcasting(self): + # int block upcasting df = DataFrame( { @@ -803,7 +805,9 @@ def test_replace_mixed(self, float_string_frame): assert return_value is None tm.assert_frame_equal(df, expected) - # int block splitting + def test_replace_mixed_int_block_splitting(self): + + # int block (non)-splitting df = DataFrame( { "A": Series([1.0, 2.0], dtype="float64"), @@ -815,12 +819,14 @@ def test_replace_mixed(self, float_string_frame): { "A": Series([1.0, 2.0], dtype="float64"), "B": Series([0.5, 1], dtype="float64"), - "C": Series([1, 2], dtype="int64"), + "C": Series([1, 2], dtype="float64"), } ) result = df.replace(0, 0.5) tm.assert_frame_equal(result, expected) + def test_replace_mixed2(self): + # to object block upcasting df = DataFrame( { @@ -846,6 +852,7 @@ def test_replace_mixed(self, float_string_frame): result = df.replace([1, 2], ["foo", 
"bar"]) tm.assert_frame_equal(result, expected) + def test_replace_mixed3(self): # test case from df = DataFrame( {"A": Series([3, 0], dtype="int64"), "B": Series([0, 3], dtype="int64")} From 9a9916a397da4144cb53bdaaf9c97344fee9cbc6 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 26 Feb 2021 07:31:59 -0800 Subject: [PATCH 2/2] split before casting --- pandas/core/internals/blocks.py | 11 +++++++++++ pandas/tests/frame/methods/test_replace.py | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9a06d67777c98..b65043be6fda6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -813,6 +813,17 @@ def replace( return [self] if inplace else [self.copy()] if not self._can_hold_element(value): + if self.ndim == 2 and self.shape[0] > 1: + # split so that we only upcast where necessary + nbs = self._split() + res_blocks = extend_blocks( + [ + blk.replace(to_replace, value, inplace=inplace, regex=regex) + for blk in nbs + ] + ) + return res_blocks + blk = self.coerce_to_target_dtype(value) return blk.replace( to_replace=to_replace, diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 6080c52645327..6d1e90e2f9646 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -807,7 +807,7 @@ def test_replace_mixed_int_block_upcasting(self): def test_replace_mixed_int_block_splitting(self): - # int block (non)-splitting + # int block splitting df = DataFrame( { "A": Series([1.0, 2.0], dtype="float64"), @@ -819,7 +819,7 @@ def test_replace_mixed_int_block_splitting(self): { "A": Series([1.0, 2.0], dtype="float64"), "B": Series([0.5, 1], dtype="float64"), - "C": Series([1, 2], dtype="float64"), + "C": Series([1, 2], dtype="int64"), } ) result = df.replace(0, 0.5)