From 997e8ae723cd64683b8e799f3d9d7c8a21bf7cb0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Mar 2021 14:52:50 +0100 Subject: [PATCH 1/3] Revert "DEPR: CategoricalBlock; combine Block.replace methods (#40527)" This reverts commit 09e2036a63300cc14cb99fc0c839aff8dd80f695. --- pandas/core/internals/__init__.py | 17 +------- pandas/core/internals/blocks.py | 64 ++++++++++++++++++++---------- pandas/core/internals/managers.py | 8 ++++ pandas/tests/internals/test_api.py | 1 + pandas/tests/io/test_common.py | 3 -- pandas/tests/io/test_feather.py | 1 - pandas/tests/io/test_parquet.py | 1 - 7 files changed, 53 insertions(+), 42 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 18e584575bc97..e6ed69107fc44 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -9,6 +9,7 @@ ) from pandas.core.internals.blocks import ( # io.pytables, io.packers Block, + CategoricalBlock, DatetimeBlock, DatetimeTZBlock, ExtensionBlock, @@ -26,6 +27,7 @@ __all__ = [ "Block", + "CategoricalBlock", "NumericBlock", "DatetimeBlock", "DatetimeTZBlock", @@ -44,18 +46,3 @@ "create_block_manager_from_arrays", "create_block_manager_from_blocks", ] - - -def __getattr__(name: str): - import warnings - - if name == "CategoricalBlock": - warnings.warn( - "CategoricalBlock is deprecated and will be removed in a future version. " - "Use ExtensionBlock instead.", - DeprecationWarning, - stacklevel=2, - ) - return ExtensionBlock - - raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c13eb3f109354..4b5d4a737f0f0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -678,7 +678,6 @@ def copy(self, deep: bool = True): # --------------------------------------------------------------------- # Replace - @final def replace( self, to_replace, @@ -693,23 +692,6 @@ def replace( """ inplace = validate_bool_kwarg(inplace, "inplace") - # Note: the checks we do in NDFrame.replace ensure we never get - # here with listlike to_replace or value, as those cases - # go through _replace_list - - values = self.values - - if isinstance(values, Categorical): - # TODO: avoid special-casing - blk = self if inplace else self.copy() - blk.values.replace(to_replace, value, inplace=True) - return [blk] - - regex = should_use_regex(regex, to_replace) - - if regex: - return self._replace_regex(to_replace, value, inplace=inplace) - if not self._can_hold_element(to_replace): # We cannot hold `to_replace`, so we know immediately that # replacing it is a no-op. @@ -717,6 +699,8 @@ def replace( # replace_list instead of replace. return [self] if inplace else [self.copy()] + values = self.values + mask = missing.mask_missing(values, to_replace) if not mask.any(): # Note: we get here with test_replace_extension_other incorrectly @@ -741,7 +725,7 @@ def replace( else: # split so that we only upcast where necessary return self.split_and_operate( - type(self).replace, to_replace, value, inplace=True, regex=regex + type(self).replace, to_replace, value, inplace=inplace, regex=regex ) @final @@ -1244,7 +1228,7 @@ def take_nd( Take values according to indexer and return them as a block.bb """ - # algos.take_nd dispatches for DatetimeTZBlock + # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock # so need to preserve types # sparse is treated like an ndarray, but needs .get_values() shaping @@ -1443,7 +1427,7 @@ class ExtensionBlock(Block): Notes ----- This holds all 3rd-party extension array types. It's also the immediate - parent class for our internal extension types' blocks. + parent class for our internal extension types' blocks, CategoricalBlock. ExtensionArrays are limited to 1-D. """ @@ -1595,6 +1579,7 @@ def take_nd( def _can_hold_element(self, element: Any) -> bool: # TODO: We may need to think about pushing this onto the array. + # We're doing the same as CategoricalBlock here. return True def _slice(self, slicer): @@ -2016,6 +2001,41 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: def _can_hold_element(self, element: Any) -> bool: return True + def replace( + self, + to_replace, + value, + inplace: bool = False, + regex: bool = False, + ) -> List[Block]: + # Note: the checks we do in NDFrame.replace ensure we never get + # here with listlike to_replace or value, as those cases + # go through _replace_list + + regex = should_use_regex(regex, to_replace) + + if regex: + return self._replace_regex(to_replace, value, inplace=inplace) + else: + return super().replace(to_replace, value, inplace=inplace, regex=False) + + +class CategoricalBlock(ExtensionBlock): + __slots__ = () + + def replace( + self, + to_replace, + value, + inplace: bool = False, + regex: bool = False, + ) -> List[Block]: + inplace = validate_bool_kwarg(inplace, "inplace") + result = self if inplace else self.copy() + + result.values.replace(to_replace, value, inplace=True) + return [result] + # ----------------------------------------------------------------- # Constructor Helpers @@ -2078,7 +2098,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): # Need this first(ish) so that Sparse[datetime] is sparse cls = ExtensionBlock elif isinstance(dtype, CategoricalDtype): - cls = ExtensionBlock + cls = CategoricalBlock elif vtype is Timestamp: cls = DatetimeTZBlock elif vtype is Interval or vtype is Period: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ef2925874c0ac..759500827f344 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -67,6 +67,7 @@ ) from pandas.core.internals.blocks import ( Block, + CategoricalBlock, DatetimeTZBlock, ExtensionBlock, ObjectValuesExtensionBlock, @@ -1860,6 +1861,13 @@ def _form_blocks( object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_) blocks.extend(object_blocks) + if len(items_dict["CategoricalBlock"]) > 0: + cat_blocks = [ + new_block(array, klass=CategoricalBlock, placement=i, ndim=2) + for i, array in items_dict["CategoricalBlock"] + ] + blocks.extend(cat_blocks) + if len(items_dict["ExtensionBlock"]): external_blocks = [ new_block(array, klass=ExtensionBlock, placement=i, ndim=2) diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index 60fbd2da70e79..17d7a17f9736b 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -26,6 +26,7 @@ def test_namespace(): ] expected = [ "Block", + "CategoricalBlock", "NumericBlock", "DatetimeBlock", "DatetimeTZBlock", diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index d882eb930137b..e530f3e37883a 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -258,9 +258,6 @@ def test_read_expands_user_home_dir( ), ], ) - @pytest.mark.filterwarnings( - "ignore:CategoricalBlock is deprecated:DeprecationWarning" - ) def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) path = datapath(*path) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 81af799640135..ab0b3b08a11e8 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -20,7 +20,6 @@ @filter_sparse @pytest.mark.single -@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestFeather: def check_error_on_write(self, df, exc, err_msg): # check that we are raising the exception diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index e74c915bbaf74..3ef77d2fbacd0 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -573,7 +573,6 @@ def test_write_column_index_nonstring(self, pa): self.check_error_on_write(df, engine, ValueError, msg) -@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): From 4d68f151918c1e9fca317402f3563603e5276ae3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Mar 2021 14:56:49 +0100 Subject: [PATCH 2/3] add back replace changes --- pandas/core/internals/blocks.py | 55 ++++++++++++--------------------- 1 file changed, 20 insertions(+), 35 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4b5d4a737f0f0..0c92a2f8515fd 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -678,6 +678,7 @@ def copy(self, deep: bool = True): # --------------------------------------------------------------------- # Replace + @final def replace( self, to_replace, @@ -692,6 +693,23 @@ def replace( """ inplace = validate_bool_kwarg(inplace, "inplace") + # Note: the checks we do in NDFrame.replace ensure we never get + # here with listlike to_replace or value, as those cases + # go through _replace_list + + values = self.values + + if isinstance(values, Categorical): + # TODO: avoid special-casing + blk = self if inplace else self.copy() + blk.values.replace(to_replace, value, inplace=True) + return [blk] + + regex = should_use_regex(regex, to_replace) + + if regex: + return self._replace_regex(to_replace, value, inplace=inplace) + if not self._can_hold_element(to_replace): # We cannot hold `to_replace`, so we know immediately that # replacing it is a no-op. @@ -699,8 +717,6 @@ def replace( # replace_list instead of replace. return [self] if inplace else [self.copy()] - values = self.values - mask = missing.mask_missing(values, to_replace) if not mask.any(): # Note: we get here with test_replace_extension_other incorrectly @@ -725,7 +741,7 @@ def replace( else: # split so that we only upcast where necessary return self.split_and_operate( - type(self).replace, to_replace, value, inplace=inplace, regex=regex + type(self).replace, to_replace, value, inplace=True, regex=regex ) @final @@ -1579,7 +1595,6 @@ def take_nd( def _can_hold_element(self, element: Any) -> bool: # TODO: We may need to think about pushing this onto the array. - # We're doing the same as CategoricalBlock here. return True def _slice(self, slicer): @@ -2001,41 +2016,11 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: def _can_hold_element(self, element: Any) -> bool: return True - def replace( - self, - to_replace, - value, - inplace: bool = False, - regex: bool = False, - ) -> List[Block]: - # Note: the checks we do in NDFrame.replace ensure we never get - # here with listlike to_replace or value, as those cases - # go through _replace_list - - regex = should_use_regex(regex, to_replace) - - if regex: - return self._replace_regex(to_replace, value, inplace=inplace) - else: - return super().replace(to_replace, value, inplace=inplace, regex=False) - class CategoricalBlock(ExtensionBlock): + # this Block type is kept for backwards-compatibility __slots__ = () - def replace( - self, - to_replace, - value, - inplace: bool = False, - regex: bool = False, - ) -> List[Block]: - inplace = validate_bool_kwarg(inplace, "inplace") - result = self if inplace else self.copy() - - result.values.replace(to_replace, value, inplace=True) - return [result] - # ----------------------------------------------------------------- # Constructor Helpers From bc8398bcf15b40a3d7b0850aa78cef62c66f8642 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Mar 2021 15:41:42 +0100 Subject: [PATCH 3/3] keep deprecation in pandas.core.internals.__init__ --- pandas/core/internals/__init__.py | 18 +++++++++++++++++- pandas/tests/internals/test_api.py | 1 - pandas/tests/io/test_common.py | 3 +++ pandas/tests/io/test_feather.py | 1 + pandas/tests/io/test_parquet.py | 1 + 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index e6ed69107fc44..f0018928255e6 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -9,7 +9,6 @@ ) from pandas.core.internals.blocks import ( # io.pytables, io.packers Block, - CategoricalBlock, DatetimeBlock, DatetimeTZBlock, ExtensionBlock, @@ -46,3 +45,20 @@ "create_block_manager_from_arrays", "create_block_manager_from_blocks", ] + + +def __getattr__(name: str): + import warnings + + if name == "CategoricalBlock": + warnings.warn( + "CategoricalBlock is deprecated and will be removed in a future version. " + "Use ExtensionBlock instead.", + DeprecationWarning, + stacklevel=2, + ) + from pandas.core.internals.blocks import CategoricalBlock + + return CategoricalBlock + + raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index 17d7a17f9736b..60fbd2da70e79 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -26,7 +26,6 @@ def test_namespace(): ] expected = [ "Block", - "CategoricalBlock", "NumericBlock", "DatetimeBlock", "DatetimeTZBlock", diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index e530f3e37883a..d882eb930137b 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -258,6 +258,9 @@ def test_read_expands_user_home_dir( ), ], ) + @pytest.mark.filterwarnings( + "ignore:CategoricalBlock is deprecated:DeprecationWarning" + ) def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) path = datapath(*path) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index ab0b3b08a11e8..81af799640135 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -20,6 +20,7 @@ @filter_sparse @pytest.mark.single +@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestFeather: def check_error_on_write(self, df, exc, err_msg): # check that we are raising the exception diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3ef77d2fbacd0..e74c915bbaf74 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -573,6 +573,7 @@ def test_write_column_index_nonstring(self, pa): self.check_error_on_write(df, engine, ValueError, msg) +@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestParquetPyArrow(Base): def test_basic(self, pa, df_full):