Skip to content

Commit 09e2036

Browse files
authored
DEPR: CategoricalBlock; combine Block.replace methods (pandas-dev#40527)
1 parent 38640d1 commit 09e2036

File tree

7 files changed

+40
-53
lines changed

7 files changed

+40
-53
lines changed

pandas/core/internals/__init__.py

+15-2
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
)
1010
from pandas.core.internals.blocks import ( # io.pytables, io.packers
1111
Block,
12-
CategoricalBlock,
1312
DatetimeBlock,
1413
DatetimeTZBlock,
1514
ExtensionBlock,
@@ -28,7 +27,6 @@
2827

2928
__all__ = [
3029
"Block",
31-
"CategoricalBlock",
3230
"NumericBlock",
3331
"DatetimeBlock",
3432
"DatetimeTZBlock",
@@ -48,3 +46,18 @@
4846
"create_block_manager_from_arrays",
4947
"create_block_manager_from_blocks",
5048
]
49+
50+
51+
def __getattr__(name: str):
52+
import warnings
53+
54+
if name == "CategoricalBlock":
55+
warnings.warn(
56+
"CategoricalBlock is deprecated and will be removed in a future version. "
57+
"Use ExtensionBlock instead.",
58+
FutureWarning,
59+
stacklevel=2,
60+
)
61+
return ExtensionBlock
62+
63+
raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'")

pandas/core/internals/blocks.py

+22-42
Original file line number | Diff line number | Diff line change
@@ -673,6 +673,7 @@ def copy(self, deep: bool = True):
673673
# ---------------------------------------------------------------------
674674
# Replace
675675

676+
@final
676677
def replace(
677678
self,
678679
to_replace,
@@ -687,15 +688,30 @@ def replace(
687688
"""
688689
inplace = validate_bool_kwarg(inplace, "inplace")
689690

691+
# Note: the checks we do in NDFrame.replace ensure we never get
692+
# here with listlike to_replace or value, as those cases
693+
# go through _replace_list
694+
695+
values = self.values
696+
697+
if isinstance(values, Categorical):
698+
# TODO: avoid special-casing
699+
blk = self if inplace else self.copy()
700+
blk.values.replace(to_replace, value, inplace=True)
701+
return [blk]
702+
703+
regex = should_use_regex(regex, to_replace)
704+
705+
if regex:
706+
return self._replace_regex(to_replace, value, inplace=inplace)
707+
690708
if not self._can_hold_element(to_replace):
691709
# We cannot hold `to_replace`, so we know immediately that
692710
# replacing it is a no-op.
693711
# Note: If to_replace were a list, NDFrame.replace would call
694712
# replace_list instead of replace.
695713
return [self] if inplace else [self.copy()]
696714

697-
values = self.values
698-
699715
mask = missing.mask_missing(values, to_replace)
700716
if not mask.any():
701717
# Note: we get here with test_replace_extension_other incorrectly
@@ -720,7 +736,7 @@ def replace(
720736
else:
721737
# split so that we only upcast where necessary
722738
return self.split_and_operate(
723-
type(self).replace, to_replace, value, inplace=inplace, regex=regex
739+
type(self).replace, to_replace, value, inplace=True, regex=regex
724740
)
725741

726742
@final
@@ -1223,7 +1239,7 @@ def take_nd(
12231239
Take values according to indexer and return them as a block.
12241240
12251241
"""
1226-
# algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
1242+
# algos.take_nd dispatches for DatetimeTZBlock
12271243
# so need to preserve types
12281244
# sparse is treated like an ndarray, but needs .get_values() shaping
12291245

@@ -1422,7 +1438,7 @@ class ExtensionBlock(Block):
14221438
Notes
14231439
-----
14241440
This holds all 3rd-party extension array types. It's also the immediate
1425-
parent class for our internal extension types' blocks, CategoricalBlock.
1441+
parent class for our internal extension types' blocks.
14261442
14271443
ExtensionArrays are limited to 1-D.
14281444
"""
@@ -1579,7 +1595,6 @@ def take_nd(
15791595

15801596
def _can_hold_element(self, element: Any) -> bool:
15811597
# TODO: We may need to think about pushing this onto the array.
1582-
# We're doing the same as CategoricalBlock here.
15831598
return True
15841599

15851600
def _slice(self, slicer):
@@ -2019,41 +2034,6 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]:
20192034
def _can_hold_element(self, element: Any) -> bool:
20202035
return True
20212036

2022-
def replace(
2023-
self,
2024-
to_replace,
2025-
value,
2026-
inplace: bool = False,
2027-
regex: bool = False,
2028-
) -> List[Block]:
2029-
# Note: the checks we do in NDFrame.replace ensure we never get
2030-
# here with listlike to_replace or value, as those cases
2031-
# go through _replace_list
2032-
2033-
regex = should_use_regex(regex, to_replace)
2034-
2035-
if regex:
2036-
return self._replace_regex(to_replace, value, inplace=inplace)
2037-
else:
2038-
return super().replace(to_replace, value, inplace=inplace, regex=False)
2039-
2040-
2041-
class CategoricalBlock(ExtensionBlock):
2042-
__slots__ = ()
2043-
2044-
def replace(
2045-
self,
2046-
to_replace,
2047-
value,
2048-
inplace: bool = False,
2049-
regex: bool = False,
2050-
) -> List[Block]:
2051-
inplace = validate_bool_kwarg(inplace, "inplace")
2052-
result = self if inplace else self.copy()
2053-
2054-
result.values.replace(to_replace, value, inplace=True)
2055-
return [result]
2056-
20572037

20582038
# -----------------------------------------------------------------
20592039
# Constructor Helpers
@@ -2116,7 +2096,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None):
21162096
# Need this first(ish) so that Sparse[datetime] is sparse
21172097
cls = ExtensionBlock
21182098
elif isinstance(dtype, CategoricalDtype):
2119-
cls = CategoricalBlock
2099+
cls = ExtensionBlock
21202100
elif vtype is Timestamp:
21212101
cls = DatetimeTZBlock
21222102
elif vtype is Interval or vtype is Period:

pandas/core/internals/managers.py

-8
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,6 @@
6767
)
6868
from pandas.core.internals.blocks import (
6969
Block,
70-
CategoricalBlock,
7170
DatetimeTZBlock,
7271
ExtensionBlock,
7372
ObjectValuesExtensionBlock,
@@ -1867,13 +1866,6 @@ def _form_blocks(
18671866
object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_)
18681867
blocks.extend(object_blocks)
18691868

1870-
if len(items_dict["CategoricalBlock"]) > 0:
1871-
cat_blocks = [
1872-
new_block(array, klass=CategoricalBlock, placement=i, ndim=2)
1873-
for i, array in items_dict["CategoricalBlock"]
1874-
]
1875-
blocks.extend(cat_blocks)
1876-
18771869
if len(items_dict["ExtensionBlock"]):
18781870
external_blocks = [
18791871
new_block(array, klass=ExtensionBlock, placement=i, ndim=2)

pandas/tests/internals/test_api.py

-1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@ def test_namespace():
2626
]
2727
expected = [
2828
"Block",
29-
"CategoricalBlock",
3029
"NumericBlock",
3130
"DatetimeBlock",
3231
"DatetimeTZBlock",

pandas/tests/io/test_common.py

+1
Original file line number | Diff line number | Diff line change
@@ -258,6 +258,7 @@ def test_read_expands_user_home_dir(
258258
),
259259
],
260260
)
261+
@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
261262
def test_read_fspath_all(self, reader, module, path, datapath):
262263
pytest.importorskip(module)
263264
path = datapath(*path)

pandas/tests/io/test_feather.py

+1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020

2121
@filter_sparse
2222
@pytest.mark.single
23+
@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
2324
class TestFeather:
2425
def check_error_on_write(self, df, exc, err_msg):
2526
# check that we are raising the exception

pandas/tests/io/test_parquet.py

+1
Original file line number | Diff line number | Diff line change
@@ -573,6 +573,7 @@ def test_write_column_index_nonstring(self, pa):
573573
self.check_error_on_write(df, engine, ValueError, msg)
574574

575575

576+
@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
576577
class TestParquetPyArrow(Base):
577578
def test_basic(self, pa, df_full):
578579

0 commit comments

Comments
 (0)