Skip to content

Commit e16ff8e

Browse files
ngoldbaum authored and im-vinicius committed
CLN: unify NumpyBlock, ObjectBlock, and NumericBlock (pandas-dev#52817)
* CLN: unify NumpyBlock, ObjectBlock, and NumericBlock
* CLN: respond to review comments
* CLN: deprecate ObjectBlock and NumericBlock
* CLN: appease mypy
* CLN: remove out-of-date reference to maybe_split
* CLN: respond to review comments
* CLN: remove NumpyBlock from the semi-public API
* CLN: test for is_numeric in block internals tests
1 parent 43ceef4 commit e16ff8e

File tree

6 files changed

+93
-74
lines changed

6 files changed

+93
-74
lines changed

pandas/core/internals/__init__.py

+24 -4
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@
1111
Block,
1212
DatetimeTZBlock,
1313
ExtensionBlock,
14-
NumericBlock,
15-
ObjectBlock,
1614
)
1715
from pandas.core.internals.concat import concatenate_managers
1816
from pandas.core.internals.managers import (
@@ -23,10 +21,8 @@
2321

2422
__all__ = [
2523
"Block",
26-
"NumericBlock",
2724
"DatetimeTZBlock",
2825
"ExtensionBlock",
29-
"ObjectBlock",
3026
"make_block",
3127
"DataManager",
3228
"ArrayManager",
@@ -38,3 +34,27 @@
3834
# this is preserved here for downstream compatibility (GH-33892)
3935
"create_block_manager_from_blocks",
4036
]
37+
38+
39+
def __getattr__(name: str):
40+
import warnings
41+
42+
from pandas.util._exceptions import find_stack_level
43+
44+
if name in ["NumericBlock", "ObjectBlock"]:
45+
warnings.warn(
46+
f"{name} is deprecated and will be removed in a future version. "
47+
"Use public APIs instead.",
48+
DeprecationWarning,
49+
stacklevel=find_stack_level(),
50+
)
51+
if name == "NumericBlock":
52+
from pandas.core.internals.blocks import NumericBlock
53+
54+
return NumericBlock
55+
else:
56+
from pandas.core.internals.blocks import ObjectBlock
57+
58+
return ObjectBlock
59+
60+
raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'")

pandas/core/internals/blocks.py

+53 -56
Original file line number | Diff line number | Diff line change
@@ -469,13 +469,36 @@ def convert(
469469
using_cow: bool = False,
470470
) -> list[Block]:
471471
"""
472-
attempt to coerce any object types to better types return a copy
473-
of the block (if copy = True) by definition we are not an ObjectBlock
474-
here!
472+
Attempt to coerce any object types to better types. Return a copy
473+
of the block (if copy = True).
475474
"""
476-
if not copy and using_cow:
477-
return [self.copy(deep=False)]
478-
return [self.copy()] if copy else [self]
475+
if not self.is_object:
476+
if not copy and using_cow:
477+
return [self.copy(deep=False)]
478+
return [self.copy()] if copy else [self]
479+
480+
if self.ndim != 1 and self.shape[0] != 1:
481+
return self.split_and_operate(Block.convert, copy=copy, using_cow=using_cow)
482+
483+
values = self.values
484+
if values.ndim == 2:
485+
# the check above ensures we only get here with values.shape[0] == 1,
486+
# avoid doing .ravel as that might make a copy
487+
values = values[0]
488+
489+
res_values = lib.maybe_convert_objects(
490+
values, # type: ignore[arg-type]
491+
convert_non_numeric=True,
492+
)
493+
refs = None
494+
if copy and res_values is values:
495+
res_values = values.copy()
496+
elif res_values is values and using_cow:
497+
refs = self.refs
498+
499+
res_values = ensure_block_shape(res_values, self.ndim)
500+
res_values = maybe_coerce_values(res_values)
501+
return [self.make_block(res_values, refs=refs)]
479502

480503
# ---------------------------------------------------------------------
481504
# Array-Like Methods
@@ -680,7 +703,7 @@ def _replace_regex(
680703
List[Block]
681704
"""
682705
if not self._can_hold_element(to_replace):
683-
# i.e. only ObjectBlock, but could in principle include a
706+
# i.e. only if self.is_object is True, but could in principle include a
684707
# String ExtensionBlock
685708
if using_cow:
686709
return [self.copy(deep=False)]
@@ -1273,7 +1296,7 @@ def fillna(
12731296
) -> list[Block]:
12741297
"""
12751298
fillna on the block with the value. If we fail, then convert to
1276-
ObjectBlock and try again
1299+
a block that holds objects and try again
12771300
"""
12781301
# Caller is responsible for validating limit; if int it is strictly positive
12791302
inplace = validate_bool_kwarg(inplace, "inplace")
@@ -2064,7 +2087,7 @@ def _unstack(
20642087
needs_masking: npt.NDArray[np.bool_],
20652088
):
20662089
# ExtensionArray-safe unstack.
2067-
# We override ObjectBlock._unstack, which unstacks directly on the
2090+
# We override Block._unstack, which unstacks directly on the
20682091
# values of the array. For EA-backed blocks, this would require
20692092
# converting to a 2-D ndarray of objects.
20702093
# Instead, we unstack an ndarray of integer positions, followed by
@@ -2100,6 +2123,7 @@ def _unstack(
21002123

21012124
class NumpyBlock(libinternals.NumpyBlock, Block):
21022125
values: np.ndarray
2126+
__slots__ = ()
21032127

21042128
@property
21052129
def is_view(self) -> bool:
@@ -2118,10 +2142,28 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
21182142
def values_for_json(self) -> np.ndarray:
21192143
return self.values
21202144

2145+
@cache_readonly
2146+
def is_numeric(self) -> bool: # type: ignore[override]
2147+
dtype = self.values.dtype
2148+
kind = dtype.kind
2149+
2150+
return kind in "fciub"
2151+
2152+
@cache_readonly
2153+
def is_object(self) -> bool: # type: ignore[override]
2154+
return self.values.dtype.kind == "O"
2155+
21212156

21222157
class NumericBlock(NumpyBlock):
2158+
# this Block type is kept for backwards-compatibility
2159+
# TODO(3.0): delete and remove deprecation in __init__.py.
2160+
__slots__ = ()
2161+
2162+
2163+
class ObjectBlock(NumpyBlock):
2164+
# this Block type is kept for backwards-compatibility
2165+
# TODO(3.0): delete and remove deprecation in __init__.py.
21232166
__slots__ = ()
2124-
is_numeric = True
21252167

21262168

21272169
class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
@@ -2257,49 +2299,6 @@ class DatetimeTZBlock(DatetimeLikeBlock):
22572299
values_for_json = NDArrayBackedExtensionBlock.values_for_json
22582300

22592301

2260-
class ObjectBlock(NumpyBlock):
2261-
__slots__ = ()
2262-
is_object = True
2263-
2264-
@maybe_split
2265-
def convert(
2266-
self,
2267-
*,
2268-
copy: bool = True,
2269-
using_cow: bool = False,
2270-
) -> list[Block]:
2271-
"""
2272-
attempt to cast any object types to better types return a copy of
2273-
the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
2274-
"""
2275-
if self.dtype != _dtype_obj:
2276-
# GH#50067 this should be impossible in ObjectBlock, but until
2277-
# that is fixed, we short-circuit here.
2278-
if using_cow:
2279-
return [self.copy(deep=False)]
2280-
return [self]
2281-
2282-
values = self.values
2283-
if values.ndim == 2:
2284-
# maybe_split ensures we only get here with values.shape[0] == 1,
2285-
# avoid doing .ravel as that might make a copy
2286-
values = values[0]
2287-
2288-
res_values = lib.maybe_convert_objects(
2289-
values,
2290-
convert_non_numeric=True,
2291-
)
2292-
refs = None
2293-
if copy and res_values is values:
2294-
res_values = values.copy()
2295-
elif res_values is values and using_cow:
2296-
refs = self.refs
2297-
2298-
res_values = ensure_block_shape(res_values, self.ndim)
2299-
res_values = maybe_coerce_values(res_values)
2300-
return [self.make_block(res_values, refs=refs)]
2301-
2302-
23032302
# -----------------------------------------------------------------
23042303
# Constructor Helpers
23052304

@@ -2358,10 +2357,8 @@ def get_block_type(dtype: DtypeObj) -> type[Block]:
23582357
kind = dtype.kind
23592358
if kind in "Mm":
23602359
return DatetimeLikeBlock
2361-
elif kind in "fciub":
2362-
return NumericBlock
23632360

2364-
return ObjectBlock
2361+
return NumpyBlock
23652362

23662363

23672364
def new_block_2d(

pandas/tests/extension/base/casting.py

+6 -3
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import pandas.util._test_decorators as td
55

66
import pandas as pd
7-
from pandas.core.internals import ObjectBlock
7+
from pandas.core.internals.blocks import NumpyBlock
88
from pandas.tests.extension.base.base import BaseExtensionTests
99

1010

@@ -16,7 +16,9 @@ def test_astype_object_series(self, all_data):
1616
result = ser.astype(object)
1717
assert result.dtype == np.dtype(object)
1818
if hasattr(result._mgr, "blocks"):
19-
assert isinstance(result._mgr.blocks[0], ObjectBlock)
19+
blk = result._mgr.blocks[0]
20+
assert isinstance(blk, NumpyBlock)
21+
assert blk.is_object
2022
assert isinstance(result._mgr.array, np.ndarray)
2123
assert result._mgr.array.dtype == np.dtype(object)
2224

@@ -26,7 +28,8 @@ def test_astype_object_frame(self, all_data):
2628
result = df.astype(object)
2729
if hasattr(result._mgr, "blocks"):
2830
blk = result._mgr.blocks[0]
29-
assert isinstance(blk, ObjectBlock), type(blk)
31+
assert isinstance(blk, NumpyBlock), type(blk)
32+
assert blk.is_object
3033
assert isinstance(result._mgr.arrays[0], np.ndarray)
3134
assert result._mgr.arrays[0].dtype == np.dtype(object)
3235

pandas/tests/frame/test_block_internals.py

+7 -7
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,7 @@
2020
option_context,
2121
)
2222
import pandas._testing as tm
23-
from pandas.core.internals import (
24-
NumericBlock,
25-
ObjectBlock,
26-
)
23+
from pandas.core.internals.blocks import NumpyBlock
2724

2825
# Segregated collection of methods that require the BlockManager internal data
2926
# structure
@@ -387,7 +384,8 @@ def test_constructor_no_pandas_array(self):
387384
result = DataFrame({"A": arr})
388385
expected = DataFrame({"A": [1, 2, 3]})
389386
tm.assert_frame_equal(result, expected)
390-
assert isinstance(result._mgr.blocks[0], NumericBlock)
387+
assert isinstance(result._mgr.blocks[0], NumpyBlock)
388+
assert result._mgr.blocks[0].is_numeric
391389

392390
def test_add_column_with_pandas_array(self):
393391
# GH 26390
@@ -400,8 +398,10 @@ def test_add_column_with_pandas_array(self):
400398
"c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)),
401399
}
402400
)
403-
assert type(df["c"]._mgr.blocks[0]) == ObjectBlock
404-
assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock
401+
assert type(df["c"]._mgr.blocks[0]) == NumpyBlock
402+
assert df["c"]._mgr.blocks[0].is_object
403+
assert type(df2["c"]._mgr.blocks[0]) == NumpyBlock
404+
assert df2["c"]._mgr.blocks[0].is_object
405405
tm.assert_frame_equal(df, df2)
406406

407407

pandas/tests/internals/test_api.py

-2
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,8 @@ def test_namespace():
2727
]
2828
expected = [
2929
"Block",
30-
"NumericBlock",
3130
"DatetimeTZBlock",
3231
"ExtensionBlock",
33-
"ObjectBlock",
3432
"make_block",
3533
"DataManager",
3634
"ArrayManager",

pandas/tests/series/test_constructors.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@
4646
IntervalArray,
4747
period_array,
4848
)
49-
from pandas.core.internals.blocks import NumericBlock
49+
from pandas.core.internals.blocks import NumpyBlock
5050

5151

5252
class TestSeriesConstructors:
@@ -2098,7 +2098,8 @@ def test_constructor_no_pandas_array(self, using_array_manager):
20982098
result = Series(ser.array)
20992099
tm.assert_series_equal(ser, result)
21002100
if not using_array_manager:
2101-
assert isinstance(result._mgr.blocks[0], NumericBlock)
2101+
assert isinstance(result._mgr.blocks[0], NumpyBlock)
2102+
assert result._mgr.blocks[0].is_numeric
21022103

21032104
@td.skip_array_manager_invalid_test
21042105
def test_from_array(self):

0 commit comments

Comments
 (0)