Skip to content

Commit c93c30f

Browse files
jbrockmendel authored and feefladder committed
PERF: extract_array earlier in DataFrame construction (pandas-dev#42774)
1 parent b7ce336 commit c93c30f

File tree

6 files changed

+10
-31
lines changed

6 files changed

+10
-31
lines changed

pandas/core/construction.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -420,9 +420,9 @@ def extract_array(
420420
return obj._values
421421
return obj
422422

423-
obj = obj.array
423+
obj = obj._values
424424

425-
if extract_numpy and isinstance(obj, ABCPandasArray):
425+
elif extract_numpy and isinstance(obj, ABCPandasArray):
426426
obj = obj.to_numpy()
427427

428428
return obj

pandas/core/internals/blocks.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1920,11 +1920,11 @@ def get_block_type(values, dtype: DtypeObj | None = None):
19201920

19211921

19221922
def new_block(values, placement, *, ndim: int, klass=None) -> Block:
1923+
# caller is responsible for ensuring values is NOT a PandasArray
19231924

19241925
if not isinstance(placement, BlockPlacement):
19251926
placement = BlockPlacement(placement)
19261927

1927-
values, _ = extract_pandas_array(values, None, ndim)
19281928
check_ndim(values, placement, ndim)
19291929

19301930
if klass is None:

pandas/core/internals/managers.py

+1-6
Original file line number | Diff line number | Diff line change
@@ -1779,11 +1779,6 @@ def create_block_manager_from_blocks(
17791779
return mgr
17801780

17811781

1782-
# We define this here so we can override it in tests.extension.test_numpy
1783-
def _extract_array(obj):
1784-
return extract_array(obj, extract_numpy=True)
1785-
1786-
17871782
def create_block_manager_from_arrays(
17881783
arrays,
17891784
names: Index,
@@ -1795,7 +1790,7 @@ def create_block_manager_from_arrays(
17951790
# assert isinstance(axes, list)
17961791
# assert all(isinstance(x, Index) for x in axes)
17971792

1798-
arrays = [_extract_array(x) for x in arrays]
1793+
arrays = [extract_array(x, extract_numpy=True) for x in arrays]
17991794

18001795
try:
18011796
blocks = _form_blocks(arrays, names, axes, consolidate)

pandas/core/strings/accessor.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
from pandas.core.dtypes.missing import isna
3535

3636
from pandas.core.base import NoNewAttributesMixin
37+
from pandas.core.construction import extract_array
3738

3839
if TYPE_CHECKING:
3940
from pandas import (
@@ -213,10 +214,7 @@ def _validate(data):
213214
# see _libs/lib.pyx for list of inferred types
214215
allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"]
215216

216-
# TODO: avoid kludge for tests.extension.test_numpy
217-
from pandas.core.internals.managers import _extract_array
218-
219-
data = _extract_array(data)
217+
data = extract_array(data)
220218

221219
values = getattr(data, "categories", data) # categorical / normal
222220

pandas/tests/extension/test_numpy.py

+1-17
Original file line number | Diff line number | Diff line change
@@ -23,32 +23,17 @@
2323
ExtensionDtype,
2424
PandasDtype,
2525
)
26-
from pandas.core.dtypes.generic import ABCPandasArray
2726

2827
import pandas as pd
2928
import pandas._testing as tm
3029
from pandas.core.arrays.numpy_ import PandasArray
31-
from pandas.core.internals import (
32-
blocks,
33-
managers,
34-
)
30+
from pandas.core.internals import blocks
3531
from pandas.tests.extension import base
3632

3733
# TODO(ArrayManager) PandasArray
3834
pytestmark = td.skip_array_manager_not_yet_implemented
3935

4036

41-
def _extract_array_patched(obj):
42-
if isinstance(obj, (pd.Index, pd.Series)):
43-
obj = obj._values
44-
if isinstance(obj, ABCPandasArray):
45-
# TODO for reasons unclear, we get here in a couple of tests
46-
# with PandasArray._typ *not* patched
47-
obj = obj.to_numpy()
48-
49-
return obj
50-
51-
5237
def _can_hold_element_patched(obj, element) -> bool:
5338
if isinstance(element, PandasArray):
5439
element = element.to_numpy()
@@ -98,7 +83,6 @@ def allow_in_pandas(monkeypatch):
9883
"""
9984
with monkeypatch.context() as m:
10085
m.setattr(PandasArray, "_typ", "extension")
101-
m.setattr(managers, "_extract_array", _extract_array_patched)
10286
m.setattr(blocks, "can_hold_element", _can_hold_element_patched)
10387
m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
10488
yield

pandas/tests/internals/test_internals.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -1376,9 +1376,11 @@ def test_make_block_no_pandas_array(block_maker):
13761376
# PandasArray, no dtype
13771377
result = block_maker(arr, slice(len(arr)), ndim=arr.ndim)
13781378
assert result.dtype.kind in ["i", "u"]
1379-
assert result.is_extension is False
13801379

13811380
if block_maker is make_block:
1381+
# new_block requires caller to unwrap PandasArray
1382+
assert result.is_extension is False
1383+
13821384
# PandasArray, PandasDtype
13831385
result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim)
13841386
assert result.dtype.kind in ["i", "u"]

0 commit comments

Comments
 (0)