Skip to content

Commit af27d92

Browse files
jbrockmendel authored and im-vinicius committed
REF: implement NDFrame._from_mgr (pandas-dev#52132)
1 parent d9595b7 commit af27d92

File tree

11 files changed: +155 -73 lines changed

pandas/core/apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -735,7 +735,7 @@ def apply(self) -> DataFrame | Series:
735735
with np.errstate(all="ignore"):
736736
results = self.obj._mgr.apply("apply", func=self.func)
737737
# _constructor will retain self.index and self.columns
738-
return self.obj._constructor(data=results)
738+
return self.obj._constructor_from_mgr(results, axes=results.axes)
739739

740740
# broadcasting
741741
if self.result_type == "broadcast":

pandas/core/arraylike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ def _reconstruct(result):
349349
return result
350350
if isinstance(result, BlockManager):
351351
# we went through BlockManager.apply e.g. np.sqrt
352-
result = self._constructor(result, **reconstruct_kwargs, copy=False)
352+
result = self._constructor_from_mgr(result, axes=result.axes)
353353
else:
354354
# we converted an array, lost our axes
355355
result = self._constructor(

pandas/core/frame.py

+43-22
Original file line numberDiff line numberDiff line change
@@ -637,8 +637,25 @@ class DataFrame(NDFrame, OpsMixin):
637637
def _constructor(self) -> Callable[..., DataFrame]:
638638
return DataFrame
639639

640+
def _constructor_from_mgr(self, mgr, axes):
641+
if self._constructor is DataFrame:
642+
# we are pandas.DataFrame (or a subclass that doesn't override _constructor)
643+
return self._from_mgr(mgr, axes=axes)
644+
else:
645+
assert axes is mgr.axes
646+
return self._constructor(mgr)
647+
640648
_constructor_sliced: Callable[..., Series] = Series
641649

650+
def _sliced_from_mgr(self, mgr, axes) -> Series:
651+
return Series._from_mgr(mgr, axes)
652+
653+
def _constructor_sliced_from_mgr(self, mgr, axes):
654+
if self._constructor_sliced is Series:
655+
return self._sliced_from_mgr(mgr, axes)
656+
assert axes is mgr.axes
657+
return self._constructor_sliced(mgr)
658+
642659
# ----------------------------------------------------------------------
643660
# Constructors
644661

@@ -3668,9 +3685,9 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series:
36683685

36693686
# if we are a copy, mark as such
36703687
copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None
3671-
result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__(
3672-
self
3673-
)
3688+
result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes)
3689+
result._name = self.index[i]
3690+
result = result.__finalize__(self)
36743691
result._set_is_copy(self, copy=copy)
36753692
return result
36763693

@@ -3723,7 +3740,7 @@ def _getitem_nocopy(self, key: list):
37233740
copy=False,
37243741
only_slice=True,
37253742
)
3726-
return self._constructor(new_mgr)
3743+
return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
37273744

37283745
def __getitem__(self, key):
37293746
check_dict_or_set_indexers(key)
@@ -4259,9 +4276,10 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series:
42594276
# Lookup in columns so that if e.g. a str datetime was passed
42604277
# we attach the Timestamp object as the name.
42614278
name = self.columns[loc]
4262-
klass = self._constructor_sliced
42634279
# We get index=self.index bc values is a SingleDataManager
4264-
return klass(values, name=name, fastpath=True).__finalize__(self)
4280+
obj = self._constructor_sliced_from_mgr(values, axes=values.axes)
4281+
obj._name = name
4282+
return obj.__finalize__(self)
42654283

42664284
# ----------------------------------------------------------------------
42674285
# Lookup Caching
@@ -4735,7 +4753,7 @@ def predicate(arr: ArrayLike) -> bool:
47354753
return True
47364754

47374755
mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
4738-
return type(self)(mgr).__finalize__(self)
4756+
return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)
47394757

47404758
def insert(
47414759
self,
@@ -5547,7 +5565,7 @@ def shift(
55475565
fill_value=fill_value,
55485566
allow_dups=True,
55495567
)
5550-
res_df = self._constructor(mgr)
5568+
res_df = self._constructor_from_mgr(mgr, axes=mgr.axes)
55515569
return res_df.__finalize__(self, method="shift")
55525570

55535571
return super().shift(
@@ -6075,7 +6093,8 @@ class max type
60756093

60766094
@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
60776095
def isna(self) -> DataFrame:
6078-
result = self._constructor(self._mgr.isna(func=isna))
6096+
res_mgr = self._mgr.isna(func=isna)
6097+
result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes)
60796098
return result.__finalize__(self, method="isna")
60806099

60816100
@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
@@ -6787,7 +6806,7 @@ def sort_values(
67876806
self._get_block_manager_axis(axis), default_index(len(indexer))
67886807
)
67896808

6790-
result = self._constructor(new_data)
6809+
result = self._constructor_from_mgr(new_data, axes=new_data.axes)
67916810
if inplace:
67926811
return self._update_inplace(result)
67936812
else:
@@ -7481,7 +7500,7 @@ def _dispatch_frame_op(
74817500
if not is_list_like(right):
74827501
# i.e. scalar, faster than checking np.ndim(right) == 0
74837502
bm = self._mgr.apply(array_op, right=right)
7484-
return self._constructor(bm)
7503+
return self._constructor_from_mgr(bm, axes=bm.axes)
74857504

74867505
elif isinstance(right, DataFrame):
74877506
assert self.index.equals(right.index)
@@ -7501,7 +7520,7 @@ def _dispatch_frame_op(
75017520
right._mgr, # type: ignore[arg-type]
75027521
array_op,
75037522
)
7504-
return self._constructor(bm)
7523+
return self._constructor_from_mgr(bm, axes=bm.axes)
75057524

75067525
elif isinstance(right, Series) and axis == 1:
75077526
# axis=1 means we want to operate row-by-row
@@ -9480,7 +9499,8 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
94809499
axis = 0
94819500

94829501
new_data = self._mgr.diff(n=periods)
9483-
return self._constructor(new_data).__finalize__(self, "diff")
9502+
res_df = self._constructor_from_mgr(new_data, axes=new_data.axes)
9503+
return res_df.__finalize__(self, "diff")
94849504

94859505
# ----------------------------------------------------------------------
94869506
# Function application
@@ -10336,12 +10356,13 @@ def _series_round(ser: Series, decimals: int) -> Series:
1033610356
# Dispatch to Block.round
1033710357
# Argument "decimals" to "round" of "BaseBlockManager" has incompatible
1033810358
# type "Union[int, integer[Any]]"; expected "int"
10339-
return self._constructor(
10340-
self._mgr.round(
10341-
decimals=decimals, # type: ignore[arg-type]
10342-
using_cow=using_copy_on_write(),
10343-
),
10344-
).__finalize__(self, method="round")
10359+
new_mgr = self._mgr.round(
10360+
decimals=decimals, # type: ignore[arg-type]
10361+
using_cow=using_copy_on_write(),
10362+
)
10363+
return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(
10364+
self, method="round"
10365+
)
1034510366
else:
1034610367
raise TypeError("decimals must be an integer, a dict-like or a Series")
1034710368

@@ -10893,7 +10914,7 @@ def _get_data() -> DataFrame:
1089310914
# After possibly _get_data and transposing, we are now in the
1089410915
# simple case where we can use BlockManager.reduce
1089510916
res = df._mgr.reduce(blk_func)
10896-
out = df._constructor(res).iloc[0]
10917+
out = df._constructor_from_mgr(res, axes=res.axes).iloc[0]
1089710918
if out_dtype is not None:
1089810919
out = out.astype(out_dtype)
1089910920
elif (df._mgr.get_dtypes() == object).any():
@@ -11507,7 +11528,7 @@ def quantile(
1150711528
res = data._mgr.take(indexer[q_idx], verify=False)
1150811529
res.axes[1] = q
1150911530

11510-
result = self._constructor(res)
11531+
result = self._constructor_from_mgr(res, axes=res.axes)
1151111532
return result.__finalize__(self, method="quantile")
1151211533

1151311534
def to_timestamp(
@@ -11835,7 +11856,7 @@ def _to_dict_of_blocks(self, copy: bool = True):
1183511856
mgr = mgr_to_mgr(mgr, "block")
1183611857
mgr = cast(BlockManager, mgr)
1183711858
return {
11838-
k: self._constructor(v).__finalize__(self)
11859+
k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self)
1183911860
for k, v, in mgr.to_dict(copy=copy).items()
1184011861
}
1184111862

0 commit comments

Comments
 (0)