-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF: implement NDFrame._from_mgr #52132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
de51d2c
f5dbd91
9815154
714bae7
77c4a1d
4b29645
c0aac8b
da22ff2
15019a5
3ca1ae4
38e5759
a89edb2
78790e5
191a08a
7aba6b5
132894c
68bb080
8b0e785
a125080
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -637,8 +637,30 @@ class DataFrame(NDFrame, OpsMixin): | |
def _constructor(self) -> Callable[..., DataFrame]: | ||
return DataFrame | ||
|
||
def _constructor_from_mgr(self, mgr, axes): | ||
if self._constructor is DataFrame: | ||
# we are pandas.DataFrame (or a subclass that doesn't override _constructor) | ||
return self._from_mgr(mgr, axes=axes) | ||
else: | ||
assert axes is mgr.axes | ||
return self._constructor(mgr) | ||
|
||
_constructor_sliced: Callable[..., Series] = Series | ||
|
||
def _sliced_from_mgr(self, mgr, axes) -> Series: | ||
# https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 | ||
# This is a short-term implementation that will be replaced | ||
# with self._constructor_sliced._from_mgr(...) | ||
# once downstream packages (geopandas) have had a chance to implement | ||
# their own overrides. | ||
return self._constructor_sliced(mgr) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't this be something like … (inline code snippet lost in extraction)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good catch. I think this is left over from a previous round of editing. Will update. |
||
|
||
def _constructor_sliced_from_mgr(self, mgr, axes): | ||
if self._constructor_sliced is Series: | ||
return self._sliced_from_mgr(mgr, axes) | ||
assert axes is mgr.axes | ||
return self._constructor_sliced(mgr) | ||
|
||
# ---------------------------------------------------------------------- | ||
# Constructors | ||
|
||
|
@@ -3667,9 +3689,9 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series: | |
|
||
# if we are a copy, mark as such | ||
copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None | ||
result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__( | ||
self | ||
) | ||
result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes) | ||
result._name = self.index[i] | ||
result = result.__finalize__(self) | ||
result._set_is_copy(self, copy=copy) | ||
return result | ||
|
||
|
@@ -3722,7 +3744,7 @@ def _getitem_nocopy(self, key: list): | |
copy=False, | ||
only_slice=True, | ||
) | ||
return self._constructor(new_mgr) | ||
return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) | ||
|
||
def __getitem__(self, key): | ||
check_dict_or_set_indexers(key) | ||
|
@@ -4261,9 +4283,10 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: | |
# Lookup in columns so that if e.g. a str datetime was passed | ||
# we attach the Timestamp object as the name. | ||
name = self.columns[loc] | ||
klass = self._constructor_sliced | ||
# We get index=self.index bc values is a SingleDataManager | ||
return klass(values, name=name, fastpath=True).__finalize__(self) | ||
obj = self._constructor_sliced_from_mgr(values, axes=values.axes) | ||
obj._name = name | ||
return obj.__finalize__(self) | ||
|
||
# ---------------------------------------------------------------------- | ||
# Lookup Caching | ||
|
@@ -4737,7 +4760,7 @@ def predicate(arr: ArrayLike) -> bool: | |
return True | ||
|
||
mgr = self._mgr._get_data_subset(predicate).copy(deep=None) | ||
return type(self)(mgr).__finalize__(self) | ||
return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self) | ||
|
||
def insert( | ||
self, | ||
|
@@ -5551,7 +5574,7 @@ def shift( | |
fill_value=fill_value, | ||
allow_dups=True, | ||
) | ||
res_df = self._constructor(mgr) | ||
res_df = self._constructor_from_mgr(mgr, axes=mgr.axes) | ||
return res_df.__finalize__(self, method="shift") | ||
|
||
return super().shift( | ||
|
@@ -6079,7 +6102,8 @@ class max type | |
|
||
@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) | ||
def isna(self) -> DataFrame: | ||
result = self._constructor(self._mgr.isna(func=isna)) | ||
res_mgr = self._mgr.isna(func=isna) | ||
result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes) | ||
return result.__finalize__(self, method="isna") | ||
|
||
@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) | ||
|
@@ -6791,7 +6815,7 @@ def sort_values( | |
self._get_block_manager_axis(axis), default_index(len(indexer)) | ||
) | ||
|
||
result = self._constructor(new_data) | ||
result = self._constructor_from_mgr(new_data, axes=new_data.axes) | ||
if inplace: | ||
return self._update_inplace(result) | ||
else: | ||
|
@@ -7485,7 +7509,7 @@ def _dispatch_frame_op( | |
if not is_list_like(right): | ||
# i.e. scalar, faster than checking np.ndim(right) == 0 | ||
bm = self._mgr.apply(array_op, right=right) | ||
return self._constructor(bm) | ||
return self._constructor_from_mgr(bm, axes=bm.axes) | ||
|
||
elif isinstance(right, DataFrame): | ||
assert self.index.equals(right.index) | ||
|
@@ -7505,7 +7529,7 @@ def _dispatch_frame_op( | |
right._mgr, # type: ignore[arg-type] | ||
array_op, | ||
) | ||
return self._constructor(bm) | ||
return self._constructor_from_mgr(bm, axes=bm.axes) | ||
|
||
elif isinstance(right, Series) and axis == 1: | ||
# axis=1 means we want to operate row-by-row | ||
|
@@ -9474,7 +9498,9 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: | |
axis = 0 | ||
|
||
new_data = self._mgr.diff(n=periods, axis=axis) | ||
return self._constructor(new_data).__finalize__(self, "diff") | ||
return self._constructor_from_mgr(new_data, axes=new_data.axes).__finalize__( | ||
self, "diff" | ||
) | ||
|
||
# ---------------------------------------------------------------------- | ||
# Function application | ||
|
@@ -10330,12 +10356,13 @@ def _series_round(ser: Series, decimals: int) -> Series: | |
# Dispatch to Block.round | ||
# Argument "decimals" to "round" of "BaseBlockManager" has incompatible | ||
# type "Union[int, integer[Any]]"; expected "int" | ||
return self._constructor( | ||
self._mgr.round( | ||
decimals=decimals, # type: ignore[arg-type] | ||
using_cow=using_copy_on_write(), | ||
), | ||
).__finalize__(self, method="round") | ||
new_mgr = self._mgr.round( | ||
decimals=decimals, # type: ignore[arg-type] | ||
using_cow=using_copy_on_write(), | ||
) | ||
return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__( | ||
self, method="round" | ||
) | ||
else: | ||
raise TypeError("decimals must be an integer, a dict-like or a Series") | ||
|
||
|
@@ -10888,7 +10915,7 @@ def _get_data() -> DataFrame: | |
# After possibly _get_data and transposing, we are now in the | ||
# simple case where we can use BlockManager.reduce | ||
res = df._mgr.reduce(blk_func) | ||
out = df._constructor(res).iloc[0] | ||
out = df._constructor_from_mgr(res, axes=res.axes).iloc[0] | ||
if out_dtype is not None: | ||
out = out.astype(out_dtype) | ||
elif (df._mgr.get_dtypes() == object).any(): | ||
|
@@ -11502,7 +11529,7 @@ def quantile( | |
res = data._mgr.take(indexer[q_idx], verify=False) | ||
res.axes[1] = q | ||
|
||
result = self._constructor(res) | ||
result = self._constructor_from_mgr(res, axes=res.axes) | ||
return result.__finalize__(self, method="quantile") | ||
|
||
def to_timestamp( | ||
|
@@ -11824,7 +11851,7 @@ def _to_dict_of_blocks(self, copy: bool = True): | |
mgr = mgr_to_mgr(mgr, "block") | ||
mgr = cast(BlockManager, mgr) | ||
return { | ||
k: self._constructor(v).__finalize__(self) | ||
k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self) | ||
for k, v, in mgr.to_dict(copy=copy).items() | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this assert needed here? (It's also not done in `_from_mgr`.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not strictly. At the moment, `_from_mgr` is documented as requiring them (`axes` and `mgr.axes`) to match, so this assertion seemed like an easy way of making it required along this path too. We could remove the assertion and document the requirement in the docstring instead.