From de51d2c2ad9d198e8a7e6326768cbf79702247d8 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Mar 2023 19:54:46 -0700 Subject: [PATCH 1/6] REF: implement NDFrame._from_mgr --- doc/source/development/extending.rst | 2 + pandas/_testing/__init__.py | 6 ++ pandas/core/apply.py | 2 +- pandas/core/arraylike.py | 2 +- pandas/core/frame.py | 53 +++++++++-------- pandas/core/generic.py | 89 ++++++++++++++++++---------- pandas/core/groupby/generic.py | 8 ++- pandas/core/groupby/ops.py | 5 +- pandas/core/resample.py | 2 +- pandas/core/reshape/concat.py | 4 +- pandas/core/reshape/merge.py | 4 +- pandas/core/reshape/reshape.py | 2 +- pandas/core/series.py | 7 ++- 13 files changed, 117 insertions(+), 69 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 1d52a5595472b..f22e75864eba3 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -332,6 +332,8 @@ There are 3 possible constructor properties to be defined on a subclass: * ``DataFrame._constructor_sliced``: Used when a ``DataFrame`` (sub-)class manipulation result should be a ``Series`` (sub-)class. * ``Series._constructor_expanddim``: Used when a ``Series`` (sub-)class manipulation result should be a ``DataFrame`` (sub-)class, e.g. ``Series.to_frame()``. +If a subclass's ``_constructor``, ``_constructor_sliced``, or ``_constructor_expanddim`` do not return a class, then you will also need to override ``_from_mgr``, ``_sliced_from_mgr``, and ``_expanddim_from_mgr``, which return analogous-shaped objects but take only ``BlockManager`` and ``axes`` inputs. + Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties. .. code-block:: python diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index c49dda2763c83..85047510f678a 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -822,6 +822,9 @@ def _constructor(self): def _constructor_expanddim(self): return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) + def _expanddim_from_mgr(self, mgr, axes): + return SubclassedDataFrame._from_mgr(mgr, axes) + class SubclassedDataFrame(DataFrame): _metadata = ["testattr"] @@ -834,6 +837,9 @@ def _constructor(self): def _constructor_sliced(self): return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) + def _sliced_from_mgr(self, mgr, axes): + return SubclassedSeries._from_mgr(mgr, axes) + class SubclassedCategorical(Categorical): @property diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 08618d5a6aa16..5fed07226fe18 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -675,7 +675,7 @@ def apply(self) -> DataFrame | Series: with np.errstate(all="ignore"): results = self.obj._mgr.apply("apply", func=self.f) # _constructor will retain self.index and self.columns - return self.obj._constructor(data=results) + return self.obj._from_mgr(results, axes=results.axes) # broadcasting if self.result_type == "broadcast": diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 1d10d797866f4..fec4143266144 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -349,7 +349,7 @@ def _reconstruct(result): return result if isinstance(result, BlockManager): # we went through BlockManager.apply e.g. np.sqrt - result = self._constructor(result, **reconstruct_kwargs, copy=False) + result = self._from_mgr(result, axes=result.axes) else: # we converted an array, lost our axes result = self._constructor( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a2e3b6fc10e43..86ddbc7173d95 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -645,6 +645,9 @@ def _constructor(self) -> Callable[..., DataFrame]: _constructor_sliced: Callable[..., Series] = Series + def _sliced_from_mgr(self, mgr, axes) -> Series: + return self._constructor_sliced._from_mgr(mgr, axes=axes) + # ---------------------------------------------------------------------- # Constructors @@ -2335,7 +2338,7 @@ def maybe_reorder( manager = get_option("mode.data_manager") mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) - return cls(mgr) + return cls._from_mgr(mgr, axes=mgr.axes) def to_records( self, index: bool = True, column_dtypes=None, index_dtypes=None @@ -2545,7 +2548,7 @@ def _from_arrays( verify_integrity=verify_integrity, typ=manager, ) - return cls(mgr) + return cls._from_mgr(mgr, axes=mgr.axes) @doc( storage_options=_shared_docs["storage_options"], @@ -3681,9 +3684,9 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series: # if we are a copy, mark as such copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None - result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__( - self - ) + result = self._sliced_from_mgr(new_mgr, axes=new_mgr.axes) + result._name = self.index[i] + result = result.__finalize__(self) result._set_is_copy(self, copy=copy) return result @@ -3736,7 +3739,7 @@ def _getitem_nocopy(self, key: list): copy=False, only_slice=True, ) - return self._constructor(new_mgr) + return self._from_mgr(new_mgr, axes=new_mgr.axes) def __getitem__(self, key): check_dict_or_set_indexers(key) @@ -4279,9 +4282,10 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: # Lookup in columns so that if e.g. a str datetime was passed # we attach the Timestamp object as the name. name = self.columns[loc] - klass = self._constructor_sliced # We get index=self.index bc values is a SingleDataManager - return klass(values, name=name, fastpath=True).__finalize__(self) + obj = self._sliced_from_mgr(values, axes=values.axes) + obj._name = name + return obj.__finalize__(self) # ---------------------------------------------------------------------- # Lookup Caching @@ -4754,7 +4758,7 @@ def predicate(arr: ArrayLike) -> bool: return True mgr = self._mgr._get_data_subset(predicate).copy(deep=None) - return type(self)(mgr).__finalize__(self) + return self._from_mgr(mgr, axes=mgr.axes).__finalize__(self) def insert( self, @@ -5750,7 +5754,7 @@ def shift( fill_value=fill_value, allow_dups=True, ) - res_df = self._constructor(mgr) + res_df = self._from_mgr(mgr, axes=mgr.axes) return res_df.__finalize__(self, method="shift") return super().shift( @@ -6278,7 +6282,7 @@ class max type @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: - result = self._constructor(self._mgr.isna(func=isna)) + result = self._from_mgr(self._mgr.isna(func=isna), axes=self._mgr.axes) return result.__finalize__(self, method="isna") @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) @@ -6843,7 +6847,7 @@ def sort_values( self._get_block_manager_axis(axis), default_index(len(indexer)) ) - result = self._constructor(new_data) + result = self._from_mgr(new_data, axes=new_data.axes) if inplace: return self._update_inplace(result) else: @@ -7531,7 +7535,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None) # i.e. scalar, faster than checking np.ndim(right) == 0 with np.errstate(all="ignore"): bm = self._mgr.apply(array_op, right=right) - return self._constructor(bm) + return self._from_mgr(bm, axes=bm.axes) elif isinstance(right, DataFrame): assert self.index.equals(right.index) @@ -7552,7 +7556,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None) right._mgr, # type: ignore[arg-type] array_op, ) - return self._constructor(bm) + return self._from_mgr(bm, axes=bm.axes) elif isinstance(right, Series) and axis == 1: # axis=1 means we want to operate row-by-row @@ -9538,7 +9542,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: axis = 0 new_data = self._mgr.diff(n=periods, axis=axis) - return self._constructor(new_data).__finalize__(self, "diff") + return self._from_mgr(new_data, axes=new_data.axes).__finalize__(self, "diff") # ---------------------------------------------------------------------- # Function application @@ -10397,12 +10401,13 @@ def _series_round(ser: Series, decimals: int) -> Series: # Dispatch to Block.round # Argument "decimals" to "round" of "BaseBlockManager" has incompatible # type "Union[int, integer[Any]]"; expected "int" - return self._constructor( - self._mgr.round( - decimals=decimals, # type: ignore[arg-type] - using_cow=using_copy_on_write(), - ), - ).__finalize__(self, method="round") + new_mgr = self._mgr.round( + decimals=decimals, # type: ignore[arg-type] + using_cow=using_copy_on_write(), + ) + return self._from_mgr(new_mgr, axes=new_mgr.axes).__finalize__( + self, method="round" + ) else: raise TypeError("decimals must be an integer, a dict-like or a Series") @@ -10956,7 +10961,7 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager.reduce res = df._mgr.reduce(blk_func) - out = df._constructor(res).iloc[0] + out = df._from_mgr(res, axes=res.axes).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) elif (df._mgr.get_dtypes() == object).any(): @@ -11394,7 +11399,7 @@ def quantile( res = data._mgr.take(indexer[q_idx], verify=False) res.axes[1] = q - result = self._constructor(res) + result = self._from_mgr(res, axes=res.axes) return result.__finalize__(self, method="quantile") @doc(NDFrame.asfreq, **_shared_doc_kwargs) @@ -11719,7 +11724,7 @@ def _to_dict_of_blocks(self, copy: bool = True): mgr = mgr_to_mgr(mgr, "block") mgr = cast(BlockManager, mgr) return { - k: self._constructor(v).__finalize__(self) + k: self._from_mgr(v, axes=v.axes).__finalize__(self) for k, v, in mgr.to_dict(copy=copy).items() } diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d621a1c68b0f8..a4b75fcd0db91 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -333,6 +333,26 @@ def _as_manager(self, typ: str, copy: bool_t = True) -> Self: # fastpath of passing a manager doesn't check the option/manager class return self._constructor(new_mgr).__finalize__(self) + @classmethod + def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self: + """ + Construct a new object of this type from a Manager object and axes. + + Parameters + ---------- + mgr : Manager + Must have the same ndim as cls. + axes : list[Index] + + Notes + ----- + The axes must match mgr.axes, but are required for future-proofing + in the event that axes are refactored out of the Manager objects. + """ + obj = cls.__new__(cls) + NDFrame.__init__(obj, mgr) + return obj + # ---------------------------------------------------------------------- # attrs and flags @@ -1434,7 +1454,7 @@ def blk_func(values: ArrayLike): return operator.neg(values) # type: ignore[arg-type] new_data = self._mgr.apply(blk_func) - res = self._constructor(new_data) + res = self._from_mgr(new_data, axes=new_data.axes) return res.__finalize__(self, method="__neg__") @final @@ -1449,7 +1469,7 @@ def blk_func(values: ArrayLike): return operator.pos(values) # type: ignore[arg-type] new_data = self._mgr.apply(blk_func) - res = self._constructor(new_data) + res = self._from_mgr(new_data, axes=new_data.axes) return res.__finalize__(self, method="__pos__") @final @@ -1459,7 +1479,8 @@ def __invert__(self) -> Self: return self.copy(deep=False) new_data = self._mgr.apply(operator.invert) - return self._constructor(new_data).__finalize__(self, method="__invert__") + res = self._from_mgr(new_data, axes=new_data.axes) + return res.__finalize__(self, method="__invert__") @final def __nonzero__(self) -> NoReturn: @@ -1586,7 +1607,7 @@ def abs(self) -> Self: 3 7 40 -50 """ res_mgr = self._mgr.apply(np.abs) - return self._constructor(res_mgr).__finalize__(self, name="abs") + return self._from_mgr(res_mgr, axes=res_mgr.axes).__finalize__(self, name="abs") @final def __abs__(self) -> Self: @@ -3939,7 +3960,9 @@ class max_speed axis=self._get_block_manager_axis(axis), verify=True, ) - return self._constructor(new_data).__finalize__(self, method="take") + return self._from_mgr(new_data, axes=new_data.axes).__finalize__( + self, method="take" + ) @final def _take_with_is_copy(self, indices, axis: Axis = 0) -> Self: @@ -4122,9 +4145,9 @@ class animal locomotion new_mgr = self._mgr.fast_xs(loc) - result = self._constructor_sliced( - new_mgr, name=self.index[loc] - ).__finalize__(self) + result = self._sliced_from_mgr(new_mgr, axes=new_mgr.axes) + result._name = self.index[loc] + result = result.__finalize__(self) elif is_scalar(loc): result = self.iloc[:, slice(loc, loc + 1)] elif axis == 1: @@ -4149,7 +4172,8 @@ def _slice(self, slobj: slice, axis: Axis = 0) -> Self: """ assert isinstance(slobj, slice), type(slobj) axis = self._get_block_manager_axis(axis) - result = self._constructor(self._mgr.get_slice(slobj, axis=axis)) + new_mgr = self._mgr.get_slice(slobj, axis=axis) + result = self._from_mgr(new_mgr, axes=new_mgr.axes) result = result.__finalize__(self) # this could be a view @@ -4643,7 +4667,7 @@ def _drop_axis( copy=None, only_slice=only_slice, ) - result = self._constructor(new_mgr) + result = self._from_mgr(new_mgr, axes=new_mgr.axes) if self.ndim == 1: result.name = self.name @@ -5104,7 +5128,7 @@ def sort_index( axis = 1 if isinstance(self, ABCDataFrame) else 0 new_data.set_axis(axis, default_index(len(indexer))) - result = self._constructor(new_data) + result = self._from_mgr(new_data, axes=new_data.axes) if inplace: return self._update_inplace(result) @@ -5456,7 +5480,7 @@ def _reindex_with_indexers( elif using_copy_on_write() and new_data is self._mgr: new_data = new_data.copy(deep=False) - return self._constructor(new_data).__finalize__(self) + return self._from_mgr(new_data, axes=new_data.axes).__finalize__(self) def filter( self, @@ -6129,7 +6153,7 @@ def _consolidate(self): """ f = lambda: self._mgr.consolidate() cons_data = self._protect_consolidate(f) - return self._constructor(cons_data).__finalize__(self) + return self._from_mgr(cons_data, axes=cons_data.axes).__finalize__(self) @property def _is_mixed_type(self) -> bool_t: @@ -6160,11 +6184,13 @@ def _check_inplace_setting(self, value) -> bool_t: @final def _get_numeric_data(self) -> Self: - return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) + new_mgr = self._mgr.get_numeric_data() + return self._from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) @final def _get_bool_data(self): - return self._constructor(self._mgr.get_bool_data()).__finalize__(self) + new_mgr = self._mgr.get_bool_data() + return self._from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) # ---------------------------------------------------------------------- # Internal Interface Methods @@ -6373,7 +6399,8 @@ def astype( else: # else, only a single dtype is given new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) - return self._constructor(new_data).__finalize__(self, method="astype") + res = self._from_mgr(new_data, axes=new_data.axes) + return res.__finalize__(self, method="astype") # GH 33113: handle empty frame or series if not results: @@ -6502,7 +6529,7 @@ def copy(self, deep: bool_t | None = True) -> Self: """ data = self._mgr.copy(deep=deep) self._clear_item_cache() - return self._constructor(data).__finalize__(self, method="copy") + return self._from_mgr(data, axes=data.axes).__finalize__(self, method="copy") @final def __copy__(self, deep: bool_t = True) -> Self: @@ -6564,7 +6591,8 @@ def infer_objects(self, copy: bool_t | None = None) -> Self: dtype: object """ new_mgr = self._mgr.convert(copy=copy) - return self._constructor(new_mgr).__finalize__(self, method="infer_objects") + res = self._from_mgr(new_mgr, axes=new_mgr.axes) + return res.__finalize__(self, method="infer_objects") @final def convert_dtypes( @@ -7036,7 +7064,7 @@ def fillna( if axis == 1: result = self.T.fillna(value=value, limit=limit).T - new_data = result + new_data = result._mgr else: new_data = self._mgr.fillna( value=value, limit=limit, inplace=inplace, downcast=downcast @@ -7046,7 +7074,7 @@ def fillna( else: raise ValueError(f"invalid fill value with a {type(value)}") - result = self._constructor(new_data) + result = self._from_mgr(new_data, axes=new_data.axes) if inplace: return self._update_inplace(result) else: @@ -7452,7 +7480,7 @@ def replace( f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' ) - result = self._constructor(new_data) + result = self._from_mgr(new_data, axes=new_data.axes) if inplace: return self._update_inplace(result) else: @@ -7748,7 +7776,7 @@ def interpolate( **kwargs, ) - result = self._constructor(new_data) + result = self._from_mgr(new_data, axes=new_data.axes) if should_transpose: result = result.T if inplace: @@ -9659,9 +9687,8 @@ def _align_series( elif lidx is None or join_index is None: left = self.copy(deep=copy) else: - left = self._constructor( - self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) - ) + new_mgr = self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) + left = self._from_mgr(new_mgr, axes=new_mgr.axes) right = other._reindex_indexer(join_index, ridx, copy) @@ -9682,7 +9709,7 @@ def _align_series( if copy and fdata is self._mgr: fdata = fdata.copy() - left = self._constructor(fdata) + left = self._from_mgr(fdata, axes=fdata.axes) if ridx is None: right = other.copy(deep=copy) @@ -9824,7 +9851,7 @@ def _where( self._check_inplace_setting(other) new_data = self._mgr.putmask(mask=cond, new=other, align=align) - result = self._constructor(new_data) + result = self._from_mgr(new_data, axes=new_data.axes) return self._update_inplace(result) else: @@ -9833,7 +9860,7 @@ def _where( cond=cond, align=align, ) - result = self._constructor(new_data) + result = self._from_mgr(new_data, axes=new_data.axes) return result.__finalize__(self) @overload @@ -10211,7 +10238,9 @@ def shift( new_data = self._mgr.shift( periods=periods, axis=axis, fill_value=fill_value ) - return self._constructor(new_data).__finalize__(self, method="shift") + return self._from_mgr(new_data, axes=new_data.axes).__finalize__( + self, method="shift" + ) # when freq is given, index is shifted, data is not index = self._get_axis(axis) @@ -11160,7 +11189,7 @@ def block_accum_func(blk_values): result = self._mgr.apply(block_accum_func) - return self._constructor(result).__finalize__(self, method=name) + return self._from_mgr(result, axes=result.axes).__finalize__(self, method=name) def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): return self._accum_func( diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a9df4237601db..afda8bf87eda2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -140,7 +140,9 @@ class NamedAgg(NamedTuple): class SeriesGroupBy(GroupBy[Series]): def _wrap_agged_manager(self, mgr: Manager) -> Series: - return self.obj._constructor(mgr, name=self.obj.name) + out = self.obj._from_mgr(mgr, axes=mgr.axes) + out._name = self.obj.name + return out def _get_data_to_aggregate( self, *, numeric_only: bool = False, name: str | None = None @@ -1549,7 +1551,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: res_mgr = mgr.grouped_reduce(arr_func) res_mgr.set_axis(1, mgr.axes[1]) - res_df = self.obj._constructor(res_mgr) + res_df = self.obj._from_mgr(res_mgr, axes=res_mgr.axes) res_df = self._maybe_transpose_result(res_df) return res_df @@ -1855,7 +1857,7 @@ def _get_data_to_aggregate( return mgr def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: - return self.obj._constructor(mgr) + return self.obj._from_mgr(mgr, axes=mgr.axes) def _apply_to_column_groupbys(self, func) -> DataFrame: from pandas.core.reshape.concat import concat diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4f21d90ac5116..135e7010ee965 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1327,7 +1327,8 @@ class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: # fastpath equivalent to `sdata.iloc[slice_obj]` mgr = sdata._mgr.get_slice(slice_obj) - ser = sdata._constructor(mgr, name=sdata.name, fastpath=True) + ser = sdata._from_mgr(mgr, axes=mgr.axes) + ser._name = sdata.name return ser.__finalize__(sdata, method="groupby") @@ -1339,7 +1340,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # else: # return sdata.iloc[:, slice_obj] mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) - df = sdata._constructor(mgr) + df = sdata._from_mgr(mgr, axes=mgr.axes) return df.__finalize__(sdata, method="groupby") diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 3b31932952867..98731447b78e5 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1967,7 +1967,7 @@ def _take_new_index( new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) # error: Incompatible return value type # (got "DataFrame", expected "NDFrameT") - return obj._constructor(new_mgr) # type: ignore[return-value] + return obj._from_mgr(new_mgr, axes=new_mgr.axes) # type: ignore[return-value] else: raise ValueError("'obj' should be either a Series or a DataFrame") diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 395db8060ce0e..a6833cb88bd1c 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -624,8 +624,8 @@ def get_result(self): if not self.copy and not using_copy_on_write(): new_data._consolidate_inplace() - cons = sample._constructor - return cons(new_data).__finalize__(self, method="concat") + out = sample._from_mgr(new_data, axes=new_data.axes) + return out.__finalize__(self, method="concat") def _get_result_dim(self) -> int: if self._is_series and self.bm_axis == 1: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d2b022214167f..79d89ef2f6eba 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -771,7 +771,7 @@ def _reindex_and_concat( allow_dups=True, use_na_proxy=True, ) - left = left._constructor(lmgr) + left = left._from_mgr(lmgr, axes=lmgr.axes) left.index = join_index if right_indexer is not None and not is_range_indexer( @@ -786,7 +786,7 @@ def _reindex_and_concat( allow_dups=True, use_na_proxy=True, ) - right = right._constructor(rmgr) + right = right._from_mgr(rmgr, axes=rmgr.axes) right.index = join_index from pandas import concat diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index eaeb4a50d0bf3..b344346d54cc0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -503,7 +503,7 @@ def _unstack_frame(obj: DataFrame, level, fill_value=None): if not obj._can_fast_transpose: mgr = obj._mgr.unstack(unstacker, fill_value=fill_value) - return obj._constructor(mgr) + return obj._from_mgr(mgr, axes=mgr.axes) else: return unstacker.get_result( obj._values, value_columns=obj.columns, fill_value=fill_value diff --git a/pandas/core/series.py b/pandas/core/series.py index f8875555fdf97..96cf23f3609df 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -578,6 +578,9 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]: return DataFrame + def _expanddim_from_mgr(self, mgr, axes) -> DataFrame: + return self._constructor_expanddim._from_mgr(mgr, axes) + # types @property def _can_hold_na(self) -> bool: @@ -1053,7 +1056,7 @@ def _get_values_tuple(self, key: tuple): def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series: new_mgr = self._mgr.getitem_mgr(indexer) - return self._constructor(new_mgr).__finalize__(self) + return self._from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) def _get_value(self, label, takeable: bool = False): """ @@ -1896,7 +1899,7 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: columns = Index([name]) mgr = self._mgr.to_2d_mgr(columns) - df = self._constructor_expanddim(mgr) + df = self._expanddim_from_mgr(mgr, axes=mgr.axes) return df.__finalize__(self, method="to_frame") def _set_name(self, name, inplace: bool = False) -> Series: From 981515494c98172da87a10f6450eb3945048d498 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Mar 2023 09:03:51 -0700 Subject: [PATCH 2/6] mypy fixup --- pandas/core/frame.py | 5 ++++- pandas/core/series.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0e67a503ece86..642f3ea8eca03 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -646,7 +646,10 @@ def _constructor(self) -> Callable[..., DataFrame]: _constructor_sliced: Callable[..., Series] = Series def _sliced_from_mgr(self, mgr, axes) -> Series: - return self._constructor_sliced._from_mgr(mgr, axes=axes) + # error: "Callable[..., DataFrame]" has no attribute "_from_mgr" + return self._constructor_sliced._from_mgr( # type: ignore[attr-defined] + mgr, axes=axes + ) # ---------------------------------------------------------------------- # Constructors diff --git a/pandas/core/series.py b/pandas/core/series.py index 53e21a519051c..78608074668e5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -579,7 +579,10 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]: return DataFrame def _expanddim_from_mgr(self, mgr, axes) -> DataFrame: - return self._constructor_expanddim._from_mgr(mgr, axes) + # error: "Callable[..., Series]" has no attribute "_from_mgr" + return self._constructor_expanddim._from_mgr( # type: ignore[attr-defined] + mgr, axes + ) # types @property From 714bae7752b95e5b1a43f98d68f11966eccce483 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Mar 2023 19:08:50 -0700 Subject: [PATCH 3/6] short-term alias --- pandas/_testing/__init__.py | 4 ++-- pandas/core/frame.py | 14 ++++++++------ pandas/core/generic.py | 14 +++++++++----- pandas/core/series.py | 10 ++++++---- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 85047510f678a..3a1775fc53a68 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -823,7 +823,7 @@ def _constructor_expanddim(self): return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) def _expanddim_from_mgr(self, mgr, axes): - return SubclassedDataFrame._from_mgr(mgr, axes) + return self._constructor_expanddim(mgr) class SubclassedDataFrame(DataFrame): @@ -838,7 +838,7 @@ def _constructor_sliced(self): return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) def _sliced_from_mgr(self, mgr, axes): - return SubclassedSeries._from_mgr(mgr, axes) + return self._constructor_sliced(mgr) class SubclassedCategorical(Categorical): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 642f3ea8eca03..4149a3575cfcb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -646,10 +646,12 @@ def _constructor(self) -> Callable[..., DataFrame]: _constructor_sliced: Callable[..., Series] = Series def _sliced_from_mgr(self, mgr, axes) -> Series: - # error: "Callable[..., DataFrame]" has no attribute "_from_mgr" - return self._constructor_sliced._from_mgr( # type: ignore[attr-defined] - mgr, axes=axes - ) + # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 + # This is a short-term implementation that will be replaced + # with self._constructor_slcied._from_mgr(...) + # once downstream packages (geopandas) have had a chance to implement + # their own overrides. + return self._constructor_sliced(mgr) # ---------------------------------------------------------------------- # Constructors @@ -2341,7 +2343,7 @@ def maybe_reorder( manager = get_option("mode.data_manager") mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) - return cls._from_mgr(mgr, axes=mgr.axes) + return cls(mgr) def to_records( self, index: bool = True, column_dtypes=None, index_dtypes=None @@ -2551,7 +2553,7 @@ def _from_arrays( verify_integrity=verify_integrity, typ=manager, ) - return cls._from_mgr(mgr, axes=mgr.axes) + return cls(mgr) @doc( storage_options=_shared_docs["storage_options"], diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9a5bc0d9c6571..72ebb76ae6535 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -333,8 +333,7 @@ def _as_manager(self, typ: str, copy: bool_t = True) -> Self: # fastpath of passing a manager doesn't check the option/manager class return self._constructor(new_mgr).__finalize__(self) - @classmethod - def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self: + def _from_mgr(self, mgr: Manager, axes: list[Index]) -> Self: """ Construct a new object of this type from a Manager object and axes. @@ -349,9 +348,14 @@ def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self: The axes must match mgr.axes, but are required for future-proofing in the event that axes are refactored out of the Manager objects. """ - obj = cls.__new__(cls) - NDFrame.__init__(obj, mgr) - return obj + # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 + # This is a short-term implementation that will be replaced + # obj = cls.__new__(cls) + # NDFrame.__init__(obj, mgr) + # return obj + # once downstream packages (geopandas) have had a chance to implement + # their own overrides. + return self._constructor(mgr) # ---------------------------------------------------------------------- # attrs and flags diff --git a/pandas/core/series.py b/pandas/core/series.py index 78608074668e5..f2c9d9111dd11 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -579,10 +579,12 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]: return DataFrame def _expanddim_from_mgr(self, mgr, axes) -> DataFrame: - # error: "Callable[..., Series]" has no attribute "_from_mgr" - return self._constructor_expanddim._from_mgr( # type: ignore[attr-defined] - mgr, axes - ) + # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 + # This is a short-term implementation that will be replaced + # with self._constructor_expanddim._from_mgr(...) + # once downstream packages (geopandas) have had a chance to implement + # their own overrides. + return self._constructor_expanddim(mgr) # types @property From 38e5759696092646f0c57a29a151f007083fb8cb Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 27 May 2023 13:58:45 -0700 Subject: [PATCH 4/6] REF: implement _constructor_from_mgr --- doc/source/development/extending.rst | 2 - pandas/_testing/__init__.py | 6 --- pandas/core/apply.py | 2 +- pandas/core/arraylike.py | 2 +- pandas/core/frame.py | 47 +++++++++++------ pandas/core/generic.py | 78 +++++++++++++++------------- pandas/core/groupby/generic.py | 6 +-- pandas/core/groupby/ops.py | 4 +- pandas/core/resample.py | 4 +- pandas/core/reshape/concat.py | 2 +- pandas/core/reshape/merge.py | 4 +- pandas/core/reshape/reshape.py | 2 +- pandas/core/series.py | 23 ++++++-- 13 files changed, 106 insertions(+), 76 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 1aa2cabb509e5..f74eacb6b861d 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -332,8 +332,6 @@ There are 3 possible constructor properties to be defined on a subclass: * ``DataFrame._constructor_sliced``: Used when a ``DataFrame`` (sub-)class manipulation result should be a ``Series`` (sub-)class. * ``Series._constructor_expanddim``: Used when a ``Series`` (sub-)class manipulation result should be a ``DataFrame`` (sub-)class, e.g. ``Series.to_frame()``. -If a subclass's ``_constructor``, ``_constructor_sliced``, or ``_constructor_expanddim`` do not return a class, then you will also need to override ``_from_mgr``, ``_sliced_from_mgr``, and ``_expanddim_from_mgr``, which return analogous-shaped objects but take only ``BlockManager`` and ``axes`` inputs. - Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties. .. code-block:: python diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 6876cb84ec24e..7908c9df60df8 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -838,9 +838,6 @@ def _constructor(self): def _constructor_expanddim(self): return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) - def _expanddim_from_mgr(self, mgr, axes): - return self._constructor_expanddim(mgr) - class SubclassedDataFrame(DataFrame): _metadata = ["testattr"] @@ -853,9 +850,6 @@ def _constructor(self): def _constructor_sliced(self): return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) - def _sliced_from_mgr(self, mgr, axes): - return self._constructor_sliced(mgr) - class SubclassedCategorical(Categorical): pass diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2a8fad395f449..13943e15efe63 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -715,7 +715,7 @@ def apply(self) -> DataFrame | Series: with np.errstate(all="ignore"): results = self.obj._mgr.apply("apply", func=self.f) # _constructor will retain self.index and self.columns - return self.obj._from_mgr(results, axes=results.axes) + return self.obj._constructor_from_mgr(results, axes=results.axes) # broadcasting if self.result_type == "broadcast": diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index fec4143266144..62f6737d86d51 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -349,7 +349,7 @@ def _reconstruct(result): return result if isinstance(result, BlockManager): # we went through BlockManager.apply e.g. np.sqrt - result = self._from_mgr(result, axes=result.axes) + result = self._constructor_from_mgr(result, axes=result.axes) else: # we converted an array, lost our axes result = self._constructor( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1faaa5972c5f6..e86737e6f83ed 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -637,16 +637,30 @@ class DataFrame(NDFrame, OpsMixin): def _constructor(self) -> Callable[..., DataFrame]: return DataFrame + def _constructor_from_mgr(self, mgr, axes): + if self._constructor is DataFrame: + # we are pandas.DataFrame (or a subclass that doesn't override _constructor) + return self._from_mgr(mgr, axes=axes) + else: + assert axes is mgr.axes + return self._constructor(mgr) + _constructor_sliced: Callable[..., Series] = Series def _sliced_from_mgr(self, mgr, axes) -> Series: # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 # This is a short-term implementation that will be replaced - # with self._constructor_slcied._from_mgr(...) + # with self._constructor_sliced._from_mgr(...) # once downstream packages (geopandas) have had a chance to implement # their own overrides. return self._constructor_sliced(mgr) + def _constructor_sliced_from_mgr(self, mgr, axes): + if self._constructor_sliced is Series: + return self._sliced_from_mgr(mgr, axes) + assert axes is mgr.axes + return self._constructor_sliced(mgr) + # ---------------------------------------------------------------------- # Constructors @@ -3675,7 +3689,7 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series: # if we are a copy, mark as such copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None - result = self._sliced_from_mgr(new_mgr, axes=new_mgr.axes) + result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes) result._name = self.index[i] result = result.__finalize__(self) result._set_is_copy(self, copy=copy) @@ -3730,7 +3744,7 @@ def _getitem_nocopy(self, key: list): copy=False, only_slice=True, ) - return self._from_mgr(new_mgr, axes=new_mgr.axes) + return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) def __getitem__(self, key): check_dict_or_set_indexers(key) @@ -4270,7 +4284,7 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: # we attach the Timestamp object as the name. name = self.columns[loc] # We get index=self.index bc values is a SingleDataManager - obj = self._sliced_from_mgr(values, axes=values.axes) + obj = self._constructor_sliced_from_mgr(values, axes=values.axes) obj._name = name return obj.__finalize__(self) @@ -4746,7 +4760,7 @@ def predicate(arr: ArrayLike) -> bool: return True mgr = self._mgr._get_data_subset(predicate).copy(deep=None) - return self._from_mgr(mgr, axes=mgr.axes).__finalize__(self) + return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self) def insert( self, @@ -5560,7 +5574,7 @@ def shift( fill_value=fill_value, allow_dups=True, ) - res_df = self._from_mgr(mgr, axes=mgr.axes) + res_df = self._constructor_from_mgr(mgr, axes=mgr.axes) return res_df.__finalize__(self, method="shift") return super().shift( @@ -6088,7 +6102,8 @@ class max type @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: - result = self._from_mgr(self._mgr.isna(func=isna), axes=self._mgr.axes) + res_mgr = self._mgr.isna(func=isna) + result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes) return result.__finalize__(self, method="isna") @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) @@ -6800,7 +6815,7 @@ def sort_values( self._get_block_manager_axis(axis), default_index(len(indexer)) ) - result = self._from_mgr(new_data, axes=new_data.axes) + result = self._constructor_from_mgr(new_data, axes=new_data.axes) if inplace: return self._update_inplace(result) else: @@ -7494,7 +7509,7 @@ def _dispatch_frame_op( if not is_list_like(right): # i.e. scalar, faster than checking np.ndim(right) == 0 bm = self._mgr.apply(array_op, right=right) - return self._from_mgr(bm, axes=bm.axes) + return self._constructor_from_mgr(bm, axes=bm.axes) elif isinstance(right, DataFrame): assert self.index.equals(right.index) @@ -7514,7 +7529,7 @@ def _dispatch_frame_op( right._mgr, # type: ignore[arg-type] array_op, ) - return self._from_mgr(bm, axes=bm.axes) + return self._constructor_from_mgr(bm, axes=bm.axes) elif isinstance(right, Series) and axis == 1: # axis=1 means we want to operate row-by-row @@ -9483,7 +9498,9 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: axis = 0 new_data = self._mgr.diff(n=periods, axis=axis) - return self._from_mgr(new_data, axes=new_data.axes).__finalize__(self, "diff") + return self._constructor_from_mgr(new_data, axes=new_data.axes).__finalize__( + self, "diff" + ) # ---------------------------------------------------------------------- # Function application @@ -10343,7 +10360,7 @@ def _series_round(ser: Series, decimals: int) -> Series: decimals=decimals, # type: ignore[arg-type] using_cow=using_copy_on_write(), ) - return self._from_mgr(new_mgr, axes=new_mgr.axes).__finalize__( + return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__( self, method="round" ) else: @@ -10898,7 +10915,7 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager.reduce res = df._mgr.reduce(blk_func) - out = df._from_mgr(res, axes=res.axes).iloc[0] + out = df._constructor_from_mgr(res, axes=res.axes).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) elif (df._mgr.get_dtypes() == object).any(): @@ -11512,7 +11529,7 @@ def quantile( res = data._mgr.take(indexer[q_idx], verify=False) res.axes[1] = q - result = self._from_mgr(res, axes=res.axes) + result = self._constructor_from_mgr(res, axes=res.axes) return result.__finalize__(self, method="quantile") def to_timestamp( @@ -11834,7 +11851,7 @@ def _to_dict_of_blocks(self, copy: bool = True): mgr = mgr_to_mgr(mgr, "block") mgr = cast(BlockManager, mgr) return { - k: self._from_mgr(v, axes=v.axes).__finalize__(self) + k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self) for k, v, in mgr.to_dict(copy=copy).items() } diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 90673bd5a92ac..0ca97ae8135df 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -319,7 +319,8 @@ def _as_manager(self, typ: str, copy: bool_t = True) -> Self: # fastpath of passing a manager doesn't check the option/manager class return self._constructor(new_mgr).__finalize__(self) - def _from_mgr(self, mgr: Manager, axes: list[Index]) -> Self: + @classmethod + def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self: """ Construct a new object of this type from a Manager object and axes. @@ -334,14 +335,9 @@ def _from_mgr(self, mgr: Manager, axes: list[Index]) -> Self: The axes must match mgr.axes, but are required for future-proofing in the event that axes are refactored out of the Manager objects. """ - # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 - # This is a short-term implementation that will be replaced - # obj = cls.__new__(cls) - # NDFrame.__init__(obj, mgr) - # return obj - # once downstream packages (geopandas) have had a chance to implement - # their own overrides. - return self._constructor(mgr) + obj = cls.__new__(cls) + NDFrame.__init__(obj, mgr) + return obj # ---------------------------------------------------------------------- # attrs and flags @@ -1456,7 +1452,7 @@ def blk_func(values: ArrayLike): return operator.neg(values) # type: ignore[arg-type] new_data = self._mgr.apply(blk_func) - res = self._from_mgr(new_data, axes=new_data.axes) + res = self._constructor_from_mgr(new_data, axes=new_data.axes) return res.__finalize__(self, method="__neg__") @final @@ -1471,7 +1467,7 @@ def blk_func(values: ArrayLike): return operator.pos(values) # type: ignore[arg-type] new_data = self._mgr.apply(blk_func) - res = self._from_mgr(new_data, axes=new_data.axes) + res = self._constructor_from_mgr(new_data, axes=new_data.axes) return res.__finalize__(self, method="__pos__") @final @@ -1481,7 +1477,7 @@ def __invert__(self) -> Self: return self.copy(deep=False) new_data = self._mgr.apply(operator.invert) - res = self._from_mgr(new_data, axes=new_data.axes) + res = self._constructor_from_mgr(new_data, axes=new_data.axes) return res.__finalize__(self, method="__invert__") @final @@ -1620,7 +1616,9 @@ def abs(self) -> Self: 3 7 40 -50 """ res_mgr = self._mgr.apply(np.abs) - return self._from_mgr(res_mgr, axes=res_mgr.axes).__finalize__(self, name="abs") + return self._constructor_from_mgr(res_mgr, axes=res_mgr.axes).__finalize__( + self, name="abs" + ) @final def __abs__(self) -> Self: @@ -4003,7 +4001,7 @@ class max_speed axis=self._get_block_manager_axis(axis), verify=True, ) - return self._from_mgr(new_data, axes=new_data.axes).__finalize__( + return self._constructor_from_mgr(new_data, axes=new_data.axes).__finalize__( self, method="take" ) @@ -4188,7 +4186,7 @@ class animal locomotion new_mgr = self._mgr.fast_xs(loc) - result = self._sliced_from_mgr(new_mgr, axes=new_mgr.axes) + result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes) result._name = self.index[loc] result = result.__finalize__(self) elif is_scalar(loc): @@ -4235,7 +4233,7 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self: assert isinstance(slobj, slice), type(slobj) axis = self._get_block_manager_axis(axis) new_mgr = self._mgr.get_slice(slobj, axis=axis) - result = self._from_mgr(new_mgr, axes=new_mgr.axes) + result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) result = result.__finalize__(self) # this could be a view @@ -4730,7 +4728,7 @@ def _drop_axis( copy=None, only_slice=only_slice, ) - result = self._from_mgr(new_mgr, axes=new_mgr.axes) + result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) if self.ndim == 1: result._name = self.name @@ -5189,7 +5187,7 @@ def sort_index( axis = 1 if isinstance(self, ABCDataFrame) else 0 new_data.set_axis(axis, default_index(len(indexer))) - result = self._from_mgr(new_data, axes=new_data.axes) + result = self._constructor_from_mgr(new_data, axes=new_data.axes) if inplace: return self._update_inplace(result) @@ -5550,7 +5548,9 @@ def _reindex_with_indexers( elif using_copy_on_write() and new_data is self._mgr: new_data = new_data.copy(deep=False) - return self._from_mgr(new_data, axes=new_data.axes).__finalize__(self) + return self._constructor_from_mgr(new_data, axes=new_data.axes).__finalize__( + self + ) def filter( self, @@ -6220,7 +6220,9 @@ def _consolidate(self): """ f = lambda: self._mgr.consolidate() cons_data = self._protect_consolidate(f) - return self._from_mgr(cons_data, axes=cons_data.axes).__finalize__(self) + return self._constructor_from_mgr(cons_data, axes=cons_data.axes).__finalize__( + self + ) @property def _is_mixed_type(self) -> bool_t: @@ -6237,12 +6239,12 @@ def _is_mixed_type(self) -> bool_t: @final def _get_numeric_data(self) -> Self: new_mgr = self._mgr.get_numeric_data() - return self._from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) + return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) @final def _get_bool_data(self): new_mgr = self._mgr.get_bool_data() - return self._from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) + return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) # ---------------------------------------------------------------------- # Internal Interface Methods @@ -6452,7 +6454,7 @@ def astype( else: # else, only a single dtype is given new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) - res = self._from_mgr(new_data, axes=new_data.axes) + res = self._constructor_from_mgr(new_data, axes=new_data.axes) return res.__finalize__(self, method="astype") # GH 33113: handle empty frame or series @@ -6582,7 +6584,9 @@ def copy(self, deep: bool_t | None = True) -> Self: """ data = self._mgr.copy(deep=deep) self._clear_item_cache() - return self._from_mgr(data, axes=data.axes).__finalize__(self, method="copy") + return self._constructor_from_mgr(data, axes=data.axes).__finalize__( + self, method="copy" + ) @final def __copy__(self, deep: bool_t = True) -> Self: @@ -6644,7 +6648,7 @@ def infer_objects(self, copy: bool_t | None = None) -> Self: dtype: object """ new_mgr = self._mgr.convert(copy=copy) - res = self._from_mgr(new_mgr, axes=new_mgr.axes) + res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) return res.__finalize__(self, method="infer_objects") @final @@ -7143,7 +7147,7 @@ def fillna( else: raise ValueError(f"invalid fill value with a {type(value)}") - result = self._from_mgr(new_data, axes=new_data.axes) + result = self._constructor_from_mgr(new_data, axes=new_data.axes) if inplace: return self._update_inplace(result) else: @@ -7599,7 +7603,7 @@ def replace( f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' ) - result = self._from_mgr(new_data, axes=new_data.axes) + result = self._constructor_from_mgr(new_data, axes=new_data.axes) if inplace: return self._update_inplace(result) else: @@ -7918,7 +7922,7 @@ def interpolate( **kwargs, ) - result = self._from_mgr(new_data, axes=new_data.axes) + result = self._constructor_from_mgr(new_data, axes=new_data.axes) if should_transpose: result = result.T if inplace: @@ -9890,7 +9894,7 @@ def _align_series( left = self.copy(deep=copy) else: new_mgr = self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) - left = self._from_mgr(new_mgr, axes=new_mgr.axes) + left = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) right = other._reindex_indexer(join_index, ridx, copy) @@ -9911,7 +9915,7 @@ def _align_series( if copy and fdata is self._mgr: fdata = fdata.copy() - left = self._from_mgr(fdata, axes=fdata.axes) + left = self._constructor_from_mgr(fdata, axes=fdata.axes) if ridx is None: right = other.copy(deep=copy) @@ -10058,7 +10062,7 @@ def _where( # reconstruct the block manager new_data = self._mgr.putmask(mask=cond, new=other, align=align) - result = self._from_mgr(new_data, axes=new_data.axes) + result = self._constructor_from_mgr(new_data, axes=new_data.axes) return self._update_inplace(result) else: @@ -10067,7 +10071,7 @@ def _where( cond=cond, align=align, ) - result = self._from_mgr(new_data, axes=new_data.axes) + result = self._constructor_from_mgr(new_data, axes=new_data.axes) return result.__finalize__(self) @overload @@ -10445,9 +10449,9 @@ def shift( new_data = self._mgr.shift( periods=periods, axis=axis, fill_value=fill_value ) - return self._from_mgr(new_data, axes=new_data.axes).__finalize__( - self, method="shift" - ) + return self._constructor_from_mgr( + new_data, axes=new_data.axes + ).__finalize__(self, method="shift") # when freq is given, index is shifted, data is not index = self._get_axis(axis) @@ -11406,7 +11410,9 @@ def block_accum_func(blk_values): result = self._mgr.apply(block_accum_func) - return self._from_mgr(result, axes=result.axes).__finalize__(self, method=name) + return self._constructor_from_mgr(result, axes=result.axes).__finalize__( + self, method=name + ) def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): return self._accum_func( diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cf532844617a6..00207e359aed1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -147,7 +147,7 @@ class NamedAgg(NamedTuple): class SeriesGroupBy(GroupBy[Series]): def _wrap_agged_manager(self, mgr: Manager) -> Series: - out = self.obj._from_mgr(mgr, axes=mgr.axes) + out = self.obj._constructor_from_mgr(mgr, axes=mgr.axes) out._name = self.obj.name return out @@ -1656,7 +1656,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: res_mgr = mgr.grouped_reduce(arr_func) res_mgr.set_axis(1, mgr.axes[1]) - res_df = self.obj._from_mgr(res_mgr, axes=res_mgr.axes) + res_df = self.obj._constructor_from_mgr(res_mgr, axes=res_mgr.axes) res_df = self._maybe_transpose_result(res_df) return res_df @@ -1962,7 +1962,7 @@ def _get_data_to_aggregate( return mgr def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: - return self.obj._from_mgr(mgr, axes=mgr.axes) + return self.obj._constructor_from_mgr(mgr, axes=mgr.axes) def _apply_to_column_groupbys(self, func) -> DataFrame: from pandas.core.reshape.concat import concat diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 3b1dedba1d275..e2e4f65b5dc61 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1157,7 +1157,7 @@ class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: # fastpath equivalent to `sdata.iloc[slice_obj]` mgr = sdata._mgr.get_slice(slice_obj) - ser = sdata._from_mgr(mgr, axes=mgr.axes) + ser = sdata._constructor_from_mgr(mgr, axes=mgr.axes) ser._name = sdata.name return ser.__finalize__(sdata, method="groupby") @@ -1170,7 +1170,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # else: # return sdata.iloc[:, slice_obj] mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) - df = sdata._from_mgr(mgr, axes=mgr.axes) + df = sdata._constructor_from_mgr(mgr, axes=mgr.axes) return df.__finalize__(sdata, method="groupby") diff --git a/pandas/core/resample.py b/pandas/core/resample.py index bdd2a9e78eba3..6aea0a8b3c556 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2114,9 +2114,7 @@ def _take_new_index( if axis == 1: raise NotImplementedError("axis 1 is not supported") new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) - # error: Incompatible return value type - # (got "DataFrame", expected "NDFrameT") - return obj._from_mgr(new_mgr, axes=new_mgr.axes) # type: ignore[return-value] + return obj._constructor_from_mgr(new_mgr, axes=new_mgr.axes) else: raise ValueError("'obj' should be either a Series or a DataFrame") diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 1ef6676e41fc6..4994ed347629e 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -681,7 +681,7 @@ def get_result(self): if not self.copy and not using_copy_on_write(): new_data._consolidate_inplace() - out = sample._from_mgr(new_data, axes=new_data.axes) + out = sample._constructor_from_mgr(new_data, axes=new_data.axes) return out.__finalize__(self, method="concat") def _get_result_dim(self) -> int: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e7ed33c477a52..922d3301e02a1 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -775,7 +775,7 @@ def _reindex_and_concat( allow_dups=True, use_na_proxy=True, ) - left = left._from_mgr(lmgr, axes=lmgr.axes) + left = left._constructor_from_mgr(lmgr, axes=lmgr.axes) left.index = join_index if right_indexer is not None and not is_range_indexer( @@ -790,7 +790,7 @@ def _reindex_and_concat( allow_dups=True, use_na_proxy=True, ) - right = right._from_mgr(rmgr, axes=rmgr.axes) + right = right._constructor_from_mgr(rmgr, axes=rmgr.axes) right.index = join_index from pandas import concat diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 643d8a641c66d..bb5349f58a642 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -505,7 +505,7 @@ def _unstack_frame(obj: DataFrame, level, fill_value=None) -> DataFrame: if not obj._can_fast_transpose: mgr = obj._mgr.unstack(unstacker, fill_value=fill_value) - return obj._from_mgr(mgr, axes=mgr.axes) + return obj._constructor_from_mgr(mgr, axes=mgr.axes) else: return unstacker.get_result( obj._values, value_columns=obj.columns, fill_value=fill_value diff --git a/pandas/core/series.py b/pandas/core/series.py index a4373f34222d1..2fe93b6185c01 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -571,6 +571,14 @@ def _init_dict( def _constructor(self) -> Callable[..., Series]: return Series + def _constructor_from_mgr(self, mgr, axes): + if self._constructor is Series: + # we are pandas.Series (or a subclass that doesn't override _constructor) + return self._from_mgr(mgr, axes=axes) + else: + assert axes is mgr.axes + return self._constructor(mgr) + @property def _constructor_expanddim(self) -> Callable[..., DataFrame]: """ @@ -584,9 +592,18 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]: def _expanddim_from_mgr(self, mgr, axes) -> DataFrame: # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 # This is a short-term implementation that will be replaced - # with self._constructor_expanddim._from_mgr(...) + # with self._constructor_expanddim._constructor_from_mgr(...) # once downstream packages (geopandas) have had a chance to implement # their own overrides. + # error: "Callable[..., DataFrame]" has no attribute "_from_mgr" [attr-defined] + return self._constructor_expanddim._from_mgr( # type: ignore[attr-defined] + mgr, axes=mgr.axes + ) + + def _constructor_expanddim_from_mgr(self, mgr, axes): + if self._constructor is Series: + return self._expanddim_from_mgr(mgr, axes) + assert axes is mgr.axes return self._constructor_expanddim(mgr) # types @@ -1088,7 +1105,7 @@ def _get_values_tuple(self, key: tuple): def _get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Series: new_mgr = self._mgr.get_rows_with_mask(indexer) - return self._from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) + return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) def _get_value(self, label, takeable: bool = False): """ @@ -1954,7 +1971,7 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: columns = Index([name]) mgr = self._mgr.to_2d_mgr(columns) - df = self._expanddim_from_mgr(mgr, axes=mgr.axes) + df = self._constructor_expanddim_from_mgr(mgr, axes=mgr.axes) return df.__finalize__(self, method="to_frame") def _set_name( From 78790e5d9fafac5bb50fae4f50cb9791744bd1b7 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 29 May 2023 09:36:21 -0700 Subject: [PATCH 5/6] update _sliced_from_mgr --- pandas/core/frame.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e86737e6f83ed..e4804d2b9ed9b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -648,12 +648,7 @@ def _constructor_from_mgr(self, mgr, axes): _constructor_sliced: Callable[..., Series] = Series def _sliced_from_mgr(self, mgr, axes) -> Series: - # https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828 - # This is a short-term implementation that will be replaced - # with self._constructor_sliced._from_mgr(...) - # once downstream packages (geopandas) have had a chance to implement - # their own overrides. - return self._constructor_sliced(mgr) + return self._constructor_sliced._from_mgr(mgr, axes) def _constructor_sliced_from_mgr(self, mgr, axes): if self._constructor_sliced is Series: From 191a08ac950531353fd69487b709fb16f4b82668 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 29 May 2023 14:43:40 -0700 Subject: [PATCH 6/6] mypy fixup --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4804d2b9ed9b..b82368a9c4cc6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -648,7 +648,7 @@ def _constructor_from_mgr(self, mgr, axes): _constructor_sliced: Callable[..., Series] = Series def _sliced_from_mgr(self, mgr, axes) -> Series: - return self._constructor_sliced._from_mgr(mgr, axes) + return Series._from_mgr(mgr, axes) def _constructor_sliced_from_mgr(self, mgr, axes): if self._constructor_sliced is Series: