diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 92f1c7c51aa04..931b050dd5d2a 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -57,10 +57,10 @@ cdef class AxisProperty: list axes if obj is None: - # Only instances have _data, not classes + # Only instances have _mgr, not classes return self else: - axes = obj._data.axes + axes = obj._mgr.axes return axes[self.axis] def __set__(self, obj, value): diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 739ac0ed397ca..40751cf43bdfe 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -148,7 +148,7 @@ cdef class Reducer: object.__setattr__(cached_typ, 'index', self.index) object.__setattr__( - cached_typ._data._block, 'values', chunk) + cached_typ._mgr._block, 'values', chunk) object.__setattr__(cached_typ, 'name', name) res = self.f(cached_typ) else: @@ -279,7 +279,7 @@ cdef class SeriesBinGrouper: object.__setattr__(cached_ityp, '_index_data', islider.buf) cached_ityp._engine.clear_mapping() object.__setattr__( - cached_typ._data._block, 'values', vslider.buf) + cached_typ._mgr._block, 'values', vslider.buf) object.__setattr__(cached_typ, '_index', cached_ityp) object.__setattr__(cached_typ, 'name', name) @@ -405,7 +405,7 @@ cdef class SeriesGrouper: object.__setattr__(cached_ityp, '_data', islider.buf) cached_ityp._engine.clear_mapping() object.__setattr__( - cached_typ._data._block, 'values', vslider.buf) + cached_typ._mgr._block, 'values', vslider.buf) object.__setattr__(cached_typ, '_index', cached_ityp) object.__setattr__(cached_typ, 'name', name) @@ -577,7 +577,7 @@ cdef class BlockSlider: self.dummy = frame[:0] self.index = self.dummy.index - self.blocks = [b.values for b in self.dummy._data.blocks] + self.blocks = [b.values for b in self.dummy._mgr.blocks] for x in self.blocks: util.set_array_not_contiguous(x) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 926440218b5d9..8f7f8e4d573d8 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -326,7 +326,7 @@ static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) { } static int is_simple_frame(PyObject *obj) { - PyObject *check = get_sub_attr(obj, "_data", "is_mixed_type"); + PyObject *check = get_sub_attr(obj, "_mgr", "is_mixed_type"); int ret = (check == Py_False); if (!check) { @@ -984,7 +984,7 @@ void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { goto BLKRET; } - blocks = get_sub_attr(obj, "_data", "blocks"); + blocks = get_sub_attr(obj, "_mgr", "blocks"); if (!blocks) { GET_TC(tc)->iterNext = NpyArr_iterNextNone; goto BLKRET; diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 79f205de11878..1b487e70d7541 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1710,7 +1710,7 @@ def take_nd( if arr.flags.f_contiguous and axis == arr.ndim - 1: # minor tweak that can make an order-of-magnitude difference # for dataframes initialized directly from 2-d ndarrays - # (s.t. df.values is c-contiguous and df._data.blocks[0] is its + # (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its # f-contiguous transpose) out = np.empty(out_shape, dtype=dtype, order="F") else: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2246bbfde636d..799c46f3969b6 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -166,7 +166,7 @@ def get_result(self): # ufunc elif isinstance(self.f, np.ufunc): with np.errstate(all="ignore"): - results = self.obj._data.apply("apply", func=self.f) + results = self.obj._mgr.apply("apply", func=self.f) return self.obj._constructor( data=results, index=self.index, columns=self.columns, copy=False ) diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index de41644f09b66..2a234b8ab9951 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -80,7 +80,7 @@ def _check(cls, inst): class _ABCGeneric(type): def __instancecheck__(cls, inst): - return hasattr(inst, "_data") + return hasattr(inst, "_mgr") ABCGeneric = _ABCGeneric("ABCGeneric", tuple(), {}) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index f540e9297738a..2c0812440125d 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -144,7 +144,7 @@ def _isna_new(obj): ): return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): - return obj._constructor(obj._data.isna(func=isna)) + return obj._constructor(obj._mgr.isna(func=isna)) elif isinstance(obj, list): return _isna_ndarraylike(np.asarray(obj, dtype=object)) elif hasattr(obj, "__array__"): @@ -172,7 +172,7 @@ def _isna_old(obj): elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): return _isna_ndarraylike_old(obj) elif isinstance(obj, ABCGeneric): - return obj._constructor(obj._data.isna(func=_isna_old)) + return obj._constructor(obj._mgr.isna(func=_isna_old)) elif isinstance(obj, list): return _isna_ndarraylike_old(np.asarray(obj, dtype=object)) elif hasattr(obj, "__array__"): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a1989fd62b6ee..1a327893d92c0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -395,7 +395,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): dtype = self._validate_dtype(dtype) if isinstance(data, DataFrame): - data = data._data + data = data._mgr if isinstance(data, BlockManager): mgr = self._init_mgr( @@ -545,10 +545,10 @@ def _is_homogeneous_type(self): ... "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type False """ - if self._data.any_extension_types: - return len({block.dtype for block in self._data.blocks}) == 1 + if self._mgr.any_extension_types: + return len({block.dtype for block in self._mgr.blocks}) == 1 else: - return not self._data.is_mixed_type + return not self._mgr.is_mixed_type # ---------------------------------------------------------------------- # Rendering Methods @@ -2521,7 +2521,7 @@ def _sizeof_fmt(num, size_qualifier): else: _verbose_repr() - counts = self._data.get_dtype_counts() + counts = self._mgr.get_dtype_counts() dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())] lines.append("dtypes: {types}".format(types=", ".join(dtypes))) @@ -2755,7 +2755,7 @@ def _unpickle_frame_compat(self, state): # pragma: no cover columns = com._unpickle_array(cols) index = com._unpickle_array(idx) - self._data = self._init_dict(series, index, columns, None) + self._mgr = self._init_dict(series, index, columns, None) def _unpickle_matrix_compat(self, state): # pragma: no cover # old unpickling @@ -2772,7 +2772,7 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover dm = dm.join(objects) - self._data = dm._data + self._mgr = dm._mgr # ---------------------------------------------------------------------- # Getting and setting elements @@ -2905,7 +2905,7 @@ def _ixs(self, i, axis=0): result = self.take(i, axis=axis) copy = True else: - new_values = self._data.fast_xs(i) + new_values = self._mgr.fast_xs(i) if is_scalar(new_values): return new_values @@ -2939,7 +2939,7 @@ def _ixs(self, i, axis=0): # as the index (iow a not found value), iget returns # a 0-len ndarray. This is effectively catching # a numpy error (as numpy should really raise) - values = self._data.iget(i) + values = self._mgr.iget(i) if index_len and not len(values): values = np.array([np.nan] * index_len, dtype=object) @@ -3538,7 +3538,7 @@ def _ensure_valid_index(self, value): "Series" ) - self._data = self._data.reindex_axis( + self._mgr = self._mgr.reindex_axis( value.index.copy(), axis=1, fill_value=np.nan ) @@ -3581,7 +3581,7 @@ def insert(self, loc, column, value, allow_duplicates=False): """ self._ensure_valid_index(value) value = self._sanitize_column(column, value, broadcast=False) - self._data.insert(loc, column, value, allow_duplicates=allow_duplicates) + self._mgr.insert(loc, column, value, allow_duplicates=allow_duplicates) def assign(self, **kwargs): r""" @@ -3780,7 +3780,7 @@ def reindexer(value): @property def _series(self): return { - item: Series(self._data.iget(idx), index=self.index, name=item) + item: Series(self._mgr.iget(idx), index=self.index, name=item) for idx, item in enumerate(self.columns) } @@ -4903,7 +4903,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False): if inplace: inds, = (-duplicated)._ndarray_values.nonzero() - new_data = self._data.take(inds) + new_data = self._mgr.take(inds) self._update_inplace(new_data) else: return self[-duplicated] @@ -5007,7 +5007,7 @@ def sort_values( k, kind=kind, ascending=ascending, na_position=na_position ) - new_data = self._data.take( + new_data = self._mgr.take( indexer, axis=self._get_block_manager_axis(axis), verify=False ) @@ -5084,7 +5084,7 @@ def sort_index( ) baxis = self._get_block_manager_axis(axis) - new_data = self._data.take(indexer, axis=baxis, verify=False) + new_data = self._mgr.take(indexer, axis=baxis, verify=False) # reconstruct axis if needed new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic() @@ -6527,7 +6527,7 @@ def diff(self, periods=1, axis=0): 5 NaN NaN NaN """ bm_axis = self._get_block_manager_axis(axis) - new_data = self._data.diff(n=periods, axis=bm_axis) + new_data = self._mgr.diff(n=periods, axis=bm_axis) return self._constructor(new_data) # ---------------------------------------------------------------------- @@ -7754,7 +7754,7 @@ def count(self, axis=0, level=None, numeric_only=False): if len(frame._get_axis(axis)) == 0: result = Series(0, index=frame._get_agg_axis(axis)) else: - if frame._is_mixed_type or frame._data.any_extension_types: + if frame._is_mixed_type or frame._mgr.any_extension_types: # the or any_extension_types is really only hit for single- # column frames with an extension array result = notna(frame).sum(axis=axis) @@ -8209,7 +8209,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"): if is_transposed: data = data.T - result = data._data.quantile( + result = data._mgr.quantile( qs=q, axis=1, interpolation=interpolation, transposed=is_transposed ) @@ -8243,7 +8243,7 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True): ------- DataFrame with DatetimeIndex """ - new_data = self._data + new_data = self._mgr if copy: new_data = new_data.copy() @@ -8275,7 +8275,7 @@ def to_period(self, freq=None, axis=0, copy=True): ------- TimeSeries with PeriodIndex """ - new_data = self._data + new_data = self._mgr if copy: new_data = new_data.copy() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4e9f74162ae78..0d2074594f1a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -115,7 +115,7 @@ def _single_replace(self, to_replace, method, inplace, limit): result = pd.Series(values, index=self.index, dtype=self.dtype).__finalize__(self) if inplace: - self._update_inplace(result._data) + self._update_inplace(result._mgr) return return result @@ -134,7 +134,7 @@ class NDFrame(PandasObject, SelectionMixin): """ _internal_names = [ - "_data", + "_mgr", "_cacher", "_item_cache", "_cache", @@ -155,7 +155,13 @@ class NDFrame(PandasObject, SelectionMixin): ) # type: FrozenSet[str] _metadata = [] # type: List[str] _is_copy = None - _data = None # type: BlockManager + _mgr = None # type: BlockManager + + @property + def _data(self): + # Retain alias for downstream compat + warnings.warn("'_data' attribute should not be accessed directly.") + return self._mgr # ---------------------------------------------------------------------- # Constructors @@ -180,7 +186,7 @@ def __init__( data = data.reindex_axis(ax, axis=i) object.__setattr__(self, "_is_copy", None) - object.__setattr__(self, "_data", data) + object.__setattr__(self, "_mgr", data) object.__setattr__(self, "_item_cache", {}) def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): @@ -541,7 +547,7 @@ def ndim(self): >>> df.ndim 2 """ - return self._data.ndim + return self._mgr.ndim @property def size(self): @@ -712,7 +718,7 @@ def set_axis(self, labels, axis=0, inplace=None): return obj def _set_axis(self, axis, labels): - self._data.set_axis(axis, labels) + self._mgr.set_axis(axis, labels) self._clear_item_cache() def transpose(self, *args, **kwargs): @@ -1029,8 +1035,8 @@ def swaplevel(self, i=-2, j=-1, axis=0): """ axis = self._get_axis_number(axis) result = self.copy() - labels = result._data.axes[axis] - result._data.set_axis(axis, labels.swaplevel(i, j)) + labels = result._mgr.axes[axis] + result._mgr.set_axis(axis, labels.swaplevel(i, j)) return result # ---------------------------------------------------------------------- @@ -1190,13 +1196,11 @@ def rename(self, *args, **kwargs): ] raise KeyError("{} not found in axis".format(missing_labels)) - result._data = result._data.rename_axis( - f, axis=baxis, copy=copy, level=level - ) + result._mgr = result._mgr.rename_axis(f, axis=baxis, copy=copy, level=level) result._clear_item_cache() if inplace: - self._update_inplace(result._data) + self._update_inplace(result._mgr) else: return result.__finalize__(self) @@ -1524,7 +1528,7 @@ def equals(self, other): """ if not isinstance(other, self._constructor): return False - return self._data.equals(other._data) + return self._mgr.equals(other._mgr) # ------------------------------------------------------------------------- # Unary Methods @@ -2049,16 +2053,20 @@ def to_dense(self): def __getstate__(self): meta = {k: getattr(self, k, None) for k in self._metadata} - return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata, **meta) + return dict(_mgr=self._mgr, _typ=self._typ, _metadata=self._metadata, **meta) def __setstate__(self, state): if isinstance(state, BlockManager): - self._data = state + self._mgr = state elif isinstance(state, dict): typ = state.get("_typ") if typ is not None: + if "_data" in state and "_mgr" not in state: + # Backwards compat + state["_mgr"] = state.pop("_data") + # set in the order of internal names # to avoid definitional recursion # e.g. say fill_value needing _data to be @@ -3285,7 +3293,7 @@ def _get_item_cache(self, item): cache = self._item_cache res = cache.get(item) if res is None: - values = self._data.get(item) + values = self._mgr.get(item) res = self._box_item_values(item, values) cache[item] = res res._set_as_cached(item, self) @@ -3320,7 +3328,7 @@ def _box_item_values(self, key, values): def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. """ - self._data.set(item, value) + self._mgr.set(item, value) @property def _is_cached(self): @@ -3337,7 +3345,7 @@ def _get_cacher(self): @property def _is_view(self): """Return boolean indicating if self is view of another array """ - return self._data.is_view + return self._mgr.is_view def _maybe_update_cacher(self, clear=False, verify_is_copy=True): """ @@ -3386,7 +3394,7 @@ def _slice(self, slobj, axis=0, kind=None): kind parameter is maintained for compatibility with Series slicing. """ axis = self._get_block_manager_axis(axis) - result = self._constructor(self._data.get_slice(slobj, axis=axis)) + result = self._constructor(self._mgr.get_slice(slobj, axis=axis)) result = result.__finalize__(self) # this could be a view @@ -3396,7 +3404,7 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): - self._data.set(key, value) + self._mgr.set(key, value) self._clear_item_cache() def _set_is_copy(self, ref=None, copy=True): @@ -3531,7 +3539,7 @@ def __delitem__(self, key): # If the above loop ran and didn't delete anything because # there was no match, this call should raise the appropriate # exception: - self._data.delete(key) + self._mgr.delete(key) # delete from the caches try: @@ -3573,7 +3581,7 @@ def _take(self, indices, axis=0, is_copy=True): """ self._consolidate_inplace() - new_data = self._data.take( + new_data = self._mgr.take( indices, axis=self._get_block_manager_axis(axis), verify=True ) result = self._constructor(new_data).__finalize__(self) @@ -3798,7 +3806,7 @@ class animal locomotion new_index = self.index[loc] if is_scalar(loc): - new_values = self._data.fast_xs(loc) + new_values = self._mgr.fast_xs(loc) # may need to box a datelike-scalar # @@ -4037,7 +4045,7 @@ def _update_inplace(self, result, verify_is_copy=True): self._reset_cache() self._clear_item_cache() - self._data = getattr(result, "_data", result) + self._mgr = getattr(result, "_mgr", result) self._maybe_update_cacher(verify_is_copy=verify_is_copy) def add_prefix(self, prefix): @@ -4606,7 +4614,7 @@ def _reindex_with_indexers( """allow_dups indicates an internal call here """ # reindex doing multiple operations on different axes if indicated - new_data = self._data + new_data = self._mgr for axis in sorted(reindexers.keys()): index, indexer = reindexers[axis] baxis = self._get_block_manager_axis(axis) @@ -4628,7 +4636,7 @@ def _reindex_with_indexers( copy=copy, ) - if copy and new_data is self._data: + if copy and new_data is self._mgr: new_data = new_data.copy() return self._constructor(new_data).__finalize__(self) @@ -5291,9 +5299,9 @@ def _protect_consolidate(self, f): """Consolidate _data -- if the blocks have changed, then clear the cache """ - blocks_before = len(self._data.blocks) + blocks_before = len(self._mgr.blocks) result = f() - if len(self._data.blocks) != blocks_before: + if len(self._mgr.blocks) != blocks_before: self._clear_item_cache() return result @@ -5301,7 +5309,7 @@ def _consolidate_inplace(self): """Consolidate data in place and return None""" def f(): - self._data = self._data.consolidate() + self._mgr = self._mgr.consolidate() self._protect_consolidate(f) @@ -5323,23 +5331,23 @@ def _consolidate(self, inplace=False): if inplace: self._consolidate_inplace() else: - f = lambda: self._data.consolidate() + f = lambda: self._mgr.consolidate() cons_data = self._protect_consolidate(f) return self._constructor(cons_data).__finalize__(self) @property def _is_mixed_type(self): - f = lambda: self._data.is_mixed_type + f = lambda: self._mgr.is_mixed_type return self._protect_consolidate(f) @property def _is_numeric_mixed_type(self): - f = lambda: self._data.is_numeric_mixed_type + f = lambda: self._mgr.is_numeric_mixed_type return self._protect_consolidate(f) @property def _is_datelike_mixed_type(self): - f = lambda: self._data.is_datelike_mixed_type + f = lambda: self._mgr.is_datelike_mixed_type return self._protect_consolidate(f) def _check_inplace_setting(self, value): @@ -5363,10 +5371,10 @@ def _check_inplace_setting(self, value): return True def _get_numeric_data(self): - return self._constructor(self._data.get_numeric_data()).__finalize__(self) + return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) def _get_bool_data(self): - return self._constructor(self._data.get_bool_data()).__finalize__(self) + return self._constructor(self._mgr.get_bool_data()).__finalize__(self) # ---------------------------------------------------------------------- # Internal Interface Methods @@ -5417,7 +5425,7 @@ def as_matrix(self, columns=None): stacklevel=2, ) self._consolidate_inplace() - return self._data.as_array(transpose=self._AXIS_REVERSED, items=columns) + return self._mgr.as_array(transpose=self._AXIS_REVERSED, items=columns) @property def values(self): @@ -5494,7 +5502,7 @@ def values(self): ['monkey', nan, None]], dtype=object) """ self._consolidate_inplace() - return self._data.as_array(transpose=self._AXIS_REVERSED) + return self._mgr.as_array(transpose=self._AXIS_REVERSED) @property def _values(self): @@ -5605,7 +5613,7 @@ def get_dtype_counts(self): ) from pandas import Series - return Series(self._data.get_dtype_counts()) + return Series(self._mgr.get_dtype_counts()) def get_ftype_counts(self): """ @@ -5651,7 +5659,7 @@ def get_ftype_counts(self): from pandas import Series - return Series(self._data.get_ftype_counts()) + return Series(self._mgr.get_ftype_counts()) @property def dtypes(self): @@ -5687,7 +5695,7 @@ def dtypes(self): """ from pandas import Series - return Series(self._data.get_dtypes(), index=self._info_axis, dtype=np.object_) + return Series(self._mgr.get_dtypes(), index=self._info_axis, dtype=np.object_) @property def ftypes(self): @@ -5744,7 +5752,7 @@ def ftypes(self): from pandas import Series - return Series(self._data.get_ftypes(), index=self._info_axis, dtype=np.object_) + return Series(self._mgr.get_ftypes(), index=self._info_axis, dtype=np.object_) def as_blocks(self, copy=True): """ @@ -5789,7 +5797,7 @@ def _to_dict_of_blocks(self, copy=True): """ return { k: self._constructor(v).__finalize__(self) - for k, v, in self._data.to_dict(copy=copy).items() + for k, v, in self._mgr.to_dict(copy=copy).items() } def astype(self, dtype, copy=True, errors="raise", **kwargs): @@ -5931,9 +5939,7 @@ def astype(self, dtype, copy=True, errors="raise", **kwargs): else: # else, only a single dtype is given - new_data = self._data.astype( - dtype=dtype, copy=copy, errors=errors, **kwargs - ) + new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors, **kwargs) return self._constructor(new_data).__finalize__(self) # GH 19920: retain column metadata after concat @@ -6046,7 +6052,7 @@ def copy(self, deep=True): 1 [3, 4] dtype: object """ - data = self._data.copy(deep=deep) + data = self._mgr.copy(deep=deep) return self._constructor(data).__finalize__(self) def __copy__(self, deep=True): @@ -6091,7 +6097,7 @@ def _convert( converted : same as input object """ return self._constructor( - self._data.convert( + self._mgr.convert( datetime=datetime, numeric=numeric, timedelta=timedelta, @@ -6143,7 +6149,7 @@ def infer_objects(self): # python objects will still be converted to # native numpy numeric types return self._constructor( - self._data.convert( + self._mgr.convert( datetime=True, numeric=False, timedelta=True, coerce=False, copy=True ) ).__finalize__(self) @@ -6277,11 +6283,11 @@ def fillna( result = self.T.fillna(method=method, limit=limit).T # need to downcast here because of all of the transposes - result._data = result._data.downcast() + result._mgr = result._mgr.downcast() return result - new_data = self._data.interpolate( + new_data = self._mgr.interpolate( method=method, axis=axis, limit=limit, @@ -6307,7 +6313,7 @@ def fillna( '"{0}"'.format(type(value).__name__) ) - new_data = self._data.fillna( + new_data = self._mgr.fillna( value=value, limit=limit, inplace=inplace, downcast=downcast ) @@ -6328,7 +6334,7 @@ def fillna( return result if not inplace else None elif not is_list_like(value): - new_data = self._data.fillna( + new_data = self._mgr.fillna( value=value, limit=limit, inplace=inplace, downcast=downcast ) elif isinstance(value, DataFrame) and self.ndim == 2: @@ -6741,7 +6747,7 @@ def replace( if not len(self._get_axis(a)): return self - new_data = self._data + new_data = self._mgr if is_dict_like(to_replace): if is_dict_like(value): # {'A' : NA} -> {'A' : 0} res = self if inplace else self.copy() @@ -6783,7 +6789,7 @@ def replace( % (len(to_replace), len(value)) ) - new_data = self._data.replace_list( + new_data = self._mgr.replace_list( src_list=to_replace, dest_list=value, inplace=inplace, @@ -6791,7 +6797,7 @@ def replace( ) else: # [NA, ''] -> 0 - new_data = self._data.replace( + new_data = self._mgr.replace( to_replace=to_replace, value=value, inplace=inplace, regex=regex ) elif to_replace is None: @@ -6814,7 +6820,7 @@ def replace( # dest iterable dict-like if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} - new_data = self._data + new_data = self._mgr for k, v in value.items(): if k in self: @@ -6827,7 +6833,7 @@ def replace( ) elif not is_list_like(value): # NA -> 0 - new_data = self._data.replace( + new_data = self._mgr.replace( to_replace=to_replace, value=value, inplace=inplace, regex=regex ) else: @@ -7069,7 +7075,7 @@ def interpolate( "Only `method=linear` interpolation is supported " "on MultiIndexes." ) - if _maybe_transposed_self._data.get_dtype_counts().get("object") == len( + if _maybe_transposed_self._mgr.get_dtype_counts().get("object") == len( _maybe_transposed_self.T ): raise TypeError( @@ -7104,7 +7110,7 @@ def interpolate( "has not been implemented. Try filling " "those NaNs before interpolating." ) - data = _maybe_transposed_self._data + data = _maybe_transposed_self._mgr new_data = data.interpolate( method=method, axis=ax, @@ -7120,7 +7126,7 @@ def interpolate( if inplace: if axis == 1: - new_data = self._constructor(new_data).T._data + new_data = self._constructor(new_data).T._mgr self._update_inplace(new_data) else: res = self._constructor(new_data).__finalize__(self) @@ -8972,7 +8978,7 @@ def _align_series( else: # one has > 1 ndim - fdata = self._data + fdata = self._mgr if axis == 0: join_index = self.index lidx, ridx = None, None @@ -8997,7 +9003,7 @@ def _align_series( else: raise ValueError("Must specify axis=0 or 1") - if copy and fdata is self._data: + if copy and fdata is self._mgr: fdata = fdata.copy() left = self._constructor(fdata) @@ -9157,7 +9163,7 @@ def _where( # reconstruct the block manager self._check_inplace_setting(other) - new_data = self._data.putmask( + new_data = self._mgr.putmask( mask=cond, new=other, align=align, @@ -9168,7 +9174,7 @@ def _where( self._update_inplace(new_data) else: - new_data = self._data.where( + new_data = self._mgr.where( other=other, cond=cond, align=align, @@ -9450,7 +9456,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): block_axis = self._get_block_manager_axis(axis) if freq is None: - new_data = self._data.shift( + new_data = self._mgr.shift( periods=periods, axis=block_axis, fill_value=fill_value ) else: @@ -9539,7 +9545,7 @@ def tshift(self, periods=1, freq=None, axis=0): if isinstance(index, PeriodIndex): orig_freq = to_offset(index.freq) if freq == orig_freq: - new_data = self._data.copy() + new_data = self._mgr.copy() new_data.axes[block_axis] = index.shift(periods) else: msg = "Given freq %s does not match PeriodIndex freq %s" % ( @@ -9548,7 +9554,7 @@ def tshift(self, periods=1, freq=None, axis=0): ) raise ValueError(msg) else: - new_data = self._data.copy() + new_data = self._mgr.copy() new_data.axes[block_axis] = index.shift(periods, freq) return self._constructor(new_data).__finalize__(self) @@ -9757,7 +9763,7 @@ def _tz_convert(ax, tz): raise ValueError("The level {0} is not valid".format(level)) ax = _tz_convert(ax, tz) - result = self._constructor(self._data, copy=copy) + result = self._constructor(self._mgr, copy=copy) result = result.set_axis(ax, axis=axis, inplace=False) return result.__finalize__(self) @@ -9921,7 +9927,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent): raise ValueError("The level {0} is not valid".format(level)) ax = _tz_localize(ax, tz, ambiguous, nonexistent) - result = self._constructor(self._data, copy=copy) + result = self._constructor(self._mgr, copy=copy) result = result.set_axis(ax, axis=axis, inplace=False) return result.__finalize__(self) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7fd0ca94e7997..df5c11e227bea 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -252,7 +252,7 @@ def aggregate(self, func, *args, **kwargs): # Backwards compat for groupby.agg() with sparse # values. concat no longer converts DataFrame[Sparse] # to SparseDataFrame, so we do it here. - result = SparseDataFrame(result._data) + result = SparseDataFrame(result._mgr) except Exception: result = self._aggregate_generic(func, *args, **kwargs) @@ -1502,9 +1502,9 @@ def _wrap_generic_output(self, result, obj): def _get_data_to_aggregate(self): obj = self._obj_with_exclusions if self.axis == 1: - return obj.T._data, 1 + return obj.T._mgr, 1 else: - return obj._data, 1 + return obj._mgr, 1 def _insert_inaxis_grouper_inplace(self, result): # zip in reverse so we can always insert at loc 0 diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 818d844ca7994..896e5b45ad69c 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -591,7 +591,7 @@ def _get_grouper( def is_in_axis(key): if not _is_label_like(key): try: - obj._data.items.get_loc(key) + obj._mgr.items.get_loc(key) except Exception: return False diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0bcaa83c49628..78c37db33423d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -323,8 +323,8 @@ def _setitem_with_indexer(self, indexer, value): # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value - if not take_split_path and self.obj._data.blocks: - blk, = self.obj._data.blocks + if not take_split_path and self.obj._mgr.blocks: + blk, = self.obj._mgr.blocks if 1 < blk.ndim: # in case of dict, keys are indices val = list(value.values()) if isinstance(value, dict) else value take_split_path = not blk._can_hold_element(val) @@ -390,7 +390,7 @@ def _setitem_with_indexer(self, indexer, value): # so the object is the same index = self.obj._get_axis(i) labels = index.insert(len(index), key) - self.obj._data = self.obj.reindex(labels, axis=i)._data + self.obj._mgr = self.obj.reindex(labels, axis=i)._mgr self.obj._maybe_update_cacher(clear=True) self.obj._is_copy = None @@ -431,9 +431,9 @@ def _setitem_with_indexer(self, indexer, value): except TypeError: as_obj = self.obj.astype(object) new_values = np.concatenate([as_obj, new_values]) - self.obj._data = self.obj._constructor( + self.obj._mgr = self.obj._constructor( new_values, index=new_index, name=self.obj.name - )._data + )._mgr self.obj._maybe_update_cacher(clear=True) return self.obj @@ -463,7 +463,7 @@ def _setitem_with_indexer(self, indexer, value): value = Series(value, index=self.obj.columns, name=indexer) - self.obj._data = self.obj.append(value)._data + self.obj._mgr = self.obj.append(value)._mgr self.obj._maybe_update_cacher(clear=True) return self.obj @@ -518,7 +518,7 @@ def _setitem_with_indexer(self, indexer, value): idx = index._convert_slice_indexer(idx) obj._consolidate_inplace() obj = obj.copy() - obj._data = obj._data.setitem(indexer=tuple([idx]), value=value) + obj._mgr = obj._mgr.setitem(indexer=tuple([idx]), value=value) self.obj[item] = obj return @@ -549,7 +549,7 @@ def setter(item, v): # set the item, possibly having a dtype change s._consolidate_inplace() s = s.copy() - s._data = s._data.setitem(indexer=pi, value=v) + s._mgr = s._mgr.setitem(indexer=pi, value=v) s._maybe_update_cacher(clear=True) # reset the sliced object if unique @@ -673,7 +673,7 @@ def can_do_equal_len(): # actually do the set self.obj._consolidate_inplace() - self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) + self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) def _align_series(self, indexer, ser, multiindex_indexer=False): @@ -2450,7 +2450,7 @@ def convert_to_index_sliceable(obj, key): elif isinstance(key, str): # we are an actual column - if key in obj._data.items: + if key in obj._mgr.items: return None # We might have a datetimelike string that we can translate to a diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4692ec45df0ad..326d700641553 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -1525,7 +1525,7 @@ def f(self, other): # this makes sure that we are aligned like the input # we are updating inplace so we want to ignore is_copy self._update_inplace( - result.reindex_like(self, copy=False)._data, verify_is_copy=False + result.reindex_like(self, copy=False)._mgr, verify_is_copy=False ) return self diff --git a/pandas/core/resample.py b/pandas/core/resample.py index b4a3e6ed71bf4..aeee871bb97df 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1648,7 +1648,7 @@ def _take_new_index(obj, indexer, new_index, axis=0): if axis == 1: raise NotImplementedError("axis 1 is not supported") return DataFrame( - obj._data.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) + obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) ) else: raise ValueError("'obj' should be either a Series or a DataFrame") diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 5a476dceca1f3..4eac89135132d 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -436,8 +436,8 @@ def get_result(self): if self.axis == 0: name = com.consensus_name_attr(self.objs) - mgr = self.objs[0]._data.concat( - [x._data for x in self.objs], self.new_axes + mgr = self.objs[0]._mgr.concat( + [x._mgr for x in self.objs], self.new_axes ) cons = _concat._get_series_result_type(mgr, self.objs) return cons(mgr, name=name).__finalize__(self, method="concat") @@ -456,7 +456,7 @@ def get_result(self): else: mgrs_indexers = [] for obj in self.objs: - mgr = obj._data + mgr = obj._mgr indexers = {} for ax, new_labels in enumerate(self.new_axes): if ax == self.axis: @@ -467,7 +467,7 @@ def get_result(self): if not new_labels.equals(obj_labels): indexers[ax] = obj_labels.reindex(new_labels)[1] - mgrs_indexers.append((obj._data, indexers)) + mgrs_indexers.append((obj._mgr, indexers)) new_data = concatenate_block_managers( mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy @@ -565,7 +565,7 @@ def _get_concat_axis(self): else: return ensure_index(self.keys).set_names(self.names) else: - indexes = [x._data.axes[self.axis] for x in self.objs] + indexes = [x._mgr.axes[self.axis] for x in self.objs] if self.ignore_index: idx = ibase.default_index(sum(len(i) for i in indexes)) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c1a07c129f7cd..4d132eef5c839 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -642,7 +642,7 @@ def get_result(self): join_index, left_indexer, right_indexer = self._get_join_info() - ldata, rdata = self.left._data, self.right._data + ldata, rdata = self.left._mgr, self.right._mgr lsuf, rsuf = self.suffixes llabels, rlabels = _items_overlap_with_suffix( @@ -840,8 +840,8 @@ def _get_join_indexers(self): ) def _get_join_info(self): - left_ax = self.left._data.axes[self.axis] - right_ax = self.right._data.axes[self.axis] + left_ax = self.left._mgr.axes[self.axis] + right_ax = self.right._mgr.axes[self.axis] if self.left_index and self.right_index and self.how != "asof": join_index, left_indexer, right_indexer = left_ax.join( @@ -1449,7 +1449,7 @@ def get_result(self): join_index, left_indexer, right_indexer = self._get_join_info() # this is a bit kludgy - ldata, rdata = self.left._data, self.right._data + ldata, rdata = self.left._mgr, self.right._mgr lsuf, rsuf = self.suffixes llabels, rlabels = _items_overlap_with_suffix( diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 5d932d7ded9b8..21e5b467e0dfb 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -425,7 +425,7 @@ def _unstack_frame(obj, level, fill_value=None): unstacker = partial( _Unstacker, index=obj.index, level=level, fill_value=fill_value ) - blocks = obj._data.unstack(unstacker, fill_value=fill_value) + blocks = obj._mgr.unstack(unstacker, fill_value=fill_value) return obj._constructor(blocks) else: unstacker = _Unstacker( diff --git a/pandas/core/series.py b/pandas/core/series.py index b3a7f38aef8ef..129e964d6bd5a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -251,7 +251,7 @@ def __init__( index = data.index else: data = data.reindex(index, copy=copy) - data = data._data + data = data._mgr elif isinstance(data, dict): data, index = self._init_dict(data, index, dtype) dtype = None @@ -359,7 +359,7 @@ def _init_dict(self, data, index=None, dtype=None): s = s.sort_index() except TypeError: pass - return s._data, s.index + return s._mgr, s.index @classmethod def from_array( @@ -406,7 +406,7 @@ def _constructor_expanddim(self): # types @property def _can_hold_na(self): - return self._data._can_hold_na + return self._mgr._can_hold_na _index = None @@ -425,7 +425,7 @@ def _set_axis(self, axis, labels, fastpath=False): labels = DatetimeIndex(labels) # need to set here because we changed the index if fastpath: - self._data.set_axis(axis, labels) + self._mgr.set_axis(axis, labels) except (tslibs.OutOfBoundsDatetime, ValueError): # labels may exceeds datetime bounds, # or not be a DatetimeIndex @@ -435,7 +435,7 @@ def _set_axis(self, axis, labels, fastpath=False): object.__setattr__(self, "_index", labels) if not fastpath: - self._data.set_axis(axis, labels) + self._mgr.set_axis(axis, labels) def _set_subtyp(self, is_all_dates): if is_all_dates: @@ -466,14 +466,14 @@ def dtype(self): """ Return the dtype object of the underlying data. """ - return self._data.dtype + return self._mgr.dtype @property def dtypes(self): """ Return the dtype object of the underlying data. """ - return self._data.dtype + return self._mgr.dtype @property def ftype(self): @@ -491,7 +491,7 @@ def ftype(self): stacklevel=2, ) - return self._data.ftype + return self._mgr.ftype @property def ftypes(self): @@ -509,7 +509,7 @@ def ftypes(self): stacklevel=2, ) - return self._data.ftype + return self._mgr.ftype @property def values(self): @@ -551,21 +551,21 @@ def values(self): '2013-01-02T05:00:00.000000000', '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]') """ - return self._data.external_values() + return self._mgr.external_values() @property def _values(self): """ Return the internal repr of this data. """ - return self._data.internal_values() + return self._mgr.internal_values() def _formatting_values(self): """ Return the values that can be formatted (used by SeriesFormatter and DataFrameFormatter). """ - return self._data.formatting_values() + return self._mgr.formatting_values() def get_values(self): """ @@ -588,7 +588,7 @@ def get_values(self): return self._internal_get_values() def _internal_get_values(self): - return self._data.get_values() + return self._mgr.get_values() @property def asobject(self): @@ -718,7 +718,7 @@ def __len__(self): """ Return the length of the Series. """ - return len(self._data) + return len(self._mgr) def view(self, dtype=None): """ @@ -992,9 +992,9 @@ def imag(self, v): def _unpickle_series_compat(self, state): if isinstance(state, dict): - self._data = state["_data"] + self._mgr = state["_data"] self.name = state["name"] - self.index = self._data.index + self.index = self._mgr.index elif isinstance(state, tuple): @@ -1012,7 +1012,7 @@ def _unpickle_series_compat(self, state): name = own_state[1] # recreate - self._data = SingleBlockManager(data, index, fastpath=True) + self._mgr = SingleBlockManager(data, index, fastpath=True) self._index = index self.name = name @@ -1183,7 +1183,7 @@ def _get_values_tuple(self, key): def _get_values(self, indexer): try: return self._constructor( - self._data.get_slice(indexer), fastpath=True + self._mgr.get_slice(indexer), fastpath=True ).__finalize__(self) except Exception: return self._values[indexer] @@ -1304,7 +1304,7 @@ def _set_labels(self, key, value): def _set_values(self, key, value): if isinstance(key, Series): key = key._values - self._data = self._data.setitem(indexer=key, value=value) + self._mgr = self._mgr.setitem(indexer=key, value=value) self._maybe_update_cacher() def repeat(self, repeats, axis=None): @@ -3006,7 +3006,7 @@ def update(self, other): other = other.reindex_like(self) mask = notna(other) - self._data = self._data.putmask(mask=mask, new=other, inplace=True) + self._mgr = self._mgr.putmask(mask=mask, new=other, inplace=True) self._maybe_update_cacher() # ---------------------------------------------------------------------- diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index f195e4b5f4e37..a1363c8802347 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -115,7 +115,7 @@ def __init__( mgr = self._init_matrix(data, index, columns, dtype=dtype) elif isinstance(data, SparseDataFrame): mgr = self._init_mgr( - data._data, dict(index=index, columns=columns), dtype=dtype, copy=copy + data._mgr, dict(index=index, columns=columns), dtype=dtype, copy=copy ) elif isinstance(data, DataFrame): mgr = self._init_dict(data, data.index, data.columns, dtype=dtype) @@ -283,7 +283,7 @@ def __getstate__(self): return dict( _typ=self._typ, _subtyp=self._subtyp, - _data=self._data, + _mgr=self._mgr, _default_fill_value=self._default_fill_value, _default_kind=self._default_kind, ) @@ -314,7 +314,7 @@ def _unpickle_sparse_frame_compat(self, state): sp_values, sparse_index=sp_index, fill_value=fv ) - self._data = to_manager(series_dict, columns, index) + self._mgr = to_manager(series_dict, columns, index) self._default_fill_value = fv self._default_kind = kind diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index 73638f5965119..08d0ae2b5f3f6 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -26,7 +26,7 @@ def _to_ijv(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): _check_is_partition([row_levels, column_levels], range(ss.index.nlevels)) # from the SparseSeries: get the labels and data for non-null entries - values = ss._data.internal_values()._valid_sp_values + values = ss._mgr.internal_values()._valid_sp_values nonnull_labels = ss.dropna() diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 43f2609f46bd6..b0632ff20d997 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -176,7 +176,7 @@ def __invert__(self): @property def block(self): warnings.warn("SparseSeries.block is deprecated.", FutureWarning, stacklevel=2) - return self._data._block + return self._mgr._block @property def fill_value(self): @@ -273,7 +273,7 @@ def __getstate__(self): return dict( _typ=self._typ, _subtyp=self._subtyp, - _data=self._data, + _mgr=self._mgr, fill_value=self.fill_value, name=self.name, ) @@ -343,7 +343,7 @@ def __getitem__(self, key): def _get_values(self, indexer): try: return self._constructor( - self._data.get_slice(indexer), fastpath=True + self._mgr.get_slice(indexer), fastpath=True ).__finalize__(self) except Exception: return self[indexer] @@ -465,7 +465,7 @@ def _set_value(self, label, value, takeable=False): values = new_values new_index = values.index values = SparseArray(values, fill_value=self.fill_value, kind=self.kind) - self._data = SingleBlockManager(values, new_index) + self._mgr = SingleBlockManager(values, new_index) self._index = new_index _set_value.__doc__ = set_value.__doc__ @@ -482,7 +482,7 @@ def _set_values(self, key, value): values = self.values.to_dense() values[key] = libindex.convert_scalar(values, value) values = SparseArray(values, fill_value=self.fill_value, kind=self.kind) - self._data = SingleBlockManager(values, self.index) + self._mgr = SingleBlockManager(values, self.index) def to_dense(self): """ diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index d86bf432b83c4..7d7ff90aedc16 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -129,7 +129,7 @@ def __init__( self.cols = cols # preallocate data 2d list - self.blocks = self.obj._data.blocks + self.blocks = self.obj._mgr.blocks ncols = sum(b.shape[0] for b in self.blocks) self.data = [None] * ncols diff --git a/pandas/io/packers.py b/pandas/io/packers.py index b0ce7a4ccb12a..5a68921ae6474 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -492,7 +492,7 @@ def encode(obj): # return d else: - data = obj._data + data = obj._mgr if not data.is_consolidated(): data = data.consolidate() diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9206463e18fb3..6ca410a47082a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3204,7 +3204,7 @@ def read(self, start=None, stop=None, **kwargs): def write(self, obj, **kwargs): super().write(obj, **kwargs) - data = obj._data + data = obj._mgr if not data.is_consolidated(): data = data.consolidate() @@ -3842,20 +3842,20 @@ def get_blk_items(mgr, blocks): # figure out data_columns and get out blocks block_obj = self.get_object(obj)._consolidate() - blocks = block_obj._data.blocks - blk_items = get_blk_items(block_obj._data, blocks) + blocks = block_obj._mgr.blocks + blk_items = get_blk_items(block_obj._mgr, blocks) if len(self.non_index_axes): axis, axis_labels = self.non_index_axes[0] data_columns = self.validate_data_columns(data_columns, min_itemsize) if len(data_columns): mgr = block_obj.reindex( Index(axis_labels).difference(Index(data_columns)), axis=axis - )._data + )._mgr blocks = list(mgr.blocks) blk_items = get_blk_items(mgr, blocks) for c in data_columns: - mgr = block_obj.reindex([c], axis=axis)._data + mgr = block_obj.reindex([c], axis=axis)._mgr blocks.extend(mgr.blocks) blk_items.extend(get_blk_items(mgr, mgr.blocks)) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 211571c7dbaa1..03a974d9a106c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -693,7 +693,7 @@ def insert_data(self): column_names = list(map(str, temp.columns)) ncols = len(column_names) data_list = [None] * ncols - blocks = temp._data.blocks + blocks = temp._mgr.blocks for b in blocks: if b.is_datetime: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 6824266c9282b..8ba112233c35a 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1363,7 +1363,7 @@ def test_nan_to_nat_conversions(): assert result == iNaT s = df["B"].copy() - s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) + s._mgr = s._mgr.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) assert isna(s[8]) assert s[8].value == np.datetime64("NaT").astype(np.int64) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 7146443bf8de5..7a05baaea16a7 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -10,7 +10,7 @@ class BaseCastingTests(BaseExtensionTests): def test_astype_object_series(self, all_data): ser = pd.Series({"A": all_data}) result = ser.astype(object) - assert isinstance(result._data.blocks[0], ObjectBlock) + assert isinstance(result._mgr.blocks[0], ObjectBlock) def test_tolist(self, data): result = pd.Series(data).tolist() diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 7262a85b1fe00..841e551764122 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -25,13 +25,13 @@ def test_series_constructor(self, data): result = pd.Series(data) assert result.dtype == data.dtype assert len(result) == len(data) - assert isinstance(result._data.blocks[0], ExtensionBlock) - assert result._data.blocks[0].values is data + assert isinstance(result._mgr.blocks[0], ExtensionBlock) + assert result._mgr.blocks[0].values is data # Series[EA] is unboxed / boxed correctly result2 = pd.Series(result) assert result2.dtype == data.dtype - assert isinstance(result2._data.blocks[0], ExtensionBlock) + assert isinstance(result2._mgr.blocks[0], ExtensionBlock) @pytest.mark.parametrize("from_series", [True, False]) def test_dataframe_constructor_from_dict(self, data, from_series): @@ -40,13 +40,13 @@ def test_dataframe_constructor_from_dict(self, data, from_series): result = pd.DataFrame({"A": data}) assert result.dtypes["A"] == data.dtype assert result.shape == (len(data), 1) - assert isinstance(result._data.blocks[0], ExtensionBlock) + assert isinstance(result._mgr.blocks[0], ExtensionBlock) def test_dataframe_from_series(self, data): result = pd.DataFrame(pd.Series(data)) assert result.dtypes[0] == data.dtype assert result.shape == (len(data), 1) - assert isinstance(result._data.blocks[0], ExtensionBlock) + assert isinstance(result._mgr.blocks[0], ExtensionBlock) def test_series_given_mismatched_index_raises(self, data): msg = "Length of passed values is 3, index implies 5" diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index dee8021f5375f..c12f3fddefe1a 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -53,7 +53,7 @@ def test_no_values_attribute(self, data): def test_is_numeric_honored(self, data): result = pd.Series(data) - assert result._data.blocks[0].is_numeric is data.dtype._is_numeric + assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric def test_isna_extension_array(self, data_missing): # If your `isna` returns an ExtensionArray, you must also implement diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 90e607343297d..dd8c5b9fd558a 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -27,7 +27,7 @@ def test_concat(self, data, in_frame): dtype = result.dtype assert dtype == data.dtype - assert isinstance(result._data.blocks[0], ExtensionBlock) + assert isinstance(result._mgr.blocks[0], ExtensionBlock) @pytest.mark.parametrize("in_frame", [True, False]) def test_concat_all_na_block(self, data_missing, in_frame): diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 1a4f84e2c0fd2..ee301e15c7a83 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -27,7 +27,7 @@ def concat_same_type(self, to_concat, placement=None): @pytest.fixture def df(): df1 = pd.DataFrame({"a": [1, 2, 3]}) - blocks = df1._data.blocks + blocks = df1._mgr.blocks values = np.arange(3, dtype="int64") custom_block = CustomBlock(values, placement=slice(1, 2)) blocks = blocks + (custom_block,) @@ -58,17 +58,17 @@ def test_concat_series(): s = pd.Series(block, pd.RangeIndex(3), fastpath=True) res = pd.concat([s, s]) - assert isinstance(res._data.blocks[0], CustomBlock) + assert isinstance(res._mgr.blocks[0], CustomBlock) def test_concat_dataframe(df): # GH17728 res = pd.concat([df, df]) - assert isinstance(res._data.blocks[1], CustomBlock) + assert isinstance(res._mgr.blocks[1], CustomBlock) def test_concat_axis1(df): # GH17954 df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]}) res = pd.concat([df, df2], axis=1) - assert isinstance(res._data.blocks[1], CustomBlock) + assert isinstance(res._mgr.blocks[1], CustomBlock) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 77be952506964..551ca6f4b59fc 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -558,10 +558,10 @@ def test_reindex_api_equivalence(self): def test_align_float(self, float_frame): af, bf = float_frame.align(float_frame) - assert af._data is not float_frame._data + assert af._mgr is not float_frame._mgr af, bf = float_frame.align(float_frame, copy=False) - assert af._data is float_frame._data + assert af._mgr is float_frame._mgr # axis = 0 other = float_frame.iloc[:-5, :3] diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 37b0d61ee31d9..6a71ed1a3610e 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -48,18 +48,18 @@ def test_setitem_invalidates_datetime_index_freq(self): assert dti[1] == ts def test_cast_internals(self, float_frame): - casted = DataFrame(float_frame._data, dtype=int) + casted = DataFrame(float_frame._mgr, dtype=int) expected = DataFrame(float_frame._series, dtype=int) assert_frame_equal(casted, expected) - casted = DataFrame(float_frame._data, dtype=np.int32) + casted = DataFrame(float_frame._mgr, dtype=np.int32) expected = DataFrame(float_frame._series, dtype=np.int32) assert_frame_equal(casted, expected) def test_consolidate(self, float_frame): float_frame["E"] = 7.0 consolidated = float_frame._consolidate() - assert len(consolidated._data.blocks) == 1 + assert len(consolidated._mgr.blocks) == 1 # Ensure copy, do I want this? recons = consolidated._consolidate() @@ -67,10 +67,10 @@ def test_consolidate(self, float_frame): tm.assert_frame_equal(recons, consolidated) float_frame["F"] = 8.0 - assert len(float_frame._data.blocks) == 3 + assert len(float_frame._mgr.blocks) == 3 float_frame._consolidate(inplace=True) - assert len(float_frame._data.blocks) == 1 + assert len(float_frame._mgr.blocks) == 1 def test_consolidate_inplace(self, float_frame): frame = float_frame.copy() # noqa @@ -81,9 +81,9 @@ def test_consolidate_inplace(self, float_frame): def test_values_consolidate(self, float_frame): float_frame["E"] = 7.0 - assert not float_frame._data.is_consolidated() + assert not float_frame._mgr.is_consolidated() _ = float_frame.values # noqa - assert float_frame._data.is_consolidated() + assert float_frame._mgr.is_consolidated() def test_modify_values(self, float_frame): float_frame.values[5] = 5 @@ -305,7 +305,7 @@ def test_equals_different_blocks(self): df1 = df0.reset_index()[["A", "B", "C"]] # this assert verifies that the above operations have # induced a block rearrangement - assert df0._data.blocks[0].dtype != df1._data.blocks[0].dtype + assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype # do the real tests assert_frame_equal(df0, df1) @@ -353,7 +353,7 @@ def test_copy(self, float_frame, float_string_frame): # copy objects copy = float_string_frame.copy() - assert copy._data is not float_string_frame._data + assert copy._mgr is not float_string_frame._mgr def test_pickle(self, float_string_frame, timezone_frame): empty_frame = DataFrame() @@ -362,7 +362,7 @@ def test_pickle(self, float_string_frame, timezone_frame): assert_frame_equal(float_string_frame, unpickled) # buglet - float_string_frame._data.ndim + float_string_frame._mgr.ndim # empty unpickled = tm.round_trip_pickle(empty_frame) @@ -621,7 +621,7 @@ def test_constructor_no_pandas_array(self): result = pd.DataFrame({"A": arr}) expected = pd.DataFrame({"A": [1, 2, 3]}) tm.assert_frame_equal(result, expected) - assert isinstance(result._data.blocks[0], IntBlock) + assert isinstance(result._mgr.blocks[0], IntBlock) def test_add_column_with_pandas_array(self): # GH 26390 @@ -634,6 +634,6 @@ def test_add_column_with_pandas_array(self): "c": pd.array([1, 2, None, 3]), } ) - assert type(df["c"]._data.blocks[0]) == ObjectBlock - assert type(df2["c"]._data.blocks[0]) == ObjectBlock + assert type(df["c"]._mgr.blocks[0]) == ObjectBlock + assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock assert_frame_equal(df, df2) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a16ca7045cfdd..336f2df606eac 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1451,7 +1451,7 @@ def test_constructor_manager_resize(self, float_frame): index = list(float_frame.index[:5]) columns = list(float_frame.columns[:3]) - result = DataFrame(float_frame._data, index=index, columns=columns) + result = DataFrame(float_frame._mgr, index=index, columns=columns) tm.assert_index_equal(result.index, Index(index)) tm.assert_index_equal(result.columns, Index(columns)) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index c2d38b2938fca..3fe3d369dd801 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -3417,8 +3417,8 @@ def test_setitem(self, timezone_frame): # assert that A & C are not sharing the same base (e.g. they # are copies) - b1 = df._data.blocks[1] - b2 = df._data.blocks[2] + b1 = df._mgr.blocks[1] + b2 = df._mgr.blocks[2] tm.assert_extension_array_equal(b1.values, b2.values) assert id(b1.values._data.base) != id(b2.values._data.base) @@ -3548,7 +3548,7 @@ def test_assignment(self): result1 = df["D"] result2 = df["E"] - tm.assert_categorical_equal(result1._data._block.values, d) + tm.assert_categorical_equal(result1._mgr._block.values, d) # sorting s.name = "E" diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 4faa0d0e3f941..4270173af029e 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -476,8 +476,8 @@ def test_columns_with_dups(self): ) df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) - assert len(df._data._blknos) == len(df.columns) - assert len(df._data._blklocs) == len(df.columns) + assert len(df._mgr._blknos) == len(df.columns) + assert len(df._mgr._blklocs) == len(df.columns) # testing iloc for i in range(len(df.columns)): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 67482ddf657fb..8a783f7b1c8e6 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -699,7 +699,7 @@ def test_inplace_ops_identity(self): assert_series_equal(s, s2) assert_series_equal(s_orig + 1, s) assert s is s2 - assert s._data is s2._data + assert s._mgr is s2._mgr df = df_orig.copy() df2 = df @@ -707,7 +707,7 @@ def test_inplace_ops_identity(self): assert_frame_equal(df, df2) assert_frame_equal(df_orig + 1, df) assert df is df2 - assert df._data is df2._data + assert df._mgr is df2._mgr # dtype change s = s_orig.copy() @@ -722,7 +722,7 @@ def test_inplace_ops_identity(self): assert_frame_equal(df, df2) assert_frame_equal(df_orig + 1.5, df) assert df is df2 - assert df._data is df2._data + assert df._mgr is df2._mgr # mixed dtype arr = np.random.randint(0, 10, size=5) @@ -733,7 +733,7 @@ def test_inplace_ops_identity(self): expected = DataFrame({"A": arr.copy() + 1, "B": "foo"}) assert_frame_equal(df, expected) assert_frame_equal(df2, expected) - assert df._data is df2._data + assert df._mgr is df2._mgr df = df_orig.copy() df2 = df @@ -741,7 +741,7 @@ def test_inplace_ops_identity(self): expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"}) assert_frame_equal(df, expected) assert_frame_equal(df2, expected) - assert df._data is df2._data + assert df._mgr is df2._mgr @pytest.mark.parametrize( "op", diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index aef6c3fe8070c..511f35f285371 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -175,24 +175,24 @@ def test_downcast(self): o = self._construct(shape=4, value=9, dtype=np.int64) result = o.copy() - result._data = o._data.downcast(dtypes="infer") + result._mgr = o._mgr.downcast(dtypes="infer") self._compare(result, o) o = self._construct(shape=4, value=9.0) expected = o.astype(np.int64) result = o.copy() - result._data = o._data.downcast(dtypes="infer") + result._mgr = o._mgr.downcast(dtypes="infer") self._compare(result, expected) o = self._construct(shape=4, value=9.5) result = o.copy() - result._data = o._data.downcast(dtypes="infer") + result._mgr = o._mgr.downcast(dtypes="infer") self._compare(result, o) # are close o = self._construct(shape=4, value=9.000000000005) result = o.copy() - result._data = o._data.downcast(dtypes="infer") + result._mgr = o._mgr.downcast(dtypes="infer") expected = o.astype(np.int64) self._compare(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 261d2e9c04e77..ea2c83e8692f7 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -407,7 +407,7 @@ def test_setitem_change_dtype(self, multiindex_dataframe_random_data): s = dft["foo", "two"] dft["foo", "two"] = s > s.median() tm.assert_series_equal(dft["foo", "two"], s > s.median()) - # assert isinstance(dft._data.blocks[1].items, MultiIndex) + # assert isinstance(dft._mgr.blocks[1].items, MultiIndex) reindexed = dft.reindex(columns=[("foo", "two")]) tm.assert_series_equal(reindexed["foo", "two"], s > s.median()) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 760d8c70b9434..a659bdb82f7c6 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -595,7 +595,7 @@ def test_iloc_getitem_doc_issue(self): columns = list(range(0, 8, 2)) df = DataFrame(arr, index=index, columns=columns) - df._data.blocks[0].mgr_locs + df._mgr.blocks[0].mgr_locs result = df.iloc[1:5, 2:4] str(result) result.dtypes diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 9ce1062a6ec26..e850403492f99 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1332,7 +1332,7 @@ def test_block_shape(): a = pd.Series([1, 2, 3]).reindex(idx) b = pd.Series(pd.Categorical([1, 2, 3])).reindex(idx) - assert a._data.blocks[0].mgr_locs.indexer == b._data.blocks[0].mgr_locs.indexer + assert a._mgr.blocks[0].mgr_locs.indexer == b._mgr.blocks[0].mgr_locs.indexer def test_make_block_no_pandas_array(): diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index fee7e1cb2ba5f..85e24123d372e 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -2486,7 +2486,7 @@ def test_frame(self, compression): df["foo"] = np.random.randn(len(df)) store["df"] = df recons = store["df"] - assert recons._data.is_consolidated() + assert recons._mgr.is_consolidated() # empty self._check_roundtrip(df[:0], tm.assert_frame_equal) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 83c11cd9ab996..c10dfc0fbcb1e 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -666,7 +666,7 @@ def _test_compression(self, compress): expected = self.frame[k] assert_frame_equal(value, expected) # make sure that we can write to the new frames - for block in value._data.blocks: + for block in value._mgr.blocks: assert block.values.flags.writeable def test_compression_zlib(self): @@ -719,7 +719,7 @@ def decompress(ob): assert_frame_equal(value, expected) # make sure that we can write to the new frames even though # we needed to copy the data - for block in value._data.blocks: + for block in value._mgr.blocks: assert block.values.flags.writeable # mutate the data in some way block.values[0] += rhs[block.dtype] diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 6366bf0521fbc..2b05e53ffbb93 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1151,28 +1151,28 @@ def test_concat_copy(self): # These are actual copies. result = concat([df, df2, df3], axis=1, copy=True) - for b in result._data.blocks: + for b in result._mgr.blocks: assert b.values.base is None # These are the same. result = concat([df, df2, df3], axis=1, copy=False) - for b in result._data.blocks: + for b in result._mgr.blocks: if b.is_float: - assert b.values.base is df._data.blocks[0].values.base + assert b.values.base is df._mgr.blocks[0].values.base elif b.is_integer: - assert b.values.base is df2._data.blocks[0].values.base + assert b.values.base is df2._mgr.blocks[0].values.base elif b.is_object: assert b.values.base is not None # Float block was consolidated. df4 = DataFrame(np.random.randn(4, 1)) result = concat([df, df2, df3, df4], axis=1, copy=False) - for b in result._data.blocks: + for b in result._mgr.blocks: if b.is_float: assert b.values.base is None elif b.is_integer: - assert b.values.base is df2._data.blocks[0].values.base + assert b.values.base is df2._mgr.blocks[0].values.base elif b.is_object: assert b.values.base is not None diff --git a/pandas/tests/series/test_block_internals.py b/pandas/tests/series/test_block_internals.py index 18e75c3be5bcc..d0dfbe6f5b569 100644 --- a/pandas/tests/series/test_block_internals.py +++ b/pandas/tests/series/test_block_internals.py @@ -31,8 +31,8 @@ def test_dt64tz_setitem_does_not_mutate_dti(self): ser = pd.Series(dti) assert ser._values is not dti assert ser._values._data.base is not dti._data._data.base - assert ser._data.blocks[0].values is not dti - assert ser._data.blocks[0].values._data.base is not dti._data._data.base + assert ser._mgr.blocks[0].values is not dti + assert ser._mgr.blocks[0].values._data.base is not dti._data._data.base ser[::3] = pd.NaT assert ser[0] is pd.NaT diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2f09d777e719c..9e280a185c4ff 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -631,7 +631,7 @@ def test_constructor_limit_copies(self, index): s = pd.Series(index) # we make 1 copy; this is just a smoke test here - assert s._data.blocks[0].values is not index + assert s._mgr.blocks[0].values is not index def test_constructor_pass_none(self): s = Series(None, index=range(5)) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index d35198ca70f37..2aed885893dcf 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -207,7 +207,7 @@ def test_constructor_no_pandas_array(self): ser = pd.Series([1, 2, 3]) result = pd.Series(ser.array) tm.assert_series_equal(ser, result) - assert isinstance(result._data.blocks[0], IntBlock) + assert isinstance(result._mgr.blocks[0], IntBlock) def test_astype_no_pandas_dtype(self): # https://github.com/pandas-dev/pandas/pull/24866 @@ -219,17 +219,17 @@ def test_astype_no_pandas_dtype(self): def test_from_array(self): result = pd.Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]")) - assert result._data.blocks[0].is_extension is False + assert result._mgr.blocks[0].is_extension is False result = pd.Series(pd.array(["2015"], dtype="datetime64[ns]")) - assert result._data.blocks[0].is_extension is False + assert result._mgr.blocks[0].is_extension is False def test_from_list_dtype(self): result = pd.Series(["1H", "2H"], dtype="timedelta64[ns]") - assert result._data.blocks[0].is_extension is False + assert result._mgr.blocks[0].is_extension is False result = pd.Series(["2015"], dtype="datetime64[ns]") - assert result._data.blocks[0].is_extension is False + assert result._mgr.blocks[0].is_extension is False def test_hasnans_unchached_for_series(): diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 96e3c4640d2f6..812d4b26d18bc 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -681,7 +681,7 @@ def test_setitem_chained_no_consolidate(self): sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]]) with pd.option_context("mode.chained_assignment", None): sdf[0][1] = 2 - assert len(sdf._data.blocks) == 2 + assert len(sdf._mgr.blocks) == 2 def test_delitem(self, float_frame): A = float_frame["A"]