From ea6bbe67b3f063a651a038a88a0a267a186fe484 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Apr 2023 12:23:24 -0700 Subject: [PATCH 01/17] DEPR: accepting Manager objects in DataFrame/Series --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/apply.py | 2 +- pandas/core/arraylike.py | 4 +- pandas/core/frame.py | 54 ++++++++----- pandas/core/generic.py | 79 ++++++++++++------- pandas/core/groupby/generic.py | 6 +- pandas/core/groupby/ops.py | 4 +- pandas/core/resample.py | 2 +- pandas/core/reshape/concat.py | 4 +- pandas/core/reshape/merge.py | 4 +- pandas/core/reshape/reshape.py | 2 +- pandas/core/series.py | 47 ++++++++++- pandas/tests/arrays/interval/test_interval.py | 16 +++- .../tests/arrays/masked/test_arrow_compat.py | 21 +++-- .../tests/arrays/period/test_arrow_compat.py | 17 +++- pandas/tests/arrays/string_/test_string.py | 8 +- pandas/tests/copy_view/test_constructors.py | 6 +- pandas/tests/extension/test_external_block.py | 2 +- pandas/tests/frame/test_block_internals.py | 4 +- pandas/tests/frame/test_constructors.py | 4 +- pandas/tests/internals/test_internals.py | 16 +++- pandas/tests/io/json/test_readlines.py | 4 + .../io/parser/common/test_common_basic.py | 4 + .../tests/io/parser/common/test_data_list.py | 4 + pandas/tests/io/parser/common/test_decimal.py | 4 + pandas/tests/io/parser/common/test_index.py | 4 + pandas/tests/io/parser/common/test_inf.py | 4 + pandas/tests/io/parser/common/test_ints.py | 4 + .../io/parser/dtypes/test_categorical.py | 4 + .../io/parser/dtypes/test_dtypes_basic.py | 4 + pandas/tests/io/parser/test_compression.py | 4 + pandas/tests/io/parser/test_encoding.py | 4 + pandas/tests/io/parser/test_header.py | 4 + pandas/tests/io/parser/test_index_col.py | 4 + pandas/tests/io/parser/test_na_values.py | 4 + pandas/tests/io/parser/test_parse_dates.py | 25 ++++-- .../tests/io/parser/usecols/test_strings.py | 4 + pandas/tests/io/test_common.py | 4 + pandas/tests/io/test_feather.py | 4 + pandas/tests/io/test_fsspec.py | 4 + 
pandas/tests/io/test_gcs.py | 4 + pandas/tests/io/test_orc.py | 44 ++++++++--- pandas/tests/io/test_parquet.py | 3 + pandas/tests/io/test_user_agent.py | 17 ++-- pandas/tests/test_downstream.py | 4 +- 45 files changed, 361 insertions(+), 111 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 024f17d36e067..423a728516c39 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -172,6 +172,7 @@ Deprecations - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) +- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`) - diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2ffd49f674cfb..b634c22b6f165 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -676,7 +676,7 @@ def apply(self) -> DataFrame | Series: with np.errstate(all="ignore"): results = self.obj._mgr.apply("apply", func=self.f) # _constructor will retain self.index and self.columns - return self.obj._constructor(data=results) + return self.obj._constructor(data=results, _allow_mgr=True) # broadcasting if self.result_type == "broadcast": diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 1d10d797866f4..d05b0d3c24eb2 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -349,7 +349,9 @@ def _reconstruct(result): return result if isinstance(result, BlockManager): # we went through BlockManager.apply e.g.
np.sqrt - result = self._constructor(result, **reconstruct_kwargs, copy=False) + result = self._constructor( + result, **reconstruct_kwargs, copy=False, _allow_mgr=True + ) else: # we converted an array, lost our axes result = self._constructor( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 74873abac0758..a019fb71c6258 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -655,18 +655,29 @@ def __init__( columns: Axes | None = None, dtype: Dtype | None = None, copy: bool | None = None, + _allow_mgr: bool = False, # NOT for public use! ) -> None: if dtype is not None: dtype = self._validate_dtype(dtype) if isinstance(data, DataFrame): data = data._mgr + _allow_mgr = True if not copy: # if not copying data, ensure to still return a shallow copy # to avoid the result sharing the same Manager data = data.copy(deep=False) if isinstance(data, (BlockManager, ArrayManager)): + if not _allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. 
" + "Use public APIs instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if using_copy_on_write(): data = data.copy(deep=False) # first check if a Manager is passed without any other arguments @@ -2330,7 +2341,7 @@ def maybe_reorder( manager = get_option("mode.data_manager") mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) - return cls(mgr) + return cls(mgr, _allow_mgr=True) def to_records( self, index: bool = True, column_dtypes=None, index_dtypes=None @@ -2540,7 +2551,7 @@ def _from_arrays( verify_integrity=verify_integrity, typ=manager, ) - return cls(mgr) + return cls(mgr, _allow_mgr=True) @doc( storage_options=_shared_docs["storage_options"], @@ -3676,9 +3687,9 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series: # if we are a copy, mark as such copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None - result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__( - self - ) + result = self._constructor_sliced( + new_mgr, name=self.index[i], _allow_mgr=True + ).__finalize__(self) result._set_is_copy(self, copy=copy) return result @@ -3731,7 +3742,7 @@ def _getitem_nocopy(self, key: list): copy=False, only_slice=True, ) - return self._constructor(new_mgr) + return self._constructor(new_mgr, _allow_mgr=True) def __getitem__(self, key): check_dict_or_set_indexers(key) @@ -4268,7 +4279,9 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: name = self.columns[loc] klass = self._constructor_sliced # We get index=self.index bc values is a SingleDataManager - return klass(values, name=name, fastpath=True).__finalize__(self) + return klass(values, name=name, fastpath=True, _allow_mgr=True).__finalize__( + self + ) # ---------------------------------------------------------------------- # Lookup Caching @@ -4741,7 +4754,7 @@ def predicate(arr: ArrayLike) -> bool: return True mgr = self._mgr._get_data_subset(predicate).copy(deep=None) - return type(self)(mgr).__finalize__(self) + 
return type(self)(mgr, _allow_mgr=True).__finalize__(self) def insert( self, @@ -4915,7 +4928,11 @@ def _sanitize_column(self, value) -> ArrayLike: def _series(self): return { item: Series( - self._mgr.iget(idx), index=self.index, name=item, fastpath=True + self._mgr.iget(idx), + index=self.index, + name=item, + fastpath=True, + _allow_mgr=True, ) for idx, item in enumerate(self.columns) } @@ -5680,7 +5697,7 @@ def shift( fill_value=fill_value, allow_dups=True, ) - res_df = self._constructor(mgr) + res_df = self._constructor(mgr, _allow_mgr=True) return res_df.__finalize__(self, method="shift") return super().shift( @@ -6208,7 +6225,7 @@ class max type @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: - result = self._constructor(self._mgr.isna(func=isna)) + result = self._constructor(self._mgr.isna(func=isna), _allow_mgr=True) return result.__finalize__(self, method="isna") @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) @@ -6773,7 +6790,7 @@ def sort_values( self._get_block_manager_axis(axis), default_index(len(indexer)) ) - result = self._constructor(new_data) + result = self._constructor(new_data, _allow_mgr=True) if inplace: return self._update_inplace(result) else: @@ -7467,7 +7484,7 @@ def _dispatch_frame_op( if not is_list_like(right): # i.e. 
scalar, faster than checking np.ndim(right) == 0 bm = self._mgr.apply(array_op, right=right) - return self._constructor(bm) + return self._constructor(bm, _allow_mgr=True) elif isinstance(right, DataFrame): assert self.index.equals(right.index) @@ -7487,7 +7504,7 @@ def _dispatch_frame_op( right._mgr, # type: ignore[arg-type] array_op, ) - return self._constructor(bm) + return self._constructor(bm, _allow_mgr=True) elif isinstance(right, Series) and axis == 1: # axis=1 means we want to operate row-by-row @@ -9469,7 +9486,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: axis = 0 new_data = self._mgr.diff(n=periods, axis=axis) - return self._constructor(new_data).__finalize__(self, "diff") + return self._constructor(new_data, _allow_mgr=True).__finalize__(self, "diff") # ---------------------------------------------------------------------- # Function application @@ -10325,6 +10342,7 @@ def _series_round(ser: Series, decimals: int) -> Series: decimals=decimals, # type: ignore[arg-type] using_cow=using_copy_on_write(), ), + _allow_mgr=True, ).__finalize__(self, method="round") else: raise TypeError("decimals must be an integer, a dict-like or a Series") @@ -10880,7 +10898,7 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager.reduce res = df._mgr.reduce(blk_func) - out = df._constructor(res).iloc[0] + out = df._constructor(res, _allow_mgr=True).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) elif (df._mgr.get_dtypes() == object).any(): @@ -11318,7 +11336,7 @@ def quantile( res = data._mgr.take(indexer[q_idx], verify=False) res.axes[1] = q - result = self._constructor(res) + result = self._constructor(res, _allow_mgr=True) return result.__finalize__(self, method="quantile") @doc(NDFrame.asfreq, **_shared_doc_kwargs) @@ -11643,7 +11661,7 @@ def _to_dict_of_blocks(self, copy: bool = True): mgr = mgr_to_mgr(mgr, "block") mgr = cast(BlockManager, mgr) 
return { - k: self._constructor(v).__finalize__(self) + k: self._constructor(v, _allow_mgr=True).__finalize__(self) for k, v, in mgr.to_dict(copy=copy).items() } diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fb8a28b45b6df..1349cf4a75b41 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -330,7 +330,7 @@ def _as_manager(self, typ: str, copy: bool_t = True) -> Self: new_mgr: Manager new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy) # fastpath of passing a manager doesn't check the option/manager class - return self._constructor(new_mgr).__finalize__(self) + return self._constructor(new_mgr, _allow_mgr=True).__finalize__(self) # ---------------------------------------------------------------------- # attrs and flags @@ -782,7 +782,9 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self new_mgr.blocks[0].refs.add_reference( new_mgr.blocks[0] # type: ignore[arg-type] ) - return self._constructor(new_mgr).__finalize__(self, method="swapaxes") + return self._constructor(new_mgr, _allow_mgr=True).__finalize__( + self, method="swapaxes" + ) elif (copy or copy is None) and self._mgr.is_single_block: new_values = new_values.copy() @@ -1444,7 +1446,7 @@ def blk_func(values: ArrayLike): return operator.neg(values) # type: ignore[arg-type] new_data = self._mgr.apply(blk_func) - res = self._constructor(new_data) + res = self._constructor(new_data, _allow_mgr=True) return res.__finalize__(self, method="__neg__") @final @@ -1459,7 +1461,7 @@ def blk_func(values: ArrayLike): return operator.pos(values) # type: ignore[arg-type] new_data = self._mgr.apply(blk_func) - res = self._constructor(new_data) + res = self._constructor(new_data, _allow_mgr=True) return res.__finalize__(self, method="__pos__") @final @@ -1469,7 +1471,9 @@ def __invert__(self) -> Self: return self.copy(deep=False) new_data = self._mgr.apply(operator.invert) - return self._constructor(new_data).__finalize__(self, method="__invert__") + return 
self._constructor(new_data, _allow_mgr=True).__finalize__( + self, method="__invert__" + ) @final def __nonzero__(self) -> NoReturn: @@ -1603,7 +1607,9 @@ def abs(self) -> Self: 3 7 40 -50 """ res_mgr = self._mgr.apply(np.abs) - return self._constructor(res_mgr).__finalize__(self, name="abs") + return self._constructor(res_mgr, _allow_mgr=True).__finalize__( + self, name="abs" + ) @final def __abs__(self) -> Self: @@ -3963,7 +3969,9 @@ class max_speed axis=self._get_block_manager_axis(axis), verify=True, ) - return self._constructor(new_data).__finalize__(self, method="take") + return self._constructor(new_data, _allow_mgr=True).__finalize__( + self, method="take" + ) @final def _take_with_is_copy(self, indices, axis: Axis = 0) -> Self: @@ -4147,7 +4155,7 @@ class animal locomotion new_mgr = self._mgr.fast_xs(loc) result = self._constructor_sliced( - new_mgr, name=self.index[loc] + new_mgr, name=self.index[loc], _allow_mgr=True ).__finalize__(self) elif is_scalar(loc): result = self.iloc[:, slice(loc, loc + 1)] @@ -4192,7 +4200,9 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self: """ assert isinstance(slobj, slice), type(slobj) axis = self._get_block_manager_axis(axis) - result = self._constructor(self._mgr.get_slice(slobj, axis=axis)) + result = self._constructor( + self._mgr.get_slice(slobj, axis=axis), _allow_mgr=True + ) result = result.__finalize__(self) # this could be a view @@ -4686,7 +4696,7 @@ def _drop_axis( copy=None, only_slice=only_slice, ) - result = self._constructor(new_mgr) + result = self._constructor(new_mgr, _allow_mgr=True) if self.ndim == 1: result._name = self.name @@ -5147,7 +5157,7 @@ def sort_index( axis = 1 if isinstance(self, ABCDataFrame) else 0 new_data.set_axis(axis, default_index(len(indexer))) - result = self._constructor(new_data) + result = self._constructor(new_data, _allow_mgr=True) if inplace: return self._update_inplace(result) @@ -5507,7 +5517,7 @@ def _reindex_with_indexers( elif using_copy_on_write() and 
new_data is self._mgr: new_data = new_data.copy(deep=False) - return self._constructor(new_data).__finalize__(self) + return self._constructor(new_data, _allow_mgr=True).__finalize__(self) def filter( self, @@ -6180,7 +6190,7 @@ def _consolidate(self): """ f = lambda: self._mgr.consolidate() cons_data = self._protect_consolidate(f) - return self._constructor(cons_data).__finalize__(self) + return self._constructor(cons_data, _allow_mgr=True).__finalize__(self) @property def _is_mixed_type(self) -> bool_t: @@ -6211,11 +6221,15 @@ def _check_inplace_setting(self, value) -> bool_t: @final def _get_numeric_data(self) -> Self: - return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) + return self._constructor( + self._mgr.get_numeric_data(), _allow_mgr=True + ).__finalize__(self) @final def _get_bool_data(self): - return self._constructor(self._mgr.get_bool_data()).__finalize__(self) + return self._constructor( + self._mgr.get_bool_data(), _allow_mgr=True + ).__finalize__(self) # ---------------------------------------------------------------------- # Internal Interface Methods @@ -6424,7 +6438,9 @@ def astype( else: # else, only a single dtype is given new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) - return self._constructor(new_data).__finalize__(self, method="astype") + return self._constructor(new_data, _allow_mgr=True).__finalize__( + self, method="astype" + ) # GH 33113: handle empty frame or series if not results: @@ -6553,7 +6569,9 @@ def copy(self, deep: bool_t | None = True) -> Self: """ data = self._mgr.copy(deep=deep) self._clear_item_cache() - return self._constructor(data).__finalize__(self, method="copy") + return self._constructor(data, _allow_mgr=True).__finalize__( + self, method="copy" + ) @final def __copy__(self, deep: bool_t = True) -> Self: @@ -6615,7 +6633,9 @@ def infer_objects(self, copy: bool_t | None = None) -> Self: dtype: object """ new_mgr = self._mgr.convert(copy=copy) - return 
self._constructor(new_mgr).__finalize__(self, method="infer_objects") + return self._constructor(new_mgr, _allow_mgr=True).__finalize__( + self, method="infer_objects" + ) @final def convert_dtypes( @@ -7097,7 +7117,7 @@ def fillna( else: raise ValueError(f"invalid fill value with a {type(value)}") - result = self._constructor(new_data) + result = self._constructor(new_data, _allow_mgr=True) if inplace: return self._update_inplace(result) else: @@ -7503,7 +7523,7 @@ def replace( f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' ) - result = self._constructor(new_data) + result = self._constructor(new_data, _allow_mgr=True) if inplace: return self._update_inplace(result) else: @@ -7799,7 +7819,7 @@ def interpolate( **kwargs, ) - result = self._constructor(new_data) + result = self._constructor(new_data, _allow_mgr=True) if should_transpose: result = result.T if inplace: @@ -9762,7 +9782,8 @@ def _align_series( left = self.copy(deep=copy) else: left = self._constructor( - self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) + self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy), + _allow_mgr=True, ) right = other._reindex_indexer(join_index, ridx, copy) @@ -9784,7 +9805,7 @@ def _align_series( if copy and fdata is self._mgr: fdata = fdata.copy() - left = self._constructor(fdata) + left = self._constructor(fdata, _allow_mgr=True) if ridx is None: right = other.copy(deep=copy) @@ -9932,7 +9953,7 @@ def _where( self._check_inplace_setting(other) new_data = self._mgr.putmask(mask=cond, new=other, align=align) - result = self._constructor(new_data) + result = self._constructor(new_data, _allow_mgr=True) return self._update_inplace(result) else: @@ -9941,7 +9962,7 @@ def _where( cond=cond, align=align, ) - result = self._constructor(new_data) + result = self._constructor(new_data, _allow_mgr=True) return result.__finalize__(self) @overload @@ -10319,7 +10340,9 @@ def shift( new_data = self._mgr.shift( periods=periods, axis=axis, 
fill_value=fill_value ) - return self._constructor(new_data).__finalize__(self, method="shift") + return self._constructor(new_data, _allow_mgr=True).__finalize__( + self, method="shift" + ) # when freq is given, index is shifted, data is not index = self._get_axis(axis) @@ -11268,7 +11291,9 @@ def block_accum_func(blk_values): result = self._mgr.apply(block_accum_func) - return self._constructor(result).__finalize__(self, method=name) + return self._constructor(result, _allow_mgr=True).__finalize__( + self, method=name + ) def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): return self._accum_func( diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c44a503bce499..39f8738288db3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -143,7 +143,7 @@ class NamedAgg(NamedTuple): class SeriesGroupBy(GroupBy[Series]): def _wrap_agged_manager(self, mgr: Manager) -> Series: - return self.obj._constructor(mgr, name=self.obj.name) + return self.obj._constructor(mgr, name=self.obj.name, _allow_mgr=True) def _get_data_to_aggregate( self, *, numeric_only: bool = False, name: str | None = None @@ -1575,7 +1575,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: res_mgr = mgr.grouped_reduce(arr_func) res_mgr.set_axis(1, mgr.axes[1]) - res_df = self.obj._constructor(res_mgr) + res_df = self.obj._constructor(res_mgr, _allow_mgr=True) res_df = self._maybe_transpose_result(res_df) return res_df @@ -1881,7 +1881,7 @@ def _get_data_to_aggregate( return mgr def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: - return self.obj._constructor(mgr) + return self.obj._constructor(mgr, _allow_mgr=True) def _apply_to_column_groupbys(self, func) -> DataFrame: from pandas.core.reshape.concat import concat diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index cf4c48dc34200..77e7be6e6adea 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1364,7 
+1364,7 @@ class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: # fastpath equivalent to `sdata.iloc[slice_obj]` mgr = sdata._mgr.get_slice(slice_obj) - ser = sdata._constructor(mgr, name=sdata.name, fastpath=True) + ser = sdata._constructor(mgr, name=sdata.name, fastpath=True, _allow_mgr=True) return ser.__finalize__(sdata, method="groupby") @@ -1376,7 +1376,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # else: # return sdata.iloc[:, slice_obj] mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) - df = sdata._constructor(mgr) + df = sdata._constructor(mgr, _allow_mgr=True) return df.__finalize__(sdata, method="groupby") diff --git a/pandas/core/resample.py b/pandas/core/resample.py index e8864deaaca4d..70d9c6363ebca 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1967,7 +1967,7 @@ def _take_new_index( new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) # error: Incompatible return value type # (got "DataFrame", expected "NDFrameT") - return obj._constructor(new_mgr) # type: ignore[return-value] + return obj._constructor(new_mgr, _allow_mgr=True) # type: ignore[return-value] else: raise ValueError("'obj' should be either a Series or a DataFrame") diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 9e9711707f688..a7148d8a18731 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -638,7 +638,7 @@ def get_result(self): mgr = type(sample._mgr).from_array(res, index=new_index) - result = cons(mgr, name=name, fastpath=True) + result = cons(mgr, name=name, fastpath=True, _allow_mgr=True) return result.__finalize__(self, method="concat") # combine as columns in a frame @@ -680,7 +680,7 @@ def get_result(self): new_data._consolidate_inplace() cons = sample._constructor - return cons(new_data).__finalize__(self, method="concat") + return cons(new_data, _allow_mgr=True).__finalize__(self, 
method="concat") def _get_result_dim(self) -> int: if self._is_series and self.bm_axis == 1: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index bfaf403491801..c6cb70d7c8857 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -773,7 +773,7 @@ def _reindex_and_concat( allow_dups=True, use_na_proxy=True, ) - left = left._constructor(lmgr) + left = left._constructor(lmgr, _allow_mgr=True) left.index = join_index if right_indexer is not None and not is_range_indexer( @@ -788,7 +788,7 @@ def _reindex_and_concat( allow_dups=True, use_na_proxy=True, ) - right = right._constructor(rmgr) + right = right._constructor(rmgr, _allow_mgr=True) right.index = join_index from pandas import concat diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index eaeb4a50d0bf3..cc26a54256b68 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -503,7 +503,7 @@ def _unstack_frame(obj: DataFrame, level, fill_value=None): if not obj._can_fast_transpose: mgr = obj._mgr.unstack(unstacker, fill_value=fill_value) - return obj._constructor(mgr) + return obj._constructor(mgr, _allow_mgr=True) else: return unstacker.get_result( obj._values, value_columns=obj.columns, fill_value=fill_value diff --git a/pandas/core/series.py b/pandas/core/series.py index 22c8d8b047280..4f3ad0bddbed1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -376,6 +376,7 @@ def __init__( name=None, copy: bool | None = None, fastpath: bool = False, + _allow_mgr: bool = False, # NOT for public use! ) -> None: if ( isinstance(data, (SingleBlockManager, SingleArrayManager)) @@ -383,6 +384,14 @@ def __init__( and dtype is None and (copy is False or copy is None) ): + if not _allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. 
" + "Use public APIs instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) if using_copy_on_write(): data = data.copy(deep=False) # GH#33357 called with just the SingleBlockManager @@ -410,8 +419,19 @@ def __init__( data = SingleBlockManager.from_array(data, index) elif manager == "array": data = SingleArrayManager.from_array(data, index) + _allow_mgr = True elif using_copy_on_write() and not copy: data = data.copy(deep=False) + + if not _allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. " + "Use public APIs instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if copy: data = data.copy() # skips validation of the name @@ -422,6 +442,15 @@ def __init__( if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy: data = data.copy(deep=False) + if not _allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. " + "Use public APIs instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = ibase.maybe_extract_name(name, data, type(self)) if index is not None: @@ -487,6 +516,16 @@ def __init__( "`index` argument. `copy` must be False." ) + if not _allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. 
" + "Use public APIs instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + _allow_mgr = True + elif isinstance(data, ExtensionArray): pass else: @@ -964,7 +1003,7 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Series: # axis kwarg is retained for compat with NDFrame method # _slice is *always* positional mgr = self._mgr.get_slice(slobj, axis=axis) - out = self._constructor(mgr, fastpath=True) + out = self._constructor(mgr, fastpath=True, _allow_mgr=True) return out.__finalize__(self) def __getitem__(self, key): @@ -1071,7 +1110,9 @@ def _get_values_tuple(self, key: tuple): def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series: new_mgr = self._mgr.getitem_mgr(indexer) - return self._constructor(new_mgr, fastpath=True).__finalize__(self) + return self._constructor(new_mgr, fastpath=True, _allow_mgr=True).__finalize__( + self + ) def _get_value(self, label, takeable: bool = False): """ @@ -1914,7 +1955,7 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: columns = Index([name]) mgr = self._mgr.to_2d_mgr(columns) - df = self._constructor_expanddim(mgr) + df = self._constructor_expanddim(mgr, _allow_mgr=True) return df.__finalize__(self, method="to_frame") def _set_name(self, name, inplace: bool = False) -> Series: diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index b97eb32a60838..279923e9dabf9 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -346,12 +346,16 @@ def test_arrow_table_roundtrip(breaks): table = pa.table(df) assert isinstance(table.field("a").type, ArrowIntervalType) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, pd.IntervalDtype) tm.assert_frame_equal(result, df) table2 = pa.concat_tables([table, table]) - 
result = table2.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) @@ -359,7 +363,9 @@ def test_arrow_table_roundtrip(breaks): table = pa.table( [pa.chunked_array([], type=table.column(0).type)], schema=table.schema ) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() tm.assert_frame_equal(result, expected[0:0]) @@ -381,7 +387,9 @@ def test_arrow_table_roundtrip_without_metadata(breaks): table = table.replace_schema_metadata() assert table.schema.metadata is None - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, pd.IntervalDtype) tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index 6b0081321ef22..8babe88a4a653 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -35,7 +35,10 @@ def test_arrow_roundtrip(data): df = pd.DataFrame({"a": data}) table = pa.table(df) assert table.field("a").type == str(data.dtype.numpy_dtype) - result = table.to_pandas() + + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert result["a"].dtype == data.dtype tm.assert_frame_equal(result, df) @@ -53,7 +56,9 @@ def types_mapper(arrow_type): record_batch = pa.RecordBatch.from_arrays( [bools_array, ints_array, small_ints_array], ["bools", "ints", "small_ints"] ) - result = record_batch.to_pandas(types_mapper=types_mapper) + msg = "Passing a 
BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = record_batch.to_pandas(types_mapper=types_mapper) bools = pd.Series([True, None, False], dtype="boolean") ints = pd.Series([1, None, 2], dtype="Int64") small_ints = pd.Series([-1, 0, 7], dtype="Int64") @@ -70,7 +75,9 @@ def test_arrow_load_from_zero_chunks(data): table = pa.table( [pa.chunked_array([], type=table.field("a").type)], schema=table.schema ) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert result["a"].dtype == data.dtype tm.assert_frame_equal(result, df) @@ -91,14 +98,18 @@ def test_arrow_sliced(data): df = pd.DataFrame({"a": data}) table = pa.table(df) - result = table.slice(2, None).to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.slice(2, None).to_pandas() expected = df.iloc[2:].reset_index(drop=True) tm.assert_frame_equal(result, expected) # no missing values df2 = df.fillna(data[0]) table = pa.table(df2) - result = table.slice(2, None).to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.slice(2, None).to_pandas() expected = df2.iloc[2:].reset_index(drop=True) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index d76b04df4c9d7..84db892d13d4f 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -81,12 +81,16 @@ def test_arrow_table_roundtrip(): table = pa.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) table2 = pa.concat_tables([table, table]) - result = table2.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) @@ -104,7 +108,10 @@ def test_arrow_load_from_zero_chunks(): table = pa.table( [pa.chunked_array([], type=table.column(0).type)], schema=table.schema ) - result = table.to_pandas() + + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) @@ -119,6 +126,8 @@ def test_arrow_table_roundtrip_without_metadata(): table = table.replace_schema_metadata() assert table.schema.metadata is None - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 7e4869589cee6..50cbb7b13d697 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -430,7 +430,9 @@ def test_arrow_roundtrip(dtype, string_storage2): table = pa.table(df) assert table.field("a").type == "string" with pd.option_context("string_storage", string_storage2): - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, pd.StringDtype) expected = 
df.astype(f"string[{string_storage2}]") tm.assert_frame_equal(result, expected) @@ -450,7 +452,9 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage2): # Instantiate the same table with no chunks at all table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema) with pd.option_context("string_storage", string_storage2): - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() assert isinstance(result["a"].dtype, pd.StringDtype) expected = df.astype(f"string[{string_storage2}]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index ad7812778afd8..c53c4820b4ef9 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -157,7 +157,7 @@ def test_series_from_index_different_dtypes(using_copy_on_write): def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): ser = Series([1, 2, 3], dtype="int64") ser_orig = ser.copy() - ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath, index=idx) + ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath, index=idx, _allow_mgr=True) assert np.shares_memory(get_array(ser), get_array(ser2)) if using_copy_on_write: assert not ser2._mgr._has_no_reference(0) @@ -172,7 +172,7 @@ def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): def test_series_from_block_manager_different_dtype(using_copy_on_write): ser = Series([1, 2, 3], dtype="int64") - ser2 = Series(ser._mgr, dtype="int32") + ser2 = Series(ser._mgr, dtype="int32", _allow_mgr=True) assert not np.shares_memory(get_array(ser), get_array(ser2)) if using_copy_on_write: assert ser2._mgr._has_no_reference(0) @@ -184,7 +184,7 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, func): df = DataFrame({"a": [1, 2, 3]}) df_orig = 
df.copy() - new_df = DataFrame(func(df)) + new_df = DataFrame(func(df), _allow_mgr=True) assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) new_df.iloc[0] = 100 diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 1b5b46c6a01bb..ceaf157212bb7 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -29,7 +29,7 @@ def df(): custom_block = CustomBlock(values, placement=bp, ndim=2) blocks = blocks + (custom_block,) block_manager = BlockManager(blocks, [pd.Index(["a", "b"]), df1.index]) - return pd.DataFrame(block_manager) + return pd.DataFrame(block_manager, _allow_mgr=True) def test_concat_axis1(df): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 88b681d18fa3b..ab9c54ad4e7aa 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -53,11 +53,11 @@ def test_setitem_invalidates_datetime_index_freq(self): assert dti[1] == ts def test_cast_internals(self, float_frame): - casted = DataFrame(float_frame._mgr, dtype=int) + casted = DataFrame(float_frame._mgr, dtype=int, _allow_mgr=True) expected = DataFrame(float_frame._series, dtype=int) tm.assert_frame_equal(casted, expected) - casted = DataFrame(float_frame._mgr, dtype=np.int32) + casted = DataFrame(float_frame._mgr, dtype=np.int32, _allow_mgr=True) expected = DataFrame(float_frame._series, dtype=np.int32) tm.assert_frame_equal(casted, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index cb61a68200411..42ce8e914a6fc 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1716,7 +1716,9 @@ def test_constructor_manager_resize(self, float_frame): index = list(float_frame.index[:5]) columns = list(float_frame.columns[:3]) - result = DataFrame(float_frame._mgr, index=index, 
columns=columns) + result = DataFrame( + float_frame._mgr, index=index, columns=columns, _allow_mgr=True + ) tm.assert_index_equal(result.index, Index(index)) tm.assert_index_equal(result.columns, Index(columns)) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 79eb4110cfba9..850bd3b91c134 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -397,7 +397,9 @@ def test_duplicate_ref_loc_failure(self): def test_pickle(self, mgr): mgr2 = tm.round_trip_pickle(mgr) - tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + tm.assert_frame_equal( + DataFrame(mgr, _allow_mgr=True), DataFrame(mgr2, _allow_mgr=True) + ) # GH2431 assert hasattr(mgr2, "_is_consolidated") @@ -411,16 +413,22 @@ def test_pickle(self, mgr): def test_non_unique_pickle(self, mgr_string): mgr = create_mgr(mgr_string) mgr2 = tm.round_trip_pickle(mgr) - tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + tm.assert_frame_equal( + DataFrame(mgr, _allow_mgr=True), DataFrame(mgr2, _allow_mgr=True) + ) def test_categorical_block_pickle(self): mgr = create_mgr("a: category") mgr2 = tm.round_trip_pickle(mgr) - tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) + tm.assert_frame_equal( + DataFrame(mgr, _allow_mgr=True), DataFrame(mgr2, _allow_mgr=True) + ) smgr = create_single_mgr("category") smgr2 = tm.round_trip_pickle(smgr) - tm.assert_series_equal(Series(smgr), Series(smgr2)) + tm.assert_series_equal( + Series(smgr, _allow_mgr=True), Series(smgr2, _allow_mgr=True) + ) def test_iget(self): cols = Index(list("abc")) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index c06a6fcc2a037..eedba5ea29775 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -13,6 +13,10 @@ from pandas.io.json._json import JsonReader +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to 
DataFrame:FutureWarning" +) + @pytest.fixture def lines_json_df(): diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 6656face3be84..2b6715f0cc08a 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -29,6 +29,10 @@ from pandas.io.parsers import TextFileReader from pandas.io.parsers.c_parser_wrapper import CParserWrapper +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/common/test_data_list.py b/pandas/tests/io/parser/common/test_data_list.py index 8d484bba1cb9d..e8184f68646aa 100644 --- a/pandas/tests/io/parser/common/test_data_list.py +++ b/pandas/tests/io/parser/common/test_data_list.py @@ -12,6 +12,10 @@ from pandas.io.parsers import TextParser +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/common/test_decimal.py b/pandas/tests/io/parser/common/test_decimal.py index 72d4eb2c69845..f8705f736710b 100644 --- a/pandas/tests/io/parser/common/test_decimal.py +++ b/pandas/tests/io/parser/common/test_decimal.py @@ -9,6 +9,10 @@ from pandas import DataFrame import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 69afb9fe56472..9e4a1d846b21e 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -15,6 +15,10 @@ ) import pandas._testing as tm +pytestmark = 
pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") # GH#43650: Some expected failures with the pyarrow engine can occasionally diff --git a/pandas/tests/io/parser/common/test_inf.py b/pandas/tests/io/parser/common/test_inf.py index d43fb2f5187e1..b42b9844892e7 100644 --- a/pandas/tests/io/parser/common/test_inf.py +++ b/pandas/tests/io/parser/common/test_inf.py @@ -13,6 +13,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index e3159ef3e6a42..44767db3a0e1c 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -13,6 +13,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + # GH#43650: Some expected failures with the pyarrow engine can occasionally # cause a deadlock instead, so we skip these instead of xfailing skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py index 33422d41c2f93..9bfcf5042d432 100644 --- a/pandas/tests/io/parser/dtypes/test_categorical.py +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -18,6 +18,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index bb05b000c184f..c6d68a5b19f76 100644 --- 
a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -22,6 +22,10 @@ StringArray, ) +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + @pytest.mark.parametrize("dtype", [str, object]) @pytest.mark.parametrize("check_orig", [True, False]) diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py index ab00e31bd9b43..364b25a3e965b 100644 --- a/pandas/tests/io/parser/test_compression.py +++ b/pandas/tests/io/parser/test_compression.py @@ -14,6 +14,10 @@ import pandas._testing as tm from pandas.tests.io.test_compression import _compression_to_extension +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index f537c2f0681d7..f209f6b658377 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -19,6 +19,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 5cb54bb4e2916..6cadff511d95c 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -18,6 +18,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + # TODO(1.4): Change me to xfails at release time skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 13c4216710f84..6ebe80a18fb90 100644 
--- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -15,6 +15,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + # TODO(1.4): Change me to xfails at release time skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 0ca4884153b59..c4483c156f474 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -16,6 +16,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 8c3474220cde8..ca08683afff58 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -35,6 +35,10 @@ from pandas.io.parsers import read_csv +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") # GH#43650: Some expected failures with the pyarrow engine can occasionally @@ -1257,14 +1261,19 @@ def test_bad_date_parse(all_parsers, cache_dates, value): # pandas tries to guess the datetime format, triggering # the warning. TODO: parse dates directly in pyarrow, see # https://github.com/pandas-dev/pandas/issues/48017 - warn = UserWarning + warn = (UserWarning, FutureWarning) + msg = "Could not infer format" + elif parser.engine == "pyarrow": + warn = FutureWarning + msg = "Passing a BlockManager to DataFrame" else: # Note: warning is not raised if 'cache_dates', because here there is only a # single unique date and hence no risk of inconsistent parsing. 
warn = None + msg = "Could not infer format" parser.read_csv_check_warnings( warn, - "Could not infer format", + msg, s, header=None, names=["foo", "bar"], @@ -1286,16 +1295,19 @@ def test_bad_date_parse_with_warning(all_parsers, cache_dates, value): # pandas doesn't try to guess the datetime format # TODO: parse dates directly in pyarrow, see # https://github.com/pandas-dev/pandas/issues/48017 - warn = None + warn = FutureWarning + msg = "Passing a BlockManager to DataFrame" elif cache_dates: # Note: warning is not raised if 'cache_dates', because here there is only a # single unique date and hence no risk of inconsistent parsing. warn = None + msg = None else: warn = UserWarning + msg = "Could not infer format" parser.read_csv_check_warnings( warn, - "Could not infer format", + msg, s, header=None, names=["foo", "bar"], @@ -2140,7 +2152,8 @@ def test_parse_dot_separated_dates(all_parsers): dtype="object", name="a", ) - warn = None + warn = FutureWarning + msg = "Passing a BlockManager to DataFrame" else: expected_index = DatetimeIndex( ["2003-03-27 14:55:00", "2003-08-03 15:20:00"], @@ -2148,7 +2161,7 @@ def test_parse_dot_separated_dates(all_parsers): name="a", ) warn = UserWarning - msg = r"when dayfirst=False \(the default\) was specified" + msg = r"when dayfirst=False \(the default\) was specified" result = parser.read_csv_check_warnings( warn, msg, StringIO(data), parse_dates=True, index_col=0 ) diff --git a/pandas/tests/io/parser/usecols/test_strings.py b/pandas/tests/io/parser/usecols/test_strings.py index 8cecf1fc981ee..28e9ce8287a60 100644 --- a/pandas/tests/io/parser/usecols/test_strings.py +++ b/pandas/tests/io/parser/usecols/test_strings.py @@ -9,6 +9,10 @@ from pandas import DataFrame import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + _msg_validate_usecols_arg = ( "'usecols' must either be list-like " "of all strings, all unicode, all " diff --git 
a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b248c0c460c74..9a2b899aaf1d0 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -25,6 +25,10 @@ import pandas.io.common as icom +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + class CustomFSPath: """For testing fspath on unknown objects""" diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index c5bd8341e1a54..7967bc16e8294 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -11,6 +11,10 @@ from pandas.io.feather_format import read_feather, to_feather # isort:skip +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + pyarrow = pytest.importorskip("pyarrow", minversion="1.0.1") diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index a5790bb456d44..06b8d8647f32e 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -18,6 +18,10 @@ import pandas._testing as tm from pandas.util import _test_decorators as td +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + @pytest.fixture def df1(): diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index b65a19d766976..efc1a514f678e 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -19,6 +19,10 @@ from pandas.tests.io.test_compression import _compression_to_extension from pandas.util import _test_decorators as td +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + @pytest.fixture def gcs_buffer(): diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 36cfe5576adf9..f924acdba4de8 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -74,7 +74,9 @@ def test_orc_reader_empty(dirpath): 
expected[colname] = pd.Series(dtype=dtype) inputfile = os.path.join(dirpath, "TestOrcFile.emptyFile.orc") - got = read_orc(inputfile, columns=columns) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + got = read_orc(inputfile, columns=columns) tm.assert_equal(expected, got) @@ -94,7 +96,9 @@ def test_orc_reader_basic(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.test1.orc") - got = read_orc(inputfile, columns=data.keys()) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + got = read_orc(inputfile, columns=data.keys()) tm.assert_equal(expected, got) @@ -121,7 +125,9 @@ def test_orc_reader_decimal(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.decimal.orc") - got = read_orc(inputfile).iloc[:10] + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -162,7 +168,9 @@ def test_orc_reader_date_low(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.testDate1900.orc") - got = read_orc(inputfile).iloc[:10] + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -203,7 +211,9 @@ def test_orc_reader_date_high(dirpath): expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.testDate2038.orc") - got = read_orc(inputfile).iloc[:10] + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -244,7 +254,9 @@ def test_orc_reader_snappy_compressed(dirpath): 
expected = pd.DataFrame.from_dict(data) inputfile = os.path.join(dirpath, "TestOrcFile.testSnappy.orc") - got = read_orc(inputfile).iloc[:10] + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -268,7 +280,9 @@ def test_orc_roundtrip_file(dirpath): with tm.ensure_clean() as path: expected.to_orc(path) - got = read_orc(path) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + got = read_orc(path) tm.assert_equal(expected, got) @@ -291,7 +305,9 @@ def test_orc_roundtrip_bytesio(): expected = pd.DataFrame.from_dict(data) bytes = expected.to_orc() - got = read_orc(BytesIO(bytes)) + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + got = read_orc(BytesIO(bytes)) tm.assert_equal(expected, got) @@ -329,7 +345,9 @@ def test_orc_dtype_backend_pyarrow(): ) bytes_data = df.copy().to_orc() - result = read_orc(BytesIO(bytes_data), dtype_backend="pyarrow") + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = read_orc(BytesIO(bytes_data), dtype_backend="pyarrow") expected = pd.DataFrame( { @@ -360,7 +378,9 @@ def test_orc_dtype_backend_numpy_nullable(): ) bytes_data = df.copy().to_orc() - result = read_orc(BytesIO(bytes_data), dtype_backend="numpy_nullable") + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = read_orc(BytesIO(bytes_data), dtype_backend="numpy_nullable") expected = pd.DataFrame( { @@ -389,7 +409,9 @@ def test_orc_uri_path(): with tm.ensure_clean("tmp.orc") as path: expected.to_orc(path) uri = pathlib.Path(path).as_uri() - result = read_orc(uri) + msg = "Passing a BlockManager to DataFrame is deprecated" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + result = read_orc(uri) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index c74548bf63e06..a227ff8ce3833 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -43,6 +43,9 @@ except ImportError: _HAVE_FASTPARQUET = False +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) # TODO(ArrayManager) fastparquet relies on BlockManager internals diff --git a/pandas/tests/io/test_user_agent.py b/pandas/tests/io/test_user_agent.py index 3b552805198b5..b25fd3e09c2b0 100644 --- a/pandas/tests/io/test_user_agent.py +++ b/pandas/tests/io/test_user_agent.py @@ -17,12 +17,17 @@ import pandas as pd import pandas._testing as tm -pytestmark = pytest.mark.skipif( - is_ci_environment(), - reason="This test can hang in our CI min_versions build " - "and leads to '##[error]The runner has " - "received a shutdown signal...' in GHA. GH 45651", -) +pytestmark = [ + pytest.mark.skipif( + is_ci_environment(), + reason="This test can hang in our CI min_versions build " + "and leads to '##[error]The runner has " + "received a shutdown signal...' in GHA. 
GH 45651", + ), + pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" + ), +] class BaseUserAgentResponder(http.server.BaseHTTPRequestHandler): diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index a97676578c079..6c8bc62bfef9b 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -192,7 +192,9 @@ def test_pandas_datareader(): def test_pyarrow(df): pyarrow = import_module("pyarrow") table = pyarrow.Table.from_pandas(df) - result = table.to_pandas() + msg = "Passing a BlockManager to DataFrame is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = table.to_pandas() tm.assert_frame_equal(result, df) From 6784c70c0e5fdaee149146d9777ab407da47fc4e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Apr 2023 12:24:41 -0700 Subject: [PATCH 02/17] GH ref --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/frame.py | 1 + pandas/core/series.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 423a728516c39..e66d6ef32e3ed 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -172,7 +172,7 @@ Deprecations - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) -- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`??`) +- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a019fb71c6258..3b107a4b852a3 100644 --- 
a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -670,6 +670,7 @@ def __init__( if isinstance(data, (BlockManager, ArrayManager)): if not _allow_mgr: + # GH#52419 warnings.warn( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. " diff --git a/pandas/core/series.py b/pandas/core/series.py index 4f3ad0bddbed1..695c300c56a2c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -385,6 +385,7 @@ def __init__( and (copy is False or copy is None) ): if not _allow_mgr: + # GH#52419 warnings.warn( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. " From 47539346333f01359061cacad3f14115ff587216 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Apr 2023 12:57:59 -0700 Subject: [PATCH 03/17] fix with ArrayManager --- pandas/core/arraylike.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index d05b0d3c24eb2..bd92009d41f39 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -263,7 +263,10 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any) Series, ) from pandas.core.generic import NDFrame - from pandas.core.internals import BlockManager + from pandas.core.internals import ( + ArrayManager, + BlockManager, + ) cls = type(self) @@ -347,7 +350,7 @@ def _reconstruct(result): if method == "outer": raise NotImplementedError return result - if isinstance(result, BlockManager): + if isinstance(result, (BlockManager, ArrayManager)): # we went through BlockManager.apply e.g. 
np.sqrt result = self._constructor( result, **reconstruct_kwargs, copy=False, _allow_mgr=True From 59df56ef16c99544d282d531e7862a8920d0e937 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Apr 2023 15:16:43 -0700 Subject: [PATCH 04/17] suppress doc warnings --- doc/source/user_guide/io.rst | 2 ++ doc/source/user_guide/pyarrow.rst | 2 ++ doc/source/user_guide/scale.rst | 6 ++++++ doc/source/whatsnew/v2.0.0.rst | 1 + 4 files changed, 11 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index c33d4ab92d4c6..fec01a3313c47 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2256,6 +2256,7 @@ For line-delimited json files, pandas can also return an iterator which reads in Line-limited json can also be read using the pyarrow reader by specifying ``engine="pyarrow"``. .. ipython:: python + :okwarning: from io import BytesIO df = pd.read_json(BytesIO(jsonl.encode()), lines=True, engine="pyarrow") @@ -5462,6 +5463,7 @@ Read from an orc file. Read only certain columns of an orc file. .. ipython:: python + :okwarning: result = pd.read_orc( "example_pa.orc", diff --git a/doc/source/user_guide/pyarrow.rst b/doc/source/user_guide/pyarrow.rst index 63937ed27b8b2..74eac53f1952c 100644 --- a/doc/source/user_guide/pyarrow.rst +++ b/doc/source/user_guide/pyarrow.rst @@ -97,6 +97,7 @@ To convert a :external+pyarrow:py:class:`pyarrow.Table` to a :class:`DataFrame`, :external+pyarrow:py:meth:`pyarrow.Table.to_pandas` method with ``types_mapper=pd.ArrowDtype``. .. ipython:: python + :okwarning: table = pa.table([pa.array([1, 2, 3], type=pa.int64())], names=["a"]) @@ -167,6 +168,7 @@ PyArrow-backed data by specifying the parameter ``dtype_backend="pyarrow"``. A r ``engine="pyarrow"`` to necessarily return PyArrow-backed data. .. 
ipython:: python + :okwarning: import io data = io.StringIO("""a,b,c,d,e,f,g,h,i diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 65ed82d9d2cf5..d7129c8314568 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -72,6 +72,7 @@ To load the columns we want, we have two options. Option 1 loads in all the data and then filters to what we need. .. ipython:: python + :okwarning: columns = ["id_0", "name_0", "x_0", "y_0"] @@ -80,6 +81,7 @@ Option 1 loads in all the data and then filters to what we need. Option 2 only loads the columns we request. .. ipython:: python + :okwarning: pd.read_parquet("timeseries_wide.parquet", columns=columns) @@ -106,6 +108,7 @@ referred to as "low-cardinality" data). By using more efficient data types, you can store larger datasets in memory. .. ipython:: python + :okwarning: ts = make_timeseries(freq="30S", seed=0) ts.to_parquet("timeseries.parquet") @@ -220,6 +223,7 @@ counts up to this point. As long as each individual file fits in memory, this wi work for arbitrary-sized datasets. .. ipython:: python + :okwarning: %%time files = pathlib.Path("data/timeseries/").glob("ts*.parquet") @@ -257,6 +261,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas. We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in. .. ipython:: python + :okwarning: import dask.dataframe as dd @@ -300,6 +305,7 @@ returns a Dask Series with the same dtype and the same name. To get the actual result you can call ``.compute()``. .. ipython:: python + :okwarning: %time ddf["name"].value_counts().compute() diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 8bb07f7cd88fb..0474dbbdc9b65 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -151,6 +151,7 @@ When this keyword is set to ``"pyarrow"``, then these functions will return pyar * :meth:`Series.convert_dtypes` .. 
ipython:: python + :okwarning: import io data = io.StringIO("""a,b,c,d,e,f,g,h,i From 48297c848730726f517157f57ec4ca70a4f9b787 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Apr 2023 19:46:10 -0700 Subject: [PATCH 05/17] suppress doc warning --- doc/source/user_guide/pyarrow.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/pyarrow.rst b/doc/source/user_guide/pyarrow.rst index 74eac53f1952c..d55f59faf20df 100644 --- a/doc/source/user_guide/pyarrow.rst +++ b/doc/source/user_guide/pyarrow.rst @@ -154,6 +154,7 @@ functions provide an ``engine`` keyword that can dispatch to PyArrow to accelera * :func:`read_feather` .. ipython:: python + :ignore: import io data = io.StringIO("""a,b,c From 19620f522b5e43d4d4e7e220ff03503c43dec1f1 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 13:06:27 -0700 Subject: [PATCH 06/17] okwarning --- doc/source/user_guide/pyarrow.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/pyarrow.rst b/doc/source/user_guide/pyarrow.rst index d55f59faf20df..0c462c42483eb 100644 --- a/doc/source/user_guide/pyarrow.rst +++ b/doc/source/user_guide/pyarrow.rst @@ -155,6 +155,7 @@ functions provide an ``engine`` keyword that can dispatch to PyArrow to accelera .. ipython:: python :ignore: + :okwarning: import io data = io.StringIO("""a,b,c From 74495de7b581d3bb46f86e8d45d7793f29a25ecf Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 15:33:42 -0700 Subject: [PATCH 07/17] troubleshoot docbuild --- doc/source/user_guide/pyarrow.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/user_guide/pyarrow.rst b/doc/source/user_guide/pyarrow.rst index 0c462c42483eb..8da5eb1b999f5 100644 --- a/doc/source/user_guide/pyarrow.rst +++ b/doc/source/user_guide/pyarrow.rst @@ -154,7 +154,6 @@ functions provide an ``engine`` keyword that can dispatch to PyArrow to accelera * :func:`read_feather` .. 
ipython:: python - :ignore: :okwarning: import io From 9c00fbd934f47987a0b8d859fce7e5e00001f437 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Apr 2023 14:49:09 -0700 Subject: [PATCH 08/17] troubleshoot docbuild --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 11bbf597ba761..36d344395958f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -152,7 +152,7 @@ When this keyword is set to ``"pyarrow"``, then these functions will return pyar * :meth:`Series.convert_dtypes` .. ipython:: python - :okwarning: + :okwarning: import io data = io.StringIO("""a,b,c,d,e,f,g,h,i From 5e54cf2113409e6fa90a7d60f80e031f0962bc5c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 11 Apr 2023 14:04:07 -0700 Subject: [PATCH 09/17] restore pytestmark --- pandas/tests/io/test_parquet.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 8d05caa04e0e3..519fd99341f64 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -43,15 +43,15 @@ except ImportError: _HAVE_FASTPARQUET = False -pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" -) # TODO(ArrayManager) fastparquet relies on BlockManager internals -pytestmark = pytest.mark.filterwarnings( - "ignore:DataFrame._data is deprecated:FutureWarning" -) +pytestmark = [ + pytest.mark.filterwarnings("ignore:DataFrame._data is deprecated:FutureWarning"), + pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" + ), +] # setup engines & skips From c5eed563ac26bc4d82477f01292cb5a886694399 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 May 2023 07:55:02 -0700 Subject: [PATCH 10/17] mypy fixup --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pandas/core/series.py b/pandas/core/series.py index 85b463568063f..efc881f4d9053 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1118,7 +1118,7 @@ def _get_values_tuple(self, key: tuple): new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] return new_ser.__finalize__(self) - def _get_rows_with_mask(self, indexer: slice | npt.NDArray[np.bool_]) -> Series: + def _get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Series: new_mgr = self._mgr.get_rows_with_mask(indexer) return self._constructor(new_mgr, fastpath=True, _allow_mgr=True).__finalize__( self From de507381aa39987ef2a4fc551a7a101e4c7d7a63 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 26 May 2023 16:44:34 -0700 Subject: [PATCH 11/17] suppress --- pandas/tests/io/parser/test_parse_dates.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 2832cabfad8b1..b2128f779f02f 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1557,6 +1557,9 @@ def test_parse_date_fields(all_parsers): ("date_format", "%Y %m %d %H %M %S", None), ], ) +@pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame is deprecated:FutureWarning" +) def test_parse_date_all_fields(all_parsers, key, value, warn): parser = all_parsers data = """\ @@ -1593,6 +1596,9 @@ def test_parse_date_all_fields(all_parsers, key, value, warn): ("date_format", "%Y %m %d %H %M %S.%f", None), ], ) +@pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame is deprecated:FutureWarning" +) def test_datetime_fractional_seconds(all_parsers, key, value, warn): parser = all_parsers data = """\ From c2a1be83073a7e5f5f801e614be3ed88b7a118f8 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 27 May 2023 12:19:21 -0700 Subject: [PATCH 12/17] catch warnings --- pandas/tests/io/parser/test_parse_dates.py | 20 ++++++++++++-------- 1 file changed, 12 
insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index b2128f779f02f..d2a9aa2736cfa 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1557,9 +1557,6 @@ def test_parse_date_fields(all_parsers): ("date_format", "%Y %m %d %H %M %S", None), ], ) -@pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame is deprecated:FutureWarning" -) def test_parse_date_all_fields(all_parsers, key, value, warn): parser = all_parsers data = """\ @@ -1567,9 +1564,14 @@ def test_parse_date_all_fields(all_parsers, key, value, warn): 2001,01,05,10,00,0,0.0,10. 2001,01,5,10,0,00,1.,11. """ + msg = "use 'date_format' instead" + if parser.engine == "pyarrow" and warn is None: + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = FutureWarning + result = parser.read_csv_check_warnings( warn, - "use 'date_format' instead", + msg, StringIO(data), header=0, parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, @@ -1596,9 +1598,6 @@ def test_parse_date_all_fields(all_parsers, key, value, warn): ("date_format", "%Y %m %d %H %M %S.%f", None), ], ) -@pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame is deprecated:FutureWarning" -) def test_datetime_fractional_seconds(all_parsers, key, value, warn): parser = all_parsers data = """\ @@ -1606,9 +1605,14 @@ def test_datetime_fractional_seconds(all_parsers, key, value, warn): 2001,01,05,10,00,0.123456,0.0,10. 2001,01,5,10,0,0.500000,1.,11. 
""" + msg = "use 'date_format' instead" + if parser.engine == "pyarrow" and warn is None: + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = FutureWarning + result = parser.read_csv_check_warnings( warn, - "use 'date_format' instead", + msg, StringIO(data), header=0, parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, From 111df6e5bfae1845950b16fa1d9e83e686ca988c Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Jun 2023 14:07:50 -0700 Subject: [PATCH 13/17] mypy fixup --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1f015763b5acf..767511662f696 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4945,7 +4945,7 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]: def _series(self): return { item: Series( - self._mgr.iget(idx).blocks[0].values, + self._mgr.iget(idx).arrays[0], index=self.index, name=item, ) From 4bc668ecc4bc587c30d3fda37c3124c0d32d0463 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Aug 2023 12:29:43 -0700 Subject: [PATCH 14/17] update tests --- pandas/conftest.py | 1 + pandas/tests/io/test_orc.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 5210e727aeb3c..758cd6d68d82b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -174,6 +174,7 @@ def pytest_collection_modifyitems(items, config) -> None: "DataFrameGroupBy.fillna", "DataFrame.fillna with 'method' is deprecated", ), + ("read_parquet", "Passing a BlockManager to DataFrame is deprecated"), ] for item in items: diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 13df36e1e7a32..f6bcdf6cd35d0 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -17,6 +17,10 @@ import pyarrow as pa +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:FutureWarning" +) + @pytest.fixture def dirpath(datapath): From 
0123230e1d1a53dc2f1f9c041c865fd5a02a66c9 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Aug 2023 13:49:56 -0700 Subject: [PATCH 15/17] suppress warning --- doc/source/user_guide/10min.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 1a891dca839e3..6a5e765f95a27 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -763,12 +763,14 @@ Parquet Writing to a Parquet file: .. ipython:: python + :okwarning: df.to_parquet("foo.parquet") Reading from a Parquet file Store using :func:`read_parquet`: .. ipython:: python + :okwarning: pd.read_parquet("foo.parquet") From 604d7165610b95dde7f8a17d353e360a6d92dee9 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 31 Aug 2023 09:49:02 -0700 Subject: [PATCH 16/17] move whatsnew to 2.2 --- doc/source/whatsnew/v2.1.0.rst | 1 - doc/source/whatsnew/v2.2.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c642a8ef46f07..040ca048d1224 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -551,7 +551,6 @@ Other Deprecations - Deprecated ``freq`` parameter in :class:`.PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`) - Deprecated allowing non-standard inputs in :func:`take`, pass either a ``numpy.ndarray``, :class:`.ExtensionArray`, :class:`Index`, or :class:`Series` (:issue:`52981`) - Deprecated allowing non-standard sequences for :func:`isin`, :func:`value_counts`, :func:`unique`, :func:`factorize`, case to one of ``numpy.ndarray``, :class:`Index`, :class:`.ExtensionArray`, or :class:`Series` before calling (:issue:`52986`) -- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`) - Deprecated behavior of :class:`DataFrame` reductions ``sum``, ``prod``, ``std``, ``var``, ``sem`` with 
``axis=None``, in a future version this will operate over both axes returning a scalar instead of behaving like ``axis=0``; note this also affects numpy functions e.g. ``np.sum(df)`` (:issue:`21597`) - Deprecated behavior of :func:`concat` when :class:`DataFrame` has columns that are all-NA, in a future version these will not be discarded when determining the resulting dtype (:issue:`40893`) - Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :attr:`Series.dt` properties (:issue:`20306`) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 621c9159a5fe8..6bf7f6e42f288 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -145,6 +145,7 @@ Deprecations - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. 
(:issue:`54229`) +- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`) - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`) - Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`) - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`) From f1154ddd413a8e6eb4dc7a91dd8952f0985ecd29 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 9 Oct 2023 18:05:06 -0700 Subject: [PATCH 17/17] DeprecationWarning instead of FutureWarning --- pandas/core/frame.py | 2 +- pandas/core/series.py | 8 +- pandas/tests/arrays/interval/test_interval.py | 8 +- .../tests/arrays/masked/test_arrow_compat.py | 10 +-- .../tests/arrays/period/test_arrow_compat.py | 8 +- pandas/tests/arrays/string_/test_string.py | 4 +- pandas/tests/copy_view/test_constructors.py | 6 +- pandas/tests/frame/test_block_internals.py | 4 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/io/json/test_readlines.py | 2 +- .../io/parser/common/test_common_basic.py | 11 ++- .../tests/io/parser/common/test_data_list.py | 2 +- pandas/tests/io/parser/common/test_decimal.py | 2 +- pandas/tests/io/parser/common/test_index.py | 2 +- pandas/tests/io/parser/common/test_inf.py | 8 +- pandas/tests/io/parser/common/test_ints.py | 2 +- .../io/parser/common/test_read_errors.py | 15 +++- .../io/parser/dtypes/test_categorical.py | 2 +- .../io/parser/dtypes/test_dtypes_basic.py | 2 +- pandas/tests/io/parser/test_compression.py | 2 +- pandas/tests/io/parser/test_encoding.py | 2 +- pandas/tests/io/parser/test_header.py | 2 +- pandas/tests/io/parser/test_index_col.py | 2 +- pandas/tests/io/parser/test_na_values.py | 2 +- 
pandas/tests/io/parser/test_parse_dates.py | 74 ++++++++++++++----- pandas/tests/io/parser/test_unsupported.py | 21 ++++-- .../tests/io/parser/usecols/test_strings.py | 2 +- .../io/parser/usecols/test_usecols_basic.py | 10 +-- pandas/tests/io/test_common.py | 2 +- pandas/tests/io/test_feather.py | 2 +- pandas/tests/io/test_fsspec.py | 2 +- pandas/tests/io/test_gcs.py | 2 +- pandas/tests/io/test_orc.py | 24 +++--- pandas/tests/io/test_parquet.py | 2 +- pandas/tests/io/test_user_agent.py | 2 +- pandas/tests/test_downstream.py | 2 +- 36 files changed, 159 insertions(+), 96 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f035d9518dc62..ff29a34c5e8a2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -695,7 +695,7 @@ def __init__( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. " "Use public APIs instead.", - FutureWarning, + DeprecationWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/core/series.py b/pandas/core/series.py index bb08c85d995fd..b146ff6af60e8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -389,7 +389,7 @@ def __init__( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. " "Use public APIs instead.", - FutureWarning, + DeprecationWarning, stacklevel=find_stack_level(), ) if using_copy_on_write(): @@ -428,7 +428,7 @@ def __init__( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. " "Use public APIs instead.", - FutureWarning, + DeprecationWarning, stacklevel=find_stack_level(), ) @@ -447,7 +447,7 @@ def __init__( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. 
" "Use public APIs instead.", - FutureWarning, + DeprecationWarning, stacklevel=find_stack_level(), ) @@ -521,7 +521,7 @@ def __init__( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. " "Use public APIs instead.", - FutureWarning, + DeprecationWarning, stacklevel=find_stack_level(), ) allow_mgr = True diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 880176c1a97c4..678ff13d99a5f 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -338,14 +338,14 @@ def test_arrow_table_roundtrip(breaks): table = pa.table(df) assert isinstance(table.field("a").type, ArrowIntervalType) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert isinstance(result["a"].dtype, pd.IntervalDtype) tm.assert_frame_equal(result, df) table2 = pa.concat_tables([table, table]) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) @@ -355,7 +355,7 @@ def test_arrow_table_roundtrip(breaks): [pa.chunked_array([], type=table.column(0).type)], schema=table.schema ) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() tm.assert_frame_equal(result, expected[0:0]) @@ -378,7 +378,7 @@ def test_arrow_table_roundtrip_without_metadata(breaks): assert table.schema.metadata is None msg = "Passing a BlockManager to DataFrame is deprecated" - with 
tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert isinstance(result["a"].dtype, pd.IntervalDtype) tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index 8d87624e58c39..f11ec5f8a36a0 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -37,7 +37,7 @@ def test_arrow_roundtrip(data): assert table.field("a").type == str(data.dtype.numpy_dtype) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert result["a"].dtype == data.dtype tm.assert_frame_equal(result, df) @@ -57,7 +57,7 @@ def types_mapper(arrow_type): [bools_array, ints_array, small_ints_array], ["bools", "ints", "small_ints"] ) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = record_batch.to_pandas(types_mapper=types_mapper) bools = pd.Series([True, None, False], dtype="boolean") ints = pd.Series([1, None, 2], dtype="Int64") @@ -76,7 +76,7 @@ def test_arrow_load_from_zero_chunks(data): [pa.chunked_array([], type=table.field("a").type)], schema=table.schema ) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert result["a"].dtype == data.dtype tm.assert_frame_equal(result, df) @@ -99,7 +99,7 @@ def test_arrow_sliced(data): df = pd.DataFrame({"a": data}) table = pa.table(df) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, 
match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.slice(2, None).to_pandas() expected = df.iloc[2:].reset_index(drop=True) tm.assert_frame_equal(result, expected) @@ -108,7 +108,7 @@ def test_arrow_sliced(data): df2 = df.fillna(data[0]) table = pa.table(df2) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.slice(2, None).to_pandas() expected = df2.iloc[2:].reset_index(drop=True) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index d92331a06677c..c97a08244a9a8 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -82,14 +82,14 @@ def test_arrow_table_roundtrip(): table = pa.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) table2 = pa.concat_tables([table, table]) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) @@ -110,7 +110,7 @@ def test_arrow_load_from_zero_chunks(): ) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) 
tm.assert_frame_equal(result, df) @@ -127,7 +127,7 @@ def test_arrow_table_roundtrip_without_metadata(): assert table.schema.metadata is None msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index c0ff0a77f33a7..11f7e190233e0 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -498,7 +498,7 @@ def test_arrow_roundtrip(dtype, string_storage2): assert table.field("a").type == "string" with pd.option_context("string_storage", string_storage2): msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert isinstance(result["a"].dtype, pd.StringDtype) expected = df.astype(f"string[{string_storage2}]") @@ -519,7 +519,7 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage2): table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema) with pd.option_context("string_storage", string_storage2): msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() assert isinstance(result["a"].dtype, pd.StringDtype) expected = df.astype(f"string[{string_storage2}]") diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 6de464b2b04c5..69f8c2414a6bc 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -158,7 +158,7 @@ def 
test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): ser = Series([1, 2, 3], dtype="int64") ser_orig = ser.copy() msg = "Passing a SingleBlockManager to Series" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath, index=idx) assert np.shares_memory(get_array(ser), get_array(ser2)) if using_copy_on_write: @@ -175,7 +175,7 @@ def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): def test_series_from_block_manager_different_dtype(using_copy_on_write): ser = Series([1, 2, 3], dtype="int64") msg = "Passing a SingleBlockManager to Series" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): ser2 = Series(ser._mgr, dtype="int32") assert not np.shares_memory(get_array(ser), get_array(ser2)) if using_copy_on_write: @@ -190,7 +190,7 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): if use_mgr: data = df._mgr - warn = FutureWarning + warn = DeprecationWarning else: data = df warn = None diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 433e8369ffe93..8c53ffdd493d6 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -51,12 +51,12 @@ def test_setitem_invalidates_datetime_index_freq(self): def test_cast_internals(self, float_frame): msg = "Passing a BlockManager to DataFrame" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): casted = DataFrame(float_frame._mgr, dtype=int) expected = DataFrame(float_frame._series, dtype=int) tm.assert_frame_equal(casted, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): casted = 
DataFrame(float_frame._mgr, dtype=np.int32) expected = DataFrame(float_frame._series, dtype=np.int32) tm.assert_frame_equal(casted, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 99b71d7c77599..a24868fc7373c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1742,7 +1742,7 @@ def test_constructor_manager_resize(self, float_frame): columns = list(float_frame.columns[:3]) msg = "Passing a BlockManager to DataFrame" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = DataFrame(float_frame._mgr, index=index, columns=columns) tm.assert_index_equal(result.index, Index(index)) tm.assert_index_equal(result.columns, Index(columns)) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index e076e186ab33b..3b6f044c3c756 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -15,7 +15,7 @@ from pandas.io.json._json import JsonReader pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index a5885dd5496e2..c2f37c75aaab3 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -30,7 +30,7 @@ from pandas.io.parsers.c_parser_wrapper import CParserWrapper pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @@ -89,7 +89,14 @@ def test_read_csv_local(all_parsers, csv1): parser = all_parsers fname = prefix + str(os.path.abspath(csv1)) - 
result = parser.read_csv(fname, index_col=0, parse_dates=True) + + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + msg = "Passing a BlockManager to DataFrame is deprecated" + + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + result = parser.read_csv(fname, index_col=0, parse_dates=True) expected = DataFrame( [ diff --git a/pandas/tests/io/parser/common/test_data_list.py b/pandas/tests/io/parser/common/test_data_list.py index e8184f68646aa..3b0ff9e08d349 100644 --- a/pandas/tests/io/parser/common/test_data_list.py +++ b/pandas/tests/io/parser/common/test_data_list.py @@ -13,7 +13,7 @@ from pandas.io.parsers import TextParser pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/common/test_decimal.py b/pandas/tests/io/parser/common/test_decimal.py index f8705f736710b..b8a68c138eeff 100644 --- a/pandas/tests/io/parser/common/test_decimal.py +++ b/pandas/tests/io/parser/common/test_decimal.py @@ -10,7 +10,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 9e4a1d846b21e..1d5b0fec7a7c6 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -16,7 +16,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") diff --git 
a/pandas/tests/io/parser/common/test_inf.py b/pandas/tests/io/parser/common/test_inf.py index 7a1306b31431e..e1dc87ed0071e 100644 --- a/pandas/tests/io/parser/common/test_inf.py +++ b/pandas/tests/io/parser/common/test_inf.py @@ -14,7 +14,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @@ -67,7 +67,11 @@ def test_read_csv_with_use_inf_as_na(all_parsers): parser = all_parsers data = "1.0\nNaN\n3.0" msg = "use_inf_as_na option is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): with option_context("use_inf_as_na", True): result = parser.read_csv(StringIO(data), header=None) expected = DataFrame([1.0, np.nan, 3.0]) diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index 44767db3a0e1c..939fdbc159454 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -14,7 +14,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) # GH#43650: Some expected failures with the pyarrow engine can occasionally diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index 4e82dca83e2d0..4c75c05303172 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -148,7 +148,12 @@ def test_suppress_error_output(all_parsers): data = "a\n1\n1,2,3\n4\n5,6,7" expected = DataFrame({"a": [1, 4]}) - 
with tm.assert_produces_warning(None): + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + msg = "Passing a BlockManager to DataFrame" + + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): result = parser.read_csv(StringIO(data), on_bad_lines="skip") tm.assert_frame_equal(result, expected) @@ -174,11 +179,13 @@ def test_warn_bad_lines(all_parsers): expected = DataFrame({"a": [1, 4]}) match_msg = "Skipping line" + expected_warning = ParserWarning if parser.engine == "pyarrow": match_msg = "Expected 1 columns, but found 3: 1,2,3" + expected_warning = (ParserWarning, DeprecationWarning) with tm.assert_produces_warning( - ParserWarning, match=match_msg, check_stacklevel=False + expected_warning, match=match_msg, check_stacklevel=False ): result = parser.read_csv(StringIO(data), on_bad_lines="warn") tm.assert_frame_equal(result, expected) @@ -282,11 +289,13 @@ def test_on_bad_lines_warn_correct_formatting(all_parsers): expected = DataFrame({"1": "a", "2": ["b"] * 2}) match_msg = "Skipping line" + expected_warning = ParserWarning if parser.engine == "pyarrow": match_msg = "Expected 2 columns, but found 3: a,b,c" + expected_warning = (ParserWarning, DeprecationWarning) with tm.assert_produces_warning( - ParserWarning, match=match_msg, check_stacklevel=False + expected_warning, match=match_msg, check_stacklevel=False ): result = parser.read_csv(StringIO(data), on_bad_lines="warn") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py index 62ba42d2712b7..d305c94b171f3 100644 --- a/pandas/tests/io/parser/dtypes/test_categorical.py +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -21,7 +21,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = 
pytest.mark.usefixtures("pyarrow_xfail") diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index c0c95876c592a..8434926e188cc 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -23,7 +23,7 @@ ) pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py index a3bb59cbf8d55..bd337ab29025c 100644 --- a/pandas/tests/io/parser/test_compression.py +++ b/pandas/tests/io/parser/test_compression.py @@ -14,7 +14,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index ec84f0f655adc..b511c162cb1a2 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -20,7 +20,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 8fc231306e7d6..d059cc0c49db4 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -19,7 +19,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) # TODO(1.4): Change me to xfails at release 
time diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index d32365e3ee4d7..ed72eef31a476 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -16,7 +16,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) # TODO(1.4): Change me to xfails at release time diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 0459461aa33dd..86c50fe103f2c 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -17,7 +17,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 2623748559be6..e904d75d95f77 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -36,7 +36,7 @@ from pandas.io.parsers import read_csv pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @@ -187,8 +187,11 @@ def date_parser(*date_cols): "keep_date_col": keep_date_col, "names": ["X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8"], } + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), **kwds, @@ -506,7 +509,10 @@ def test_multiple_date_cols_int_cast(all_parsers): "date_parser": 
pd.to_datetime, } result = parser.read_csv_check_warnings( - FutureWarning, "use 'date_format' instead", StringIO(data), **kwds + (FutureWarning, DeprecationWarning), + "use 'date_format' instead", + StringIO(data), + **kwds, ) expected = DataFrame( @@ -553,8 +559,12 @@ def test_multiple_date_col_timestamp_parse(all_parsers): data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25""" + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), parse_dates=[[0, 1]], @@ -715,8 +725,12 @@ def test_date_parser_int_bug(all_parsers): "12345,1,-1,3,invoice_InvoiceResource,search\n" ) + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), index_col=0, @@ -1277,7 +1291,7 @@ def test_bad_date_parse(all_parsers, cache_dates, value): warn = None msg = "Passing a BlockManager to DataFrame" if parser.engine == "pyarrow": - warn = FutureWarning + warn = DeprecationWarning parser.read_csv_check_warnings( warn, @@ -1303,7 +1317,7 @@ def test_bad_date_parse_with_warning(all_parsers, cache_dates, value): # pandas doesn't try to guess the datetime format # TODO: parse dates directly in pyarrow, see # https://github.com/pandas-dev/pandas/issues/48017 - warn = FutureWarning + warn = DeprecationWarning msg = "Passing a BlockManager to DataFrame" elif cache_dates: # Note: warning is not raised if 'cache_dates', because here there is only a @@ -1345,8 +1359,12 @@ def test_parse_dates_infer_datetime_format_warning(all_parsers, reader): parser = all_parsers data = "Date,test\n2012-01-01,1\n,2" + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + getattr(parser, reader)( - 
FutureWarning, + warn, "The argument 'infer_datetime_format' is deprecated", StringIO(data), parse_dates=["Date"], @@ -1516,8 +1534,13 @@ def test_parse_date_time_multi_level_column_name(all_parsers): ) def test_parse_date_time(all_parsers, data, kwargs, expected): parser = all_parsers + + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), date_parser=pd.to_datetime, @@ -1533,9 +1556,14 @@ def test_parse_date_time(all_parsers, data, kwargs, expected): def test_parse_date_fields(all_parsers): parser = all_parsers + + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), header=0, @@ -1569,9 +1597,12 @@ def test_parse_date_all_fields(all_parsers, key, value, warn): 2001,01,5,10,0,00,1.,11. """ msg = "use 'date_format' instead" - if parser.engine == "pyarrow" and warn is None: - msg = "Passing a BlockManager to DataFrame is deprecated" - warn = FutureWarning + if parser.engine == "pyarrow": + if warn is None: + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = DeprecationWarning + else: + warn = (warn, DeprecationWarning) result = parser.read_csv_check_warnings( warn, @@ -1610,9 +1641,12 @@ def test_datetime_fractional_seconds(all_parsers, key, value, warn): 2001,01,5,10,0,0.500000,1.,11. 
""" msg = "use 'date_format' instead" - if parser.engine == "pyarrow" and warn is None: - msg = "Passing a BlockManager to DataFrame is deprecated" - warn = FutureWarning + if parser.engine == "pyarrow": + if warn is None: + msg = "Passing a BlockManager to DataFrame is deprecated" + warn = DeprecationWarning + else: + warn = (warn, DeprecationWarning) result = parser.read_csv_check_warnings( warn, @@ -1639,8 +1673,12 @@ def test_generic(all_parsers): def parse_function(yy, mm): return [date(year=int(y), month=int(m), day=1) for y, m in zip(yy, mm)] + warn = FutureWarning + if parser.engine == "pyarrow": + warn = (FutureWarning, DeprecationWarning) + result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), header=0, @@ -2167,7 +2205,7 @@ def test_parse_dot_separated_dates(all_parsers): dtype="object", name="a", ) - warn = FutureWarning + warn = DeprecationWarning msg = "Passing a BlockManager to DataFrame" else: expected_index = DatetimeIndex( diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index b489c09e917af..4d38590b5913d 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -157,15 +157,20 @@ def test_on_bad_lines_callable_python_or_pyarrow(self, all_parsers): sio = StringIO("a,b\n1,2") bad_lines_func = lambda x: x parser = all_parsers - if all_parsers.engine not in ["python", "pyarrow"]: - msg = ( - "on_bad_line can only be a callable " - "function if engine='python' or 'pyarrow'" - ) - with pytest.raises(ValueError, match=msg): + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + warn_msg = "Passing a BlockManager" + with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False): + if all_parsers.engine not in ["python", "pyarrow"]: + msg = ( + "on_bad_line can only be a callable " + "function if engine='python' or 'pyarrow'" + ) + with pytest.raises(ValueError, 
match=msg): + parser.read_csv(sio, on_bad_lines=bad_lines_func) + else: parser.read_csv(sio, on_bad_lines=bad_lines_func) - else: - parser.read_csv(sio, on_bad_lines=bad_lines_func) def test_close_file_handle_on_invalid_usecols(all_parsers): diff --git a/pandas/tests/io/parser/usecols/test_strings.py b/pandas/tests/io/parser/usecols/test_strings.py index 5c461df2db27b..d4ade41d38465 100644 --- a/pandas/tests/io/parser/usecols/test_strings.py +++ b/pandas/tests/io/parser/usecols/test_strings.py @@ -10,7 +10,7 @@ import pandas._testing as tm pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index b34b3e787c775..07c94e301b37a 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -171,7 +171,7 @@ def test_usecols_index_col_conflict2(all_parsers): msg = "Passing a BlockManager to DataFrame is deprecated" warn = None if parser.engine == "pyarrow": - warn = FutureWarning + warn = DeprecationWarning with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): result = parser.read_csv( StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"] @@ -199,7 +199,7 @@ def test_usecols_index_col_middle(all_parsers): msg = "Passing a BlockManager to DataFrame is deprecated" warn = None if parser.engine == "pyarrow": - warn = FutureWarning + warn = DeprecationWarning with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="c") expected = DataFrame({"b": [2], "d": [4]}, index=Index([3], name="c")) @@ -215,7 +215,7 @@ def test_usecols_index_col_end(all_parsers): msg = "Passing a BlockManager to DataFrame is deprecated" warn = None if parser.engine == "pyarrow": - 
warn = FutureWarning + warn = DeprecationWarning with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="d") expected = DataFrame({"b": [2], "c": [3]}, index=Index([4], name="d")) @@ -286,7 +286,7 @@ def test_np_array_usecols(all_parsers): msg = "Passing a BlockManager to DataFrame is deprecated" warn = None if parser.engine == "pyarrow": - warn = FutureWarning + warn = DeprecationWarning with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): result = parser.read_csv(StringIO(data), usecols=usecols) tm.assert_frame_equal(result, expected) @@ -483,7 +483,7 @@ def test_usecols_dtype(all_parsers): msg = "Passing a BlockManager to DataFrame is deprecated" warn = None if parser.engine == "pyarrow": - warn = FutureWarning + warn = DeprecationWarning with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): result = parser.read_csv( StringIO(data), diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a9af59facd82c..018400fcfe0cf 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -26,7 +26,7 @@ import pandas.io.common as icom pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index b5f7f966402c3..5ec8705251d95 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -12,7 +12,7 @@ from pandas.io.feather_format import read_feather, to_feather # isort:skip pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) pyarrow = pytest.importorskip("pyarrow") diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 
b0e2ecf0be7ee..8726d44c9c3ed 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -19,7 +19,7 @@ from pandas.util import _test_decorators as td pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 4a00785f610de..6e55cde12f2f9 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -19,7 +19,7 @@ from pandas.util import _test_decorators as td pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 99d0854318730..6b713bfa42b53 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -18,7 +18,7 @@ import pyarrow as pa pytestmark = pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) @@ -71,7 +71,7 @@ def test_orc_reader_empty(dirpath): inputfile = os.path.join(dirpath, "TestOrcFile.emptyFile.orc") msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): got = read_orc(inputfile, columns=columns) tm.assert_equal(expected, got) @@ -93,7 +93,7 @@ def test_orc_reader_basic(dirpath): inputfile = os.path.join(dirpath, "TestOrcFile.test1.orc") msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): got = read_orc(inputfile, columns=data.keys()) tm.assert_equal(expected, got) @@ -122,7 +122,7 @@ def test_orc_reader_decimal(dirpath): inputfile = os.path.join(dirpath, 
"TestOrcFile.decimal.orc") msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -165,7 +165,7 @@ def test_orc_reader_date_low(dirpath): inputfile = os.path.join(dirpath, "TestOrcFile.testDate1900.orc") msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -208,7 +208,7 @@ def test_orc_reader_date_high(dirpath): inputfile = os.path.join(dirpath, "TestOrcFile.testDate2038.orc") msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -251,7 +251,7 @@ def test_orc_reader_snappy_compressed(dirpath): inputfile = os.path.join(dirpath, "TestOrcFile.testSnappy.orc") msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): got = read_orc(inputfile).iloc[:10] tm.assert_equal(expected, got) @@ -278,7 +278,7 @@ def test_orc_roundtrip_file(dirpath): with tm.ensure_clean() as path: expected.to_orc(path) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): got = read_orc(path) tm.assert_equal(expected, got) @@ -304,7 +304,7 @@ def test_orc_roundtrip_bytesio(): bytes = expected.to_orc() msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with 
tm.assert_produces_warning(DeprecationWarning, match=msg): got = read_orc(BytesIO(bytes)) tm.assert_equal(expected, got) @@ -344,7 +344,7 @@ def test_orc_dtype_backend_pyarrow(): bytes_data = df.copy().to_orc() msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = read_orc(BytesIO(bytes_data), dtype_backend="pyarrow") expected = pd.DataFrame( @@ -377,7 +377,7 @@ def test_orc_dtype_backend_numpy_nullable(): bytes_data = df.copy().to_orc() msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = read_orc(BytesIO(bytes_data), dtype_backend="numpy_nullable") expected = pd.DataFrame( @@ -408,7 +408,7 @@ def test_orc_uri_path(): expected.to_orc(path) uri = pathlib.Path(path).as_uri() msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = read_orc(uri) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 7fef68d6686b9..c3634a91f2741 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -51,7 +51,7 @@ pytestmark = [ pytest.mark.filterwarnings("ignore:DataFrame._data is deprecated:FutureWarning"), pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ), ] diff --git a/pandas/tests/io/test_user_agent.py b/pandas/tests/io/test_user_agent.py index 0d9e0e48397aa..a892e51f2f28d 100644 --- a/pandas/tests/io/test_user_agent.py +++ b/pandas/tests/io/test_user_agent.py @@ -24,7 +24,7 @@ reason="GH 45651: This test can hang in our CI min_versions build", ), 
pytest.mark.filterwarnings( - "ignore:Passing a BlockManager to DataFrame:FutureWarning" + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ), ] diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 7c960c00b3d20..735c6131ba319 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -176,7 +176,7 @@ def test_pyarrow(df): pyarrow = pytest.importorskip("pyarrow") table = pyarrow.Table.from_pandas(df) msg = "Passing a BlockManager to DataFrame is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = table.to_pandas() tm.assert_frame_equal(result, df)