diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 17facf9e16f4b..8c27170f65353 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -666,20 +666,20 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): cdef tzinfo convert_timezone( - tzinfo tz_in, - tzinfo tz_out, - bint found_naive, - bint found_tz, - bint utc_convert, + tzinfo tz_in, + tzinfo tz_out, + bint found_naive, + bint found_tz, + bint utc_convert, ): """ Validate that ``tz_in`` can be converted/localized to ``tz_out``. Parameters ---------- - tz_in : tzinfo + tz_in : tzinfo or None Timezone info of element being processed. - tz_out : tzinfo + tz_out : tzinfo or None Timezone info of output. found_naive : bool Whether a timezone-naive element has been found so far. diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 0c99ae4b8e03d..5d7daec65c7d1 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -531,7 +531,7 @@ def assert_interval_array_equal( def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None: _check_isinstance(left, right, PeriodArray) - assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray") assert_attr_equal("freq", left, right, obj=obj) @@ -541,7 +541,7 @@ def assert_datetime_array_equal( __tracebackhide__ = True _check_isinstance(left, right, DatetimeArray) - assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray") if check_freq: assert_attr_equal("freq", left, right, obj=obj) assert_attr_equal("tz", left, right, obj=obj) @@ -552,7 +552,7 @@ def assert_timedelta_array_equal( ) -> None: __tracebackhide__ = True _check_isinstance(left, right, TimedeltaArray) - assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + 
assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray") if check_freq: assert_attr_equal("freq", left, right, obj=obj) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a3c201b402b0f..f11d031b2f622 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1646,13 +1646,11 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): class ExtensionArraySupportsAnyAll(ExtensionArray): - def any(self, *, skipna: bool = True) -> bool: # type: ignore[empty-body] - # error: Missing return statement - pass + def any(self, *, skipna: bool = True) -> bool: + raise AbstractMethodError(self) - def all(self, *, skipna: bool = True) -> bool: # type: ignore[empty-body] - # error: Missing return statement - pass + def all(self, *, skipna: bool = True) -> bool: + raise AbstractMethodError(self) class ExtensionOpsMixin: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a9af210e08741..bf7e28d5a4b98 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -11,7 +11,6 @@ Literal, Sequence, TypeVar, - Union, cast, overload, ) @@ -511,7 +510,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: result = self.copy() if copy else self elif is_categorical_dtype(dtype): - dtype = cast("Union[str, CategoricalDtype]", dtype) + dtype = cast(CategoricalDtype, dtype) # GH 10696/18593/18630 dtype = self.dtype.update_dtype(dtype) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index be20d825b0c80..4f01c4892db6c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -257,13 +257,6 @@ def _check_compatible_with(self, other: DTScalarOrNaT) -> None: """ raise AbstractMethodError(self) - # ------------------------------------------------------------------ - # NDArrayBackedExtensionArray compat - - @cache_readonly - def _data(self) -> np.ndarray: - return 
self._ndarray - # ------------------------------------------------------------------ def _box_func(self, x): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 60488a8ef9715..704897722e938 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1195,9 +1195,7 @@ def maybe_cast_to_datetime( # TODO: _from_sequence would raise ValueError in cases where # _ensure_nanosecond_dtype raises TypeError - # Incompatible types in assignment (expression has type "Union[dtype[Any], - # ExtensionDtype]", variable has type "Optional[dtype[Any]]") - dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment] + _ensure_nanosecond_dtype(dtype) if is_timedelta64_dtype(dtype): res = TimedeltaArray._from_sequence(value, dtype=dtype) @@ -1235,12 +1233,11 @@ def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarra return values -def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: +def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None: """ Convert dtypes with granularity less than nanosecond to nanosecond >>> _ensure_nanosecond_dtype(np.dtype("M8[us]")) - dtype('<M8[us]') >>> _ensure_nanosecond_dtype(np.dtype("M8[D]")) Traceback (most recent call last): @@ -1277,7 +1274,6 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: f"dtype={dtype} is not supported. 
Supported resolutions are 's', " "'ms', 'us', and 'ns'" ) - return dtype # TODO: other value-dependent functions to standardize here include diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index a225d2cd12eac..000b5ebbdd2f7 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -18,7 +18,6 @@ import pandas._libs.missing as libmissing from pandas._libs.tslibs import ( NaT, - Period, iNaT, ) @@ -749,10 +748,8 @@ def isna_all(arr: ArrayLike) -> bool: if dtype.kind == "f" and isinstance(dtype, np.dtype): checker = nan_checker - elif ( - (isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]) - or isinstance(dtype, DatetimeTZDtype) - or dtype.type is Period + elif (isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]) or isinstance( + dtype, (DatetimeTZDtype, PeriodDtype) ): # error: Incompatible types in assignment (expression has type # "Callable[[Any], Any]", variable has type "ufunc") diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0144aefedaa5f..218c0e33af823 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7475,7 +7475,7 @@ def _cmp_method(self, other, op): return self._construct_result(new_data) def _arith_method(self, other, op): - if ops.should_reindex_frame_op(self, other, op, 1, 1, None, None): + if ops.should_reindex_frame_op(self, other, op, 1, None, None): return ops.frame_arith_method_with_reindex(self, other, op) axis: Literal[1] = 1 # only relevant for Series other case diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 497e0ef724373..dba36066c7952 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1647,8 +1647,6 @@ def array_func(values: ArrayLike) -> ArrayLike: return result - # TypeError -> we may have an exception in trying to aggregate - # continue and exclude the block new_mgr = data.grouped_reduce(array_func) res = self._wrap_agged_manager(new_mgr) diff --git a/pandas/core/indexes/multi.py 
b/pandas/core/indexes/multi.py index f0b0ec23dba1a..9cbf3b6167305 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -841,7 +841,9 @@ def _set_levels( self._reset_cache() - def set_levels(self, levels, *, level=None, verify_integrity: bool = True): + def set_levels( + self, levels, *, level=None, verify_integrity: bool = True + ) -> MultiIndex: """ Set new levels on MultiIndex. Defaults to returning new index. @@ -856,8 +858,7 @@ def set_levels(self, levels, *, level=None, verify_integrity: bool = True): Returns ------- - new index (of same type and class...etc) or None - The same type as the caller or None if ``inplace=True``. + MultiIndex Examples -------- diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index feca755fd43db..91216a9618365 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -758,9 +758,9 @@ def fast_xs(self, loc: int) -> SingleArrayManager: result = dtype.construct_array_type()._from_sequence(values, dtype=dtype) # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT elif is_datetime64_ns_dtype(dtype): - result = DatetimeArray._from_sequence(values, dtype=dtype)._data + result = DatetimeArray._from_sequence(values, dtype=dtype)._ndarray elif is_timedelta64_ns_dtype(dtype): - result = TimedeltaArray._from_sequence(values, dtype=dtype)._data + result = TimedeltaArray._from_sequence(values, dtype=dtype)._ndarray else: result = np.array(values, dtype=dtype) return SingleArrayManager([result], [self._axes[1]]) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f1856fce83160..c8a6750e165ea 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2291,6 +2291,6 @@ def external_values(values: ArrayLike) -> ArrayLike: # NB: for datetime64tz this is different from np.asarray(values), since # that returns an object-dtype ndarray of Timestamps. 
# Avoid raising in .astype in casting from dt64tz to dt64 - return values._data + return values._ndarray else: return values diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index bfedaca093a8e..76d5fc8128a8f 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -6,7 +6,10 @@ from __future__ import annotations import operator -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + cast, +) import numpy as np @@ -312,7 +315,7 @@ def to_series(right): def should_reindex_frame_op( - left: DataFrame, right, op, axis, default_axis, fill_value, level + left: DataFrame, right, op, axis: int, fill_value, level ) -> bool: """ Check if this is an operation between DataFrames that will need to reindex. @@ -326,7 +329,7 @@ def should_reindex_frame_op( if not isinstance(right, ABCDataFrame): return False - if fill_value is None and level is None and axis is default_axis: + if fill_value is None and level is None and axis == 1: # TODO: any other cases we should handle here? 
# Intersection is always unique so we have to check the unique columns @@ -411,17 +414,16 @@ def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: AxisInt def flex_arith_method_FRAME(op): op_name = op.__name__.strip("_") - default_axis = "columns" na_op = get_array_op(op) doc = make_flex_doc(op_name, "dataframe") @Appender(doc) - def f(self, other, axis=default_axis, level=None, fill_value=None): + def f(self, other, axis: Axis = "columns", level=None, fill_value=None): + axis = self._get_axis_number(axis) if axis is not None else 1 + axis = cast(int, axis) - if should_reindex_frame_op( - self, other, op, axis, default_axis, fill_value, level - ): + if should_reindex_frame_op(self, other, op, axis, fill_value, level): return frame_arith_method_with_reindex(self, other, op) if isinstance(other, ABCSeries) and fill_value is not None: @@ -429,8 +431,6 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): # through the DataFrame path raise NotImplementedError(f"fill_value {fill_value} not supported.") - axis = self._get_axis_number(axis) if axis is not None else 1 - other = maybe_prepare_scalar_for_op(other, self.shape) self, other = align_method_FRAME(self, other, axis, flex=True, level=level) @@ -456,14 +456,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): def flex_comp_method_FRAME(op): op_name = op.__name__.strip("_") - default_axis = "columns" # because we are "flex" doc = _flex_comp_doc_FRAME.format( op_name=op_name, desc=_op_descriptions[op_name]["desc"] ) @Appender(doc) - def f(self, other, axis=default_axis, level=None): + def f(self, other, axis: Axis = "columns", level=None): axis = self._get_axis_number(axis) if axis is not None else 1 self, other = align_method_FRAME(self, other, axis, flex=True, level=level) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 6ed962c8f68e6..252eff8f9a823 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ 
b/pandas/tests/apply/test_invalid_arg.py @@ -355,7 +355,6 @@ def test_transform_wont_agg_series(string_series, func): @pytest.mark.parametrize( "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}] ) -@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper): # GH 35964 op = op_wrapper(all_reductions) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index b4f1c5404d178..c35962d7d2e96 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2437,7 +2437,7 @@ def test_dt64arr_addsub_object_dtype_2d(): assert isinstance(result, DatetimeArray) assert result.freq is None - tm.assert_numpy_array_equal(result._data, expected._data) + tm.assert_numpy_array_equal(result._ndarray, expected._ndarray) with tm.assert_produces_warning(PerformanceWarning): # Case where we expect to get a TimedeltaArray back diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index 992d047b1afef..6670d07a4c075 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -122,10 +122,10 @@ def test_freq_infer_raises(self): def test_copy(self): data = np.array([1, 2, 3], dtype="M8[ns]") arr = DatetimeArray(data, copy=False) - assert arr._data is data + assert arr._ndarray is data arr = DatetimeArray(data, copy=True) - assert arr._data is not data + assert arr._ndarray is not data class TestSequenceToDT64NS: diff --git a/pandas/tests/arrays/period/test_astype.py b/pandas/tests/arrays/period/test_astype.py index e9245c9ca786b..475a85ca4b644 100644 --- a/pandas/tests/arrays/period/test_astype.py +++ b/pandas/tests/arrays/period/test_astype.py @@ -42,12 +42,12 @@ def test_astype_copies(): result = arr.astype(np.int64, copy=False) # Add the `.base`, since 
we now use `.asi8` which returns a view. - # We could maybe override it in PeriodArray to return ._data directly. - assert result.base is arr._data + # We could maybe override it in PeriodArray to return ._ndarray directly. + assert result.base is arr._ndarray result = arr.astype(np.int64, copy=True) - assert result is not arr._data - tm.assert_numpy_array_equal(result, arr._data.view("i8")) + assert result is not arr._ndarray + tm.assert_numpy_array_equal(result, arr._ndarray.view("i8")) def test_astype_categorical(): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3f310d0efa2ca..fbd6f362bd9e7 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -220,7 +220,7 @@ def test_unbox_scalar(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") result = arr._unbox_scalar(arr[0]) - expected = arr._data.dtype.type + expected = arr._ndarray.dtype.type assert isinstance(result, expected) result = arr._unbox_scalar(NaT) @@ -350,13 +350,13 @@ def test_getitem_near_implementation_bounds(self): def test_getitem_2d(self, arr1d): # 2d slicing on a 1D array - expected = type(arr1d)(arr1d._data[:, np.newaxis], dtype=arr1d.dtype) + expected = type(arr1d)(arr1d._ndarray[:, np.newaxis], dtype=arr1d.dtype) result = arr1d[:, np.newaxis] tm.assert_equal(result, expected) # Lookup on a 2D array arr2d = expected - expected = type(arr2d)(arr2d._data[:3, 0], dtype=arr2d.dtype) + expected = type(arr2d)(arr2d._ndarray[:3, 0], dtype=arr2d.dtype) result = arr2d[:3, 0] tm.assert_equal(result, expected) @@ -366,7 +366,7 @@ def test_getitem_2d(self, arr1d): assert result == expected def test_iter_2d(self, arr1d): - data2d = arr1d._data[:3, np.newaxis] + data2d = arr1d._ndarray[:3, np.newaxis] arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) result = list(arr2d) assert len(result) == 3 @@ -376,7 +376,7 @@ def test_iter_2d(self, arr1d): assert 
x.dtype == arr1d.dtype def test_repr_2d(self, arr1d): - data2d = arr1d._data[:3, np.newaxis] + data2d = arr1d._ndarray[:3, np.newaxis] arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) result = repr(arr2d) @@ -632,7 +632,7 @@ def test_array_interface(self, datetime_index): # default asarray gives the same underlying data (for tz naive) result = np.asarray(arr) - expected = arr._data + expected = arr._ndarray assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, copy=False) @@ -641,7 +641,7 @@ def test_array_interface(self, datetime_index): # specifying M8[ns] gives the same result as default result = np.asarray(arr, dtype="datetime64[ns]") - expected = arr._data + expected = arr._ndarray assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, dtype="datetime64[ns]", copy=False) @@ -720,13 +720,13 @@ def test_array_i8_dtype(self, arr1d): assert result.base is None def test_from_array_keeps_base(self): - # Ensure that DatetimeArray._data.base isn't lost. + # Ensure that DatetimeArray._ndarray.base isn't lost. 
arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") dta = DatetimeArray(arr) - assert dta._data is arr + assert dta._ndarray is arr dta = DatetimeArray(arr[:0]) - assert dta._data.base is arr + assert dta._ndarray.base is arr def test_from_dti(self, arr1d): arr = arr1d @@ -941,7 +941,7 @@ def test_array_interface(self, timedelta_index): # default asarray gives the same underlying data result = np.asarray(arr) - expected = arr._data + expected = arr._ndarray assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, copy=False) @@ -950,7 +950,7 @@ def test_array_interface(self, timedelta_index): # specifying m8[ns] gives the same result as default result = np.asarray(arr, dtype="timedelta64[ns]") - expected = arr._data + expected = arr._ndarray assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, dtype="timedelta64[ns]", copy=False) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 89c9ba85fcfa9..cd58afe368960 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -659,7 +659,7 @@ def test_shift_fill_value(self): dti = pd.date_range("2016-01-01", periods=3) dta = dti._data - expected = DatetimeArray(np.roll(dta._data, 1)) + expected = DatetimeArray(np.roll(dta._ndarray, 1)) fv = dta[-1] for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]: diff --git a/pandas/tests/arrays/timedeltas/test_constructors.py b/pandas/tests/arrays/timedeltas/test_constructors.py index d24fabfeecb26..3a076a6828a98 100644 --- a/pandas/tests/arrays/timedeltas/test_constructors.py +++ b/pandas/tests/arrays/timedeltas/test_constructors.py @@ -51,11 +51,11 @@ def test_incorrect_dtype_raises(self): def test_copy(self): data = np.array([1, 2, 3], dtype="m8[ns]") arr = TimedeltaArray(data, copy=False) - assert arr._data is data + assert arr._ndarray is data arr = TimedeltaArray(data, copy=True) - assert arr._data 
is not data - assert arr._data.base is not data + assert arr._ndarray is not data + assert arr._ndarray.base is not data def test_from_sequence_dtype(self): msg = "dtype .*object.* cannot be converted to timedelta64" diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 703ac6c89fca8..f244b348c6763 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -237,11 +237,11 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): "arr, attr", [ (pd.Categorical(["a", "b"]), "_codes"), - (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), + (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_ndarray"), (pd.array([0, np.nan], dtype="Int64"), "_data"), (IntervalArray.from_breaks([0, 1]), "_left"), (SparseArray([0, 1]), "_sparse_values"), - (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), + (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_ndarray"), # tz-aware Datetime ( DatetimeArray( @@ -250,20 +250,14 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): ), dtype=DatetimeTZDtype(tz="US/Central"), ), - "_data", + "_ndarray", ), ], ) def test_array(arr, attr, index_or_series, request): box = index_or_series - warn = None - if arr.dtype.name in ("Sparse[int64, 0]") and box is pd.Index: - mark = pytest.mark.xfail(reason="Index cannot yet store sparse dtype") - request.node.add_marker(mark) - warn = FutureWarning - with tm.assert_produces_warning(warn): - result = box(arr, copy=False).array + result = box(arr, copy=False).array if attr: arr = getattr(arr, attr) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index e6f1675bb8bc8..eb6ad4b575414 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -443,15 +443,12 @@ def test_reduce_series( if not pa.types.is_boolean(pa_dtype): request.node.add_marker(xfail_mark) op_name = all_boolean_reductions - s = pd.Series(data) - 
result = getattr(s, op_name)(skipna=skipna) + ser = pd.Series(data) + result = getattr(ser, op_name)(skipna=skipna) assert result is (op_name == "any") class TestBaseGroupby(base.BaseGroupbyTests): - def test_groupby_agg_extension(self, data_for_grouping, request): - super().test_groupby_agg_extension(data_for_grouping) - def test_groupby_extension_no_sort(self, data_for_grouping, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype if pa.types.is_boolean(pa_dtype): @@ -515,9 +512,6 @@ def test_in_numeric_groupby(self, data_for_grouping, request): ) super().test_in_numeric_groupby(data_for_grouping) - @pytest.mark.filterwarnings( - "ignore:The default value of numeric_only:FutureWarning" - ) @pytest.mark.parametrize("as_index", [True, False]) def test_groupby_extension_agg(self, as_index, data_for_grouping, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype @@ -638,15 +632,17 @@ class TestBaseIndex(base.BaseIndexTests): class TestBaseInterface(base.BaseInterfaceTests): - @pytest.mark.xfail(reason="pyarrow.ChunkedArray does not support views.") + @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views.") def test_view(self, data): super().test_view(data) class TestBaseMissing(base.BaseMissingTests): - @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") def test_dropna_array(self, data_missing): - super().test_dropna_array(data_missing) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under6p0, check_stacklevel=False + ): + super().test_dropna_array(data_missing) def test_fillna_no_op_returns_copy(self, data): with tm.maybe_produces_warning( @@ -949,14 +945,26 @@ def test_combine_le(self, data_repeated): def test_combine_add(self, data_repeated, request): pa_dtype = next(data_repeated(1)).dtype.pyarrow_dtype - if pa.types.is_temporal(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - raises=TypeError, - reason=f"{pa_dtype} cannot be added to {pa_dtype}", - ) - ) - 
super().test_combine_add(data_repeated) + if pa.types.is_duration(pa_dtype): + # TODO: this fails on the scalar addition constructing 'expected' + # but not in the actual 'combine' call, so may be salvage-able + mark = pytest.mark.xfail( + raises=TypeError, + reason=f"{pa_dtype} cannot be added to {pa_dtype}", + ) + request.node.add_marker(mark) + super().test_combine_add(data_repeated) + + elif pa.types.is_temporal(pa_dtype): + # analogous to datetime64, these cannot be added + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + with pytest.raises(TypeError): + s1.combine(s2, lambda x1, x2: x1 + x2) + + else: + super().test_combine_add(data_repeated) def test_searchsorted(self, data_for_sorting, as_series, request): pa_dtype = data_for_sorting.dtype.pyarrow_dtype diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 8331bed881ce1..e27f9fe9995ad 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -340,8 +340,8 @@ def test_setitem_dt64tz(self, timezone_frame): v1 = df._mgr.arrays[1] v2 = df._mgr.arrays[2] tm.assert_extension_array_equal(v1, v2) - v1base = v1._data.base - v2base = v2._data.base + v1base = v1._ndarray.base + v2base = v2._ndarray.base assert v1base is None or (id(v1base) != id(v2base)) # with nan diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 5f648c76d0aa4..b533fc12f4a79 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -247,7 +247,6 @@ def test_rank_methods_frame(self): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) - @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_rank_descending(self, method, dtype): if "i" in dtype: df = self.df.dropna().astype(dtype) diff --git a/pandas/tests/frame/test_arithmetic.py 
b/pandas/tests/frame/test_arithmetic.py index 8aedac036c2c9..ca1c7b8d71071 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1185,7 +1185,6 @@ def test_zero_len_frame_with_series_corner_cases(): tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_frame_single_columns_object_sum_axis_1(): # GH 13758 data = { diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 6c6a923e363ae..4be754994bb28 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -145,7 +145,6 @@ class TestDataFrameAnalytics: # --------------------------------------------------------------------- # Reductions - @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "opname", @@ -186,7 +185,6 @@ def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname): if opname != "nunique": getattr(float_string_frame, opname)(axis=axis, numeric_only=True) - @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "opname", @@ -283,9 +281,6 @@ def kurt(x): assert_stat_op_calc("skew", skewness, float_frame_with_na) assert_stat_op_calc("kurt", kurt, float_frame_with_na) - # TODO: Ensure warning isn't emitted in the first place - # ignore mean of empty slice and all-NaN - @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_median(self, float_frame_with_na, int_frame): def wrapper(x): if isna(x).any(): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 86c8e36cb7bd4..4664052196797 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -286,7 +286,9 @@ def test_repr_column_name_unicode_truncation_bug(self): with 
option_context("display.max_columns", 20): assert "StringCol" in repr(df) - @pytest.mark.filterwarnings("ignore::FutureWarning") + @pytest.mark.filterwarnings( + "ignore:.*DataFrame.to_latex` is expected to utilise:FutureWarning" + ) def test_latex_repr(self): result = r"""\begin{tabular}{llll} \toprule diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index a06304af7a2d0..f1adff58325ce 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -577,7 +577,6 @@ def stretch(row): assert not isinstance(result, tm.SubclassedDataFrame) tm.assert_series_equal(result, expected) - @pytest.mark.filterwarnings("ignore:.*None will no longer:FutureWarning") def test_subclassed_reductions(self, all_reductions): # GH 25596 diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 03b917edd357b..b1a4eb3fb7dd8 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1075,7 +1075,6 @@ def test_mangle_series_groupby(self): tm.assert_frame_equal(result, expected) @pytest.mark.xfail(reason="GH-26611. 
kwargs for multi-agg.") - @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") def test_with_kwargs(self): f1 = lambda x, y, b=1: x.sum() + y + b f2 = lambda x, y, b=2: x.sum() + y * b diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 6a89c72354d04..eb667016b1e62 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -25,7 +25,6 @@ from pandas.io.formats.printing import pprint_thing -@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") def test_agg_partial_failure_raises(): # GH#43741 diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 5a130da4937fd..1d18e7dc6c2cf 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -70,7 +70,6 @@ def raw_frame(): @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("sort", [True, False]) -@pytest.mark.filterwarnings("ignore:The default value of numeric_only:FutureWarning") def test_regression_allowlist_methods(raw_frame, op, axis, skipna, sort): # GH6944 # GH 17537 diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py index 3f61a4ece66c0..e49238a9e6656 100644 --- a/pandas/tests/groupby/test_any_all.py +++ b/pandas/tests/groupby/test_any_all.py @@ -171,7 +171,6 @@ def test_object_type_missing_vals(bool_agg_func, data, expected_res, frame_or_se tm.assert_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) def test_object_NA_raises_with_skipna_false(bool_agg_func): # GH#37501 diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index b35c4158bf420..b9e2bba0b0d31 100644 --- a/pandas/tests/groupby/test_categorical.py +++ 
b/pandas/tests/groupby/test_categorical.py @@ -311,7 +311,6 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:.*value of numeric_only.*:FutureWarning") def test_observed(observed): # multiple groupers, don't re-expand the output space # of the grouper @@ -1316,7 +1315,6 @@ def test_groupby_categorical_axis_1(code): tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_groupby_cat_preserves_structure(observed, ordered): # GH 28787 df = DataFrame( diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index ef39aabd83d22..7e5bfff53054a 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -137,9 +137,6 @@ def df(self): ) return df - @pytest.mark.filterwarnings( - "ignore:The default value of numeric_only:FutureWarning" - ) @pytest.mark.parametrize("method", ["mean", "median"]) def test_averages(self, df, method): # mean / median @@ -217,9 +214,6 @@ def test_first_last(self, df, method): self._check(df, method, expected_columns, expected_columns_numeric) - @pytest.mark.filterwarnings( - "ignore:The default value of numeric_only:FutureWarning" - ) @pytest.mark.parametrize("method", ["sum", "cumsum"]) def test_sum_cumsum(self, df, method): @@ -233,9 +227,6 @@ def test_sum_cumsum(self, df, method): self._check(df, method, expected_columns, expected_columns_numeric) - @pytest.mark.filterwarnings( - "ignore:The default value of numeric_only:FutureWarning" - ) @pytest.mark.parametrize("method", ["prod", "cumprod"]) def test_prod_cumprod(self, df, method): @@ -496,7 +487,6 @@ def test_groupby_non_arithmetic_agg_int_like_precision(i): ], ) @pytest.mark.parametrize("numeric_only", [True, False]) -@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): # GH 25444 df = DataFrame( @@ -1610,7 
+1600,6 @@ def test_corrwith_with_1_axis(): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:.* is deprecated:FutureWarning") def test_multiindex_group_all_columns_when_empty(groupby_func): # GH 32464 df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a7104c2e21049..59a8141be7db4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1263,8 +1263,6 @@ def test_groupby_mixed_type_columns(): tm.assert_frame_equal(result, expected) -# TODO: Ensure warning isn't emitted in the first place -@pytest.mark.filterwarnings("ignore:Mean of:RuntimeWarning") def test_cython_grouper_series_bug_noncontig(): arr = np.empty((100, 100)) arr.fill(np.nan) @@ -1879,9 +1877,6 @@ def test_pivot_table_values_key_error(): @pytest.mark.parametrize( "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"] ) -@pytest.mark.filterwarnings("ignore:The default value of numeric_only:FutureWarning") -@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") -@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_empty_groupby(columns, keys, values, method, op, request, using_array_manager): # GH8093 & GH26411 override_dtype = None diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 5c23d1dfd83c8..73b742591cd10 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -47,7 +47,7 @@ def test_int64_nocopy(self): # and copy=False arr = np.arange(10, dtype=np.int64) tdi = TimedeltaIndex(arr, copy=False) - assert tdi._data._data.base is arr + assert tdi._data._ndarray.base is arr def test_infer_from_tdi(self): # GH#23539 diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 
b3e59da4b0130..727d0bade2c2c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -883,7 +883,7 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli): if tz is None: # TODO(EA2D): we can make this no-copy in tz-naive case too assert ser.dtype == dti.dtype - assert ser._values._data is values._data + assert ser._values._ndarray is values._ndarray else: assert ser._values is values @@ -911,7 +911,7 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer_sli, key, bo if tz is None: # TODO(EA2D): we can make this no-copy in tz-naive case too assert ser.dtype == dti.dtype - assert ser._values._data is values._data + assert ser._values._ndarray is values._ndarray else: assert ser._values is values @@ -925,7 +925,7 @@ def test_setitem_td64_scalar(self, indexer_sli, scalar): values._validate_setitem_value(scalar) indexer_sli(ser)[0] = scalar - assert ser._values._data is values._data + assert ser._values._ndarray is values._ndarray @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) @pytest.mark.parametrize( @@ -945,7 +945,7 @@ def test_setitem_td64_string_values(self, indexer_sli, key, box): values._validate_setitem_value(newvals) indexer_sli(ser)[key] = newvals - assert ser._values._data is values._data + assert ser._values._ndarray is values._ndarray def test_extension_array_cross_section(): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index ecf247efd74bf..8d2d165e991f5 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -469,7 +469,7 @@ def test_copy(self, mgr): assert cp_blk.values.base is blk.values.base else: # DatetimeTZBlock has DatetimeIndex values - assert cp_blk.values._data.base is blk.values._data.base + assert cp_blk.values._ndarray.base is blk.values._ndarray.base # copy(deep=True) consolidates, so the block-wise assertions will # fail is mgr 
is not consolidated diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index eaeb769a94c38..e58df00c65608 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -10,10 +10,6 @@ pyarrow = pytest.importorskip("pyarrow", minversion="1.0.1") -filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse") - - -@filter_sparse @pytest.mark.single_cpu class TestFeather: def check_error_on_write(self, df, exc, err_msg): diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index faaa61e84a351..86fabf0ed0ef2 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -471,7 +471,7 @@ def test_getitem_boolean_dt64_copies(self): ser = Series(dti._data) res = ser[key] - assert res._values._data.base is None + assert res._values._ndarray.base is None # compare with numeric case for reference ser2 = Series(range(4)) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 7d77a755e082b..d731aeee1b39b 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -418,14 +418,14 @@ def test_setitem_invalidates_datetime_index_freq(self): ts = dti[1] ser = Series(dti) assert ser._values is not dti - assert ser._values._data.base is not dti._data._data.base + assert ser._values._ndarray.base is not dti._data._ndarray.base assert dti.freq == "D" ser.iloc[1] = NaT assert ser._values.freq is None # check that the DatetimeIndex was not altered in place assert ser._values is not dti - assert ser._values._data.base is not dti._data._data.base + assert ser._values._ndarray.base is not dti._data._ndarray.base assert dti[1] == ts assert dti.freq == "D" @@ -435,9 +435,9 @@ def test_dt64tz_setitem_does_not_mutate_dti(self): ts = dti[0] ser = Series(dti) assert ser._values is not dti - assert ser._values._data.base is not 
dti._data._data.base + assert ser._values._ndarray.base is not dti._data._ndarray.base assert ser._mgr.arrays[0] is not dti - assert ser._mgr.arrays[0]._data.base is not dti._data._data.base + assert ser._mgr.arrays[0]._ndarray.base is not dti._data._ndarray.base ser[::3] = NaT assert ser[0] is NaT diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index aaccad0f2bd70..a67f3ec708f24 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -11,7 +11,7 @@ def test_xs_datetimelike_wrapping(): # GH#31630 a case where we shouldn't wrap datetime64 in Timestamp - arr = date_range("2016-01-01", periods=3)._data._data + arr = date_range("2016-01-01", periods=3)._data._ndarray ser = Series(arr, dtype=object) for i in range(len(ser)): diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 99c7e0a1a8956..439971084fba8 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -20,14 +20,6 @@ # html5lib "ignore:Using or importing the ABCs from:DeprecationWarning" ) -@pytest.mark.filterwarnings( - # fastparquet - "ignore:pandas.core.index is deprecated:FutureWarning" -) -@pytest.mark.filterwarnings( - # pandas_datareader - "ignore:pandas.util.testing is deprecated:FutureWarning" -) @pytest.mark.filterwarnings( # https://github.com/pandas-dev/pandas/issues/35252 "ignore:Distutils:UserWarning"