From 6d7be3c6c91ea0a253cf45efd8a078a96f74cb0d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Dec 2022 16:31:03 -0800 Subject: [PATCH 01/10] CLN: Assorted --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/_libs/intervaltree.pxi.in | 3 +- pandas/_libs/tslibs/offsets.pyi | 3 ++ pandas/_libs/tslibs/util.pxd | 2 +- pandas/core/accessor.py | 2 ++ pandas/core/apply.py | 2 +- pandas/core/arrays/datetimelike.py | 22 +++++------- pandas/core/arrays/interval.py | 2 ++ pandas/core/frame.py | 4 ++- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 10 ++---- pandas/core/indexes/multi.py | 2 +- pandas/core/indexing.py | 2 +- pandas/core/internals/array_manager.py | 2 -- pandas/core/internals/blocks.py | 18 +++------- pandas/core/series.py | 1 - pandas/core/tools/datetimes.py | 5 +-- pandas/plotting/_matplotlib/core.py | 2 +- pandas/plotting/_matplotlib/hist.py | 2 +- pandas/tests/arithmetic/test_timedelta64.py | 2 +- pandas/tests/arrays/test_datetimes.py | 18 +++------- pandas/tests/arrays/test_timedeltas.py | 17 ++-------- pandas/tests/frame/indexing/test_indexing.py | 1 + pandas/tests/indexes/test_base.py | 1 - pandas/tests/reshape/concat/test_concat.py | 2 +- .../scalar/timedelta/test_constructors.py | 25 +++++++------- .../tests/scalar/timedelta/test_timedelta.py | 16 ++++----- .../tests/scalar/timestamp/test_timestamp.py | 34 +++++++++---------- .../tests/scalar/timestamp/test_timezones.py | 7 ++-- .../tests/scalar/timestamp/test_unary_ops.py | 12 +++---- pandas/tests/tools/test_to_datetime.py | 3 +- 31 files changed, 91 insertions(+), 135 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 12b0d90e68ab9..fa8c76dab1fd9 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -477,7 +477,7 @@ Other API changes - Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`) - Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) - :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`) -- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects()`` to do type inference on the result (:issue:`49999`) +- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects(copy=False)`` to do type inference on the result (:issue:`49999`) - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data that is not round numbers, this now raises ``ValueError`` instead of silently retaining the float dtype; do ``Series(data)`` or ``DataFrame(data)`` to get the old behavior, and ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the specified dtype (:issue:`49599`) - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 0d7c96a6f2f2b..67fee7c5fbadd 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -121,9 +121,8 @@ cdef class IntervalTree(IntervalMixin): """ if self._na_count > 0: return False - values = [self.right, self.left] - sort_order = np.lexsort(values) + sort_order = self.left_sorter return is_monotonic(sort_order, False)[0] def get_indexer(self, scalar_t[:] target) -> np.ndarray: diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index eacdf17b0b4d3..f1aca4717665c 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -12,6 +12,7 @@ from typing import ( import numpy as np +from pandas._libs.tslibs.nattype import NaTType from pandas._typing import npt from .timedeltas import Timedelta @@ -51,6 +52,8 @@ class BaseOffset: def __radd__(self, other: _DatetimeT) -> _DatetimeT: ... @overload def __radd__(self, other: _TimedeltaT) -> _TimedeltaT: ... + @overload + def __radd__(self, other: NaTType) -> NaTType: ... def __sub__(self: _BaseOffsetT, other: BaseOffset) -> _BaseOffsetT: ... @overload def __rsub__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ... diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index a28aace5d2f15..d8bc9363f1a23 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -83,7 +83,7 @@ cdef inline bint is_integer_object(object obj) nogil: cdef inline bint is_float_object(object obj) nogil: """ - Cython equivalent of `isinstance(val, (float, np.complex_))` + Cython equivalent of `isinstance(val, (float, np.float_))` Parameters ---------- diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 87d9c39b0407c..7390b04da4787 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -6,6 +6,7 @@ """ from __future__ import annotations +from typing import final import warnings from pandas.util._decorators import doc @@ -16,6 +17,7 @@ class DirNamesMixin: _accessors: set[str] = set() _hidden_attrs: frozenset[str] = frozenset() + @final def _dir_deletions(self) -> set[str]: """ Delete unwanted __dir__ for this object. diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 722de91ba5246..02a9444dd4f97 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -955,7 +955,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: result.index = res_index # infer dtypes - result = result.infer_objects() + result = result.infer_objects(copy=False) return result diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 63940741c3fe3..1b0cffc824009 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -189,7 +189,7 @@ class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray): Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray Assumes that __new__/__init__ defines: - _data + _ndarray _freq and that the inheriting class has methods: @@ -1422,9 +1422,8 @@ def __add__(self, other): # as is_integer returns True for these if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) - result = cast("PeriodArray", self)._addsub_int_array_or_scalar( - other * self.freq.n, operator.add - ) + obj = cast("PeriodArray", self) + result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.add) # array-like others elif is_timedelta64_dtype(other_dtype): @@ -1439,9 +1438,8 @@ def __add__(self, other): elif is_integer_dtype(other_dtype): if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) - result = cast("PeriodArray", self)._addsub_int_array_or_scalar( - other * self.freq.n, operator.add - ) + obj = cast("PeriodArray", self) + result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.add) else: # Includes Categorical, other ExtensionArrays # For PeriodDtype, if self is a TimedeltaArray and other is a @@ -1481,9 +1479,8 @@ def __sub__(self, other): # as is_integer returns True for these if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) - result = cast("PeriodArray", self)._addsub_int_array_or_scalar( - other * self.freq.n, operator.sub - ) + obj = cast("PeriodArray", self) + result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.sub) elif isinstance(other, Period): result = self._sub_periodlike(other) @@ -1504,9 +1501,8 @@ def __sub__(self, other): elif is_integer_dtype(other_dtype): if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) - result = cast("PeriodArray", self)._addsub_int_array_or_scalar( - other * self.freq.n, operator.sub - ) + obj = cast("PeriodArray", self) + result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.sub) else: # Includes ExtensionArrays, float_dtype return NotImplemented diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 3c6686b5c0173..f7107a1f7c83c 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -812,6 +812,8 @@ def argsort( ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs) if ascending and kind == "quicksort" and na_position == "last": + # TODO: in an IntervalIndex we can re-use the cached + # IntervalTree.left_sorter return np.lexsort((self.right, self.left)) # TODO: other cases we can use lexsort for? much more performant. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 21b3a0c033702..e085bb1c7cec0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9423,7 +9423,9 @@ def _append( row_df = other.to_frame().T # infer_objects is needed for # test_append_empty_frame_to_series_with_dateutil_tz - other = row_df.infer_objects().rename_axis(index.names, copy=False) + other = row_df.infer_objects(copy=False).rename_axis( + index.names, copy=False + ) elif isinstance(other, list): if not other: pass diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 955f65585963d..d85cf7e025413 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1762,7 +1762,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: result = result.T # Note: we really only care about inferring numeric dtypes here - return self._reindex_output(result).infer_objects() + return self._reindex_output(result).infer_objects(copy=False) def _iterate_column_groupbys(self, obj: DataFrame | Series): for i, colname in enumerate(obj.columns): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 11e8769615470..70cf9c144c4a8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -32,7 +32,6 @@ class providing the base-class of operations. cast, final, ) -import warnings import numpy as np @@ -2199,13 +2198,8 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): counts = self.count() result_ilocs = result.columns.get_indexer_for(cols) count_ilocs = counts.columns.get_indexer_for(cols) - with warnings.catch_warnings(): - # TODO(2.0): once iloc[:, foo] = bar depecation is enforced, - # this catching will be unnecessary - warnings.filterwarnings( - "ignore", ".*will attempt to set the values inplace.*" - ) - result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) + + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result @final diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 48cf6000d100d..4be3d0c17c50d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3379,7 +3379,7 @@ def _reorder_indexer( new_order = np.arange(n)[::-1][indexer] elif isinstance(k, slice) and k.start is None and k.stop is None: # slice(None) should not determine order GH#31330 - new_order = np.ones((n,))[indexer] + new_order = np.ones((n,), dtype=np.intp)[indexer] else: # For all other case, use the same order as the level new_order = np.arange(n)[indexer] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fa702770a0990..a49f573e29a89 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2162,7 +2162,7 @@ def _setitem_with_indexer_missing(self, indexer, value): if not has_dtype: # i.e. if we already had a Series or ndarray, keep that # dtype. But if we had a list or dict, then do inference - df = df.infer_objects() + df = df.infer_objects(copy=False) self.obj._mgr = df._mgr else: self.obj._mgr = self.obj._append(value)._mgr diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index c06b6c7a9a651..b8ef925362e7b 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -863,8 +863,6 @@ def column_setitem( This is a method on the ArrayManager level, to avoid creating an intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) - - """ if not is_integer(loc): raise TypeError("The column index should be an integer") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 713413500f64c..f65722ac9685b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -325,6 +325,7 @@ def apply(self, func, **kwargs) -> list[Block]: return self._split_op_result(result) + @final def reduce(self, func) -> list[Block]: # We will apply the function and reshape the result into a single-row # Block with the same mgr_locs; squeezing will be done at a higher level @@ -1957,19 +1958,6 @@ class ObjectBlock(NumpyBlock): __slots__ = () is_object = True - def reduce(self, func) -> list[Block]: - """ - For object-dtype, we operate column-wise. - """ - assert self.ndim == 2 - - res = func(self.values) - - assert isinstance(res, np.ndarray) - assert res.ndim == 1 - res = res.reshape(-1, 1) - return [self.make_block_same_class(res)] - @maybe_split def convert( self, @@ -1980,7 +1968,9 @@ def convert( attempt to cast any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! """ - if self.dtype != object: + if self.dtype != _dtype_obj: + # GH#50067 this should be impossible in ObjectBlock, but until + # that is fixed, we short-circuit here. return [self] values = self.values diff --git a/pandas/core/series.py b/pandas/core/series.py index 1bdf92e1dcf02..d08194de24561 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -856,7 +856,6 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: # coercion __float__ = _coerce_method(float) - __long__ = _coerce_method(int) __int__ = _coerce_method(int) # ---------------------------------------------------------------------- diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a97a866a8406e..6ce2ccb3a2925 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -398,10 +398,7 @@ def _convert_listlike_datetimes( elif is_datetime64_ns_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): - try: - return DatetimeIndex(arg, tz=tz, name=name) - except ValueError: - pass + return DatetimeIndex(arg, tz=tz, name=name) elif utc: # DatetimeArray, DatetimeIndex return arg.tz_localize("utc") diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 3a634a60e784e..1d7f63c1e2f64 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -602,7 +602,7 @@ def _compute_plot_data(self): # GH16953, infer_objects is needed as fallback, for ``Series`` # with ``dtype == object`` - data = data.infer_objects() + data = data.infer_objects(copy=False) include_type = [np.number, "datetime", "datetimetz", "timedelta"] # GH23719, allow plotting boolean diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 1add485e03760..aca33d27eaff2 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -80,7 +80,7 @@ def _args_adjust(self) -> None: def _calculate_bins(self, data: DataFrame) -> np.ndarray: """Calculate bins given data""" - nd_values = data.infer_objects()._get_numeric_data() + nd_values = data.infer_objects(copy=False)._get_numeric_data() values = np.ravel(nd_values) values = values[~isna(values)] diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index f3ea741607692..c29b96f3722d1 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1720,7 +1720,7 @@ def test_td64arr_floordiv_td64arr_with_nat( expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) expected = tm.box_expected(expected, xbox) if box is DataFrame and using_array_manager: - # INFO(ArrayManager) floorfiv returns integer, and ArrayManager + # INFO(ArrayManager) floordiv returns integer, and ArrayManager # performs ops column-wise and thus preserves int64 dtype for # columns without missing values expected[[0, 1]] = expected[[0, 1]].astype("int64") diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index f9c32108f0ef0..d9abaf85544af 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -16,7 +16,6 @@ npy_unit_to_abbrev, tz_compare, ) -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -34,15 +33,6 @@ def unit(self, request): """Fixture returning parametrized time units""" return request.param - @pytest.fixture - def reso(self, unit): - """Fixture returning datetime resolution for a given time unit""" - return { - "s": NpyDatetimeUnit.NPY_FR_s.value, - "ms": NpyDatetimeUnit.NPY_FR_ms.value, - "us": NpyDatetimeUnit.NPY_FR_us.value, - }[unit] - @pytest.fixture def dtype(self, unit, tz_naive_fixture): tz = tz_naive_fixture @@ -71,19 +61,19 @@ def dta(self, dta_dti): dta, dti = dta_dti return dta - def test_non_nano(self, unit, reso, dtype): + def test_non_nano(self, unit, dtype): arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") dta = DatetimeArray._simple_new(arr, dtype=dtype) assert dta.dtype == dtype - assert dta[0]._creso == reso + assert dta[0].unit == unit assert tz_compare(dta.tz, dta[0].tz) assert (dta[0] == dta[:1]).all() @pytest.mark.parametrize( "field", DatetimeArray._field_ops + DatetimeArray._bool_ops ) - def test_fields(self, unit, reso, field, dtype, dta_dti): + def test_fields(self, unit, field, dtype, dta_dti): dta, dti = dta_dti assert (dti == dta).all() @@ -166,7 +156,7 @@ def test_time_date(self, dta_dti, meth): expected = getattr(dti, meth) tm.assert_numpy_array_equal(result, expected) - def test_format_native_types(self, unit, reso, dtype, dta_dti): + def test_format_native_types(self, unit, dtype, dta_dti): # In this case we should get the same formatted values with our nano # version dti._data as we do with the non-nano dta dta, dti = dta_dti diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 2fd7ccc9cf338..0c7e3ffffb3ac 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit - import pandas as pd from pandas import Timedelta import pandas._testing as tm @@ -19,28 +17,17 @@ class TestNonNano: def unit(self, request): return request.param - @pytest.fixture - def reso(self, unit): - if unit == "s": - return NpyDatetimeUnit.NPY_FR_s.value - elif unit == "ms": - return NpyDatetimeUnit.NPY_FR_ms.value - elif unit == "us": - return NpyDatetimeUnit.NPY_FR_us.value - else: - raise NotImplementedError(unit) - @pytest.fixture def tda(self, unit): arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") return TimedeltaArray._simple_new(arr, dtype=arr.dtype) - def test_non_nano(self, unit, reso): + def test_non_nano(self, unit): arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype) assert tda.dtype == arr.dtype - assert tda[0]._creso == reso + assert tda[0].unit == unit @pytest.mark.parametrize("field", TimedeltaArray._field_ops) def test_fields(self, tda, field): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index c0b04b1f8e80f..3b151f3d3338a 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1455,6 +1455,7 @@ def test_iloc_ea_series_indexer_with_na(self): with pytest.raises(ValueError, match=msg): df.iloc[:, indexer.values] + @pytest.mark.filterwarnings("ignore:indexing past lexsort.*:PerformanceWarning") @pytest.mark.parametrize("indexer", [True, (True,)]) @pytest.mark.parametrize("dtype", [bool, "boolean"]) def test_loc_bool_multiindex(self, dtype, indexer): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c2c1073eef36d..969c6059b8d31 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -891,7 +891,6 @@ def test_isin_nan_common_float64(self, nulls_fixture): "index", [ Index(["qux", "baz", "foo", "bar"]), - # float64 Index overrides isin, so must be checked separately NumericIndex([1.0, 2.0, 3.0, 4.0], dtype=np.float64), ], ) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index ea526c95f20e0..3dc6f2404444b 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -745,7 +745,7 @@ def test_concat_retain_attrs(data): @td.skip_array_manager_invalid_test @pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"]) @pytest.mark.parametrize("empty_dtype", [None, "float64", "object"]) -def test_concat_ignore_emtpy_object_float(empty_dtype, df_dtype): +def test_concat_ignore_empty_object_float(empty_dtype, df_dtype): # https://github.com/pandas-dev/pandas/issues/45637 df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype) empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index e4120478370d1..42f51a18c070e 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -5,7 +5,6 @@ import pytest from pandas._libs.tslibs import OutOfBoundsTimedelta -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas import ( NaT, @@ -45,12 +44,12 @@ def test_from_td64_retain_resolution(): td = Timedelta(obj) assert td.value == obj.view("i8") - assert td._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert td.unit == "ms" # Case where we cast to nearest-supported reso obj2 = np.timedelta64(1234, "D") td2 = Timedelta(obj2) - assert td2._creso == NpyDatetimeUnit.NPY_FR_s.value + assert td2.unit == "s" assert td2 == obj2 assert td2.days == 1234 @@ -58,7 +57,7 @@ def test_from_td64_retain_resolution(): obj3 = np.timedelta64(1000000000000000000, "us") td3 = Timedelta(obj3) assert td3.total_seconds() == 1000000000000 - assert td3._creso == NpyDatetimeUnit.NPY_FR_us.value + assert td3.unit == "us" def test_from_pytimedelta_us_reso(): @@ -66,31 +65,31 @@ def test_from_pytimedelta_us_reso(): td = timedelta(days=4, minutes=3) result = Timedelta(td) assert result.to_pytimedelta() == td - assert result._creso == NpyDatetimeUnit.NPY_FR_us.value + assert result.unit == "us" def test_from_tick_reso(): tick = offsets.Nano() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta(tick).unit == "ns" tick = offsets.Micro() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_us.value + assert Timedelta(tick).unit == "us" tick = offsets.Milli() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert Timedelta(tick).unit == "ms" tick = offsets.Second() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick).unit == "s" # everything above Second gets cast to the closest supported reso: second tick = offsets.Minute() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick).unit == "s" tick = offsets.Hour() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick).unit == "s" tick = offsets.Day() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick).unit == "s" def test_construction(): @@ -282,7 +281,7 @@ def test_overflow_on_construction(): # used to overflow before non-ns support td = Timedelta(timedelta(days=13 * 19999)) - assert td._creso == NpyDatetimeUnit.NPY_FR_us.value + assert td.unit == "us" assert td.days == 13 * 19999 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 924f756edb233..94a810a026b2a 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -33,7 +33,7 @@ def test_as_unit(self): res = td.as_unit("us") assert res.value == td.value // 1000 - assert res._creso == NpyDatetimeUnit.NPY_FR_us.value + assert res.unit == "us" rt = res.as_unit("ns") assert rt.value == td.value @@ -41,7 +41,7 @@ def test_as_unit(self): res = td.as_unit("ms") assert res.value == td.value // 1_000_000 - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" rt = res.as_unit("ns") assert rt.value == td.value @@ -49,7 +49,7 @@ def test_as_unit(self): res = td.as_unit("s") assert res.value == td.value // 1_000_000_000 - assert res._creso == NpyDatetimeUnit.NPY_FR_s.value + assert res.unit == "s" rt = res.as_unit("ns") assert rt.value == td.value @@ -66,7 +66,7 @@ def test_as_unit_overflows(self): res = td.as_unit("ms") assert res.value == us // 1000 - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" def test_as_unit_rounding(self): td = Timedelta(microseconds=1500) @@ -75,7 +75,7 @@ def test_as_unit_rounding(self): expected = Timedelta(milliseconds=1) assert res == expected - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): @@ -311,13 +311,13 @@ def test_timedelta_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timedelta.min == Timedelta(NaT.value + 1) - assert Timedelta.min._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.min.unit == "ns" assert Timedelta.max == Timedelta(np.iinfo(np.int64).max) - assert Timedelta.max._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.max.unit == "ns" assert Timedelta.resolution == Timedelta(1) - assert Timedelta.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.resolution.unit == "ns" class TestTimedeltaUnaryOps: diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 5446e16c189b0..1ab4f2d8dc3d8 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -874,10 +874,10 @@ def test_sub_datetimelike_mismatched_reso(self, ts_tz): # this construction ensures we get cases with other._creso < ts._creso # and cases with other._creso > ts._creso unit = { - NpyDatetimeUnit.NPY_FR_us.value: "ms", - NpyDatetimeUnit.NPY_FR_ms.value: "s", - NpyDatetimeUnit.NPY_FR_s.value: "us", - }[ts._creso] + "us": "ms", + "ms": "s", + "s": "us", + }[ts.unit] other = ts.as_unit(unit) assert other._creso != ts._creso @@ -922,10 +922,10 @@ def test_sub_timedeltalike_mismatched_reso(self, ts_tz): # this construction ensures we get cases with other._creso < ts._creso # and cases with other._creso > ts._creso unit = { - NpyDatetimeUnit.NPY_FR_us.value: "ms", - NpyDatetimeUnit.NPY_FR_ms.value: "s", - NpyDatetimeUnit.NPY_FR_s.value: "us", - }[ts._creso] + "us": "ms", + "ms": "s", + "s": "us", + }[ts.unit] other = Timedelta(0).as_unit(unit) assert other._creso != ts._creso @@ -973,7 +973,7 @@ def test_sub_timedelta64_mismatched_reso(self, ts_tz): res = ts + np.timedelta64(1, "ns") exp = ts.as_unit("ns") + np.timedelta64(1, "ns") assert exp == res - assert exp._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert exp.unit == "ns" def test_min(self, ts): assert ts.min <= ts @@ -996,13 +996,13 @@ def test_timestamp_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timestamp.min == Timestamp(NaT.value + 1) - assert Timestamp.min._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.min.unit == "ns" assert Timestamp.max == Timestamp(np.iinfo(np.int64).max) - assert Timestamp.max._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.max.unit == "ns" assert Timestamp.resolution == Timedelta(1) - assert Timestamp.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.resolution.unit == "ns" class TestAsUnit: @@ -1013,7 +1013,7 @@ def test_as_unit(self): res = ts.as_unit("us") assert res.value == ts.value // 1000 - assert res._creso == NpyDatetimeUnit.NPY_FR_us.value + assert res.unit == "us" rt = res.as_unit("ns") assert rt.value == ts.value @@ -1021,7 +1021,7 @@ def test_as_unit(self): res = ts.as_unit("ms") assert res.value == ts.value // 1_000_000 - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" rt = res.as_unit("ns") assert rt.value == ts.value @@ -1029,7 +1029,7 @@ def test_as_unit(self): res = ts.as_unit("s") assert res.value == ts.value // 1_000_000_000 - assert res._creso == NpyDatetimeUnit.NPY_FR_s.value + assert res.unit == "s" rt = res.as_unit("ns") assert rt.value == ts.value @@ -1046,7 +1046,7 @@ def test_as_unit_overflows(self): res = ts.as_unit("ms") assert res.value == us // 1000 - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" def test_as_unit_rounding(self): ts = Timestamp(1_500_000) # i.e. 1500 microseconds @@ -1055,7 +1055,7 @@ def test_as_unit_rounding(self): expected = Timestamp(1_000_000) # i.e. 1 millisecond assert res == expected - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 354a23fc257da..264b39cb49b9a 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -22,7 +22,6 @@ ) from pandas._libs.tslibs import timezones -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td @@ -92,11 +91,11 @@ def test_tz_localize_ambiguous_bool(self, unit): result = ts.tz_localize("US/Central", ambiguous=True) assert result == expected0 - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit result = ts.tz_localize("US/Central", ambiguous=False) assert result == expected1 - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit def test_tz_localize_ambiguous(self): ts = Timestamp("2014-11-02 01:00") @@ -293,7 +292,7 @@ def test_timestamp_tz_localize_nonexistent_shift( assert result == expected.replace(microsecond=0, nanosecond=0) else: assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit @pytest.mark.parametrize("offset", [-1, 1]) def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, warsaw): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 1c1f3acc8331f..5a487169ab8f5 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -176,14 +176,14 @@ def test_round_dst_border_ambiguous(self, method, unit): # result = getattr(ts, method)("H", ambiguous=True) assert result == ts - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit result = getattr(ts, method)("H", ambiguous=False) expected = Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( "Europe/Madrid" ) assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit result = getattr(ts, method)("H", ambiguous="NaT") assert result is NaT @@ -210,7 +210,7 @@ def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): result = getattr(ts, method)(freq, nonexistent="shift_forward") expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit result = getattr(ts, method)(freq, nonexistent="NaT") assert result is NaT @@ -490,7 +490,7 @@ def test_replace_dst_border(self, unit): result = t.replace(hour=3) expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @@ -504,7 +504,7 @@ def test_replace_dst_fold(self, fold, tz, unit): tz, ambiguous=not fold ) assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit # -------------------------------------------------------------- # Timestamp.normalize @@ -517,7 +517,7 @@ def test_normalize(self, tz_naive_fixture, arg, unit): result = ts.normalize() expected = Timestamp("2013-11-30", tz=tz) assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit def test_normalize_pre_epoch_dates(self): # GH: 36294 diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 83e40f5f1d98b..7a3ed306bdcfc 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -22,7 +22,6 @@ iNaT, parsing, ) -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, @@ -862,7 +861,7 @@ def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): # as of 2022-09-28, the Timestamp constructor has been updated # to cast to M8[s] but to_datetime has not ts = Timestamp(dt) - assert ts._creso == NpyDatetimeUnit.NPY_FR_s.value + assert ts.unit == "s" assert ts.asm8 == dt msg = "Out of bounds nanosecond timestamp" From c662d603519152e4c904c3fbefb7b043c08428d3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Dec 2022 10:46:53 -0800 Subject: [PATCH 02/10] troubleshoot --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4be3d0c17c50d..48cf6000d100d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3379,7 +3379,7 @@ def _reorder_indexer( new_order = np.arange(n)[::-1][indexer] elif isinstance(k, slice) and k.start is None and k.stop is None: # slice(None) should not determine order GH#31330 - new_order = np.ones((n,), dtype=np.intp)[indexer] + new_order = np.ones((n,))[indexer] else: # For all other case, use the same order as the level new_order = np.arange(n)[indexer] From 295cbf215f70fac895563893fb5e2953cbab38eb Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Dec 2022 10:47:58 -0800 Subject: [PATCH 03/10] troubleshoot --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a49f573e29a89..fa702770a0990 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2162,7 +2162,7 @@ def _setitem_with_indexer_missing(self, indexer, value): if not has_dtype: # i.e. if we already had a Series or ndarray, keep that # dtype. But if we had a list or dict, then do inference - df = df.infer_objects(copy=False) + df = df.infer_objects() self.obj._mgr = df._mgr else: self.obj._mgr = self.obj._append(value)._mgr From dae4c74ec327e26f2c57a97327c0eecf1fd2f8b6 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Dec 2022 12:18:10 -0800 Subject: [PATCH 04/10] troubleshoot --- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 10 ++++-- pandas/tests/arithmetic/test_timedelta64.py | 2 +- pandas/tests/arrays/test_datetimes.py | 18 +++++++--- pandas/tests/arrays/test_timedeltas.py | 17 ++++++++-- pandas/tests/frame/indexing/test_indexing.py | 1 - pandas/tests/indexes/test_base.py | 1 + pandas/tests/reshape/concat/test_concat.py | 2 +- .../scalar/timedelta/test_constructors.py | 25 +++++++------- .../tests/scalar/timedelta/test_timedelta.py | 16 ++++----- .../tests/scalar/timestamp/test_timestamp.py | 34 +++++++++---------- .../tests/scalar/timestamp/test_timezones.py | 7 ++-- .../tests/scalar/timestamp/test_unary_ops.py | 12 +++---- pandas/tests/tools/test_to_datetime.py | 3 +- 14 files changed, 91 insertions(+), 59 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d85cf7e025413..955f65585963d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1762,7 +1762,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: result = result.T # Note: we really only care about inferring numeric dtypes here - return self._reindex_output(result).infer_objects(copy=False) + return self._reindex_output(result).infer_objects() def _iterate_column_groupbys(self, obj: DataFrame | Series): for i, colname in enumerate(obj.columns): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 70cf9c144c4a8..11e8769615470 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -32,6 +32,7 @@ class providing the base-class of operations. cast, final, ) +import warnings import numpy as np @@ -2198,8 +2199,13 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): counts = self.count() result_ilocs = result.columns.get_indexer_for(cols) count_ilocs = counts.columns.get_indexer_for(cols) - - result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) + with warnings.catch_warnings(): + # TODO(2.0): once iloc[:, foo] = bar depecation is enforced, + # this catching will be unnecessary + warnings.filterwarnings( + "ignore", ".*will attempt to set the values inplace.*" + ) + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result @final diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index c29b96f3722d1..f3ea741607692 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1720,7 +1720,7 @@ def test_td64arr_floordiv_td64arr_with_nat( expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) expected = tm.box_expected(expected, xbox) if box is DataFrame and using_array_manager: - # INFO(ArrayManager) floordiv returns integer, and ArrayManager + # INFO(ArrayManager) floorfiv returns integer, and ArrayManager # performs ops column-wise and thus preserves int64 dtype for # columns without missing values expected[[0, 1]] = expected[[0, 1]].astype("int64") diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index d9abaf85544af..f9c32108f0ef0 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -16,6 +16,7 @@ npy_unit_to_abbrev, tz_compare, ) +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -33,6 +34,15 @@ def unit(self, request): """Fixture returning parametrized time units""" return request.param + @pytest.fixture + def reso(self, unit): + """Fixture returning datetime resolution for a given time unit""" + return { + "s": NpyDatetimeUnit.NPY_FR_s.value, + "ms": NpyDatetimeUnit.NPY_FR_ms.value, + "us": NpyDatetimeUnit.NPY_FR_us.value, + }[unit] + @pytest.fixture def dtype(self, unit, tz_naive_fixture): tz = tz_naive_fixture @@ -61,19 +71,19 @@ def dta(self, dta_dti): dta, dti = dta_dti return dta - def test_non_nano(self, unit, dtype): + def test_non_nano(self, unit, reso, dtype): arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") dta = DatetimeArray._simple_new(arr, dtype=dtype) assert dta.dtype == dtype - assert dta[0].unit == unit + assert dta[0]._creso == reso assert tz_compare(dta.tz, dta[0].tz) assert (dta[0] == dta[:1]).all() @pytest.mark.parametrize( "field", DatetimeArray._field_ops + DatetimeArray._bool_ops ) - def test_fields(self, unit, field, dtype, dta_dti): + def test_fields(self, unit, reso, field, dtype, dta_dti): dta, dti = dta_dti assert (dti == dta).all() @@ -156,7 +166,7 @@ def test_time_date(self, dta_dti, meth): expected = getattr(dti, meth) tm.assert_numpy_array_equal(result, expected) - def test_format_native_types(self, unit, dtype, dta_dti): + def test_format_native_types(self, unit, reso, dtype, dta_dti): # In this case we should get the same formatted values with our nano # version dti._data as we do with the non-nano dta dta, dti = dta_dti diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 0c7e3ffffb3ac..2fd7ccc9cf338 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit + import pandas as pd from pandas import Timedelta import pandas._testing as tm @@ -17,17 +19,28 @@ class TestNonNano: def unit(self, request): return request.param + @pytest.fixture + def reso(self, unit): + if unit == "s": + return NpyDatetimeUnit.NPY_FR_s.value + elif unit == "ms": + return NpyDatetimeUnit.NPY_FR_ms.value + elif unit == "us": + return NpyDatetimeUnit.NPY_FR_us.value + else: + raise NotImplementedError(unit) + @pytest.fixture def tda(self, unit): arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") return TimedeltaArray._simple_new(arr, dtype=arr.dtype) - def test_non_nano(self, unit): + def test_non_nano(self, unit, reso): arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype) assert tda.dtype == arr.dtype - assert tda[0].unit == unit + assert tda[0]._creso == reso @pytest.mark.parametrize("field", TimedeltaArray._field_ops) def test_fields(self, tda, field): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 3b151f3d3338a..c0b04b1f8e80f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1455,7 +1455,6 @@ def test_iloc_ea_series_indexer_with_na(self): with pytest.raises(ValueError, match=msg): df.iloc[:, indexer.values] - @pytest.mark.filterwarnings("ignore:indexing past lexsort.*:PerformanceWarning") @pytest.mark.parametrize("indexer", [True, (True,)]) @pytest.mark.parametrize("dtype", [bool, "boolean"]) def test_loc_bool_multiindex(self, dtype, indexer): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 969c6059b8d31..c2c1073eef36d 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -891,6 +891,7 @@ def test_isin_nan_common_float64(self, nulls_fixture): "index", [ Index(["qux", "baz", "foo", "bar"]), + # float64 Index overrides isin, so must be checked separately NumericIndex([1.0, 2.0, 3.0, 4.0], dtype=np.float64), ], ) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 3dc6f2404444b..ea526c95f20e0 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -745,7 +745,7 @@ def test_concat_retain_attrs(data): @td.skip_array_manager_invalid_test @pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"]) @pytest.mark.parametrize("empty_dtype", [None, "float64", "object"]) -def test_concat_ignore_empty_object_float(empty_dtype, df_dtype): +def test_concat_ignore_emtpy_object_float(empty_dtype, df_dtype): # https://github.com/pandas-dev/pandas/issues/45637 df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype) empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 42f51a18c070e..e4120478370d1 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -5,6 +5,7 @@ import pytest from pandas._libs.tslibs import OutOfBoundsTimedelta +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas import ( NaT, @@ -44,12 +45,12 @@ def test_from_td64_retain_resolution(): td = Timedelta(obj) assert td.value == obj.view("i8") - assert td.unit == "ms" + assert td._creso == NpyDatetimeUnit.NPY_FR_ms.value # Case where we cast to nearest-supported reso obj2 = np.timedelta64(1234, "D") td2 = Timedelta(obj2) - assert td2.unit == "s" + assert td2._creso == NpyDatetimeUnit.NPY_FR_s.value assert td2 == obj2 assert td2.days == 1234 @@ -57,7 +58,7 @@ def test_from_td64_retain_resolution(): obj3 = np.timedelta64(1000000000000000000, "us") td3 = Timedelta(obj3) assert td3.total_seconds() == 1000000000000 - assert td3.unit == "us" + assert td3._creso == NpyDatetimeUnit.NPY_FR_us.value def test_from_pytimedelta_us_reso(): @@ -65,31 +66,31 @@ def test_from_pytimedelta_us_reso(): td = timedelta(days=4, minutes=3) result = Timedelta(td) assert result.to_pytimedelta() == td - assert result.unit == "us" + assert result._creso == NpyDatetimeUnit.NPY_FR_us.value def test_from_tick_reso(): tick = offsets.Nano() - assert Timedelta(tick).unit == "ns" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ns.value tick = offsets.Micro() - assert Timedelta(tick).unit == "us" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_us.value tick = offsets.Milli() - assert Timedelta(tick).unit == "ms" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ms.value tick = offsets.Second() - assert Timedelta(tick).unit == "s" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value # everything above Second gets cast to the closest supported reso: second tick = offsets.Minute() - assert Timedelta(tick).unit == "s" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Hour() - assert Timedelta(tick).unit == "s" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Day() - assert Timedelta(tick).unit == "s" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value def test_construction(): @@ -281,7 +282,7 @@ def test_overflow_on_construction(): # used to overflow before non-ns support td = Timedelta(timedelta(days=13 * 19999)) - assert td.unit == "us" + assert td._creso == NpyDatetimeUnit.NPY_FR_us.value assert td.days == 13 * 19999 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 94a810a026b2a..924f756edb233 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -33,7 +33,7 @@ def test_as_unit(self): res = td.as_unit("us") assert res.value == td.value // 1000 - assert res.unit == "us" + assert res._creso == NpyDatetimeUnit.NPY_FR_us.value rt = res.as_unit("ns") assert rt.value == td.value @@ -41,7 +41,7 @@ def test_as_unit(self): res = td.as_unit("ms") assert res.value == td.value // 1_000_000 - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value rt = res.as_unit("ns") assert rt.value == td.value @@ -49,7 +49,7 @@ def test_as_unit(self): res = td.as_unit("s") assert res.value == td.value // 1_000_000_000 - assert res.unit == "s" + assert res._creso == NpyDatetimeUnit.NPY_FR_s.value rt = res.as_unit("ns") assert rt.value == td.value @@ -66,7 +66,7 @@ def test_as_unit_overflows(self): res = td.as_unit("ms") assert res.value == us // 1000 - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value def test_as_unit_rounding(self): td = Timedelta(microseconds=1500) @@ -75,7 +75,7 @@ def test_as_unit_rounding(self): expected = Timedelta(milliseconds=1) assert res == expected - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): @@ -311,13 +311,13 @@ def test_timedelta_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timedelta.min == Timedelta(NaT.value + 1) - assert Timedelta.min.unit == "ns" + assert Timedelta.min._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timedelta.max == Timedelta(np.iinfo(np.int64).max) - assert Timedelta.max.unit == "ns" + assert Timedelta.max._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timedelta.resolution == Timedelta(1) - assert Timedelta.resolution.unit == "ns" + assert Timedelta.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value class TestTimedeltaUnaryOps: diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 1ab4f2d8dc3d8..5446e16c189b0 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -874,10 +874,10 @@ def test_sub_datetimelike_mismatched_reso(self, ts_tz): # this construction ensures we get cases with other._creso < ts._creso # and cases with other._creso > ts._creso unit = { - "us": "ms", - "ms": "s", - "s": "us", - }[ts.unit] + NpyDatetimeUnit.NPY_FR_us.value: "ms", + NpyDatetimeUnit.NPY_FR_ms.value: "s", + NpyDatetimeUnit.NPY_FR_s.value: "us", + }[ts._creso] other = ts.as_unit(unit) assert other._creso != ts._creso @@ -922,10 +922,10 @@ def test_sub_timedeltalike_mismatched_reso(self, ts_tz): # this construction ensures we get cases with other._creso < ts._creso # and cases with other._creso > ts._creso unit = { - "us": "ms", - "ms": "s", - "s": "us", - }[ts.unit] + NpyDatetimeUnit.NPY_FR_us.value: "ms", + NpyDatetimeUnit.NPY_FR_ms.value: "s", + NpyDatetimeUnit.NPY_FR_s.value: "us", + }[ts._creso] other = Timedelta(0).as_unit(unit) assert other._creso != ts._creso @@ -973,7 +973,7 @@ def test_sub_timedelta64_mismatched_reso(self, ts_tz): res = ts + np.timedelta64(1, "ns") exp = ts.as_unit("ns") + np.timedelta64(1, "ns") assert exp == res - assert exp.unit == "ns" + assert exp._creso == NpyDatetimeUnit.NPY_FR_ns.value def test_min(self, ts): assert ts.min <= ts @@ -996,13 +996,13 @@ def test_timestamp_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timestamp.min == Timestamp(NaT.value + 1) - assert Timestamp.min.unit == "ns" + assert Timestamp.min._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timestamp.max == Timestamp(np.iinfo(np.int64).max) - assert Timestamp.max.unit == "ns" + assert Timestamp.max._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timestamp.resolution == Timedelta(1) - assert Timestamp.resolution.unit == "ns" + assert Timestamp.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value class TestAsUnit: @@ -1013,7 +1013,7 @@ def test_as_unit(self): res = ts.as_unit("us") assert res.value == ts.value // 1000 - assert res.unit == "us" + assert res._creso == NpyDatetimeUnit.NPY_FR_us.value rt = res.as_unit("ns") assert rt.value == ts.value @@ -1021,7 +1021,7 @@ def test_as_unit(self): res = ts.as_unit("ms") assert res.value == ts.value // 1_000_000 - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value rt = res.as_unit("ns") assert rt.value == ts.value @@ -1029,7 +1029,7 @@ def test_as_unit(self): res = ts.as_unit("s") assert res.value == ts.value // 1_000_000_000 - assert res.unit == "s" + assert res._creso == NpyDatetimeUnit.NPY_FR_s.value rt = res.as_unit("ns") assert rt.value == ts.value @@ -1046,7 +1046,7 @@ def test_as_unit_overflows(self): res = ts.as_unit("ms") assert res.value == us // 1000 - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value def test_as_unit_rounding(self): ts = Timestamp(1_500_000) # i.e. 1500 microseconds @@ -1055,7 +1055,7 @@ def test_as_unit_rounding(self): expected = Timestamp(1_000_000) # i.e. 1 millisecond assert res == expected - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 264b39cb49b9a..354a23fc257da 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -22,6 +22,7 @@ ) from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td @@ -91,11 +92,11 @@ def test_tz_localize_ambiguous_bool(self, unit): result = ts.tz_localize("US/Central", ambiguous=True) assert result == expected0 - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = ts.tz_localize("US/Central", ambiguous=False) assert result == expected1 - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_tz_localize_ambiguous(self): ts = Timestamp("2014-11-02 01:00") @@ -292,7 +293,7 @@ def test_timestamp_tz_localize_nonexistent_shift( assert result == expected.replace(microsecond=0, nanosecond=0) else: assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("offset", [-1, 1]) def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, warsaw): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 5a487169ab8f5..1c1f3acc8331f 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -176,14 +176,14 @@ def test_round_dst_border_ambiguous(self, method, unit): # result = getattr(ts, method)("H", ambiguous=True) assert result == ts - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous=False) expected = Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( "Europe/Madrid" ) assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous="NaT") assert result is NaT @@ -210,7 +210,7 @@ def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): result = getattr(ts, method)(freq, nonexistent="shift_forward") expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)(freq, nonexistent="NaT") assert result is NaT @@ -490,7 +490,7 @@ def test_replace_dst_border(self, unit): result = t.replace(hour=3) expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @@ -504,7 +504,7 @@ def test_replace_dst_fold(self, fold, tz, unit): tz, ambiguous=not fold ) assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value # -------------------------------------------------------------- # Timestamp.normalize @@ -517,7 +517,7 @@ def test_normalize(self, tz_naive_fixture, arg, unit): result = ts.normalize() expected = Timestamp("2013-11-30", tz=tz) assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_normalize_pre_epoch_dates(self): # GH: 36294 diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7a3ed306bdcfc..83e40f5f1d98b 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -22,6 +22,7 @@ iNaT, parsing, ) +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, @@ -861,7 +862,7 @@ def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): # as of 2022-09-28, the Timestamp constructor has been updated # to cast to M8[s] but to_datetime has not ts = Timestamp(dt) - assert ts.unit == "s" + assert ts._creso == NpyDatetimeUnit.NPY_FR_s.value assert ts.asm8 == dt msg = "Out of bounds nanosecond timestamp" From ad57715520deed53c6d0f65089aafc75539346c2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Dec 2022 16:53:31 -0800 Subject: [PATCH 05/10] restore --- pandas/tests/arithmetic/test_timedelta64.py | 2 +- pandas/tests/arrays/test_datetimes.py | 18 +++------- pandas/tests/arrays/test_timedeltas.py | 17 ++-------- pandas/tests/frame/indexing/test_indexing.py | 1 + pandas/tests/indexes/test_base.py | 1 - pandas/tests/reshape/concat/test_concat.py | 2 +- .../scalar/timedelta/test_constructors.py | 25 +++++++------- .../tests/scalar/timedelta/test_timedelta.py | 16 ++++----- .../tests/scalar/timestamp/test_timestamp.py | 34 +++++++++---------- .../tests/scalar/timestamp/test_timezones.py | 7 ++-- .../tests/scalar/timestamp/test_unary_ops.py | 12 +++---- pandas/tests/tools/test_to_datetime.py | 3 +- 12 files changed, 56 insertions(+), 82 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index f3ea741607692..c29b96f3722d1 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1720,7 +1720,7 @@ def test_td64arr_floordiv_td64arr_with_nat( expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) expected = tm.box_expected(expected, xbox) if box is DataFrame and using_array_manager: - # INFO(ArrayManager) floorfiv returns integer, and ArrayManager + # INFO(ArrayManager) floordiv returns integer, and ArrayManager # performs ops column-wise and thus preserves int64 dtype for # columns without missing values expected[[0, 1]] = expected[[0, 1]].astype("int64") diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index f9c32108f0ef0..d9abaf85544af 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -16,7 +16,6 @@ npy_unit_to_abbrev, tz_compare, ) -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -34,15 +33,6 @@ def unit(self, request): """Fixture returning parametrized time units""" return request.param - @pytest.fixture - def reso(self, unit): - """Fixture returning datetime resolution for a given time unit""" - return { - "s": NpyDatetimeUnit.NPY_FR_s.value, - "ms": NpyDatetimeUnit.NPY_FR_ms.value, - "us": NpyDatetimeUnit.NPY_FR_us.value, - }[unit] - @pytest.fixture def dtype(self, unit, tz_naive_fixture): tz = tz_naive_fixture @@ -71,19 +61,19 @@ def dta(self, dta_dti): dta, dti = dta_dti return dta - def test_non_nano(self, unit, reso, dtype): + def test_non_nano(self, unit, dtype): arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") dta = DatetimeArray._simple_new(arr, dtype=dtype) assert dta.dtype == dtype - assert dta[0]._creso == reso + assert dta[0].unit == unit assert tz_compare(dta.tz, dta[0].tz) assert (dta[0] == dta[:1]).all() @pytest.mark.parametrize( "field", DatetimeArray._field_ops + DatetimeArray._bool_ops ) - def test_fields(self, unit, reso, field, dtype, dta_dti): + def test_fields(self, unit, field, dtype, dta_dti): dta, dti = dta_dti assert (dti == dta).all() @@ -166,7 +156,7 @@ def test_time_date(self, dta_dti, meth): expected = getattr(dti, meth) tm.assert_numpy_array_equal(result, expected) - def test_format_native_types(self, unit, reso, dtype, dta_dti): + def test_format_native_types(self, unit, dtype, dta_dti): # In this case we should get the same formatted values with our nano # version dti._data as we do with the non-nano dta dta, dti = dta_dti diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 2fd7ccc9cf338..0c7e3ffffb3ac 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit - import pandas as pd from pandas import Timedelta import pandas._testing as tm @@ -19,28 +17,17 @@ class TestNonNano: def unit(self, request): return request.param - @pytest.fixture - def reso(self, unit): - if unit == "s": - return NpyDatetimeUnit.NPY_FR_s.value - elif unit == "ms": - return NpyDatetimeUnit.NPY_FR_ms.value - elif unit == "us": - return NpyDatetimeUnit.NPY_FR_us.value - else: - raise NotImplementedError(unit) - @pytest.fixture def tda(self, unit): arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") return TimedeltaArray._simple_new(arr, dtype=arr.dtype) - def test_non_nano(self, unit, reso): + def test_non_nano(self, unit): arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype) assert tda.dtype == arr.dtype - assert tda[0]._creso == reso + assert tda[0].unit == unit @pytest.mark.parametrize("field", TimedeltaArray._field_ops) def test_fields(self, tda, field): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index c0b04b1f8e80f..3b151f3d3338a 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1455,6 +1455,7 @@ def test_iloc_ea_series_indexer_with_na(self): with pytest.raises(ValueError, match=msg): df.iloc[:, indexer.values] + @pytest.mark.filterwarnings("ignore:indexing past lexsort.*:PerformanceWarning") @pytest.mark.parametrize("indexer", [True, (True,)]) @pytest.mark.parametrize("dtype", [bool, "boolean"]) def test_loc_bool_multiindex(self, dtype, indexer): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c2c1073eef36d..969c6059b8d31 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -891,7 +891,6 @@ def test_isin_nan_common_float64(self, nulls_fixture): "index", [ Index(["qux", "baz", "foo", "bar"]), - # float64 Index overrides isin, so must be checked separately NumericIndex([1.0, 2.0, 3.0, 4.0], dtype=np.float64), ], ) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index ea526c95f20e0..3dc6f2404444b 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -745,7 +745,7 @@ def test_concat_retain_attrs(data): @td.skip_array_manager_invalid_test @pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"]) @pytest.mark.parametrize("empty_dtype", [None, "float64", "object"]) -def test_concat_ignore_emtpy_object_float(empty_dtype, df_dtype): +def test_concat_ignore_empty_object_float(empty_dtype, df_dtype): # https://github.com/pandas-dev/pandas/issues/45637 df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype) empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index e4120478370d1..42f51a18c070e 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -5,7 +5,6 @@ import pytest from pandas._libs.tslibs import OutOfBoundsTimedelta -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas import ( NaT, @@ -45,12 +44,12 @@ def test_from_td64_retain_resolution(): td = Timedelta(obj) assert td.value == obj.view("i8") - assert td._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert td.unit == "ms" # Case where we cast to nearest-supported reso obj2 = np.timedelta64(1234, "D") td2 = Timedelta(obj2) - assert td2._creso == NpyDatetimeUnit.NPY_FR_s.value + assert td2.unit == "s" assert td2 == obj2 assert td2.days == 1234 @@ -58,7 +57,7 @@ def test_from_td64_retain_resolution(): obj3 = np.timedelta64(1000000000000000000, "us") td3 = Timedelta(obj3) assert td3.total_seconds() == 1000000000000 - assert td3._creso == NpyDatetimeUnit.NPY_FR_us.value + assert td3.unit == "us" def test_from_pytimedelta_us_reso(): @@ -66,31 +65,31 @@ def test_from_pytimedelta_us_reso(): td = timedelta(days=4, minutes=3) result = Timedelta(td) assert result.to_pytimedelta() == td - assert result._creso == NpyDatetimeUnit.NPY_FR_us.value + assert result.unit == "us" def test_from_tick_reso(): tick = offsets.Nano() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta(tick).unit == "ns" tick = offsets.Micro() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_us.value + assert Timedelta(tick).unit == "us" tick = offsets.Milli() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert Timedelta(tick).unit == "ms" tick = offsets.Second() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick).unit == "s" # everything above Second gets cast to the closest supported reso: second tick = offsets.Minute() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick).unit == "s" tick = offsets.Hour() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick).unit == "s" tick = offsets.Day() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + assert Timedelta(tick).unit == "s" def test_construction(): @@ -282,7 +281,7 @@ def test_overflow_on_construction(): # used to overflow before non-ns support td = Timedelta(timedelta(days=13 * 19999)) - assert td._creso == NpyDatetimeUnit.NPY_FR_us.value + assert td.unit == "us" assert td.days == 13 * 19999 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 924f756edb233..94a810a026b2a 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -33,7 +33,7 @@ def test_as_unit(self): res = td.as_unit("us") assert res.value == td.value // 1000 - assert res._creso == NpyDatetimeUnit.NPY_FR_us.value + assert res.unit == "us" rt = res.as_unit("ns") assert rt.value == td.value @@ -41,7 +41,7 @@ def test_as_unit(self): res = td.as_unit("ms") assert res.value == td.value // 1_000_000 - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" rt = res.as_unit("ns") assert rt.value == td.value @@ -49,7 +49,7 @@ def test_as_unit(self): res = td.as_unit("s") assert res.value == td.value // 1_000_000_000 - assert res._creso == NpyDatetimeUnit.NPY_FR_s.value + assert res.unit == "s" rt = res.as_unit("ns") assert rt.value == td.value @@ -66,7 +66,7 @@ def test_as_unit_overflows(self): res = td.as_unit("ms") assert res.value == us // 1000 - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" def test_as_unit_rounding(self): td = Timedelta(microseconds=1500) @@ -75,7 +75,7 @@ def test_as_unit_rounding(self): expected = Timedelta(milliseconds=1) assert res == expected - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): @@ -311,13 +311,13 @@ def test_timedelta_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timedelta.min == Timedelta(NaT.value + 1) - assert Timedelta.min._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.min.unit == "ns" assert Timedelta.max == Timedelta(np.iinfo(np.int64).max) - assert Timedelta.max._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.max.unit == "ns" assert Timedelta.resolution == Timedelta(1) - assert Timedelta.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timedelta.resolution.unit == "ns" class TestTimedeltaUnaryOps: diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 5446e16c189b0..1ab4f2d8dc3d8 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -874,10 +874,10 @@ def test_sub_datetimelike_mismatched_reso(self, ts_tz): # this construction ensures we get cases with other._creso < ts._creso # and cases with other._creso > ts._creso unit = { - NpyDatetimeUnit.NPY_FR_us.value: "ms", - NpyDatetimeUnit.NPY_FR_ms.value: "s", - NpyDatetimeUnit.NPY_FR_s.value: "us", - }[ts._creso] + "us": "ms", + "ms": "s", + "s": "us", + }[ts.unit] other = ts.as_unit(unit) assert other._creso != ts._creso @@ -922,10 +922,10 @@ def test_sub_timedeltalike_mismatched_reso(self, ts_tz): # this construction ensures we get cases with other._creso < ts._creso # and cases with other._creso > ts._creso unit = { - NpyDatetimeUnit.NPY_FR_us.value: "ms", - NpyDatetimeUnit.NPY_FR_ms.value: "s", - NpyDatetimeUnit.NPY_FR_s.value: "us", - }[ts._creso] + "us": "ms", + "ms": "s", + "s": "us", + }[ts.unit] other = Timedelta(0).as_unit(unit) assert other._creso != ts._creso @@ -973,7 +973,7 @@ def test_sub_timedelta64_mismatched_reso(self, ts_tz): res = ts + np.timedelta64(1, "ns") exp = ts.as_unit("ns") + np.timedelta64(1, "ns") assert exp == res - assert exp._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert exp.unit == "ns" def test_min(self, ts): assert ts.min <= ts @@ -996,13 +996,13 @@ def test_timestamp_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timestamp.min == Timestamp(NaT.value + 1) - assert Timestamp.min._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.min.unit == "ns" assert Timestamp.max == Timestamp(np.iinfo(np.int64).max) - assert Timestamp.max._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.max.unit == "ns" assert Timestamp.resolution == Timedelta(1) - assert Timestamp.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value + assert Timestamp.resolution.unit == "ns" class TestAsUnit: @@ -1013,7 +1013,7 @@ def test_as_unit(self): res = ts.as_unit("us") assert res.value == ts.value // 1000 - assert res._creso == NpyDatetimeUnit.NPY_FR_us.value + assert res.unit == "us" rt = res.as_unit("ns") assert rt.value == ts.value @@ -1021,7 +1021,7 @@ def test_as_unit(self): res = ts.as_unit("ms") assert res.value == ts.value // 1_000_000 - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" rt = res.as_unit("ns") assert rt.value == ts.value @@ -1029,7 +1029,7 @@ def test_as_unit(self): res = ts.as_unit("s") assert res.value == ts.value // 1_000_000_000 - assert res._creso == NpyDatetimeUnit.NPY_FR_s.value + assert res.unit == "s" rt = res.as_unit("ns") assert rt.value == ts.value @@ -1046,7 +1046,7 @@ def test_as_unit_overflows(self): res = ts.as_unit("ms") assert res.value == us // 1000 - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" def test_as_unit_rounding(self): ts = Timestamp(1_500_000) # i.e. 1500 microseconds @@ -1055,7 +1055,7 @@ def test_as_unit_rounding(self): expected = Timestamp(1_000_000) # i.e. 1 millisecond assert res == expected - assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value + assert res.unit == "ms" assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 354a23fc257da..264b39cb49b9a 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -22,7 +22,6 @@ ) from pandas._libs.tslibs import timezones -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td @@ -92,11 +91,11 @@ def test_tz_localize_ambiguous_bool(self, unit): result = ts.tz_localize("US/Central", ambiguous=True) assert result == expected0 - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit result = ts.tz_localize("US/Central", ambiguous=False) assert result == expected1 - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit def test_tz_localize_ambiguous(self): ts = Timestamp("2014-11-02 01:00") @@ -293,7 +292,7 @@ def test_timestamp_tz_localize_nonexistent_shift( assert result == expected.replace(microsecond=0, nanosecond=0) else: assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit @pytest.mark.parametrize("offset", [-1, 1]) def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, warsaw): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 1c1f3acc8331f..5a487169ab8f5 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -176,14 +176,14 @@ def test_round_dst_border_ambiguous(self, method, unit): # result = getattr(ts, method)("H", ambiguous=True) assert result == ts - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit result = getattr(ts, method)("H", ambiguous=False) expected = Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( "Europe/Madrid" ) assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit result = getattr(ts, method)("H", ambiguous="NaT") assert result is NaT @@ -210,7 +210,7 @@ def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): result = getattr(ts, method)(freq, nonexistent="shift_forward") expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit result = getattr(ts, method)(freq, nonexistent="NaT") assert result is NaT @@ -490,7 +490,7 @@ def test_replace_dst_border(self, unit): result = t.replace(hour=3) expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @@ -504,7 +504,7 @@ def test_replace_dst_fold(self, fold, tz, unit): tz, ambiguous=not fold ) assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit # -------------------------------------------------------------- # Timestamp.normalize @@ -517,7 +517,7 @@ def test_normalize(self, tz_naive_fixture, arg, unit): result = ts.normalize() expected = Timestamp("2013-11-30", tz=tz) assert result == expected - assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value + assert result.unit == unit def test_normalize_pre_epoch_dates(self): # GH: 36294 diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 83e40f5f1d98b..7a3ed306bdcfc 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -22,7 +22,6 @@ iNaT, parsing, ) -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, @@ -862,7 +861,7 @@ def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): # as of 2022-09-28, the Timestamp constructor has been updated # to cast to M8[s] but to_datetime has not ts = Timestamp(dt) - assert ts._creso == NpyDatetimeUnit.NPY_FR_s.value + assert ts.unit == "s" assert ts.asm8 == dt msg = "Out of bounds nanosecond timestamp" From 44e1a47d0741187c8b44bc4ffc15c75ef7667b63 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Dec 2022 17:24:07 -0800 Subject: [PATCH 06/10] restore --- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 10 ++-------- pandas/core/indexes/multi.py | 2 +- pandas/core/indexing.py | 2 +- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 955f65585963d..d85cf7e025413 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1762,7 +1762,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: result = result.T # Note: we really only care about inferring numeric dtypes here - return self._reindex_output(result).infer_objects() + return self._reindex_output(result).infer_objects(copy=False) def _iterate_column_groupbys(self, obj: DataFrame | Series): for i, colname in enumerate(obj.columns): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 11e8769615470..70cf9c144c4a8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -32,7 +32,6 @@ class providing the base-class of operations. cast, final, ) -import warnings import numpy as np @@ -2199,13 +2198,8 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): counts = self.count() result_ilocs = result.columns.get_indexer_for(cols) count_ilocs = counts.columns.get_indexer_for(cols) - with warnings.catch_warnings(): - # TODO(2.0): once iloc[:, foo] = bar depecation is enforced, - # this catching will be unnecessary - warnings.filterwarnings( - "ignore", ".*will attempt to set the values inplace.*" - ) - result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) + + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result @final diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 48cf6000d100d..4be3d0c17c50d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3379,7 +3379,7 @@ def _reorder_indexer( new_order = np.arange(n)[::-1][indexer] elif isinstance(k, slice) and k.start is None and k.stop is None: # slice(None) should not determine order GH#31330 - new_order = np.ones((n,))[indexer] + new_order = np.ones((n,), dtype=np.intp)[indexer] else: # For all other case, use the same order as the level new_order = np.arange(n)[indexer] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fa702770a0990..a49f573e29a89 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2162,7 +2162,7 @@ def _setitem_with_indexer_missing(self, indexer, value): if not has_dtype: # i.e. if we already had a Series or ndarray, keep that # dtype. But if we had a list or dict, then do inference - df = df.infer_objects() + df = df.infer_objects(copy=False) self.obj._mgr = df._mgr else: self.obj._mgr = self.obj._append(value)._mgr From 1a3f91d8db0061a10f31d8b9e6146ee9b3019142 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Dec 2022 17:25:06 -0800 Subject: [PATCH 07/10] troubleshoot --- .../scalar/timedelta/test_constructors.py | 25 +++++++------- .../tests/scalar/timedelta/test_timedelta.py | 16 ++++----- .../tests/scalar/timestamp/test_timestamp.py | 34 +++++++++---------- .../tests/scalar/timestamp/test_timezones.py | 7 ++-- .../tests/scalar/timestamp/test_unary_ops.py | 12 +++---- 5 files changed, 48 insertions(+), 46 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 42f51a18c070e..e4120478370d1 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -5,6 +5,7 @@ import pytest from pandas._libs.tslibs import OutOfBoundsTimedelta +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas import ( NaT, @@ -44,12 +45,12 @@ def test_from_td64_retain_resolution(): td = Timedelta(obj) assert td.value == obj.view("i8") - assert td.unit == "ms" + assert td._creso == NpyDatetimeUnit.NPY_FR_ms.value # Case where we cast to nearest-supported reso obj2 = np.timedelta64(1234, "D") td2 = Timedelta(obj2) - assert td2.unit == "s" + assert td2._creso == NpyDatetimeUnit.NPY_FR_s.value assert td2 == obj2 assert td2.days == 1234 @@ -57,7 +58,7 @@ def test_from_td64_retain_resolution(): obj3 = np.timedelta64(1000000000000000000, "us") td3 = Timedelta(obj3) assert td3.total_seconds() == 1000000000000 - assert td3.unit == "us" + assert td3._creso == NpyDatetimeUnit.NPY_FR_us.value def test_from_pytimedelta_us_reso(): @@ -65,31 +66,31 @@ def test_from_pytimedelta_us_reso(): td = timedelta(days=4, minutes=3) result = Timedelta(td) assert result.to_pytimedelta() == td - assert result.unit == "us" + assert result._creso == NpyDatetimeUnit.NPY_FR_us.value def test_from_tick_reso(): tick = offsets.Nano() - assert Timedelta(tick).unit == "ns" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ns.value tick = offsets.Micro() - assert Timedelta(tick).unit == "us" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_us.value tick = offsets.Milli() - assert Timedelta(tick).unit == "ms" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_ms.value tick = offsets.Second() - assert Timedelta(tick).unit == "s" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value # everything above Second gets cast to the closest supported reso: second tick = offsets.Minute() - assert Timedelta(tick).unit == "s" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Hour() - assert Timedelta(tick).unit == "s" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Day() - assert Timedelta(tick).unit == "s" + assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value def test_construction(): @@ -281,7 +282,7 @@ def test_overflow_on_construction(): # used to overflow before non-ns support td = Timedelta(timedelta(days=13 * 19999)) - assert td.unit == "us" + assert td._creso == NpyDatetimeUnit.NPY_FR_us.value assert td.days == 13 * 19999 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 94a810a026b2a..924f756edb233 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -33,7 +33,7 @@ def test_as_unit(self): res = td.as_unit("us") assert res.value == td.value // 1000 - assert res.unit == "us" + assert res._creso == NpyDatetimeUnit.NPY_FR_us.value rt = res.as_unit("ns") assert rt.value == td.value @@ -41,7 +41,7 @@ def test_as_unit(self): res = td.as_unit("ms") assert res.value == td.value // 1_000_000 - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value rt = res.as_unit("ns") assert rt.value == td.value @@ -49,7 +49,7 @@ def test_as_unit(self): res = td.as_unit("s") assert res.value == td.value // 1_000_000_000 - assert res.unit == "s" + assert res._creso == NpyDatetimeUnit.NPY_FR_s.value rt = res.as_unit("ns") assert rt.value == td.value @@ -66,7 +66,7 @@ def test_as_unit_overflows(self): res = td.as_unit("ms") assert res.value == us // 1000 - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value def test_as_unit_rounding(self): td = Timedelta(microseconds=1500) @@ -75,7 +75,7 @@ def test_as_unit_rounding(self): expected = Timedelta(milliseconds=1) assert res == expected - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): @@ -311,13 +311,13 @@ def test_timedelta_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timedelta.min == Timedelta(NaT.value + 1) - assert Timedelta.min.unit == "ns" + assert Timedelta.min._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timedelta.max == Timedelta(np.iinfo(np.int64).max) - assert Timedelta.max.unit == "ns" + assert Timedelta.max._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timedelta.resolution == Timedelta(1) - assert Timedelta.resolution.unit == "ns" + assert Timedelta.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value class TestTimedeltaUnaryOps: diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 1ab4f2d8dc3d8..5446e16c189b0 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -874,10 +874,10 @@ def test_sub_datetimelike_mismatched_reso(self, ts_tz): # this construction ensures we get cases with other._creso < ts._creso # and cases with other._creso > ts._creso unit = { - "us": "ms", - "ms": "s", - "s": "us", - }[ts.unit] + NpyDatetimeUnit.NPY_FR_us.value: "ms", + NpyDatetimeUnit.NPY_FR_ms.value: "s", + NpyDatetimeUnit.NPY_FR_s.value: "us", + }[ts._creso] other = ts.as_unit(unit) assert other._creso != ts._creso @@ -922,10 +922,10 @@ def test_sub_timedeltalike_mismatched_reso(self, ts_tz): # this construction ensures we get cases with other._creso < ts._creso # and cases with other._creso > ts._creso unit = { - "us": "ms", - "ms": "s", - "s": "us", - }[ts.unit] + NpyDatetimeUnit.NPY_FR_us.value: "ms", + NpyDatetimeUnit.NPY_FR_ms.value: "s", + NpyDatetimeUnit.NPY_FR_s.value: "us", + }[ts._creso] other = Timedelta(0).as_unit(unit) assert other._creso != ts._creso @@ -973,7 +973,7 @@ def test_sub_timedelta64_mismatched_reso(self, ts_tz): res = ts + np.timedelta64(1, "ns") exp = ts.as_unit("ns") + np.timedelta64(1, "ns") assert exp == res - assert exp.unit == "ns" + assert exp._creso == NpyDatetimeUnit.NPY_FR_ns.value def test_min(self, ts): assert ts.min <= ts @@ -996,13 +996,13 @@ def test_timestamp_class_min_max_resolution(): # when accessed on the class (as opposed to an instance), we default # to nanoseconds assert Timestamp.min == Timestamp(NaT.value + 1) - assert Timestamp.min.unit == "ns" + assert Timestamp.min._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timestamp.max == Timestamp(np.iinfo(np.int64).max) - assert Timestamp.max.unit == "ns" + assert Timestamp.max._creso == NpyDatetimeUnit.NPY_FR_ns.value assert Timestamp.resolution == Timedelta(1) - assert Timestamp.resolution.unit == "ns" + assert Timestamp.resolution._creso == NpyDatetimeUnit.NPY_FR_ns.value class TestAsUnit: @@ -1013,7 +1013,7 @@ def test_as_unit(self): res = ts.as_unit("us") assert res.value == ts.value // 1000 - assert res.unit == "us" + assert res._creso == NpyDatetimeUnit.NPY_FR_us.value rt = res.as_unit("ns") assert rt.value == ts.value @@ -1021,7 +1021,7 @@ def test_as_unit(self): res = ts.as_unit("ms") assert res.value == ts.value // 1_000_000 - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value rt = res.as_unit("ns") assert rt.value == ts.value @@ -1029,7 +1029,7 @@ def test_as_unit(self): res = ts.as_unit("s") assert res.value == ts.value // 1_000_000_000 - assert res.unit == "s" + assert res._creso == NpyDatetimeUnit.NPY_FR_s.value rt = res.as_unit("ns") assert rt.value == ts.value @@ -1046,7 +1046,7 @@ def test_as_unit_overflows(self): res = ts.as_unit("ms") assert res.value == us // 1000 - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value def test_as_unit_rounding(self): ts = Timestamp(1_500_000) # i.e. 1500 microseconds @@ -1055,7 +1055,7 @@ def test_as_unit_rounding(self): expected = Timestamp(1_000_000) # i.e. 1 millisecond assert res == expected - assert res.unit == "ms" + assert res._creso == NpyDatetimeUnit.NPY_FR_ms.value assert res.value == 1 with pytest.raises(ValueError, match="Cannot losslessly convert units"): diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 264b39cb49b9a..354a23fc257da 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -22,6 +22,7 @@ ) from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td @@ -91,11 +92,11 @@ def test_tz_localize_ambiguous_bool(self, unit): result = ts.tz_localize("US/Central", ambiguous=True) assert result == expected0 - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = ts.tz_localize("US/Central", ambiguous=False) assert result == expected1 - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_tz_localize_ambiguous(self): ts = Timestamp("2014-11-02 01:00") @@ -292,7 +293,7 @@ def test_timestamp_tz_localize_nonexistent_shift( assert result == expected.replace(microsecond=0, nanosecond=0) else: assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("offset", [-1, 1]) def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, warsaw): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 5a487169ab8f5..1c1f3acc8331f 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -176,14 +176,14 @@ def test_round_dst_border_ambiguous(self, method, unit): # result = getattr(ts, method)("H", ambiguous=True) assert result == ts - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous=False) expected = Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( "Europe/Madrid" ) assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous="NaT") assert result is NaT @@ -210,7 +210,7 @@ def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): result = getattr(ts, method)(freq, nonexistent="shift_forward") expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)(freq, nonexistent="NaT") assert result is NaT @@ -490,7 +490,7 @@ def test_replace_dst_border(self, unit): result = t.replace(hour=3) expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @@ -504,7 +504,7 @@ def test_replace_dst_fold(self, fold, tz, unit): tz, ambiguous=not fold ) assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value # -------------------------------------------------------------- # Timestamp.normalize @@ -517,7 +517,7 @@ def test_normalize(self, tz_naive_fixture, arg, unit): result = ts.normalize() expected = Timestamp("2013-11-30", tz=tz) assert result == expected - assert result.unit == unit + assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_normalize_pre_epoch_dates(self): # GH: 36294 From 1210525f1d3743295d1722a97cff4543764e7da4 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Dec 2022 14:19:11 -0800 Subject: [PATCH 08/10] revert copy=False --- pandas/core/apply.py | 2 +- pandas/core/frame.py | 4 +--- pandas/core/groupby/generic.py | 2 +- pandas/core/indexing.py | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 02a9444dd4f97..722de91ba5246 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -955,7 +955,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: result.index = res_index # infer dtypes - result = result.infer_objects(copy=False) + result = result.infer_objects() return result diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e085bb1c7cec0..21b3a0c033702 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9423,9 +9423,7 @@ def _append( row_df = other.to_frame().T # infer_objects is needed for # test_append_empty_frame_to_series_with_dateutil_tz - other = row_df.infer_objects(copy=False).rename_axis( - index.names, copy=False - ) + other = row_df.infer_objects().rename_axis(index.names, copy=False) elif isinstance(other, list): if not other: pass diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d85cf7e025413..955f65585963d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1762,7 +1762,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: result = result.T # Note: we really only care about inferring numeric dtypes here - return self._reindex_output(result).infer_objects(copy=False) + return self._reindex_output(result).infer_objects() def _iterate_column_groupbys(self, obj: DataFrame | Series): for i, colname in enumerate(obj.columns): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a49f573e29a89..fa702770a0990 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2162,7 +2162,7 @@ def _setitem_with_indexer_missing(self, indexer, value): if not has_dtype: # i.e. if we already had a Series or ndarray, keep that # dtype. But if we had a list or dict, then do inference - df = df.infer_objects(copy=False) + df = df.infer_objects() self.obj._mgr = df._mgr else: self.obj._mgr = self.obj._append(value)._mgr From 8563aa88823a3467bd31454f98bde928f2edc777 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Dec 2022 17:36:18 -0800 Subject: [PATCH 09/10] troubleshoot --- pandas/core/indexes/multi.py | 2 +- pandas/plotting/_matplotlib/core.py | 2 +- pandas/plotting/_matplotlib/hist.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4be3d0c17c50d..48cf6000d100d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3379,7 +3379,7 @@ def _reorder_indexer( new_order = np.arange(n)[::-1][indexer] elif isinstance(k, slice) and k.start is None and k.stop is None: # slice(None) should not determine order GH#31330 - new_order = np.ones((n,), dtype=np.intp)[indexer] + new_order = np.ones((n,))[indexer] else: # For all other case, use the same order as the level new_order = np.arange(n)[indexer] diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1d7f63c1e2f64..3a634a60e784e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -602,7 +602,7 @@ def _compute_plot_data(self): # GH16953, infer_objects is needed as fallback, for ``Series`` # with ``dtype == object`` - data = data.infer_objects(copy=False) + data = data.infer_objects() include_type = [np.number, "datetime", "datetimetz", "timedelta"] # GH23719, allow plotting boolean diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index aca33d27eaff2..1add485e03760 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -80,7 +80,7 @@ def _args_adjust(self) -> None: def _calculate_bins(self, data: DataFrame) -> np.ndarray: """Calculate bins given data""" - nd_values = data.infer_objects(copy=False)._get_numeric_data() + nd_values = data.infer_objects()._get_numeric_data() values = np.ravel(nd_values) values = values[~isna(values)] From e270de8014316658d2da99d2d0cc3248fc66ed96 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 28 Dec 2022 11:09:02 -0800 Subject: [PATCH 10/10] remove warning filter --- pandas/tests/frame/indexing/test_indexing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 3b151f3d3338a..c0b04b1f8e80f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1455,7 +1455,6 @@ def test_iloc_ea_series_indexer_with_na(self): with pytest.raises(ValueError, match=msg): df.iloc[:, indexer.values] - @pytest.mark.filterwarnings("ignore:indexing past lexsort.*:PerformanceWarning") @pytest.mark.parametrize("indexer", [True, (True,)]) @pytest.mark.parametrize("dtype", [bool, "boolean"]) def test_loc_bool_multiindex(self, dtype, indexer):