diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index e727684b9d8c5..10bcf6c9eabbf 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -546,9 +546,6 @@ cpdef array_to_datetime(
                     else:
                         # coerce
                         # we now need to parse this as if unit='ns'
-                        # we can ONLY accept integers at this point
-                        # if we have previously (or in future accept
-                        # datetimes/strings, then we must coerce)
                         try:
                             iresult[i] = cast_from_unit(val, "ns")
                         except OverflowError:
diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd
index dfb8b2009f0ec..332ff1522ccf5 100644
--- a/pandas/_libs/tslibs/conversion.pxd
+++ b/pandas/_libs/tslibs/conversion.pxd
@@ -52,7 +52,7 @@ cdef tzinfo convert_timezone(
 )

 cdef int64_t parse_pydatetime(
-    object val,
+    datetime val,
     npy_datetimestruct *dts,
     bint utc_convert,
 ) except? -1
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 33815aee8b795..7cff269d2191e 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -413,8 +413,8 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,

     Parameters
     ----------
-    dts: npy_datetimestruct
-    tzoffset: int
+    dts : npy_datetimestruct
+    tzoffset : int
     tz : tzinfo or None
         timezone for the timezone-aware output.
     reso : NPY_DATETIMEUNIT, default NPY_FR_ns
@@ -463,7 +463,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
     return obj


-cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
+cdef _TSObject _convert_str_to_tsobject(str ts, tzinfo tz, str unit,
                                         bint dayfirst=False,
                                         bint yearfirst=False):
     """
@@ -499,7 +499,6 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
         NPY_DATETIMEUNIT out_bestunit, reso

     if len(ts) == 0 or ts in nat_strings:
-        ts = NaT
         obj = _TSObject()
         obj.value = NPY_NAT
         obj.tzinfo = tz
@@ -727,16 +726,16 @@ cdef tzinfo convert_timezone(


 cdef int64_t parse_pydatetime(
-    object val,
-    npy_datetimestruct *dts,
-    bint utc_convert,
+    datetime val,
+    npy_datetimestruct *dts,
+    bint utc_convert,
 ) except? -1:
     """
     Convert pydatetime to datetime64.

     Parameters
     ----------
-    val
+    val : datetime
         Element being processed.
     dts : *npy_datetimestruct
         Needed to use in pydatetime_to_dt64, which writes to it.
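Review note: `_convert_str_to_tsobject` is now typed `str` and no longer rebinds `ts` to `NaT`; the sentinel is written straight into the `_TSObject`. A minimal sketch of the behavior this branch implements, empty strings and recognized NaT strings short-circuit to `NaT` before any dateutil parsing runs (treat the exact repr as illustrative, checked against a recent pandas build):

```python
import pandas as pd

# Empty strings and members of nat_strings take the NaT fast path in
# _convert_str_to_tsobject / array_to_datetime; no parser is invoked.
result = pd.to_datetime(["NaT", "", "2020-01-01"])
print(result)
# DatetimeIndex(['NaT', 'NaT', '2020-01-01'], dtype='datetime64[ns]', freq=None)
```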
diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi
index 2e666249a76fc..a4440ffff5be9 100644
--- a/pandas/_libs/tslibs/parsing.pyi
+++ b/pandas/_libs/tslibs/parsing.pyi
@@ -11,7 +11,6 @@ def parse_datetime_string(
     date_string: str,
     dayfirst: bool = ...,
     yearfirst: bool = ...,
-    **kwargs,
 ) -> datetime: ...
 def parse_time_string(
     arg: str,
@@ -24,28 +23,17 @@ def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ...
 def try_parse_dates(
     values: npt.NDArray[np.object_],  # object[:]
     parser,
-    dayfirst: bool = ...,
-    default: datetime | None = ...,
 ) -> npt.NDArray[np.object_]: ...
 def try_parse_year_month_day(
     years: npt.NDArray[np.object_],  # object[:]
     months: npt.NDArray[np.object_],  # object[:]
     days: npt.NDArray[np.object_],  # object[:]
 ) -> npt.NDArray[np.object_]: ...
-def try_parse_datetime_components(
-    years: npt.NDArray[np.object_],  # object[:]
-    months: npt.NDArray[np.object_],  # object[:]
-    days: npt.NDArray[np.object_],  # object[:]
-    hours: npt.NDArray[np.object_],  # object[:]
-    minutes: npt.NDArray[np.object_],  # object[:]
-    seconds: npt.NDArray[np.object_],  # object[:]
-) -> npt.NDArray[np.object_]: ...
 def guess_datetime_format(
     dt_str,
     dayfirst: bool | None = ...,
 ) -> str | None: ...
 def concat_date_cols(
     date_cols: tuple,
-    keep_trivial_numbers: bool = ...,
 ) -> npt.NDArray[np.object_]: ...
 def get_rule_month(source: str) -> str: ...
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 0f6640a90d7ed..dabeab3e30f4d 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -209,7 +209,7 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst):
     raise DateParseError(f"Invalid date specified ({month}/{day})")


-cdef bint does_string_look_like_time(str parse_string):
+cdef bint _does_string_look_like_time(str parse_string):
     """
     Checks whether given string is a time: it has to start either from
     H:MM or from HH:MM, and hour and minute values must be valid.
@@ -249,7 +249,6 @@ def parse_datetime_string(
     str date_string,
     bint dayfirst=False,
     bint yearfirst=False,
-    **kwargs,
 ) -> datetime:
     """
     Parse datetime string, only returns datetime.
@@ -266,10 +265,10 @@ def parse_datetime_string(
     if not _does_string_look_like_datetime(date_string):
         raise ValueError(f'Given date string "{date_string}" not likely a datetime')

-    if does_string_look_like_time(date_string):
+    if _does_string_look_like_time(date_string):
         # use current datetime as default, not pass _DEFAULT_DATETIME
         dt = du_parse(date_string, dayfirst=dayfirst,
-                      yearfirst=yearfirst, **kwargs)
+                      yearfirst=yearfirst)
         return dt

     dt, _ = _parse_delimited_date(date_string, dayfirst)
@@ -294,7 +293,7 @@ def parse_datetime_string(

     try:
         dt = du_parse(date_string, default=_DEFAULT_DATETIME,
-                      dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
+                      dayfirst=dayfirst, yearfirst=yearfirst)
     except TypeError:
         # following may be raised from dateutil
         # TypeError: 'NoneType' object is not iterable
@@ -667,9 +666,7 @@ cdef dateutil_parse(
 # Parsing for type-inference


-def try_parse_dates(
-    object[:] values, parser, bint dayfirst=False, default=None,
-) -> np.ndarray:
+def try_parse_dates(object[:] values, parser) -> np.ndarray:
     cdef:
         Py_ssize_t i, n
         object[::1] result
@@ -705,47 +702,6 @@ def try_parse_year_month_day(
     return result.base  # .base to access underlying ndarray


-def try_parse_datetime_components(object[:] years,
-                                  object[:] months,
-                                  object[:] days,
-                                  object[:] hours,
-                                  object[:] minutes,
-                                  object[:] seconds) -> np.ndarray:
-
-    cdef:
-        Py_ssize_t i, n
-        object[::1] result
-        int secs
-        double float_secs
-        double micros
-
-    n = len(years)
-    # TODO(cython3): Use len instead of `shape[0]`
-    if (
-        months.shape[0] != n
-        or days.shape[0] != n
-        or hours.shape[0] != n
-        or minutes.shape[0] != n
-        or seconds.shape[0] != n
-    ):
-        raise ValueError("Length of all datetime components must be equal")
-    result = np.empty(n, dtype="O")
-
-    for i in range(n):
-        float_secs = float(seconds[i])
-        secs = int(float_secs)
-
-        micros = float_secs - secs
-        if micros > 0:
-            micros = micros * 1000000
-
-        result[i] = datetime(int(years[i]), int(months[i]), int(days[i]),
-                             int(hours[i]), int(minutes[i]), secs,
-                             int(micros))
-
-    return result.base  # .base to access underlying ndarray
-
-
 # ----------------------------------------------------------------------
 # Miscellaneous
@@ -1001,6 +957,7 @@ cdef str _fill_token(token: str, padding: int):
         token_filled = f"{seconds}.{nanoseconds}"
     return token_filled

+
 cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
     """Warn if guessed datetime format doesn't respect dayfirst argument."""
     cdef:
@@ -1062,16 +1019,13 @@ cdef object convert_to_unicode(object item, bint keep_trivial_numbers):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndarray:
+def concat_date_cols(tuple date_cols) -> np.ndarray:
     """
     Concatenates elements from numpy arrays in `date_cols` into strings.

     Parameters
     ----------
     date_cols : tuple[ndarray]
-    keep_trivial_numbers : bool, default True
-        if True and len(date_cols) == 1, then
-        conversion (to string from integer/float zero) is not performed

     Returns
     -------
@@ -1110,8 +1064,7 @@ def concat_date_cols(tuple date_cols) -> np.ndarray:
         it = PyArray_IterNew(array)
         for row_idx in range(rows_count):
             item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-            result_view[row_idx] = convert_to_unicode(item,
-                                                      keep_trivial_numbers)
+            result_view[row_idx] = convert_to_unicode(item, True)
             PyArray_ITER_NEXT(it)
     else:
         # create fixed size list - more efficient memory allocation
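Review note: with `dayfirst`/`default` removed from `try_parse_dates`, any such handling moves into the `parser` callable itself, which is exactly what the updated test at the bottom of this diff does. A sketch of the surviving call shape (this is an internal API in `pandas._libs.tslibs.parsing`, not public):

```python
import numpy as np
from dateutil.parser import parse as du_parse

from pandas._libs.tslibs import parsing

arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object)
# dayfirst now lives inside the parser callable, not in try_parse_dates
result = parsing.try_parse_dates(arr, parser=lambda x: du_parse(x, dayfirst=True))
```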
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
index 8955fb678d075..f93afc0d1c3f2 100644
--- a/pandas/_libs/tslibs/period.pyx
+++ b/pandas/_libs/tslibs/period.pyx
@@ -764,7 +764,7 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:

     Parameters
     ----------
-    dts: npy_datetimestruct*
+    dts : npy_datetimestruct*
     freq : int

     Returns
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 722de91ba5246..02a9444dd4f97 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -955,7 +955,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
         result.index = res_index

         # infer dtypes
-        result = result.infer_objects()
+        result = result.infer_objects(copy=False)

         return result

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index abc3d9e8ce9db..5dc654aebd5ee 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -471,10 +471,6 @@ def _internal_fill_value(self) -> int:
         dtype = self._ndarray.dtype
         return dtype.type(-1)

-    @property
-    def _constructor(self) -> type[Categorical]:
-        return Categorical
-
     @classmethod
     def _from_sequence(
         cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
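Review note: `infer_to_same_shape` now passes `copy=False` to `infer_objects`, and the same change recurs below in frame.py, groupby, indexing, and the plotting code. A minimal sketch of the keyword in use (assuming a pandas build where `infer_objects` accepts `copy`, as the hunks here already do):

```python
import pandas as pd

# object-dtype columns, e.g. as produced by a row-wise apply
df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]}, dtype=object)

# copy=False skips copying blocks that need no conversion; columns that
# do get inferred (object -> int64/float64) are rebuilt either way.
out = df.infer_objects(copy=False)
print(out.dtypes)  # a: int64, b: float64
```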
""" assert op in [operator.add, operator.sub] if len(other) == 1 and self.ndim == 1: + # Note: without this special case, we could annotate return type + # as ndarray[object] # If both 1D then broadcasting is unambiguous return op(self, other[0]) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b3f2426256ccf..dcc40c11ec778 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -793,8 +793,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, if val is NaT or val.tz is None: # type: ignore[comparison-overlap] val = val.to_datetime64() dtype = val.dtype - # TODO(2.0): this should be dtype = val.dtype - # to get the correct M8 resolution # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes else: if pandas_dtype: @@ -1677,8 +1675,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: arr._validate_setitem_value(element) return True except (ValueError, TypeError): - # TODO(2.0): stop catching ValueError for tzaware, see - # _catch_deprecated_value_error + # TODO: re-use _catch_deprecated_value_error to ensure we are + # strict about what exceptions we allow through here. return False # This is technically incorrect, but maintains the behavior of diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6491081c54592..5ef34e106ed72 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9436,7 +9436,9 @@ def _append( row_df = other.to_frame().T # infer_objects is needed for # test_append_empty_frame_to_series_with_dateutil_tz - other = row_df.infer_objects().rename_axis(index.names, copy=False) + other = row_df.infer_objects(copy=False).rename_axis( + index.names, copy=False + ) elif isinstance(other, list): if not other: pass diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 15e11aea4b65b..7b10b0f89dd5c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1833,7 +1833,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: result = result.T # Note: we really only care about inferring numeric dtypes here - return self._reindex_output(result).infer_objects() + return self._reindex_output(result).infer_objects(copy=False) def _iterate_column_groupbys(self, obj: DataFrame | Series): for i, colname in enumerate(obj.columns): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 38de00584e628..f7fb6799d2376 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3385,7 +3385,7 @@ def _reorder_indexer( new_order = np.arange(n)[::-1][indexer] elif isinstance(k, slice) and k.start is None and k.stop is None: # slice(None) should not determine order GH#31330 - new_order = np.ones((n,))[indexer] + new_order = np.ones((n,), dtype=np.intp)[indexer] else: # For all other case, use the same order as the level new_order = np.arange(n)[indexer] diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 7fec60babea00..af3ff54bb9e2b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -231,13 +231,11 @@ def _should_fallback_to_positional(self) -> bool: @doc(Index._convert_slice_indexer) def _convert_slice_indexer(self, key: slice, kind: str): - # TODO(2.0): once #45324 deprecation is enforced we should be able + # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able # to simplify this. 
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index 7fec60babea00..af3ff54bb9e2b 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -231,13 +231,11 @@ def _should_fallback_to_positional(self) -> bool:

     @doc(Index._convert_slice_indexer)
     def _convert_slice_indexer(self, key: slice, kind: str):
-        # TODO(2.0): once #45324 deprecation is enforced we should be able
+        # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
         # to simplify this.
         if is_float_dtype(self.dtype):
             assert kind in ["loc", "getitem"]
-            # TODO: can we write this as a condition based on
-            # e.g. _should_fallback_to_positional?

             # We always treat __getitem__ slicing as label-based
             # translate to locations
             return self.slice_indexer(key.start, key.stop, key.step)
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 1b32c09bf4a25..636e376197ef1 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -2166,7 +2166,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
                 if not has_dtype:
                     # i.e. if we already had a Series or ndarray, keep that
                     #  dtype. But if we had a list or dict, then do inference
-                    df = df.infer_objects()
+                    df = df.infer_objects(copy=False)
                 self.obj._mgr = df._mgr
             else:
                 self.obj._mgr = self.obj._append(value)._mgr
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 742a988526cd0..6272f213ccef1 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -1117,7 +1117,6 @@ def converter(*date_cols):
                     parsing.try_parse_dates(
                         parsing.concat_date_cols(date_cols),
                         parser=date_parser,
-                        dayfirst=dayfirst,
                     ),
                     errors="ignore",
                 )
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 3a634a60e784e..1d7f63c1e2f64 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -602,7 +602,7 @@ def _compute_plot_data(self):

         # GH16953, infer_objects is needed as fallback, for ``Series``
         # with ``dtype == object``
-        data = data.infer_objects()
+        data = data.infer_objects(copy=False)
         include_type = [np.number, "datetime", "datetimetz", "timedelta"]

         # GH23719, allow plotting boolean
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
index 956390f739481..d20f69cc0a8de 100644
--- a/pandas/plotting/_matplotlib/hist.py
+++ b/pandas/plotting/_matplotlib/hist.py
@@ -80,7 +80,7 @@ def _args_adjust(self) -> None:

     def _calculate_bins(self, data: DataFrame) -> np.ndarray:
         """Calculate bins given data"""
-        nd_values = data.infer_objects()._get_numeric_data()
+        nd_values = data.infer_objects(copy=False)._get_numeric_data()
         values = np.ravel(nd_values)
         values = values[~isna(values)]
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
index 93e1bcc113765..726eb8994a598 100644
--- a/pandas/tests/frame/methods/test_quantile.py
+++ b/pandas/tests/frame/methods/test_quantile.py
@@ -880,13 +880,9 @@ def test_quantile_ea_with_na(self, obj, index):
         expected = type(obj)(expected)
         tm.assert_equal(result, expected)

-    # TODO(GH#39763): filtering can be removed after GH#39763 is fixed
-    @pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
     def test_quantile_ea_all_na(self, request, obj, index):
         obj.iloc[:] = index._na_value
-
-        # TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed
-        obj = obj.astype(index.dtype)
+        # Check dtypes were preserved; this was once a problem, see GH#39763
         assert np.all(obj.dtypes == index.dtype)

         # result should be invariant to shuffling
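Review note on the `_convert_slice_indexer` TODO: on a float64 index, slices are always treated as label-based (for both `.loc` and plain `[]`) and translated to positions via `slice_indexer`. A quick illustration:

```python
import pandas as pd

ser = pd.Series(range(4), index=[0.5, 1.5, 2.5, 3.5])

# Label-based slicing: both endpoints are included, nothing positional
print(ser[1.5:2.5])
# 1.5    1
# 2.5    2
# dtype: int64
```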
diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index d7f1d900db052..0232cb34ef200 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -18,7 +18,6 @@ class TestDataFrameSortValues:
     @pytest.mark.parametrize("dtype", [np.uint8, bool])
     def test_sort_values_sparse_no_warning(self, dtype):
         # GH#45618
-        # TODO(2.0): test will be unnecessary
         ser = pd.Series(Categorical(["a", "b", "a"], categories=["a", "b", "c"]))
         df = pd.get_dummies(ser, dtype=dtype, sparse=True)
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index 2fb95942b08db..d10b45eb7d270 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -610,8 +610,10 @@ def test_map_dictlike(self, mapper, simple_index):
         idx = simple_index

         if isinstance(idx, CategoricalIndex):
-            # TODO(2.0): see if we can avoid skipping once
-            # CategoricalIndex.reindex is removed.
+            # FIXME: this fails with CategoricalIndex bc it goes through
+            #  Categorical.map which ends up calling get_indexer with
+            #  non-unique values, which raises. This _should_ work fine for
+            #  CategoricalIndex.
             pytest.skip(f"skipping tests for {type(idx)}")

         identity = mapper(idx.values, idx)
diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py
index 73b742591cd10..4e817ee708614 100644
--- a/pandas/tests/indexes/timedeltas/test_constructors.py
+++ b/pandas/tests/indexes/timedeltas/test_constructors.py
@@ -154,17 +154,6 @@ def test_constructor(self):
         )
         tm.assert_index_equal(result, expected)

-        # unicode
-        result = TimedeltaIndex(
-            [
-                "1 days",
-                "1 days, 00:00:05",
-                np.timedelta64(2, "D"),
-                timedelta(days=2, seconds=2),
-                pd.offsets.Second(3),
-            ]
-        )
-
         expected = TimedeltaIndex(
             ["0 days 00:00:00", "0 days 00:00:01", "0 days 00:00:02"]
         )
diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py
index 5e4b228ba2d32..16459f00dac3b 100644
--- a/pandas/tests/indexes/timedeltas/test_scalar_compat.py
+++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py
@@ -41,11 +41,12 @@ def test_tdi_total_seconds(self):
         )
         tm.assert_series_equal(ser.dt.total_seconds(), s_expt)

+    def test_tdi_total_seconds_all_nat(self):
         # with both nat
         ser = Series([np.nan, np.nan], dtype="timedelta64[ns]")
-        tm.assert_series_equal(
-            ser.dt.total_seconds(), Series([np.nan, np.nan], index=[0, 1])
-        )
+        result = ser.dt.total_seconds()
+        expected = Series([np.nan, np.nan])
+        tm.assert_series_equal(result, expected)

     def test_tdi_round(self):
         td = timedelta_range(start="16801 days", periods=5, freq="30Min")
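Review note: the split-out `test_tdi_total_seconds_all_nat` pins down the all-NaT case; `total_seconds()` maps NaT to NaN and always returns float64:

```python
import numpy as np
import pandas as pd

ser = pd.Series([np.nan, np.nan], dtype="timedelta64[ns]")
print(ser.dt.total_seconds())
# 0   NaN
# 1   NaN
# dtype: float64
```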
frame[0].dtype == "timedelta64[ns]" tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter)) + def test_timedelta2(self): frame = DataFrame( { "a": [timedelta(days=23), timedelta(seconds=5)], diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 6cbe3833cbca0..c3a989cee7b02 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -278,9 +278,7 @@ def test_guess_datetime_format_no_padding(string, fmt, dayfirst, warning): def test_try_parse_dates(): arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object) - result = parsing.try_parse_dates( - arr, dayfirst=True, parser=lambda x: du_parse(x, dayfirst=True) - ) + result = parsing.try_parse_dates(arr, parser=lambda x: du_parse(x, dayfirst=True)) expected = np.array([du_parse(d, dayfirst=True) for d in arr]) tm.assert_numpy_array_equal(result, expected)