CLN: assorted (#50644)

jbrockmendel · web-flow · commit 5e95673d6f95 · 2023-01-12T12:35:01.000-08:00
* CLN: assorted

* troubleshoot
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -546,9 +546,6 @@ cpdef array_to_datetime(
                 else:
                     # coerce
                     # we now need to parse this as if unit='ns'
-                    # we can ONLY accept integers at this point
-                    # if we have previously (or in future accept
-                    # datetimes/strings, then we must coerce)
                     try:
                         iresult[i] = cast_from_unit(val, "ns")
                     except OverflowError:
diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd
@@ -52,7 +52,7 @@ cdef tzinfo convert_timezone(
 )
 
 cdef int64_t parse_pydatetime(
-    object val,
+    datetime val,
     npy_datetimestruct *dts,
     bint utc_convert,
 ) except? -1
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
@@ -413,8 +413,8 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
 
     Parameters
     ----------
-    dts: npy_datetimestruct
-    tzoffset: int
+    dts : npy_datetimestruct
+    tzoffset : int
     tz : tzinfo or None
         timezone for the timezone-aware output.
     reso : NPY_DATETIMEUNIT, default NPY_FR_ns
@@ -463,7 +463,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
     return obj
 
 
-cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
+cdef _TSObject _convert_str_to_tsobject(str ts, tzinfo tz, str unit,
                                         bint dayfirst=False,
                                         bint yearfirst=False):
     """
@@ -499,7 +499,6 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
         NPY_DATETIMEUNIT out_bestunit, reso
 
     if len(ts) == 0 or ts in nat_strings:
-        ts = NaT
         obj = _TSObject()
         obj.value = NPY_NAT
         obj.tzinfo = tz
@@ -727,16 +726,16 @@ cdef tzinfo convert_timezone(
 
 
 cdef int64_t parse_pydatetime(
-        object val,
-        npy_datetimestruct *dts,
-        bint utc_convert,
+    datetime val,
+    npy_datetimestruct *dts,
+    bint utc_convert,
 ) except? -1:
     """
     Convert pydatetime to datetime64.
 
     Parameters
     ----------
-    val
+    val : datetime
         Element being processed.
     dts : *npy_datetimestruct
         Needed to use in pydatetime_to_dt64, which writes to it.
diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi
@@ -11,7 +11,6 @@ def parse_datetime_string(
     date_string: str,
     dayfirst: bool = ...,
     yearfirst: bool = ...,
-    **kwargs,
 ) -> datetime: ...
 def parse_time_string(
     arg: str,
@@ -24,28 +23,17 @@ def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ...
 def try_parse_dates(
     values: npt.NDArray[np.object_],  # object[:]
     parser,
-    dayfirst: bool = ...,
-    default: datetime | None = ...,
 ) -> npt.NDArray[np.object_]: ...
 def try_parse_year_month_day(
     years: npt.NDArray[np.object_],  # object[:]
     months: npt.NDArray[np.object_],  # object[:]
     days: npt.NDArray[np.object_],  # object[:]
 ) -> npt.NDArray[np.object_]: ...
-def try_parse_datetime_components(
-    years: npt.NDArray[np.object_],  # object[:]
-    months: npt.NDArray[np.object_],  # object[:]
-    days: npt.NDArray[np.object_],  # object[:]
-    hours: npt.NDArray[np.object_],  # object[:]
-    minutes: npt.NDArray[np.object_],  # object[:]
-    seconds: npt.NDArray[np.object_],  # object[:]
-) -> npt.NDArray[np.object_]: ...
 def guess_datetime_format(
     dt_str,
     dayfirst: bool | None = ...,
 ) -> str | None: ...
 def concat_date_cols(
     date_cols: tuple,
-    keep_trivial_numbers: bool = ...,
 ) -> npt.NDArray[np.object_]: ...
 def get_rule_month(source: str) -> str: ...
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
@@ -209,7 +209,7 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst):
     raise DateParseError(f"Invalid date specified ({month}/{day})")
 
 
-cdef bint does_string_look_like_time(str parse_string):
+cdef bint _does_string_look_like_time(str parse_string):
     """
     Checks whether given string is a time: it has to start either from
     H:MM or from HH:MM, and hour and minute values must be valid.
@@ -249,7 +249,6 @@ def parse_datetime_string(
     str date_string,
     bint dayfirst=False,
     bint yearfirst=False,
-    **kwargs,
 ) -> datetime:
     """
     Parse datetime string, only returns datetime.
@@ -266,10 +265,10 @@ def parse_datetime_string(
     if not _does_string_look_like_datetime(date_string):
         raise ValueError(f'Given date string "{date_string}" not likely a datetime')
 
-    if does_string_look_like_time(date_string):
+    if _does_string_look_like_time(date_string):
         # use current datetime as default, not pass _DEFAULT_DATETIME
         dt = du_parse(date_string, dayfirst=dayfirst,
-                      yearfirst=yearfirst, **kwargs)
+                      yearfirst=yearfirst)
         return dt
 
     dt, _ = _parse_delimited_date(date_string, dayfirst)
@@ -294,7 +293,7 @@ def parse_datetime_string(
 
     try:
         dt = du_parse(date_string, default=_DEFAULT_DATETIME,
-                      dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
+                      dayfirst=dayfirst, yearfirst=yearfirst)
     except TypeError:
         # following may be raised from dateutil
         # TypeError: 'NoneType' object is not iterable
@@ -667,9 +666,7 @@ cdef dateutil_parse(
 # Parsing for type-inference
 
 
-def try_parse_dates(
-    object[:] values, parser, bint dayfirst=False, default=None,
-) -> np.ndarray:
+def try_parse_dates(object[:] values, parser) -> np.ndarray:
     cdef:
         Py_ssize_t i, n
         object[::1] result
@@ -705,47 +702,6 @@ def try_parse_year_month_day(
     return result.base  # .base to access underlying ndarray
 
 
-def try_parse_datetime_components(object[:] years,
-                                  object[:] months,
-                                  object[:] days,
-                                  object[:] hours,
-                                  object[:] minutes,
-                                  object[:] seconds) -> np.ndarray:
-
-    cdef:
-        Py_ssize_t i, n
-        object[::1] result
-        int secs
-        double float_secs
-        double micros
-
-    n = len(years)
-    # TODO(cython3): Use len instead of `shape[0]`
-    if (
-        months.shape[0] != n
-        or days.shape[0] != n
-        or hours.shape[0] != n
-        or minutes.shape[0] != n
-        or seconds.shape[0] != n
-    ):
-        raise ValueError("Length of all datetime components must be equal")
-    result = np.empty(n, dtype="O")
-
-    for i in range(n):
-        float_secs = float(seconds[i])
-        secs = int(float_secs)
-
-        micros = float_secs - secs
-        if micros > 0:
-            micros = micros * 1000000
-
-        result[i] = datetime(int(years[i]), int(months[i]), int(days[i]),
-                             int(hours[i]), int(minutes[i]), secs,
-                             int(micros))
-
-    return result.base  # .base to access underlying ndarray
-
-
 # ----------------------------------------------------------------------
 # Miscellaneous
 
@@ -1001,6 +957,7 @@ cdef str _fill_token(token: str, padding: int):
         token_filled = f"{seconds}.{nanoseconds}"
     return token_filled
 
+
 cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
     """Warn if guessed datetime format doesn't respect dayfirst argument."""
     cdef:
@@ -1062,16 +1019,13 @@ cdef object convert_to_unicode(object item, bint keep_trivial_numbers):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndarray:
+def concat_date_cols(tuple date_cols) -> np.ndarray:
     """
     Concatenates elements from numpy arrays in `date_cols` into strings.
 
     Parameters
     ----------
     date_cols : tuple[ndarray]
-    keep_trivial_numbers : bool, default True
-        if True and len(date_cols) == 1, then
-        conversion (to string from integer/float zero) is not performed
 
     Returns
     -------
@@ -1110,8 +1064,7 @@ def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndar
         it = <flatiter>PyArray_IterNew(array)
         for row_idx in range(rows_count):
             item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
-            result_view[row_idx] = convert_to_unicode(item,
-                                                      keep_trivial_numbers)
+            result_view[row_idx] = convert_to_unicode(item, True)
             PyArray_ITER_NEXT(it)
     else:
         # create fixed size list - more efficient memory allocation
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
@@ -764,7 +764,7 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:
 
     Parameters
     ----------
-    dts: npy_datetimestruct*
+    dts : npy_datetimestruct*
     freq : int
 
     Returns
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -955,7 +955,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
         result.index = res_index
 
         # infer dtypes
-        result = result.infer_objects()
+        result = result.infer_objects(copy=False)
 
         return result
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -471,10 +471,6 @@ def _internal_fill_value(self) -> int:
         dtype = self._ndarray.dtype
         return dtype.type(-1)
 
-    @property
-    def _constructor(self) -> type[Categorical]:
-        return Categorical
-
     @classmethod
     def _from_sequence(
         cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -1256,7 +1256,7 @@ def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_
         return new_data
 
     @final
-    def _addsub_object_array(self, other: np.ndarray, op):
+    def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
         """
         Add or subtract array-like of DateOffset objects
 
@@ -1267,10 +1267,14 @@ def _addsub_object_array(self, other: np.ndarray, op):
 
         Returns
         -------
-        result : same class as self
+        np.ndarray[object]
+            Except in fastpath case with length 1 where we operate on the
+            contained scalar.
         """
         assert op in [operator.add, operator.sub]
         if len(other) == 1 and self.ndim == 1:
+            # Note: without this special case, we could annotate return type
+            #  as ndarray[object]
             # If both 1D then broadcasting is unambiguous
             return op(self, other[0])
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -793,8 +793,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
         if val is NaT or val.tz is None:  # type: ignore[comparison-overlap]
             val = val.to_datetime64()
             dtype = val.dtype
-            # TODO(2.0): this should be dtype = val.dtype
-            #  to get the correct M8 resolution
             # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
         else:
             if pandas_dtype:
@@ -1677,8 +1675,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
                 arr._validate_setitem_value(element)
                 return True
             except (ValueError, TypeError):
-                # TODO(2.0): stop catching ValueError for tzaware, see
-                #  _catch_deprecated_value_error
+                # TODO: re-use _catch_deprecated_value_error to ensure we are
+                #  strict about what exceptions we allow through here.
                 return False
 
         # This is technically incorrect, but maintains the behavior of
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -9436,7 +9436,9 @@ def _append(
             row_df = other.to_frame().T
             # infer_objects is needed for
             #  test_append_empty_frame_to_series_with_dateutil_tz
-            other = row_df.infer_objects().rename_axis(index.names, copy=False)
+            other = row_df.infer_objects(copy=False).rename_axis(
+                index.names, copy=False
+            )
         elif isinstance(other, list):
             if not other:
                 pass
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1836,7 +1836,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
             result = result.T
 
         # Note: we really only care about inferring numeric dtypes here
-        return self._reindex_output(result).infer_objects()
+        return self._reindex_output(result).infer_objects(copy=False)
 
     def _iterate_column_groupbys(self, obj: DataFrame | Series):
         for i, colname in enumerate(obj.columns):
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -3385,7 +3385,7 @@ def _reorder_indexer(
                 new_order = np.arange(n)[::-1][indexer]
             elif isinstance(k, slice) and k.start is None and k.stop is None:
                 # slice(None) should not determine order GH#31330
-                new_order = np.ones((n,))[indexer]
+                new_order = np.ones((n,), dtype=np.intp)[indexer]
             else:
                 # For all other case, use the same order as the level
                 new_order = np.arange(n)[indexer]
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -231,13 +231,11 @@ def _should_fallback_to_positional(self) -> bool:
 
     @doc(Index._convert_slice_indexer)
     def _convert_slice_indexer(self, key: slice, kind: str):
-        # TODO(2.0): once #45324 deprecation is enforced we should be able
+        # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
         #  to simplify this.
         if is_float_dtype(self.dtype):
             assert kind in ["loc", "getitem"]
 
-            # TODO: can we write this as a condition based on
-            #  e.g. _should_fallback_to_positional?
             # We always treat __getitem__ slicing as label-based
             # translate to locations
             return self.slice_indexer(key.start, key.stop, key.step)
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -2166,7 +2166,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
                 if not has_dtype:
                     # i.e. if we already had a Series or ndarray, keep that
                     #  dtype.  But if we had a list or dict, then do inference
-                    df = df.infer_objects()
+                    df = df.infer_objects(copy=False)
                 self.obj._mgr = df._mgr
             else:
                 self.obj._mgr = self.obj._append(value)._mgr
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -1117,7 +1117,6 @@ def converter(*date_cols):
                     parsing.try_parse_dates(
                         parsing.concat_date_cols(date_cols),
                         parser=date_parser,
-                        dayfirst=dayfirst,
                     ),
                     errors="ignore",
                 )
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
@@ -603,7 +603,7 @@ def _compute_plot_data(self):
 
         # GH16953, infer_objects is needed as fallback, for ``Series``
         # with ``dtype == object``
-        data = data.infer_objects()
+        data = data.infer_objects(copy=False)
         include_type = [np.number, "datetime", "datetimetz", "timedelta"]
 
         # GH23719, allow plotting boolean
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py
@@ -80,7 +80,7 @@ def _args_adjust(self) -> None:
 
     def _calculate_bins(self, data: DataFrame) -> np.ndarray:
         """Calculate bins given data"""
-        nd_values = data.infer_objects()._get_numeric_data()
+        nd_values = data.infer_objects(copy=False)._get_numeric_data()
         values = np.ravel(nd_values)
         values = values[~isna(values)]
 
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py
diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py

Excerpt from `pandas/_libs/tslibs/conversion.pxd`, hunk `@@ -52,7 +52,7 @@ cdef tzinfo convert_timezone(`:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| `52` | `52` | `)` |
| `53` | `53` | |
| `54` | `54` | `cdef int64_t parse_pydatetime(` |
| `55` | | `- object val,` |
| | `55` | `+ datetime val,` |
| `56` | `56` | `npy_datetimestruct *dts,` |
| `57` | `57` | `bint utc_convert,` |
| `58` | `58` | `) except? -1` |
Excerpt from `pandas/io/parsers/base_parser.py`, hunk `@@ -1117,7 +1117,6 @@ def converter(*date_cols):`:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| `1117` | `1117` | `parsing.try_parse_dates(` |
| `1118` | `1118` | `parsing.concat_date_cols(date_cols),` |
| `1119` | `1119` | `parser=date_parser,` |
| `1120` | | `- dayfirst=dayfirst,` |
| `1121` | `1120` | `),` |
| `1122` | `1121` | `errors="ignore",` |
| `1123` | `1122` | `)` |