pandas-dev
diff --git a/‎doc/source/reference/testing.rst
+2 b/‎doc/source/reference/testing.rst
+2
diff --git a/‎doc/source/whatsnew/v1.4.4.rst
+1-1 b/‎doc/source/whatsnew/v1.4.4.rst
+1-1
diff --git a/‎doc/source/whatsnew/v1.5.0.rst
+35-25 b/‎doc/source/whatsnew/v1.5.0.rst
+35-25
diff --git a/‎pandas/_libs/tslibs/conversion.pyx
+3-4 b/‎pandas/_libs/tslibs/conversion.pyx
+3-4
diff --git a/‎pandas/_libs/tslibs/np_datetime.pxd
-1 b/‎pandas/_libs/tslibs/np_datetime.pxd
-1
diff --git a/‎pandas/_libs/tslibs/np_datetime.pyx
-8 b/‎pandas/_libs/tslibs/np_datetime.pyx
-8
diff --git a/‎pandas/_libs/tslibs/period.pyx
+2-4 b/‎pandas/_libs/tslibs/period.pyx
+2-4
diff --git a/‎pandas/_libs/tslibs/vectorized.pyx
-1 b/‎pandas/_libs/tslibs/vectorized.pyx
-1
diff --git a/‎pandas/core/algorithms.py
+97-13 b/‎pandas/core/algorithms.py
+97-13
@@ -43,6 +43,8 @@ Exceptions and warnings
    errors.ParserError
    errors.ParserWarning
    errors.PerformanceWarning
+   errors.PyperclipException
+   errors.PyperclipWindowsException
    errors.SettingWithCopyError
    errors.SettingWithCopyWarning
    errors.SpecificationError
 
@@ -23,7 +23,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
--
+- The :class:`errors.FutureWarning` raised when passing arguments (other than ``filepath_or_buffer``) as positional in :func:`read_csv` is now raised at the correct stacklevel (:issue:`47385`)
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -40,7 +40,6 @@ from pandas._libs.tslibs.np_datetime cimport (
     NPY_FR_ns,
     astype_overflowsafe,
     check_dts_bounds,
-    dt64_to_dtstruct,
     dtstruct_to_dt64,
     get_datetime64_unit,
     get_datetime64_value,
@@ -248,7 +247,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
     elif is_datetime64_object(ts):
         obj.value = get_datetime64_nanos(ts)
         if obj.value != NPY_NAT:
-            dt64_to_dtstruct(obj.value, &obj.dts)
+            pandas_datetime_to_datetimestruct(obj.value, NPY_FR_ns, &obj.dts)
     elif is_integer_object(ts):
         try:
             ts = <int64_t>ts
@@ -266,7 +265,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
 
             ts = ts * cast_from_unit(None, unit)
             obj.value = ts
-            dt64_to_dtstruct(ts, &obj.dts)
+            pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
     elif is_float_object(ts):
         if ts != ts or ts == NPY_NAT:
             obj.value = NPY_NAT
@@ -289,7 +288,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
 
             ts = cast_from_unit(ts, unit)
             obj.value = ts
-            dt64_to_dtstruct(ts, &obj.dts)
+            pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
     elif PyDateTime_Check(ts):
         return convert_datetime_to_tsobject(ts, tz, nanos)
     elif PyDate_Check(ts):
 
@@ -76,7 +76,6 @@ cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1
 cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?)
 
 cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil
-cdef void dt64_to_dtstruct(int64_t dt64, npy_datetimestruct* out) nogil
 
 cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts)
 cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts)
 
@@ -217,14 +217,6 @@ cdef inline int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil:
     return npy_datetimestruct_to_datetime(NPY_FR_ns, dts)
 
 
-cdef inline void dt64_to_dtstruct(int64_t dt64,
-                                  npy_datetimestruct* out) nogil:
-    """Convenience function to call pandas_datetime_to_datetimestruct
-    with the by-far-most-common frequency NPY_FR_ns"""
-    pandas_datetime_to_datetimestruct(dt64, NPY_FR_ns, out)
-    return
-
-
 # just exposed for testing at the moment
 def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit):
     cdef:
 
@@ -49,8 +49,6 @@ from pandas._libs.tslibs.np_datetime cimport (
     NPY_FR_us,
     astype_overflowsafe,
     check_dts_bounds,
-    dt64_to_dtstruct,
-    dtstruct_to_dt64,
     get_timedelta64_value,
     npy_datetimestruct,
     npy_datetimestruct_to_datetime,
@@ -813,7 +811,7 @@ cdef void get_date_info(int64_t ordinal, int freq, npy_datetimestruct *dts) nogi
 
     pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts)
 
-    dt64_to_dtstruct(nanos, &dts2)
+    pandas_datetime_to_datetimestruct(nanos, NPY_DATETIMEUNIT.NPY_FR_ns, &dts2)
     dts.hour = dts2.hour
     dts.min = dts2.min
     dts.sec = dts2.sec
@@ -1149,7 +1147,7 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1:
     get_date_info(ordinal, freq, &dts)
 
     check_dts_bounds(&dts)
-    return dtstruct_to_dt64(&dts)
+    return npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts)
 
 
 cdef str period_format(int64_t value, int freq, object fmt=None):
 
@@ -30,7 +30,6 @@ from .nattype cimport (
 from .np_datetime cimport (
     NPY_DATETIMEUNIT,
     NPY_FR_ns,
-    dt64_to_dtstruct,
     npy_datetimestruct,
     pandas_datetime_to_datetimestruct,
 )
 
@@ -4,6 +4,7 @@
 """
 from __future__ import annotations
 
+import inspect
 import operator
 from textwrap import dedent
 from typing import (
@@ -14,7 +15,7 @@
     cast,
     final,
 )
-from warnings import warn
+import warnings
 
 import numpy as np
 
@@ -57,6 +58,7 @@
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
+    is_signed_integer_dtype,
     is_timedelta64_dtype,
     needs_i8_conversion,
 )
@@ -446,7 +448,12 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:
         )
 
     if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
-        values = _ensure_arraylike(list(values))
+        if not is_signed_integer_dtype(comps):
+            # GH#46485 Use object to avoid upcast to float64 later
+            # TODO: Share with _find_common_type_compat
+            values = construct_1d_object_array_from_listlike(list(values))
+        else:
+            values = _ensure_arraylike(list(values))
     elif isinstance(values, ABCMultiIndex):
         # Avoid raising in extract_array
         values = np.array(values)
@@ -580,7 +587,8 @@ def factorize_array(
 def factorize(
     values,
     sort: bool = False,
-    na_sentinel: int | None = -1,
+    na_sentinel: int | None | lib.NoDefault = lib.no_default,
+    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
     size_hint: int | None = None,
 ) -> tuple[np.ndarray, np.ndarray | Index]:
     """
@@ -598,7 +606,19 @@ def factorize(
         Value to mark "not found". If None, will not drop the NaN
         from the uniques of the values.
 
+        .. deprecated:: 1.5.0
+            The na_sentinel argument is deprecated and
+            will be removed in a future version of pandas. Specify use_na_sentinel as
+            either True or False.
+
         .. versionchanged:: 1.1.2
+
+    use_na_sentinel : bool, default True
+        If True, the sentinel -1 will be used for NaN values. If False,
+        NaN values will be encoded as non-negative integers and will not drop the
+        NaN from the uniques of the values.
+
+        .. versionadded:: 1.5.0
     {size_hint}\
 
     Returns
@@ -646,8 +666,8 @@ def factorize(
     >>> uniques
     array(['a', 'b', 'c'], dtype=object)
 
-    Missing values are indicated in `codes` with `na_sentinel`
-    (``-1`` by default). Note that missing values are never
+    When ``use_na_sentinel=True`` (the default), missing values are indicated in
+    the `codes` with the sentinel value ``-1`` and missing values are not
     included in `uniques`.
 
     >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b'])
@@ -682,16 +702,16 @@ def factorize(
     Index(['a', 'c'], dtype='object')
 
     If NaN is in the values, and we want to include NaN in the uniques of the
-    values, it can be achieved by setting ``na_sentinel=None``.
+    values, it can be achieved by setting ``use_na_sentinel=False``.
 
     >>> values = np.array([1, 2, 1, np.nan])
-    >>> codes, uniques = pd.factorize(values)  # default: na_sentinel=-1
+    >>> codes, uniques = pd.factorize(values)  # default: use_na_sentinel=True
     >>> codes
     array([ 0,  1,  0, -1])
     >>> uniques
     array([1., 2.])
 
-    >>> codes, uniques = pd.factorize(values, na_sentinel=None)
+    >>> codes, uniques = pd.factorize(values, use_na_sentinel=False)
     >>> codes
     array([0, 1, 0, 2])
     >>> uniques
@@ -706,6 +726,7 @@ def factorize(
     # responsible only for factorization. All data coercion, sorting and boxing
     # should happen here.
 
+    na_sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel)
     if isinstance(values, ABCRangeIndex):
         return values.factorize(sort=sort)
 
@@ -730,9 +751,22 @@ def factorize(
         codes, uniques = values.factorize(sort=sort)
         return _re_wrap_factorize(original, uniques, codes)
 
-    if not isinstance(values.dtype, np.dtype):
-        # i.e. ExtensionDtype
-        codes, uniques = values.factorize(na_sentinel=na_sentinel)
+    elif not isinstance(values.dtype, np.dtype):
+        if (
+            na_sentinel == -1
+            and "use_na_sentinel" in inspect.signature(values.factorize).parameters
+        ):
+            # Avoid using catch_warnings when possible
+            # GH#46910 - TimelikeOps has deprecated signature
+            codes, uniques = values.factorize(  # type: ignore[call-arg]
+                use_na_sentinel=True
+            )
+        else:
+            with warnings.catch_warnings():
+                # We've already warned above
+                warnings.filterwarnings("ignore", ".*use_na_sentinel.*", FutureWarning)
+                codes, uniques = values.factorize(na_sentinel=na_sentinel)
+
     else:
         values = np.asarray(values)  # convert DTA/TDA/MultiIndex
         codes, uniques = factorize_array(
@@ -757,6 +791,56 @@ def factorize(
     return _re_wrap_factorize(original, uniques, codes)
 
 
+def resolve_na_sentinel(
+    na_sentinel: int | None | lib.NoDefault,
+    use_na_sentinel: bool | lib.NoDefault,
+) -> int | None:
+    """
+    Determine value of na_sentinel for factorize methods.
+
+    See GH#46910 for details on the deprecation.
+
+    Parameters
+    ----------
+    na_sentinel : int, None, or lib.no_default
+        Value passed to the method.
+    use_na_sentinel : bool or lib.no_default
+        Value passed to the method.
+
+    Returns
+    -------
+    Resolved value of na_sentinel.
+    """
+    if na_sentinel is not lib.no_default and use_na_sentinel is not lib.no_default:
+        raise ValueError(
+            "Cannot specify both `na_sentinel` and `use_na_sentile`; "
+            f"got `na_sentinel={na_sentinel}` and `use_na_sentinel={use_na_sentinel}`"
+        )
+    if na_sentinel is lib.no_default:
+        result = -1 if use_na_sentinel is lib.no_default or use_na_sentinel else None
+    else:
+        if na_sentinel is None:
+            msg = (
+                "Specifying `na_sentinel=None` is deprecated, specify "
+                "`use_na_sentinel=False` instead."
+            )
+        elif na_sentinel == -1:
+            msg = (
+                "Specifying `na_sentinel=-1` is deprecated, specify "
+                "`use_na_sentinel=True` instead."
+            )
+        else:
+            msg = (
+                "Specifying the specific value to use for `na_sentinel` is "
+                "deprecated and will be removed in a future version of pandas. "
+                "Specify `use_na_sentinel=True` to use the sentinel value -1, and "
+                "`use_na_sentinel=False` to encode NaN values."
+            )
+        warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
+        result = na_sentinel
+    return result
+
+
 def _re_wrap_factorize(original, uniques, codes: np.ndarray):
     """
     Wrap factorize results in Series or Index depending on original type.
@@ -950,7 +1034,7 @@ def mode(
     try:
         npresult = np.sort(npresult)
     except TypeError as err:
-        warn(f"Unable to sort modes: {err}")
+        warnings.warn(f"Unable to sort modes: {err}")
 
     result = _reconstruct_data(npresult, original.dtype, original)
     return result
@@ -1570,7 +1654,7 @@ def diff(arr, n: int, axis: int = 0):
                 raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
             return op(arr, arr.shift(n))
         else:
-            warn(
+            warnings.warn(
                 "dtype lost in 'diff()'. In the future this will raise a "
                 "TypeError. Convert to a suitable dtype prior to calling 'diff'.",
                 FutureWarning,
Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@ Fixed regressions`
`23`	`23`
`24`	`24`	`Bug fixes`
`25`	`25`	`~~~~~~~~~`
`26`		`--`
	`26`	+- The :class:`errors.FutureWarning` raised when passing arguments (other than ``filepath_or_buffer``) as positional in :func:`read_csv` is now raised at the correct stacklevel (:issue:`47385`)
`27`	`27`	`-`
`28`	`28`
`29`	`29`	`.. ---------------------------------------------------------------------------`
Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,6 @@ from .nattype cimport (`
`30`	`30`	`from .np_datetime cimport (`
`31`	`31`	`NPY_DATETIMEUNIT,`
`32`	`32`	`NPY_FR_ns,`
`33`		`- dt64_to_dtstruct,`
`34`	`33`	`npy_datetimestruct,`
`35`	`34`	`pandas_datetime_to_datetimestruct,`
`36`	`35`	`)`