AlexKirko
diff --git a/‎.pre-commit-config.yaml
+3-11 b/‎.pre-commit-config.yaml
+3-11
diff --git a/‎asv_bench/benchmarks/arithmetic.py
+4 b/‎asv_bench/benchmarks/arithmetic.py
+4
diff --git a/‎doc/source/_static/reshaping_pivot.png
5.17 KB b/‎doc/source/_static/reshaping_pivot.png
5.17 KB
diff --git a/‎doc/source/getting_started/index.rst
+1-1 b/‎doc/source/getting_started/index.rst
+1-1
diff --git a/‎doc/source/user_guide/advanced.rst
+1-1 b/‎doc/source/user_guide/advanced.rst
+1-1
diff --git a/‎doc/source/user_guide/groupby.rst
+1-1 b/‎doc/source/user_guide/groupby.rst
+1-1
diff --git a/‎doc/source/user_guide/reshaping.rst
+1-1 b/‎doc/source/user_guide/reshaping.rst
+1-1
diff --git a/‎doc/source/user_guide/timeseries.rst
+5-1 b/‎doc/source/user_guide/timeseries.rst
+5-1
diff --git a/‎doc/source/whatsnew/v2.0.0.rst
+1 b/‎doc/source/whatsnew/v2.0.0.rst
+1
diff --git a/‎doc/source/whatsnew/v2.1.0.rst
+2 b/‎doc/source/whatsnew/v2.1.0.rst
+2
diff --git a/‎pandas/_config/config.py
+1-1 b/‎pandas/_config/config.py
+1-1
diff --git a/‎pandas/_libs/parsers.pyx
+14-20 b/‎pandas/_libs/parsers.pyx
+14-20
diff --git a/‎pandas/_libs/tslibs/timedeltas.pyx
+25-2 b/‎pandas/_libs/tslibs/timedeltas.pyx
+25-2
diff --git a/‎pandas/_testing/_random.py
+3-1 b/‎pandas/_testing/_random.py
+3-1
diff --git a/‎pandas/_testing/asserters.py
+10-5 b/‎pandas/_testing/asserters.py
+10-5
diff --git a/‎pandas/_testing/contexts.py
+1-1 b/‎pandas/_testing/contexts.py
+1-1
diff --git a/‎pandas/compat/numpy/function.py
+1-1 b/‎pandas/compat/numpy/function.py
+1-1
@@ -28,7 +28,7 @@ repos:
         types_or: [python, pyi]
         additional_dependencies: [black==23.1.0]
 -   repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.255
+    rev: v0.0.259
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -392,14 +392,6 @@ repos:
         files: ^pandas/
         exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py)
         types: [python]
-    -   id: flake8-pyi
-        name: flake8-pyi
-        entry: flake8 --extend-ignore=E301,E302,E305,E701,E704
-        types: [pyi]
-        language: python
-        additional_dependencies:
-        - flake8==5.0.4
-        - flake8-pyi==22.8.1
     -   id: future-annotations
         name: import annotations from __future__
         entry: 'from __future__ import annotations'
@@ -421,8 +413,8 @@ repos:
         language: python
         stages: [manual]
         additional_dependencies:
-        - autotyping==22.9.0
-        - libcst==0.4.7
+        - autotyping==23.3.0
+        - libcst==0.4.9
     -   id: check-test-naming
         name: check that test names start with 'test'
         entry: python -m scripts.check_test_naming
 
@@ -266,10 +266,14 @@ def setup(self, tz):
         self.ts = self.s[halfway]
 
         self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz))
+        self.ts_different_reso = Timestamp("2001-01-02", tz=tz)
 
     def time_series_timestamp_compare(self, tz):
         self.s <= self.ts
 
+    def time_series_timestamp_different_reso_compare(self, tz):
+        self.s <= self.ts_different_reso
+
     def time_timestamp_series_compare(self, tz):
         self.ts >= self.s
 
 
@@ -533,7 +533,7 @@ Data sets do not only contain numerical data. pandas provides a wide range of fu
 Coming from...
 --------------
 
-Are you familiar with other software for manipulating tablular data? Learn
+Are you familiar with other software for manipulating tabular data? Learn
 the pandas-equivalent operations compared to software you already know:
 
 .. panels::
 
@@ -322,7 +322,7 @@ As usual, **both sides** of the slicers are included as this is label indexing.
 .. warning::
 
    You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and
-   for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted
+   for the **columns**. There are some ambiguous cases where the passed indexer could be misinterpreted
    as indexing *both* axes, rather than into say the ``MultiIndex`` for the rows.
 
    You should do this:
 
@@ -149,7 +149,7 @@ the columns except the one we specify:
    grouped.sum()
 
 The above GroupBy will split the DataFrame on its index (rows). To split by columns, first do
-a tranpose:
+a transpose:
 
 .. ipython::
 
 
@@ -13,7 +13,7 @@ Reshaping by pivoting DataFrame objects
 
 .. image:: ../_static/reshaping_pivot.png
 
-Data is often stored in so-called "stacked" or "record" format:
+Data is often stored in so-called "stacked" or "record" format. In a "record" or "wide" format typically there is one row for each subject. In the "stacked" or "long" format there are multiple rows for each subject where applicable.
 
 .. ipython:: python
 
 
@@ -507,14 +507,18 @@ used if a custom frequency string is passed.
 Timestamp limitations
 ---------------------
 
-Since pandas represents timestamps in nanosecond resolution, the time span that
+The limits of timestamp representation depend on the chosen resolution. For
+nanosecond resolution, the time span that
 can be represented using a 64-bit integer is limited to approximately 584 years:
 
 .. ipython:: python
 
    pd.Timestamp.min
    pd.Timestamp.max
 
+When choosing second-resolution, the available range grows to  ``+/- 2.9e11 years``.
+Different resolutions can be converted to each other through ``as_unit``.
+
 .. seealso::
 
    :ref:`timeseries.oob`
 
@@ -1190,6 +1190,7 @@ Timedelta
 - Bug in :func:`to_timedelta` raising error when input has nullable dtype ``Float64`` (:issue:`48796`)
 - Bug in :class:`Timedelta` constructor incorrectly raising instead of returning ``NaT`` when given a ``np.timedelta64("nat")`` (:issue:`48898`)
 - Bug in :class:`Timedelta` constructor failing to raise when passed both a :class:`Timedelta` object and keywords (e.g. days, seconds) (:issue:`48898`)
+- Bug in :class:`Timedelta` comparisons with very large ``datetime.timedelta`` objects incorrect raising ``OutOfBoundsTimedelta`` (:issue:`49021`)
 
 Timezones
 ^^^^^^^^^
 
@@ -36,6 +36,7 @@ Other enhancements
 - :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`)
 - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
 - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
+- :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
 - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
 
 .. ---------------------------------------------------------------------------
@@ -209,6 +210,7 @@ I/O
 ^^^
 - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
 - :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
+- Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`)
 -
 
 Period
 
@@ -737,7 +737,7 @@ def pp(name: str, ks: Iterable[str]) -> list[str]:
 
 
 @contextmanager
-def config_prefix(prefix) -> Generator[None, None, None]:
+def config_prefix(prefix: str) -> Generator[None, None, None]:
     """
     contextmanager for multiple invocations of API with a common prefix
 
 
@@ -10,7 +10,6 @@ import sys
 import time
 import warnings
 
-from pandas.errors import ParserError
 from pandas.util._exceptions import find_stack_level
 
 from pandas import StringDtype
@@ -106,15 +105,10 @@ from pandas.errors import (
     ParserWarning,
 )
 
-from pandas.core.dtypes.common import (
-    is_bool_dtype,
-    is_datetime64_dtype,
-    is_extension_array_dtype,
-    is_float_dtype,
-    is_integer_dtype,
-    is_object_dtype,
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    ExtensionDtype,
 )
-from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.inference import is_dict_like
 
 cdef:
@@ -1077,7 +1071,7 @@ cdef class TextReader:
 
             # don't try to upcast EAs
             if (
-                na_count > 0 and not is_extension_array_dtype(col_dtype)
+                na_count > 0 and not isinstance(col_dtype, ExtensionDtype)
                 or self.dtype_backend != "numpy"
             ):
                 use_dtype_backend = self.dtype_backend != "numpy" and col_dtype is None
@@ -1142,14 +1136,14 @@ cdef class TextReader:
             # (see _try_bool_flex()). Usually this would be taken care of using
             # _maybe_upcast(), but if col_dtype is a floating type we should just
             # take care of that cast here.
-            if col_res.dtype == np.bool_ and is_float_dtype(col_dtype):
+            if col_res.dtype == np.bool_ and col_dtype.kind == "f":
                 mask = col_res.view(np.uint8) == na_values[np.uint8]
                 col_res = col_res.astype(col_dtype)
                 np.putmask(col_res, mask, np.nan)
                 return col_res, na_count
 
             # NaNs are already cast to True here, so can not use astype
-            if col_res.dtype == np.bool_ and is_integer_dtype(col_dtype):
+            if col_res.dtype == np.bool_ and col_dtype.kind in "iu":
                 if na_count > 0:
                     raise ValueError(
                         f"cannot safely convert passed user dtype of "
@@ -1193,14 +1187,14 @@ cdef class TextReader:
                 cats, codes, dtype, true_values=true_values)
             return cat, na_count
 
-        elif is_extension_array_dtype(dtype):
+        elif isinstance(dtype, ExtensionDtype):
             result, na_count = self._string_convert(i, start, end, na_filter,
                                                     na_hashset)
 
             array_type = dtype.construct_array_type()
             try:
                 # use _from_sequence_of_strings if the class defines it
-                if is_bool_dtype(dtype):
+                if dtype.kind == "b":
                     true_values = [x.decode() for x in self.true_values]
                     false_values = [x.decode() for x in self.false_values]
                     result = array_type._from_sequence_of_strings(
@@ -1216,7 +1210,7 @@ cdef class TextReader:
 
             return result, na_count
 
-        elif is_integer_dtype(dtype):
+        elif dtype.kind in "iu":
             try:
                 result, na_count = _try_int64(self.parser, i, start,
                                               end, na_filter, na_hashset)
@@ -1233,14 +1227,14 @@ cdef class TextReader:
 
             return result, na_count
 
-        elif is_float_dtype(dtype):
+        elif dtype.kind == "f":
             result, na_count = _try_double(self.parser, i, start, end,
                                            na_filter, na_hashset, na_flist)
 
             if result is not None and dtype != "float64":
                 result = result.astype(dtype)
             return result, na_count
-        elif is_bool_dtype(dtype):
+        elif dtype.kind == "b":
             result, na_count = _try_bool_flex(self.parser, i, start, end,
                                               na_filter, na_hashset,
                                               self.true_set, self.false_set)
@@ -1267,10 +1261,10 @@ cdef class TextReader:
             # unicode variable width
             return self._string_convert(i, start, end, na_filter,
                                         na_hashset)
-        elif is_object_dtype(dtype):
+        elif dtype == object:
             return self._string_convert(i, start, end, na_filter,
                                         na_hashset)
-        elif is_datetime64_dtype(dtype):
+        elif dtype.kind == "M":
             raise TypeError(f"the dtype {dtype} is not supported "
                             f"for parsing, pass this column "
                             f"using parse_dates instead")
@@ -1438,7 +1432,7 @@ def _maybe_upcast(
     -------
     The casted array.
     """
-    if is_extension_array_dtype(arr.dtype):
+    if isinstance(arr.dtype, ExtensionDtype):
         # TODO: the docstring says arr is an ndarray, in which case this cannot
         #  be reached. Is that incorrect?
         return arr
 
@@ -4,6 +4,10 @@ import warnings
 cimport cython
 from cpython.object cimport (
     Py_EQ,
+    Py_GE,
+    Py_GT,
+    Py_LE,
+    Py_LT,
     Py_NE,
     PyObject,
     PyObject_RichCompare,
@@ -1154,8 +1158,27 @@ cdef class _Timedelta(timedelta):
         if isinstance(other, _Timedelta):
             ots = other
         elif is_any_td_scalar(other):
-            ots = Timedelta(other)
-            # TODO: watch out for overflows
+            try:
+                ots = Timedelta(other)
+            except OutOfBoundsTimedelta as err:
+                # GH#49021 pytimedelta.max overflows
+                if not PyDelta_Check(other):
+                    # TODO: handle this case
+                    raise
+                ltup = (self.days, self.seconds, self.microseconds, self.nanoseconds)
+                rtup = (other.days, other.seconds, other.microseconds, 0)
+                if op == Py_EQ:
+                    return ltup == rtup
+                elif op == Py_NE:
+                    return ltup != rtup
+                elif op == Py_LT:
+                    return ltup < rtup
+                elif op == Py_LE:
+                    return ltup <= rtup
+                elif op == Py_GT:
+                    return ltup > rtup
+                elif op == Py_GE:
+                    return ltup >= rtup
 
         elif other is NaT:
             return op == Py_NE
 
@@ -10,7 +10,9 @@
 RANDS_CHARS = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
 
 
-def rands_array(nchars, size, dtype: NpDtype = "O", replace: bool = True) -> np.ndarray:
+def rands_array(
+    nchars, size: int, dtype: NpDtype = "O", replace: bool = True
+) -> np.ndarray:
     """
     Generate an array of byte strings.
     """
 
@@ -13,10 +13,8 @@
 
 from pandas.core.dtypes.common import (
     is_bool,
-    is_categorical_dtype,
     is_extension_array_dtype,
     is_integer_dtype,
-    is_interval_dtype,
     is_number,
     is_numeric_dtype,
     needs_i8_conversion,
@@ -33,6 +31,7 @@
     DataFrame,
     DatetimeIndex,
     Index,
+    IntervalDtype,
     IntervalIndex,
     MultiIndex,
     PeriodIndex,
@@ -238,7 +237,9 @@ def _check_types(left, right, obj: str = "Index") -> None:
         assert_attr_equal("inferred_type", left, right, obj=obj)
 
         # Skip exact dtype checking when `check_categorical` is False
-        if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype):
+        if isinstance(left.dtype, CategoricalDtype) and isinstance(
+            right.dtype, CategoricalDtype
+        ):
             if check_categorical:
                 assert_attr_equal("dtype", left, right, obj=obj)
                 assert_index_equal(left.categories, right.categories, exact=exact)
@@ -335,7 +336,9 @@ def _get_ilevel_values(index, level):
         assert_interval_array_equal(left._values, right._values)
 
     if check_categorical:
-        if is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype):
+        if isinstance(left.dtype, CategoricalDtype) or isinstance(
+            right.dtype, CategoricalDtype
+        ):
             assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
 
 
@@ -946,7 +949,9 @@ def assert_series_equal(
                 f"is not equal to {right._values}."
             )
             raise AssertionError(msg)
-    elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype):
+    elif isinstance(left.dtype, IntervalDtype) and isinstance(
+        right.dtype, IntervalDtype
+    ):
         assert_interval_array_equal(left.array, right.array)
     elif isinstance(left.dtype, CategoricalDtype) or isinstance(
         right.dtype, CategoricalDtype
 
@@ -154,7 +154,7 @@ def ensure_safe_environment_variables() -> Generator[None, None, None]:
 
 
 @contextmanager
-def with_csv_dialect(name, **kwargs) -> Generator[None, None, None]:
+def with_csv_dialect(name: str, **kwargs) -> Generator[None, None, None]:
     """
     Context manager to temporarily register a CSV dialect for parsing CSV.
 
 
@@ -342,7 +342,7 @@ def validate_take_with_convert(convert: ndarray | bool | None, args, kwargs) ->
 )
 
 
-def validate_groupby_func(name, args, kwargs, allowed=None) -> None:
+def validate_groupby_func(name: str, args, kwargs, allowed=None) -> None:
     """
     'args' and 'kwargs' should be empty, except for allowed kwargs because all
     of their necessary parameters are explicitly listed in the function
Original file line number	Diff line number	Diff line change
`@@ -342,7 +342,7 @@ def validate_take_with_convert(convert: ndarray \| bool \| None, args, kwargs) ->`
`342`	`342`	`)`
`343`	`343`
`344`	`344`
`345`		`-def validate_groupby_func(name, args, kwargs, allowed=None) -> None:`
	`345`	`+def validate_groupby_func(name: str, args, kwargs, allowed=None) -> None:`
`346`	`346`	`"""`
`347`	`347`	`'args' and 'kwargs' should be empty, except for allowed kwargs because all`
`348`	`348`	`of their necessary parameters are explicitly listed in the function`