dimastbk
diff --git a/‎.github/workflows/wheels.yml
+2-2 b/‎.github/workflows/wheels.yml
+2-2
diff --git a/‎asv_bench/benchmarks/strings.py
-7 b/‎asv_bench/benchmarks/strings.py
-7
diff --git a/‎ci/code_checks.sh
-3 b/‎ci/code_checks.sh
-3
diff --git a/‎ci/test_wheels.py
-2 b/‎ci/test_wheels.py
-2
diff --git a/‎ci/test_wheels_windows.bat
+2-2 b/‎ci/test_wheels_windows.bat
+2-2
diff --git a/‎doc/source/development/contributing_codebase.rst
+2-1 b/‎doc/source/development/contributing_codebase.rst
+2-1
diff --git a/‎doc/source/reference/arrays.rst
+4-3 b/‎doc/source/reference/arrays.rst
+4-3
diff --git a/‎doc/source/user_guide/pyarrow.rst
+19-1 b/‎doc/source/user_guide/pyarrow.rst
+19-1
diff --git a/‎doc/source/whatsnew/v2.1.0.rst
+8-1 b/‎doc/source/whatsnew/v2.1.0.rst
+8-1
diff --git a/‎pandas/_libs/lib.pyx
+8-3 b/‎pandas/_libs/lib.pyx
+8-3
diff --git a/‎pandas/_libs/tslibs/offsets.pyx
+11-17 b/‎pandas/_libs/tslibs/offsets.pyx
+11-17
diff --git a/‎pandas/_testing/__init__.py
+17 b/‎pandas/_testing/__init__.py
+17
@@ -173,8 +173,8 @@ jobs:
           pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
           cd .. # Not a good idea to test within the src tree
           python -c "import pandas; print(pandas.__version__);
-          pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2', '--no-strict-data-files']);
-          pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])"
+          pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']);
+          pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])"
       - uses: actions/upload-artifact@v3
         with:
           name: sdist
 
@@ -34,7 +34,6 @@ def setup(self, dtype):
 
         # GH37371. Testing construction of string series/frames from ExtensionArrays
         self.series_cat_arr = Categorical(self.series_arr)
-        self.frame_cat_arr = Categorical(self.frame_arr)
 
     def time_series_construction(self, dtype):
         Series(self.series_arr, dtype=dtype)
@@ -54,12 +53,6 @@ def time_cat_series_construction(self, dtype):
     def peakmem_cat_series_construction(self, dtype):
         Series(self.series_cat_arr, dtype=dtype)
 
-    def time_cat_frame_construction(self, dtype):
-        DataFrame(self.frame_cat_arr, dtype=dtype)
-
-    def peakmem_cat_frame_construction(self, dtype):
-        DataFrame(self.frame_cat_arr, dtype=dtype)
-
 
 class Methods(Dtypes):
     def time_center(self, dtype):
 
@@ -86,8 +86,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
     MSG='Partially validate docstrings (EX01)' ;  echo $MSG
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
         pandas.Series.index \
-        pandas.Series.hasnans \
-        pandas.Series.to_list \
         pandas.Series.__iter__ \
         pandas.Series.keys \
         pandas.Series.item \
@@ -309,7 +307,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas_object \
         pandas.api.interchange.from_dataframe \
         pandas.Index.values \
-        pandas.Index.hasnans \
         pandas.Index.dtype \
         pandas.Index.inferred_type \
         pandas.Index.shape \
 
@@ -41,12 +41,10 @@
     multi_args = [
         "-m not clipboard and not single_cpu and not slow and not network and not db",
         "-n 2",
-        "--no-strict-data-files",
     ]
     pd.test(extra_args=multi_args)
     pd.test(
         extra_args=[
             "-m not clipboard and single_cpu and not slow and not network and not db",
-            "--no-strict-data-files",
         ]
     )
@@ -1,6 +1,6 @@
 set test_command=import pandas as pd; print(pd.__version__); ^
-pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '--no-strict-data-files', '-n=2']); ^
-pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])
+pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']); ^
+pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])
 
 python --version
 pip install pytz six numpy python-dateutil tzdata>=2022.1
 
@@ -812,7 +812,8 @@ install pandas) by typing::
     your installation is probably fine and you can start contributing!
 
 Often it is worth running only a subset of tests first around your changes before running the
-entire suite.
+entire suite (tip: you can use the [pandas-coverage app](https://pandas-coverage.herokuapp.com/)
+to find out which tests hit the lines of code you've modified, and then run only those).
 
 The easiest way to do this is with::
 
 
@@ -93,9 +93,10 @@ PyArrow type                                    pandas extension type      NumPy
 
 .. note::
 
-    For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated
-    by :class:`arrays.ArrowStringArray` and ``StringDtype("pyarrow")``. See the :ref:`string section <api.arrays.string>`
-    below.
+    Pyarrow-backed string support is provided by both ``pd.StringDtype("pyarrow")`` and ``pd.ArrowDtype(pa.string())``.
+    ``pd.StringDtype("pyarrow")`` is described below in the :ref:`string section <api.arrays.string>`
+    and will be returned if the string alias ``"string[pyarrow]"`` is specified. ``pd.ArrowDtype(pa.string())``
+    generally has better interoperability with :class:`ArrowDtype` of different types.
 
 While individual values in an :class:`arrays.ArrowExtensionArray` are stored as a PyArrow objects, scalars are **returned**
 as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing
 
@@ -35,6 +35,23 @@ which is similar to a NumPy array. To construct these from the main pandas data
    df = pd.DataFrame([[1, 2], [3, 4]], dtype="uint64[pyarrow]")
    df
 
+.. note::
+
+    The string alias ``"string[pyarrow]"`` maps to ``pd.StringDtype("pyarrow")`` which is not equivalent to
+    specifying ``dtype=pd.ArrowDtype(pa.string())``. Generally, operations on the data will behave similarly
+    except ``pd.StringDtype("pyarrow")`` can return NumPy-backed nullable types while ``pd.ArrowDtype(pa.string())``
+    will return :class:`ArrowDtype`.
+
+   .. ipython:: python
+
+      import pyarrow as pa
+      data = list("abc")
+      ser_sd = pd.Series(data, dtype="string[pyarrow]")
+      ser_ad = pd.Series(data, dtype=pd.ArrowDtype(pa.string()))
+      ser_ad.dtype == ser_sd.dtype
+      ser_sd.str.contains("a")
+      ser_ad.str.contains("a")
+
 For PyArrow types that accept parameters, you can pass in a PyArrow type with those parameters
 into :class:`ArrowDtype` to use in the ``dtype`` parameter.
 
@@ -106,6 +123,7 @@ The following are just some examples of operations that are accelerated by nativ
 
 .. ipython:: python
 
+   import pyarrow as pa
    ser = pd.Series([-1.545, 0.211, None], dtype="float32[pyarrow]")
    ser.mean()
    ser + ser
@@ -115,7 +133,7 @@ The following are just some examples of operations that are accelerated by nativ
    ser.isna()
    ser.fillna(0)
 
-   ser_str = pd.Series(["a", "b", None], dtype="string[pyarrow]")
+   ser_str = pd.Series(["a", "b", None], dtype=pd.ArrowDtype(pa.string()))
    ser_str.str.startswith("a")
 
    from datetime import datetime
 
@@ -100,15 +100,19 @@ Deprecations
 ~~~~~~~~~~~~
 - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
+- Deprecated :meth:`.Groupby.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`)
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
 - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
 - Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
+- Deprecated :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`)
 - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)
+- Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`)
 - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
+- Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
 - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
 -
 
@@ -196,11 +200,12 @@ Missing
 
 MultiIndex
 ^^^^^^^^^^
--
+- Bug in :meth:`MultiIndex.set_levels` not preserving dtypes for :class:`Categorical` (:issue:`52125`)
 -
 
 I/O
 ^^^
+- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
 - :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
 -
 
@@ -226,6 +231,7 @@ Groupby/resample/rolling
   grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
   or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
   the function operated on the whole index rather than each element of the index. (:issue:`51979`)
+- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`)
 -
 
 Reshaping
@@ -252,6 +258,7 @@ Styler
 Other
 ^^^^^
 - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
+- Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`)
 
 .. ***DO NOT USE THIS SECTION***
 
 
@@ -1,6 +1,7 @@
 from collections import abc
 from decimal import Decimal
 from enum import Enum
+from sys import getsizeof
 from typing import (
     Literal,
     _GenericAlias,
@@ -159,7 +160,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t:
 
     n = len(arr)
     for i in range(n):
-        size += arr[i].__sizeof__()
+        size += getsizeof(arr[i])
     return size
 
 
@@ -2325,10 +2326,14 @@ def maybe_convert_numeric(
                 if not seen.coerce_numeric:
                     raise type(err)(f"{err} at position {i}")
 
-                seen.saw_null()
-                floats[i] = NaN
                 mask[i] = 1
 
+                if allow_null_in_int:
+                    seen.null_ = True
+                else:
+                    seen.saw_null()
+                    floats[i] = NaN
+
     if seen.check_uint64_conflict():
         return (values, None)
 
 
@@ -2546,7 +2546,6 @@ cdef class MonthEnd(MonthOffset):
     DateOffset of one month end.
 
     MonthEnd goes to the next date which is an end of the month.
-    To get the end of the current month pass the parameter n equals 0.
 
     See Also
     --------
@@ -2562,10 +2561,10 @@ cdef class MonthEnd(MonthOffset):
     >>> ts + pd.offsets.MonthEnd()
     Timestamp('2022-02-28 00:00:00')
 
-    If you want to get the end of the current month pass the parameter n equals 0:
+    If you want to get the end of the current month:
 
     >>> ts = pd.Timestamp(2022, 1, 31)
-    >>> ts + pd.offsets.MonthEnd(0)
+    >>> pd.offsets.MonthEnd().rollforward(ts)
     Timestamp('2022-01-31 00:00:00')
     """
     _period_dtype_code = PeriodDtypeCode.M
@@ -2578,7 +2577,6 @@ cdef class MonthBegin(MonthOffset):
     DateOffset of one month at beginning.
 
     MonthBegin goes to the next date which is a start of the month.
-    To get the start of the current month pass the parameter n equals 0.
 
     See Also
     --------
@@ -2594,10 +2592,10 @@ cdef class MonthBegin(MonthOffset):
     >>> ts + pd.offsets.MonthBegin()
     Timestamp('2023-01-01 00:00:00')
 
-    If you want to get the start of the current month pass the parameter n equals 0:
+    If you want to get the start of the current month:
 
     >>> ts = pd.Timestamp(2022, 12, 1)
-    >>> ts + pd.offsets.MonthBegin(0)
+    >>> pd.offsets.MonthBegin().rollback(ts)
     Timestamp('2022-12-01 00:00:00')
     """
     _prefix = "MS"
@@ -2609,7 +2607,6 @@ cdef class BusinessMonthEnd(MonthOffset):
     DateOffset increments between the last business day of the month.
 
     BusinessMonthEnd goes to the next date which is the last business day of the month.
-    To get the last business day of the current month pass the parameter n equals 0.
 
     Examples
     --------
@@ -2621,11 +2618,10 @@ cdef class BusinessMonthEnd(MonthOffset):
     >>> ts + pd.offsets.BMonthEnd()
     Timestamp('2022-12-30 00:00:00')
 
-    If you want to get the end of the current business month
-    pass the parameter n equals 0:
+    If you want to get the end of the current business month:
 
     >>> ts = pd.Timestamp(2022, 11, 30)
-    >>> ts + pd.offsets.BMonthEnd(0)
+    >>> pd.offsets.BMonthEnd().rollforward(ts)
     Timestamp('2022-11-30 00:00:00')
     """
     _prefix = "BM"
@@ -2637,8 +2633,7 @@ cdef class BusinessMonthBegin(MonthOffset):
     DateOffset of one month at the first business day.
 
     BusinessMonthBegin goes to the next date which is the first business day
-    of the month. To get the first business day of the current month pass
-    the parameter n equals 0.
+    of the month.
 
     Examples
     --------
@@ -2650,11 +2645,10 @@ cdef class BusinessMonthBegin(MonthOffset):
     >>> ts + pd.offsets.BMonthBegin()
     Timestamp('2023-01-02 00:00:00')
 
-    If you want to get the start of the current business month pass
-    the parameter n equals 0:
+    If you want to get the start of the current business month:
 
     >>> ts = pd.Timestamp(2022, 12, 1)
-    >>> ts + pd.offsets.BMonthBegin(0)
+    >>> pd.offsets.BMonthBegin().rollback(ts)
     Timestamp('2022-12-01 00:00:00')
     """
     _prefix = "BMS"
@@ -2821,10 +2815,10 @@ cdef class SemiMonthEnd(SemiMonthOffset):
     >>> ts + pd.offsets.SemiMonthEnd()
     Timestamp('2022-02-15 00:00:00')
 
-    If you want to get the result for the current month pass the parameter n equals 0:
+    If you want to get the result for the current month:
 
     >>> ts = pd.Timestamp(2022, 1, 15)
-    >>> ts + pd.offsets.SemiMonthEnd(0)
+    >>> pd.offsets.SemiMonthEnd().rollforward(ts)
     Timestamp('2022-01-15 00:00:00')
     """
     _prefix = "SM"
 
@@ -177,6 +177,23 @@
     np.uint32,
 ]
 
+PYTHON_DATA_TYPES = [
+    str,
+    int,
+    float,
+    complex,
+    list,
+    tuple,
+    range,
+    dict,
+    set,
+    frozenset,
+    bool,
+    bytes,
+    bytearray,
+    memoryview,
+]
+
 ENDIAN = {"little": "<", "big": ">"}[byteorder]
 
 NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
Original file line number	Diff line number	Diff line change
`@@ -41,12 +41,10 @@`
`41`	`41`	`multi_args = [`
`42`	`42`	`"-m not clipboard and not single_cpu and not slow and not network and not db",`
`43`	`43`	`"-n 2",`
`44`		`- "--no-strict-data-files",`
`45`	`44`	`]`
`46`	`45`	`pd.test(extra_args=multi_args)`
`47`	`46`	`pd.test(`
`48`	`47`	`extra_args=[`
`49`	`48`	`"-m not clipboard and single_cpu and not slow and not network and not db",`
`50`		`- "--no-strict-data-files",`
`51`	`49`	`]`
`52`	`50`	`)`