AlexKirko
diff --git a/‎.pre-commit-config.yaml
+4-4 b/‎.pre-commit-config.yaml
+4-4
diff --git a/‎asv_bench/benchmarks/array.py
+1-1 b/‎asv_bench/benchmarks/array.py
+1-1
diff --git a/‎asv_bench/benchmarks/frame_methods.py
+15-5 b/‎asv_bench/benchmarks/frame_methods.py
+15-5
diff --git a/‎asv_bench/benchmarks/join_merge.py
+2-2 b/‎asv_bench/benchmarks/join_merge.py
+2-2
diff --git a/‎doc/cheatsheet/README.md
+22 b/‎doc/cheatsheet/README.md
+22
diff --git a/‎doc/cheatsheet/README.txt
-8 b/‎doc/cheatsheet/README.txt
-8
diff --git a/‎doc/make.py
+3-3 b/‎doc/make.py
+3-3
diff --git a/‎doc/source/development/debugging_extensions.rst
+5-1 b/‎doc/source/development/debugging_extensions.rst
+5-1
diff --git a/‎doc/source/getting_started/intro_tutorials/01_table_oriented.rst
+2-2 b/‎doc/source/getting_started/intro_tutorials/01_table_oriented.rst
+2-2
diff --git a/‎doc/source/user_guide/io.rst
+2-2 b/‎doc/source/user_guide/io.rst
+2-2
diff --git a/‎doc/source/whatsnew/index.rst
+1 b/‎doc/source/whatsnew/index.rst
+1
diff --git a/‎doc/source/whatsnew/v0.15.2.rst
+49-13 b/‎doc/source/whatsnew/v0.15.2.rst
+49-13
diff --git a/‎doc/source/whatsnew/v0.24.0.rst
+1 b/‎doc/source/whatsnew/v0.24.0.rst
+1
diff --git a/‎doc/source/whatsnew/v2.1.1.rst
+50 b/‎doc/source/whatsnew/v2.1.1.rst
+50
diff --git a/‎doc/source/whatsnew/v2.2.0.rst
+6-3 b/‎doc/source/whatsnew/v2.2.0.rst
+6-3
diff --git a/‎pandas/_libs/meson.build
+2-1 b/‎pandas/_libs/meson.build
+2-1
diff --git a/‎pandas/_libs/src/parser/tokenizer.c
+2-1 b/‎pandas/_libs/src/parser/tokenizer.c
+2-1
@@ -24,7 +24,7 @@ repos:
     hooks:
       - id: black
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.285
+    rev: v0.0.287
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -34,7 +34,7 @@ repos:
         alias: ruff-selected-autofixes
         args: [--select, "ANN001,ANN204", --fix-only, --exit-non-zero-on-fix]
 -   repo: https://github.com/jendrikseipp/vulture
-    rev: 'v2.7'
+    rev: 'v2.9.1'
     hooks:
       - id: vulture
         entry: python scripts/run_vulture.py
@@ -84,7 +84,7 @@ repos:
             '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
         ]
 -   repo: https://github.com/pylint-dev/pylint
-    rev: v3.0.0a6
+    rev: v3.0.0a7
     hooks:
     -   id: pylint
         stages: [manual]
@@ -124,7 +124,7 @@ repos:
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
 -   repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.6.7
+    rev: v0.6.8
     hooks:
     - id: sphinx-lint
 -   repo: local
 
@@ -90,7 +90,7 @@ def time_setitem(self, multiple_chunks):
             self.array[i] = "foo"
 
     def time_setitem_list(self, multiple_chunks):
-        indexer = list(range(0, 50)) + list(range(-1000, 0, 50))
+        indexer = list(range(50)) + list(range(-1000, 0, 50))
         self.array[indexer] = ["foo"] * len(indexer)
 
     def time_setitem_slice(self, multiple_chunks):
 
@@ -693,20 +693,30 @@ def time_frame_sort_values(self, ascending):
         self.df.sort_values(by="A", ascending=ascending)
 
 
-class SortIndexByColumns:
-    def setup(self):
+class SortMultiKey:
+    params = [True, False]
+    param_names = ["monotonic"]
+
+    def setup(self, monotonic):
         N = 10000
         K = 10
-        self.df = DataFrame(
+        df = DataFrame(
             {
                 "key1": tm.makeStringIndex(N).values.repeat(K),
                 "key2": tm.makeStringIndex(N).values.repeat(K),
                 "value": np.random.randn(N * K),
             }
         )
+        if monotonic:
+            df = df.sort_values(["key1", "key2"])
+        self.df_by_columns = df
+        self.df_by_index = df.set_index(["key1", "key2"])
+
+    def time_sort_values(self, monotonic):
+        self.df_by_columns.sort_values(by=["key1", "key2"])
 
-    def time_frame_sort_values_by_columns(self):
-        self.df.sort_values(by=["key1", "key2"])
+    def time_sort_index(self, monotonic):
+        self.df_by_index.sort_index()
 
 
 class Quantile:
 
@@ -360,14 +360,14 @@ class MergeCategoricals:
     def setup(self):
         self.left_object = DataFrame(
             {
-                "X": np.random.choice(range(0, 10), size=(10000,)),
+                "X": np.random.choice(range(10), size=(10000,)),
                 "Y": np.random.choice(["one", "two", "three"], size=(10000,)),
             }
         )
 
         self.right_object = DataFrame(
             {
-                "X": np.random.choice(range(0, 10), size=(10000,)),
+                "X": np.random.choice(range(10), size=(10000,)),
                 "Z": np.random.choice(["jjj", "kkk", "sss"], size=(10000,)),
             }
         )
 
@@ -0,0 +1,22 @@
+# Pandas Cheat Sheet
+
+The Pandas Cheat Sheet was created using Microsoft PowerPoint 2013.
+To create the PDF version, within PowerPoint, simply do a "Save As"
+and pick "PDF" as the format.
+
+This cheat sheet, originally written by Irv Lustig, [Princeton Consultants](https://www.princetonoptimization.com/), was inspired by the [RStudio Data Wrangling Cheatsheet](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf).
+
+| Topic                  | PDF                                                                                                                                                                                                                                     | PPT                                                                                                                                                                                                                                               |
+|------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Pandas_Cheat_Sheet     | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a>    | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a>     |
+| Pandas_Cheat_Sheet_JA  | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx" target="_parent"><img  src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
+
+
+**Alternative**
+
+Alternatively, if you want to complement your learning, you can use the Pandas Cheat sheets
+developed by [DataCamp](https://www.datacamp.com/) in "PDF", "Google Colab" and "Streamlit" formats.
+
+| Topic       | PDF                                                                                                                                                                                                                                  | Streamlit                                                                                                                                                        | Google Colab                                                                                                                                                                                                                                   |
+|-------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Pandas      | <a href="https://github.com/fralfaro/DS-Cheat-Sheets/blob/main/docs/files/pandas_cs.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a>       | <a href="https://ds-cheat-sheets-pandas.streamlit.app/" target="_parent"><img src="https://static.streamlit.io/badges/streamlit_badge_black_white.svg"/></a>     | <a href="https://colab.research.google.com/github/fralfaro/DS-Cheat-Sheets/blob/main/docs/examples/pandas/pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>         |
@@ -159,10 +159,10 @@ def _get_page_title(self, page):
         Open the rst file `page` and extract its title.
         """
         fname = os.path.join(SOURCE_PATH, f"{page}.rst")
-        option_parser = docutils.frontend.OptionParser(
-            components=(docutils.parsers.rst.Parser,)
+        doc = docutils.utils.new_document(
+            "<doc>",
+            docutils.frontend.get_default_settings(docutils.parsers.rst.Parser),
         )
-        doc = docutils.utils.new_document("<doc>", option_parser.get_default_values())
         with open(fname, encoding="utf-8") as f:
             data = f.read()
 
 
@@ -21,12 +21,16 @@ By default building pandas from source will generate a release build. To generat
 
     pip install -ve . --no-build-isolation --config-settings=builddir="debug" --config-settings=setup-args="-Dbuildtype=debug"
 
+.. note::
+
+   conda environments update CFLAGS/CPPFLAGS with flags that are geared towards generating releases. If using conda, you may need to set ``CFLAGS="$CFLAGS -O0"`` and ``CPPFLAGS="$CPPFLAGS -O0"`` to ensure optimizations are turned off for debugging.
+
 By specifying ``builddir="debug"`` all of the targets will be built and placed in the debug directory relative to the project root. This helps to keep your debug and release artifacts separate; you are of course able to choose a different directory name or omit altogether if you do not care to separate build types.
 
 Editor support
 --------------
 
-The meson build system generates a `compilation database <https://clang.llvm.org/docs/JSONCompilationDatabase.html>`_ automatically and places it in the build directory. Many language servers and IDEs can use this information to provide code-completion, go-to-defintion and error checking support as you type.
+The meson build system generates a `compilation database <https://clang.llvm.org/docs/JSONCompilationDatabase.html>`_ automatically and places it in the build directory. Many language servers and IDEs can use this information to provide code-completion, go-to-definition and error checking support as you type.
 
 How each language server / IDE chooses to look for the compilation database may vary. When in doubt you may want to create a symlink at the root of the project that points to the compilation database in your build directory. Assuming you used *debug* as your directory name, you can run::
 
 
@@ -106,9 +106,9 @@ between square brackets ``[]``.
     </ul>
 
 .. note::
-    If you are familiar to Python
+    If you are familiar with Python
     :ref:`dictionaries <python:tut-dictionaries>`, the selection of a
-    single column is very similar to selection of dictionary values based on
+    single column is very similar to the selection of dictionary values based on
     the key.
 
 You can create a ``Series`` from scratch as well:
 
@@ -1811,8 +1811,8 @@ Writing JSON
 A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json``
 with optional parameters:
 
-* ``path_or_buf`` : the pathname or buffer to write the output
-  This can be ``None`` in which case a JSON string is returned
+* ``path_or_buf`` : the pathname or buffer to write the output.
+  This can be ``None`` in which case a JSON string is returned.
 * ``orient`` :
 
   ``Series``:
 
@@ -24,6 +24,7 @@ Version 2.1
 .. toctree::
    :maxdepth: 2
 
+   v2.1.1
    v2.1.0
 
 Version 2.0
 
@@ -24,25 +24,61 @@ API changes
 - Indexing in ``MultiIndex`` beyond lex-sort depth is now supported, though
   a lexically sorted index will have a better performance. (:issue:`2646`)
 
-  .. ipython:: python
-    :okexcept:
-    :okwarning:
+  .. code-block:: ipython
+
+    In [1]: df = pd.DataFrame({'jim':[0, 0, 1, 1],
+       ...:                    'joe':['x', 'x', 'z', 'y'],
+       ...:                    'jolie':np.random.rand(4)}).set_index(['jim', 'joe'])
+       ...:
 
-    df = pd.DataFrame({'jim':[0, 0, 1, 1],
-                       'joe':['x', 'x', 'z', 'y'],
-                       'jolie':np.random.rand(4)}).set_index(['jim', 'joe'])
-    df
-    df.index.lexsort_depth
+    In [2]: df
+    Out[2]:
+                jolie
+    jim joe
+    0   x    0.126970
+        x    0.966718
+    1   z    0.260476
+        y    0.897237
+
+    [4 rows x 1 columns]
+
+    In [3]: df.index.lexsort_depth
+    Out[3]: 1
 
     # in prior versions this would raise a KeyError
     # will now show a PerformanceWarning
-    df.loc[(1, 'z')]
+    In [4]: df.loc[(1, 'z')]
+    Out[4]:
+                jolie
+    jim joe
+    1   z    0.260476
+
+    [1 rows x 1 columns]
 
     # lexically sorting
-    df2 = df.sort_index()
-    df2
-    df2.index.lexsort_depth
-    df2.loc[(1,'z')]
+    In [5]: df2 = df.sort_index()
+
+    In [6]: df2
+    Out[6]:
+                jolie
+    jim joe
+    0   x    0.126970
+        x    0.966718
+    1   y    0.897237
+        z    0.260476
+
+    [4 rows x 1 columns]
+
+    In [7]: df2.index.lexsort_depth
+    Out[7]: 2
+
+    In [8]: df2.loc[(1,'z')]
+    Out[8]:
+                jolie
+    jim joe
+    1   z    0.260476
+
+    [1 rows x 1 columns]
 
 - Bug in unique of Series with ``category`` dtype, which returned all categories regardless
   whether they were "used" or not (see :issue:`8559` for the discussion).
 
@@ -286,6 +286,7 @@ value. (:issue:`17054`)
 
 .. ipython:: python
 
+    from io import StringIO
     result = pd.read_html(StringIO("""
       <table>
         <thead>
 
@@ -0,0 +1,50 @@
+.. _whatsnew_211:
+
+What's new in 2.1.1 (September XX, 2023)
+----------------------------------------
+
+These are the changes in pandas 2.1.1. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_211.regressions:
+
+Fixed regressions
+~~~~~~~~~~~~~~~~~
+- Fixed regression in :func:`concat` when :class:`DataFrame` 's have two different extension dtypes (:issue:`54848`)
+- Fixed regression in :func:`merge` when merging over a PyArrow string index (:issue:`54894`)
+- Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtype`` is a dict for ``engine="python"`` (:issue:`54868`)
+- Fixed regression in :func:`read_csv` when ``delim_whitespace`` is True (:issue:`54918`, :issue:`54931`)
+- Fixed regression in :meth:`.GroupBy.get_group` raising for ``axis=1`` (:issue:`54858`)
+- Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
+- Fixed regression in :meth:`DataFrame.filter` not respecting the order of elements for ``filter`` (:issue:`54980`)
+- Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite (:issue:`54877`)
+- Fixed regression in :meth:`MultiIndex.append` raising when appending overlapping :class:`IntervalIndex` levels (:issue:`54934`)
+- Fixed regression in :meth:`Series.drop_duplicates` for PyArrow strings (:issue:`54904`)
+- Fixed regression in :meth:`Series.interpolate` raising when ``fill_value`` was given (:issue:`54920`)
+- Fixed regression in :meth:`Series.value_counts` raising for numeric data if ``bins`` was specified (:issue:`54857`)
+- Fixed regression when comparing a :class:`Series` with ``datetime64`` dtype with ``None`` (:issue:`54870`)
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_211.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+- Fixed bug for :class:`ArrowDtype` raising ``NotImplementedError`` for fixed-size list (:issue:`55000`)
+- Fixed bug in :meth:`DataFrame.stack` with ``future_stack=True`` and columns a non-:class:`MultiIndex` consisting of tuples (:issue:`54948`)
+- Fixed bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` showing unnecessary ``FutureWarning`` (:issue:`54981`)
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_211.other:
+
+Other
+~~~~~
+-
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_211.contributors:
+
+Contributors
+~~~~~~~~~~~~
@@ -145,6 +145,7 @@ Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
+- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
 - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
 - Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
 - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
@@ -157,19 +158,21 @@ Deprecations
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
+- Performance improvement in :meth:`DataFrame.to_dict` on converting DataFrame to dictionary (:issue:`50990`)
+- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
 - Performance improvement when indexing with more than 4 keys (:issue:`54550`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_220.bug_fixes:
 
 Bug fixes
 ~~~~~~~~~
 - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`)
+- Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)
 
 Categorical
 ^^^^^^^^^^^
--
+- :meth:`Categorical.isin` raising ``InvalidIndexError`` for categorical containing overlapping :class:`Interval` values (:issue:`34974`)
 -
 
 Datetimelike
@@ -243,7 +246,7 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
--
+- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
 -
 
 Sparse
 
@@ -69,7 +69,8 @@ libs_sources = {
     'index': {'sources': ['index.pyx', _index_class_helper]},
     'indexing': {'sources': ['indexing.pyx']},
     'internals': {'sources': ['internals.pyx']},
-    'interval': {'sources': ['interval.pyx', _intervaltree_helper]},
+    'interval': {'sources': ['interval.pyx', _intervaltree_helper],
+                 'deps': _khash_primitive_helper_dep},
     'join': {'sources': ['join.pyx', _khash_primitive_helper],
              'deps': _khash_primitive_helper_dep},
     'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']},
 
@@ -664,7 +664,8 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
     ((!self->delim_whitespace && c == ' ' && self->skipinitialspace))
 
 // applied when in a field
-#define IS_DELIMITER(c) ((c == delimiter) || (delim_whitespace && isblank(c)))
+#define IS_DELIMITER(c) \
+    ((!delim_whitespace && c == delimiter) || (delim_whitespace && isblank(c)))
 
 #define _TOKEN_CLEANUP()                                                \
     self->stream_len = slen;                                            \
Original file line number	Diff line number	Diff line change
`@@ -360,14 +360,14 @@ class MergeCategoricals:`
`360`	`360`	`def setup(self):`
`361`	`361`	`self.left_object = DataFrame(`
`362`	`362`	`{`
`363`		`- "X": np.random.choice(range(0, 10), size=(10000,)),`
	`363`	`+ "X": np.random.choice(range(10), size=(10000,)),`
`364`	`364`	`"Y": np.random.choice(["one", "two", "three"], size=(10000,)),`
`365`	`365`	`}`
`366`	`366`	`)`
`367`	`367`
`368`	`368`	`self.right_object = DataFrame(`
`369`	`369`	`{`
`370`		`- "X": np.random.choice(range(0, 10), size=(10000,)),`
	`370`	`+ "X": np.random.choice(range(10), size=(10000,)),`
`371`	`371`	`"Z": np.random.choice(["jjj", "kkk", "sss"], size=(10000,)),`
`372`	`372`	`}`
`373`	`373`	`)`