pandas-dev
diff --git a/‎.github/workflows/package-checks.yml
+1-1 b/‎.github/workflows/package-checks.yml
+1-1
diff --git a/‎.github/workflows/wheels.yml
+2-2 b/‎.github/workflows/wheels.yml
+2-2
diff --git a/‎.gitignore
+3 b/‎.gitignore
+3
diff --git a/‎.pre-commit-config.yaml
+1-1 b/‎.pre-commit-config.yaml
+1-1
diff --git a/‎ci/code_checks.sh
+4-2 b/‎ci/code_checks.sh
+4-2
diff --git a/‎ci/deps/actions-310-numpydev.yaml
+2 b/‎ci/deps/actions-310-numpydev.yaml
+2
diff --git a/‎ci/deps/actions-310.yaml
+3-1 b/‎ci/deps/actions-310.yaml
+3-1
diff --git a/‎ci/deps/actions-311.yaml
+3-1 b/‎ci/deps/actions-311.yaml
+3-1
diff --git a/‎ci/deps/actions-38-downstream_compat.yaml
+3 b/‎ci/deps/actions-38-downstream_compat.yaml
+3
diff --git a/‎ci/deps/actions-38-minimum_versions.yaml
+1-1 b/‎ci/deps/actions-38-minimum_versions.yaml
+1-1
diff --git a/‎ci/deps/actions-38.yaml
+3 b/‎ci/deps/actions-38.yaml
+3
diff --git a/‎ci/deps/actions-39.yaml
+3-1 b/‎ci/deps/actions-39.yaml
+3-1
diff --git a/‎ci/deps/actions-pypy-38.yaml
+3 b/‎ci/deps/actions-pypy-38.yaml
+3
diff --git a/‎ci/test_wheels.py
+2 b/‎ci/test_wheels.py
+2
diff --git a/‎ci/test_wheels_windows.bat
+3-3 b/‎ci/test_wheels_windows.bat
+3-3
diff --git a/‎doc/source/conf.py
+6-6 b/‎doc/source/conf.py
+6-6
diff --git a/‎doc/source/development/community.rst
+8-6 b/‎doc/source/development/community.rst
+8-6
diff --git a/‎doc/source/development/extending.rst
+46 b/‎doc/source/development/extending.rst
+46
diff --git a/‎doc/source/getting_started/install.rst
-19 b/‎doc/source/getting_started/install.rst
-19
diff --git a/‎doc/source/user_guide/10min.rst
+2-2 b/‎doc/source/user_guide/10min.rst
+2-2
diff --git a/‎doc/source/user_guide/advanced.rst
+2-2 b/‎doc/source/user_guide/advanced.rst
+2-2
diff --git a/‎doc/source/user_guide/categorical.rst
+5-5 b/‎doc/source/user_guide/categorical.rst
+5-5
@@ -20,7 +20,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        extra: ["test", "performance", "timezone", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output_formatting", "clipboard", "compression", "all"]
+        extra: ["test", "performance", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output_formatting", "clipboard", "compression", "all"]
       fail-fast: false
     name: Install Extras - ${{ matrix.extra }}
     concurrency:
 
@@ -173,8 +173,8 @@ jobs:
           pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
           cd .. # Not a good idea to test within the src tree
           python -c "import pandas; print(pandas.__version__);
-          pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']);
-          pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])"
+          pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2', '--no-strict-data-files']);
+          pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])"
       - uses: actions/upload-artifact@v3
         with:
           name: sdist
 
@@ -53,6 +53,9 @@ dist
 # type checkers
 pandas/py.typed
 
+# pyenv
+.python-version
+
 # tox testing tool
 .tox
 # rope
 
@@ -28,7 +28,7 @@ repos:
         types_or: [python, pyi]
         additional_dependencies: [black==23.1.0]
 -   repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.253
+    rev: v0.0.255
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
 
@@ -97,6 +97,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.Series.is_monotonic_increasing \
         pandas.Series.is_monotonic_decreasing \
         pandas.Series.backfill \
+        pandas.Series.bfill \
+        pandas.Series.ffill \
         pandas.Series.pad \
         pandas.Series.argsort \
         pandas.Series.reorder_levels \
@@ -541,14 +543,14 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.DataFrame.iterrows \
         pandas.DataFrame.pipe \
         pandas.DataFrame.backfill \
+        pandas.DataFrame.bfill \
+        pandas.DataFrame.ffill \
         pandas.DataFrame.pad \
         pandas.DataFrame.swapaxes \
         pandas.DataFrame.first_valid_index \
         pandas.DataFrame.last_valid_index \
         pandas.DataFrame.attrs \
         pandas.DataFrame.plot \
-        pandas.DataFrame.sparse.density \
-        pandas.DataFrame.sparse.to_coo \
         pandas.DataFrame.to_gbq \
         pandas.DataFrame.style \
         pandas.DataFrame.__dataframe__
 
@@ -18,9 +18,11 @@ dependencies:
   - python-dateutil
   - pytz
   - pip
+
   - pip:
     - "cython"
     - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
     - "--pre"
     - "numpy"
     - "scipy"
+    - "tzdata>=2022.1"
@@ -49,8 +49,10 @@ dependencies:
   - scipy>=1.7.1
   - sqlalchemy>=1.4.16
   - tabulate>=0.8.9
-  - tzdata>=2022a
   - xarray>=0.21.0
   - xlrd>=2.0.1
   - xlsxwriter>=1.4.3
   - zstandard>=0.15.2
+
+  - pip:
+    - tzdata>=2022.1
@@ -49,8 +49,10 @@ dependencies:
   - scipy>=1.7.1
   - sqlalchemy>=1.4.16
   - tabulate>=0.8.9
-  - tzdata>=2022a
   - xarray>=0.21.0
   - xlrd>=2.0.1
   - xlsxwriter>=1.4.3
   - zstandard>=0.15.2
+
+  - pip:
+    - tzdata>=2022.1
@@ -68,3 +68,6 @@ dependencies:
   - pandas-gbq>=0.15.0
   - pyyaml
   - py
+
+  - pip:
+    - tzdata>=2022.1
@@ -52,11 +52,11 @@ dependencies:
   - scipy=1.7.1
   - sqlalchemy=1.4.16
   - tabulate=0.8.9
-  - tzdata=2022a
   - xarray=0.21.0
   - xlrd=2.0.1
   - xlsxwriter=1.4.3
   - zstandard=0.15.2
 
   - pip:
     - pyqt5==5.15.1
+    - tzdata==2022.1
@@ -53,3 +53,6 @@ dependencies:
   - xlrd>=2.0.1
   - xlsxwriter>=1.4.3
   - zstandard>=0.15.2
+
+  - pip:
+    - tzdata>=2022.1
@@ -49,8 +49,10 @@ dependencies:
   - scipy>=1.7.1
   - sqlalchemy>=1.4.16
   - tabulate>=0.8.9
-  - tzdata>=2022a
   - xarray>=0.21.0
   - xlrd>=2.0.1
   - xlsxwriter>=1.4.3
   - zstandard>=0.15.2
+
+  - pip:
+    - tzdata>=2022.1
@@ -22,3 +22,6 @@ dependencies:
   - numpy
   - python-dateutil
   - pytz
+
+  - pip:
+    - tzdata>=2022.1
@@ -41,10 +41,12 @@
     multi_args = [
         "-m not clipboard and not single_cpu and not slow and not network and not db",
         "-n 2",
+        "--no-strict-data-files",
     ]
     pd.test(extra_args=multi_args)
     pd.test(
         extra_args=[
             "-m not clipboard and single_cpu and not slow and not network and not db",
+            "--no-strict-data-files",
         ]
     )
@@ -1,9 +1,9 @@
 set test_command=import pandas as pd; print(pd.__version__); ^
-pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']); ^
-pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])
+pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '--no-strict-data-files', '-n=2']); ^
+pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])
 
 python --version
-pip install pytz six numpy python-dateutil
+pip install pytz six numpy python-dateutil tzdata>=2022.1
 pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
 pip install --find-links=pandas/dist --no-index pandas
 python -c "%test_command%"
@@ -101,20 +101,20 @@
         reldir = os.path.relpath(dirname, source_path)
         for fname in fnames:
             if os.path.splitext(fname)[-1] in (".rst", ".ipynb"):
-                fname = os.path.relpath(os.path.join(dirname, fname), source_path)
+                rel_fname = os.path.relpath(os.path.join(dirname, fname), source_path)
 
-                if fname == "index.rst" and os.path.abspath(dirname) == source_path:
+                if rel_fname == "index.rst" and os.path.abspath(dirname) == source_path:
                     continue
                 if pattern == "-api" and reldir.startswith("reference"):
-                    exclude_patterns.append(fname)
+                    exclude_patterns.append(rel_fname)
                 elif (
                     pattern == "whatsnew"
                     and not reldir.startswith("reference")
                     and reldir != "whatsnew"
                 ):
-                    exclude_patterns.append(fname)
-                elif single_doc and fname != pattern:
-                    exclude_patterns.append(fname)
+                    exclude_patterns.append(rel_fname)
+                elif single_doc and rel_fname != pattern:
+                    exclude_patterns.append(rel_fname)
 
 with open(os.path.join(source_path, "index.rst.template")) as f:
     t = jinja2.Template(f.read())
 
@@ -111,9 +111,11 @@ contributing to pandas. The slack is a private space, specifically meant for
 people who are hesitant to bring up their questions or ideas on a large public
 mailing list or GitHub.
 
-If this sounds like the right place for you, you are welcome to join! Email us
-at `[email protected] <mailto://[email protected]>`_ and let us
-know that you read and agree to our `Code of Conduct <https://pandas.pydata.org/community/coc.html>`_
-😉 to get an invite. And please remember that slack is not meant to replace the
-mailing list or issue tracker - all important announcements and conversations
-should still happen there.
+If this sounds like the right place for you, you are welcome to join using
+`this link <https://join.slack.com/t/pandas-dev-community/shared_invite/zt-1e2qgy1r6-PLCN8UOLEUAYoLdAsaJilw>`_!
+Please remember to follow our `Code of Conduct <https://pandas.pydata.org/community/coc.html>`_,
+and be aware that our admins are monitoring for irrelevant messages and will remove folks who use
+our
+slack for spam, advertisements and messages not related to the pandas contributing community. And
+please remember that slack is not meant to replace the mailing list or issue tracker - all important
+announcements and conversations should still happen there.
@@ -488,3 +488,49 @@ registers the default "matplotlib" backend as follows.
 
 More information on how to implement a third-party plotting backend can be found at
 https://github.com/pandas-dev/pandas/blob/main/pandas/plotting/__init__.py#L1.
+
+.. _extending.pandas_priority:
+
+Arithmetic with 3rd party types
+-------------------------------
+
+In order to control how arithmetic works between a custom type and a pandas type,
+implement ``__pandas_priority__``.  Similar to numpy's ``__array_priority__``
+semantics, arithmetic methods on :class:`DataFrame`, :class:`Series`, and :class:`Index`
+objects will delegate to ``other``, if it has an attribute ``__pandas_priority__`` with a higher value.
+
+By default, pandas objects try to operate with other objects, even if they are not types known to pandas:
+
+.. code-block:: python
+
+    >>> pd.Series([1, 2]) + [10, 20]
+    0    11
+    1    22
+    dtype: int64
+
+In the example above, if ``[10, 20]`` were a custom type that can be understood as a list, pandas objects would still operate with it in the same way.
+
+In some cases, it is useful to delegate the operation to the other type. For example, suppose I implement a
+custom list object, and I want the result of adding my custom list to a pandas :class:`Series` to be an instance of my list
+and not a :class:`Series` as seen in the previous example. This is now possible by defining the ``__pandas_priority__`` attribute
+of my custom list, and setting it to a higher value than the priority of the pandas objects I want to operate with.
+
+The ``__pandas_priority__`` values of :class:`DataFrame`, :class:`Series`, and :class:`Index` are ``4000``, ``3000``, and ``2000`` respectively.  The base ``ExtensionArray.__pandas_priority__`` is ``1000``.
+
+.. code-block:: python
+
+    class CustomList(list):
+        __pandas_priority__ = 5000
+
+        def __radd__(self, other):
+            # return `self` and not the addition for simplicity
+            return self
+
+    custom = CustomList()
+    series = pd.Series([1, 2, 3])
+
+    # Series refuses to add custom, since it's an unknown type with higher priority
+    assert series.__add__(custom) is NotImplemented
+
+    # This causes the custom class's `__radd__` to be used instead
+    assert series + custom is custom
@@ -308,25 +308,6 @@ Dependency                                            Minimum Version    pip ext
 `numba <https://github.com/numba/numba>`__            0.53.1             performance        Alternative execution engine for operations that accept ``engine="numba"`` using a JIT compiler that translates Python functions to optimized machine code using the LLVM compiler.
 ===================================================== ================== ================== ===================================================================================================================================================================================
 
-Timezones
-^^^^^^^^^
-
-Installable with ``pip install "pandas[timezone]"``
-
-========================= ========================= =============== =============================================================
-Dependency                Minimum Version           pip extra       Notes
-========================= ========================= =============== =============================================================
-tzdata                    2022.1(pypi)/             timezone        Allows the use of ``zoneinfo`` timezones with pandas.
-                          2022a(for system tzdata)                  **Note**: You only need to install the pypi package if your
-                                                                    system does not already provide the IANA tz database.
-                                                                    However, the minimum tzdata version still applies, even if it
-                                                                    is not enforced through an error.
-
-                                                                    If you would like to keep your system tzdata version updated,
-                                                                    it is recommended to use the ``tzdata`` package from
-                                                                    conda-forge.
-========================= ========================= =============== =============================================================
-
 Visualization
 ^^^^^^^^^^^^^
 
 
@@ -702,11 +702,11 @@ Sorting is per order in the categories, not lexical order:
 
     df.sort_values(by="grade")
 
-Grouping by a categorical column also shows empty categories:
+Grouping by a categorical column with ``observed=False`` also shows empty categories:
 
 .. ipython:: python
 
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 
 Plotting
 
@@ -800,8 +800,8 @@ Groupby operations on the index will preserve the index nature as well.
 
 .. ipython:: python
 
-   df2.groupby(level=0).sum()
-   df2.groupby(level=0).sum().index
+   df2.groupby(level=0, observed=True).sum()
+   df2.groupby(level=0, observed=True).sum().index
 
 Reindexing operations will return a resulting index based on the type of the passed
 indexer. Passing a list will return a plain-old ``Index``; indexing with
 
@@ -607,7 +607,7 @@ even if some categories are not present in the data:
     s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"]))
     s.value_counts()
 
-``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories.
+``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories when ``observed=False``.
 
 .. ipython:: python
 
@@ -618,17 +618,17 @@ even if some categories are not present in the data:
         data=[[1, 2, 3], [4, 5, 6]],
         columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
     ).T
-    df.groupby(level=1).sum()
+    df.groupby(level=1, observed=False).sum()
 
-Groupby will also show "unused" categories:
+Groupby will also show "unused" categories when ``observed=False``:
 
 .. ipython:: python
 
     cats = pd.Categorical(
         ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"]
     )
     df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]})
-    df.groupby("cats").mean()
+    df.groupby("cats", observed=False).mean()
 
     cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df2 = pd.DataFrame(
@@ -638,7 +638,7 @@ Groupby will also show "unused" categories:
             "values": [1, 2, 3, 4],
         }
     )
-    df2.groupby(["cats", "B"]).mean()
+    df2.groupby(["cats", "B"], observed=False).mean()
 
 
 Pivot tables:
Original file line number	Diff line number	Diff line change
`@@ -41,10 +41,12 @@`
`41`	`41`	`multi_args = [`
`42`	`42`	`"-m not clipboard and not single_cpu and not slow and not network and not db",`
`43`	`43`	`"-n 2",`
	`44`	`+ "--no-strict-data-files",`
`44`	`45`	`]`
`45`	`46`	`pd.test(extra_args=multi_args)`
`46`	`47`	`pd.test(`
`47`	`48`	`extra_args=[`
`48`	`49`	`"-m not clipboard and single_cpu and not slow and not network and not db",`
	`50`	`+ "--no-strict-data-files",`
`49`	`51`	`]`
`50`	`52`	`)`