
Commit be2044c

Merge branch 'master' into categorical_map_na_Action

2 parents: 64b7297 + 1f7a7f2

189 files changed: +2656 -2420 lines


.circleci/setup_env.sh (+2 -1)

@@ -55,7 +55,8 @@ if pip list | grep -q ^pandas; then
 fi

 echo "Build extensions"
-python setup.py build_ext -q -j4
+# GH 47305: Parallel build can causes flaky ImportError from pandas/_libs/tslibs
+python setup.py build_ext -q -j1

 echo "Install pandas"
 python -m pip install --no-build-isolation --no-use-pep517 -e .

.github/actions/build_pandas/action.yml (+4 -2)

@@ -16,5 +16,7 @@ runs:
     python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
   shell: bash -el {0}
   env:
-    # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
-    N_JOBS: ${{ runner.os == 'macOS' && 3 || 2 }}
+    # Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
+    # GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
+    N_JOBS: 1
+    #N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}

.github/actions/setup-conda/action.yml (+1 -1)

@@ -30,7 +30,7 @@ runs:
 environment-name: ${{ inputs.environment-name }}
 extra-specs: ${{ inputs.extra-specs }}
 channels: conda-forge
-channel-priority: 'strict'
+channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
 condarc-file: ci/condarc.yml
 cache-env: true
 cache-downloads: true

.github/workflows/32-bit-linux.yml (+1 -1)

@@ -40,7 +40,7 @@ jobs:
 python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
 python -m pip install versioneer[toml] && \
 python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
-python setup.py build_ext -q -j$(nproc) && \
+python setup.py build_ext -q -j1 && \
 python -m pip install --no-build-isolation --no-use-pep517 -e . && \
 python -m pip list && \
 export PANDAS_CI=1 && \

.github/workflows/package-checks.yml (+1 -1)

@@ -20,7 +20,7 @@ jobs:
 runs-on: ubuntu-22.04
 strategy:
   matrix:
-    extra: ["test", "performance", "timezone", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output_formatting", "clipboard", "compression", "all"]
+    extra: ["test", "performance", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output_formatting", "clipboard", "compression", "all"]
   fail-fast: false
 name: Install Extras - ${{ matrix.extra }}
 concurrency:

.github/workflows/python-dev.yml (+2 -1)

@@ -82,9 +82,10 @@ jobs:
     python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
     python -m pip list

+# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
 - name: Build Pandas
   run: |
-    python setup.py build_ext -q -j4
+    python setup.py build_ext -q -j1
     python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index

 - name: Build Version

.pre-commit-config.yaml (+1 -1)

@@ -28,7 +28,7 @@ repos:
     types_or: [python, pyi]
     additional_dependencies: [black==23.1.0]
 - repo: https://github.com/charliermarsh/ruff-pre-commit
-  rev: v0.0.253
+  rev: v0.0.255
   hooks:
     - id: ruff
       args: [--exit-non-zero-on-fix]

MANIFEST.in (-2)

@@ -58,5 +58,3 @@ prune pandas/tests/io/parser/data
 # Selectively re-add *.cxx files that were excluded above
 graft pandas/_libs/src
 graft pandas/_libs/tslibs/src
-include pandas/_libs/pd_parser.h
-include pandas/_libs/pd_parser.c

ci/code_checks.sh (+4 -2)

@@ -97,6 +97,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
     pandas.Series.is_monotonic_increasing \
     pandas.Series.is_monotonic_decreasing \
     pandas.Series.backfill \
+    pandas.Series.bfill \
+    pandas.Series.ffill \
     pandas.Series.pad \
     pandas.Series.argsort \
     pandas.Series.reorder_levels \
@@ -541,14 +543,14 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
     pandas.DataFrame.iterrows \
     pandas.DataFrame.pipe \
     pandas.DataFrame.backfill \
+    pandas.DataFrame.bfill \
+    pandas.DataFrame.ffill \
     pandas.DataFrame.pad \
     pandas.DataFrame.swapaxes \
     pandas.DataFrame.first_valid_index \
     pandas.DataFrame.last_valid_index \
     pandas.DataFrame.attrs \
     pandas.DataFrame.plot \
-    pandas.DataFrame.sparse.density \
-    pandas.DataFrame.sparse.to_coo \
     pandas.DataFrame.to_gbq \
     pandas.DataFrame.style \
     pandas.DataFrame.__dataframe__

ci/deps/actions-310-numpydev.yaml (+2)

@@ -18,9 +18,11 @@ dependencies:
 - python-dateutil
 - pytz
 - pip
+
 - pip:
   - "cython"
   - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
   - "--pre"
   - "numpy"
   - "scipy"
+  - "tzdata>=2022.1"

ci/deps/actions-310.yaml (+3 -1)

@@ -49,8 +49,10 @@ dependencies:
 - scipy>=1.7.1
 - sqlalchemy>=1.4.16
 - tabulate>=0.8.9
-- tzdata>=2022a
 - xarray>=0.21.0
 - xlrd>=2.0.1
 - xlsxwriter>=1.4.3
 - zstandard>=0.15.2
+
+- pip:
+  - tzdata>=2022.1

ci/deps/actions-311.yaml (+3 -1)

@@ -49,8 +49,10 @@ dependencies:
 - scipy>=1.7.1
 - sqlalchemy>=1.4.16
 - tabulate>=0.8.9
-- tzdata>=2022a
 - xarray>=0.21.0
 - xlrd>=2.0.1
 - xlsxwriter>=1.4.3
 - zstandard>=0.15.2
+
+- pip:
+  - tzdata>=2022.1

ci/deps/actions-38-downstream_compat.yaml (+3)

@@ -68,3 +68,6 @@ dependencies:
 - pandas-gbq>=0.15.0
 - pyyaml
 - py
+
+- pip:
+  - tzdata>=2022.1

ci/deps/actions-38-minimum_versions.yaml (+1 -1)

@@ -52,11 +52,11 @@ dependencies:
 - scipy=1.7.1
 - sqlalchemy=1.4.16
 - tabulate=0.8.9
-- tzdata=2022a
 - xarray=0.21.0
 - xlrd=2.0.1
 - xlsxwriter=1.4.3
 - zstandard=0.15.2

 - pip:
   - pyqt5==5.15.1
+  - tzdata==2022.1

ci/deps/actions-38.yaml (+3)

@@ -53,3 +53,6 @@ dependencies:
 - xlrd>=2.0.1
 - xlsxwriter>=1.4.3
 - zstandard>=0.15.2
+
+- pip:
+  - tzdata>=2022.1

ci/deps/actions-39.yaml (+3 -1)

@@ -49,8 +49,10 @@ dependencies:
 - scipy>=1.7.1
 - sqlalchemy>=1.4.16
 - tabulate>=0.8.9
-- tzdata>=2022a
 - xarray>=0.21.0
 - xlrd>=2.0.1
 - xlsxwriter>=1.4.3
 - zstandard>=0.15.2
+
+- pip:
+  - tzdata>=2022.1

ci/deps/actions-pypy-38.yaml (+3)

@@ -22,3 +22,6 @@ dependencies:
 - numpy
 - python-dateutil
 - pytz
+
+- pip:
+  - tzdata>=2022.1

ci/test_wheels_windows.bat (+1 -1)

@@ -3,7 +3,7 @@ pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not ne
 pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])

 python --version
-pip install pytz six numpy python-dateutil
+pip install pytz six numpy python-dateutil tzdata>=2022.1
 pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
 pip install --find-links=pandas/dist --no-index pandas
 python -c "%test_command%"

doc/source/conf.py (+6 -6)

@@ -101,20 +101,20 @@
     reldir = os.path.relpath(dirname, source_path)
     for fname in fnames:
         if os.path.splitext(fname)[-1] in (".rst", ".ipynb"):
-            fname = os.path.relpath(os.path.join(dirname, fname), source_path)
+            rel_fname = os.path.relpath(os.path.join(dirname, fname), source_path)

-            if fname == "index.rst" and os.path.abspath(dirname) == source_path:
+            if rel_fname == "index.rst" and os.path.abspath(dirname) == source_path:
                 continue
             if pattern == "-api" and reldir.startswith("reference"):
-                exclude_patterns.append(fname)
+                exclude_patterns.append(rel_fname)
             elif (
                 pattern == "whatsnew"
                 and not reldir.startswith("reference")
                 and reldir != "whatsnew"
             ):
-                exclude_patterns.append(fname)
-            elif single_doc and fname != pattern:
-                exclude_patterns.append(fname)
+                exclude_patterns.append(rel_fname)
+            elif single_doc and rel_fname != pattern:
+                exclude_patterns.append(rel_fname)

     with open(os.path.join(source_path, "index.rst.template")) as f:
         t = jinja2.Template(f.read())
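
The fix above stops rebinding the loop variable ``fname`` and introduces ``rel_fname``, so the bare file name and the source-relative path stay distinct for the rest of the loop body. A minimal sketch of the corrected pattern, using made-up values for ``source_path`` and ``single_doc`` rather than the real pandas doc build settings:

    import os

    source_path = "doc/source"           # assumed root, for illustration only
    single_doc = "user_guide/10min.rst"  # hypothetical single-document target
    exclude_patterns = []

    for dirname, _, fnames in os.walk(source_path):
        for fname in fnames:
            if os.path.splitext(fname)[-1] in (".rst", ".ipynb"):
                # Binding the result to rel_fname (instead of reassigning fname)
                # keeps the original name available for later comparisons.
                rel_fname = os.path.relpath(os.path.join(dirname, fname), source_path)
                if single_doc and rel_fname != single_doc:
                    exclude_patterns.append(rel_fname)

    print(exclude_patterns)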

doc/source/development/community.rst (+8 -6)

@@ -111,9 +111,11 @@ contributing to pandas. The slack is a private space, specifically meant for
 people who are hesitant to bring up their questions or ideas on a large public
 mailing list or GitHub.

-If this sounds like the right place for you, you are welcome to join! Email us
-at `[email protected] <mailto://[email protected]>`_ and let us
-know that you read and agree to our `Code of Conduct <https://pandas.pydata.org/community/coc.html>`_
-😉 to get an invite. And please remember that slack is not meant to replace the
-mailing list or issue tracker - all important announcements and conversations
-should still happen there.
+If this sounds like the right place for you, you are welcome to join using
+`this link <https://join.slack.com/t/pandas-dev-community/shared_invite/zt-1e2qgy1r6-PLCN8UOLEUAYoLdAsaJilw>`_!
+Please remember to follow our `Code of Conduct <https://pandas.pydata.org/community/coc.html>`_,
+and be aware that our admins are monitoring for irrelevant messages and will remove folks who use
+our
+slack for spam, advertisements and messages not related to the pandas contributing community. And
+please remember that slack is not meant to replace the mailing list or issue tracker - all important
+announcements and conversations should still happen there.

doc/source/getting_started/install.rst (-19)

@@ -308,25 +308,6 @@ Dependency Minimum Version pip ext
 `numba <https://github.com/numba/numba>`__ 0.53.1 performance Alternative execution engine for operations that accept ``engine="numba"`` using a JIT compiler that translates Python functions to optimized machine code using the LLVM compiler.
 ===================================================== ================== ================== ===================================================================================================================================================================================

-Timezones
-^^^^^^^^^
-
-Installable with ``pip install "pandas[timezone]"``
-
-========================= ========================= =============== =============================================================
-Dependency                Minimum Version           pip extra       Notes
-========================= ========================= =============== =============================================================
-tzdata                    2022.1(pypi)/             timezone        Allows the use of ``zoneinfo`` timezones with pandas.
-                          2022a(for system tzdata)                  **Note**: You only need to install the pypi package if your
-                                                                    system does not already provide the IANA tz database.
-                                                                    However, the minimum tzdata version still applies, even if it
-                                                                    is not enforced through an error.
-
-                                                                    If you would like to keep your system tzdata version updated,
-                                                                    it is recommended to use the ``tzdata`` package from
-                                                                    conda-forge.
-========================= ========================= =============== =============================================================
-
 Visualization
 ^^^^^^^^^^^^^
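
The removed Timezones table and the ``pip`` entries added to the ci/deps files above concern the same dependency: ``tzdata`` ships the IANA database that the standard-library ``zoneinfo`` module falls back to when the system does not provide one. A small sketch of the usage that dependency supports (ordinary pandas/zoneinfo calls, not code from this commit):

    from zoneinfo import ZoneInfo  # backed by the tzdata package when no system database exists

    import pandas as pd

    # Localize a timestamp with a zoneinfo timezone instead of a pytz one
    ts = pd.Timestamp("2023-03-15 12:00").tz_localize(ZoneInfo("Europe/Brussels"))
    print(ts)         # expected: 2023-03-15 12:00:00+01:00
    print(ts.tzinfo)  # expected: zoneinfo.ZoneInfo(key='Europe/Brussels')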

doc/source/user_guide/10min.rst (+2 -2)

@@ -702,11 +702,11 @@ Sorting is per order in the categories, not lexical order:
    df.sort_values(by="grade")

-Grouping by a categorical column also shows empty categories:
+Grouping by a categorical column with ``observed=False`` also shows empty categories:

 .. ipython:: python

-   df.groupby("grade").size()
+   df.groupby("grade", observed=False).size()


 Plotting

doc/source/user_guide/advanced.rst (+2 -2)

@@ -800,8 +800,8 @@ Groupby operations on the index will preserve the index nature as well.

 .. ipython:: python

-   df2.groupby(level=0).sum()
-   df2.groupby(level=0).sum().index
+   df2.groupby(level=0, observed=True).sum()
+   df2.groupby(level=0, observed=True).sum().index

 Reindexing operations will return a resulting index based on the type of the passed
 indexer. Passing a list will return a plain-old ``Index``; indexing with

doc/source/user_guide/categorical.rst (+5 -5)

@@ -607,7 +607,7 @@ even if some categories are not present in the data:
    s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"]))
    s.value_counts()

-``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories.
+``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories when ``observed=False``.

 .. ipython:: python

@@ -618,17 +618,17 @@ even if some categories are not present in the data:
        data=[[1, 2, 3], [4, 5, 6]],
        columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
    ).T
-   df.groupby(level=1).sum()
+   df.groupby(level=1, observed=False).sum()

-Groupby will also show "unused" categories:
+Groupby will also show "unused" categories when ``observed=False``:

 .. ipython:: python

    cats = pd.Categorical(
        ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"]
    )
    df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]})
-   df.groupby("cats").mean()
+   df.groupby("cats", observed=False).mean()

    cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
    df2 = pd.DataFrame(
@@ -638,7 +638,7 @@ Groupby will also show "unused" categories:
            "values": [1, 2, 3, 4],
        }
    )
-   df2.groupby(["cats", "B"]).mean()
+   df2.groupby(["cats", "B"], observed=False).mean()


 Pivot tables:
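
The documentation edits above spell out ``observed=False`` because grouping by a categorical key behaves differently depending on that flag: ``observed=False`` keeps every declared category in the result, even empty ones, while ``observed=True`` drops them. A short sketch with made-up data:

    import pandas as pd

    grades = pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"])
    df = pd.DataFrame({"grade": grades, "score": [1, 2, 3]})

    # The unused category "c" is kept (sum 0) with observed=False ...
    print(df.groupby("grade", observed=False)["score"].sum())
    # ... and dropped with observed=True
    print(df.groupby("grade", observed=True)["score"].sum())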

doc/source/user_guide/copy_on_write.rst (+33 -7)

@@ -6,11 +6,6 @@
 Copy-on-Write (CoW)
 *******************

-.. ipython:: python
-   :suppress:
-
-   pd.options.mode.copy_on_write = True
-
 Copy-on-Write was first introduced in version 1.5.0. Starting from version 2.0 most of the
 optimizations that become possible through CoW are implemented and supported. A complete list
 can be found at :ref:`Copy-on-Write optimizations <copy_on_write.optimizations>`.
@@ -21,6 +16,36 @@ CoW will lead to more predictable behavior since it is not possible to update mo
 one object with one statement, e.g. indexing operations or methods won't have side-effects. Additionally, through
 delaying copies as long as possible, the average performance and memory usage will improve.

+Previous behavior
+-----------------
+
+pandas indexing behavior is tricky to understand. Some operations return views while
+other return copies. Depending on the result of the operation, mutation one object
+might accidentally mutate another:
+
+.. ipython:: python
+
+    df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+    subset = df["foo"]
+    subset.iloc[0] = 100
+    df
+
+Mutating ``subset``, e.g. updating its values, also updates ``df``. The exact behavior is
+hard to predict. Copy-on-Write solves accidentally modifying more than one object,
+it explicitly disallows this. With CoW enabled, ``df`` is unchanged:
+
+.. ipython:: python
+
+    pd.options.mode.copy_on_write = True
+
+    df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+    subset = df["foo"]
+    subset.iloc[0] = 100
+    df
+
+The following sections will explain what this means and how it impacts existing
+applications.
+
 Description
 -----------

@@ -114,10 +139,11 @@ two subsequent indexing operations, e.g.
 The column ``foo`` is updated where the column ``bar`` is greater than 5.
 This violates the CoW principles though, because it would have to modify the
 view ``df["foo"]`` and ``df`` in one step. Hence, chained assignment will
-consistently never work and raise a ``ChainedAssignmentError`` with CoW enabled:
+consistently never work and raise a ``ChainedAssignmentError`` warning
+with CoW enabled:

 .. ipython:: python
-   :okexcept:
+   :okwarning:

    df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
    df["foo"][df["bar"] > 5] = 100
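
The new "Previous behavior" section added above contrasts mutation through a view with Copy-on-Write. A self-contained sketch of the same contrast (pandas 2.0 or later; the commented output is the expected result, not captured from this build):

    import pandas as pd

    # Default behavior: writing through the selected column may also write into df
    df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
    subset = df["foo"]
    subset.iloc[0] = 100
    print(df["foo"].tolist())  # typically [100, 2, 3] without Copy-on-Write

    # With Copy-on-Write enabled, df is left untouched
    pd.options.mode.copy_on_write = True
    df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
    subset = df["foo"]
    subset.iloc[0] = 100
    print(df["foo"].tolist())  # [1, 2, 3]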

doc/source/user_guide/groupby.rst (+1 -1)

@@ -1401,7 +1401,7 @@ can be used as group keys. If so, the order of the levels will be preserved:

    factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0])

-   data.groupby(factor).mean()
+   data.groupby(factor, observed=False).mean()

 .. _groupby.specify:
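
``pd.qcut`` returns a ``Categorical``, so the grouping in the hunk above is a categorical grouping as well and now passes ``observed`` explicitly. A small sketch with synthetic data, using fixed ``pd.cut`` bins so that one bin can end up empty:

    import numpy as np
    import pandas as pd

    data = pd.Series(np.random.default_rng(0).normal(size=100))

    # Fixed bins: the outermost bin is unlikely to catch any standard-normal draws
    factor = pd.cut(data, [-5, -4, -1, 0, 1, 5])

    # observed=False keeps the empty (-5, -4] bin in the result (mean NaN);
    # observed=True would drop it
    print(data.groupby(factor, observed=False).mean())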
