sthagen · sthagen · Aug 14, 2021 · Aug 9, 2021 · Aug 10, 2021 · Aug 10, 2021
diff --git a/.github/ISSUE_TEMPLATE/submit_question.md b/.github/ISSUE_TEMPLATE/submit_question.md
diff --git a/.github/ISSUE_TEMPLATE/submit_question.yml b/.github/ISSUE_TEMPLATE/submit_question.yml
@@ -0,0 +1,43 @@
+name: Submit Question
+description: Ask a general question about pandas
+title: "QST: "
+labels: [Usage Question, Needs Triage]
+
+body:
+  - type: markdown
+    attributes:
+      value: >
+        Since [StackOverflow](https://stackoverflow.com) is better suited towards answering
+        usage questions, we ask that all usage questions are first asked on StackOverflow.
+  - type: checkboxes
+    attributes:
+      options:
+        - label: >
+            I have searched the [[pandas] tag](https://stackoverflow.com/questions/tagged/pandas)
+            on StackOverflow for similar questions.
+          required: true
+        - label: >
+            I have asked my usage related question on [StackOverflow](https://stackoverflow.com).
+          required: true
+  - type: input
+    id: question-link
+    attributes:
+      label: Link to question on StackOverflow
+    validations:
+      required: true
+  - type: markdown
+    attributes:
+      value: ---
+  - type: textarea
+    id: question
+    attributes:
+      label: Question about pandas
+      description: >
+        **Note**: If you'd still like to submit a question, please read [this guide](
+        https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) detailing
+        how to provide the necessary information for us to reproduce your question.
+      placeholder: |
+        ```python
+        # Your code here, if applicable
+
+        ```
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -48,10 +48,6 @@ jobs:
     - name: Build Pandas
       uses: ./.github/actions/build_pandas
 
-    - name: Linting
-      run: ci/code_checks.sh lint
-      if: always()
-
     - name: Checks on imported code
       run: ci/code_checks.sh code
       if: always()

diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml
@@ -41,7 +41,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip setuptools wheel
-        pip install git+https://github.com/numpy/numpy.git
+        pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
         pip install git+https://github.com/pytest-dev/pytest.git
         pip install git+https://github.com/nedbat/coveragepy.git
         pip install cython python-dateutil pytz hypothesis pytest-xdist pytest-cov

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -107,6 +107,11 @@ repos:
             # Check for deprecated messages without sphinx directive
             |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
         types_or: [python, cython, rst]
+    -   id: cython-casting
+        name: Check Cython casting is `<type>obj`, not `<type> obj`
+        language: pygrep
+        entry: '[a-zA-Z0-9*]> '
+        files: (\.pyx|\.pxi.in)$
     -   id: incorrect-backticks
         name: Check for backticks incorrectly rendering because of missing spaces
         language: pygrep

diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -538,8 +538,12 @@ class Interpolate:
     def setup(self, downcast):
         N = 10000
         # this is the worst case, where every column has NaNs.
-        self.df = DataFrame(np.random.randn(N, 100))
-        self.df.values[::2] = np.nan
+        arr = np.random.randn(N, 100)
+        # NB: we need to set values in array, not in df.values, otherwise
+        #  the benchmark will be misleading for ArrayManager
+        arr[::2] = np.nan
+
+        self.df = DataFrame(arr)
 
         self.df2 = DataFrame(
             {

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -3,21 +3,18 @@
 # Run checks related to code quality.
 #
 # This script is intended for both the CI and to check locally that code standards are
-# respected. We are currently linting (PEP-8 and similar), looking for patterns of
-# common mistakes (sphinx directives with missing blank lines, old style classes,
-# unwanted imports...), we run doctests here (currently some files only), and we
+# respected. We run doctests here (currently some files only), and we
 # validate formatting error in docstrings.
 #
 # Usage:
 #   $ ./ci/code_checks.sh               # run all checks
-#   $ ./ci/code_checks.sh lint          # run linting only
 #   $ ./ci/code_checks.sh code          # checks on imported code
 #   $ ./ci/code_checks.sh doctests      # run doctests
 #   $ ./ci/code_checks.sh docstrings    # validate docstring errors
 #   $ ./ci/code_checks.sh typing        # run static type analysis
 
-[[ -z "$1" || "$1" == "lint" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
-    { echo "Unknown command $1. Usage: $0 [lint|code|doctests|docstrings|typing]"; exit 9999; }
+[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
+    { echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|typing]"; exit 9999; }
 
 BASE_DIR="$(dirname $0)/.."
 RET=0
@@ -40,23 +37,6 @@ if [[ "$GITHUB_ACTIONS" == "true" ]]; then
     INVGREP_PREPEND="##[error]"
 fi
 
-### LINTING ###
-if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
-
-    # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
-    # it doesn't make a difference, but we want to be internally consistent.
-    # Note: this grep pattern is (intended to be) equivalent to the python
-    # regex r'(?<![ ->])> '
-    MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG
-    invgrep -r -E --include '*.pyx' --include '*.pxi.in' '[a-zA-Z0-9*]> ' pandas/_libs
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # readability/casting: Warnings about C casting instead of C++ casting
-    # runtime/int: Warnings about using C number types instead of C++ ones
-    # build/include_subdir: Warnings about prefacing included header files with directory
-
-fi
-
 ### CODE ###
 if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
 

diff --git a/ci/deps/actions-39-numpydev.yaml b/ci/deps/actions-39-numpydev.yaml
@@ -11,11 +11,11 @@ dependencies:
   - hypothesis>=5.5.3
 
   # pandas dependencies
+  - python-dateutil
   - pytz
   - pip
   - pip:
     - cython==0.29.21 # GH#34014
-    - "git+git://github.com/dateutil/dateutil.git"
     - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
     - "--pre"
     - "numpy"

diff --git a/doc/source/_static/style/appmaphead1.png b/doc/source/_static/style/appmaphead1.png
diff --git a/doc/source/_static/style/appmaphead2.png b/doc/source/_static/style/appmaphead2.png
diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst
@@ -23,11 +23,10 @@ contributing them to the project::
 
    ./ci/code_checks.sh
 
-The script verifies the linting of code files, it looks for common mistake patterns
-(like missing spaces around sphinx directives that make the documentation not
-being rendered properly) and it also validates the doctests. It is possible to
-run the checks independently by using the parameters ``lint``, ``patterns`` and
-``doctests`` (e.g. ``./ci/code_checks.sh lint``).
+The script validates the doctests, formatting in docstrings, static typing, and
+imported modules. It is possible to run the checks independently by using the
+parameters ``docstrings``, ``code``, ``typing``, and ``doctests``
+(e.g. ``./ci/code_checks.sh doctests``).
 
 In addition, because a lot of people use our library, it is important that we
 do not make sudden changes to the code that could have the potential to break

diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst
@@ -106,7 +106,7 @@ extension array for IP Address data, this might be ``ipaddress.IPv4Address``.
 
 See the `extension dtype source`_ for interface definition.
 
-:class:`pandas.api.extension.ExtensionDtype` can be registered to pandas to allow creation via a string dtype name.
+:class:`pandas.api.extensions.ExtensionDtype` can be registered to pandas to allow creation via a string dtype name.
 This allows one to instantiate ``Series`` and ``.astype()`` with a registered string name, for
 example ``'category'`` is a registered string accessor for the ``CategoricalDtype``.
 
@@ -125,7 +125,7 @@ data. We do require that your array be convertible to a NumPy array, even if
 this is relatively expensive (as it is for ``Categorical``).
 
 They may be backed by none, one, or many NumPy arrays. For example,
-``pandas.Categorical`` is an extension array backed by two arrays,
+:class:`pandas.Categorical` is an extension array backed by two arrays,
 one for codes and one for categories. An array of IPv6 addresses may
 be backed by a NumPy structured array with two fields, one for the
 lower 64 bits and one for the upper 64 bits. Or they may be backed

diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst
@@ -170,6 +170,7 @@ Numeric Index
    :toctree: api/
    :template: autosummary/class_without_autosummary.rst
 
+   NumericIndex
    RangeIndex
    Int64Index
    UInt64Index

diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst
@@ -36,6 +36,8 @@ Style application
 
    Styler.apply
    Styler.applymap
+   Styler.apply_index
+   Styler.applymap_index
    Styler.format
    Styler.hide_index
    Styler.hide_columns

diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
@@ -7,7 +7,7 @@ MultiIndex / advanced indexing
 ******************************
 
 This section covers :ref:`indexing with a MultiIndex <advanced.hierarchical>`
-and :ref:`other advanced indexing features <indexing.index_types>`.
+and :ref:`other advanced indexing features <advanced.index_types>`.
 
 See the :ref:`Indexing and Selecting Data <indexing>` for general indexing documentation.
 
@@ -738,7 +738,7 @@ faster than fancy indexing.
    %timeit ser.iloc[indexer]
    %timeit ser.take(indexer)
 
-.. _indexing.index_types:
+.. _advanced.index_types:
 
 Index types
 -----------
@@ -749,7 +749,7 @@ and documentation about ``TimedeltaIndex`` is found :ref:`here <timedeltas.index
 
 In the following sub-sections we will highlight some other index types.
 
-.. _indexing.categoricalindex:
+.. _advanced.categoricalindex:
 
 CategoricalIndex
 ~~~~~~~~~~~~~~~~
@@ -846,22 +846,36 @@ values **not** in the categories, similarly to how you can reindex **any** panda
       In [1]: pd.concat([df4, df5])
       TypeError: categories must match existing categories when appending
 
-.. _indexing.rangeindex:
+.. _advanced.rangeindex:
 
 Int64Index and RangeIndex
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
+.. note::
+
+    In pandas 2.0, :class:`NumericIndex` will become the default index type for numeric types
+    instead of ``Int64Index``, ``Float64Index`` and ``UInt64Index``, and those index types
+    will be removed. See :ref:`here <advanced.numericindex>` for more.
+    ``RangeIndex``, however, will not be removed, as it represents an optimized version of an integer index.
+
 :class:`Int64Index` is a fundamental basic index in pandas. This is an immutable array
 implementing an ordered, sliceable set.
 
 :class:`RangeIndex` is a sub-class of ``Int64Index``  that provides the default index for all ``NDFrame`` objects.
 ``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to Python `range types <https://docs.python.org/3/library/stdtypes.html#typesseq-range>`__.
 
-.. _indexing.float64index:
+.. _advanced.float64index:
 
 Float64Index
 ~~~~~~~~~~~~
 
+.. note::
+
+    In pandas 2.0, :class:`NumericIndex` will become the default index type for numeric types
+    instead of ``Int64Index``, ``Float64Index`` and ``UInt64Index``, and those index types
+    will be removed. See :ref:`here <advanced.numericindex>` for more.
+    ``RangeIndex``, however, will not be removed, as it represents an optimized version of an integer index.
+
 By default a :class:`Float64Index` will be automatically created when passing floating, or mixed-integer-floating values in index creation.
 This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the
 same.
@@ -956,6 +970,38 @@ If you need integer based selection, you should use ``iloc``:
 
    dfir.iloc[0:5]
 
+
+.. _advanced.numericindex:
+
+NumericIndex
+~~~~~~~~~~~~
+
+.. versionadded:: 1.4.0
+
+.. note::
+
+    In pandas 2.0, :class:`NumericIndex` will become the default index type for numeric types
+    instead of ``Int64Index``, ``Float64Index`` and ``UInt64Index``, and those index types
+    will be removed.
+    ``RangeIndex``, however, will not be removed, as it represents an optimized version of an integer index.
+
+:class:`NumericIndex` is an index type that can hold data of any NumPy int/uint/float dtype. For example:
+
+.. ipython:: python
+
+   idx = pd.NumericIndex([1, 2, 4, 5], dtype="int8")
+   idx
+   ser = pd.Series(range(4), index=idx)
+   ser
+
+``NumericIndex`` works the same way as the existing ``Int64Index``, ``Float64Index`` and
+``UInt64Index`` except that it can hold any NumPy int, uint or float dtype.
+
+Until pandas 2.0, you will have to call ``NumericIndex`` explicitly in order to use it, as in the example above.
+In pandas 2.0, ``NumericIndex`` will become the default pandas numeric index type and will automatically be used where appropriate.
+
+Please note that ``NumericIndex`` *cannot* hold pandas numeric dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
+
 .. _advanced.intervalindex:
 
 IntervalIndex

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
@@ -1141,7 +1141,7 @@ Categorical index
 ``CategoricalIndex`` is a type of index that is useful for supporting
 indexing with duplicates. This is a container around a ``Categorical``
 and allows efficient indexing and storage of an index with a large number of duplicated elements.
-See the :ref:`advanced indexing docs <indexing.categoricalindex>` for a more detailed
+See the :ref:`advanced indexing docs <advanced.categoricalindex>` for a more detailed
 explanation.
 
 Setting the index will create a ``CategoricalIndex``:

diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst
@@ -474,7 +474,15 @@ rows and columns:
 
 .. ipython:: python
 
-   df.pivot_table(index=["A", "B"], columns="C", margins=True, aggfunc=np.std)
+   table = df.pivot_table(index=["A", "B"], columns="C", margins=True, aggfunc=np.std)
+   table
+
+Additionally, you can call :meth:`DataFrame.stack` to display a pivoted DataFrame
+as having a multi-level index:
+
+.. ipython:: python
+
+    table.stack()
 
 .. _reshaping.crosstabulations:
 

diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
@@ -294,7 +294,7 @@ To convert back to sparse SciPy matrix in COO format, you can use the :meth:`Dat
 
    sdf.sparse.to_coo()
 
-meth:`Series.sparse.to_coo` is implemented for transforming a ``Series`` with sparse values indexed by a :class:`MultiIndex` to a :class:`scipy.sparse.coo_matrix`.
+:meth:`Series.sparse.to_coo` is implemented for transforming a ``Series`` with sparse values indexed by a :class:`MultiIndex` to a :class:`scipy.sparse.coo_matrix`.
 
 The method requires a ``MultiIndex`` with two or more levels.