sthagen · sthagen · Aug 8, 2021 · Aug 5, 2021 · Aug 5, 2021 · Aug 5, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -32,10 +32,6 @@ jobs:
       with:
         fetch-depth: 0
 
-    - name: Looking for unwanted patterns
-      run: ci/code_checks.sh patterns
-      if: always()
-
     - name: Cache conda
       uses: actions/cache@v2
       with:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -102,7 +102,29 @@ repos:
             # Incorrect code-block / IPython directives
             |\.\.\ code-block\ ::
             |\.\.\ ipython\ ::
+
+            # Check for deprecated messages without sphinx directive
+            |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
         types_or: [python, cython, rst]
+    -   id: incorrect-backticks
+        name: Check for backticks incorrectly rendering because of missing spaces
+        language: pygrep
+        entry: '[a-zA-Z0-9]\`\`?[a-zA-Z0-9]'
+        types: [rst]
+        files: ^doc/source/
+    -   id: seed-check-asv
+        name: Check for unnecessary random seeds in asv benchmarks
+        language: pygrep
+        entry: 'np\.random\.seed'
+        files: ^asv_bench/benchmarks
+        exclude: ^asv_bench/benchmarks/pandas_vb_common\.py
+    -   id: invalid-ea-testing
+        name: Check for invalid EA testing
+        language: pygrep
+        entry: 'tm\.assert_(series|frame)_equal'
+        files: ^pandas/tests/extension/base
+        types: [python]
+        exclude: ^pandas/tests/extension/base/base\.py
     -   id: pip-to-conda
         name: Generate pip dependency from conda
         description: This hook checks if the conda environment.yml and requirements-dev.txt are equal

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -11,14 +11,13 @@
 # Usage:
 #   $ ./ci/code_checks.sh               # run all checks
 #   $ ./ci/code_checks.sh lint          # run linting only
-#   $ ./ci/code_checks.sh patterns      # check for patterns that should not exist
 #   $ ./ci/code_checks.sh code          # checks on imported code
 #   $ ./ci/code_checks.sh doctests      # run doctests
 #   $ ./ci/code_checks.sh docstrings    # validate docstring errors
 #   $ ./ci/code_checks.sh typing        # run static type analysis
 
-[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
-    { echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|typing]"; exit 9999; }
+[[ -z "$1" || "$1" == "lint" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
+    { echo "Unknown command $1. Usage: $0 [lint|code|doctests|docstrings|typing]"; exit 9999; }
 
 BASE_DIR="$(dirname $0)/.."
 RET=0
@@ -58,28 +57,6 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
 
 fi
 
-### PATTERNS ###
-if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
-
-    # Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal`
-    MSG='Check for invalid EA testing' ; echo $MSG
-    invgrep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for deprecated messages without sphinx directive' ; echo $MSG
-    invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
-    invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for unnecessary random seeds in asv benchmarks' ; echo $MSG
-    invgrep -R --exclude pandas_vb_common.py -E 'np.random.seed' asv_bench/benchmarks/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-fi
-
 ### CODE ###
 if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
 

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -12,7 +12,7 @@ cross platform distribution for data analysis and scientific computing.
 This is the recommended installation method for most users.
 
 Instructions for installing from source,
-`PyPI <https://pypi.org/project/pandas>`__, `ActivePython <https://www.activestate.com/activepython/downloads>`__, various Linux distributions, or a
+`PyPI <https://pypi.org/project/pandas>`__, `ActivePython <https://www.activestate.com/products/python/downloads/>`__, various Linux distributions, or a
 `development version <https://github.com/pandas-dev/pandas>`__ are also provided.
 
 .. _install.version:
@@ -47,7 +47,7 @@ rest of the `SciPy <https://scipy.org/>`__ stack without needing to install
 anything else, and without needing to wait for any software to be compiled.
 
 Installation instructions for `Anaconda <https://docs.continuum.io/anaconda/>`__
-`can be found here <https://docs.continuum.io/anaconda/install.html>`__.
+`can be found here <https://docs.continuum.io/anaconda/install/>`__.
 
 A full list of the packages available as part of the
 `Anaconda <https://docs.continuum.io/anaconda/>`__ distribution

diff --git a/doc/source/getting_started/overview.rst b/doc/source/getting_started/overview.rst
@@ -29,7 +29,7 @@ and :class:`DataFrame` (2-dimensional), handle the vast majority of typical use
 cases in finance, statistics, social science, and many areas of
 engineering. For R users, :class:`DataFrame` provides everything that R's
 ``data.frame`` provides and much more. pandas is built on top of `NumPy
-<https://www.numpy.org>`__ and is intended to integrate well within a scientific
+<https://numpy.org>`__ and is intended to integrate well within a scientific
 computing environment with many other 3rd party libraries.
 
 Here are just a few of the things that pandas does well:
@@ -75,7 +75,7 @@ Some other notes
    specialized tool.
 
  - pandas is a dependency of `statsmodels
-   <https://www.statsmodels.org/stable/index.html>`__, making it an important part of the
+   <https://statsmodels.org>`__, making it an important part of the
    statistical computing ecosystem in Python.
 
  - pandas has been used extensively in production in financial applications.
@@ -168,7 +168,7 @@ The list of the Core Team members and more detailed information can be found on
 Institutional partners
 ----------------------
 
-The information about current institutional partners can be found on `pandas website page <https://pandas.pydata.org/about.html>`__.
+The information about current institutional partners can be found on `pandas website page <https://pandas.pydata.org/about/sponsors.html>`__.
 
 License
 -------

diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst
@@ -32,7 +32,11 @@ Bug fixes
 ~~~~~~~~~
 - Bug in :meth:`pandas.read_excel` modifies the dtypes dictionary when reading a file with duplicate columns (:issue:`42462`)
 - 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`)
+- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and ``window`` is an offset that covers all the rows (:issue:`42753`)
 - :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`)
+- :meth:`.Styler.set_sticky` has amended CSS to control the column/index names and ensure the correct sticky positions (:issue:`42537`)
+- Bug in de-serializing datetime indexes when Python runs in optimized mode (``-O`` or ``PYTHONOPTIMIZE``) (:issue:`42866`)
+-
 
 .. ---------------------------------------------------------------------------
 

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -34,6 +34,7 @@ Other enhancements
 - :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
 -  Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
 - :meth:`Styler.bar` now validates the input argument ``width`` and ``height`` (:issue:`42511`)
+- Added keyword ``levels`` to :meth:`.Styler.hide_index` for optionally controlling hidden levels in a MultiIndex (:issue:`25475`)
 - :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
 - Added ``sparse_index`` and ``sparse_columns`` keyword arguments to :meth:`.Styler.to_html` (:issue:`41946`)
 - Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`)
@@ -236,6 +237,7 @@ Indexing
 - Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
 - Bug in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` when passing an ascending value, failed to raise or incorrectly raising ``ValueError`` (:issue:`41634`)
 - Bug in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
+- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.nan`` (:issue:`35392`)
 - Bug in :meth:`DataFrame.query` did not handle the degree sign in a backticked column name, such as \`Temp(°C)\`, used in an expression to query a dataframe (:issue:`42826`)
 -
 
@@ -274,7 +276,9 @@ Groupby/resample/rolling
 - Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
 - Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
 - Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`)
+- Bug in :meth:`SeriesGroupBy.nlargest` and :meth:`SeriesGroupBy.nsmallest` would have an inconsistent index when the input Series was sorted and ``n`` was greater than or equal to all group sizes (:issue:`15272`, :issue:`16345`, :issue:`29129`)
 - Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`)
+- Bug in :meth:`pandas.DataFrame.rolling` where operations along rows (``axis=1``) incorrectly omitted columns containing ``float16`` and ``float32`` (:issue:`41779`)
 
 Reshaping
 ^^^^^^^^^

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -288,10 +288,12 @@ cdef class IndexEngine:
             object val
             int count = 0, count_missing = 0
             Py_ssize_t i, j, n, n_t, n_alloc
+            bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True
 
         self._ensure_mapping_populated()
         values = np.array(self._get_index_values(), copy=False)
         stargets = set(targets)
+
         n = len(values)
         n_t = len(targets)
         if n > 10_000:
@@ -321,19 +323,35 @@ cdef class IndexEngine:
 
         if stargets:
             # otherwise, map by iterating through all items in the index
+
             for i in range(n):
                 val = values[i]
                 if val in stargets:
                     if val not in d:
                         d[val] = []
                     d[val].append(i)
 
+                elif util.is_nan(val):
+                    # GH#35392: val is NaN-like and missed the set lookup above
+                    if need_nan_check:
+                        # Scan stargets once; the answer is loop-invariant
+                        stargets_has_nan = any(util.is_nan(x) for x in stargets)
+                        need_nan_check = False
+
+                    if stargets_has_nan:
+                        if not d_has_nan:
+                            # use a canonical nan object
+                            d[np.nan] = []
+                            d_has_nan = True
+                        d[np.nan].append(i)
+
         for i in range(n_t):
             val = targets[i]
 
             # found
-            if val in d:
-                for j in d[val]:
+            if val in d or (d_has_nan and util.is_nan(val)):
+                key = val if not util.is_nan(val) else np.nan
+                for j in d[key]:
 
                     # realloc if needed
                     if count >= n_alloc:

diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
@@ -265,6 +265,9 @@ ctypedef fused join_t:
     int16_t
     int32_t
     int64_t
+    uint8_t
+    uint16_t
+    uint32_t
     uint64_t
 
 

diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx
@@ -79,12 +79,11 @@ def calculate_variable_window_bounds(
     else:
         end[0] = 0
     if center:
-        for j in range(0, num_values + 1):
-            if (index[j] == index[0] + index_growth_sign * window_size / 2 and
-               right_closed):
+        end_bound = index[0] + index_growth_sign * window_size / 2
+        for j in range(0, num_values):
+            if (index[j] < end_bound) or (index[j] == end_bound and right_closed):
                 end[0] = j + 1
-                break
-            elif index[j] >= index[0] + index_growth_sign * window_size / 2:
+            elif index[j] >= end_bound:
                 end[0] = j
                 break
 

diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
@@ -105,6 +105,7 @@
     use_numexpr,
     with_csv_dialect,
 )
+from pandas.core.api import NumericIndex
 from pandas.core.arrays import (
     DatetimeArray,
     PandasArray,
@@ -314,7 +315,7 @@ def makeNumericIndex(k=10, name=None, *, dtype):
     else:
         raise NotImplementedError(f"wrong dtype {dtype}")
 
-    return Index(values, dtype=dtype, name=name)
+    return NumericIndex(values, dtype=dtype, name=name)
 
 
 def makeIntIndex(k=10, name=None):

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -460,6 +460,16 @@ def _create_mi_with_dt64tz_level():
     "uint": tm.makeUIntIndex(100),
     "range": tm.makeRangeIndex(100),
     "float": tm.makeFloatIndex(100),
+    "num_int64": tm.makeNumericIndex(100, dtype="int64"),
+    "num_int32": tm.makeNumericIndex(100, dtype="int32"),
+    "num_int16": tm.makeNumericIndex(100, dtype="int16"),
+    "num_int8": tm.makeNumericIndex(100, dtype="int8"),
+    "num_uint64": tm.makeNumericIndex(100, dtype="uint64"),
+    "num_uint32": tm.makeNumericIndex(100, dtype="uint32"),
+    "num_uint16": tm.makeNumericIndex(100, dtype="uint16"),
+    "num_uint8": tm.makeNumericIndex(100, dtype="uint8"),
+    "num_float64": tm.makeNumericIndex(100, dtype="float64"),
+    "num_float32": tm.makeNumericIndex(100, dtype="float32"),
     "bool": tm.makeBoolIndex(10),
     "categorical": tm.makeCategoricalIndex(100),
     "interval": tm.makeIntervalIndex(100),
@@ -511,7 +521,10 @@ def index_flat(request):
     params=[
         key
         for key in indices_dict
-        if key not in ["int", "uint", "range", "empty", "repeats"]
+        if not (
+            key in ["int", "uint", "range", "empty", "repeats"]
+            or key.startswith("num_")
+        )
         and not isinstance(indices_dict[key], MultiIndex)
     ]
 )
-Original file line number
+Diff line change
@@ Expand Up / @@ -265,6 +265,9 @@ ctypedef fused join_t: @@
         int16_t
         int32_t
         int64_t
+        uint8_t
+        uint16_t
+        uint32_t
         uint64_t
@@ Expand Down @@