
Commit 960ddb5

Merge branch 'pandas-dev:master' into perf-readcsv
2 parents: 96ddaef + 6599834


67 files changed: +813 / -427 lines

.github/workflows/pre-commit.yml (-2)

@@ -13,8 +13,6 @@ jobs:
     concurrency:
       group: ${{ github.ref }}-pre-commit
       cancel-in-progress: ${{github.event_name == 'pull_request'}}
-    env:
-      SKIP: pyright
     steps:
     - uses: actions/checkout@v2
     - uses: actions/setup-python@v2

.pre-commit-config.yaml (+1)

@@ -89,6 +89,7 @@ repos:
         language: node
         pass_filenames: false
         types: [python]
+        stages: [manual]
         # note: keep version in sync with .github/workflows/ci.yml
         additional_dependencies: ['[email protected]']
 -   repo: local

ci/deps/actions-38-db.yaml (+1, -1)

@@ -16,7 +16,7 @@ dependencies:
   - botocore>=1.11
   - dask
   - fastparquet>=0.4.0
-  - fsspec>=0.7.4, <2021.6.0
+  - fsspec>=0.7.4
   - gcsfs>=0.6.0
   - geopandas
   - html5lib

ci/deps/actions-38-slow.yaml (+1, -1)

@@ -13,7 +13,7 @@ dependencies:

   # pandas dependencies
   - beautifulsoup4
-  - fsspec>=0.7.4, <2021.6.0
+  - fsspec>=0.7.4
   - html5lib
   - lxml
   - matplotlib

ci/deps/actions-39-slow.yaml (+1, -1)

@@ -15,7 +15,7 @@ dependencies:
   # pandas dependencies
   - beautifulsoup4
   - bottleneck
-  - fsspec>=0.8.0, <2021.6.0
+  - fsspec>=0.8.0
   - gcsfs
   - html5lib
   - jinja2

ci/deps/actions-39.yaml (+1, -1)

@@ -14,7 +14,7 @@ dependencies:
   # pandas dependencies
   - beautifulsoup4
   - bottleneck
-  - fsspec>=0.8.0, <2021.6.0
+  - fsspec>=0.8.0
   - gcsfs
   - html5lib
   - jinja2

ci/deps/azure-windows-38.yaml (+1, -1)

@@ -17,7 +17,7 @@ dependencies:
   - bottleneck
   - fastparquet>=0.4.0
   - flask
-  - fsspec>=0.8.0, <2021.6.0
+  - fsspec>=0.8.0
   - matplotlib=3.3.2
   - moto>=1.3.14
   - numba

ci/deps/azure-windows-39.yaml (+1, -1)

@@ -15,7 +15,7 @@ dependencies:
   # pandas dependencies
   - beautifulsoup4
   - bottleneck
-  - fsspec>=0.8.0, <2021.6.0
+  - fsspec>=0.8.0
   - gcsfs
   - html5lib
   - jinja2

doc/source/development/contributing_codebase.rst (+3, -1)

@@ -402,10 +402,12 @@ pandas uses `mypy <http://mypy-lang.org>`_ and `pyright <https://github.com/micr
     mypy pandas

     # let pre-commit setup and run pyright
-    pre-commit run --all-files pyright
+    pre-commit run --hook-stage manual --all-files pyright

     # or if pyright is installed (requires node.js)
     pyright

+A recent version of ``numpy`` (>=1.21.0) is required for type validation.
+
 .. _contributing.ci:

 Testing with continuous integration

doc/source/whatsnew/v1.3.4.rst (+2)

@@ -22,6 +22,8 @@ Fixed regressions
 - Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`)
 - Fixed regression in :meth:`pandas.read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`)
 - Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`)
+- Fixed regression when iterating over a :class:`DataFrame.groupby.rolling` object causing the resulting DataFrames to have an incorrect index if the input groupings were not sorted (:issue:`43386`)
+- Fixed regression in :meth:`DataFrame.groupby.rolling.cov` and :meth:`DataFrame.groupby.rolling.corr` computing incorrect results if the input groupings were not sorted (:issue:`43386`)

 .. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst (+7, -1)

@@ -126,7 +126,8 @@ Other enhancements
 - Attempting to write into a file in missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions missing parent directory, the same is true for :class:`Series` counterparts (:issue:`24306`)
 - :meth:`IntegerArray.all` , :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`)
 - Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
--
+- :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
+

 .. ---------------------------------------------------------------------------

@@ -336,6 +337,7 @@ Other Deprecations
 - Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`)
 - Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
 - Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
+- Deprecated the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
 - Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
 - Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`)
 - Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)

@@ -386,6 +388,7 @@ Datetimelike
 - Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
 - Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`)
 - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
+- Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`)
 -

 Timedelta

@@ -464,6 +467,8 @@ I/O
 - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
 - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
 - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
+- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
+- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
 - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
 -

@@ -509,6 +514,7 @@ Sparse
 - Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
 - Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`)
 - Bug in :meth:`DataFrame.sparse.to_coo` silently converting non-zero fill values to zero (:issue:`24817`)
+- Bug in :class:`SparseArray` comparison methods with an array-like operand of mismatched length raising ``AssertionError`` or unclear ``ValueError`` depending on the input (:issue:`43863`)
 -

 ExtensionArray
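
For the ``closed`` -> ``inclusive`` deprecation listed under "Other Deprecations" above, a minimal usage sketch (assuming pandas 1.4 or later; the dates below are arbitrary examples):

    import pandas as pd

    # New spelling: `inclusive` accepts "both", "neither", "left" or "right"
    # where the deprecated `closed` argument was previously used.
    idx = pd.date_range("2021-01-01", "2021-01-05", inclusive="left")
    print(idx)
    # DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04'],
    #               dtype='datetime64[ns]', freq='D')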

environment.yml (+1, -1)

@@ -106,7 +106,7 @@ dependencies:
   - pytables>=3.6.1 # pandas.read_hdf, DataFrame.to_hdf
   - s3fs>=0.4.0 # file IO when using 's3://...' path
   - aiobotocore
-  - fsspec>=0.7.4, <2021.6.0 # for generic remote file operations
+  - fsspec>=0.7.4 # for generic remote file operations
   - gcsfs>=0.6.0 # file IO when using 'gcs://...' path
   - sqlalchemy # pandas.read_sql, DataFrame.to_sql
   - xarray<0.19 # DataFrame.to_xarray

pandas/_libs/algos_common_helper.pxi.in (+2, -4)

@@ -8,18 +8,16 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 # ensure_dtype
 # ----------------------------------------------------------------------

-cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num
-

 def ensure_platform_int(object arr):
     # GH3033, GH1392
     # platform int is the size of the int pointer, e.g. np.intp
     if util.is_array(arr):
-        if (<ndarray>arr).descr.type_num == PLATFORM_INT:
+        if (<ndarray>arr).descr.type_num == cnp.NPY_INTP:
             return arr
         else:
             # equiv: arr.astype(np.intp)
-            return cnp.PyArray_Cast(<ndarray>arr, PLATFORM_INT)
+            return cnp.PyArray_Cast(<ndarray>arr, cnp.NPY_INTP)
     else:
         return np.array(arr, dtype=np.intp)
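
The hunk above only swaps a module-level ``PLATFORM_INT`` constant for the equivalent ``cnp.NPY_INTP`` enum value. A pure-Python sketch of what ``ensure_platform_int`` does (an approximation for illustration, not the compiled helper):

    import numpy as np

    def ensure_platform_int_sketch(arr):
        """Return `arr` as a platform-int (np.intp) ndarray, avoiding a
        copy when it already has that dtype."""
        if isinstance(arr, np.ndarray):
            if arr.dtype == np.intp:
                return arr
            # roughly what cnp.PyArray_Cast(<ndarray>arr, cnp.NPY_INTP) does
            return arr.astype(np.intp)
        return np.array(arr, dtype=np.intp)

    print(ensure_platform_int_sketch([1, 2, 3]).dtype)  # intp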

pandas/_libs/algos_take_helper.pxi.in (+4, -4)

@@ -103,7 +103,7 @@ def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
 {{else}}
 def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 {{endif}}
-                                    ndarray[intp_t] indexer,
+                                    ndarray[intp_t, ndim=1] indexer,
                                     {{c_type_out}}[:, :] out,
                                     fill_value=np.nan):
     cdef:

@@ -158,7 +158,7 @@ def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
 {{else}}
 def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 {{endif}}
-                                    ndarray[intp_t] indexer,
+                                    ndarray[intp_t, ndim=1] indexer,
                                     {{c_type_out}}[:, :] out,
                                     fill_value=np.nan):

@@ -195,8 +195,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
                                     fill_value=np.nan):
     cdef:
         Py_ssize_t i, j, k, n, idx
-        ndarray[intp_t] idx0 = indexer[0]
-        ndarray[intp_t] idx1 = indexer[1]
+        ndarray[intp_t, ndim=1] idx0 = indexer[0]
+        ndarray[intp_t, ndim=1] idx1 = indexer[1]
         {{c_type_out}} fv

     n = len(idx0)
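
The changes above only tighten the declared ``indexer`` type to a one-dimensional buffer. As a rough NumPy illustration of what the generated ``take_2d_axis0_*`` functions do (gather rows by position into a preallocated ``out`` buffer, writing ``fill_value`` where the indexer is ``-1``; the sample values here are made up):

    import numpy as np

    values = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    indexer = np.array([2, -1, 0], dtype=np.intp)  # -1 marks a missing row
    out = np.empty((len(indexer), values.shape[1]), dtype=np.float64)
    fill_value = np.nan

    for i, idx in enumerate(indexer):
        out[i] = fill_value if idx == -1 else values[idx]

    print(out)
    # [[ 5.  6.]
    #  [nan nan]
    #  [ 1.  2.]]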

pandas/_libs/index.pyx (+8, -5)

@@ -116,12 +116,14 @@ cdef class IndexEngine:
     cdef:
         bint unique, monotonic_inc, monotonic_dec
         bint need_monotonic_check, need_unique_check
+        object _np_type

     def __init__(self, ndarray values):
         self.values = values

         self.over_size_threshold = len(values) >= _SIZE_CUTOFF
         self.clear_mapping()
+        self._np_type = values.dtype.type

     def __contains__(self, val: object) -> bool:
         # We assume before we get here:

@@ -168,13 +170,13 @@ cdef class IndexEngine:
         See ObjectEngine._searchsorted_left.__doc__.
         """
         # Caller is responsible for ensuring _check_type has already been called
-        loc = self.values.searchsorted(val, side="left")
+        loc = self.values.searchsorted(self._np_type(val), side="left")
         return loc

     cdef inline _get_loc_duplicates(self, object val):
         # -> Py_ssize_t | slice | ndarray[bool]
         cdef:
-            Py_ssize_t diff
+            Py_ssize_t diff, left, right

         if self.is_monotonic_increasing:
             values = self.values

@@ -318,8 +320,8 @@ cdef class IndexEngine:
         set stargets, remaining_stargets
         dict d = {}
         object val
-        int count = 0, count_missing = 0
-        Py_ssize_t i, j, n, n_t, n_alloc
+        Py_ssize_t count = 0, count_missing = 0
+        Py_ssize_t i, j, n, n_t, n_alloc, start, end
         bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True

     values = self.values

@@ -481,7 +483,8 @@ cdef class DatetimeEngine(Int64Engine):
     # with either a Timestamp or NaT (Timedelta or NaT for TimedeltaEngine)

     cdef:
-        int64_t loc
+        Py_ssize_t loc
+
     if is_definitely_invalid_key(val):
         raise TypeError(f"'{val}' is an invalid key")
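
The ``_searchsorted_left`` change above converts the looked-up value to the index's own NumPy scalar type (stored as ``_np_type``) before calling ``searchsorted``. A minimal NumPy sketch of the idea (illustrative values, not pandas internals):

    import numpy as np

    values = np.arange(1_000_000, dtype=np.int64)
    np_type = values.dtype.type  # what the engine stores as _np_type

    # Converting the scalar to the array's own dtype up front spares NumPy
    # from resolving mixed scalar/array types on every lookup.
    loc = values.searchsorted(np_type(123_456), side="left")
    print(loc)  # 123456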

pandas/_libs/internals.pyx (+23, -9)

@@ -227,7 +227,7 @@ cdef class BlockPlacement:
         cdef:
             slice nv, s = self._ensure_has_slice()
             Py_ssize_t other_int, start, stop, step, l
-            ndarray newarr
+            ndarray[intp_t, ndim=1] newarr

         if s is not None:
             # see if we are either all-above or all-below, each of which

@@ -260,7 +260,7 @@ cdef class BlockPlacement:
         cdef:
             slice slc = self._ensure_has_slice()
             slice new_slice
-            ndarray new_placement
+            ndarray[intp_t, ndim=1] new_placement

         if slc is not None and slc.step == 1:
             new_slc = slice(slc.start * factor, slc.stop * factor, 1)

@@ -345,7 +345,9 @@ cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -
     return length


-cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
+cdef (Py_ssize_t, Py_ssize_t, Py_ssize_t, Py_ssize_t) slice_get_indices_ex(
+    slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX
+):
     """
     Get (start, stop, step, length) tuple for a slice.

@@ -460,9 +462,11 @@ def get_blkno_indexers(
     # blockno handling.
     cdef:
         int64_t cur_blkno
-        Py_ssize_t i, start, stop, n, diff, tot_len
+        Py_ssize_t i, start, stop, n, diff
+        cnp.npy_intp tot_len
         int64_t blkno
         object group_dict = defaultdict(list)
+        ndarray[int64_t, ndim=1] arr

     n = blknos.shape[0]
     result = list()

@@ -495,7 +499,8 @@ def get_blkno_indexers(
             result.append((blkno, slice(slices[0][0], slices[0][1])))
         else:
             tot_len = sum(stop - start for start, stop in slices)
-            arr = np.empty(tot_len, dtype=np.int64)
+            # equiv np.empty(tot_len, dtype=np.int64)
+            arr = cnp.PyArray_EMPTY(1, &tot_len, cnp.NPY_INT64, 0)

             i = 0
             for start, stop in slices:

@@ -526,16 +531,21 @@ def get_blkno_placements(blknos, group: bool = True):
         yield blkno, BlockPlacement(indexer)


+@cython.boundscheck(False)
+@cython.wraparound(False)
 cpdef update_blklocs_and_blknos(
-    ndarray[intp_t] blklocs, ndarray[intp_t] blknos, Py_ssize_t loc, intp_t nblocks
+    ndarray[intp_t, ndim=1] blklocs,
+    ndarray[intp_t, ndim=1] blknos,
+    Py_ssize_t loc,
+    intp_t nblocks,
 ):
     """
     Update blklocs and blknos when a new column is inserted at 'loc'.
     """
     cdef:
         Py_ssize_t i
         cnp.npy_intp length = len(blklocs) + 1
-        ndarray[intp_t] new_blklocs, new_blknos
+        ndarray[intp_t, ndim=1] new_blklocs, new_blknos

     # equiv: new_blklocs = np.empty(length, dtype=np.intp)
     new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0)

@@ -693,7 +703,7 @@ cdef class BlockManager:
             cnp.npy_intp length = self.shape[0]
             SharedBlock blk
             BlockPlacement bp
-            ndarray[intp_t] new_blknos, new_blklocs
+            ndarray[intp_t, ndim=1] new_blknos, new_blklocs

         # equiv: np.empty(length, dtype=np.intp)
         new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0)

@@ -711,7 +721,11 @@ cdef class BlockManager:
             new_blknos[j] = blkno
             new_blklocs[j] = i

-        for blkno in new_blknos:
+        for i in range(length):
+            # faster than `for blkno in new_blknos`
+            # https://github.com/cython/cython/issues/4393
+            blkno = new_blknos[i]
+
             # If there are any -1s remaining, this indicates that our mgr_locs
             # are invalid.
             if blkno == -1:
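
The last hunk replaces ``for blkno in new_blknos`` with an explicit ``for i in range(length)`` loop because, per the linked Cython issue, iterating directly over a typed ndarray can be slower than indexed access. A small pure-Python/NumPy analogue of the indexed pattern (illustration only; the real code is compiled Cython):

    import numpy as np

    new_blknos = np.array([0, 1, -1, 2], dtype=np.intp)
    length = len(new_blknos)

    for i in range(length):
        blkno = new_blknos[i]  # indexed access instead of `for blkno in new_blknos`
        if blkno == -1:
            # per the comment in the diff, a remaining -1 means the
            # mgr_locs are invalid
            print(f"invalid placement at position {i}")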
