Manual Backport PR #47287 on branch 1.4.x (DEPS: Sync environment.yml with CI dep files) #47453

Closed
29 changes: 29 additions & 0 deletions .github/workflows/code-checks.yml
@@ -166,3 +166,32 @@ jobs:

- name: Build image
run: docker build --pull --no-cache --tag pandas-dev-env .

requirements-dev-text-installable:
name: Test install requirements-dev.txt
runs-on: ubuntu-latest

concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-requirements-dev-text-installable
cancel-in-progress: true

steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Setup Python
id: setup_python
uses: actions/setup-python@v3
with:
python-version: '3.8'
cache: 'pip'
cache-dependency-path: 'requirements-dev.txt'

- name: Install requirements-dev.txt
run: pip install -r requirements-dev.txt

- name: Check Pip Cache Hit
run: echo ${{ steps.setup_python.outputs.cache-hit }}
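Note: the `group:` expression above emulates a ternary, since GitHub Actions expressions have no `cond ? a : b` operator (hence the linked github.community workaround). A minimal Python sketch of how it evaluates, with hypothetical inputs:

def concurrency_group(event_name: str, run_number: int, ref: str) -> str:
    # Pushes get a unique group per run (run_number), so they are never
    # cancelled; other events (e.g. pull_request) share the ref, so a newer
    # run cancels an in-progress one for the same branch.
    value = run_number if event_name == "push" else ref
    return f"{value}-requirements-dev-text-installable"

assert concurrency_group("push", 1234, "refs/heads/1.4.x") == "1234-requirements-dev-text-installable"
assert concurrency_group("pull_request", 1234, "refs/pull/47453/merge").startswith("refs/pull/")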
4 changes: 2 additions & 2 deletions .github/workflows/posix.yml → .github/workflows/ubuntu.yml
@@ -1,4 +1,4 @@
name: Posix
name: Ubuntu

on:
push:
@@ -145,7 +145,7 @@ jobs:

- name: Extra installs
# xsel for clipboard tests
run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }}
run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }}

- uses: conda-incubator/setup-miniconda@v2
with:
3 changes: 1 addition & 2 deletions ci/deps/actions-310.yaml
@@ -31,8 +31,7 @@ dependencies:
- jinja2
- lxml
- matplotlib
# TODO: uncomment after numba supports py310
#- numba
- numba
- numexpr
- openpyxl
- odfpy
158 changes: 80 additions & 78 deletions environment.yml
@@ -1,21 +1,85 @@
# Local development dependencies including docs building, website upload, ASV benchmark
name: pandas-dev
channels:
- conda-forge
dependencies:
# required
- numpy>=1.18.5
- python=3.8
- python-dateutil>=2.8.1

# test dependencies
- cython=0.29.30
- pytest>=6.0
- pytest-cov
- pytest-xdist>=1.31
- psutil
- pytest-asyncio>=0.17
- boto3

# required dependencies
- python-dateutil
- numpy
- pytz

# optional dependencies
- beautifulsoup4
- blosc
- brotlipy
- bottleneck
- fastparquet
- fsspec
- html5lib
- hypothesis
- gcsfs
- jinja2
- lxml
- matplotlib
- numba>=0.53.1
- numexpr>=2.8.0 # pin for "Run checks on imported code" job
- openpyxl
- odfpy
- pandas-gbq
- psycopg2
- pyarrow
- pymysql
- pyreadstat
- pytables
- python-snappy
- pyxlsb
- s3fs
- scipy
- sqlalchemy
- tabulate
- xarray
- xlrd
- xlsxwriter
- xlwt
- zstandard

# downstream packages
- aiobotocore<2.0.0 # GH#44311 pinned to fix docbuild
- botocore
- cftime
- dask
- ipython
- geopandas-base
- seaborn
- scikit-learn
- statsmodels
- coverage
- pandas-datareader
- pyyaml
- py
- pytorch

# local testing dependencies
- moto
- flask

# benchmarks
- asv

# building
# The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms.
- c-compiler
- cxx-compiler
- cython>=0.29.30

# code checks
- black=22.3.0
@@ -24,18 +88,19 @@ dependencies:
- flake8-bugbear=21.3.2 # used by flake8, find likely bugs
- flake8-comprehensions=3.7.0 # used by flake8, linting of unnecessary comprehensions
- isort>=5.2.1 # check that imports are in the right order
- mypy=0.930
- pre-commit>=2.9.2
- mypy=0.960
- pre-commit>=2.15.0
- pycodestyle # used by flake8
- pyupgrade

# documentation
- gitpython # obtain contributors from git for whatsnew
- gitdb
- numpydoc < 1.2 # 2021-02-09 1.2dev breaking CI
- natsort # DataFrame.sort_values doctest
- numpydoc
- pandas-dev-flaker=0.4.0
- pydata-sphinx-theme=0.8.0
- pytest-cython
- pytest-cython # doctest
- sphinx
- sphinx-panels
- types-python-dateutil
@@ -47,77 +112,14 @@ dependencies:
- nbconvert>=6.4.5
- nbsphinx
- pandoc

# Dask and its dependencies (that dont install with dask)
- dask-core
- toolz>=0.7.3
- partd>=0.3.10
- cloudpickle>=0.2.1

# web (jinja2 is also needed, but it's also an optional pandas dependency)
- markdown
- feedparser
- pyyaml
- requests

# testing
- boto3
- botocore>=1.11
- hypothesis>=5.5.3
- moto # mock S3
- flask
- pytest>=6.0
- pytest-cov
- pytest-xdist>=1.31
- pytest-asyncio>=0.17
- pytest-instafail

# downstream tests
- seaborn
- statsmodels

# unused (required indirectly may be?)
- ipywidgets
- nbformat
- notebook>=6.0.3

# optional
- blosc
- bottleneck>=1.3.1
- ipykernel
- ipython>=7.11.1
- jinja2 # pandas.Styler
- matplotlib>=3.3.2 # pandas.plotting, Series.plot, DataFrame.plot
- numexpr>=2.7.1
- scipy>=1.4.1
- numba>=0.50.1

# optional for io
# ---------------
# pd.read_html
- beautifulsoup4>=4.8.2
- html5lib
- lxml

# pd.read_excel, DataFrame.to_excel, pd.ExcelWriter, pd.ExcelFile
- openpyxl
- xlrd
- xlsxwriter
- xlwt
- odfpy

- fastparquet>=0.4.0 # pandas.read_parquet, DataFrame.to_parquet
- pyarrow>2.0.1 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
- python-snappy # required by pyarrow

- pytables>=3.6.1 # pandas.read_hdf, DataFrame.to_hdf
- s3fs>=0.4.0 # file IO when using 's3://...' path
- aiobotocore<2.0.0 # GH#44311 pinned to fix docbuild
- fsspec>=0.7.4 # for generic remote file operations
- gcsfs>=0.6.0 # file IO when using 'gcs://...' path
- sqlalchemy # pandas.read_sql, DataFrame.to_sql
- xarray<0.19 # DataFrame.to_xarray
- cftime # Needed for downstream xarray.CFTimeIndex test
- pyreadstat # pandas.read_spss
- tabulate>=0.8.3 # DataFrame.to_markdown
- natsort # DataFrame.sort_values
# web
- jinja2 # in optional dependencies, but documented here as needed
- markdown
- feedparser
- pyyaml
- requests
7 changes: 1 addition & 6 deletions pandas/core/algorithms.py
@@ -1078,12 +1078,7 @@ def checked_add_with_arr(
elif arr_mask is not None:
not_nan = np.logical_not(arr_mask)
elif b_mask is not None:
# Argument 1 to "__call__" of "_UFunc_Nin1_Nout1" has incompatible type
# "Optional[ndarray[Any, dtype[bool_]]]"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[An
# y]]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool,
# int, float, complex, str, bytes]]]" [arg-type]
not_nan = np.logical_not(b2_mask) # type: ignore[arg-type]
not_nan = np.logical_not(b2_mask)
else:
not_nan = np.empty(arr.shape, dtype=bool)
not_nan.fill(True)
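Note: checked_add_with_arr adds int64 arrays while raising on overflow instead of silently wrapping; the mask logic above only decides which slots are NA and therefore exempt from the check. A simplified sketch of the overflow check, not the pandas implementation:

import numpy as np

def checked_add(arr: np.ndarray, b: int) -> np.ndarray:
    # Check for headroom before adding, since int64 addition wraps silently.
    i8 = np.iinfo(np.int64)
    if b > 0 and (arr > i8.max - b).any():
        raise OverflowError("Overflow in int64 addition")
    if b < 0 and (arr < i8.min - b).any():
        raise OverflowError("Overflow in int64 addition")
    return arr + b

print(checked_add(np.array([1, 2], dtype="int64"), 3))  # [4 5]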
5 changes: 4 additions & 1 deletion pandas/core/array_algos/quantile.py
@@ -184,5 +184,8 @@ def _nanpercentile(
return result
else:
return np.percentile(
values, qs, axis=1, **{np_percentile_argname: interpolation}
values,
qs,
axis=1,
**{np_percentile_argname: interpolation},
)
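Note: np_percentile_argname papers over a NumPy keyword rename (np.percentile's `interpolation` became `method` in NumPy 1.22). A rough sketch of the idea, not pandas' exact compat shim:

import numpy as np

# Pick the keyword name this NumPy version expects (renamed in 1.22).
_major, _minor = (int(p) for p in np.__version__.split(".")[:2])
np_percentile_argname = "interpolation" if (_major, _minor) < (1, 22) else "method"

values = np.array([[1.0, 2.0, 3.0, 4.0]])
qs = np.array([25.0, 50.0, 75.0])
result = np.percentile(values, qs, axis=1, **{np_percentile_argname: "linear"})
print(result.ravel())  # [1.75 2.5  3.25]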
6 changes: 5 additions & 1 deletion pandas/core/arraylike.py
@@ -265,7 +265,11 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
return result

# Determine if we should defer.
no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)
# error: "Type[ndarray[Any, Any]]" has no attribute "__array_ufunc__"
no_defer = (
np.ndarray.__array_ufunc__, # type: ignore[attr-defined]
cls.__array_ufunc__,
)

for item in inputs:
higher_priority = (
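Note: the no_defer tuple feeds the deferral check that follows (truncated here): if another operand defines its own __array_ufunc__, pandas returns NotImplemented so NumPy dispatches to that operand instead. A self-contained sketch of the NEP 13 pattern, not pandas' actual class:

import numpy as np

class Wrapper:
    def __init__(self, data):
        self.data = np.asarray(data)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # Defer to any input whose __array_ufunc__ is neither the ndarray
        # default nor this class's own implementation.
        no_defer = (np.ndarray.__array_ufunc__, Wrapper.__array_ufunc__)
        for item in inputs:
            other = getattr(type(item), "__array_ufunc__", None)
            if other is not None and other not in no_defer:
                return NotImplemented
        arrays = [x.data if isinstance(x, Wrapper) else x for x in inputs]
        return Wrapper(getattr(ufunc, method)(*arrays, **kwargs))

print(np.add(Wrapper([1, 2, 3]), 1).data)  # [2 3 4]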
26 changes: 15 additions & 11 deletions pandas/core/arrays/interval.py
@@ -661,10 +661,20 @@ def __getitem__(
if is_scalar(left) and isna(left):
return self._fill_value
return Interval(left, right, self.closed)
# error: Argument 1 to "ndim" has incompatible type "Union[ndarray,
# ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes,
# generic], Sequence[Union[int, float, complex, str, bytes, generic]],
# Sequence[Sequence[Any]], _SupportsArray]"
# error: Argument 1 to "ndim" has incompatible type
# "Union[ndarray[Any, Any], ExtensionArray]"; expected
# "Union[Sequence[Sequence[Sequence[Sequence[Sequence[Any]]]]],
# Union[Union[_SupportsArray[dtype[Any]],
# Sequence[_SupportsArray[dtype[Any]]],
# Sequence[Sequence[_SupportsArray[dtype[Any]]]],
# Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]],
# Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]]]],
# Union[bool, int, float, complex, str, bytes,
# Sequence[Union[bool, int, float, complex, str, bytes]],
# Sequence[Sequence[Union[bool, int, float, complex, str, bytes]]],
# Sequence[Sequence[Sequence[Union[bool, int, float, complex, str, bytes]]]],
# Sequence[Sequence[Sequence[Sequence[Union[bool, int, float,
# complex, str, bytes]]]]]]]]"
if np.ndim(left) > 1: # type: ignore[arg-type]
# GH#30588 multi-dimensional indexer disallowed
raise ValueError("multi-dimensional indexing not allowed")
@@ -1639,13 +1649,7 @@ def isin(self, values) -> np.ndarray:
# complex128 ndarray is much more performant.
left = self._combined.view("complex128")
right = values._combined.view("complex128")
# Argument 1 to "in1d" has incompatible type "Union[ExtensionArray,
# ndarray[Any, Any], ndarray[Any, dtype[Any]]]"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[
# dtype[Any]]], bool, int, float, complex, str, bytes,
# _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
# [arg-type]
return np.in1d(left, right) # type: ignore[arg-type]
return np.in1d(left, right)

elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
values.left.dtype
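Note: a sketch of the complex128 view trick used above, assuming int64 left/right edges: packing each (left, right) pair into one complex number lets np.in1d match whole intervals in a single vectorized pass instead of comparing the two edge arrays separately:

import numpy as np

left = np.array([0, 2, 5], dtype="int64")
right = np.array([1, 4, 9], dtype="int64")
combined = np.column_stack([left, right])         # shape (3, 2), int64
as_complex = combined.view("complex128").ravel()  # one complex per interval

other = np.column_stack([np.array([2], dtype="int64"),
                         np.array([4], dtype="int64")])
print(np.in1d(as_complex, other.view("complex128").ravel()))  # [False  True False]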
20 changes: 9 additions & 11 deletions pandas/core/arrays/masked.py
@@ -140,7 +140,13 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
# values is supposed to already be validated in the subclass
if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
if not (
isinstance(mask, np.ndarray)
and
# error: Non-overlapping equality check
# (left operand type: "dtype[bool_]", right operand type: "Type[bool_]")
mask.dtype == np.bool_ # type: ignore[comparison-overlap]
):
raise TypeError(
"mask should be boolean numpy array. Use "
"the 'pd.array' function instead"
Expand Down Expand Up @@ -943,11 +949,7 @@ def any(self, *, skipna: bool = True, **kwargs):
nv.validate_any((), kwargs)

values = self._data.copy()
# Argument 3 to "putmask" has incompatible type "object"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[
# _SupportsArray[dtype[Any]]], bool, int, float, complex, str, bytes, _Nested
# Sequence[Union[bool, int, float, complex, str, bytes]]]" [arg-type]
np.putmask(values, self._mask, self._falsey_value) # type: ignore[arg-type]
np.putmask(values, self._mask, self._falsey_value)
result = values.any()
if skipna:
return result
@@ -1023,11 +1025,7 @@ def all(self, *, skipna: bool = True, **kwargs):
nv.validate_all((), kwargs)

values = self._data.copy()
# Argument 3 to "putmask" has incompatible type "object"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[
# _SupportsArray[dtype[Any]]], bool, int, float, complex, str, bytes, _Neste
# dSequence[Union[bool, int, float, complex, str, bytes]]]" [arg-type]
np.putmask(values, self._mask, self._truthy_value) # type: ignore[arg-type]
np.putmask(values, self._mask, self._truthy_value)
result = values.all()

if skipna:
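Note: both reductions rely on the same trick: masked slots are overwritten with a value that cannot change the answer (falsey for any(), truthy for all()), which is how skipna=True is honored. A minimal sketch:

import numpy as np

values = np.array([True, False, True])
mask = np.array([False, False, True])  # last element is "missing"

vals = values.copy()
np.putmask(vals, mask, False)  # a missing value can never make any() True
print(vals.any())              # True

vals = values.copy()
np.putmask(vals, mask, True)   # a missing value can never make all() False
print(vals.all())              # False, because of the real False at index 1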