
Commit 3319811

Merge branch 'main' into include-pyproject

2 parents: b0d0d6a + cc6c957

298 files changed (+7,651 / −3,814 lines)


.circleci/config.yml (+4 −1)

@@ -14,7 +14,10 @@ jobs:
     steps:
       - checkout
       - run: .circleci/setup_env.sh
-      - run: PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH ci/run_tests.sh
+      - run: >
+          PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
+          LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD
+          ci/run_tests.sh
 
 workflows:
   test:

.github/ISSUE_TEMPLATE/bug_report.yaml (+2 −2)

@@ -17,8 +17,8 @@ body:
             [latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
           required: true
         - label: >
-            I have confirmed this bug exists on the [main branch]
-            (https://pandas.pydata.org/docs/dev/getting_started/install.html#installing-the-development-version-of-pandas)
+            I have confirmed this bug exists on the
+            [main branch](https://pandas.pydata.org/docs/dev/getting_started/install.html#installing-the-development-version-of-pandas)
             of pandas.
   - type: textarea
     id: example

.github/workflows/docbuild-and-upload.yml (−6)

@@ -46,12 +46,6 @@ jobs:
       - name: Build Pandas
         uses: ./.github/actions/build_pandas
 
-      - name: Set up maintainers cache
-        uses: actions/cache@v3
-        with:
-          path: maintainers.json
-          key: maintainers
-
       - name: Build website
         run: python web/pandas_web.py web/pandas --target-path=web/build

.github/workflows/macos-windows.yml (+1 −1)

@@ -31,7 +31,7 @@ jobs:
     strategy:
       matrix:
         os: [macos-latest, windows-latest]
-        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
+        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
       fail-fast: false
     runs-on: ${{ matrix.os }}
     name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }}

.github/workflows/python-dev.yml (+6 −4)

@@ -23,12 +23,14 @@ name: Python Dev
 on:
   push:
     branches:
-      - main
-      - 1.5.x
+      # - main
+      # - 1.5.x
+      - None
   pull_request:
     branches:
-      - main
-      - 1.5.x
+      # - main
+      # - 1.5.x
+      - None
     paths-ignore:
       - "doc/**"

.github/workflows/sdist.yml (−1)

@@ -92,5 +92,4 @@ jobs:
       - name: Import pandas
         run: |
           cd ..
-          conda list
           python -c "import pandas; pandas.show_versions();"

.github/workflows/ubuntu.yml (+8 −3)

@@ -27,7 +27,7 @@ jobs:
     timeout-minutes: 180
     strategy:
       matrix:
-        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
+        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
         pattern: ["not single_cpu", "single_cpu"]
         pyarrow_version: ["7", "8", "9", "10"]
         include:
@@ -73,11 +73,10 @@ jobs:
           - env_file: actions-pypy-38.yaml
             pattern: "not slow and not network and not single_cpu"
            test_args: "--max-worker-restart 0"
-            error_on_warnings: "0"
           - name: "Numpy Dev"
             env_file: actions-310-numpydev.yaml
             pattern: "not slow and not network and not single_cpu"
-            test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy"
+            test_args: "-W error::DeprecationWarning -W error::FutureWarning"
             error_on_warnings: "0"
         exclude:
           - env_file: actions-38.yaml
@@ -92,6 +91,12 @@ jobs:
             pyarrow_version: "8"
           - env_file: actions-39.yaml
             pyarrow_version: "9"
+          - env_file: actions-311.yaml
+            pyarrow_version: "7"
+          - env_file: actions-311.yaml
+            pyarrow_version: "8"
+          - env_file: actions-311.yaml
+            pyarrow_version: "9"
       fail-fast: false
     name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }}
     env:

.github/workflows/wheels.yml (+4 −2)

@@ -86,7 +86,8 @@ jobs:
           activate-environment: test
           channels: conda-forge, anaconda
           channel-priority: true
-          mamba-version: "*"
+          # mamba fails to solve, also we really don't need this since we're just installing python
+          # mamba-version: "*"
 
       - name: Test wheels (Windows 64-bit only)
         if: ${{ matrix.buildplat[1] == 'win_amd64' }}
@@ -154,7 +155,8 @@ jobs:
           python-version: '3.8'
           channels: conda-forge
           channel-priority: true
-          mamba-version: "*"
+          # mamba fails to solve, also we really don't need this since we're just installing python
+          # mamba-version: "*"
 
       - name: Build sdist
         run: |

.pre-commit-config.yaml (+1 −4)

@@ -92,7 +92,7 @@ repos:
         args: [--disable=all, --enable=redefined-outer-name]
         stages: [manual]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.11.4
+    rev: 5.12.0
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
@@ -443,6 +443,3 @@ repos:
         types: [python]
         files: ^pandas/tests
         language: python
-        exclude: |
-            (?x)
-            ^pandas/tests/generic/test_generic.py  # GH50380

LICENSE (+1 −1)

@@ -3,7 +3,7 @@ BSD 3-Clause License
 Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
 All rights reserved.
 
-Copyright (c) 2011-2022, Open source contributors.
+Copyright (c) 2011-2023, Open source contributors.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

asv_bench/benchmarks/array.py (+1 −1)

@@ -93,7 +93,7 @@ def time_setitem(self, multiple_chunks):
         self.array[i] = "foo"
 
     def time_setitem_list(self, multiple_chunks):
-        indexer = list(range(0, 50)) + list(range(-50, 0))
+        indexer = list(range(0, 50)) + list(range(-1000, 0, 50))
         self.array[indexer] = ["foo"] * len(indexer)
 
     def time_setitem_slice(self, multiple_chunks):

asv_bench/benchmarks/indexing.py (+32)

@@ -8,6 +8,7 @@
 import numpy as np
 
 from pandas import (
+    NA,
     CategoricalIndex,
     DataFrame,
     Index,
@@ -83,6 +84,37 @@ def time_loc_slice(self, index, index_structure):
         self.data.loc[:800000]
 
 
+class NumericMaskedIndexing:
+    monotonic_list = list(range(10**6))
+    non_monotonic_list = (
+        list(range(50)) + [54, 53, 52, 51] + list(range(55, 10**6 - 1))
+    )
+
+    params = [
+        ("Int64", "UInt64", "Float64"),
+        (True, False),
+    ]
+    param_names = ["dtype", "monotonic"]
+
+    def setup(self, dtype, monotonic):
+
+        indices = {
+            True: Index(self.monotonic_list, dtype=dtype),
+            False: Index(self.non_monotonic_list, dtype=dtype).append(
+                Index([NA], dtype=dtype)
+            ),
+        }
+        self.data = indices[monotonic]
+        self.indexer = np.arange(300, 1_000)
+        self.data_dups = self.data.append(self.data)
+
+    def time_get_indexer(self, dtype, monotonic):
+        self.data.get_indexer(self.indexer)
+
+    def time_get_indexer_dups(self, dtype, monotonic):
+        self.data.get_indexer_for(self.indexer)
+
+
 class NonNumericSeriesIndexing:
 
     params = [
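The new benchmark above times Index.get_indexer on nullable ("masked") numeric indexes. As a quick orientation, here is a minimal sketch of the API being exercised; it is not part of the commit and the values are illustrative. get_indexer returns the position of each requested label, with -1 for labels that are not present.

    import numpy as np
    import pandas as pd

    idx = pd.Index([1, 2, 3, pd.NA], dtype="Int64")   # nullable Int64 index, as in the benchmark
    print(idx.get_indexer(np.array([2, 3, 50])))      # [ 1  2 -1]; -1 marks "not found"
    print(idx.get_indexer_for(np.array([2, 3, 50])))  # same positions; also works on non-unique indexes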

asv_bench/benchmarks/indexing_engines.py (+81 −1)

@@ -1,5 +1,8 @@
 """
-Benchmarks in this file depend exclusively on code in _libs/
+Benchmarks in this file depend mostly on code in _libs/
+
+We have to created masked arrays to test the masked engine though. The
+array is unpacked on the Cython level.
 
 If a PR does not edit anything in _libs, it is very unlikely that benchmarks
 in this file will be affected.
@@ -9,6 +12,8 @@
 
 from pandas._libs import index as libindex
 
+from pandas.core.arrays import BaseMaskedArray
+
 
 def _get_numeric_engines():
     engine_names = [
@@ -30,6 +35,26 @@ def _get_numeric_engines():
     ]
 
 
+def _get_masked_engines():
+    engine_names = [
+        ("MaskedInt64Engine", "Int64"),
+        ("MaskedInt32Engine", "Int32"),
+        ("MaskedInt16Engine", "Int16"),
+        ("MaskedInt8Engine", "Int8"),
+        ("MaskedUInt64Engine", "UInt64"),
+        ("MaskedUInt32Engine", "UInt32"),
+        ("MaskedUInt16engine", "UInt16"),
+        ("MaskedUInt8Engine", "UInt8"),
+        ("MaskedFloat64Engine", "Float64"),
+        ("MaskedFloat32Engine", "Float32"),
+    ]
+    return [
+        (getattr(libindex, engine_name), dtype)
+        for engine_name, dtype in engine_names
+        if hasattr(libindex, engine_name)
+    ]
+
+
 class NumericEngineIndexing:
 
     params = [
@@ -80,6 +105,61 @@ def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
         self.data.get_loc(self.key_middle)
 
 
+class MaskedNumericEngineIndexing:
+
+    params = [
+        _get_masked_engines(),
+        ["monotonic_incr", "monotonic_decr", "non_monotonic"],
+        [True, False],
+        [10**5, 2 * 10**6],  # 2e6 is above SIZE_CUTOFF
+    ]
+    param_names = ["engine_and_dtype", "index_type", "unique", "N"]
+
+    def setup(self, engine_and_dtype, index_type, unique, N):
+        engine, dtype = engine_and_dtype
+
+        if index_type == "monotonic_incr":
+            if unique:
+                arr = np.arange(N * 3, dtype=dtype.lower())
+            else:
+                values = list([1] * N + [2] * N + [3] * N)
+                arr = np.array(values, dtype=dtype.lower())
+            mask = np.zeros(N * 3, dtype=np.bool_)
+        elif index_type == "monotonic_decr":
+            if unique:
+                arr = np.arange(N * 3, dtype=dtype.lower())[::-1]
+            else:
+                values = list([1] * N + [2] * N + [3] * N)
+                arr = np.array(values, dtype=dtype.lower())[::-1]
+            mask = np.zeros(N * 3, dtype=np.bool_)
+        else:
+            assert index_type == "non_monotonic"
+            if unique:
+                arr = np.zeros(N * 3, dtype=dtype.lower())
+                arr[:N] = np.arange(N * 2, N * 3, dtype=dtype.lower())
+                arr[N:] = np.arange(N * 2, dtype=dtype.lower())
+
+            else:
+                arr = np.array([1, 2, 3] * N, dtype=dtype.lower())
+            mask = np.zeros(N * 3, dtype=np.bool_)
+            mask[-1] = True
+
+        self.data = engine(BaseMaskedArray(arr, mask))
+        # code belows avoids populating the mapping etc. while timing.
+        self.data.get_loc(2)
+
+        self.key_middle = arr[len(arr) // 2]
+        self.key_early = arr[2]
+
+    def time_get_loc(self, engine_and_dtype, index_type, unique, N):
+        self.data.get_loc(self.key_early)
+
+    def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
+        # searchsorted performance may be different near the middle of a range
+        # vs near an endpoint
+        self.data.get_loc(self.key_middle)
+
+
 class ObjectEngineIndexing:
 
     params = [("monotonic_incr", "monotonic_decr", "non_monotonic")]
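For readers unfamiliar with the masked engines named above, a rough sketch follows; it is illustrative only and not from the commit. A nullable array pairs a plain NumPy buffer with a boolean NA mask, the same (values, mask) pair the benchmark wraps in BaseMaskedArray before handing it to the engine. pd.arrays.IntegerArray is the public counterpart used here.

    import numpy as np
    import pandas as pd

    values = np.array([1, 2, 3], dtype="int64")
    mask = np.array([False, False, True])        # True marks a missing entry
    arr = pd.arrays.IntegerArray(values, mask)   # a BaseMaskedArray subclass
    print(arr)                                   # [1, 2, <NA>]
    print(pd.Index(arr).get_loc(2))              # 1, the kind of lookup the get_loc benchmarks time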

asv_bench/benchmarks/io/hdf.py (+8)

@@ -128,9 +128,17 @@ def setup(self, format):
         self.df["object"] = tm.makeStringIndex(N)
         self.df.to_hdf(self.fname, "df", format=format)
 
+        # Numeric df
+        self.df1 = self.df.copy()
+        self.df1 = self.df1.reset_index()
+        self.df1.to_hdf(self.fname, "df1", format=format)
+
     def time_read_hdf(self, format):
         read_hdf(self.fname, "df")
 
+    def peakmem_read_hdf(self, format):
+        read_hdf(self.fname, "df")
+
     def time_write_hdf(self, format):
         self.df.to_hdf(self.fname, "df", format=format)
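The peakmem_read_hdf addition works because asv dispatches on method prefixes: time_* methods are wall-clock benchmarks, while peakmem_* methods report the peak memory of one call. A minimal sketch of the round-trip being measured follows; the path and key are illustrative, not from the commit, and the PyTables optional dependency is assumed to be installed.

    import pandas as pd

    df = pd.DataFrame({"a": range(5)})
    df.to_hdf("store.h5", key="df1", format="table")   # write under key "df1"
    print(pd.read_hdf("store.h5", key="df1"))          # read it back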

asv_bench/benchmarks/io/json.py (+7 −1)

@@ -294,7 +294,8 @@ def time_float_longint_str_lines(self):
 class ToJSONMem:
     def setup_cache(self):
         df = DataFrame([[1]])
-        frames = {"int": df, "float": df.astype(float)}
+        df2 = DataFrame(range(8), date_range("1/1/2000", periods=8, freq="T"))
+        frames = {"int": df, "float": df.astype(float), "datetime": df2}
 
         return frames
 
@@ -308,5 +309,10 @@ def peakmem_float(self, frames):
         for _ in range(100_000):
             df.to_json()
 
+    def peakmem_time(self, frames):
+        df = frames["datetime"]
+        for _ in range(10_000):
+            df.to_json(orient="table")
+
 
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
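For context, a small sketch of the orient="table" output that the new peakmem_time benchmark stresses: a Table Schema header plus the data records, which is noticeably heavier to serialize than the default orient. This is not from the commit; it simply mirrors the datetime-indexed frame the benchmark builds.

    import pandas as pd

    df = pd.DataFrame(range(3), index=pd.date_range("1/1/2000", periods=3, freq="T"))
    print(df.to_json(orient="table"))
    # -> {"schema": {"fields": [...], "primaryKey": ["index"], ...}, "data": [...]}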
