mdhsieh
diff --git a/‎.github/workflows/database.yml
+1-1 b/‎.github/workflows/database.yml
+1-1
diff --git a/‎.pre-commit-config.yaml
+1 b/‎.pre-commit-config.yaml
+1
diff --git a/‎asv_bench/benchmarks/strings.py
+45-37 b/‎asv_bench/benchmarks/strings.py
+45-37
diff --git a/‎ci/deps/actions-37-db-min.yaml
+2-1 b/‎ci/deps/actions-37-db-min.yaml
+2-1
diff --git a/‎ci/deps/actions-37-db.yaml
+1-1 b/‎ci/deps/actions-37-db.yaml
+1-1
diff --git a/‎ci/deps/actions-37-minimum_versions.yaml
+1-1 b/‎ci/deps/actions-37-minimum_versions.yaml
+1-1
diff --git a/‎ci/deps/actions-37.yaml
+1-1 b/‎ci/deps/actions-37.yaml
+1-1
diff --git a/‎ci/deps/azure-macos-37.yaml
+2-1 b/‎ci/deps/azure-macos-37.yaml
+2-1
diff --git a/‎ci/deps/azure-windows-37.yaml
+1-1 b/‎ci/deps/azure-windows-37.yaml
+1-1
diff --git a/‎ci/deps/azure-windows-38.yaml
+1-1 b/‎ci/deps/azure-windows-38.yaml
+1-1
diff --git a/‎ci/run_tests.sh
+1-1 b/‎ci/run_tests.sh
+1-1
diff --git a/‎doc/redirects.csv
+1 b/‎doc/redirects.csv
+1
diff --git a/‎doc/source/getting_started/install.rst
+1-1 b/‎doc/source/getting_started/install.rst
+1-1
diff --git a/‎doc/source/reference/series.rst
+1 b/‎doc/source/reference/series.rst
+1
diff --git a/‎doc/source/whatsnew/v1.3.0.rst
+4-1 b/‎doc/source/whatsnew/v1.3.0.rst
+4-1
diff --git a/‎environment.yml
+1-1 b/‎environment.yml
+1-1
diff --git a/‎pandas/compat/_optional.py
+1-1 b/‎pandas/compat/_optional.py
+1-1
diff --git a/‎pandas/core/array_algos/take.py
+8-3 b/‎pandas/core/array_algos/take.py
+8-3
diff --git a/‎pandas/core/arrays/string_arrow.py
+17-12 b/‎pandas/core/arrays/string_arrow.py
+17-12
@@ -70,7 +70,7 @@ jobs:
     - uses: conda-incubator/setup-miniconda@v2
       with:
         activate-environment: pandas-dev
-        channel-priority: strict
+        channel-priority: flexible
         environment-file: ${{ matrix.ENV_FILE }}
         use-only-tar-bz2: true
 
 
@@ -21,6 +21,7 @@ repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.4.0
     hooks:
+    -   id: debug-statements
     -   id: end-of-file-fixer
         exclude: \.txt$
     -   id: trailing-whitespace
 
@@ -11,6 +11,19 @@
 from .pandas_vb_common import tm
 
 
+class Dtypes:
+    params = ["str", "string", "arrow_string"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
+
+        try:
+            self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
+        except ImportError:
+            raise NotImplementedError
+
+
 class Construction:
 
     params = ["str", "string"]
@@ -49,18 +62,7 @@ def peakmem_cat_frame_construction(self, dtype):
         DataFrame(self.frame_cat_arr, dtype=dtype)
 
 
-class Methods:
-    params = ["str", "string", "arrow_string"]
-    param_names = ["dtype"]
-
-    def setup(self, dtype):
-        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
-
-        try:
-            self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
-        except ImportError:
-            raise NotImplementedError
-
+class Methods(Dtypes):
     def time_center(self, dtype):
         self.s.str.center(100)
 
@@ -83,6 +85,9 @@ def time_find(self, dtype):
     def time_rfind(self, dtype):
         self.s.str.rfind("[A-Z]+")
 
+    def time_fullmatch(self, dtype):
+        self.s.str.fullmatch("A")
+
     def time_get(self, dtype):
         self.s.str.get(0)
 
@@ -211,35 +216,26 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
         self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep)
 
 
-class Contains:
+class Contains(Dtypes):
 
-    params = (["str", "string", "arrow_string"], [True, False])
+    params = (Dtypes.params, [True, False])
     param_names = ["dtype", "regex"]
 
     def setup(self, dtype, regex):
-        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
-
-        try:
-            self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
-        except ImportError:
-            raise NotImplementedError
+        super().setup(dtype)
 
     def time_contains(self, dtype, regex):
         self.s.str.contains("A", regex=regex)
 
 
-class Split:
+class Split(Dtypes):
 
-    params = (["str", "string", "arrow_string"], [True, False])
+    params = (Dtypes.params, [True, False])
     param_names = ["dtype", "expand"]
 
     def setup(self, dtype, expand):
-        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
-
-        try:
-            self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype).str.join("--")
-        except ImportError:
-            raise NotImplementedError
+        super().setup(dtype)
+        self.s = self.s.str.join("--")
 
     def time_split(self, dtype, expand):
         self.s.str.split("--", expand=expand)
@@ -248,17 +244,23 @@ def time_rsplit(self, dtype, expand):
         self.s.str.rsplit("--", expand=expand)
 
 
-class Dummies:
-    params = ["str", "string", "arrow_string"]
-    param_names = ["dtype"]
+class Extract(Dtypes):
 
-    def setup(self, dtype):
-        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
+    params = (Dtypes.params, [True, False])
+    param_names = ["dtype", "expand"]
 
-        try:
-            self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype).str.join("|")
-        except ImportError:
-            raise NotImplementedError
+    def setup(self, dtype, expand):
+        super().setup(dtype)
+
+    def time_extract_single_group(self, dtype, expand):
+        with warnings.catch_warnings(record=True):
+            self.s.str.extract("(\\w*)A", expand=expand)
+
+
+class Dummies(Dtypes):
+    def setup(self, dtype):
+        super().setup(dtype)
+        self.s = self.s.str.join("|")
 
     def time_get_dummies(self, dtype):
         self.s.str.get_dummies("|")
@@ -279,3 +281,9 @@ def setup(self):
     def time_vector_slice(self):
         # GH 2602
         self.s.str[:5]
+
+
+class Iter(Dtypes):
+    def time_iter(self, dtype):
+        for i in self.s:
+            pass
@@ -31,7 +31,8 @@ dependencies:
   - openpyxl
   - pandas-gbq
   - google-cloud-bigquery>=1.27.2 # GH 36436
-  - pyarrow=0.17 # GH 38803
+  - protobuf>=3.12.4
+  - pyarrow=0.17.1 # GH 38803
   - pytables>=3.5.1
   - scipy
   - xarray=0.12.3
 
@@ -31,7 +31,7 @@ dependencies:
   - pandas-gbq
   - google-cloud-bigquery>=1.27.2 # GH 36436
   - psycopg2
-  - pyarrow>=0.15.0
+  - pyarrow>=0.17.0
   - pymysql
   - pytables
   - python-snappy
 
@@ -23,7 +23,7 @@ dependencies:
   - pytables=3.5.1
   - python-dateutil=2.7.3
   - pytz=2017.3
-  - pyarrow=0.15
+  - pyarrow=0.17.0
   - scipy=1.2
   - xlrd=1.2.0
   - xlsxwriter=1.0.2
 
@@ -18,7 +18,7 @@ dependencies:
   - numpy=1.19
   - python-dateutil
   - nomkl
-  - pyarrow=0.15.1
+  - pyarrow
   - pytz
   - s3fs>=0.4.0
   - moto>=1.3.14
 
@@ -1,6 +1,7 @@
 name: pandas-dev
 channels:
   - defaults
+  - conda-forge
 dependencies:
   - python=3.7.*
 
@@ -21,7 +22,7 @@ dependencies:
   - numexpr
   - numpy=1.17.3
   - openpyxl
-  - pyarrow=0.15.1
+  - pyarrow=0.17.0
   - pytables
   - python-dateutil==2.7.3
   - pytz
 
@@ -26,7 +26,7 @@ dependencies:
   - numexpr
   - numpy=1.17.*
   - openpyxl
-  - pyarrow=0.15
+  - pyarrow=0.17.0
   - pytables
   - python-dateutil
   - pytz
 
@@ -25,7 +25,7 @@ dependencies:
   - numpy=1.18.*
   - openpyxl
   - jinja2
-  - pyarrow>=0.15.0
+  - pyarrow>=0.17.0
   - pytables
   - python-dateutil
   - pytz
 
@@ -24,7 +24,7 @@ PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TE
 if [[ $(uname) != "Linux"  && $(uname) != "Darwin" ]]; then
     # GH#37455 windows py38 build appears to be running out of memory
     #  skip collection of window tests
-    PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/ --ignore=pandas/tests/plotting/"
+    PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/moments --ignore=pandas/tests/plotting/"
 fi
 
 echo $PYTEST_CMD
 
@@ -1197,6 +1197,7 @@ generated/pandas.Series.str.extractall,../reference/api/pandas.Series.str.extrac
 generated/pandas.Series.str.extract,../reference/api/pandas.Series.str.extract
 generated/pandas.Series.str.findall,../reference/api/pandas.Series.str.findall
 generated/pandas.Series.str.find,../reference/api/pandas.Series.str.find
+generated/pandas.Series.str.fullmatch,../reference/api/pandas.Series.str.fullmatch
 generated/pandas.Series.str.get_dummies,../reference/api/pandas.Series.str.get_dummies
 generated/pandas.Series.str.get,../reference/api/pandas.Series.str.get
 generated/pandas.Series.str,../reference/api/pandas.Series.str
 
@@ -358,7 +358,7 @@ PyTables                  3.5.1              HDF5-based reading / writing
 blosc                     1.17.0             Compression for HDF5
 zlib                                         Compression for HDF5
 fastparquet               0.4.0              Parquet reading / writing
-pyarrow                   0.15.0             Parquet, ORC, and feather reading / writing
+pyarrow                   0.17.0             Parquet, ORC, and feather reading / writing
 pyreadstat                                   SPSS files (.sav) reading
 ========================= ================== =============================================================
 
 
@@ -415,6 +415,7 @@ strings and apply several methods to it. These can be accessed like
    Series.str.extractall
    Series.str.find
    Series.str.findall
+   Series.str.fullmatch
    Series.str.get
    Series.str.index
    Series.str.join
 
@@ -224,6 +224,7 @@ Other enhancements
 - :meth:`pandas.read_csv` and :meth:`pandas.read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
+- :meth:`.GroupBy.rank` now supports object-dtype data (:issue:`38278`)
 - Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
@@ -578,7 +579,7 @@ Optional libraries below the lowest tested version may still work, but are not c
 +-----------------+-----------------+---------+
 | openpyxl        | 3.0.0           |    X    |
 +-----------------+-----------------+---------+
-| pyarrow         | 0.15.0          |         |
+| pyarrow         | 0.17.0          |    X    |
 +-----------------+-----------------+---------+
 | pymysql         | 0.8.1           |    X    |
 +-----------------+-----------------+---------+
@@ -672,6 +673,7 @@ Performance improvements
 - Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
 - Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
 - Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`)
+- Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`)
 
 .. ---------------------------------------------------------------------------
 
@@ -971,6 +973,7 @@ Other
 - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
 - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`)
 - Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`)
+- Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` to ``None`` (:issue:`41425`)
 
 .. ---------------------------------------------------------------------------
 
 
@@ -100,7 +100,7 @@ dependencies:
   - odfpy
 
   - fastparquet>=0.3.2  # pandas.read_parquet, DataFrame.to_parquet
-  - pyarrow>=0.15.0  # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
+  - pyarrow>=0.17.0  # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
   - python-snappy  # required by pyarrow
 
   - pyqt>=5.9.2  # pandas.read_clipboard
 
@@ -21,7 +21,7 @@
     "odfpy": "1.3.0",
     "openpyxl": "3.0.0",
     "pandas_gbq": "0.12.0",
-    "pyarrow": "0.15.0",
+    "pyarrow": "0.17.0",
     "pytest": "5.0.1",
     "pyxlsb": "1.0.6",
     "s3fs": "0.4.0",
 
@@ -16,7 +16,10 @@
 from pandas._typing import ArrayLike
 
 from pandas.core.dtypes.cast import maybe_promote
-from pandas.core.dtypes.common import ensure_platform_int
+from pandas.core.dtypes.common import (
+    ensure_platform_int,
+    is_1d_only_ea_obj,
+)
 from pandas.core.dtypes.missing import na_value_for_dtype
 
 from pandas.core.construction import ensure_wrapped_if_datetimelike
@@ -91,12 +94,14 @@ def take_nd(
 
     if not isinstance(arr, np.ndarray):
         # i.e. ExtensionArray,
-        if arr.ndim == 2:
-            # e.g. DatetimeArray, TimedeltArray
+        # includes for EA to catch DatetimeArray, TimedeltaArray
+        if not is_1d_only_ea_obj(arr):
+            # i.e. DatetimeArray, TimedeltaArray
             arr = cast("NDArrayBackedExtensionArray", arr)
             return arr.take(
                 indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
             )
+
         return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
 
     arr = np.asarray(arr)
 
@@ -820,33 +820,38 @@ def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
             result[isna(result)] = bool(na)
         return result
 
-    def _str_startswith(self, pat, na=None):
+    def _str_startswith(self, pat: str, na=None):
         if pa_version_under4p0:
             return super()._str_startswith(pat, na)
 
-        result = pc.match_substring_regex(self._data, "^" + re.escape(pat))
-        result = BooleanDtype().__from_arrow__(result)
-        if not isna(na):
-            result[isna(result)] = bool(na)
-        return result
+        pat = "^" + re.escape(pat)
+        return self._str_contains(pat, na=na, regex=True)
 
-    def _str_endswith(self, pat, na=None):
+    def _str_endswith(self, pat: str, na=None):
         if pa_version_under4p0:
             return super()._str_endswith(pat, na)
 
-        result = pc.match_substring_regex(self._data, re.escape(pat) + "$")
-        result = BooleanDtype().__from_arrow__(result)
-        if not isna(na):
-            result[isna(result)] = bool(na)
-        return result
+        pat = re.escape(pat) + "$"
+        return self._str_contains(pat, na=na, regex=True)
 
     def _str_match(
         self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
     ):
+        if pa_version_under4p0:
+            return super()._str_match(pat, case, flags, na)
+
         if not pat.startswith("^"):
             pat = "^" + pat
         return self._str_contains(pat, case, flags, na, regex=True)
 
+    def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
+        if pa_version_under4p0:
+            return super()._str_fullmatch(pat, case, flags, na)
+
+        if not pat.endswith("$") or pat.endswith("\\$"):  # "\$" is a literal $
+            pat = pat + "$"
+        return self._str_match(pat, case, flags, na)
+
     def _str_isalnum(self):
         result = pc.utf8_is_alnum(self._data)
         return BooleanDtype().__from_arrow__(result)