simonjayhawkins
diff --git a/‎.circleci/config.yml
+1-1 b/‎.circleci/config.yml
+1-1
diff --git a/‎.github/workflows/code-checks.yml
+1-1 b/‎.github/workflows/code-checks.yml
+1-1
diff --git a/‎.github/workflows/posix.yml
+1-1 b/‎.github/workflows/posix.yml
+1-1
diff --git a/‎.pre-commit-config.yaml
+1-1 b/‎.pre-commit-config.yaml
+1-1
diff --git a/‎asv_bench/benchmarks/array.py
+31 b/‎asv_bench/benchmarks/array.py
+31
diff --git a/‎asv_bench/benchmarks/categoricals.py
+1-1 b/‎asv_bench/benchmarks/categoricals.py
+1-1
diff --git a/‎ci/deps/circle-38-arm64.yaml
+41-8 b/‎ci/deps/circle-38-arm64.yaml
+41-8
diff --git a/‎doc/source/getting_started/install.rst
+1-1 b/‎doc/source/getting_started/install.rst
+1-1
diff --git a/‎doc/source/getting_started/intro_tutorials/03_subset_data.rst
+2-2 b/‎doc/source/getting_started/intro_tutorials/03_subset_data.rst
+2-2
diff --git a/‎doc/source/getting_started/intro_tutorials/04_plotting.rst
+8-8 b/‎doc/source/getting_started/intro_tutorials/04_plotting.rst
+8-8
diff --git a/‎doc/source/getting_started/intro_tutorials/05_add_columns.rst
+6-6 b/‎doc/source/getting_started/intro_tutorials/05_add_columns.rst
+6-6
diff --git a/‎doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+7-7 b/‎doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+7-7
@@ -8,7 +8,7 @@ jobs:
     environment:
       ENV_FILE: ci/deps/circle-38-arm64.yaml
       PYTEST_WORKERS: auto
-      PATTERN: "not slow and not network and not clipboard and not arm_slow"
+      PATTERN: "not single_cpu and not slow and not network and not clipboard and not arm_slow and not db"
       PYTEST_TARGET: "pandas"
       PANDAS_CI: "1"
     steps:
 
@@ -74,7 +74,7 @@ jobs:
 
     - name: Install pyright
       # note: keep version in sync with .pre-commit-config.yaml
-      run: npm install -g pyright@1.1.212
+      run: npm install -g pyright@1.1.230
 
     - name: Build Pandas
       id: build
 
@@ -162,7 +162,7 @@ jobs:
       shell: bash
       run: |
         # TODO: re-enable cov, its slowing the tests down though
-        pip install Cython numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 hypothesis>=5.5.3
+        pip install Cython numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 pytest-asyncio hypothesis>=5.5.3
       if: ${{ env.IS_PYPY == 'true' }}
 
     - name: Build Pandas
 
@@ -85,7 +85,7 @@ repos:
         types: [python]
         stages: [manual]
         # note: keep version in sync with .github/workflows/code-checks.yml
-        additional_dependencies: ['pyright@1.1.212']
+        additional_dependencies: ['pyright@1.1.230']
 -   repo: local
     hooks:
     -   id: flake8-rst
 
@@ -2,6 +2,8 @@
 
 import pandas as pd
 
+from .pandas_vb_common import tm
+
 
 class BooleanArray:
     def setup(self):
@@ -39,3 +41,32 @@ def time_constructor(self):
 
     def time_from_integer_array(self):
         pd.array(self.values_integer, dtype="Int64")
+
+
+class ArrowStringArray:
+
+    params = [False, True]
+    param_names = ["multiple_chunks"]
+
+    def setup(self, multiple_chunks):
+        try:
+            import pyarrow as pa
+        except ImportError:
+            raise NotImplementedError
+        strings = tm.rands_array(3, 10_000)
+        if multiple_chunks:
+            chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)]
+            self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks))
+        else:
+            self.array = pd.arrays.ArrowStringArray(pa.array(strings))
+
+    def time_setitem(self, multiple_chunks):
+        for i in range(200):
+            self.array[i] = "foo"
+
+    def time_setitem_list(self, multiple_chunks):
+        indexer = list(range(0, 50)) + list(range(-50, 0))
+        self.array[indexer] = ["foo"] * len(indexer)
+
+    def time_setitem_slice(self, multiple_chunks):
+        self.array[::10] = "foo"
@@ -187,7 +187,7 @@ def time_remove_categories(self):
 class Rank:
     def setup(self):
         N = 10**5
-        ncats = 100
+        ncats = 15
 
         self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
         self.s_str_cat = pd.Series(self.s_str, dtype="category")
 
@@ -4,19 +4,52 @@ channels:
 dependencies:
   - python=3.8
 
-  # tools
-  - cython>=0.29.24
+  # test dependencies
+  - cython=0.29.24
   - pytest>=6.0
+  - pytest-cov
   - pytest-xdist>=1.31
   - hypothesis>=5.5.3
+  - psutil
   - pytest-asyncio
+  - boto3
 
-  # pandas dependencies
-  - botocore>=1.11
-  - flask
-  - moto
-  - numpy
+  # required dependencies
   - python-dateutil
+  - numpy
   - pytz
+
+  # optional dependencies
+  - beautifulsoup4
+  - blosc
+  - bottleneck
+  - brotlipy
+  - fastparquet
+  - fsspec
+  - html5lib
+  - gcsfs
+  - jinja2
+  - lxml
+  - matplotlib
+  - numba
+  - numexpr
+  - openpyxl
+  - odfpy
+  - pandas-gbq
+  - psycopg2
+  - pyarrow
+  - pymysql
+  # Not provided on ARM
+  #- pyreadstat
+  - pytables
+  - python-snappy
+  - pyxlsb
+  - s3fs
+  - scipy
+  - sqlalchemy
+  - tabulate
+  - xarray
+  - xlrd
+  - xlsxwriter
+  - xlwt
   - zstandard
-  - pip
 
@@ -276,7 +276,7 @@ Computation
 ========================= ================== =============================================================
 Dependency                Minimum Version    Notes
 ========================= ================== =============================================================
-SciPy                     1.14.1             Miscellaneous statistical functions
+SciPy                     1.4.1              Miscellaneous statistical functions
 numba                     0.50.1             Alternative execution engine for rolling operations
                                              (see :ref:`Enhancing Performance <enhancingperf.numba>`)
 xarray                    0.15.1             pandas-like API for N-dimensional data
 
@@ -358,9 +358,9 @@ See the user guide section on :ref:`different choices for indexing <indexing.cho
    of column/row labels, a slice of labels, a conditional expression or
    a colon.
 -  Select specific rows and/or columns using ``loc`` when using the row
-   and column names
+   and column names.
 -  Select specific rows and/or columns using ``iloc`` when using the
-   positions in the table
+   positions in the table.
 -  You can assign new values to a selection based on ``loc``/``iloc``.
 
 .. raw:: html
 
@@ -88,7 +88,7 @@ method. Hence, the :meth:`~DataFrame.plot` method works on both ``Series`` and
     <ul class="task-bullet">
         <li>
 
-I want to visually compare the :math:`N0_2` values measured in London versus Paris.
+I want to visually compare the :math:`NO_2` values measured in London versus Paris.
 
 .. ipython:: python
 
@@ -197,26 +197,26 @@ I want to further customize, extend or save the resulting plot.
     </ul>
 
 Each of the plot objects created by pandas is a
-`matplotlib <https://matplotlib.org/>`__ object. As Matplotlib provides
+`Matplotlib <https://matplotlib.org/>`__ object. As Matplotlib provides
 plenty of options to customize plots, making the link between pandas and
-Matplotlib explicit enables all the power of matplotlib to the plot.
+Matplotlib explicit enables all the power of Matplotlib to the plot.
 This strategy is applied in the previous example:
 
 ::
 
-   fig, axs = plt.subplots(figsize=(12, 4))        # Create an empty matplotlib Figure and Axes
+   fig, axs = plt.subplots(figsize=(12, 4))        # Create an empty Matplotlib Figure and Axes
    air_quality.plot.area(ax=axs)                   # Use pandas to put the area plot on the prepared Figure/Axes
-   axs.set_ylabel("NO$_2$ concentration")          # Do any matplotlib customization you like
-   fig.savefig("no2_concentrations.png")           # Save the Figure/Axes using the existing matplotlib method.
+   axs.set_ylabel("NO$_2$ concentration")          # Do any Matplotlib customization you like
+   fig.savefig("no2_concentrations.png")           # Save the Figure/Axes using the existing Matplotlib method.
 
 .. raw:: html
 
     <div class="shadow gs-callout gs-callout-remember">
         <h4>REMEMBER</h4>
 
--  The ``.plot.*`` methods are applicable on both Series and DataFrames
+-  The ``.plot.*`` methods are applicable on both Series and DataFrames.
 -  By default, each of the columns is plotted as a different element
-   (line, boxplot,…)
+   (line, boxplot,…).
 -  Any plot created by pandas is a Matplotlib object.
 
 .. raw:: html
 
@@ -41,7 +41,7 @@ How to create new columns derived from existing columns?
     <ul class="task-bullet">
         <li>
 
-I want to express the :math:`NO_2` concentration of the station in London in mg/m\ :math:`^3`
+I want to express the :math:`NO_2` concentration of the station in London in mg/m\ :math:`^3`.
 
 (*If we assume temperature of 25 degrees Celsius and pressure of 1013
 hPa, the conversion factor is 1.882*)
@@ -60,7 +60,7 @@ at the left side of the assignment.
     </ul>
 
 .. note::
-    The calculation of the values is done **element_wise**. This
+    The calculation of the values is done **element-wise**. This
     means all values in the given column are multiplied by the value 1.882
     at once. You do not need to use a loop to iterate each of the rows!
 
@@ -72,7 +72,7 @@ at the left side of the assignment.
     <ul class="task-bullet">
         <li>
 
-I want to check the ratio of the values in Paris versus Antwerp and save the result in a new column
+I want to check the ratio of the values in Paris versus Antwerp and save the result in a new column.
 
 .. ipython:: python
 
@@ -89,8 +89,8 @@ values in each row*.
         </li>
     </ul>
 
-Also other mathematical operators (``+``, ``-``, ``\*``, ``/``) or
-logical operators (``<``, ``>``, ``=``,…) work element wise. The latter was already
+Also other mathematical operators (``+``, ``-``, ``*``, ``/``,…) or
+logical operators (``<``, ``>``, ``==``,…) work element-wise. The latter was already
 used in the :ref:`subset data tutorial <10min_tut_03_subset>` to filter
 rows of a table using a conditional expression.
 
@@ -101,7 +101,7 @@ If you need more advanced logic, you can use arbitrary Python code via :meth:`~D
     <ul class="task-bullet">
         <li>
 
-I want to rename the data columns to the corresponding station identifiers used by openAQ
+I want to rename the data columns to the corresponding station identifiers used by `OpenAQ <https://openaq.org/>`__.
 
 .. ipython:: python
 
 
@@ -74,15 +74,15 @@ What is the median age and ticket fare price of the Titanic passengers?
     titanic[["Age", "Fare"]].median()
 
 The statistic applied to multiple columns of a ``DataFrame`` (the selection of two columns
-return a ``DataFrame``, see the :ref:`subset data tutorial <10min_tut_03_subset>`) is calculated for each numeric column.
+returns a ``DataFrame``, see the :ref:`subset data tutorial <10min_tut_03_subset>`) is calculated for each numeric column.
 
 .. raw:: html
 
         </li>
     </ul>
 
 The aggregating statistic can be calculated for multiple columns at the
-same time. Remember the ``describe`` function from :ref:`first tutorial <10min_tut_01_tableoriented>`?
+same time. Remember the ``describe`` function from the :ref:`first tutorial <10min_tut_01_tableoriented>`?
 
 .. ipython:: python
 
@@ -161,7 +161,7 @@ columns:
     titanic.groupby("Sex").mean()
 
 It does not make much sense to get the average value of the ``Pclass``.
-if we are only interested in the average age for each gender, the
+If we are only interested in the average age for each gender, the
 selection of columns (rectangular brackets ``[]`` as usual) is supported
 on the grouped data as well:
 
@@ -254,7 +254,7 @@ within each group:
     <div class="d-flex flex-row gs-torefguide">
         <span class="badge badge-info">To user guide</span>
 
-The user guide has a dedicated section on ``value_counts`` , see page on :ref:`discretization <basics.discretization>`.
+The user guide has a dedicated section on ``value_counts`` , see the page on :ref:`discretization <basics.discretization>`.
 
 .. raw:: html
 
@@ -265,10 +265,10 @@ The user guide has a dedicated section on ``value_counts`` , see page on :ref:`d
     <div class="shadow gs-callout gs-callout-remember">
         <h4>REMEMBER</h4>
 
--  Aggregation statistics can be calculated on entire columns or rows
--  ``groupby`` provides the power of the *split-apply-combine* pattern
+-  Aggregation statistics can be calculated on entire columns or rows.
+-  ``groupby`` provides the power of the *split-apply-combine* pattern.
 -  ``value_counts`` is a convenient shortcut to count the number of
-   entries in each category of a variable
+   entries in each category of a variable.
 
 .. raw:: html