pandas-dev
diff --git a/‎.travis.yml
+11-1 b/‎.travis.yml
+11-1
diff --git a/‎asv_bench/benchmarks/algorithms.py
+14-3 b/‎asv_bench/benchmarks/algorithms.py
+14-3
diff --git a/‎ci/build39.sh
+21 b/‎ci/build39.sh
+21
diff --git a/‎ci/deps/azure-37-numpydev.yaml
+1-1 b/‎ci/deps/azure-37-numpydev.yaml
+1-1
diff --git a/‎ci/setup_env.sh
+5 b/‎ci/setup_env.sh
+5
diff --git a/‎doc/source/development/contributing.rst
+2-2 b/‎doc/source/development/contributing.rst
+2-2
diff --git a/‎doc/source/development/extending.rst
+1-1 b/‎doc/source/development/extending.rst
+1-1
diff --git a/‎doc/source/getting_started/intro_tutorials/02_read_write.rst
+3-3 b/‎doc/source/getting_started/intro_tutorials/02_read_write.rst
+3-3
diff --git a/‎doc/source/getting_started/intro_tutorials/03_subset_data.rst
+1-1 b/‎doc/source/getting_started/intro_tutorials/03_subset_data.rst
+1-1
diff --git a/‎doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+4-4 b/‎doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+4-4
diff --git a/‎doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
+8-8 b/‎doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
+8-8
diff --git a/‎doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
+1-1 b/‎doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
+1-1
diff --git a/‎doc/source/getting_started/intro_tutorials/09_timeseries.rst
+3-3 b/‎doc/source/getting_started/intro_tutorials/09_timeseries.rst
+3-3
diff --git a/‎doc/source/getting_started/intro_tutorials/10_text_data.rst
+10-10 b/‎doc/source/getting_started/intro_tutorials/10_text_data.rst
+10-10
diff --git a/‎doc/source/reference/extensions.rst
+1 b/‎doc/source/reference/extensions.rst
+1
@@ -27,6 +27,11 @@ matrix:
   fast_finish: true
 
   include:
+    # In allowed failures
+    - dist: bionic
+      python: 3.9-dev
+      env:
+        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
     - env:
         - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
 
@@ -53,6 +58,11 @@ matrix:
       services:
         - mysql
         - postgresql
+  allow_failures:
+  - dist: bionic
+    python: 3.9-dev
+    env:
+        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
 
 before_install:
   - echo "before_install"
@@ -83,7 +93,7 @@ install:
 script:
   - echo "script start"
   - echo "$JOB"
-  - source activate pandas-dev
+  - if [ "$JOB" != "3.9-dev" ]; then source activate pandas-dev; fi
   - ci/run_tests.sh
 
 after_script:
 
@@ -34,7 +34,16 @@ class Factorize:
     params = [
         [True, False],
         [True, False],
-        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+        [
+            "int",
+            "uint",
+            "float",
+            "string",
+            "datetime64[ns]",
+            "datetime64[ns, tz]",
+            "Int64",
+            "boolean",
+        ],
     ]
     param_names = ["unique", "sort", "dtype"]
 
@@ -49,13 +58,15 @@ def setup(self, unique, sort, dtype):
             "datetime64[ns, tz]": pd.date_range(
                 "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
             ),
+            "Int64": pd.array(np.arange(N), dtype="Int64"),
+            "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
         }[dtype]
         if not unique:
             data = data.repeat(5)
-        self.idx = data
+        self.data = data
 
     def time_factorize(self, unique, sort, dtype):
-        self.idx.factorize(sort=sort)
+        pd.factorize(self.data, sort=sort)
 
 
 class Duplicated:
 
@@ -0,0 +1,21 @@
+#!/bin/bash -e
+# Special build for python3.9 until numpy puts its own wheels up
+set -e  # explicit: the shebang's -e is ignored when run as "/bin/bash ci/build39.sh"
+sudo apt-get install -y build-essential gcc xvfb  # -y: never wait on a confirmation prompt
+pip install --no-deps -U pip wheel setuptools
+pip install python-dateutil pytz pytest pytest-xdist hypothesis
+pip install cython --pre # https://github.com/cython/cython/issues/3395
+
+git clone https://github.com/numpy/numpy  # build numpy from source (no wheels for 3.9 yet)
+cd numpy
+python setup.py build_ext --inplace
+python setup.py install
+cd ..
+rm -rf numpy  # the source checkout is no longer needed once numpy is installed
+
+python setup.py build_ext --inplace  # build this repository's extension modules in place
+python -m pip install --no-build-isolation -e .
+
+python -c "import sys; print(sys.version_info)"  # smoke checks: interpreter, pandas, hypothesis
+python -c "import pandas as pd"
+python -c "import hypothesis"
@@ -16,7 +16,7 @@ dependencies:
   - pip:
     - cython==0.29.16 # GH#34014
     - "git+git://github.com/dateutil/dateutil.git"
-    - "-f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"
+    - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
     - "--pre"
     - "numpy"
     - "scipy"
@@ -1,5 +1,10 @@
 #!/bin/bash -e
 
+if [ "$JOB" == "3.9-dev" ]; then
+    /bin/bash ci/build39.sh
+    exit 0
+fi
+
 # edit the locale file if needed
 if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then
     echo "Adding locale to the first line of pandas/__init__.py"
 
@@ -110,7 +110,7 @@ version control to allow many people to work together on the project.
 Some great resources for learning Git:
 
 * the `GitHub help pages <https://help.github.com/>`_.
-* the `NumPy's documentation <https://docs.scipy.org/doc/numpy/dev/index.html>`_.
+* the `NumPy documentation <https://numpy.org/doc/stable/dev/index.html>`_.
 * Matthew Brett's `Pydagogue <https://matthew-brett.github.com/pydagogue/>`_.
 
 Getting started with Git
@@ -974,7 +974,7 @@ it is worth getting in the habit of writing tests ahead of time so this is never
 Like many packages, pandas uses `pytest
 <https://docs.pytest.org/en/latest/>`_ and the convenient
 extensions in `numpy.testing
-<https://docs.scipy.org/doc/numpy/reference/routines.testing.html>`_.
+<https://numpy.org/doc/stable/reference/routines.testing.html>`_.
 
 .. note::
 
 
@@ -219,7 +219,7 @@ and re-boxes it if necessary.
 
 If applicable, we highly recommend that you implement ``__array_ufunc__`` in your
 extension array to avoid coercion to an ndarray. See
-`the numpy documentation <https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html>`__
+`the numpy documentation <https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html>`__
 for an example.
 
 As part of your implementation, we require that you defer to pandas when a pandas
 
@@ -23,7 +23,7 @@
                     <div class="card-body">
                         <p class="card-text">
 
-This tutorial uses the titanic data set, stored as CSV. The data
+This tutorial uses the Titanic data set, stored as CSV. The data
 consists of the following data columns:
 
 -  PassengerId: Id of every passenger.
@@ -61,7 +61,7 @@ How do I read and write tabular data?
     <ul class="task-bullet">
         <li>
 
-I want to analyse the titanic passenger data, available as a CSV file.
+I want to analyze the Titanic passenger data, available as a CSV file.
 
 .. ipython:: python
 
@@ -134,7 +134,7 @@ strings (``object``).
     <ul class="task-bullet">
         <li>
 
-My colleague requested the titanic data as a spreadsheet.
+My colleague requested the Titanic data as a spreadsheet.
 
 .. ipython:: python
 
 
@@ -330,7 +330,7 @@ When using the column names, row labels or a condition expression, use
 the ``loc`` operator in front of the selection brackets ``[]``. For both
 the part before and after the comma, you can use a single label, a list
 of labels, a slice of labels, a conditional expression or a colon. Using
-a colon specificies you want to select all rows or columns.
+a colon specifies you want to select all rows or columns.
 
 .. raw:: html
 
 
@@ -23,7 +23,7 @@
                     <div class="card-body">
                         <p class="card-text">
 
-This tutorial uses the titanic data set, stored as CSV. The data
+This tutorial uses the Titanic data set, stored as CSV. The data
 consists of the following data columns:
 
 -  PassengerId: Id of every passenger.
@@ -72,7 +72,7 @@ Aggregating statistics
     <ul class="task-bullet">
         <li>
 
-What is the average age of the titanic passengers?
+What is the average age of the Titanic passengers?
 
 .. ipython:: python
 
@@ -95,7 +95,7 @@ across rows by default.
     <ul class="task-bullet">
         <li>
 
-What is the median age and ticket fare price of the titanic passengers?
+What is the median age and ticket fare price of the Titanic passengers?
 
 .. ipython:: python
 
@@ -148,7 +148,7 @@ Aggregating statistics grouped by category
     <ul class="task-bullet">
         <li>
 
-What is the average age for male versus female titanic passengers?
+What is the average age for male versus female Titanic passengers?
 
 .. ipython:: python
 
 
@@ -23,7 +23,7 @@
                     <div class="card-body">
                         <p class="card-text">
 
-This tutorial uses the titanic data set, stored as CSV. The data
+This tutorial uses the Titanic data set, stored as CSV. The data
 consists of the following data columns:
 
 -  PassengerId: Id of every passenger.
@@ -122,7 +122,7 @@ Sort table rows
     <ul class="task-bullet">
         <li>
 
-I want to sort the titanic data according to the age of the passengers.
+I want to sort the Titanic data according to the age of the passengers.
 
 .. ipython:: python
 
@@ -138,7 +138,7 @@ I want to sort the titanic data according to the age of the passengers.
     <ul class="task-bullet">
         <li>
 
-I want to sort the titanic data according to the cabin class and age in descending order.
+I want to sort the Titanic data according to the cabin class and age in descending order.
 
 .. ipython:: python
 
@@ -282,7 +282,7 @@ For more information about :meth:`~DataFrame.pivot_table`, see the user guide se
    </div>
 
 .. note::
-    If case you are wondering, :meth:`~DataFrame.pivot_table` is indeed directly linked
+    In case you are wondering, :meth:`~DataFrame.pivot_table` is indeed directly linked
     to :meth:`~DataFrame.groupby`. The same result can be derived by grouping on both
     ``parameter`` and ``location``:
 
@@ -338,7 +338,7 @@ newly created column.
 
 The solution is the short version on how to apply :func:`pandas.melt`. The method
 will *melt* all columns NOT mentioned in ``id_vars`` together into two
-columns: A columns with the column header names and a column with the
+columns: A column with the column header names and a column with the
 values itself. The latter column gets by default the name ``value``.
 
 The :func:`pandas.melt` method can be defined in more detail:
@@ -357,8 +357,8 @@ The result in the same, but in more detail defined:
 
 -  ``value_vars`` defines explicitly which columns to *melt* together
 -  ``value_name`` provides a custom column name for the values column
-   instead of the default columns name ``value``
--  ``var_name`` provides a custom column name for the columns collecting
+   instead of the default column name ``value``
+-  ``var_name`` provides a custom column name for the column collecting
    the column header names. Otherwise it takes the index name or a
    default ``variable``
 
@@ -383,7 +383,7 @@ Conversion from wide to long format with :func:`pandas.melt` is explained in the
         <h4>REMEMBER</h4>
 
 -  Sorting by one or more columns is supported by ``sort_values``
--  The ``pivot`` function is purely restructering of the data,
+-  The ``pivot`` function is purely restructuring of the data,
    ``pivot_table`` supports aggregations
 -  The reverse of ``pivot`` (long to wide format) is ``melt`` (wide to
    long format)
 
@@ -305,7 +305,7 @@ More information on join/merge of tables is provided in the user guide section o
     <div class="shadow gs-callout gs-callout-remember">
         <h4>REMEMBER</h4>
 
--  Multiple tables can be concatenated both column as row wise using
+-  Multiple tables can be concatenated both column-wise and row-wise using
    the ``concat`` function.
 -  For database-like merging/joining of tables, use the ``merge``
    function.
 
@@ -78,7 +78,7 @@ provide any datetime operations (e.g. extract the year, day of the
 week,…). By applying the ``to_datetime`` function, pandas interprets the
 strings and convert these to datetime (i.e. ``datetime64[ns, UTC]``)
 objects. In pandas we call these datetime objects similar to
-``datetime.datetime`` from the standard library a :class:`pandas.Timestamp`.
+``datetime.datetime`` from the standard library as :class:`pandas.Timestamp`.
 
 .. raw:: html
 
@@ -99,7 +99,7 @@ objects. In pandas we call these datetime objects similar to
 Why are these :class:`pandas.Timestamp` objects useful? Let’s illustrate the added
 value with some example cases.
 
-   What is the start and end date of the time series data set working
+   What is the start and end date of the time series data set we are working
    with?
 
 .. ipython:: python
@@ -214,7 +214,7 @@ Plot the typical :math:`NO_2` pattern during the day of our time series of all s
 
 Similar to the previous case, we want to calculate a given statistic
 (e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the
-split-apply-combine approach again. For this case, the datetime property ``hour``
+split-apply-combine approach again. For this case, we use the datetime property ``hour``
 of pandas ``Timestamp``, which is also accessible by the ``dt`` accessor.
 
 .. raw:: html
 
@@ -23,7 +23,7 @@
                     <div class="card-body">
                         <p class="card-text">
 
-This tutorial uses the titanic data set, stored as CSV. The data
+This tutorial uses the Titanic data set, stored as CSV. The data
 consists of the following data columns:
 
 -  PassengerId: Id of every passenger.
@@ -102,7 +102,7 @@ Create a new column ``Surname`` that contains the surname of the Passengers by e
 
 Using the :meth:`Series.str.split` method, each of the values is returned as a list of
 2 elements. The first element is the part before the comma and the
-second element the part after the comma.
+second element is the part after the comma.
 
 .. ipython:: python
 
@@ -135,7 +135,7 @@ More information on extracting parts of strings is available in the user guide s
     <ul class="task-bullet">
         <li>
 
-Extract the passenger data about the Countess on board of the Titanic.
+Extract the passenger data about the Countess on board the Titanic.
 
 .. ipython:: python
 
@@ -145,24 +145,24 @@ Extract the passenger data about the Countess on board of the Titanic.
 
     titanic[titanic["Name"].str.contains("Countess")]
 
-(*Interested in her story? See*\ `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
+(*Interested in her story? See* `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
 
 The string method :meth:`Series.str.contains` checks for each of the values in the
 column ``Name`` if the string contains the word ``Countess`` and returns
 for each of the values ``True`` (``Countess`` is part of the name) of
-``False`` (``Countess`` is notpart of the name). This output can be used
+``False`` (``Countess`` is not part of the name). This output can be used
 to subselect the data using conditional (boolean) indexing introduced in
 the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was
-only 1 Countess on the Titanic, we get one row as a result.
+only one Countess on the Titanic, we get one row as a result.
 
 .. raw:: html
 
         </li>
     </ul>
 
 .. note::
-    More powerful extractions on strings is supported, as the
-    :meth:`Series.str.contains` and :meth:`Series.str.extract` methods accepts `regular
+    More powerful extractions on strings are supported, as the
+    :meth:`Series.str.contains` and :meth:`Series.str.extract` methods accept `regular
     expressions <https://docs.python.org/3/library/re.html>`__, but out of
     scope of this tutorial.
 
@@ -182,7 +182,7 @@ More information on extracting parts of strings is available in the user guide s
     <ul class="task-bullet">
         <li>
 
-Which passenger of the titanic has the longest name?
+Which passenger of the Titanic has the longest name?
 
 .. ipython:: python
 
@@ -220,7 +220,7 @@ we can do a selection using the ``loc`` operator, introduced in the
     <ul class="task-bullet">
         <li>
 
-In the ‘Sex’ columns, replace values of ‘male’ by ‘M’ and all ‘female’ values by ‘F’
+In the "Sex" column, replace values of "male" by "M" and values of "female" by "F"
 
 .. ipython:: python
 
 
@@ -45,6 +45,7 @@ objects.
       api.extensions.ExtensionArray.copy
       api.extensions.ExtensionArray.view
       api.extensions.ExtensionArray.dropna
+      api.extensions.ExtensionArray.equals
       api.extensions.ExtensionArray.factorize
       api.extensions.ExtensionArray.fillna
       api.extensions.ExtensionArray.isna