simonjayhawkins
diff --git a/‎.github/CODE_OF_CONDUCT.md
-1 b/‎.github/CODE_OF_CONDUCT.md
-1
diff --git a/‎.pre-commit-config.yaml
+10-2 b/‎.pre-commit-config.yaml
+10-2
diff --git a/‎.travis.yml
+4-4 b/‎.travis.yml
+4-4
diff --git a/‎AUTHORS.md
-1 b/‎AUTHORS.md
-1
diff --git a/‎ci/azure/posix.yml
+14-18 b/‎ci/azure/posix.yml
+14-18
diff --git a/‎ci/deps/azure-37-32bit.yaml
-26 b/‎ci/deps/azure-37-32bit.yaml
-26
diff --git a/‎ci/deps/azure-37-slow.yaml
+1 b/‎ci/deps/azure-37-slow.yaml
+1
diff --git a/‎ci/deps/travis-37.yaml renamed to ‎ci/deps/azure-37.yaml
+1 b/‎ci/deps/travis-37.yaml renamed to ‎ci/deps/azure-37.yaml
+1
diff --git a/‎ci/deps/travis-38.yaml renamed to ‎ci/deps/azure-38.yaml
+1-1 b/‎ci/deps/travis-38.yaml renamed to ‎ci/deps/azure-38.yaml
+1-1
diff --git a/‎ci/deps/travis-37-locale.yaml
+14-8 b/‎ci/deps/travis-37-locale.yaml
+14-8
diff --git a/‎ci/deps/azure-37-locale.yaml renamed to ‎ci/deps/travis-38-slow.yaml
+11-11 b/‎ci/deps/azure-37-locale.yaml renamed to ‎ci/deps/travis-38-slow.yaml
+11-11
diff --git a/‎ci/travis_process_gbq_encryption.sh
-1 b/‎ci/travis_process_gbq_encryption.sh
-1
diff --git a/‎doc/data/iris.data
+1-1 b/‎doc/data/iris.data
+1-1
diff --git a/‎doc/source/development/contributing.rst
+10-7 b/‎doc/source/development/contributing.rst
+10-7
diff --git a/‎doc/source/development/developer.rst
+1-1 b/‎doc/source/development/developer.rst
+1-1
diff --git a/‎doc/source/getting_started/intro_tutorials/03_subset_data.rst
+5-5 b/‎doc/source/getting_started/intro_tutorials/03_subset_data.rst
+5-5
diff --git a/‎doc/source/getting_started/intro_tutorials/04_plotting.rst
+2-2 b/‎doc/source/getting_started/intro_tutorials/04_plotting.rst
+2-2
diff --git a/‎doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+4-1 b/‎doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+4-1
diff --git a/‎doc/source/getting_started/intro_tutorials/index.rst
-1 b/‎doc/source/getting_started/intro_tutorials/index.rst
-1
diff --git a/‎doc/source/getting_started/overview.rst
-1 b/‎doc/source/getting_started/overview.rst
-1
diff --git a/‎doc/source/reference/general_utility_functions.rst
-1 b/‎doc/source/reference/general_utility_functions.rst
-1
@@ -60,4 +60,3 @@ and the [Swift Code of Conduct][swift].
 [homepage]: https://www.contributor-covenant.org
 [version]: https://www.contributor-covenant.org/version/1/3/0/
 [swift]: https://swift.org/community/#code-of-conduct
-
@@ -21,10 +21,12 @@ repos:
           - file
         args: [--append-config=flake8/cython-template.cfg]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.2.2
+    rev: 5.6.0
     hooks:
     -   id: isort
         exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
+        files: '.pxd$|.py$'
+        types: [file]
 -   repo: https://github.com/asottile/pyupgrade
     rev: v2.7.2
     hooks:
@@ -39,11 +41,17 @@ repos:
     -   id: pip_to_conda
         name: Generate pip dependency from conda
         description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
-        language: system
+        language: python
         entry: python -m scripts.generate_pip_deps_from_conda
         files: ^(environment.yml|requirements-dev.txt)$
         pass_filenames: false
+        additional_dependencies: [pyyaml]
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
     -   id: yesqa
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.2.0
+    hooks:
+    -   id: end-of-file-fixer
+        exclude: '.html$|^LICENSES/|.csv$|.txt$|.svg$|.py$'
@@ -41,10 +41,10 @@ matrix:
         - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
 
     - env:
-        - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
-
-    - env:
-        - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network and not clipboard)"
+      - JOB="3.8, slow" ENV_FILE="ci/deps/travis-38-slow.yaml" PATTERN="slow" SQL="1"
+      services:
+        - mysql
+        - postgresql
 
     - env:
         - JOB="3.7, locale" ENV_FILE="ci/deps/travis-37-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
 
@@ -54,4 +54,3 @@ pandas is distributed under a 3-clause ("Simplified" or "New") BSD
 license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
 BSD-compatible licenses, are included. Their licenses follow the pandas
 license.
-
@@ -20,39 +20,35 @@ jobs:
           CONDA_PY: "37"
           PATTERN: "not slow and not network and not clipboard"
 
+        py37:
+          ENV_FILE: ci/deps/azure-37.yaml
+          CONDA_PY: "37"
+          PATTERN: "not slow and not network and not clipboard"
+
         py37_locale_slow:
           ENV_FILE: ci/deps/azure-37-locale_slow.yaml
           CONDA_PY: "37"
           PATTERN: "slow"
-          # pandas does not use the language (zh_CN), but should support different encodings (utf8)
-          # we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any
-          LANG: "zh_CN.utf8"
-          LC_ALL: "zh_CN.utf8"
-          EXTRA_APT: "language-pack-zh-hans"
+          LANG: "it_IT.utf8"
+          LC_ALL: "it_IT.utf8"
+          EXTRA_APT: "language-pack-it xsel"
 
         py37_slow:
           ENV_FILE: ci/deps/azure-37-slow.yaml
           CONDA_PY: "37"
           PATTERN: "slow"
 
-        py37_locale:
-          ENV_FILE: ci/deps/azure-37-locale.yaml
-          CONDA_PY: "37"
-          PATTERN: "not slow and not network"
-          LANG: "it_IT.utf8"
-          LC_ALL: "it_IT.utf8"
-          EXTRA_APT: "language-pack-it xsel"
-
-#        py37_32bit:
-#          ENV_FILE: ci/deps/azure-37-32bit.yaml
-#          CONDA_PY: "37"
-#          PATTERN: "not slow and not network and not clipboard"
-#          BITS32: "yes"
+        py38:
+          ENV_FILE: ci/deps/azure-38.yaml
+          CONDA_PY: "38"
+          PATTERN: "not slow and not network and not clipboard"
 
         py38_locale:
           ENV_FILE: ci/deps/azure-38-locale.yaml
           CONDA_PY: "38"
           PATTERN: "not slow and not network"
+          # pandas does not use the language (zh_CN), but should support different encodings (utf8)
+          # we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any
           LANG: "zh_CN.utf8"
           LC_ALL: "zh_CN.utf8"
           EXTRA_APT: "language-pack-zh-hans xsel"
 
@@ -10,6 +10,7 @@ dependencies:
   - pytest>=5.0.1
   - pytest-xdist>=1.21
   - hypothesis>=3.58.0
+  - pytest-azurepipelines
 
   # pandas dependencies
   - beautifulsoup4
 
@@ -10,6 +10,7 @@ dependencies:
   - pytest>=5.0.1
   - pytest-xdist>=1.21
   - hypothesis>=3.58.0
+  - pytest-azurepipelines
 
   # pandas dependencies
   - botocore>=1.11
 
@@ -10,11 +10,11 @@ dependencies:
   - pytest>=5.0.1
   - pytest-xdist>=1.21
   - hypothesis>=3.58.0
+  - pytest-azurepipelines
 
   # pandas dependencies
   - numpy
   - python-dateutil
   - nomkl
   - pytz
-  - pip
   - tabulate==0.8.3
@@ -11,7 +11,12 @@ dependencies:
   - pytest-xdist>=1.21
   - hypothesis>=3.58.0
 
-  # pandas dependencies
+  # required
+  - numpy
+  - python-dateutil
+  - pytz
+
+  # optional
   - beautifulsoup4
   - blosc=1.15.0
   - python-blosc
@@ -20,22 +25,23 @@ dependencies:
   - ipython
   - jinja2
   - lxml=4.3.0
-  - matplotlib=3.0.*
+  - matplotlib
   - nomkl
   - numexpr
-  - numpy
   - openpyxl
   - pandas-gbq
   - google-cloud-bigquery>=1.27.2 # GH 36436
   - pyarrow>=0.17
-  - psycopg2=2.7
-  - pymysql=0.7.11
   - pytables>=3.5.1
-  - python-dateutil
-  - pytz
   - scipy
-  - sqlalchemy=1.3.0
   - xarray=0.12.0
   - xlrd
   - xlsxwriter
   - xlwt
+  - moto
+  - flask
+
+  # sql
+  - psycopg2=2.7
+  - pymysql=0.7.11
+  - sqlalchemy=1.3.0
@@ -3,35 +3,35 @@ channels:
   - defaults
   - conda-forge
 dependencies:
-  - python=3.7.*
+  - python=3.8.*
 
   # tools
   - cython>=0.29.21
   - pytest>=5.0.1
   - pytest-xdist>=1.21
-  - pytest-asyncio
   - hypothesis>=3.58.0
-  - pytest-azurepipelines
 
   # pandas dependencies
   - beautifulsoup4
+  - fsspec>=0.7.4
   - html5lib
-  - ipython
-  - jinja2
   - lxml
-  - matplotlib>=3.3.0
-  - moto
-  - flask
-  - nomkl
+  - matplotlib
   - numexpr
-  - numpy=1.16.*
+  - numpy
   - openpyxl
+  - patsy
+  - psycopg2
+  - pymysql
   - pytables
   - python-dateutil
   - pytz
+  - s3fs>=0.4.0
+  - moto>=1.3.14
   - scipy
-  - xarray
+  - sqlalchemy
   - xlrd
   - xlsxwriter
   - xlwt
   - moto
+  - flask
@@ -10,4 +10,3 @@ elif [[ -n ${!TRAVIS_IV_ENV} ]]; then
     export GBQ_PROJECT_ID='pandas-gbq-tests';
     echo 'Successfully decrypted gbq credentials'
 fi
-
@@ -148,4 +148,4 @@ SepalLength,SepalWidth,PetalLength,PetalWidth,Name
 6.3,2.5,5.0,1.9,Iris-virginica
 6.5,3.0,5.2,2.0,Iris-virginica
 6.2,3.4,5.4,2.3,Iris-virginica
-5.9,3.0,5.1,1.8,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica
@@ -837,6 +837,9 @@ to run its checks by running::
 
 without having to have done ``pre-commit install`` beforehand.
 
+Note that if you have conflicting installations of ``virtualenv``, then you may get an
+error - see `here <https://github.com/pypa/virtualenv/issues/1875>`_.
+
 Backwards compatibility
 ~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -1362,16 +1365,16 @@ environments. If you want to use virtualenv instead, write::
 The ``-E virtualenv`` option should be added to all ``asv`` commands
 that run benchmarks. The default value is defined in ``asv.conf.json``.
 
-Running the full test suite can take up to one hour and use up to 3GB of RAM.
-Usually it is sufficient to paste only a subset of the results into the pull
-request to show that the committed changes do not cause unexpected performance
-regressions.  You can run specific benchmarks using the ``-b`` flag, which
-takes a regular expression.  For example, this will only run tests from a
-``pandas/asv_bench/benchmarks/groupby.py`` file::
+Running the full benchmark suite can be an all-day process, depending on your
+hardware and its resource utilization. However, usually it is sufficient to paste
+only a subset of the results into the pull request to show that the committed changes
+do not cause unexpected performance regressions.  You can run specific benchmarks
+using the ``-b`` flag, which takes a regular expression. For example, this will
+only run benchmarks from a ``pandas/asv_bench/benchmarks/groupby.py`` file::
 
     asv continuous -f 1.1 upstream/master HEAD -b ^groupby
 
-If you want to only run a specific group of tests from a file, you can do it
+If you want to only run a specific group of benchmarks from a file, you can do it
 using ``.`` as a separator. For example::
 
     asv continuous -f 1.1 upstream/master HEAD -b groupby.GroupByMethods
 
@@ -184,4 +184,4 @@ As an example of fully-formed metadata:
     'creator': {
       'library': 'pyarrow',
       'version': '0.13.0'
-    }}
+    }}
@@ -27,14 +27,14 @@ This tutorial uses the Titanic data set, stored as CSV. The data
 consists of the following data columns:
 
 -  PassengerId: Id of every passenger.
--  Survived: This feature have value 0 and 1. 0 for not survived and 1
+-  Survived: This feature has value 0 and 1. 0 for not survived and 1
    for survived.
 -  Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
 -  Name: Name of passenger.
 -  Sex: Gender of passenger.
 -  Age: Age of passenger.
--  SibSp: Indication that passenger have siblings and spouse.
--  Parch: Whether a passenger is alone or have family.
+-  SibSp: Indication that passengers have siblings and spouses.
+-  Parch: Whether a passenger is alone or has a family.
 -  Ticket: Ticket number of passenger.
 -  Fare: Indicating the fare.
 -  Cabin: The cabin of passenger.
@@ -199,7 +199,7 @@ selection brackets ``[]``. Only rows for which the value is ``True``
 will be selected.
 
 We know from before that the original Titanic ``DataFrame`` consists of
-891 rows. Let’s have a look at the amount of rows which satisfy the
+891 rows. Let’s have a look at the number of rows which satisfy the
 condition by checking the ``shape`` attribute of the resulting
 ``DataFrame`` ``above_35``:
 
@@ -398,7 +398,7 @@ See the user guide section on :ref:`different choices for indexing <indexing.cho
     <div class="d-flex flex-row gs-torefguide">
         <span class="badge badge-info">To user guide</span>
 
-A full overview about indexing is provided in the user guide pages on :ref:`indexing and selecting data <indexing>`.
+A full overview of indexing is provided in the user guide pages on :ref:`indexing and selecting data <indexing>`.
 
 .. raw:: html
 
 
@@ -167,7 +167,7 @@ I want each of the columns in a separate subplot.
     @savefig 04_airqual_area_subplot.png
     axs = air_quality.plot.area(figsize=(12, 4), subplots=True)
 
-Separate subplots for each of the data columns is supported by the ``subplots`` argument
+Separate subplots for each of the data columns are supported by the ``subplots`` argument
 of the ``plot`` functions. The builtin options available in each of the pandas plot
 functions are worthwhile to have a look at.
 
@@ -214,7 +214,7 @@ I want to further customize, extend or save the resulting plot.
         </li>
     </ul>
 
-Each of the plot objects created by pandas are a
+Each of the plot objects created by pandas is a
 `matplotlib <https://matplotlib.org/>`__ object. As Matplotlib provides
 plenty of options to customize plots, making the link between pandas and
 Matplotlib explicit makes all the power of Matplotlib available to the plot.
 
@@ -123,7 +123,10 @@ aggregating statistics for given columns can be defined using the
 .. ipython:: python
 
     titanic.agg(
-        {"Age": ["min", "max", "median", "skew"], "Fare": ["min", "max", "median", "mean"]}
+        {
+            "Age": ["min", "max", "median", "skew"],
+            "Fare": ["min", "max", "median", "mean"],
+        }
     )
 
 .. raw:: html
 
@@ -19,4 +19,3 @@ Getting started tutorials
     08_combine_dataframes
     09_timeseries
     10_text_data
-
@@ -174,4 +174,3 @@ License
 -------
 
 .. literalinclude:: ../../../LICENSE
-
@@ -122,4 +122,3 @@ Bug report function
    :toctree: api/
 
    show_versions
-
Original file line number	Diff line number	Diff line change
`@@ -174,4 +174,3 @@ License`
`174`	`174`	`-------`
`175`	`175`
`176`	`176`	`.. literalinclude:: ../../../LICENSE`
`177`		`-`
Original file line number	Diff line number	Diff line change
`@@ -122,4 +122,3 @@ Bug report function`
`122`	`122`	`:toctree: api/`
`123`	`123`
`124`	`124`	`show_versions`
`125`		`-`