Commit a2ca9c4

BUG: fix bug that caused DataFrame.replace to not respect the replacer's dtype (#26632)

Parents: deea374, ad4c4d5


41 files changed: +1687 / -1443 lines

.travis.yml

+3-5
@@ -30,11 +30,9 @@ matrix:
     - python: 3.5

   include:
-    - dist: bionic
-      # 18.04
-      python: 3.8.0
+    - dist: trusty
       env:
-        - JOB="3.8-dev" PATTERN="(not slow and not network)"
+        - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"

     - dist: trusty
       env:
@@ -88,7 +86,7 @@ install:
 script:
   - echo "script start"
   - echo "$JOB"
-  - if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
+  - source activate pandas-dev
   - ci/run_tests.sh

 after_script:

ci/azure/windows.yml

+15-19
@@ -11,49 +11,45 @@ jobs:
   py36_np15:
     ENV_FILE: ci/deps/azure-windows-36.yaml
     CONDA_PY: "36"
+    PATTERN: "not slow and not network"

   py37_np141:
     ENV_FILE: ci/deps/azure-windows-37.yaml
     CONDA_PY: "37"
+    PATTERN: "not slow and not network"

 steps:
   - powershell: |
       Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
       Write-Host "##vso[task.prependpath]$HOME/miniconda3/bin"
     displayName: 'Add conda to PATH'
   - script: conda update -q -n base conda
-    displayName: Update conda
-  - script: |
-      call activate
+    displayName: 'Update conda'
+  - bash: |
       conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
     displayName: 'Create anaconda environment'
-  - script: |
-      call activate pandas-dev
-      call conda list
+  - bash: |
+      source activate pandas-dev
+      conda list
       ci\\incremental\\build.cmd
     displayName: 'Build'
-  - script: |
-      call activate pandas-dev
-      pytest -m "not slow and not network" --junitxml=test-data.xml pandas -n 2 -r sxX --strict --durations=10 %*
+  - bash: |
+      source activate pandas-dev
+      ci/run_tests.sh
     displayName: 'Test'
   - task: PublishTestResults@2
     inputs:
       testResultsFiles: 'test-data.xml'
       testRunTitle: 'Windows-$(CONDA_PY)'
   - powershell: |
-      $junitXml = "test-data.xml"
-      $(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
-      if ($matches[1] -eq 0)
-      {
+      $(Get-Content "test-data.xml" | Out-String) -match 'failures="(.*?)"'
+      if ($matches[1] -eq 0) {
         Write-Host "No test failures in test-data"
-      }
-      else
-      {
-        # note that this will produce $LASTEXITCODE=1
-        Write-Error "$($matches[1]) tests failed"
+      } else {
+        Write-Error "$($matches[1]) tests failed"  # note that this will produce $LASTEXITCODE=1
       }
     displayName: 'Check for test failures'
-  - script: |
+  - bash: |
       source activate pandas-dev
       python ci/print_skipped.py
     displayName: 'Print skipped tests'

ci/build38.sh

-19
This file was deleted.

ci/deps/travis-38.yaml

+16
@@ -0,0 +1,16 @@
+name: pandas-dev
+channels:
+  - defaults
+  - conda-forge
+dependencies:
+  - python=3.8.*
+  - cython>=0.29.13
+  - numpy
+  - python-dateutil
+  - nomkl
+  - pytz
+  # universal
+  - pytest>=5.0.0
+  - pytest-xdist>=1.29.0
+  - hypothesis>=3.58.0
+  - pip

ci/setup_env.sh

-5
@@ -1,10 +1,5 @@
 #!/bin/bash -e

-if [ "$JOB" == "3.8-dev" ]; then
-    /bin/bash ci/build38.sh
-    exit 0
-fi
-
 # edit the locale file if needed
 if [ -n "$LOCALE_OVERRIDE" ]; then
     echo "Adding locale to the first line of pandas/__init__.py"

doc/source/development/extending.rst

+42
@@ -251,6 +251,48 @@ To use a test, subclass it:
 See https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/base/__init__.py
 for a list of all the tests available.

+.. _extending.extension.arrow:
+
+Compatibility with Apache Arrow
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An ``ExtensionArray`` can support conversion to / from ``pyarrow`` arrays
+(and thus support, for example, serialization to the Parquet file format)
+by implementing two methods: ``ExtensionArray.__arrow_array__`` and
+``ExtensionDtype.__from_arrow__``.
+
+The ``ExtensionArray.__arrow_array__`` method ensures that ``pyarrow`` knows how
+to convert the specific extension array into a ``pyarrow.Array`` (also when
+included as a column in a pandas DataFrame):
+
+.. code-block:: python
+
+    class MyExtensionArray(ExtensionArray):
+        ...
+
+        def __arrow_array__(self, type=None):
+            # convert the underlying array values to a pyarrow Array
+            import pyarrow
+            return pyarrow.array(..., type=type)
+
+The ``ExtensionDtype.__from_arrow__`` method then controls the conversion
+back from pyarrow to a pandas ExtensionArray. This method receives a pyarrow
+``Array`` or ``ChunkedArray`` as its only argument and is expected to return the
+appropriate pandas ``ExtensionArray`` for this dtype and the passed values:
+
+.. code-block:: none
+
+    class ExtensionDtype:
+        ...
+
+        def __from_arrow__(self, array: pyarrow.Array/ChunkedArray) -> ExtensionArray:
+            ...
+
+See more in the `Arrow documentation <https://arrow.apache.org/docs/python/extending_types.html>`__.
+
+These methods have been implemented for the nullable integer and string extension
+dtypes included in pandas, and ensure a roundtrip to pyarrow and the Parquet file format.
+
 .. _extension dtype dtypes: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/dtypes.py
 .. _extension dtype source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py
 .. _extension array source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/arrays/base.py
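The two hooks above form a symmetric protocol: ``pyarrow.array`` looks for an ``__arrow_array__`` method on the object it is given, and the dtype's ``__from_arrow__`` rebuilds the extension array on the way back. A minimal plain-Python sketch of that dispatch (not pyarrow's actual implementation; ``FakeArrowArray`` and ``to_arrow`` are hypothetical stand-ins for ``pyarrow.Array`` and ``pyarrow.array``):

```python
class FakeArrowArray:
    """Hypothetical stand-in for pyarrow.Array, used only for illustration."""
    def __init__(self, values):
        self.values = list(values)


class MyDtype:
    def __from_arrow__(self, array):
        # rebuild the pandas-side extension array from arrow data
        return MyExtensionArray(array.values)


class MyExtensionArray:
    dtype = MyDtype()

    def __init__(self, values):
        self.values = list(values)

    def __arrow_array__(self, type=None):
        # convert the underlying values into an (stand-in) arrow array
        return FakeArrowArray(self.values)


def to_arrow(obj):
    # sketch of the dispatch: prefer the object's own __arrow_array__ hook
    if hasattr(obj, "__arrow_array__"):
        return obj.__arrow_array__()
    return FakeArrowArray(obj)


arr = MyExtensionArray([1, 2, 3])
roundtripped = arr.dtype.__from_arrow__(to_arrow(arr))
```

With real pyarrow, the same shape applies: ``pyarrow.array(arr)`` triggers ``__arrow_array__``, and reading back (e.g. from Parquet) calls ``__from_arrow__`` on the registered dtype.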

doc/source/user_guide/io.rst

+3
@@ -4716,6 +4716,9 @@ Several caveats.
 * The ``pyarrow`` engine preserves the ``ordered`` flag of categorical dtypes with string types. ``fastparquet`` does not preserve the ``ordered`` flag.
 * Unsupported types include ``Period`` and actual Python object types. These will raise a helpful error message
   on an attempt at serialization.
+* The ``pyarrow`` engine preserves extension data types such as the nullable integer and string data
+  type (requiring pyarrow >= 1.0.0, and requiring the extension type to implement the needed protocols,
+  see the :ref:`extension types documentation <extending.extension.arrow>`).

 You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, or ``fastparquet``, or ``auto``.
 If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``,

doc/source/user_guide/text.rst

+35-2
@@ -13,7 +13,7 @@ Text Data Types

 .. versionadded:: 1.0.0

-There are two main ways to store text data
+There are two ways to store text data in pandas:

 1. ``object`` -dtype NumPy array.
 2. :class:`StringDtype` extension type.
@@ -63,7 +63,40 @@ Or ``astype`` after the ``Series`` or ``DataFrame`` is created
   s
   s.astype("string")

-Everything that follows in the rest of this document applies equally to
+.. _text.differences:
+
+Behavior differences
+^^^^^^^^^^^^^^^^^^^^
+
+These are places where the behavior of ``StringDtype`` objects differs from
+``object`` dtype:
+
+1. For ``StringDtype``, :ref:`string accessor methods <api.series.str>`
+   that return **numeric** output will always return a nullable integer dtype,
+   rather than either int or float dtype depending on the presence of NA values.
+
+   .. ipython:: python
+
+      s = pd.Series(["a", None, "b"], dtype="string")
+      s
+      s.str.count("a")
+      s.dropna().str.count("a")
+
+   Both outputs are ``Int64`` dtype. Compare that with object-dtype:
+
+   .. ipython:: python
+
+      s.astype(object).str.count("a")
+      s.astype(object).dropna().str.count("a")
+
+   When NA values are present, the output dtype is float64.
+
+2. Some string methods, like :meth:`Series.str.decode`, are not available
+   on ``StringArray`` because ``StringArray`` only holds strings, not
+   bytes.
+
+
+Everything else that follows in the rest of this document applies equally to
 ``string`` and ``object`` dtype.

 .. _text.string_methods:

doc/source/whatsnew/v1.0.0.rst

+16-1
@@ -63,7 +63,7 @@ Previously, strings were typically stored in object-dtype NumPy arrays.
 ``StringDtype`` is currently considered experimental. The implementation
 and parts of the API may change without warning.

-The text extension type solves several issues with object-dtype NumPy arrays:
+The ``'string'`` extension type solves several issues with object-dtype NumPy arrays:

 1. You can accidentally store a *mixture* of strings and non-strings in an
    ``object`` dtype array. A ``StringArray`` can only store strings.
@@ -88,9 +88,17 @@ You can use the alias ``"string"`` as well.
 The usual string accessor methods work. Where appropriate, the return type
 of the Series or columns of a DataFrame will also have string dtype.

+.. ipython:: python
+
    s.str.upper()
    s.str.split('b', expand=True).dtypes

+String accessor methods returning integers will return a value with :class:`Int64Dtype`
+
+.. ipython:: python
+
+   s.str.count("a")
+
 We recommend explicitly using the ``string`` data type when working with strings.
 See :ref:`text.types` for more.

@@ -114,6 +122,9 @@ Other enhancements
 - Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`)
 - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`)
 - :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`)
+- Roundtripping DataFrames with nullable integer or string data types to parquet
+  (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the ``'pyarrow'`` engine
+  now preserves those data types with pyarrow >= 1.0.0 (:issue:`20612`).

 Build Changes
 ^^^^^^^^^^^^^
@@ -268,6 +279,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
 - Removed the previously deprecated ``reduce`` and ``broadcast`` arguments from :meth:`DataFrame.apply` (:issue:`18577`)
 - Removed the previously deprecated ``assert_raises_regex`` function in ``pandas.util.testing`` (:issue:`29174`)
 - Removed :meth:`Index.is_lexsorted_for_tuple` (:issue:`29305`)
+- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`)
 -

 .. _whatsnew_1000.performance:
@@ -342,6 +354,7 @@ Numeric
 - :class:`DataFrame` flex inequality comparison methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`)
 - Bug in :class:`DataFrame` logical operations (``&``, ``|``, ``^``) not matching :class:`Series` behavior by filling NA values (:issue:`28741`)
 - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`)
+- Bug in :meth:`Series.var` not computing the right value for a nullable integer dtype series because the ``ddof`` argument was not passed through (:issue:`29128`)
 - Improved error message when using ``frac`` > 1 and ``replace`` = False (:issue:`27451`)
 - Bug in numeric indexes resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`)
 - Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`)
@@ -432,6 +445,7 @@ Groupby/resample/rolling

 -
 - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
+- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
 - Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
 - Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).
@@ -452,6 +466,7 @@ Reshaping
 - Better error message in :func:`get_dummies` when ``columns`` isn't a list-like value (:issue:`28383`)
 - Bug in :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`)
 - Bug in :meth:`DataFrame.replace` that caused a non-numeric replacer's dtype to not be respected (:issue:`26632`)
+- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)

 Sparse
 ^^^^^^
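The commit's headline fix (:issue:`26632`) concerns ``DataFrame.replace`` keeping the replacement value's dtype. A small illustrative sketch of the intended behavior (the specific data here is hypothetical, not taken from the issue):

```python
import pandas as pd

# replace a string value with another string: the replacer's (object) dtype
# should be respected rather than being coerced away
df = pd.DataFrame({"a": ["0", "1"]})
out = df.replace({"0": "zero"})

# "0" is replaced, "1" is untouched, and the column stays object dtype
```

Before the fix, replacements with non-numeric replacers could end up with a dtype that did not match the replacement values.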

pandas/_libs/internals.pyx

+5-12
@@ -1,21 +1,14 @@
 import cython
 from cython import Py_ssize_t

-from cpython.object cimport PyObject
+from cpython.slice cimport PySlice_GetIndicesEx

 cdef extern from "Python.h":
     Py_ssize_t PY_SSIZE_T_MAX

 import numpy as np
 from numpy cimport int64_t

-cdef extern from "compat_helper.h":
-    cdef int slice_get_indices(PyObject* s, Py_ssize_t length,
-                               Py_ssize_t *start, Py_ssize_t *stop,
-                               Py_ssize_t *step,
-                               Py_ssize_t *slicelength) except -1
-
-
 from pandas._libs.algos import ensure_int64


@@ -258,8 +251,8 @@ cpdef Py_ssize_t slice_len(
     if slc is None:
         raise TypeError("slc must be slice")

-    slice_get_indices(<PyObject *>slc, objlen,
-                      &start, &stop, &step, &length)
+    PySlice_GetIndicesEx(slc, objlen,
+                         &start, &stop, &step, &length)

     return length

@@ -278,8 +271,8 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
     if slc is None:
         raise TypeError("slc should be a slice")

-    slice_get_indices(<PyObject *>slc, objlen,
-                      &start, &stop, &step, &length)
+    PySlice_GetIndicesEx(slc, objlen,
+                         &start, &stop, &step, &length)

     return start, stop, step, length
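The change above swaps a vendored ``compat_helper.h`` shim for CPython's own ``PySlice_GetIndicesEx``, which normalizes a slice's ``(start, stop, step)`` against a sequence length and reports the resulting item count. A pure-Python sketch of the same computation, using ``slice.indices`` (the Python-level counterpart of ``PySlice_GetIndicesEx``):

```python
def slice_len(slc, objlen):
    """Number of items a slice selects from a sequence of length objlen."""
    if slc is None:
        raise TypeError("slc must be slice")
    # slice.indices clips start/stop into [0, objlen] and resolves negative
    # or missing bounds, exactly as PySlice_GetIndicesEx does in C
    start, stop, step = slc.indices(objlen)
    return len(range(start, stop, step))
```

For example, ``slice_len(slice(None, None, -2), 5)`` counts the items ``4, 2, 0``.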
