jbrockmendel
diff --git a/‎.travis.yml
+19-16 b/‎.travis.yml
+19-16
diff --git a/‎README.md
+1-1 b/‎README.md
+1-1
diff --git a/‎ci/code_checks.sh
+1-1 b/‎ci/code_checks.sh
+1-1
diff --git a/‎ci/setup_env.sh
+2-1 b/‎ci/setup_env.sh
+2-1
diff --git a/‎doc/source/getting_started/overview.rst
+1-2 b/‎doc/source/getting_started/overview.rst
+1-2
diff --git a/‎doc/source/user_guide/io.rst
+1-1 b/‎doc/source/user_guide/io.rst
+1-1
diff --git a/‎doc/source/user_guide/text.rst
+5-1 b/‎doc/source/user_guide/text.rst
+5-1
diff --git a/‎doc/source/whatsnew/v1.0.0.rst
+7-2 b/‎doc/source/whatsnew/v1.0.0.rst
+7-2
diff --git a/‎pandas/__init__.py
+1-2 b/‎pandas/__init__.py
+1-2
diff --git a/‎pandas/_config/config.py
+3-1 b/‎pandas/_config/config.py
+3-1
diff --git a/‎pandas/_config/localization.py
+1-1 b/‎pandas/_config/localization.py
+1-1
diff --git a/‎pandas/core/arrays/base.py
+1-5 b/‎pandas/core/arrays/base.py
+1-5
diff --git a/‎pandas/core/arrays/boolean.py
+18 b/‎pandas/core/arrays/boolean.py
+18
diff --git a/‎pandas/core/arrays/categorical.py
+16 b/‎pandas/core/arrays/categorical.py
+16
diff --git a/‎pandas/core/arrays/string_.py
+29-9 b/‎pandas/core/arrays/string_.py
+29-9
@@ -30,31 +30,34 @@ matrix:
       - python: 3.5
 
     include:
-    - dist: trusty
-      env:
+    - env:
         - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"
 
-    - dist: trusty
-      env:
+    - env:
         - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
 
-    - dist: trusty
-      env:
-        - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8"
+    - env:
+        - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
+      services:
+        - mysql
+        - postgresql
 
-    - dist: trusty
-      env:
-        - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
+    - env:
+        - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
+      services:
+        - mysql
+        - postgresql
 
     # In allow_failures
-    - dist: trusty
-      env:
-        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
+    - env:
+        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
+      services:
+        - mysql
+        - postgresql
 
     allow_failures:
-      - dist: trusty
-        env:
-          - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
+      - env:
+          - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
 
 before_install:
   - echo "before_install"
 
@@ -124,7 +124,7 @@ Here are just a few of the things that pandas does well:
     and saving/loading data from the ultrafast [**HDF5 format**][hdfstore]
   - [**Time series**][timeseries]-specific functionality: date range
     generation and frequency conversion, moving window statistics,
-    moving window linear regressions, date shifting and lagging, etc.
+    date shifting and lagging.
 
 
    [missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data
 
@@ -39,7 +39,7 @@ function invgrep {
 }
 
 if [[ "$GITHUB_ACTIONS" == "true" ]]; then
-    FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code):%(text)s"
+    FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
     INVGREP_PREPEND="##[error]"
 else
     FLAKE8_FORMAT="default"
 
@@ -140,7 +140,8 @@ echo "conda list"
 conda list
 
 # Install DB for Linux
-if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
+
+if [[ -n ${SQL:0} ]]; then
   echo "installing dbs"
   mysql -e 'create database pandas_nosetest;'
   psql -c 'create database pandas_nosetest;' -U postgres
 
@@ -57,8 +57,7 @@ Here are just a few of the things that pandas does well:
     Excel files, databases, and saving / loading data from the ultrafast **HDF5
     format**
   - **Time series**-specific functionality: date range generation and frequency
-    conversion, moving window statistics, moving window linear regressions,
-    date shifting and lagging, etc.
+    conversion, moving window statistics, date shifting and lagging.
 
 Many of these principles are here to address the shortcomings frequently
 experienced using other languages / scientific research environments. For data
 
@@ -35,7 +35,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
     binary;`SPSS <https://en.wikipedia.org/wiki/SPSS>`__;:ref:`read_spss<io.spss_reader>`;
     binary;`Python Pickle Format <https://docs.python.org/3/library/pickle.html>`__;:ref:`read_pickle<io.pickle>`;:ref:`to_pickle<io.pickle>`
     SQL;`SQL <https://en.wikipedia.org/wiki/SQL>`__;:ref:`read_sql<io.sql>`;:ref:`to_sql<io.sql>`
-    SQL;`Google Big Query <https://en.wikipedia.org/wiki/BigQuery>`__;:ref:`read_gbq<io.bigquery>`;:ref:`to_gbq<io.bigquery>`
+    SQL;`Google BigQuery <https://en.wikipedia.org/wiki/BigQuery>`__;:ref:`read_gbq<io.bigquery>`;:ref:`to_gbq<io.bigquery>`
 
 :ref:`Here <io.perf>` is an informal performance comparison for some of these IO methods.
 
 
@@ -94,7 +94,11 @@ l. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
 2. Some string methods, like :meth:`Series.str.decode` are not available
    on ``StringArray`` because ``StringArray`` only holds strings, not
    bytes.
-
+3. In comparison operations, :class:`arrays.StringArray` and ``Series`` backed
+   by a ``StringArray`` will return an object with :class:`BooleanDtype`,
+   rather than a ``bool`` dtype object. Missing values in a ``StringArray``
+   will propagate in comparison operations, rather than always comparing
+   unequal like :attr:`numpy.nan`.
 
 Everything else that follows in the rest of this document applies equally to
 ``string`` and ``object`` dtype.
 
@@ -205,6 +205,8 @@ Other enhancements
   (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
   now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
 - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
+- :meth:`DataFrame.to_parquet` now appropriately handles the ``schema`` argument for user-defined schemas in the pyarrow engine. (:issue:`30270`)
+
 
 Build Changes
 ^^^^^^^^^^^^^
@@ -486,6 +488,7 @@ Documentation Improvements
 Deprecations
 ~~~~~~~~~~~~
 
+- :meth:`Series.item` and :meth:`Index.item` have been *undeprecated* (:issue:`29250`)
 - ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``,
   value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)``
   is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`).
@@ -681,6 +684,7 @@ Categorical
   same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. when accessing :meth:`Series.dt.tz_localize` on a
   :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`)
 - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` that would give incorrect results on categorical data (:issue:`26988`)
+- Bug where calling :meth:`Categorical.min` or :meth:`Categorical.max` on an empty Categorical would raise a numpy exception (:issue:`30227`)
 
 
 Datetimelike
@@ -702,6 +706,8 @@ Datetimelike
 - Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`)
 - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`)
 - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`)
+- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`)
+-
 
 Timedelta
 ^^^^^^^^^
@@ -797,7 +803,6 @@ I/O
 - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
 - Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
 - :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
--
 
 Plotting
 ^^^^^^^^
@@ -862,7 +867,7 @@ ExtensionArray
 
 - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`).
 - Bug where nullable integers could not be compared to strings (:issue:`28930`)
--
+- Bug where :class:`DataFrame` constructor raised ValueError with list-like data and ``dtype`` specified (:issue:`30280`)
 
 
 Other
 
@@ -273,6 +273,5 @@ class SparseSeries:
     Excel files, databases, and saving/loading data from the ultrafast HDF5
     format.
   - Time series-specific functionality: date range generation and frequency
-    conversion, moving window statistics, moving window linear regressions,
-    date shifting and lagging, etc.
+    conversion, moving window statistics, date shifting and lagging.
 """
@@ -462,6 +462,7 @@ def register_option(key: str, defval: object, doc="", validator=None, cb=None):
 
     cursor = _global_config
     msg = "Path prefix to option '{option}' is already an option"
+
     for i, p in enumerate(path[:-1]):
         if not isinstance(cursor, dict):
             raise OptionError(msg.format(option=".".join(path[:i])))
@@ -650,8 +651,9 @@ def _build_option_description(k):
         s += f"\n    [default: {o.defval}] [currently: {_get_option(k, True)}]"
 
     if d:
+        rkey = d.rkey if d.rkey else ""
         s += "\n    (Deprecated"
-        s += ", use `{rkey}` instead.".format(rkey=d.rkey if d.rkey else "")
+        s += f", use `{rkey}` instead."
         s += ")"
 
     return s
 
@@ -161,6 +161,6 @@ def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_gette
     if prefix is None:
         return _valid_locales(out_locales, normalize)
 
-    pattern = re.compile("{prefix}.*".format(prefix=prefix))
+    pattern = re.compile(f"{prefix}.*")
     found = pattern.findall("\n".join(out_locales))
     return _valid_locales(found, normalize)
@@ -27,8 +27,6 @@
 from pandas.core.missing import backfill_1d, pad_1d
 from pandas.core.sorting import nargsort
 
-_not_implemented_message = "{} does not implement {}."
-
 _extension_array_shared_docs: Dict[str, str] = dict()
 
 
@@ -330,9 +328,7 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
         #   __init__ method coerces that value, then so should __setitem__
         # Note, also, that Series/DataFrame.where internally use __setitem__
         # on a copy of the data.
-        raise NotImplementedError(
-            _not_implemented_message.format(type(self), "__setitem__")
-        )
+        raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
 
     def __len__(self) -> int:
         """
 
@@ -103,6 +103,24 @@ def __repr__(self) -> str:
     def _is_boolean(self) -> bool:
         return True
 
+    def __from_arrow__(self, array):
+        """Construct a BooleanArray from a pyarrow Array or ChunkedArray."""
+        import pyarrow
+
+        if isinstance(array, pyarrow.Array):
+            chunks = [array]
+        else:
+            # anything else is treated as a pyarrow.ChunkedArray; use its chunks
+            chunks = array.chunks
+
+        results = []
+        for arr in chunks:
+            # TODO: optimize this to avoid going through an object array
+            bool_arr = BooleanArray._from_sequence(np.array(arr))
+            results.append(bool_arr)
+
+        return BooleanArray._concat_same_type(results)
+
 
 def coerce_to_array(values, mask=None, copy: bool = False):
     """
 
@@ -2115,6 +2115,10 @@ def min(self, skipna=True):
 
         Only ordered `Categoricals` have a minimum!
 
+        .. versionchanged:: 1.0.0
+
+           Returns an NA value on empty arrays
+
         Raises
         ------
         TypeError
@@ -2125,6 +2129,10 @@ def min(self, skipna=True):
         min : the minimum of this `Categorical`
         """
         self.check_for_ordered("min")
+
+        if not len(self._codes):
+            return self.dtype.na_value
+
         good = self._codes != -1
         if not good.all():
             if skipna:
@@ -2142,6 +2150,10 @@ def max(self, skipna=True):
 
         Only ordered `Categoricals` have a maximum!
 
+        .. versionchanged:: 1.0.0
+
+           Returns an NA value on empty arrays
+
         Raises
         ------
         TypeError
@@ -2152,6 +2164,10 @@ def max(self, skipna=True):
         max : the maximum of this `Categorical`
         """
         self.check_for_ordered("max")
+
+        if not len(self._codes):
+            return self.dtype.na_value
+
         good = self._codes != -1
         if not good.all():
             if skipna:
 
@@ -86,7 +86,7 @@ def __from_arrow__(self, array):
 
         results = []
         for arr in chunks:
-            # using _from_sequence to ensure None is convered to np.nan
+            # using _from_sequence to ensure None is converted to NA
             str_arr = StringArray._from_sequence(np.array(arr))
             results.append(str_arr)
 
@@ -134,6 +134,10 @@ class StringArray(PandasArray):
         The string methods are available on Series backed by
         a StringArray.
 
+    Notes
+    -----
+    StringArray returns a BooleanArray for comparison methods.
+
     Examples
     --------
     >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
@@ -148,6 +152,13 @@ class StringArray(PandasArray):
     Traceback (most recent call last):
     ...
     ValueError: StringArray requires an object-dtype ndarray of strings.
+
+    For comparison methods, this returns a :class:`pandas.BooleanArray`:
+
+    >>> pd.array(["a", None, "c"], dtype="string") == "a"
+    <BooleanArray>
+    [True, NA, False]
+    Length: 3, dtype: boolean
     """
 
     # undo the PandasArray hack
@@ -197,7 +208,10 @@ def __arrow_array__(self, type=None):
 
         if type is None:
             type = pa.string()
-        return pa.array(self._ndarray, type=type, from_pandas=True)
+
+        values = self._ndarray.copy()
+        values[self.isna()] = None
+        return pa.array(values, type=type, from_pandas=True)
 
     def _values_for_factorize(self):
         arr = self._ndarray.copy()
@@ -255,7 +269,12 @@ def value_counts(self, dropna=False):
     # Overrride parent because we have different return types.
     @classmethod
     def _create_arithmetic_method(cls, op):
+        # Note: this handles both arithmetic and comparison methods.
         def method(self, other):
+            from pandas.arrays import BooleanArray
+
+            assert op.__name__ in ops.ARITHMETIC_BINOPS | ops.COMPARISON_BINOPS
+
             if isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame)):
                 return NotImplemented
 
@@ -275,15 +294,16 @@ def method(self, other):
                 other = np.asarray(other)
                 other = other[valid]
 
-            result = np.empty_like(self._ndarray, dtype="object")
-            result[mask] = StringDtype.na_value
-            result[valid] = op(self._ndarray[valid], other)
-
-            if op.__name__ in {"add", "radd", "mul", "rmul"}:
+            if op.__name__ in ops.ARITHMETIC_BINOPS:
+                result = np.empty_like(self._ndarray, dtype="object")
+                result[mask] = StringDtype.na_value
+                result[valid] = op(self._ndarray[valid], other)
                 return StringArray(result)
             else:
-                dtype = "object" if mask.any() else "bool"
-                return np.asarray(result, dtype=dtype)
+                # logical
+                result = np.zeros(len(self._ndarray), dtype="bool")
+                result[valid] = op(self._ndarray[valid], other)
+                return BooleanArray(result, mask)
 
         return compat.set_function_name(method, f"__{op.__name__}__", cls)
Original file line number	Diff line number	Diff line change
`@@ -39,7 +39,7 @@ function invgrep {`
`39`	`39`	`}`
`40`	`40`
`41`	`41`	`if [[ "$GITHUB_ACTIONS" == "true" ]]; then`
`42`		`- FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code):%(text)s"`
	`42`	`+ FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"`
`43`	`43`	`INVGREP_PREPEND="##[error]"`
`44`	`44`	`else`
`45`	`45`	`FLAKE8_FORMAT="default"`