From 3313f237c14224660d4b8fe2384b9b77eec91bf7 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 22 Nov 2019 09:56:22 -0600
Subject: [PATCH 01/12] API: Infer extension types in array

* string
* integer
---
 doc/source/user_guide/integer_na.rst  | 34 ++++++++++++++++++++-------
 doc/source/whatsnew/v1.0.0.rst        | 31 ++++++++++++++++++++++++
 pandas/_libs/lib.pyx                  |  8 +++++--
 pandas/core/construction.py           | 25 ++++++++++++++++----
 pandas/tests/arrays/test_array.py     | 25 +++++++++++++++-----
 pandas/tests/dtypes/test_inference.py |  9 ++++---
 6 files changed, 108 insertions(+), 24 deletions(-)
diff --git a/doc/source/user_guide/integer_na.rst b/doc/source/user_guide/integer_na.rst
index f1f3d79eed61e..63cd5dbe03239 100644
--- a/doc/source/user_guide/integer_na.rst
+++ b/doc/source/user_guide/integer_na.rst
@@ -25,8 +25,7 @@ numbers.
 
 Pandas can represent integer data with possibly missing values using
 :class:`arrays.IntegerArray`. This is an :ref:`extension types <extending.extension-types>`
-implemented within pandas. It is not the default dtype for integers, and will not be inferred;
-you must explicitly pass the dtype into :meth:`array` or :class:`Series`:
+implemented within pandas.
 
 .. ipython:: python
 
@@ -50,17 +49,34 @@ NumPy array.
 You can also pass the list-like object to the :class:`Series` constructor
 with the dtype.
 
-.. ipython:: python
+.. warning::
 
-   s = pd.Series([1, 2, np.nan], dtype="Int64")
-   s
+   Currently :meth:`pandas.array` and :class:`pandas.Series` use different
+   rules for dtype inference. :meth:`pandas.array` will infer a
+   nullable-integer dtype:
 
-By default (if you don't specify ``dtype``), NumPy is used, and you'll end
-up with a ``float64`` dtype Series:
+   .. ipython:: python
 
-.. ipython:: python
+      pd.array([1, None])
+      pd.array([1, 2])
+
+   For backwards-compatibility, :class:`Series` infers these as either
+   integer or float dtype:
+
+   .. ipython:: python
+
+      pd.Series([1, None])
+      pd.Series([1, 2])
+
+   We recommend explicitly providing the dtype to avoid confusion.
+
+   .. ipython:: python
+
+      pd.array([1, None], dtype="Int64")
+      pd.Series([1, None], dtype="Int64")
 
-   pd.Series([1, 2, np.nan])
+   In the future, we may provide an option for :class:`Series` to infer a
+   nullable-integer dtype.
 
 Operations involving an integer array will behave similar to NumPy arrays.
 Missing values will be propagated, and the data will be coerced to another
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index ac440c263088b..0a510dae0454c 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -234,6 +234,37 @@ The following methods now also correctly output values for unobserved categories
 
    df.groupby(["cat_1", "cat_2"], observed=False)["value"].count()
 
+:meth:`pandas.array` inference changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:meth:`pandas.array` now infers pandas' new extension types in several cases:
+
+1. Sting data (including missing values) now returns a :class:`arrays.StringArray`.
+2. Integer data (including missing values) now returns a :class:`arrays.IntegerArray`.
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+   >>> pd.array(["a", None])
+   <PandasArray>
+   ['a', None]
+   Length: 2, dtype: object
+
+   >>> pd.array([1, None])
+   <PandasArray>
+   [1, None]
+   Length: 2, dtype: object
+
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+   pd.array(["a", None])
+   pd.array([1, None])
+
+As a reminder, you can specify the ``dtype`` to disable all inference.
 
 .. _whatsnew_1000.api_breaking.deps:
 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index aaf6456df8f8e..7b31ebaf9ba9b 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1113,6 +1113,7 @@ def infer_dtype(value: object, skipna: object=None) -> str:
     Results can include:
 
     - string
+    - mixed-string
     - unicode
     - bytes
     - floating
@@ -1319,8 +1320,11 @@ def infer_dtype(value: object, skipna: object=None) -> str:
             return 'boolean'
 
     elif isinstance(val, str):
-        if is_string_array(values, skipna=skipna):
-            return 'string'
+        if is_string_array(values, skipna=True):
+            if isnaobj(values).any():
+                return "mixed-string"
+            else:
+                return "string"
 
     elif isinstance(val, bytes):
         if is_bytes_array(values, skipna=skipna):
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index c0b08beead0ca..edb99776bafc0 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -94,11 +94,18 @@ def array(
         :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
         :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
         :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
+        :class:`int`                   :class:`pandas.arrays.IntegerArray`
+        :class:`str`                   :class:`pandas.arrays.StringArray`
         ============================== =====================================
 
         For all other cases, NumPy's usual inference rules will be used.
 
-    copy : bool, default True
+        .. versionchanged:: 1.0.0
+
+           Pandas infers nullable-integer dtype for integer data and
+           string dtype for string data.
+
+   copy : bool, default True
         Whether to copy the data, even if not necessary. Depending
         on the type of `data`, creating the new array may require
         copying data, even if ``copy=False``.
@@ -246,21 +253,25 @@ def array(
     """
     from pandas.core.arrays import (
         period_array,
+        IntegerArray,
         IntervalArray,
         PandasArray,
         DatetimeArray,
         TimedeltaArray,
+        StringArray,
     )
 
     if lib.is_scalar(data):
         msg = "Cannot pass scalar '{}' to 'pandas.array'."
         raise ValueError(msg.format(data))
 
-    data = extract_array(data, extract_numpy=True)
-
-    if dtype is None and isinstance(data, ABCExtensionArray):
+    if dtype is None and isinstance(
+        data, (ABCSeries, ABCIndexClass, ABCExtensionArray)
+    ):
         dtype = data.dtype
 
+    data = extract_array(data, extract_numpy=True)
+
     # this returns None for not-found dtypes.
     if isinstance(dtype, str):
         dtype = registry.find(dtype) or dtype
@@ -298,6 +309,12 @@ def array(
             # timedelta, timedelta64
             return TimedeltaArray._from_sequence(data, copy=copy)
 
+        elif inferred_dtype in {"string", "mixed-string"}:
+            return StringArray._from_sequence(data, copy=copy)
+
+        elif inferred_dtype in {"integer", "mixed-integer"}:
+            return IntegerArray._from_sequence(data, copy=copy)
+
         # TODO(BooleanArray): handle this type
 
     # Pandas overrides NumPy for
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
index e8d9ecfac61e4..ba2d5f8ee9a03 100644
--- a/pandas/tests/arrays/test_array.py
+++ b/pandas/tests/arrays/test_array.py
@@ -19,14 +19,14 @@
     "data, dtype, expected",
     [
         # Basic NumPy defaults.
-        ([1, 2], None, PandasArray(np.array([1, 2]))),
+        ([1, 2], None, pd.arrays.IntegerArray._from_sequence([1, 2])),
         ([1, 2], object, PandasArray(np.array([1, 2], dtype=object))),
         (
             [1, 2],
             np.dtype("float32"),
             PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
         ),
-        (np.array([1, 2]), None, PandasArray(np.array([1, 2]))),
+        (np.array([1, 2]), None, pd.arrays.IntegerArray._from_sequence([1, 2])),
         # String alias passes through to NumPy
         ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))),
         # Period alias
@@ -113,6 +113,13 @@
         # IntegerNA
         ([1, None], "Int16", integer_array([1, None], dtype="Int16")),
         (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
+        # String
+        (["a", None], "string", pd.arrays.StringArray._from_sequence(["a", None])),
+        (
+            ["a", None],
+            pd.StringDtype(),
+            pd.arrays.StringArray._from_sequence(["a", None]),
+        ),
         # Index
         (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
         # Series[EA] returns the EA
@@ -139,15 +146,15 @@ def test_array(data, dtype, expected):
 def test_array_copy():
     a = np.array([1, 2])
     # default is to copy
-    b = pd.array(a)
+    b = pd.array(a, dtype=a.dtype)
     assert np.shares_memory(a, b._ndarray) is False
 
     # copy=True
-    b = pd.array(a, copy=True)
+    b = pd.array(a, dtype=a.dtype, copy=True)
     assert np.shares_memory(a, b._ndarray) is False
 
     # copy=False
-    b = pd.array(a, copy=False)
+    b = pd.array(a, dtype=a.dtype, copy=False)
     assert np.shares_memory(a, b._ndarray) is True
 
 
@@ -211,6 +218,12 @@ def test_array_copy():
             np.array([1, 2], dtype="m8[us]"),
             pd.arrays.TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")),
         ),
+        # integer
+        ([1, 2], pd.arrays.IntegerArray._from_sequence([1, 2])),
+        ([1, None], pd.arrays.IntegerArray._from_sequence([1, None])),
+        # string
+        (["a", "b"], pd.arrays.StringArray._from_sequence(["a", "b"])),
+        (["a", None], pd.arrays.StringArray._from_sequence(["a", None])),
     ],
 )
 def test_array_inference(data, expected):
@@ -241,7 +254,7 @@ def test_array_inference_fails(data):
 @pytest.mark.parametrize("data", [np.array([[1, 2], [3, 4]]), [[1, 2], [3, 4]]])
 def test_nd_raises(data):
     with pytest.raises(ValueError, match="PandasArray must be 1-dimensional"):
-        pd.array(data)
+        pd.array(data, dtype="int64")
 
 
 def test_scalar_raises():
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 743b844917edf..282e8fdf45aef 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -732,12 +732,15 @@ def test_string(self):
     def test_unicode(self):
         arr = ["a", np.nan, "c"]
         result = lib.infer_dtype(arr, skipna=False)
-        assert result == "mixed"
+        assert result == "mixed-string"
 
         arr = ["a", np.nan, "c"]
         result = lib.infer_dtype(arr, skipna=True)
-        expected = "string"
-        assert result == expected
+        assert result == "string"
+
+        arr = ["a", "c"]
+        result = lib.infer_dtype(arr, skipna=False)
+        assert result == "string"
 
     @pytest.mark.parametrize(
         "dtype, missing, skipna, expected",

From dd02d69f3761e36b8fb9ecc20fd1a6f437bf2a6b Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 22 Nov 2019 13:51:30 -0600
Subject: [PATCH 02/12] update docstring

---
 pandas/core/construction.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index edb99776bafc0..de7e4b932e1a0 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -219,24 +219,17 @@ def array(
     [a, b, a]
     Categories (3, object): [a < b < c]
 
-    Because omitting the `dtype` passes the data through to NumPy,
-    a mixture of valid integers and NA will return a floating-point
-    NumPy array.
+    Pandas will infer an ExtensionArray for some types of data:
 
     >>> pd.array([1, 2, np.nan])
-    <PandasArray>
-    [1.0,  2.0, nan]
-    Length: 3, dtype: float64
-
-    To use pandas' nullable :class:`pandas.arrays.IntegerArray`, specify
-    the dtype:
-
-    >>> pd.array([1, 2, np.nan], dtype='Int64')
     <IntegerArray>
     [1, 2, NaN]
     Length: 3, dtype: Int64
 
-    Pandas will infer an ExtensionArray for some types of data:
+    >>> pd.array(["a", None, "c"])
+    <StringArray>
+    ['a', nan, 'c']
+    Length: 3, dtype: string
 
     >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
     <PeriodArray>

From 5a9c306a0ee8e339a8ea0f51e00fb3275598127c Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 22 Nov 2019 14:50:00 -0600
Subject: [PATCH 03/12] remove mixed-string

---
 doc/source/whatsnew/v1.0.0.rst             | 4 ++--
 pandas/_libs/lib.pyx                       | 7 ++-----
 pandas/core/construction.py                | 2 +-
 pandas/tests/dtypes/test_inference.py      | 2 +-
 pandas/tests/frame/test_block_internals.py | 4 ++--
 pandas/tests/internals/test_internals.py   | 2 +-
 6 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 0a510dae0454c..803d1d359cf10 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -237,7 +237,7 @@ The following methods now also correctly output values for unobserved categories
 :meth:`pandas.array` inference changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-:meth:`pandas.array` now infers pandas' new extension types in several cases:
+:meth:`pandas.array` now infers pandas' new extension types in several cases (:issue:`29791`):
 
 1. Sting data (including missing values) now returns a :class:`arrays.StringArray`.
 2. Integer data (including missing values) now returns a :class:`arrays.IntegerArray`.
@@ -350,7 +350,7 @@ Other API changes
 - :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter.
   Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
 - When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`)
--
+- :meth:`pandas.api.types.infer_dtype` returns ``"string"`` rather than ``"mixed"`` for a mixture of strings and NA values (:issue:`29799`)
 
 
 .. _whatsnew_1000.api.documentation:
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 7b31ebaf9ba9b..0e5241d8bef34 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1113,7 +1113,6 @@ def infer_dtype(value: object, skipna: object=None) -> str:
     Results can include:
 
     - string
-    - mixed-string
     - unicode
     - bytes
     - floating
@@ -1320,11 +1319,9 @@ def infer_dtype(value: object, skipna: object=None) -> str:
             return 'boolean'
 
     elif isinstance(val, str):
+        # we deliberately ignore skipna
         if is_string_array(values, skipna=True):
-            if isnaobj(values).any():
-                return "mixed-string"
-            else:
-                return "string"
+            return "string"
 
     elif isinstance(val, bytes):
         if is_bytes_array(values, skipna=skipna):
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index de7e4b932e1a0..506b2c82134ee 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -302,7 +302,7 @@ def array(
             # timedelta, timedelta64
             return TimedeltaArray._from_sequence(data, copy=copy)
 
-        elif inferred_dtype in {"string", "mixed-string"}:
+        elif inferred_dtype == "string":
             return StringArray._from_sequence(data, copy=copy)
 
         elif inferred_dtype in {"integer", "mixed-integer"}:
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 282e8fdf45aef..3c1705ddb785e 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -732,7 +732,7 @@ def test_string(self):
     def test_unicode(self):
         arr = ["a", np.nan, "c"]
         result = lib.infer_dtype(arr, skipna=False)
-        assert result == "mixed-string"
+        assert result == "string"
 
         arr = ["a", np.nan, "c"]
         result = lib.infer_dtype(arr, skipna=True)
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index d491e9f25c897..b27e7c217c4c2 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -615,12 +615,12 @@ def test_constructor_no_pandas_array(self):
     def test_add_column_with_pandas_array(self):
         # GH 26390
         df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
-        df["c"] = pd.array([1, 2, None, 3])
+        df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object))
         df2 = pd.DataFrame(
             {
                 "a": [1, 2, 3, 4],
                 "b": ["a", "b", "c", "d"],
-                "c": pd.array([1, 2, None, 3]),
+                "c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)),
             }
         )
         assert type(df["c"]._data.blocks[0]) == ObjectBlock
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index abe2ddf955ad8..551782d0b363a 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -1268,7 +1268,7 @@ def test_block_shape():
 
 def test_make_block_no_pandas_array():
     # https://github.com/pandas-dev/pandas/pull/24866
-    arr = pd.array([1, 2])
+    arr = pd.arrays.PandasArray(np.array([1, 2]))
 
     # PandasArray, no dtype
     result = make_block(arr, slice(len(arr)))

From e3ba8464e2e9f90ffde417c20db9170132457cca Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 25 Nov 2019 06:09:47 -0600
Subject: [PATCH 04/12] skipna=True

---
 pandas/_libs/lib.pyx        | 3 +--
 pandas/core/construction.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 0e5241d8bef34..48cbfa61e0863 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1319,8 +1319,7 @@ def infer_dtype(value: object, skipna: object=None) -> str:
             return 'boolean'
 
     elif isinstance(val, str):
-        # we deliberately ignore skipna
-        if is_string_array(values, skipna=True):
+        if is_string_array(values, skipna=skipna):
             return "string"
 
     elif isinstance(val, bytes):
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 506b2c82134ee..bfb41c7a011d3 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -274,7 +274,7 @@ def array(
         return cls._from_sequence(data, dtype=dtype, copy=copy)
 
     if dtype is None:
-        inferred_dtype = lib.infer_dtype(data, skipna=False)
+        inferred_dtype = lib.infer_dtype(data, skipna=True)
         if inferred_dtype == "period":
             try:
                 return period_array(data, copy=copy)
@@ -305,7 +305,7 @@ def array(
         elif inferred_dtype == "string":
             return StringArray._from_sequence(data, copy=copy)
 
-        elif inferred_dtype in {"integer", "mixed-integer"}:
+        elif inferred_dtype == "integer":
             return IntegerArray._from_sequence(data, copy=copy)
 
         # TODO(BooleanArray): handle this type

From e055ada39319133a609bb4f834a553e8bc87e537 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 25 Nov 2019 07:51:19 -0600
Subject: [PATCH 05/12] update new test

---
 pandas/tests/dtypes/test_inference.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 3c1705ddb785e..d7693d29c08b9 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -732,7 +732,9 @@ def test_string(self):
     def test_unicode(self):
         arr = ["a", np.nan, "c"]
         result = lib.infer_dtype(arr, skipna=False)
-        assert result == "string"
+        # This currently returns "mixed", but it's not clear that's optimal.
+        # This could also return "string" or "mixed-string"
+        assert result == "mixed"
 
         arr = ["a", np.nan, "c"]
         result = lib.infer_dtype(arr, skipna=True)

From ad43c3a966f59d2e49df435565a103488706991a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 25 Nov 2019 08:26:45 -0600
Subject: [PATCH 06/12] reduce

---
 pandas/tests/series/test_ufunc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
index c8a127f89bf91..6c665917210a4 100644
--- a/pandas/tests/series/test_ufunc.py
+++ b/pandas/tests/series/test_ufunc.py
@@ -251,7 +251,7 @@ def __add__(self, other):
 @pytest.mark.parametrize(
     "values",
     [
-        pd.array([1, 3, 2]),
+        pd.array([1, 3, 2], dtype="int64"),
         pd.array([1, 10, 0], dtype="Sparse[int]"),
         pd.to_datetime(["2000", "2010", "2001"]),
         pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"),

From 77c5d3f414108f5105866392f738bd516b9294ac Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 25 Nov 2019 08:31:53 -0600
Subject: [PATCH 07/12] 32 bit, doc

---
 doc/source/user_guide/integer_na.rst | 2 ++
 pandas/tests/arrays/test_array.py    | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/doc/source/user_guide/integer_na.rst b/doc/source/user_guide/integer_na.rst
index 63cd5dbe03239..77568f3bcb244 100644
--- a/doc/source/user_guide/integer_na.rst
+++ b/doc/source/user_guide/integer_na.rst
@@ -84,6 +84,8 @@ dtype if needed.
 
 .. ipython:: python
 
+   s = pd.Series([1, 2, None], dtype="Int64")
+
    # arithmetic
    s + 1
 
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
index ba2d5f8ee9a03..dc61bc3fc37d5 100644
--- a/pandas/tests/arrays/test_array.py
+++ b/pandas/tests/arrays/test_array.py
@@ -26,7 +26,11 @@
             np.dtype("float32"),
             PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
         ),
-        (np.array([1, 2]), None, pd.arrays.IntegerArray._from_sequence([1, 2])),
+        (
+            np.array([1, 2], dtype="int64"),
+            None,
+            pd.arrays.IntegerArray._from_sequence([1, 2]),
+        ),
         # String alias passes through to NumPy
         ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))),
         # Period alias

From 0f89f47653d28a677f150eaffdf6488d77922776 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 25 Nov 2019 08:32:38 -0600
Subject: [PATCH 08/12] update

---
 doc/source/whatsnew/v1.0.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 6174976e74b92..92e87cacf607b 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -350,7 +350,6 @@ Other API changes
 - :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter.
   Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
 - When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`)
-- :meth:`pandas.api.types.infer_dtype` returns ``"string"`` rather than ``"mixed"`` for a mixture of strings and NA values (:issue:`29799`)
 
 
 .. _whatsnew_1000.api.documentation:

From 4e08fd2eed92c3d5bc8a9b65d03bb343df9889cf Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 25 Nov 2019 10:48:12 -0600
Subject: [PATCH 09/12] fix docstring

---
 pandas/core/construction.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index bfb41c7a011d3..a5a88a7b2d0b5 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -105,7 +105,7 @@ def array(
            Pandas infers nullable-integer dtype for integer data and
            string dtype for string data.
 
-   copy : bool, default True
+    copy : bool, default True
         Whether to copy the data, even if not necessary. Depending
         on the type of `data`, creating the new array may require
         copying data, even if ``copy=False``.
@@ -161,14 +161,6 @@ def array(
     ['a', 'b']
     Length: 2, dtype: str32
 
-    Or use the dedicated constructor for the array you're expecting, and
-    wrap that in a PandasArray
-
-    >>> pd.array(np.array(['a', 'b'], dtype='<U1'))
-    <PandasArray>
-    ['a', 'b']
-    Length: 2, dtype: str32
-
     Finally, Pandas has arrays that mostly overlap with NumPy
 
       * :class:`arrays.DatetimeArray`
@@ -191,13 +183,21 @@ def array(
 
     Examples
     --------
-    If a dtype is not specified, `data` is passed through to
-    :meth:`numpy.array`, and a :class:`arrays.PandasArray` is returned.
+    If a dtype is not specified, pandas will infer the best dtype from the values.
+    See the description of `dtype` for the types pandas will infer.
 
     >>> pd.array([1, 2])
-    <PandasArray>
+    <IntegerArray>
     [1, 2]
-    Length: 2, dtype: int64
+    Length: 2, dtype: Int64
+
+    If pandas does not infer a dedicated extension type for some values, we
+    fall back to returning a :class:`arrays.PandasArray`.
+
+    >>> pd.array([1.1, 2.2])
+    <PandasArray>
+    [1.1, 2.2]
+    Length: 2, dtype: float64
 
     Or the NumPy dtype can be specified
 

From bddce9b8b3eeacd53d8be7688593c4034222ea1f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 25 Nov 2019 10:49:57 -0600
Subject: [PATCH 10/12] reorganize

---
 pandas/core/construction.py | 48 ++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index a5a88a7b2d0b5..0e23b18505efa 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -191,20 +191,20 @@ def array(
     [1, 2]
     Length: 2, dtype: Int64
 
-    If pandas does not infer a dedicated extension type for some values, we
-    fall back to returning a :class:`arrays.PandasArray`.
-
-    >>> pd.array([1.1, 2.2])
-    <PandasArray>
-    [1.1, 2.2]
-    Length: 2, dtype: float64
+    >>> pd.array([1, 2, np.nan])
+    <IntegerArray>
+    [1, 2, NaN]
+    Length: 3, dtype: Int64
 
-    Or the NumPy dtype can be specified
+    >>> pd.array(["a", None, "c"])
+    <StringArray>
+    ['a', nan, 'c']
+    Length: 3, dtype: string
 
-    >>> pd.array([1, 2], dtype=np.dtype("int32"))
-    <PandasArray>
-    [1, 2]
-    Length: 2, dtype: int32
+    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
+    <PeriodArray>
+    ['2000-01-01', '2000-01-01']
+    Length: 2, dtype: period[D]
 
     You can use the string alias for `dtype`
 
@@ -219,22 +219,20 @@ def array(
     [a, b, a]
     Categories (3, object): [a < b < c]
 
-    Pandas will infer an ExtensionArray for some types of data:
+    If pandas does not infer a dedicated extension type, a
+    :class:`arrays.PandasArray` is returned.
 
-    >>> pd.array([1, 2, np.nan])
-    <IntegerArray>
-    [1, 2, NaN]
-    Length: 3, dtype: Int64
+    >>> pd.array([1.1, 2.2])
+    <PandasArray>
+    [1.1, 2.2]
+    Length: 2, dtype: float64
 
-    >>> pd.array(["a", None, "c"])
-    <StringArray>
-    ['a', nan, 'c']
-    Length: 3, dtype: string
+    Or the NumPy dtype can be specified
 
-    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
-    <PeriodArray>
-    ['2000-01-01', '2000-01-01']
-    Length: 2, dtype: period[D]
+    >>> pd.array([1, 2], dtype=np.dtype("int32"))
+    <PandasArray>
+    [1, 2]
+    Length: 2, dtype: int32
 
     `data` must be 1-dimensional. A ValueError is raised when the input
     has the wrong dimensionality.

From 372ac06420c56418280162bb44d479f4f367bd50 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 27 Nov 2019 12:00:45 -0600
Subject: [PATCH 11/12] Handle BooleanArray

---
 doc/source/whatsnew/v1.0.0.rst    |  3 ++-
 pandas/core/construction.py       | 10 +++++++---
 pandas/tests/arrays/test_array.py | 10 ++++++++++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 81965d64d325b..0b314a8c705a1 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -264,8 +264,9 @@ The following methods now also correctly output values for unobserved categories
 
 :meth:`pandas.array` now infers pandas' new extension types in several cases (:issue:`29791`):
 
-1. Sting data (including missing values) now returns a :class:`arrays.StringArray`.
+1. String data (including missing values) now returns a :class:`arrays.StringArray`.
 2. Integer data (including missing values) now returns a :class:`arrays.IntegerArray`.
+3. Boolean data (including missing values) now returns the new :class:`arrays.BooleanArray`.
 
 *pandas 0.25.x*
 
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 0e23b18505efa..ce906678d990c 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -96,14 +96,16 @@ def array(
         :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
         :class:`int`                   :class:`pandas.arrays.IntegerArray`
         :class:`str`                   :class:`pandas.arrays.StringArray`
+        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
         ============================== =====================================
 
         For all other cases, NumPy's usual inference rules will be used.
 
         .. versionchanged:: 1.0.0
 
-           Pandas infers nullable-integer dtype for integer data and
-           string dtype for string data.
+           Pandas infers nullable-integer dtype for integer data,
+           string dtype for string data, and nullable-boolean dtype
+           for boolean data.
 
     copy : bool, default True
         Whether to copy the data, even if not necessary. Depending
@@ -244,6 +246,7 @@ def array(
     """
     from pandas.core.arrays import (
         period_array,
+        BooleanArray,
         IntegerArray,
         IntervalArray,
         PandasArray,
@@ -306,7 +309,8 @@ def array(
         elif inferred_dtype == "integer":
             return IntegerArray._from_sequence(data, copy=copy)
 
-        # TODO(BooleanArray): handle this type
+        elif inferred_dtype == "boolean":
+            return BooleanArray._from_sequence(data, copy=copy)
 
     # Pandas overrides NumPy for
     #   1. datetime64[ns]
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
index 7b44a362a5c30..479f8dbad0418 100644
--- a/pandas/tests/arrays/test_array.py
+++ b/pandas/tests/arrays/test_array.py
@@ -124,6 +124,13 @@
             pd.StringDtype(),
             pd.arrays.StringArray._from_sequence(["a", None]),
         ),
+        # Boolean
+        ([True, None], "boolean", pd.arrays.BooleanArray._from_sequence([True, None])),
+        (
+            [True, None],
+            pd.BooleanDtype(),
+            pd.arrays.BooleanArray._from_sequence([True, None]),
+        ),
         # Index
         (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
         # Series[EA] returns the EA
@@ -228,6 +235,9 @@ def test_array_copy():
         # string
         (["a", "b"], pd.arrays.StringArray._from_sequence(["a", "b"])),
         (["a", None], pd.arrays.StringArray._from_sequence(["a", None])),
+        # Boolean
+        ([True, False], pd.arrays.BooleanArray._from_sequence([True, False])),
+        ([True, None], pd.arrays.BooleanArray._from_sequence([True, None])),
     ],
 )
 def test_array_inference(data, expected):

From d0f3082ce4d2ca66533cdc0b4dabc2612b8e6e04 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 2 Dec 2019 06:15:14 -0600
Subject: [PATCH 12/12] update docstring

---
 pandas/core/construction.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index ce906678d990c..dc537d50b3419 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -229,7 +229,11 @@ def array(
     [1.1, 2.2]
     Length: 2, dtype: float64
 
-    Or the NumPy dtype can be specified
+    As mentioned in the "Notes" section, new extension types may be added
+    in the future (by pandas or 3rd party libraries), causing the return
+    value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
+    as a NumPy dtype if you need to ensure there's no future change in
+    behavior.
 
     >>> pd.array([1, 2], dtype=np.dtype("int32"))
     <PandasArray>