From 38e37be3baa396199e210f52e5066f487db554ea Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn" <uwe.korn@quantco.com>
Date: Wed, 31 Jul 2019 15:09:52 +0200
Subject: [PATCH 1/3] Handle construction of string ExtensionArray from lists

---
 pandas/core/construction.py                   |  4 +-
 .../extension/arrow/{bool.py => arrays.py}    | 53 +++++++++++++++----
 pandas/tests/extension/arrow/test_bool.py     |  2 +-
 pandas/tests/extension/arrow/test_string.py   | 13 +++++
 4 files changed, 61 insertions(+), 11 deletions(-)
 rename pandas/tests/extension/arrow/{bool.py => arrays.py} (80%)
 create mode 100644 pandas/tests/extension/arrow/test_string.py

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 9528723a6dc0f..b31f466569875 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -470,7 +470,9 @@ def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False
 
     # This is to prevent mixed-type Series getting all casted to
     # NumPy string type, e.g. NaN --> '-1#IND'.
-    if issubclass(subarr.dtype.type, str):
+    if not (
+        is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)
+    ) and issubclass(subarr.dtype.type, str):
         # GH#16605
         # If not empty convert the data to dtype
         # GH#19853: If data is a scalar, subarr has already the result
diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/arrays.py
similarity index 80%
rename from pandas/tests/extension/arrow/bool.py
rename to pandas/tests/extension/arrow/arrays.py
index eb75d6d968073..3b6b0455f9a5d 100644
--- a/pandas/tests/extension/arrow/bool.py
+++ b/pandas/tests/extension/arrow/arrays.py
@@ -43,18 +43,27 @@ def _is_boolean(self):
         return True
 
 
-class ArrowBoolArray(ExtensionArray):
-    def __init__(self, values):
-        if not isinstance(values, pa.ChunkedArray):
-            raise ValueError
+@register_extension_dtype
+class ArrowStringDtype(ExtensionDtype):
 
-        assert values.type == pa.bool_()
-        self._data = values
-        self._dtype = ArrowBoolDtype()
+    type = str
+    kind = "U"
+    name = "arrow_string"
+    na_value = pa.NULL
 
-    def __repr__(self):
-        return "ArrowBoolArray({})".format(repr(self._data))
+    @classmethod
+    def construct_from_string(cls, string):
+        if string == cls.name:
+            return cls()
+        else:
+            raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))
 
+    @classmethod
+    def construct_array_type(cls):
+        return ArrowStringArray
+
+
+class ArrowExtensionArray(ExtensionArray):
     @classmethod
     def from_scalars(cls, values):
         arr = pa.chunked_array([pa.array(np.asarray(values))])
@@ -142,3 +151,29 @@ def any(self, axis=0, out=None):
 
     def all(self, axis=0, out=None):
         return self._data.to_pandas().all()
+
+
+class ArrowBoolArray(ArrowExtensionArray):
+    def __init__(self, values):
+        if not isinstance(values, pa.ChunkedArray):
+            raise ValueError
+
+        assert values.type == pa.bool_()
+        self._data = values
+        self._dtype = ArrowBoolDtype()
+
+    def __repr__(self):
+        return "ArrowBoolArray({})".format(repr(self._data))
+
+
+class ArrowStringArray(ArrowExtensionArray):
+    def __init__(self, values):
+        if not isinstance(values, pa.ChunkedArray):
+            raise ValueError
+
+        assert values.type == pa.string()
+        self._data = values
+        self._dtype = ArrowStringDtype()
+
+    def __repr__(self):
+        return "ArrowStringArray({})".format(repr(self._data))
diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py
index 205edf5da5b74..cc0deca765b41 100644
--- a/pandas/tests/extension/arrow/test_bool.py
+++ b/pandas/tests/extension/arrow/test_bool.py
@@ -7,7 +7,7 @@
 
 pytest.importorskip("pyarrow", minversion="0.10.0")
 
-from .bool import ArrowBoolArray, ArrowBoolDtype  # isort:skip
+from .arrays import ArrowBoolArray, ArrowBoolDtype  # isort:skip
 
 
 @pytest.fixture
diff --git a/pandas/tests/extension/arrow/test_string.py b/pandas/tests/extension/arrow/test_string.py
new file mode 100644
index 0000000000000..06f149aa4b75f
--- /dev/null
+++ b/pandas/tests/extension/arrow/test_string.py
@@ -0,0 +1,13 @@
+import pytest
+
+import pandas as pd
+
+pytest.importorskip("pyarrow", minversion="0.10.0")
+
+from .arrays import ArrowStringDtype  # isort:skip
+
+
+def test_constructor_from_list():
+    # GH 27673
+    result = pd.Series(["E"], dtype=ArrowStringDtype())
+    assert isinstance(result.dtype, ArrowStringDtype)

From 503335d93eff7cdaa053fafa0539586b95d3bf1b Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn" <uwe.korn@quantco.com>
Date: Thu, 1 Aug 2019 09:56:28 +0200
Subject: [PATCH 2/3] Pull ExtensionArray check one level up

---
 pandas/core/construction.py | 45 +++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index b31f466569875..0c25cdf121cbb 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -468,32 +468,27 @@ def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False
         else:
             subarr = com.asarray_tuplesafe(data, dtype=dtype)
 
-    # This is to prevent mixed-type Series getting all casted to
-    # NumPy string type, e.g. NaN --> '-1#IND'.
-    if not (
-        is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)
-    ) and issubclass(subarr.dtype.type, str):
-        # GH#16605
-        # If not empty convert the data to dtype
-        # GH#19853: If data is a scalar, subarr has already the result
-        if not lib.is_scalar(data):
-            if not np.all(isna(data)):
-                data = np.array(data, dtype=dtype, copy=False)
-            subarr = np.array(data, dtype=object, copy=copy)
-
-    if (
-        not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype))
-        and is_object_dtype(subarr.dtype)
-        and not is_object_dtype(dtype)
-    ):
-        inferred = lib.infer_dtype(subarr, skipna=False)
-        if inferred == "period":
-            from pandas.core.arrays import period_array
+    if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)):
+        # This is to prevent a mixed-type Series from being cast entirely to
+        # NumPy string type, e.g. NaN --> '-1#IND'.
+        if issubclass(subarr.dtype.type, str):
+            # GH#16605
+            # If not empty, convert the data to dtype
+            # GH#19853: If data is a scalar, subarr already has the result
+            if not lib.is_scalar(data):
+                if not np.all(isna(data)):
+                    data = np.array(data, dtype=dtype, copy=False)
+                subarr = np.array(data, dtype=object, copy=copy)
 
-            try:
-                subarr = period_array(subarr)
-            except IncompatibleFrequency:
-                pass
+        if is_object_dtype(subarr.dtype) and not is_object_dtype(dtype):
+            inferred = lib.infer_dtype(subarr, skipna=False)
+            if inferred == "period":
+                from pandas.core.arrays import period_array
+
+                try:
+                    subarr = period_array(subarr)
+                except IncompatibleFrequency:
+                    pass
 
     return subarr
 

From 3f30c155e15ed2754acf6d3fe693d395ca1e2996 Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn" <uwe.korn@quantco.com>
Date: Thu, 1 Aug 2019 18:28:07 +0200
Subject: [PATCH 3/3] Unify repr

---
 pandas/tests/extension/arrow/arrays.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py
index 3b6b0455f9a5d..6a28f76e474cc 100644
--- a/pandas/tests/extension/arrow/arrays.py
+++ b/pandas/tests/extension/arrow/arrays.py
@@ -78,6 +78,9 @@ def from_array(cls, arr):
     def _from_sequence(cls, scalars, dtype=None, copy=False):
         return cls.from_scalars(scalars)
 
+    def __repr__(self):
+        return "{cls}({data})".format(cls=type(self).__name__, data=repr(self._data))
+
     def __getitem__(self, item):
         if pd.api.types.is_scalar(item):
             return self._data.to_pandas()[item]
@@ -162,9 +165,6 @@ def __init__(self, values):
         self._data = values
         self._dtype = ArrowBoolDtype()
 
-    def __repr__(self):
-        return "ArrowBoolArray({})".format(repr(self._data))
-
 
 class ArrowStringArray(ArrowExtensionArray):
     def __init__(self, values):
@@ -174,6 +174,3 @@ def __init__(self, values):
         assert values.type == pa.string()
         self._data = values
         self._dtype = ArrowStringDtype()
-
-    def __repr__(self):
-        return "ArrowStringArray({})".format(repr(self._data))