From 381b07309a8c6aa60d3862747302e4810b66c05f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 22 Oct 2018 15:35:29 -0500
Subject: [PATCH 1/8] Preserve EA dtype in DataFrame.stack

---
 doc/source/whatsnew/v0.24.0.txt          |  2 ++
 pandas/core/reshape/reshape.py           |  9 ++++++++-
 pandas/tests/extension/base/reshaping.py |  8 ++++++++
 pandas/tests/frame/test_reshape.py       | 11 +++++++++++
 4 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index d0aa156cf5059..5aa5088d5028b 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -724,6 +724,8 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).
 - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
+- :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`).
+
 
 .. _whatsnew_0240.api.incompatibilities:
 
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 03b77f0e787f0..d4fc3ed66bce6 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -470,8 +470,15 @@ def factorize(index):
         if is_extension_array_dtype(dtype):
             arr = dtype.construct_array_type()
             new_values = arr._concat_same_type([
-                col for _, col in frame.iteritems()
+                col._values for _, col in frame.iteritems()
             ])
+            # final take to get the order correct.
+            # idx is an indexer like
+            # [c0r0, c1r0, c2r0, ...,
+            #  c0r1, c1r1, c241, ...]
+            idx = np.arange(N * K).reshape(K, N).T.ravel()
+            new_values = new_values.take(idx)
+
         else:
             # homogeneous, non-EA
             new_values = frame.values.ravel()
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 7f13c2cd67373..d985bd6785715 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -170,3 +170,11 @@ def test_merge(self, data, na_value):
                  [data[0], data[0], data[1], data[2], na_value],
                  dtype=data.dtype)})
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
+
+    def test_stack(self, data):
+        df = pd.DataFrame({"A": data[:5], "B": data[:5]})
+        result = df.stack()
+        assert result.dtype == df.A.dtype
+        result = result.astype(object)
+        expected = df.astype(object).stack()
+        self.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index 9f6735c7ba2bf..4ddd3141b5f8b 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -872,6 +872,17 @@ def test_stack_preserve_categorical_dtype(self, ordered, labels):
 
         tm.assert_series_equal(result, expected)
 
+    def test_stack_preserve_categorical_dtype_values(self):
+        # GH-23077
+        cat = pd.Categorical(['a', 'a', 'b', 'c'])
+        df = pd.DataFrame({"A": cat, "B": cat})
+        result = df.stack()
+        index = pd.MultiIndex.from_product([[0, 1, 2, 3], ['A', 'B']])
+        expected = pd.Series(pd.Categorical(['a', 'a', 'a', 'a',
+                                             'b', 'b', 'c', 'c']),
+                             index=index)
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("level", [0, 'baz'])
     def test_unstack_swaplevel_sortlevel(self, level):
         # GH 20994

From 428f2308b67324aeda125be085bb6387b77037fd Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 23 Oct 2018 11:29:34 -0500
Subject: [PATCH 2/8] sparse

---
 pandas/core/internals/blocks.py         |  5 ++++-
 pandas/tests/sparse/frame/test_frame.py | 10 ++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 6fb1184f48b69..60200d7c3cdfa 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -36,6 +36,7 @@
     is_list_like,
     is_re,
     is_re_compilable,
+    is_sparse,
     pandas_dtype)
 from pandas.core.dtypes.cast import (
     maybe_downcast_to_dtype,
@@ -633,7 +634,9 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
             return self
 
         if klass is None:
-            if dtype == np.object_:
+            # sparse is "special" and preserves sparsity.
+            # We're changing this in GH-23125
+            if dtype == np.object_ and is_sparse(values):
                 klass = ObjectBlock
             elif is_extension_array_dtype(dtype):
                 klass = ExtensionBlock
diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
index 03143488c3874..10074a2e5ad99 100644
--- a/pandas/tests/sparse/frame/test_frame.py
+++ b/pandas/tests/sparse/frame/test_frame.py
@@ -736,6 +736,16 @@ def test_astype_bool(self):
         assert res['A'].dtype == SparseDtype(np.bool)
         assert res['B'].dtype == SparseDtype(np.bool)
 
+    def test_astype_object(self):
+        # This may change in GH-23125
+        df = pd.DataFrame({"A": SparseArray([0, 1]),
+                           "B": SparseArray([0, 1])})
+        result = df.astype(object)
+        dtype = SparseDtype(object, 0)
+        expected = pd.DataFrame({"A": SparseArray([0, 1], dtype=dtype),
+                                 "B": SparseArray([0, 1], dtype=dtype)})
+        tm.assert_frame_equal(result, expected)
+
     def test_fillna(self, float_frame_fill0, float_frame_fill0_dense):
         df = float_frame_fill0.reindex(lrange(5))
         dense = float_frame_fill0_dense.reindex(lrange(5))

From 0d39be0ec5028c058164ac5f7f162cb8f2b4f793 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 06:34:15 -0500
Subject: [PATCH 3/8] multi test

---
 pandas/tests/extension/base/reshaping.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index d985bd6785715..113daeb0d5359 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -171,10 +171,21 @@ def test_merge(self, data, na_value):
                  dtype=data.dtype)})
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
 
-    def test_stack(self, data):
+    @pytest.mark.parametrize("columns", [
+        ["A", "B"],
+        pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b')],
+                                  names=['outer', 'inner']),
+    ])
+    def test_stack(self, data, columns):
         df = pd.DataFrame({"A": data[:5], "B": data[:5]})
+        df.columns = columns
         result = df.stack()
-        assert result.dtype == df.A.dtype
-        result = result.astype(object)
         expected = df.astype(object).stack()
-        self.assert_series_equal(result, expected)
+
+        if isinstance(expected, pd.Series):
+            assert result.dtype == df.iloc[:, 0].dtype
+        else:
+            assert all(result.dtypes == df.iloc[:, 0].dtype)
+
+        result = result.astype(object)
+        self.assert_equal(result, expected)

From 7e9224a9b56876fd5e9024fabd6102b8b71bd4f8 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 15:27:52 -0500
Subject: [PATCH 4/8] multiple columns

---
 pandas/core/internals/blocks.py          |  7 ++++---
 pandas/core/reshape/reshape.py           | 25 +++++++++++++++++++++---
 pandas/tests/extension/json/test_json.py |  9 ++++++++-
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index c346252666173..18ec046d0e614 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -633,9 +633,10 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
             return self
 
         if klass is None:
-            # sparse is "special" and preserves sparsity.
-            # We're changing this in GH-23125
-            if dtype == np.object_ and is_sparse(values):
+            if is_sparse(self.values):
+                # Series[Sparse].astype(object) is sparse.
+                klass = ExtensionBlock
+            elif is_object_dtype(dtype):
                 klass = ObjectBlock
             elif is_extension_array_dtype(dtype):
                 klass = ExtensionBlock
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index d4fc3ed66bce6..496a946b0bcc3 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -475,7 +475,7 @@ def factorize(index):
             # final take to get the order correct.
             # idx is an indexer like
             # [c0r0, c1r0, c2r0, ...,
-            #  c0r1, c1r1, c241, ...]
+            #  c0r1, c1r1, c2r1, ...]
             idx = np.arange(N * K).reshape(K, N).T.ravel()
             new_values = new_values.take(idx)
 
@@ -603,20 +603,39 @@ def _convert_level_number(level_num, columns):
         # indexer
         if not isinstance(loc, slice):
             slice_len = len(loc)
+            locs = list(loc)
         else:
             slice_len = loc.stop - loc.start
+            locs = list(range(loc.start, loc.stop))
 
         if slice_len != levsize:
             chunk = this.loc[:, this.columns[loc]]
             chunk.columns = level_vals.take(chunk.columns.labels[-1])
             value_slice = chunk.reindex(columns=level_vals_used).values
         else:
-            if frame._is_mixed_type:
+            if (frame._is_homogeneous_type and
+                    is_extension_array_dtype(frame.dtypes.iloc[0])):
+                import pdb; pdb.set_trace()
+                dtype = this.loc[:, this.columns[loc]].dtypes.iloc[0]
+                subset = this.loc[:, this.columns[loc]]
+
+                value_slice = dtype.construct_array_type()._concat_same_type(
+                    [x._values for _, x in subset.iteritems()]
+                )
+                N, K = this.shape
+                idx = np.arange(N * K).reshape(K, N).T.ravel()
+                value_slice = value_slice.take(idx)
+
+            elif frame._is_mixed_type:
                 value_slice = this.loc[:, this.columns[loc]].values
             else:
                 value_slice = this.values[:, loc]
 
-        new_data[key] = value_slice.ravel()
+        if value_slice.ndim > 1:
+            # i.e. not extension
+            value_slice = value_slice.ravel()
+
+        new_data[key] = value_slice
 
     if len(drop_cols) > 0:
         new_columns = new_columns.difference(drop_cols)
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 15d99f6c5d2fc..15aebf3e17ecb 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -138,7 +138,14 @@ def test_from_dtype(self, data):
 
 
 class TestReshaping(BaseJSON, base.BaseReshapingTests):
-    pass
+
+    @pytest.mark.skip(reason="Different definitions of NA")
+    def test_stack(self):
+        """
+        The test does .astype(object).stack(). If we happen to have
+        any missing values in `data`, then we'll end up with different
+        rows since we consider `{}` NA, but `.astype(object)` doesn't.
+        """
 
 
 class TestGetitem(BaseJSON, base.BaseGetitemTests):

From d6661cb6490916b808b2f9c0ac650bec56ad8cf7 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 16:12:29 -0500
Subject: [PATCH 5/8] remove pdb

---
 pandas/core/reshape/reshape.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 496a946b0bcc3..6dbf7765b68d5 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -603,10 +603,8 @@ def _convert_level_number(level_num, columns):
         # indexer
         if not isinstance(loc, slice):
             slice_len = len(loc)
-            locs = list(loc)
         else:
             slice_len = loc.stop - loc.start
-            locs = list(range(loc.start, loc.stop))
 
         if slice_len != levsize:
             chunk = this.loc[:, this.columns[loc]]
@@ -615,7 +613,6 @@ def _convert_level_number(level_num, columns):
         else:
             if (frame._is_homogeneous_type and
                     is_extension_array_dtype(frame.dtypes.iloc[0])):
-                import pdb; pdb.set_trace()
                 dtype = this.loc[:, this.columns[loc]].dtypes.iloc[0]
                 subset = this.loc[:, this.columns[loc]]
 

From 144d11748421c9a0d346696735180751014b2c93 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 5 Nov 2018 10:00:57 -0600
Subject: [PATCH 6/8] really object

---
 pandas/tests/extension/base/reshaping.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 32645f1da9397..a96dc9977a413 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -181,6 +181,9 @@ def test_stack(self, data, columns):
         df.columns = columns
         result = df.stack()
         expected = df.astype(object).stack()
+        # Cast again in case the constructor re-inferred a specialized dtype
+        # from the object values, as is done for Period.
+        expected = expected.astype(object)
 
         if isinstance(expected, pd.Series):
             assert result.dtype == df.iloc[:, 0].dtype

From 98f75c917130db8810ac3a46f39a2caa686ec1b2 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 5 Nov 2018 10:01:49 -0600
Subject: [PATCH 7/8] remove loc

---
 pandas/core/reshape/reshape.py | 51 ++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index a375b7e7f4529..c3add07fbb831 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -467,13 +467,7 @@ def factorize(index):
             new_values = arr._concat_same_type([
                 col._values for _, col in frame.iteritems()
             ])
-            # final take to get the order correct.
-            # idx is an indexer like
-            # [c0r0, c1r0, c2r0, ...,
-            #  c0r1, c1r1, c2r1, ...]
-            idx = np.arange(N * K).reshape(K, N).T.ravel()
-            new_values = new_values.take(idx)
-
+            new_values = _reorder_for_extension_array_stack(new_values, N, K)
         else:
             # homogeneous, non-EA
             new_values = frame.values.ravel()
@@ -602,14 +596,14 @@ def _convert_level_number(level_num, columns):
             slice_len = loc.stop - loc.start
 
         if slice_len != levsize:
-            chunk = this.loc[:, this.columns[loc]]
+            chunk = this[this.columns[loc]]
             chunk.columns = level_vals.take(chunk.columns.labels[-1])
             value_slice = chunk.reindex(columns=level_vals_used).values
         else:
             if (frame._is_homogeneous_type and
                     is_extension_array_dtype(frame.dtypes.iloc[0])):
-                dtype = this.loc[:, this.columns[loc]].dtypes.iloc[0]
-                subset = this.loc[:, this.columns[loc]]
+                dtype = this[this.columns[loc]].dtypes.iloc[0]
+                subset = this[this.columns[loc]]
 
                 value_slice = dtype.construct_array_type()._concat_same_type(
                     [x._values for _, x in subset.iteritems()]
@@ -619,7 +613,7 @@ def _convert_level_number(level_num, columns):
                 value_slice = value_slice.take(idx)
 
             elif frame._is_mixed_type:
-                value_slice = this.loc[:, this.columns[loc]].values
+                value_slice = this[this.columns[loc]].values
             else:
                 value_slice = this.values[:, loc]
 
@@ -965,3 +959,38 @@ def make_axis_dummies(frame, axis='minor', transform=None):
     values = values.take(labels, axis=0)
 
     return DataFrame(values, columns=items, index=frame.index)
+
+
+def _reorder_for_extension_array_stack(arr, n_rows, n_columns):
+    """
+    Re-orders the values when stacking multiple extension-arrays.
+
+    `arr` holds the columns concatenated end to end (column-major), so a
+    follow-up take is needed to put the values in stacked (row-major) order.
+
+    Parameters
+    ----------
+    arr : ExtensionArray
+    n_rows, n_columns : int
+        The number of rows and columns in the original DataFrame.
+
+    Returns
+    -------
+    taken : ExtensionArray
+        The elements of `arr` in row-major order (c0r0, c1r0, ..., c0r1, ...).
+
+    Examples
+    --------
+    >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
+    >>> _reorder_for_extension_array_stack(arr, 2, 3)
+    array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1')
+
+    >>> _reorder_for_extension_array_stack(arr, 3, 2)
+    array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1')
+    """
+    # final take to get the order correct.
+    # idx is an indexer like
+    # [c0r0, c1r0, c2r0, ...,
+    #  c0r1, c1r1, c2r1, ...]
+    idx = np.arange(n_rows * n_columns).reshape(n_columns, n_rows).T.ravel()
+    return arr.take(idx)

From f6aeafae074ef38843014c55f06caae558e7e3b6 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 8 Nov 2018 06:26:13 -0600
Subject: [PATCH 8/8] lint

---
 pandas/core/internals/blocks.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 1b05b9cf02827..1f2a1ee52159e 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -35,7 +35,6 @@
     is_numeric_v_string_like, is_extension_type,
     is_extension_array_dtype,
     is_list_like,
-    is_sparse,
     is_re,
     is_re_compilable,
     is_sparse,