From 221bd87bc228827dec941e8a895dac5da18b6a5f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 May 2021 19:34:52 -0700 Subject: [PATCH 1/6] tests not passing but i need to rebase again --- pandas/core/dtypes/concat.py | 14 ++++++++++++++ pandas/core/indexes/category.py | 8 +++++++- pandas/tests/base/test_value_counts.py | 21 ++++++++------------- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b0d00775bbed1..5999d7e13b743 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -125,6 +125,20 @@ def is_nonempty(x) -> bool: if any_ea: # we ignore axis here, as internally concatting with EAs is always # for axis=0 + if any(is_categorical_dtype(x.dtype) for x in to_concat): + first = [x for x in to_concat if is_categorical_dtype(x.dtype)][0] + from pandas import Index + try: + codes = np.concatenate([Index(first)._is_dtype_compat(Index(c)).codes for c in to_concat]) + except TypeError: + # not all to_concat elements are among our categories (or NA) + pass + else: + cat = first._from_backing_data(codes) + if first.ordered: + cat = cat.as_ordered() + return cat + if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat]) to_concat = [cast_to_common_type(arr, target_dtype) for arr in to_concat] diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7339c82cbcc77..52a9244bcafe1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -548,6 +548,8 @@ def map(self, mapper): return Index(mapped, name=self.name) def _concat(self, to_concat: list[Index], name: Hashable) -> Index: + alt = Index._concat(self, to_concat, name=name) # uses concat_compat + # if calling index is category, don't check dtype of others try: codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) @@ -556,7 +558,11 @@ def _concat(self, to_concat: list[Index], name: Hashable) -> Index: from pandas.core.dtypes.concat import concat_compat res = concat_compat(to_concat) - return Index(res, name=name) + out = Index(res, name=name) + assert out.equals(alt) + assert out.dtype == alt.dtype + return out else: cat = self._data._from_backing_data(codes) + assert cat.dtype == alt.dtype return type(self)._simple_new(cat, name=name) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 10f391a49d98f..cc591523dea0a 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -43,7 +43,8 @@ def test_value_counts(index_or_series_obj): @pytest.mark.parametrize("null_obj", [np.nan, None]) -def test_value_counts_null(null_obj, index_or_series_obj): +@pytest.mark.parametrize("dropna", [True, False]) +def test_value_counts_null(null_obj, dropna, index_or_series_obj): orig = index_or_series_obj obj = orig.copy() @@ -70,20 +71,14 @@ def test_value_counts_null(null_obj, index_or_series_obj): expected = Series(dict(counter.most_common()), dtype=np.int64) expected.index = expected.index.astype(obj.dtype) - result = obj.value_counts() - if obj.duplicated().any(): - # TODO: - # Order of entries with the same count is inconsistent on CI (gh-32449) - expected = expected.sort_index() - result = result.sort_index() - tm.assert_series_equal(result, expected) + if not dropna: + # can't use expected[null_obj] = 3 as + # IntervalIndex doesn't allow assignment + new_entry = Series({np.nan: 3}, dtype=np.int64) + expected = expected.append(new_entry) - # can't use 
expected[null_obj] = 3 as - # IntervalIndex doesn't allow assignment - new_entry = Series({np.nan: 3}, dtype=np.int64) - expected = expected.append(new_entry) + result = obj.value_counts(dropna=dropna) - result = obj.value_counts(dropna=False) if obj.duplicated().any(): # TODO: # Order of entries with the same count is inconsistent on CI (gh-32449) From 1f45dbd28fecbaded6beb5ceb740bb74e419d5c4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Aug 2021 14:29:23 -0700 Subject: [PATCH 2/6] API: make concat_compat behave like CategoricalIndex._concat --- pandas/core/dtypes/concat.py | 20 ++++++--- pandas/core/indexes/category.py | 23 +--------- .../tests/indexes/categorical/test_append.py | 5 ++- .../reshape/concat/test_append_common.py | 43 ++++++++++--------- pandas/tests/reshape/concat/test_empty.py | 6 +-- 5 files changed, 45 insertions(+), 52 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 5999d7e13b743..542bd8357ef25 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -125,17 +125,27 @@ def is_nonempty(x) -> bool: if any_ea: # we ignore axis here, as internally concatting with EAs is always # for axis=0 - if any(is_categorical_dtype(x.dtype) for x in to_concat): - first = [x for x in to_concat if is_categorical_dtype(x.dtype)][0] - from pandas import Index + cats = [x for x in to_concat if is_categorical_dtype(x.dtype)] + if len(cats): + # TODO: Ideally this shouldn't be order-dependent + first = cats[0] + from pandas import ( + CategoricalIndex, + Index, + ) + + ci = CategoricalIndex(first) + try: - codes = np.concatenate([Index(first)._is_dtype_compat(Index(c)).codes for c in to_concat]) + codes = np.concatenate( + [ci._is_dtype_compat(Index(c)).codes for c in to_concat] + ) except TypeError: # not all to_concat elements are among our categories (or NA) pass else: cat = first._from_backing_data(codes) - if first.ordered: + if all(x.dtype.ordered for x in cats): cat = cat.as_ordered() return cat diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 52a9244bcafe1..6d00a5cd3cc7c 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -278,7 +278,8 @@ def _is_dtype_compat(self, other) -> Categorical: "categories must match existing categories when appending" ) - return other + # TODO: this is a lot like the non-coercing constructor + return other.astype(self.dtype, copy=False) def equals(self, other: object) -> bool: """ @@ -546,23 +547,3 @@ def map(self, mapper): """ mapped = self._values.map(mapper) return Index(mapped, name=self.name) - - def _concat(self, to_concat: list[Index], name: Hashable) -> Index: - alt = Index._concat(self, to_concat, name=name) # uses concat_compat - - # if calling index is category, don't check dtype of others - try: - codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) - except TypeError: - # not all to_concat elements are among our categories (or NA) - from pandas.core.dtypes.concat import concat_compat - - res = concat_compat(to_concat) - out = Index(res, name=name) - assert out.equals(alt) - assert out.dtype == alt.dtype - return out - else: - cat = self._data._from_backing_data(codes) - assert cat.dtype == alt.dtype - return type(self)._simple_new(cat, name=name) diff --git a/pandas/tests/indexes/categorical/test_append.py b/pandas/tests/indexes/categorical/test_append.py index b48c3219f5111..2a99f72de947b 100644 --- a/pandas/tests/indexes/categorical/test_append.py +++ 
b/pandas/tests/indexes/categorical/test_append.py @@ -48,9 +48,10 @@ def test_append_non_categories(self, ci): tm.assert_index_equal(result, expected, exact=True) def test_append_object(self, ci): - # GH#14298 - if base object is not categorical -> coerce to object + # GH#14298 - if base object and all entries are among + # categories -> cast to categorical (GH#41626) result = Index(["c", "a"]).append(ci) - expected = Index(list("caaabbca")) + expected = Index(list("caaabbca"), dtype=ci.dtype) tm.assert_index_equal(result, expected, exact=True) def test_append_to_another(self): diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index b8b254e786194..1bcf2b631e9b8 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -511,18 +511,18 @@ def test_union_categorical_same_categories_different_order(self): tm.assert_series_equal(result, expected) def test_concat_categorical_coercion(self): - # GH 13524 + # GH 13524, GH#41626 - # category + not-category => not-category + # category + not-category (but all-castable/nan) => category s1 = Series([1, 2, np.nan], dtype="category") s2 = Series([2, 1, 2]) - exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object") + exp = Series([1, 2, np.nan, 2, 1, 2], dtype=s1.dtype) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) # result shouldn't be affected by 1st elem dtype - exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object") + exp = Series([2, 1, 2, 1, 2, np.nan], dtype=s1.dtype) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) @@ -562,31 +562,31 @@ def test_concat_categorical_coercion(self): tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) - # if normal series only contains NaN-likes => not-category + # if normal series only contains NaN-likes => category (GH#41626) s1 = Series([10, 11], dtype="category") s2 = Series([np.nan, np.nan, np.nan]) - exp = Series([10, 11, np.nan, np.nan, np.nan]) + exp = Series([10, 11, np.nan, np.nan, np.nan], dtype=s1.dtype) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) - exp = Series([np.nan, np.nan, np.nan, 10, 11]) + exp = Series([np.nan, np.nan, np.nan, 10, 11], dtype=s1.dtype) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) def test_concat_categorical_3elem_coercion(self): - # GH 13524 + # GH 13524, GH#41626 - # mixed dtypes => not-category + # mixed dtypes, all castable to our categories => category (GH#41626) s1 = Series([1, 2, np.nan], dtype="category") s2 = Series([2, 1, 2], dtype="category") s3 = Series([1, 2, 1, 2, np.nan]) - exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float") + exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype=s1.dtype) tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp) - exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float") + exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype=s1.dtype) tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), 
exp) @@ -654,7 +654,7 @@ def test_concat_categorical_ordered(self): tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp) def test_concat_categorical_coercion_nan(self): - # GH 13524 + # GH 13524, GH#41626 # some edge cases # category + not-category => not category @@ -665,18 +665,19 @@ def test_concat_categorical_coercion_nan(self): tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + # all elements of s2 are nan => category (GH#41626) s1 = Series([1, np.nan], dtype="category") s2 = Series([np.nan, np.nan]) - exp = Series([1, np.nan, np.nan, np.nan], dtype="float") + exp = Series([1, np.nan, np.nan, np.nan], dtype=s1.dtype) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) - # mixed dtype, all nan-likes => not-category + # mixed dtype, all nan-likes => category (GH#41626) s1 = Series([np.nan, np.nan], dtype="category") s2 = Series([np.nan, np.nan]) - exp = Series([np.nan, np.nan, np.nan, np.nan]) + exp = Series([np.nan, np.nan, np.nan, np.nan], dtype=s1.dtype) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) @@ -692,7 +693,7 @@ def test_concat_categorical_coercion_nan(self): tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) def test_concat_categorical_empty(self): - # GH 13524 + # GH 13524, GH#41626 s1 = Series([], dtype="category") s2 = Series([1, 2], dtype="category") @@ -712,11 +713,11 @@ def test_concat_categorical_empty(self): s1 = Series([], dtype="category") s2 = Series([], dtype="object") - # different dtype => not-category - tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) - tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) - tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) - tm.assert_series_equal(s2.append(s1, ignore_index=True), s2) + # different dtype, but all castable (bc empty) => category (GH#41626) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s1) + tm.assert_series_equal(s1.append(s2, ignore_index=True), s1) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s1) + tm.assert_series_equal(s2.append(s1, ignore_index=True), s1) s1 = Series([], dtype="category") s2 = Series([np.nan, np.nan]) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 63482dbc1502c..966a0883eaa4b 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -105,7 +105,7 @@ def test_concat_empty_series_timelike(self, tz, values): ("M8[ns]", np.int64, np.object_), # categorical ("category", "category", "category"), - ("category", "object", "object"), + ("category", "object", "category"), # GH#41626 ], ) def test_concat_empty_series_dtypes(self, left, right, expected): @@ -177,12 +177,12 @@ def test_concat_empty_series_dtypes_triple(self): ) def test_concat_empty_series_dtype_category_with_array(self): - # GH#18515 + # GH#18515, GH#41626 assert ( concat( [Series(np.array([]), dtype="category"), Series(dtype="float64")] ).dtype - == "float64" + == "category" ) def test_concat_empty_series_dtypes_sparse(self): From abdd2a8c0a85594a00997654c72ce0c3bd8d6482 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 26 Nov 2021 12:57:01 -0800 Subject: [PATCH 3/6] avoid FutureWarning --- pandas/core/dtypes/concat.py | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 39b462021557c..c58ec320dd40e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -134,7 +134,7 @@ def is_nonempty(x) -> bool: try: codes = np.concatenate( - [ci._is_dtype_compat(Index(c)).codes for c in to_concat] + [ci._is_dtype_compat(Index._with_infer(c)).codes for c in to_concat] ) except TypeError: # not all to_concat elements are among our categories (or NA) From 9164016475bf959f7c4f2f372349f3389c19b673 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Jan 2022 11:50:39 -0800 Subject: [PATCH 4/6] avoid append --- pandas/tests/reshape/concat/test_append_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 25f5e1a309510..6a192f821c47f 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -727,9 +727,9 @@ def test_concat_categorical_empty(self): # different dtype, but all castable (bc empty) => category (GH#41626) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s1) - tm.assert_series_equal(s1.append(s2, ignore_index=True), s1) + tm.assert_series_equal(s1._append(s2, ignore_index=True), s1) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s1) - tm.assert_series_equal(s2.append(s1, ignore_index=True), s1) + tm.assert_series_equal(s2._append(s1, ignore_index=True), s1) s1 = Series([], dtype="category") s2 = Series([np.nan, np.nan]) From 7ba8d2d2954ba710d81013041a33bf6c8ae79581 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Jan 2022 11:56:08 -0800 Subject: [PATCH 5/6] catch warnings --- pandas/tests/arrays/sparse/test_combine_concat.py | 11 +++++++++-- pandas/tests/extension/test_sparse.py | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_combine_concat.py b/pandas/tests/arrays/sparse/test_combine_concat.py index 0f09af269148b..f18aab416a2ea 100644 --- a/pandas/tests/arrays/sparse/test_combine_concat.py +++ b/pandas/tests/arrays/sparse/test_combine_concat.py @@ -53,10 +53,17 @@ def test_concat_with_non_sparse(other, expected_dtype): # https://github.com/pandas-dev/pandas/issues/34336 s_sparse = pd.Series([1, 0, 2], dtype=pd.SparseDtype("int64", 0)) - result = pd.concat([s_sparse, other], ignore_index=True) + msg = "passing a SparseArray to pd.Index" + warn = FutureWarning + if isinstance(expected_dtype, pd.SparseDtype): + warn = None + + with tm.assert_produces_warning(warn, match=msg): + result = pd.concat([s_sparse, other], ignore_index=True) expected = pd.Series(list(s_sparse) + list(other)).astype(expected_dtype) tm.assert_series_equal(result, expected) - result = pd.concat([other, s_sparse], ignore_index=True) + with tm.assert_produces_warning(warn, match=msg): + result = pd.concat([other, s_sparse], ignore_index=True) expected = pd.Series(list(other) + list(s_sparse)).astype(expected_dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 3a37ea4d673af..5948db9ab5434 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -139,7 +139,9 @@ def test_concat_mixed_dtypes(self, data): dfs = [df1, df2, df3] # dataframes - result = pd.concat(dfs) + msg = "passing a SparseArray to pd.Index" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + result = pd.concat(dfs) expected = pd.concat( [x.apply(lambda s: np.asarray(s).astype(object)) for x in dfs] ) From 75ab0a0717dee79f4aa87c514929912087415a94 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Jan 2022 19:56:16 -0800 Subject: [PATCH 6/6] ArrayManager compat --- pandas/tests/extension/test_sparse.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 5948db9ab5434..1fe046488eef5 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -130,7 +130,7 @@ class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): class TestReshaping(BaseSparseTests, base.BaseReshapingTests): - def test_concat_mixed_dtypes(self, data): + def test_concat_mixed_dtypes(self, data, using_array_manager): # https://github.com/pandas-dev/pandas/issues/20762 # This should be the same, aside from concat([sparse, float]) df1 = pd.DataFrame({"A": data[:3]}) @@ -140,7 +140,8 @@ def test_concat_mixed_dtypes(self, data): # dataframes msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(FutureWarning, match=msg): + warn = None if using_array_manager else FutureWarning + with tm.assert_produces_warning(warn, match=msg): result = pd.concat(dfs) expected = pd.concat( [x.apply(lambda s: np.asarray(s).astype(object)) for x in dfs]
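
Taken together, these patches make pd.concat (and Index/Series append) keep the categorical dtype when a categorical operand is combined with non-categorical operands whose values are all among its categories or NA, rather than falling back to object/float. Below is a minimal sketch of the resulting behavior, mirroring the updated test_concat_categorical_coercion case above; it reflects the behavior proposed in this series (GH#41626), not necessarily released pandas.

import numpy as np
import pandas as pd

s1 = pd.Series([1, 2, np.nan], dtype="category")
s2 = pd.Series([2, 1, 2])  # every value is among s1's categories (or NA)

result = pd.concat([s1, s2], ignore_index=True)
print(result.dtype)  # with these patches: category (s1.dtype); previously: object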