From 4487bec4c98bdfc2ac79dcc85e2c36552c59431e Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Tue, 5 May 2020 22:13:48 +0200
Subject: [PATCH 01/60] Add fix to raise error when category value is not
 predefined

---
 pandas/core/generic.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b550857252466..b9b407ea6e495 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -70,6 +70,7 @@
     is_float,
     is_list_like,
     is_number,
+    is_categorical_dtype,
     is_numeric_dtype,
     is_object_dtype,
     is_re_compilable,
@@ -5226,6 +5227,19 @@ def __setattr__(self, name: str, value) -> None:
         After regular attribute access, try setting the name
         This allows simpler access to columns for interactive use.
         """
+        # Fix for handling assignment to a Catagorical dtype
+        # with a category which is not predefined.
+        if isinstance(value, BlockManager) and is_categorical_dtype(self):
+            if len(value) > 0:
+                new_value = value.as_array()[-1]
+            else:
+                new_value = None
+            if not pd.isna(new_value) and new_value not in self.dtype.categories.values:
+                raise ValueError(
+                    "Cannot setitem on a Categorical with a new "
+                    "category, set the categories first"
+                )
+
         # first try regular attribute access via __getattribute__, so that
         # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
         # the same attribute.

From 10098ab2d464882eda2ac4e9289a5af5cebc1963 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Tue, 5 May 2020 23:02:51 +0200
Subject: [PATCH 02/60] Fix linting

---
 pandas/core/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b9b407ea6e495..d76f371563bff 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -63,6 +63,7 @@
     ensure_str,
     is_bool,
     is_bool_dtype,
+    is_categorical_dtype,
     is_datetime64_any_dtype,
     is_datetime64tz_dtype,
     is_dict_like,
@@ -70,7 +71,6 @@
     is_float,
     is_list_like,
     is_number,
-    is_categorical_dtype,
     is_numeric_dtype,
     is_object_dtype,
     is_re_compilable,

From cb34580a94f4b97313c0adb05925a432ef6db014 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Tue, 5 May 2020 23:25:05 +0200
Subject: [PATCH 03/60] Added new test

---
 pandas/tests/arrays/categorical/test_indexing.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index abfae189bb4d7..90b73104153da 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -234,6 +234,12 @@ def test_where_ordered_differs_rasies(self):
         with pytest.raises(ValueError, match="without identical categories"):
             ser.where([True, False, True], other)
 
+    def test_loc_new_category_raises(self):
+        ser = pd.Series(Categorical(["a", "b", "c"]))
+        msg = "Cannot setitem on a Categorical with a new category"
+        with pytest.raises(ValueError, match=msg):
+            ser.loc[3] = "d"
+
 
 @pytest.mark.parametrize("index", [True, False])
 def test_mask_with_boolean(index):

From c627fa69ae9231f0355d3a40f31d993e6449bd1b Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 16 May 2020 18:06:47 +0200
Subject: [PATCH 04/60] Add test case for unused categories

---
 pandas/tests/arrays/categorical/test_indexing.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index 90b73104153da..5282e424a6103 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -240,6 +240,15 @@ def test_loc_new_category_raises(self):
         with pytest.raises(ValueError, match=msg):
             ser.loc[3] = "d"
 
+    def test_unused_category_retention(self):
+        # Init case
+        exp_cats = Index(["a", "b", "c", "d"])
+        cat1 = Series(Categorical(["a", "b", "c"], categories=exp_cats))
+        tm.assert_index_equal(cat1.cat.categories, exp_cats)
+
+        # Modify case
+        cat1.loc[0] = "b"
+        tm.assert_index_equal(cat1.cat.categories, exp_cats)        
 
 @pytest.mark.parametrize("index", [True, False])
 def test_mask_with_boolean(index):

From ba3a75107639fe9344e4e583ea8e1a8f84921bd0 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 16 May 2020 18:08:03 +0200
Subject: [PATCH 05/60] Remove trailing whitespace

---
 pandas/tests/arrays/categorical/test_indexing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index 5282e424a6103..d2acd39f3d089 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -248,7 +248,7 @@ def test_unused_category_retention(self):
 
         # Modify case
         cat1.loc[0] = "b"
-        tm.assert_index_equal(cat1.cat.categories, exp_cats)        
+        tm.assert_index_equal(cat1.cat.categories, exp_cats)
 
 @pytest.mark.parametrize("index", [True, False])
 def test_mask_with_boolean(index):

From 51dcdfedb10bb1f19a2c49b290ac484283128ae8 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 16 May 2020 22:01:23 +0200
Subject: [PATCH 06/60] Fix linting

---
 pandas/tests/arrays/categorical/test_indexing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index d2acd39f3d089..dd2302b2bca59 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -249,6 +249,7 @@ def test_unused_category_retention(self):
         # Modify case
         cat1.loc[0] = "b"
         tm.assert_index_equal(cat1.cat.categories, exp_cats)
+        
 
 @pytest.mark.parametrize("index", [True, False])
 def test_mask_with_boolean(index):

From 9057b262008862c65cdb8181f7bac8db9f74a1b9 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 16 May 2020 22:24:23 +0200
Subject: [PATCH 07/60] Fix linting

---
 pandas/tests/arrays/categorical/test_indexing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index dd2302b2bca59..7ed85da4c8502 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -249,7 +249,7 @@ def test_unused_category_retention(self):
         # Modify case
         cat1.loc[0] = "b"
         tm.assert_index_equal(cat1.cat.categories, exp_cats)
-        
+
 
 @pytest.mark.parametrize("index", [True, False])
 def test_mask_with_boolean(index):

From 06fdc3ea4ca95983310ee69c8d415506b6dd99e3 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 23 May 2020 21:02:54 +0200
Subject: [PATCH 08/60] Remove temporary fix from generic.py

---
 pandas/core/generic.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 3e9f87eb5e0a7..c3e6b1e989555 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5229,19 +5229,6 @@ def __setattr__(self, name: str, value) -> None:
         After regular attribute access, try setting the name
         This allows simpler access to columns for interactive use.
         """
-        # Fix for handling assignment to a Catagorical dtype
-        # with a category which is not predefined.
-        if isinstance(value, BlockManager) and is_categorical_dtype(self):
-            if len(value) > 0:
-                new_value = value.as_array()[-1]
-            else:
-                new_value = None
-            if not pd.isna(new_value) and new_value not in self.dtype.categories.values:
-                raise ValueError(
-                    "Cannot setitem on a Categorical with a new "
-                    "category, set the categories first"
-                )
-
         # first try regular attribute access via __getattribute__, so that
         # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
         # the same attribute.

From 582c02344e1085af4d886e90523c05596fc05a9b Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 24 May 2020 18:50:12 +0200
Subject: [PATCH 09/60] First attempt at the fix in indexing.py

---
 pandas/core/indexing.py | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index b857a59195695..f898bb11a306b 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -20,6 +20,8 @@
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries
 from pandas.core.dtypes.missing import _infer_fill_value, isna
+from pandas.core.dtypes.cast import find_common_type
+from pandas.core.dtypes.common import is_categorical_dtype
 
 import pandas.core.common as com
 from pandas.core.indexers import (
@@ -1790,7 +1792,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
         """
         Insert new row(s) or column(s) into the Series or DataFrame.
         """
-        from pandas import Series
+        from pandas import Series, DataFrame
 
         # reindex the axis to the new value
         # and set inplace
@@ -1815,8 +1817,21 @@ def _setitem_with_indexer_missing(self, indexer, value):
                 # GH#22717 handle casting compatibility that np.concatenate
                 #  does incorrectly
                 new_values = concat_compat([self.obj._values, new_values])
+                if is_object_dtype(new_values.dtype):
+                    dtype = self.obj.dtype
+                else:
+                    dtype = find_common_type([self.obj.dtype, new_values.dtype])
+            else:
+                dtype = None
+
+            if is_categorical_dtype(self.obj.dtype):
+                if (~np.in1d(new_values, self.obj.dtypes.categories.values)).any():
+                    raise ValueError(
+                        "Cannot setitem on a Categorical with a new category"
+                    )
+
             self.obj._mgr = self.obj._constructor(
-                new_values, index=new_index, name=self.obj.name
+                new_values, index=new_index, name=self.obj.name, dtype=dtype
             )._mgr
             self.obj._maybe_update_cacher(clear=True)
 
@@ -1838,7 +1853,25 @@ def _setitem_with_indexer_missing(self, indexer, value):
                     if len(value) != len(self.obj.columns):
                         raise ValueError("cannot set a row with mismatched columns")
 
-                value = Series(value, index=self.obj.columns, name=indexer)
+                if len(set(self.obj.dtypes)) > 1:
+                    value = list(value)
+                    for i in range(len(self.obj.columns)):
+                        value[i] = Series(data=[value[i]], dtype=self.obj.dtypes[i])
+                        if is_categorical_dtype(self.obj.dtypes[i]):
+                            if (
+                                ~np.in1d(
+                                    value[i].values,
+                                    self.obj.dtypes[i].categories.values,
+                                )
+                            ).any():
+                                raise ValueError(
+                                    "Cannot setitem on a Categorical with a new category"
+                                )
+                    value = dict(zip(self.obj.columns, value))
+                    value = DataFrame(value)
+                    value.index = [indexer]
+                else:
+                    value = Series(value, index=self.obj.columns, name=indexer)
 
             self.obj._mgr = self.obj.append(value)._mgr
             self.obj._maybe_update_cacher(clear=True)

From 730fc2b1a730ca4d4a10948f54c47c426848b6fd Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 24 May 2020 19:33:50 +0200
Subject: [PATCH 10/60] Fix lint

---
 pandas/core/generic.py  |  1 -
 pandas/core/indexing.py | 24 +++++++++---------------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 98c9924a932ac..8aa8f8bb60654 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -65,7 +65,6 @@
     ensure_str,
     is_bool,
     is_bool_dtype,
-    is_categorical_dtype,
     is_datetime64_any_dtype,
     is_datetime64tz_dtype,
     is_dict_like,
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index f898bb11a306b..18b401021605b 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1794,6 +1794,13 @@ def _setitem_with_indexer_missing(self, indexer, value):
         """
         from pandas import Series, DataFrame
 
+        def check_valid_categorical(new_values, obj_dtype):
+            if is_categorical_dtype(obj_dtype):
+                if (~np.in1d(new_values, obj_dtype.categories.values)).any():
+                    raise ValueError(
+                        "Cannot setitem on a Categorical with a new category"
+                    )
+
         # reindex the axis to the new value
         # and set inplace
         if self.ndim == 1:
@@ -1824,11 +1831,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
             else:
                 dtype = None
 
-            if is_categorical_dtype(self.obj.dtype):
-                if (~np.in1d(new_values, self.obj.dtypes.categories.values)).any():
-                    raise ValueError(
-                        "Cannot setitem on a Categorical with a new category"
-                    )
+            check_valid_categorical(new_values, self.obj.dtype)
 
             self.obj._mgr = self.obj._constructor(
                 new_values, index=new_index, name=self.obj.name, dtype=dtype
@@ -1857,16 +1860,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
                     value = list(value)
                     for i in range(len(self.obj.columns)):
                         value[i] = Series(data=[value[i]], dtype=self.obj.dtypes[i])
-                        if is_categorical_dtype(self.obj.dtypes[i]):
-                            if (
-                                ~np.in1d(
-                                    value[i].values,
-                                    self.obj.dtypes[i].categories.values,
-                                )
-                            ).any():
-                                raise ValueError(
-                                    "Cannot setitem on a Categorical with a new category"
-                                )
+                        check_valid_categorical(value[i], self.obj.dtypes[i])
                     value = dict(zip(self.obj.columns, value))
                     value = DataFrame(value)
                     value.index = [indexer]

From c275eb95f0b6d3a2917679a2d1d675f62c0f10ce Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 24 May 2020 19:59:09 +0200
Subject: [PATCH 11/60] Fix import ordering

---
 pandas/core/indexing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 18b401021605b..175ae26eb38b9 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -7,7 +7,9 @@
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import doc
 
+from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
+    is_categorical_dtype,
     is_hashable,
     is_integer,
     is_iterator,
@@ -20,8 +22,6 @@
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries
 from pandas.core.dtypes.missing import _infer_fill_value, isna
-from pandas.core.dtypes.cast import find_common_type
-from pandas.core.dtypes.common import is_categorical_dtype
 
 import pandas.core.common as com
 from pandas.core.indexers import (

From 944ae24dbe53905dafd1de5e4b5fd2d452fc2a12 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 24 May 2020 22:13:00 +0200
Subject: [PATCH 12/60] Pass dtype=None when concatenated values are object dtype

---
 pandas/core/indexing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 175ae26eb38b9..dab4c470c6ec9 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1825,14 +1825,14 @@ def check_valid_categorical(new_values, obj_dtype):
                 #  does incorrectly
                 new_values = concat_compat([self.obj._values, new_values])
                 if is_object_dtype(new_values.dtype):
-                    dtype = self.obj.dtype
+                    dtype = None
                 else:
                     dtype = find_common_type([self.obj.dtype, new_values.dtype])
             else:
                 dtype = None
 
             check_valid_categorical(new_values, self.obj.dtype)
-
+             
             self.obj._mgr = self.obj._constructor(
                 new_values, index=new_index, name=self.obj.name, dtype=dtype
             )._mgr

From 8372bdbeff5ec238653ce6887b348f2fcc998aa0 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 24 May 2020 22:38:56 +0200
Subject: [PATCH 13/60] Fix lint

---
 pandas/core/indexing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index dab4c470c6ec9..b64926f635fae 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1832,7 +1832,6 @@ def check_valid_categorical(new_values, obj_dtype):
                 dtype = None
 
             check_valid_categorical(new_values, self.obj.dtype)
-             
             self.obj._mgr = self.obj._constructor(
                 new_values, index=new_index, name=self.obj.name, dtype=dtype
             )._mgr

From 0e5e41838b412c1bb18b2d7fbc9eba6e80850e5d Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 24 May 2020 23:16:55 +0200
Subject: [PATCH 14/60] Include more related test cases

---
 .../tests/arrays/categorical/test_indexing.py | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index 7ed85da4c8502..9e74cdc29da74 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -3,6 +3,8 @@
 
 import pandas as pd
 from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
+from pandas.core.dtypes.common import is_categorical_dtype
+
 import pandas._testing as tm
 import pandas.core.common as com
 from pandas.tests.arrays.categorical.common import TestCategorical
@@ -234,7 +236,7 @@ def test_where_ordered_differs_rasies(self):
         with pytest.raises(ValueError, match="without identical categories"):
             ser.where([True, False, True], other)
 
-    def test_loc_new_category_raises(self):
+    def test_loc_new_category_series_raises(self):
         ser = pd.Series(Categorical(["a", "b", "c"]))
         msg = "Cannot setitem on a Categorical with a new category"
         with pytest.raises(ValueError, match=msg):
@@ -250,6 +252,25 @@ def test_unused_category_retention(self):
         cat1.loc[0] = "b"
         tm.assert_index_equal(cat1.cat.categories, exp_cats)
 
+    def test_loc_new_category_row_raises(self):
+        data = {
+            "int": [0, 1, 2],
+            "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+        }
+        df = pd.DataFrame.from_dict(data)
+        msg = "Cannot setitem on a Categorical with a new category"
+        with pytest.raises(ValueError, match=msg):
+            df.loc[3] = [3, "d"]
+
+    def test_loc_new_row_category_dtype_retention(self):
+        data = {
+            "int": [0, 1, 2],
+            "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+        }
+        df = pd.DataFrame.from_dict(data)
+        df.loc[3] = [3, "c"]
+        assert is_categorical_dtype(df["cat"])
+
 
 @pytest.mark.parametrize("index", [True, False])
 def test_mask_with_boolean(index):

From eea359acde96e95e722f6cda1c3e7d7e06a33e6d Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 24 May 2020 23:38:05 +0200
Subject: [PATCH 15/60] Fix linting

---
 pandas/tests/arrays/categorical/test_indexing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index 9e74cdc29da74..fe306e3c1f0e3 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -2,8 +2,8 @@
 import pytest
 
 import pandas as pd
-from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
 from pandas.core.dtypes.common import is_categorical_dtype
+from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
 
 import pandas._testing as tm
 import pandas.core.common as com

From 5f72d4ec744c66ee9312e4251d918cd03dbfa9a7 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 24 May 2020 23:56:28 +0200
Subject: [PATCH 16/60] Fix import ordering in test_indexing.py

---
 pandas/tests/arrays/categorical/test_indexing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index fe306e3c1f0e3..5307118af0d01 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -1,10 +1,10 @@
 import numpy as np
 import pytest
 
-import pandas as pd
 from pandas.core.dtypes.common import is_categorical_dtype
-from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
 
+import pandas as pd
+from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
 import pandas._testing as tm
 import pandas.core.common as com
 from pandas.tests.arrays.categorical.common import TestCategorical

From 26f474bd0afa5ac3be8189777adef3fb7862fc36 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Thu, 8 Oct 2020 23:23:22 +0300
Subject: [PATCH 17/60] Import missing is_categorical_dtype function

---
 pandas/core/indexing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 16b0c096a0624..d8856ace5c9d5 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -13,6 +13,7 @@
 
 from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
+    is_categorical_dtype,
     is_array_like,
     is_hashable,
     is_integer,

From 215943e8f5c8e2a1e69253de8e4fe24f676bdb66 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Thu, 8 Oct 2020 23:32:12 +0300
Subject: [PATCH 18/60] Fix linting

---
 pandas/core/indexing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index d8856ace5c9d5..5c960f90b1459 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -13,8 +13,8 @@
 
 from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
-    is_categorical_dtype,
     is_array_like,
+    is_categorical_dtype,
     is_hashable,
     is_integer,
     is_iterator,
@@ -1844,7 +1844,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
         """
         Insert new row(s) or column(s) into the Series or DataFrame.
         """
-        from pandas import Series, DataFrame
+        from pandas import DataFrame, Series
 
         def check_valid_categorical(new_values, obj_dtype):
             if is_categorical_dtype(obj_dtype):

From 5bacde9276d753546b20627c6b838c52e2d6f28e Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 10 Oct 2020 13:12:56 +0300
Subject: [PATCH 19/60] Include requested changes

- Moved the tests for this PR to pandas/tests/series/test_categorical.py
- Applied the code changes based on feedback
---
 .../tests/arrays/categorical/test_indexing.py | 36 -------------
 pandas/tests/series/test_categorical.py       | 50 +++++++++++++++++++
 2 files changed, 50 insertions(+), 36 deletions(-)
 create mode 100644 pandas/tests/series/test_categorical.py

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index 686aa9ccf764a..209341a4b8634 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -1,7 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas.core.dtypes.common import is_categorical_dtype
 
 import pandas as pd
 from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
@@ -244,41 +243,6 @@ def test_where_ordered_differs_rasies(self):
         with pytest.raises(ValueError, match="without identical categories"):
             ser.where([True, False, True], other)
 
-    def test_loc_new_category_series_raises(self):
-        ser = pd.Series(Categorical(["a", "b", "c"]))
-        msg = "Cannot setitem on a Categorical with a new category"
-        with pytest.raises(ValueError, match=msg):
-            ser.loc[3] = "d"
-
-    def test_unused_category_retention(self):
-        # Init case
-        exp_cats = Index(["a", "b", "c", "d"])
-        cat1 = Series(Categorical(["a", "b", "c"], categories=exp_cats))
-        tm.assert_index_equal(cat1.cat.categories, exp_cats)
-
-        # Modify case
-        cat1.loc[0] = "b"
-        tm.assert_index_equal(cat1.cat.categories, exp_cats)
-
-    def test_loc_new_category_row_raises(self):
-        data = {
-            "int": [0, 1, 2],
-            "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
-        }
-        df = pd.DataFrame.from_dict(data)
-        msg = "Cannot setitem on a Categorical with a new category"
-        with pytest.raises(ValueError, match=msg):
-            df.loc[3] = [3, "d"]
-
-    def test_loc_new_row_category_dtype_retention(self):
-        data = {
-            "int": [0, 1, 2],
-            "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
-        }
-        df = pd.DataFrame.from_dict(data)
-        df.loc[3] = [3, "c"]
-        assert is_categorical_dtype(df["cat"])
-
 
 @pytest.mark.parametrize("index", [True, False])
 def test_mask_with_boolean(index):
diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
new file mode 100644
index 0000000000000..cbac23d3adcd9
--- /dev/null
+++ b/pandas/tests/series/test_categorical.py
@@ -0,0 +1,50 @@
+import pytest
+import pandas as pd
+import pandas._testing as tm
+
+from pandas import Categorical, Index
+
+
+class TestCategoricalSeries:
+    def test_loc_new_category_series_raises(self):
+        ser = pd.Series(Categorical(["a", "b", "c"]))
+        msg = "Cannot setitem on a Categorical with a new category"
+        with pytest.raises(ValueError, match=msg):
+            ser.loc[3] = "d"
+
+    def test_unused_category_retention(self):
+        # Init case
+        exp_cats = Index(["a", "b", "c", "d"])
+        ser = pd.Series(Categorical(["a", "b", "c"], categories=exp_cats))
+        tm.assert_index_equal(ser.cat.categories, exp_cats)
+
+        # Modify case
+        ser.loc[0] = "b"
+        expected = pd.Series(Categorical(["b", "b", "c"], categories=exp_cats))
+        tm.assert_index_equal(ser.cat.categories, exp_cats)
+        tm.assert_series_equal(ser, expected)
+
+    def test_loc_new_category_row_raises(self):
+        data = {
+            "int": [0, 1, 2],
+            "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+        }
+        df = pd.DataFrame(data)
+        msg = "Cannot setitem on a Categorical with a new category"
+        with pytest.raises(ValueError, match=msg):
+            df.loc[3] = [3, "d"]
+
+    def test_loc_new_row_category_dtype_retention(self):
+        df_data = {
+            "int": [0, 1, 2],
+            "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+        }
+        df = pd.DataFrame(df_data)
+        df.loc[3] = [3, "c"]
+
+        expected_data = {
+            "int": [0, 1, 2, 3],
+            "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
+        }
+        expected = pd.DataFrame(expected_data)
+        tm.assert_frame_equal(df, expected)

From 96e4318d0d1702fd0cadb1e7387fe890be337f3b Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 10 Oct 2020 13:54:19 +0300
Subject: [PATCH 20/60] Fix import ordering/format

---
 pandas/tests/arrays/categorical/test_indexing.py | 1 -
 pandas/tests/series/test_categorical.py          | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index 209341a4b8634..2c4dd8fe64057 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -1,7 +1,6 @@
 import numpy as np
 import pytest
 
-
 import pandas as pd
 from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
 import pandas._testing as tm
diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index cbac23d3adcd9..28d1dd54b81cf 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -1,8 +1,8 @@
 import pytest
-import pandas as pd
-import pandas._testing as tm
 
+import pandas as pd
 from pandas import Categorical, Index
+import pandas._testing as tm
 
 
 class TestCategoricalSeries:

From e7ce2464b556bdc1325da7d061aaac80dade592f Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 11 Oct 2020 22:48:56 +0300
Subject: [PATCH 21/60] Update test_categorical.py

---
 pandas/tests/series/test_categorical.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 28d1dd54b81cf..630ae45ebb3d4 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -25,26 +25,24 @@ def test_unused_category_retention(self):
         tm.assert_series_equal(ser, expected)
 
     def test_loc_new_category_row_raises(self):
-        data = {
+        df = pd.DataFrame({
             "int": [0, 1, 2],
             "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
-        }
-        df = pd.DataFrame(data)
+        })
         msg = "Cannot setitem on a Categorical with a new category"
         with pytest.raises(ValueError, match=msg):
             df.loc[3] = [3, "d"]
 
     def test_loc_new_row_category_dtype_retention(self):
-        df_data = {
+        df = pd.DataFrame({
             "int": [0, 1, 2],
             "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
-        }
-        df = pd.DataFrame(df_data)
+        })
         df.loc[3] = [3, "c"]
 
-        expected_data = {
+        expected = pd.DataFrame({
             "int": [0, 1, 2, 3],
             "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
-        }
-        expected = pd.DataFrame(expected_data)
+        })
+        
         tm.assert_frame_equal(df, expected)

From 31ef609e0571e03d68cdff54655f43d9772961e3 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 11 Oct 2020 22:56:58 +0300
Subject: [PATCH 22/60] Fix format

---
 pandas/tests/series/test_categorical.py | 30 +++++++++++++++----------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 630ae45ebb3d4..20ba11a36d046 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -25,24 +25,30 @@ def test_unused_category_retention(self):
         tm.assert_series_equal(ser, expected)
 
     def test_loc_new_category_row_raises(self):
-        df = pd.DataFrame({
-            "int": [0, 1, 2],
-            "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
-        })
+        df = pd.DataFrame(
+            {
+                "int": [0, 1, 2],
+                "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+            }
+        )
         msg = "Cannot setitem on a Categorical with a new category"
         with pytest.raises(ValueError, match=msg):
             df.loc[3] = [3, "d"]
 
     def test_loc_new_row_category_dtype_retention(self):
-        df = pd.DataFrame({
-            "int": [0, 1, 2],
-            "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
-        })
+        df = pd.DataFrame(
+            {
+                "int": [0, 1, 2],
+                "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+            }
+        )
         df.loc[3] = [3, "c"]
 
-        expected = pd.DataFrame({
-            "int": [0, 1, 2, 3],
-            "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
-        })
+        expected = pd.DataFrame(
+            {
+                "int": [0, 1, 2, 3],
+                "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
+            }
+        )
         
         tm.assert_frame_equal(df, expected)

From ce3f46318d23849fe7d78549b9a08d4351fbedda Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 11 Oct 2020 23:05:08 +0300
Subject: [PATCH 23/60] Remove commas

---
 pandas/tests/series/test_categorical.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 20ba11a36d046..ed33c44ec26b4 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -28,7 +28,7 @@ def test_loc_new_category_row_raises(self):
         df = pd.DataFrame(
             {
                 "int": [0, 1, 2],
-                "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+                "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"])
             }
         )
         msg = "Cannot setitem on a Categorical with a new category"
@@ -39,7 +39,7 @@ def test_loc_new_row_category_dtype_retention(self):
         df = pd.DataFrame(
             {
                 "int": [0, 1, 2],
-                "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+                "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"])
             }
         )
         df.loc[3] = [3, "c"]
@@ -47,7 +47,7 @@ def test_loc_new_row_category_dtype_retention(self):
         expected = pd.DataFrame(
             {
                 "int": [0, 1, 2, 3],
-                "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
+                "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"])
             }
         )
         

From a825269e7db073e57cf00d5d4285f3eb036bd841 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 11 Oct 2020 23:15:01 +0300
Subject: [PATCH 24/60] Restore trailing commas in test_categorical.py

---
 pandas/tests/series/test_categorical.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index ed33c44ec26b4..d8d6e08ca931d 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -28,7 +28,7 @@ def test_loc_new_category_row_raises(self):
         df = pd.DataFrame(
             {
                 "int": [0, 1, 2],
-                "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"])
+                "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
             }
         )
         msg = "Cannot setitem on a Categorical with a new category"
@@ -39,7 +39,7 @@ def test_loc_new_row_category_dtype_retention(self):
         df = pd.DataFrame(
             {
                 "int": [0, 1, 2],
-                "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"])
+                "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
             }
         )
         df.loc[3] = [3, "c"]
@@ -47,8 +47,8 @@ def test_loc_new_row_category_dtype_retention(self):
         expected = pd.DataFrame(
             {
                 "int": [0, 1, 2, 3],
-                "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"])
+                "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
             }
         )
-        
+
         tm.assert_frame_equal(df, expected)

From 72726a03296d09a008a81e6401a4b6a3a0729c7e Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 00:37:08 +0300
Subject: [PATCH 25/60] Update solution

---
 pandas/core/dtypes/cast.py              | 20 +++++++++
 pandas/core/dtypes/concat.py            |  2 +-
 pandas/core/indexing.py                 | 59 +++++++++----------------
 pandas/tests/series/test_categorical.py | 18 ++++----
 4 files changed, 51 insertions(+), 48 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index e550309461de4..ad661d6f8d908 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -71,6 +71,7 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
     IntervalDtype,
@@ -1581,6 +1582,25 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     # get unique types (dict.fromkeys is used as order-preserving set())
     types = list(dict.fromkeys(types).keys())
 
+    # If set of dtypes contains only categoricals (with the exception of strings)
+    # then the common dtype will be the categorical (in case it's the only one)
+    is_cat_or_str = lambda x: is_categorical_dtype(x) or is_string_dtype(x)
+    if all(is_cat_or_str(t) for t in types) and not any(is_object_dtype(t) for t in types):
+        # Return union of the categorical dtypes?
+        cat_dtypes = []
+        for t in types:
+            if is_categorical_dtype(t):
+                cat_dtypes.append(t)
+        if len(cat_dtypes) > 0:
+            dtype_ref = cat_dtypes[0]
+            cat_dtypes_same = True
+            for dtype in cat_dtypes:
+                if not is_dtype_equal(dtype, dtype_ref):
+                    cat_dtypes_same = False
+                    break
+            if cat_dtypes_same:
+                return dtype_ref
+                
     if any(isinstance(t, ExtensionDtype) for t in types):
         for t in types:
             if isinstance(t, ExtensionDtype):
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 60fd959701821..5b12a90135fe8 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -281,7 +281,7 @@ def union_categoricals(
     def _maybe_unwrap(x):
         if isinstance(x, (ABCCategoricalIndex, ABCSeries)):
             return x._values
-        elif isinstance(x, Categorical):
+        elif isinstance(x, Categorical) or is_categorical_dtype(x):
             return x
         else:
             raise TypeError("all components to combine must be Categorical")
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 3a21202722372..6701974359cce 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -11,10 +11,8 @@
 from pandas.errors import AbstractMethodError, InvalidIndexError
 from pandas.util._decorators import doc
 
-from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
     is_array_like,
-    is_categorical_dtype,
     is_hashable,
     is_integer,
     is_iterator,
@@ -665,14 +663,9 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None):
             and not com.is_bool_indexer(key)
             and all(is_hashable(k) for k in key)
         ):
-            for i, k in enumerate(key):
+            for k in key:
                 if k not in self.obj:
-                    if value is None:
-                        self.obj[k] = np.nan
-                    elif is_list_like(value):
-                        self.obj[k] = value[i]
-                    else:
-                        self.obj[k] = value
+                    self.obj[k] = np.nan
 
     def __setitem__(self, key, value):
         if isinstance(key, tuple):
@@ -1542,15 +1535,14 @@ def _setitem_with_indexer(self, indexer, value):
         info_axis = self.obj._info_axis_number
 
         # maybe partial set
-        take_split_path = not self.obj._mgr.is_single_block
+        take_split_path = len(self.obj._mgr.blocks) > 1
 
         # if there is only one block/type, still have to take split path
         # unless the block is one-dimensional or it can hold the value
         if not take_split_path and self.obj._mgr.blocks:
-            if self.ndim > 1:
-                # in case of dict, keys are indices
+            (blk,) = self.obj._mgr.blocks
+            if 1 < blk.ndim:  # in case of dict, keys are indices
                 val = list(value.values()) if isinstance(value, dict) else value
-                blk = self.obj._mgr.blocks[0]
                 take_split_path = not blk._can_hold_element(val)
 
         # if we have any multi-indexes that have non-trivial slices
@@ -1584,7 +1576,10 @@ def _setitem_with_indexer(self, indexer, value):
                         # must have all defined axes if we have a scalar
                         # or a list-like on the non-info axes if we have a
                         # list-like
-                        if not len(self.obj):
+                        len_non_info_axes = (
+                            len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i
+                        )
+                        if any(not l for l in len_non_info_axes):
                             if not is_list_like_indexer(value):
                                 raise ValueError(
                                     "cannot set a frame with no "
@@ -1769,7 +1764,7 @@ def _setitem_with_indexer(self, indexer, value):
                     self._setitem_single_column(loc, value, pi)
 
         else:
-            self._setitem_single_block(indexer, value)
+            self._setitem_single_block_inplace(indexer, value)
 
     def _setitem_single_column(self, loc: int, value, plane_indexer):
         # positional setting on column loc
@@ -1796,9 +1791,10 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         # reset the sliced object if unique
         self.obj._iset_item(loc, ser)
 
-    def _setitem_single_block(self, indexer, value):
+    def _setitem_single_block_inplace(self, indexer, value):
         """
-        _setitem_with_indexer for the case when we have a single Block.
+        _setitem_with_indexer for the case when we have a single Block
+        and the value can be set into it without casting.
         """
         from pandas import Series
 
@@ -1847,13 +1843,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
         Insert new row(s) or column(s) into the Series or DataFrame.
         """
         from pandas import DataFrame, Series
-
-        def check_valid_categorical(new_values, obj_dtype):
-            if is_categorical_dtype(obj_dtype):
-                if (~np.in1d(new_values, obj_dtype.categories.values)).any():
-                    raise ValueError(
-                        "Cannot setitem on a Categorical with a new category"
-                    )
+        from pandas.core.dtypes.cast import find_common_type
 
         # reindex the axis to the new value
         # and set inplace
@@ -1878,16 +1868,8 @@ def check_valid_categorical(new_values, obj_dtype):
                 # GH#22717 handle casting compatibility that np.concatenate
                 #  does incorrectly
                 new_values = concat_compat([self.obj._values, new_values])
-                if is_object_dtype(new_values.dtype):
-                    dtype = None
-                else:
-                    dtype = find_common_type([self.obj.dtype, new_values.dtype])
-            else:
-                dtype = None
-
-            check_valid_categorical(new_values, self.obj.dtype)
             self.obj._mgr = self.obj._constructor(
-                new_values, index=new_index, name=self.obj.name, dtype=dtype
+                new_values, index=new_index, name=self.obj.name
             )._mgr
             self.obj._maybe_update_cacher(clear=True)
 
@@ -1915,13 +1897,14 @@ def check_valid_categorical(new_values, obj_dtype):
                 if len(set(self.obj.dtypes)) > 1:
                     value = list(value)
                     for i in range(len(self.obj.columns)):
-                        value[i] = Series(data=[value[i]], dtype=self.obj.dtypes[i])
-                        check_valid_categorical(value[i], self.obj.dtypes[i])
-                    value = dict(zip(self.obj.columns, value))
-                    value = DataFrame(value)
+                        dtype = find_common_type([self.obj.dtypes[i], type(value[i])])
+                        value[i] = Series(data=[value[i]], dtype=dtype)
+                    value = DataFrame(dict(zip(self.obj.columns, value)))
                     value.index = [indexer]
                 else:
-                    value = Series(value, index=self.obj.columns, name=indexer)
+                    dtype = find_common_type([self.obj.dtypes[0], type(value)])
+                    value = Series(value, index=self.obj.columns, name=indexer, dtype=dtype)
+
 
             self.obj._mgr = self.obj.append(value)._mgr
             self.obj._maybe_update_cacher(clear=True)
diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index d8d6e08ca931d..69589cc598e27 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -6,12 +6,6 @@
 
 
 class TestCategoricalSeries:
-    def test_loc_new_category_series_raises(self):
-        ser = pd.Series(Categorical(["a", "b", "c"]))
-        msg = "Cannot setitem on a Categorical with a new category"
-        with pytest.raises(ValueError, match=msg):
-            ser.loc[3] = "d"
-
     def test_unused_category_retention(self):
         # Init case
         exp_cats = Index(["a", "b", "c", "d"])
@@ -31,9 +25,15 @@ def test_loc_new_category_row_raises(self):
                 "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
             }
         )
-        msg = "Cannot setitem on a Categorical with a new category"
-        with pytest.raises(ValueError, match=msg):
-            df.loc[3] = [3, "d"]
+        df.loc[3] = [3, "d"]
+
+        expected = pd.DataFrame(
+            {
+                "int": [0, 1, 2, 3],
+                "cat": Categorical(["a", "b", "c", pd.NA], categories=["a", "b", "c"]),
+            }
+        )
+        tm.assert_frame_equal(df, expected)
 
     def test_loc_new_row_category_dtype_retention(self):
         df = pd.DataFrame(

From 51e2032044c67517f4ac96d931c28fa6d943d89b Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 00:39:44 +0300
Subject: [PATCH 26/60] Fix lint

---
 pandas/core/dtypes/cast.py | 6 ++++--
 pandas/core/indexing.py    | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 423f140fe5d6a..656ab1c8d31ed 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1569,7 +1569,9 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     # If set of dtypes contains only categoricals (with the exception of strings)
     # then the common dtype will be the categorical (in case it's the only one)
     is_cat_or_str = lambda x: is_categorical_dtype(x) or is_string_dtype(x)
-    if all(is_cat_or_str(t) for t in types) and not any(is_object_dtype(t) for t in types):
+    if all(is_cat_or_str(t) for t in types) and not any(
+        is_object_dtype(t) for t in types
+    ):
         # Return union of the categorical dtypes?
         cat_dtypes = []
         for t in types:
@@ -1584,7 +1586,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
                     break
             if cat_dtypes_same:
                 return dtype_ref
-                
+
     if any(isinstance(t, ExtensionDtype) for t in types):
         for t in types:
             if isinstance(t, ExtensionDtype):
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 6701974359cce..0d923c79fe5c5 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1903,8 +1903,9 @@ def _setitem_with_indexer_missing(self, indexer, value):
                     value.index = [indexer]
                 else:
                     dtype = find_common_type([self.obj.dtypes[0], type(value)])
-                    value = Series(value, index=self.obj.columns, name=indexer, dtype=dtype)
-
+                    value = Series(
+                        value, index=self.obj.columns, name=indexer, dtype=dtype
+                    )
 
             self.obj._mgr = self.obj.append(value)._mgr
             self.obj._maybe_update_cacher(clear=True)

From 7d643573dcafbdd36e4cd2be27470b5522cfd474 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 00:51:42 +0300
Subject: [PATCH 27/60] Fix format issues

---
 pandas/core/dtypes/cast.py              |  1 -
 pandas/core/indexing.py                 | 21 +++++----------------
 pandas/tests/series/test_categorical.py |  2 --
 3 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 656ab1c8d31ed..8e318e2674ff0 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -72,7 +72,6 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
-    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
     IntervalDtype,
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 0d923c79fe5c5..5e654a44dcec1 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -11,28 +11,17 @@
 from pandas.errors import AbstractMethodError, InvalidIndexError
 from pandas.util._decorators import doc
 
-from pandas.core.dtypes.common import (
-    is_array_like,
-    is_hashable,
-    is_integer,
-    is_iterator,
-    is_list_like,
-    is_numeric_dtype,
-    is_object_dtype,
-    is_scalar,
-    is_sequence,
-)
+from pandas.core.dtypes.common import (is_array_like, is_hashable, is_integer,
+                                       is_iterator, is_list_like, is_numeric_dtype,
+                                       is_object_dtype, is_scalar, is_sequence)
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries
 from pandas.core.dtypes.missing import infer_fill_value, isna
 
 import pandas.core.common as com
 from pandas.core.construction import array as pd_array
-from pandas.core.indexers import (
-    check_array_indexer,
-    is_list_like_indexer,
-    length_of_indexer,
-)
+from pandas.core.indexers import (check_array_indexer, is_list_like_indexer,
+                                  length_of_indexer)
 from pandas.core.indexes.api import Index
 
 if TYPE_CHECKING:
diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 69589cc598e27..21f71d6a9d6d5 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -1,5 +1,3 @@
-import pytest
-
 import pandas as pd
 from pandas import Categorical, Index
 import pandas._testing as tm

From d68f2152988e5ed558f7276eb084efa4fe12af4d Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 01:01:39 +0300
Subject: [PATCH 28/60] Restore one-per-line import formatting in indexing.py

---
 pandas/core/indexing.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 5e654a44dcec1..ac3b82494b5f3 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -11,17 +11,28 @@
 from pandas.errors import AbstractMethodError, InvalidIndexError
 from pandas.util._decorators import doc
 
-from pandas.core.dtypes.common import (is_array_like, is_hashable, is_integer,
-                                       is_iterator, is_list_like, is_numeric_dtype,
-                                       is_object_dtype, is_scalar, is_sequence)
+from pandas.core.dtypes.common import (
+    is_array_like,
+    is_hashable,
+    is_integer,
+    is_iterator,
+    is_list_like,
+    is_numeric_dtype,
+    is_object_dtype,
+    is_scalar,
+    is_sequence,
+)
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries
 from pandas.core.dtypes.missing import infer_fill_value, isna
 
 import pandas.core.common as com
 from pandas.core.construction import array as pd_array
-from pandas.core.indexers import (check_array_indexer, is_list_like_indexer,
-                                  length_of_indexer)
+from pandas.core.indexers import (
+    check_array_indexer,
+    is_list_like_indexer,
+    length_of_indexer,
+)
 from pandas.core.indexes.api import Index
 
 if TYPE_CHECKING:
@@ -1831,9 +1842,10 @@ def _setitem_with_indexer_missing(self, indexer, value):
         """
         Insert new row(s) or column(s) into the Series or DataFrame.
         """
-        from pandas import DataFrame, Series
         from pandas.core.dtypes.cast import find_common_type
 
+        from pandas import DataFrame, Series
+
         # reindex the axis to the new value
         # and set inplace
         if self.ndim == 1:

From 5ea8ab1ae33978357bbbc1b5365aa96ee8f894a5 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 10:40:20 +0300
Subject: [PATCH 29/60] Revert setitem-path changes in indexing.py

---
 pandas/core/indexing.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index ac3b82494b5f3..9554dc7eef56f 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -663,9 +663,14 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None):
             and not com.is_bool_indexer(key)
             and all(is_hashable(k) for k in key)
         ):
-            for k in key:
+            for i, k in enumerate(key):
                 if k not in self.obj:
-                    self.obj[k] = np.nan
+                    if value is None:
+                        self.obj[k] = np.nan
+                    elif is_list_like(value):
+                        self.obj[k] = value[i]
+                    else:
+                        self.obj[k] = value
 
     def __setitem__(self, key, value):
         if isinstance(key, tuple):
@@ -1535,14 +1540,15 @@ def _setitem_with_indexer(self, indexer, value):
         info_axis = self.obj._info_axis_number
 
         # maybe partial set
-        take_split_path = len(self.obj._mgr.blocks) > 1
+        take_split_path = not self.obj._mgr.is_single_block
 
         # if there is only one block/type, still have to take split path
         # unless the block is one-dimensional or it can hold the value
         if not take_split_path and self.obj._mgr.blocks:
-            (blk,) = self.obj._mgr.blocks
-            if 1 < blk.ndim:  # in case of dict, keys are indices
+            if self.ndim > 1:
+                # in case of dict, keys are indices
                 val = list(value.values()) if isinstance(value, dict) else value
+                blk = self.obj._mgr.blocks[0]
                 take_split_path = not blk._can_hold_element(val)
 
         # if we have any multi-indexes that have non-trivial slices
@@ -1576,10 +1582,7 @@ def _setitem_with_indexer(self, indexer, value):
                         # must have all defined axes if we have a scalar
                         # or a list-like on the non-info axes if we have a
                         # list-like
-                        len_non_info_axes = (
-                            len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i
-                        )
-                        if any(not l for l in len_non_info_axes):
+                        if not len(self.obj):
                             if not is_list_like_indexer(value):
                                 raise ValueError(
                                     "cannot set a frame with no "
@@ -1764,7 +1767,7 @@ def _setitem_with_indexer(self, indexer, value):
                     self._setitem_single_column(loc, value, pi)
 
         else:
-            self._setitem_single_block_inplace(indexer, value)
+            self._setitem_single_block(indexer, value)
 
     def _setitem_single_column(self, loc: int, value, plane_indexer):
         # positional setting on column loc
@@ -1791,10 +1794,9 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         # reset the sliced object if unique
         self.obj._iset_item(loc, ser)
 
-    def _setitem_single_block_inplace(self, indexer, value):
+    def _setitem_single_block(self, indexer, value):
         """
-        _setitem_with_indexer for the case when we have a single Block
-        and the value can be set into it without casting.
+        _setitem_with_indexer for the case when we have a single Block.
         """
         from pandas import Series
 
@@ -2381,4 +2383,4 @@ def maybe_numeric_slice(df, slice_, include_bool: bool = False):
         if include_bool:
             dtypes.append(bool)
         slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns]
-    return slice_
+    return slice_
\ No newline at end of file

From b08efc127c92c07b935fc4ab9fa2b39afe9f8eda Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 10:43:09 +0300
Subject: [PATCH 30/60] Restore trailing newline at end of indexing.py

---
 pandas/core/indexing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 9554dc7eef56f..490f0e2fbd307 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -2383,4 +2383,4 @@ def maybe_numeric_slice(df, slice_, include_bool: bool = False):
         if include_bool:
             dtypes.append(bool)
         slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns]
-    return slice_
\ No newline at end of file
+    return slice_

From 69f4e626fb1d58ab48d42ec9ec7c6f87aa9698b7 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 10:44:36 +0300
Subject: [PATCH 31/60] Rename test to test_loc_new_category_nan_value

---
 pandas/tests/series/test_categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 21f71d6a9d6d5..469682c1b57ac 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -16,7 +16,7 @@ def test_unused_category_retention(self):
         tm.assert_index_equal(ser.cat.categories, exp_cats)
         tm.assert_series_equal(ser, expected)
 
-    def test_loc_new_category_row_raises(self):
+    def test_loc_new_category_nan_value(self):
         df = pd.DataFrame(
             {
                 "int": [0, 1, 2],

From 4c33040e9e9c2c8d721fc24b036a1c99b8f29bfe Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 20:20:06 +0300
Subject: [PATCH 32/60] Revert is_categorical_dtype check in union_categoricals

---
 pandas/core/dtypes/concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 5b12a90135fe8..60fd959701821 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -281,7 +281,7 @@ def union_categoricals(
     def _maybe_unwrap(x):
         if isinstance(x, (ABCCategoricalIndex, ABCSeries)):
             return x._values
-        elif isinstance(x, Categorical) or is_categorical_dtype(x):
+        elif isinstance(x, Categorical):
             return x
         else:
             raise TypeError("all components to combine must be Categorical")

From e9367366c7af0980d83c7248f2b77c159531ec6b Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 20:22:26 +0300
Subject: [PATCH 33/60] Rename cat_dtypes_same to cat_dtypes_equal in cast.py

---
 pandas/core/dtypes/cast.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 8e318e2674ff0..5e7712281f748 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1578,12 +1578,12 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
                 cat_dtypes.append(t)
         if len(cat_dtypes) > 0:
             dtype_ref = cat_dtypes[0]
-            cat_dtypes_same = True
+            cat_dtypes_equal = True
             for dtype in cat_dtypes:
                 if not is_dtype_equal(dtype, dtype_ref):
-                    cat_dtypes_same = False
+                    cat_dtypes_equal = False
                     break
-            if cat_dtypes_same:
+            if cat_dtypes_equal:
                 return dtype_ref
 
     if any(isinstance(t, ExtensionDtype) for t in types):

From 8031f8fdd143fdcf1bc2ab32ca9c3ce08de4079a Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 18 Oct 2020 20:55:10 +0300
Subject: [PATCH 34/60] Simplify categorical dtype equality check with all()

---
 pandas/core/dtypes/cast.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 5e7712281f748..a9fbadcd29de3 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1578,12 +1578,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
                 cat_dtypes.append(t)
         if len(cat_dtypes) > 0:
             dtype_ref = cat_dtypes[0]
-            cat_dtypes_equal = True
-            for dtype in cat_dtypes:
-                if not is_dtype_equal(dtype, dtype_ref):
-                    cat_dtypes_equal = False
-                    break
-            if cat_dtypes_equal:
+            if all(is_dtype_equal(dtype, dtype_ref) for dtype in cat_dtypes):
                 return dtype_ref
 
     if any(isinstance(t, ExtensionDtype) for t in types):

From c08c6c0ff7070f93a3074d8e6078d07b34e6c1d4 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Wed, 4 Nov 2020 20:34:34 +0200
Subject: [PATCH 35/60] Use imported Categorical instead of pd.Categorical in tests

---
 pandas/tests/series/test_categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 469682c1b57ac..a0ec9a1f8be98 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -37,7 +37,7 @@ def test_loc_new_row_category_dtype_retention(self):
         df = pd.DataFrame(
             {
                 "int": [0, 1, 2],
-                "cat": pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
+                "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
             }
         )
         df.loc[3] = [3, "c"]
@@ -45,7 +45,7 @@ def test_loc_new_row_category_dtype_retention(self):
         expected = pd.DataFrame(
             {
                 "int": [0, 1, 2, 3],
-                "cat": pd.Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
+                "cat": Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
             }
         )
 

From c862d991035609103c6e7b95ab822ae8071c5df2 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 28 Nov 2020 21:47:04 +0200
Subject: [PATCH 36/60] Revert previous approach and include concat changes

---
 pandas/core/dtypes/cast.py   |  6 ++----
 pandas/core/dtypes/concat.py | 13 +++++++++++--
 pandas/core/indexing.py      | 17 ++---------------
 3 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 13ef014eaa47d..96a590b1924f2 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1603,10 +1603,8 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     # If set of dtypes contains only categoricals (with the exception of strings)
     # then the common dtype will be the categorical (in case it's the only one)
     is_cat_or_str = lambda x: is_categorical_dtype(x) or is_string_dtype(x)
-    if all(is_cat_or_str(t) for t in types) and not any(
-        is_object_dtype(t) for t in types
-    ):
-        # Return union of the categorical dtypes?
+    if all(is_cat_or_str(t) for t in types):
+        # Should we extend this to use the union of categorical dtypes?
         cat_dtypes = []
         for t in types:
             if is_categorical_dtype(t):
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 63e3440558c75..9ab87c28a911a 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -7,7 +7,11 @@
 
 from pandas._typing import ArrayLike, DtypeObj
 
-from pandas.core.dtypes.cast import find_common_type
+from pandas.core.dtypes.cast import (
+    convert_dtypes,
+    find_common_type,
+    maybe_downcast_to_dtype,
+)
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
     is_dtype_equal,
@@ -142,7 +146,12 @@ def is_nonempty(x) -> bool:
         # we ignore axis here, as internally concatting with EAs is always
         # for axis=0
         if not single_dtype:
-            target_dtype = find_common_type([x.dtype for x in to_concat])
+            conv_types = [convert_dtypes(x) for x in to_concat]
+            for i in range(len(to_concat)):
+                if conv_types[i] == "string":
+                    conv_types[i] = np.dtype(str)
+                to_concat[i] = maybe_downcast_to_dtype(to_concat[i], conv_types[i])
+            target_dtype = find_common_type(conv_types)
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
         if isinstance(to_concat[0], ExtensionArray):
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 4e11f2becb8b2..6aa031af64833 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1860,9 +1860,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
         """
         Insert new row(s) or column(s) into the Series or DataFrame.
         """
-        from pandas.core.dtypes.cast import find_common_type
-
-        from pandas import DataFrame, Series
+        from pandas import Series
 
         # reindex the axis to the new value
         # and set inplace
@@ -1914,18 +1912,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
                     if len(value) != len(self.obj.columns):
                         raise ValueError("cannot set a row with mismatched columns")
 
-                if len(set(self.obj.dtypes)) > 1:
-                    value = list(value)
-                    for i in range(len(self.obj.columns)):
-                        dtype = find_common_type([self.obj.dtypes[i], type(value[i])])
-                        value[i] = Series(data=[value[i]], dtype=dtype)
-                    value = DataFrame(dict(zip(self.obj.columns, value)))
-                    value.index = [indexer]
-                else:
-                    dtype = find_common_type([self.obj.dtypes[0], type(value)])
-                    value = Series(
-                        value, index=self.obj.columns, name=indexer, dtype=dtype
-                    )
+                value = Series(value, index=self.obj.columns, name=indexer)
 
             self.obj._mgr = self.obj.append(value)._mgr
             self.obj._maybe_update_cacher(clear=True)

From 5baa314c0961d17cacfea5f6c0583be8ade96bee Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 28 Nov 2020 22:05:57 +0200
Subject: [PATCH 37/60] Remove non-required conversion

---
 pandas/core/dtypes/concat.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 9ab87c28a911a..8e4b1020a2555 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -147,10 +147,6 @@ def is_nonempty(x) -> bool:
         # for axis=0
         if not single_dtype:
             conv_types = [convert_dtypes(x) for x in to_concat]
-            for i in range(len(to_concat)):
-                if conv_types[i] == "string":
-                    conv_types[i] = np.dtype(str)
-                to_concat[i] = maybe_downcast_to_dtype(to_concat[i], conv_types[i])
             target_dtype = find_common_type(conv_types)
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 

From cb5d8e49a2eebda89f7bf00223ac82065b52867d Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 28 Nov 2020 22:12:56 +0200
Subject: [PATCH 38/60] Update concat.py

---
 pandas/core/dtypes/concat.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 8e4b1020a2555..266d9bf72754d 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -9,8 +9,7 @@
 
 from pandas.core.dtypes.cast import (
     convert_dtypes,
-    find_common_type,
-    maybe_downcast_to_dtype,
+    find_common_type
 )
 from pandas.core.dtypes.common import (
     is_categorical_dtype,

From 7d7da20908ccebc93fb6b236d1002623766a2e70 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sat, 28 Nov 2020 22:18:58 +0200
Subject: [PATCH 39/60] Update concat.py

---
 pandas/core/dtypes/concat.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 266d9bf72754d..e17aa72de6285 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -7,10 +7,7 @@
 
 from pandas._typing import ArrayLike, DtypeObj
 
-from pandas.core.dtypes.cast import (
-    convert_dtypes,
-    find_common_type
-)
+from pandas.core.dtypes.cast import convert_dtypes, find_common_type
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
     is_dtype_equal,

From ecad50f07cd533ff06ef7a8fd7093e3a1e2cb127 Mon Sep 17 00:00:00 2001
From: chrispe92 <ch.petrop@gmail.com>
Date: Sun, 6 Dec 2020 17:15:38 +0200
Subject: [PATCH 40/60] Update cast.py

---
 pandas/core/dtypes/cast.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 96a590b1924f2..edd0e9d4cb75f 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1602,8 +1602,10 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
 
     # If set of dtypes contains only categoricals (with the exception of strings)
     # then the common dtype will be the categorical (in case it's the only one)
-    is_cat_or_str = lambda x: is_categorical_dtype(x) or is_string_dtype(x)
-    if all(is_cat_or_str(t) for t in types):
+    is_cat_or_str = lambda x: is_categorical_dtype(x) | is_string_dtype(x)
+    if all(is_cat_or_str(t) for t in types) and not any(
+        is_object_dtype(t) for t in types
+    ):
         # Should we extend this to use the union of categorical dtypes?
         cat_dtypes = []
         for t in types:

From af5e1414a9a48b5431c557e56a02e2514565e93d Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Mon, 15 Feb 2021 11:07:53 +0200
Subject: [PATCH 41/60] Add new version with raise

---
 pandas/core/dtypes/cast.py              |  16 ----
 pandas/core/dtypes/concat.py            |  79 +++++++++++++++-
 pandas/tests/series/test_categorical.py | 116 ++++++++++++++++++------
 3 files changed, 162 insertions(+), 49 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 51291f6401c98..e27c519304e2e 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1609,22 +1609,6 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     # get unique types (dict.fromkeys is used as order-preserving set())
     types = list(dict.fromkeys(types).keys())
 
-    # If set of dtypes contains only categoricals (with the exception of strings)
-    # then the common dtype will be the categorical (in case it's the only one)
-    is_cat_or_str = lambda x: is_categorical_dtype(x) | is_string_dtype(x)
-    if all(is_cat_or_str(t) for t in types) and not any(
-        is_object_dtype(t) for t in types
-    ):
-        # Should we extend this to use the union of categorical dtypes?
-        cat_dtypes = []
-        for t in types:
-            if is_categorical_dtype(t):
-                cat_dtypes.append(t)
-        if len(cat_dtypes) > 0:
-            dtype_ref = cat_dtypes[0]
-            if all(is_dtype_equal(dtype, dtype_ref) for dtype in cat_dtypes):
-                return dtype_ref
-
     if any(isinstance(t, ExtensionDtype) for t in types):
         for t in types:
             if isinstance(t, ExtensionDtype):
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 70ad212ccd980..7bec779f6607c 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -7,7 +7,7 @@
 
 from pandas._typing import ArrayLike, DtypeObj
 
-from pandas.core.dtypes.cast import convert_dtypes, find_common_type
+from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
     is_dtype_equal,
@@ -15,6 +15,7 @@
     is_sparse,
 )
 from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCSeries
+from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays import ExtensionArray
 from pandas.core.arrays.sparse import SparseArray
@@ -61,6 +62,69 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
     return arr.astype(dtype, copy=False)
 
 
+def _can_cast_to_categorical(to_cast):
+    """
+    Evaluates if a list of arrays can be casted to a single categorical dtype.
+    The categorical dtype to cast to, is determined by any of the arrays which
+    is already of categorical dtype. If no such array exists, or if the existing
+    categorical dtype does not contain any of the unique values of the other arrays,
+    then it will return False.
+
+    Parameters
+    ----------
+    to_cast : array of arrays
+
+    Returns
+    -------
+    True if possible to cast to a single categorical dtype, False otherwise.
+    """
+    if len(to_cast) == 0:
+        raise ValueError("No arrays to cast")
+
+    types = [x.dtype for x in to_cast]
+
+    # If any of the arrays is of categorical dtype, then we will use it as a reference.
+    # If no such array exists, then we just return.
+    if any(is_categorical_dtype(t) for t in types):
+        cat_dtypes = []
+        for t in types:
+            if (
+                is_categorical_dtype(t)
+                and len(t.categories.values) > 0
+                and any(isna(t.categories.values) == False)
+            ):
+                categorical_values_dtype = t.categories.values.dtype
+                if all(
+                    is_categorical_dtype(x) or np.can_cast(categorical_values_dtype, x)
+                    for x in types
+                ):
+                    cat_dtypes.append(t)
+        if len(cat_dtypes) == 0 or any(
+            not is_dtype_equal(dtype, cat_dtypes[0]) for dtype in cat_dtypes[1:]
+        ):
+            return False
+    else:
+        return False
+
+    def categorical_contains_values(categorical_dtype, x):
+        unique_values = np.unique(x[~isna(x)])
+        if any(
+            val not in categorical_dtype.categories for val in unique_values.tolist()
+        ):
+            return False
+        return True
+
+    if not all(
+        categorical_contains_values(to_cast[0].dtype, other) or len(other) == 0
+        for other in to_cast[1:]
+    ):
+        raise ValueError(
+            "Cannot concat on a Categorical with a new category, set the categories first"
+        )
+
+    return True
+
+
 def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False):
     """
     provide concatenation of an array of arrays each of which is a single
@@ -108,8 +172,17 @@ def is_nonempty(x) -> bool:
         # we ignore axis here, as internally concatting with EAs is always
         # for axis=0
         if not single_dtype:
-            conv_types = [convert_dtypes(x) for x in to_concat]
-            target_dtype = find_common_type(conv_types)
+            # Special case for handling concat with categorical series.
+            # We need to make sure that categorical dtype is preserved
+            # when an array of valid values is given (GH#25383)
+            if (
+                isinstance(to_concat[0], ExtensionArray)
+                and all(x.shape[0] == 1 for x in to_concat[1:])
+                and _can_cast_to_categorical(to_concat)
+            ):
+                target_dtype = to_concat[0].dtype
+            else:
+                target_dtype = find_common_type([x.dtype for x in to_concat])
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
         if isinstance(to_concat[0], ExtensionArray):
diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index a0ec9a1f8be98..058da1bcc16a2 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -1,39 +1,27 @@
+import pytest
+
+from pandas.core.dtypes.concat import _can_cast_to_categorical
+
 import pandas as pd
-from pandas import Categorical, Index
+from pandas import Categorical
 import pandas._testing as tm
 
 
 class TestCategoricalSeries:
-    def test_unused_category_retention(self):
-        # Init case
-        exp_cats = Index(["a", "b", "c", "d"])
-        ser = pd.Series(Categorical(["a", "b", "c"], categories=exp_cats))
-        tm.assert_index_equal(ser.cat.categories, exp_cats)
-
-        # Modify case
-        ser.loc[0] = "b"
-        expected = pd.Series(Categorical(["b", "b", "c"], categories=exp_cats))
-        tm.assert_index_equal(ser.cat.categories, exp_cats)
-        tm.assert_series_equal(ser, expected)
-
-    def test_loc_new_category_nan_value(self):
-        df = pd.DataFrame(
-            {
-                "int": [0, 1, 2],
-                "cat": Categorical(["a", "b", "c"], categories=["a", "b", "c"]),
-            }
-        )
-        df.loc[3] = [3, "d"]
+    def test_setitem_undefined_category_raises(self):
+        ser = pd.Series(Categorical(["a", "b", "c"]))
+        msg = r"Cannot setitem on a Categorical with a new category, set the categories first"
+        with pytest.raises(ValueError, match=msg):
+            ser.loc[2] = "d"
 
-        expected = pd.DataFrame(
-            {
-                "int": [0, 1, 2, 3],
-                "cat": Categorical(["a", "b", "c", pd.NA], categories=["a", "b", "c"]),
-            }
-        )
-        tm.assert_frame_equal(df, expected)
+    def test_concat_undefined_category_raises(self):
+        ser = pd.Series(Categorical(["a", "b", "c"]))
+        msg = r"Cannot concat on a Categorical with a new category, set the categories first"
+        with pytest.raises(ValueError, match=msg):
+            ser.loc[3] = "d"
 
-    def test_loc_new_row_category_dtype_retention(self):
+    def test_loc_category_dtype_retention(self):
+        # Case 1
         df = pd.DataFrame(
             {
                 "int": [0, 1, 2],
@@ -41,7 +29,6 @@ def test_loc_new_row_category_dtype_retention(self):
             }
         )
         df.loc[3] = [3, "c"]
-
         expected = pd.DataFrame(
             {
                 "int": [0, 1, 2, 3],
@@ -50,3 +37,72 @@ def test_loc_new_row_category_dtype_retention(self):
         )
 
         tm.assert_frame_equal(df, expected)
+
+        # Case 2
+        ser = pd.Series(Categorical(["a", "b", "c"]))
+        ser.loc[3] = "c"
+        expected = pd.Series(Categorical(["a", "b", "c", "c"]))
+        tm.assert_series_equal(ser, expected)
+
+        # Case 3
+        ser = pd.Series(Categorical([1, 2, 3]))
+        ser.loc[3] = 3
+        expected = pd.Series(Categorical([1, 2, 3, 3]))
+        tm.assert_series_equal(ser, expected)
+
+        # Case 4
+        ser = pd.Series(Categorical([1, 2, 3]))
+        ser.loc[3] = pd.NA
+        expected = pd.Series(Categorical([1, 2, 3, pd.NA]))
+        tm.assert_series_equal(ser, expected)
+
+    def test_can_cast_to_categorical(self):
+        # Case 1:
+        # Series of identical categorical dtype should
+        # be able to concat to categorical
+        ser1 = pd.Series(Categorical(["a", "b", "c"]))
+        ser2 = pd.Series(Categorical(["a", "b", "c"]))
+        arr = [ser1, ser2]
+        assert _can_cast_to_categorical(arr) == True
+
+        # Case 2:
+        # Series of non-identical categorical dtype should
+        # not be able to concat to categoorical
+        ser1 = pd.Series(Categorical(["a", "b", "c"]))
+        ser2 = pd.Series(Categorical(["a", "b", "d"]))
+        arr = [ser1, ser2]
+        assert _can_cast_to_categorical(arr) == False
+
+        # Concat of a categorical series with a series
+        # containing only values identical to the
+        # categorical values should be possible
+
+        # Case 3: For string categorical values
+        ser1 = pd.Series(Categorical(["a", "b", "c"]))
+        ser2 = pd.Series(["a", "a", "b"])
+        arr = [ser1, ser2]
+        assert _can_cast_to_categorical(arr) == True
+
+        # Case 4: For int categorical values
+        ser1 = pd.Series(Categorical([1, 2, 3]))
+        ser2 = pd.Series([1, 2])
+        arr = [ser1, ser2]
+        assert _can_cast_to_categorical(arr) == True
+
+        # The rest should raise because not all values
+        # are present in the categorical.
+
+        # Case 5
+        ser1 = pd.Series(Categorical([1, 2, 3]))
+        ser2 = pd.Series([3, 4])
+        arr = [ser1, ser2]
+        msg = r"Cannot concat on a Categorical with a new category, set the categories first"
+        with pytest.raises(ValueError, match=msg):
+            _can_cast_to_categorical(arr)
+
+        # Case 6
+        ser1 = pd.Series(Categorical(["a", "b", "c"]))
+        ser2 = pd.Series(["d", "e"])
+        arr = [ser1, ser2]
+        with pytest.raises(ValueError, match=msg):
+            _can_cast_to_categorical(arr)

From 6d45570b6fbbb1d492f3d8461e6f704eaaf1e168 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Mon, 15 Feb 2021 11:24:31 +0200
Subject: [PATCH 42/60] Add format fixes

---
 pandas/core/dtypes/concat.py            |  5 +++--
 pandas/tests/series/test_categorical.py | 18 ++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 7bec779f6607c..e4d61be8c895c 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -91,7 +91,7 @@ def _can_cast_to_categorical(to_cast):
             if (
                 is_categorical_dtype(t)
                 and len(t.categories.values) > 0
-                and any(isna(t.categories.values) == False)
+                and any(~isna(t.categories.values))
             ):
                 categorical_values_dtype = t.categories.values.dtype
                 if all(
@@ -119,7 +119,8 @@ def categorical_contains_values(categorical_dtype, x):
         for other in to_cast[1:]
     ):
         raise ValueError(
-            "Cannot concat on a Categorical with a new category, set the categories first"
+            "Cannot concat on a Categorical with a new category, "
+            "set the categories first"
         )
 
     return True
diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 058da1bcc16a2..8f35290fdf396 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -10,13 +10,15 @@
 class TestCategoricalSeries:
     def test_setitem_undefined_category_raises(self):
         ser = pd.Series(Categorical(["a", "b", "c"]))
-        msg = r"Cannot setitem on a Categorical with a new category, set the categories first"
+        msg = "Cannot setitem on a Categorical with a new category, "\
+              "set the categories first"
         with pytest.raises(ValueError, match=msg):
             ser.loc[2] = "d"
 
     def test_concat_undefined_category_raises(self):
         ser = pd.Series(Categorical(["a", "b", "c"]))
-        msg = r"Cannot concat on a Categorical with a new category, set the categories first"
+        msg = "Cannot concat on a Categorical with a new category, "\
+              "set the categories first"
         with pytest.raises(ValueError, match=msg):
             ser.loc[3] = "d"
 
@@ -35,7 +37,6 @@ def test_loc_category_dtype_retention(self):
                 "cat": Categorical(["a", "b", "c", "c"], categories=["a", "b", "c"]),
             }
         )
-
         tm.assert_frame_equal(df, expected)
 
         # Case 2
@@ -63,7 +64,7 @@ def test_can_cast_to_categorical(self):
         ser1 = pd.Series(Categorical(["a", "b", "c"]))
         ser2 = pd.Series(Categorical(["a", "b", "c"]))
         arr = [ser1, ser2]
-        assert _can_cast_to_categorical(arr) == True
+        assert _can_cast_to_categorical(arr) is True
 
         # Case 2:
         # Series of non-identical categorical dtype should
@@ -71,7 +72,7 @@ def test_can_cast_to_categorical(self):
         ser1 = pd.Series(Categorical(["a", "b", "c"]))
         ser2 = pd.Series(Categorical(["a", "b", "d"]))
         arr = [ser1, ser2]
-        assert _can_cast_to_categorical(arr) == False
+        assert _can_cast_to_categorical(arr) is False
 
         # Concat of a categorical series with a series
         # containing only values identical to the
@@ -81,13 +82,13 @@ def test_can_cast_to_categorical(self):
         ser1 = pd.Series(Categorical(["a", "b", "c"]))
         ser2 = pd.Series(["a", "a", "b"])
         arr = [ser1, ser2]
-        assert _can_cast_to_categorical(arr) == True
+        assert _can_cast_to_categorical(arr) is True
 
         # Case 4: For int categorical values
         ser1 = pd.Series(Categorical([1, 2, 3]))
         ser2 = pd.Series([1, 2])
         arr = [ser1, ser2]
-        assert _can_cast_to_categorical(arr) == True
+        assert _can_cast_to_categorical(arr) is True
 
         # The rest should raise because not all values
         # are present in the categorical.
@@ -96,7 +97,8 @@ def test_can_cast_to_categorical(self):
         ser1 = pd.Series(Categorical([1, 2, 3]))
         ser2 = pd.Series([3, 4])
         arr = [ser1, ser2]
-        msg = r"Cannot concat on a Categorical with a new category, set the categories first"
+        msg = "Cannot concat on a Categorical with a new category, "\
+              "set the categories first"
         with pytest.raises(ValueError, match=msg):
             _can_cast_to_categorical(arr)
 

From 31612ed4bde8302a36f41e305a2abcc8adc4bc41 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Mon, 15 Feb 2021 11:31:04 +0200
Subject: [PATCH 43/60] Update test_categorical.py

---
 pandas/tests/series/test_categorical.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 8f35290fdf396..44aa318c1d3d6 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -10,15 +10,19 @@
 class TestCategoricalSeries:
     def test_setitem_undefined_category_raises(self):
         ser = pd.Series(Categorical(["a", "b", "c"]))
-        msg = "Cannot setitem on a Categorical with a new category, "\
-              "set the categories first"
+        msg = (
+            "Cannot setitem on a Categorical with a new category, "
+            "set the categories first"
+        )
         with pytest.raises(ValueError, match=msg):
             ser.loc[2] = "d"
 
     def test_concat_undefined_category_raises(self):
         ser = pd.Series(Categorical(["a", "b", "c"]))
-        msg = "Cannot concat on a Categorical with a new category, "\
-              "set the categories first"
+        msg = (
+            "Cannot concat on a Categorical with a new category, "
+            "set the categories first"
+        )
         with pytest.raises(ValueError, match=msg):
             ser.loc[3] = "d"
 
@@ -97,8 +101,10 @@ def test_can_cast_to_categorical(self):
         ser1 = pd.Series(Categorical([1, 2, 3]))
         ser2 = pd.Series([3, 4])
         arr = [ser1, ser2]
-        msg = "Cannot concat on a Categorical with a new category, "\
-              "set the categories first"
+        msg = (
+            "Cannot concat on a Categorical with a new category, "
+            "set the categories first"
+        )
         with pytest.raises(ValueError, match=msg):
             _can_cast_to_categorical(arr)
 

From 6d9e6670874ca503389df9308fe59fd55affe5ef Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 20 Feb 2021 13:58:49 +0200
Subject: [PATCH 44/60] Update

---
 pandas/core/dtypes/cast.py              | 26 +++++++-
 pandas/core/dtypes/concat.py            | 87 +++++--------------------
 pandas/tests/series/test_categorical.py | 58 +----------------
 3 files changed, 41 insertions(+), 130 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 669bfe08d42b0..deb9dec8fbb6a 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1639,13 +1639,14 @@ def sanitize_to_nanoseconds(values: np.ndarray) -> np.ndarray:
     return values
 
 
-def find_common_type(types: List[DtypeObj]) -> DtypeObj:
+def find_common_type(types: List[DtypeObj], prio_cat_dtype: bool = False) -> DtypeObj:
     """
     Find a common data type among the given dtypes.
 
     Parameters
     ----------
     types : list of dtypes
+    prio_cat_dtype: set priority towards finding a categorical dtype
 
     Returns
     -------
@@ -1661,6 +1662,29 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
 
     first = types[0]
 
+    # categorical dtypes should not be casted to a new dtype
+    # if priority is set accodring to prio_cat_dtype
+    if prio_cat_dtype:
+        if any(is_categorical_dtype(t) for t in types):
+            cat_dtypes = []
+            for t in types:
+                if (
+                    is_categorical_dtype(t)
+                    and len(t.categories.values) > 0
+                    and any(~isna(t.categories.values))
+                ):
+                    categorical_values_dtype = t.categories.values.dtype
+                    if all(
+                        is_categorical_dtype(x)
+                        or np.can_cast(categorical_values_dtype, x)
+                        for x in types
+                    ):
+                        cat_dtypes.append(t)
+            if len(cat_dtypes) > 0:
+                dtype_ref = cat_dtypes[0]
+                if all(is_dtype_equal(dtype, dtype_ref) for dtype in cat_dtypes[1:]):
+                    return dtype_ref
+
     # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
     # => object
     if all(is_dtype_equal(first, t) for t in types[1:]):
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index b8aa163fc027f..644fa4b75ca87 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -21,6 +21,7 @@
     ABCCategoricalIndex,
     ABCSeries,
 )
+from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays import ExtensionArray
 from pandas.core.arrays.sparse import SparseArray
@@ -35,6 +36,14 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
     Helper function for `arr.astype(common_dtype)` but handling all special
     cases.
     """
+    if is_categorical_dtype(dtype):
+        unique_values = np.unique(arr[~isna(arr)])
+        if any(val not in dtype.categories for val in unique_values.tolist()):
+            raise ValueError(
+                "Cannot setitem on a Categorical with a new category, "
+                "set the categories first"
+            )
+
     if (
         is_categorical_dtype(arr.dtype)
         and isinstance(dtype, np.dtype)
@@ -70,70 +79,6 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
     return arr.astype(dtype, copy=False)
 
 
-def _can_cast_to_categorical(to_cast):
-    """
-    Evaluates if a list of arrays can be casted to a single categorical dtype.
-    The categorical dtype to cast to, is determined by any of the arrays which
-    is already of categorical dtype. If no such array exists, or if the existing
-    categorical dtype does not contain any of the unique values of the other arrays,
-    then it will return False.
-
-    Parameters
-    ----------
-    to_cast : array of arrays
-
-    Returns
-    -------
-    True if possible to cast to a single categorical dtype, False otherwise.
-    """
-    if len(to_cast) == 0:
-        raise ValueError("No arrays to cast")
-
-    types = [x.dtype for x in to_cast]
-
-    # If any of the arrays is of categorical dtype, then we will use it as a reference.
-    # If no such array exists, then we just return.
-    if any(is_categorical_dtype(t) for t in types):
-        cat_dtypes = []
-        for t in types:
-            if (
-                is_categorical_dtype(t)
-                and len(t.categories.values) > 0
-                and any(~isna(t.categories.values))
-            ):
-                categorical_values_dtype = t.categories.values.dtype
-                if all(
-                    is_categorical_dtype(x) or np.can_cast(categorical_values_dtype, x)
-                    for x in types
-                ):
-                    cat_dtypes.append(t)
-        if len(cat_dtypes) == 0 or any(
-            not is_dtype_equal(dtype, cat_dtypes[0]) for dtype in cat_dtypes[1:]
-        ):
-            return False
-    else:
-        return False
-
-    def categorical_contains_values(categorical_dtype, x):
-        unique_values = np.unique(x[~isna(x)])
-        if any(
-            val not in categorical_dtype.categories for val in unique_values.tolist()
-        ):
-            return False
-        return True
-
-    if not all(
-        categorical_contains_values(to_cast[0].dtype, other) or len(other) == 0
-        for other in to_cast[1:]
-    ):
-        raise ValueError(
-            "Cannot concat on a Categorical with a new category, "
-            "set the categories first"
-        )
-
-    return True
-
-
 def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False):
     """
     provide concatenation of an array of arrays each of which is a single
@@ -184,14 +129,12 @@ def is_nonempty(x) -> bool:
             # Special case for handling concat with categorical series.
             # We need to make sure that categorical dtype is preserved
             # when an array of valid values is given (GH#25383)
-            if (
-                isinstance(to_concat[0], ExtensionArray)
-                and all(x.shape[0] == 1 for x in to_concat[1:])
-                and _can_cast_to_categorical(to_concat)
-            ):
-                target_dtype = to_concat[0].dtype
-            else:
-                target_dtype = find_common_type([x.dtype for x in to_concat])
+            use_index_expansion = len(to_concat) == 2 and all(
+                x.shape[0] == 1 for x in to_concat[1:]
+            )
+            target_dtype = find_common_type(
+                [x.dtype for x in to_concat], prio_cat_dtype=use_index_expansion
+            )
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
         if isinstance(to_concat[0], ExtensionArray):
diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index 44aa318c1d3d6..ab5afeaef7381 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -1,7 +1,5 @@
 import pytest
 
-from pandas.core.dtypes.concat import _can_cast_to_categorical
-
 import pandas as pd
 from pandas import Categorical
 import pandas._testing as tm
@@ -20,7 +18,7 @@ def test_setitem_undefined_category_raises(self):
     def test_concat_undefined_category_raises(self):
         ser = pd.Series(Categorical(["a", "b", "c"]))
         msg = (
-            "Cannot concat on a Categorical with a new category, "
+            "Cannot setitem on a Categorical with a new category, "
             "set the categories first"
         )
         with pytest.raises(ValueError, match=msg):
@@ -60,57 +58,3 @@ def test_loc_category_dtype_retention(self):
         ser.loc[3] = pd.NA
         expected = pd.Series(Categorical([1, 2, 3, pd.NA]))
         tm.assert_series_equal(ser, expected)
-
-    def test_can_cast_to_categorical(self):
-        # Case 1:
-        # Series of identical categorical dtype should
-        # be able to concat to categorical
-        ser1 = pd.Series(Categorical(["a", "b", "c"]))
-        ser2 = pd.Series(Categorical(["a", "b", "c"]))
-        arr = [ser1, ser2]
-        assert _can_cast_to_categorical(arr) is True
-
-        # Case 2:
-        # Series of non-identical categorical dtype should
-        # not be able to concat to categoorical
-        ser1 = pd.Series(Categorical(["a", "b", "c"]))
-        ser2 = pd.Series(Categorical(["a", "b", "d"]))
-        arr = [ser1, ser2]
-        assert _can_cast_to_categorical(arr) is False
-
-        # Concat of a categorical series with a series
-        # containing only values identical to the
-        # categorical values should be possible
-
-        # Case 3: For string categorical values
-        ser1 = pd.Series(Categorical(["a", "b", "c"]))
-        ser2 = pd.Series(["a", "a", "b"])
-        arr = [ser1, ser2]
-        assert _can_cast_to_categorical(arr) is True
-
-        # Case 4: For int categorical values
-        ser1 = pd.Series(Categorical([1, 2, 3]))
-        ser2 = pd.Series([1, 2])
-        arr = [ser1, ser2]
-        assert _can_cast_to_categorical(arr) is True
-
-        # The rest should raise because not all values
-        # are present in the categorical.
-
-        # Case 5
-        ser1 = pd.Series(Categorical([1, 2, 3]))
-        ser2 = pd.Series([3, 4])
-        arr = [ser1, ser2]
-        msg = (
-            "Cannot concat on a Categorical with a new category, "
-            "set the categories first"
-        )
-        with pytest.raises(ValueError, match=msg):
-            _can_cast_to_categorical(arr)
-
-        # Case 6
-        ser1 = pd.Series(Categorical(["a", "b", "c"]))
-        ser2 = pd.Series(["d", "e"])
-        arr = [ser1, ser2]
-        with pytest.raises(ValueError, match=msg):
-            _can_cast_to_categorical(arr)

From e0da6558ea6e72386c43cb946908337031408fd4 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 20 Feb 2021 16:31:29 +0200
Subject: [PATCH 45/60] Use prio_cat_dtype only for EAs

---
 pandas/core/dtypes/concat.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 644fa4b75ca87..aac8b505d2596 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -37,6 +37,8 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
     cases.
     """
     if is_categorical_dtype(dtype):
+        # if casting an array to a categorical dtype, then we need to ensure
+        # that its unique values are predefined as categories in that dtype
         unique_values = np.unique(arr[~isna(arr)])
         if any(val not in dtype.categories for val in unique_values.tolist()):
             raise ValueError(
@@ -121,23 +123,21 @@ def is_nonempty(x) -> bool:
     all_empty = not len(non_empties)
     single_dtype = len({x.dtype for x in to_concat}) == 1
     any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)
+    first_ea = isinstance(to_concat[0], ExtensionArray)
+    arr_index_expansion = (
+        first_ea and len(to_concat) == 2 and to_concat[1].shape[0] == 1
+    )
 
     if any_ea:
         # we ignore axis here, as internally concatting with EAs is always
         # for axis=0
         if not single_dtype:
-            # Special case for handling concat with categorical series.
-            # We need to make sure that categorical dtype is preserved
-            # when an array of valid values is given (GH#25383)
-            use_index_expansion = len(to_concat) == 2 and all(
-                x.shape[0] == 1 for x in to_concat[1:]
-            )
             target_dtype = find_common_type(
-                [x.dtype for x in to_concat], prio_cat_dtype=use_index_expansion
+                [x.dtype for x in to_concat], prio_cat_dtype=arr_index_expansion
             )
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
-        if isinstance(to_concat[0], ExtensionArray):
+        if first_ea:
             cls = type(to_concat[0])
             return cls._concat_same_type(to_concat)
         else:

From 92d1f143989e3ec2399de0fee2f05e2049040068 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 20 Feb 2021 17:37:47 +0200
Subject: [PATCH 46/60] Revert usage of first_ea

---
 pandas/core/dtypes/concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index aac8b505d2596..8e268689fb959 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -137,7 +137,7 @@ def is_nonempty(x) -> bool:
             )
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
-        if first_ea:
+        if isinstance(to_concat[0], ExtensionArray):
             cls = type(to_concat[0])
             return cls._concat_same_type(to_concat)
         else:

From 9b9b382e22fc0698894cafb02e0153608d3fbe1e Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 20 Feb 2021 19:24:50 +0200
Subject: [PATCH 47/60] Fix mypy errors

---
 pandas/core/dtypes/cast.py   | 21 +++++++++------------
 pandas/core/dtypes/concat.py |  3 ++-
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index deb9dec8fbb6a..c9eeb8c9bce22 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -88,6 +88,7 @@
     is_unsigned_integer_dtype,
 )
 from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
     IntervalDtype,
@@ -1668,18 +1669,14 @@ def find_common_type(types: List[DtypeObj], prio_cat_dtype: bool = False) -> Dty
         if any(is_categorical_dtype(t) for t in types):
             cat_dtypes = []
             for t in types:
-                if (
-                    is_categorical_dtype(t)
-                    and len(t.categories.values) > 0
-                    and any(~isna(t.categories.values))
-                ):
-                    categorical_values_dtype = t.categories.values.dtype
-                    if all(
-                        is_categorical_dtype(x)
-                        or np.can_cast(categorical_values_dtype, x)
-                        for x in types
-                    ):
-                        cat_dtypes.append(t)
+                if isinstance(t, CategoricalDtype):
+                    if any(~isna(t.categories.values)):
+                        cat_values_dtype = t.categories.values.dtype
+                        if all(
+                            is_categorical_dtype(x) or np.can_cast(cat_values_dtype, x)
+                            for x in types
+                        ):
+                            cat_dtypes.append(t)
             if len(cat_dtypes) > 0:
                 dtype_ref = cat_dtypes[0]
                 if all(is_dtype_equal(dtype, dtype_ref) for dtype in cat_dtypes[1:]):
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 8e268689fb959..60a5a0b9d0e15 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -17,6 +17,7 @@
     is_extension_array_dtype,
     is_sparse,
 )
+from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.generic import (
     ABCCategoricalIndex,
     ABCSeries,
@@ -36,7 +37,7 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
     Helper function for `arr.astype(common_dtype)` but handling all special
     cases.
     """
-    if is_categorical_dtype(dtype):
+    if isinstance(dtype, CategoricalDtype):
         # if casting an array to a categorical dtype, then we need to ensure
         # that its unique values are predefined as categories in that dtype
         unique_values = np.unique(arr[~isna(arr)])

From d3df994e91e9d1265f1c2d0e1ee2250944736947 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 20 Feb 2021 20:12:40 +0200
Subject: [PATCH 48/60] Use unique1d in _cast_to_common_type

---
 pandas/core/dtypes/concat.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 60a5a0b9d0e15..b822e64a81f86 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -10,6 +10,7 @@
     DtypeObj,
 )
 
+from pandas.core.algorithms import unique1d
 from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
@@ -40,7 +41,7 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
     if isinstance(dtype, CategoricalDtype):
         # if casting an array to a categorical dtype, then we need to ensure
         # that its unique values are predefined as categories in that dtype
-        unique_values = np.unique(arr[~isna(arr)])
+        unique_values = unique1d(arr[~isna(arr)])
         if any(val not in dtype.categories for val in unique_values.tolist()):
             raise ValueError(
                 "Cannot setitem on a Categorical with a new category, "

From 41aa9e37bb6686b7069c9ad75e9a52f4c6362baf Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 20 Feb 2021 20:33:39 +0200
Subject: [PATCH 49/60] Fix isort error

---
 pandas/core/dtypes/concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index b822e64a81f86..dc841bf050798 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -10,7 +10,6 @@
     DtypeObj,
 )
 
-from pandas.core.algorithms import unique1d
 from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
@@ -25,6 +24,7 @@
 )
 from pandas.core.dtypes.missing import isna
 
+from pandas.core.algorithms import unique1d
 from pandas.core.arrays import ExtensionArray
 from pandas.core.arrays.sparse import SparseArray
 from pandas.core.construction import (

From ca0eb1fbd009121964ed80e761eb6f98eac88573 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 20 Feb 2021 22:19:17 +0200
Subject: [PATCH 50/60] Renamed input variable for find_common_type

---
 pandas/core/dtypes/cast.py   | 14 +++++++++-----
 pandas/core/dtypes/concat.py |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index c9eeb8c9bce22..cb971f28e6a55 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1640,14 +1640,16 @@ def sanitize_to_nanoseconds(values: np.ndarray) -> np.ndarray:
     return values
 
 
-def find_common_type(types: List[DtypeObj], prio_cat_dtype: bool = False) -> DtypeObj:
+def find_common_type(
+    types: List[DtypeObj], promote_categorical: bool = False
+) -> DtypeObj:
     """
     Find a common data type among the given dtypes.
 
     Parameters
     ----------
     types : list of dtypes
-    prio_cat_dtype: set priority towards finding a categorical dtype
+    promote_categorical : find if possible, a categorical dtype that fits all the dtypes
 
     Returns
     -------
@@ -1663,9 +1665,11 @@ def find_common_type(types: List[DtypeObj], prio_cat_dtype: bool = False) -> Dty
 
     first = types[0]
 
-    # categorical dtypes should not be casted to a new dtype
-    # if priority is set accodring to prio_cat_dtype
-    if prio_cat_dtype:
+    # We will first try to find a common categorical dtype
+    # if promote_categorical is set to True. This is used
+    # to preserve the categorical dtype (since categorical
+    # values can consist of multiple dtypes).
+    if promote_categorical:
         if any(is_categorical_dtype(t) for t in types):
             cat_dtypes = []
             for t in types:
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index dc841bf050798..73f0d70c1f5f7 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -135,7 +135,7 @@ def is_nonempty(x) -> bool:
         # for axis=0
         if not single_dtype:
             target_dtype = find_common_type(
-                [x.dtype for x in to_concat], prio_cat_dtype=arr_index_expansion
+                [x.dtype for x in to_concat], promote_categorical=arr_index_expansion
             )
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 

From 931d6c804786c23c128781a8e57e6d04731a47a7 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sun, 7 Mar 2021 19:31:40 +0200
Subject: [PATCH 51/60] Remove new argument in find_common_type

---
 pandas/core/dtypes/cast.py              | 43 +++++++++++++------------
 pandas/core/dtypes/concat.py            |  8 +----
 pandas/tests/series/test_categorical.py |  6 ----
 3 files changed, 24 insertions(+), 33 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 2c1a6e7b5f710..5ad5d7865429e 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1769,16 +1769,13 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
     return dtype
 
 
-def find_common_type(
-    types: List[DtypeObj], promote_categorical: bool = False
-) -> DtypeObj:
+def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     """
     Find a common data type among the given dtypes.
 
     Parameters
     ----------
     types : list of dtypes
-    promote_categorical : find if possible, a categorical dtype that fits all the dtypes
 
     Returns
     -------
@@ -1798,22 +1795,28 @@ def find_common_type(
     # if promote_categorical is set to True. This is used
     # to preserve the categorical dtype (since categorical
     # values can consist of multiple dtypes).
-    if promote_categorical:
-        if any(is_categorical_dtype(t) for t in types):
-            cat_dtypes = []
-            for t in types:
-                if isinstance(t, CategoricalDtype):
-                    if any(~isna(t.categories.values)):
-                        cat_values_dtype = t.categories.values.dtype
-                        if all(
-                            is_categorical_dtype(x) or np.can_cast(cat_values_dtype, x)
-                            for x in types
-                        ):
-                            cat_dtypes.append(t)
-            if len(cat_dtypes) > 0:
-                dtype_ref = cat_dtypes[0]
-                if all(is_dtype_equal(dtype, dtype_ref) for dtype in cat_dtypes[1:]):
-                    return dtype_ref
+    if any(is_categorical_dtype(t) for t in types):
+        cat_dtypes = []
+        for t in types:
+            if isinstance(t, CategoricalDtype):
+                if any(~isna(t.categories.values)):
+                    cat_values_dtype = t.categories.values.dtype
+                    if all(
+                        (
+                            is_categorical_dtype(x)
+                            or (
+                                is_numeric_dtype(cat_values_dtype)
+                                and is_numeric_dtype(x)
+                            )
+                            or np.can_cast(x, cat_values_dtype)
+                        )
+                        for x in types
+                    ):
+                        cat_dtypes.append(t)
+        if len(cat_dtypes) > 0:
+            dtype_ref = cat_dtypes[0]
+            if all(is_dtype_equal(dtype, dtype_ref) for dtype in cat_dtypes[1:]):
+                return dtype_ref
 
     # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
     # => object
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index c01e0d5f0f6d3..f6049dc8d295d 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -125,18 +125,12 @@ def is_nonempty(x) -> bool:
     all_empty = not len(non_empties)
     single_dtype = len({x.dtype for x in to_concat}) == 1
     any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)
-    first_ea = isinstance(to_concat[0], ExtensionArray)
-    arr_index_expansion = (
-        first_ea and len(to_concat) == 2 and to_concat[1].shape[0] == 1
-    )
 
     if any_ea:
         # we ignore axis here, as internally concatting with EAs is always
         # for axis=0
         if not single_dtype:
-            target_dtype = find_common_type(
-                [x.dtype for x in to_concat], promote_categorical=arr_index_expansion
-            )
+            target_dtype = find_common_type([x.dtype for x in to_concat])
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
         if isinstance(to_concat[0], ExtensionArray):
diff --git a/pandas/tests/series/test_categorical.py b/pandas/tests/series/test_categorical.py
index ab5afeaef7381..9dba345c3db36 100644
--- a/pandas/tests/series/test_categorical.py
+++ b/pandas/tests/series/test_categorical.py
@@ -52,9 +52,3 @@ def test_loc_category_dtype_retention(self):
         ser.loc[3] = 3
         expected = pd.Series(Categorical([1, 2, 3, 3]))
         tm.assert_series_equal(ser, expected)
-
-        # Case 4
-        ser = pd.Series(Categorical([1, 2, 3]))
-        ser.loc[3] = pd.NA
-        expected = pd.Series(Categorical([1, 2, 3, pd.NA]))
-        tm.assert_series_equal(ser, expected)

From 8065ddb51af54b61e56f7b9e683f95a790d62f55 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 13:16:17 +0200
Subject: [PATCH 52/60] Add check to _get_common_dtype

---
 pandas/core/dtypes/cast.py   | 28 ----------------------------
 pandas/core/dtypes/dtypes.py | 19 +++++++++++++++++++
 2 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 5ad5d7865429e..6eca89e1a8744 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -88,7 +88,6 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
-    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
     IntervalDtype,
@@ -1791,33 +1790,6 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
 
     first = types[0]
 
-    # We will first try to find a common categorical dtype
-    # if promote_categorical is set to True. This is used
-    # to preserve the categorical dtype (since categorical
-    # values can consist of multiple dtypes).
-    if any(is_categorical_dtype(t) for t in types):
-        cat_dtypes = []
-        for t in types:
-            if isinstance(t, CategoricalDtype):
-                if any(~isna(t.categories.values)):
-                    cat_values_dtype = t.categories.values.dtype
-                    if all(
-                        (
-                            is_categorical_dtype(x)
-                            or (
-                                is_numeric_dtype(cat_values_dtype)
-                                and is_numeric_dtype(x)
-                            )
-                            or np.can_cast(x, cat_values_dtype)
-                        )
-                        for x in types
-                    ):
-                        cat_dtypes.append(t)
-        if len(cat_dtypes) > 0:
-            dtype_ref = cat_dtypes[0]
-            if all(is_dtype_equal(dtype, dtype_ref) for dtype in cat_dtypes[1:]):
-                return dtype_ref
-
     # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
     # => object
     if all(is_dtype_equal(first, t) for t in types[1:]):
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index da3a9269cf2c4..76691108f7dc5 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -614,6 +614,25 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         elif any(non_init_cats):
             return None
 
+        # case for compatible dtypes with categories.dtype
+        from pandas.core.dtypes.common import is_dtype_equal, is_extension_array_dtype
+
+        non_identical_cat_dtype = [
+            isinstance(x, CategoricalDtype) and x != self for x in dtypes
+        ]
+        if not any(non_identical_cat_dtype):
+            non_cat_dtypes_compat = [
+                isinstance(x, CategoricalDtype)
+                or is_dtype_equal(x, self.categories.dtype)
+                or (
+                    not is_extension_array_dtype(x)
+                    and np.can_cast(x, self.categories.dtype)
+                )
+                for x in dtypes
+            ]
+            if all(non_cat_dtypes_compat):
+                return self
+
         # categorical is aware of Sparse -> extract sparse subdtypes
         dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes]
         # extract the categories' dtype

From b21326b39ca46d4930d1618122af4a66cda4c9cf Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 13:30:39 +0200
Subject: [PATCH 53/60] Split _get_common_dtype import across lines

---
 pandas/core/dtypes/dtypes.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 3eb1cbee3f9ba..b5f849261bf5c 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -623,8 +623,11 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
             return None
 
         # case for compatible dtypes with categories.dtype
-        from pandas.core.dtypes.common import is_dtype_equal, is_extension_array_dtype
-
+        from pandas.core.dtypes.common import (
+            is_dtype_equal, 
+            is_extension_array_dtype
+        )
+        
         non_identical_cat_dtype = [
             isinstance(x, CategoricalDtype) and x != self for x in dtypes
         ]

From 335fc0695c938599bfcbfbdc3d960cf6ab6ac62f Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 13:48:05 +0200
Subject: [PATCH 54/60] Revert multi-line import in _get_common_dtype

---
 pandas/core/dtypes/dtypes.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index b5f849261bf5c..3eb1cbee3f9ba 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -623,11 +623,8 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
             return None
 
         # case for compatible dtypes with categories.dtype
-        from pandas.core.dtypes.common import (
-            is_dtype_equal, 
-            is_extension_array_dtype
-        )
-        
+        from pandas.core.dtypes.common import is_dtype_equal, is_extension_array_dtype
+
         non_identical_cat_dtype = [
             isinstance(x, CategoricalDtype) and x != self for x in dtypes
         ]

From 950dcc4ed6db7517b64404f3e3c1bbad8d8f0eb0 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 14:03:28 +0200
Subject: [PATCH 55/60] Move dtypes.common import to module level

---
 pandas/core/dtypes/dtypes.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 3eb1cbee3f9ba..e9579ce0276bc 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -42,6 +42,10 @@
     ExtensionDtype,
     register_extension_dtype,
 )
+from pandas.core.dtypes.common import (
+    is_dtype_equal,
+    is_extension_array_dtype,
+)
 from pandas.core.dtypes.generic import (
     ABCCategoricalIndex,
     ABCIndex,
@@ -623,8 +627,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
             return None
 
         # case for compatible dtypes with categories.dtype
-        from pandas.core.dtypes.common import is_dtype_equal, is_extension_array_dtype
-
         non_identical_cat_dtype = [
             isinstance(x, CategoricalDtype) and x != self for x in dtypes
         ]

From 2ee1df89d8b18762fcc6d83895c53ed3d973cad3 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 14:43:10 +0200
Subject: [PATCH 56/60] Replace dtypes.common helpers with ==/isinstance

---
 pandas/core/dtypes/dtypes.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index e9579ce0276bc..30ea2b7d3ac23 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -42,10 +42,6 @@
     ExtensionDtype,
     register_extension_dtype,
 )
-from pandas.core.dtypes.common import (
-    is_dtype_equal,
-    is_extension_array_dtype,
-)
 from pandas.core.dtypes.generic import (
     ABCCategoricalIndex,
     ABCIndex,
@@ -633,9 +629,9 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         if not any(non_identical_cat_dtype):
             non_cat_dtypes_compat = [
                 isinstance(x, CategoricalDtype)
-                or is_dtype_equal(x, self.categories.dtype)
+                or x == self.categories.dtype
                 or (
-                    not is_extension_array_dtype(x)
+                    not isinstance(x, ExtensionDtype)
                     and np.can_cast(x, self.categories.dtype)
                 )
                 for x in dtypes

From 17120f0f0fc46c074162287b5ddc9bf0d24370ea Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 17:39:08 +0200
Subject: [PATCH 57/60] Test insert override in indexes/category.py

---
 pandas/core/indexes/category.py | 43 +++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 62941a23c6459..e9a4cc32af588 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -620,6 +620,49 @@ def map(self, mapper):
         mapped = self._values.map(mapper)
         return Index(mapped, name=self.name)
 
+    def insert(self, loc: int, item):
+        """
+        Make new Index inserting new item at location. Follows
+        Python list.append semantics for negative values.
+
+        Parameters
+        ----------
+        loc : int
+        item : object
+
+        Returns
+        -------
+        new_index : Index
+
+        Raises
+        ------
+        ValueError if the item is not valid for this dtype.
+        """
+        from pandas.core.dtypes.cast import (
+            find_common_type,
+            infer_dtype_from,
+        )
+        arr = self._data
+        try:
+            code = arr._validate_scalar(item)
+        except (ValueError, TypeError):
+            # e.g. trying to insert an integer into a DatetimeIndex
+            #  We cannot keep the same dtype, so cast to the (often object)
+            #  minimal shared dtype before doing the insert.
+            dtype, _ = infer_dtype_from(item, pandas_dtype=True)
+            dtype = find_common_type([self.dtype.categories.dtype, dtype])
+            return self.astype(dtype).insert(loc, item)
+        else:
+            new_vals = np.concatenate(
+                (
+                    arr._ndarray[:loc],
+                    np.asarray([code], dtype=arr._ndarray.dtype),
+                    arr._ndarray[loc:],
+                )
+            )
+            new_arr = arr._from_backing_data(new_vals)
+            return type(self)._simple_new(new_arr, name=self.name)
+
     def _concat(self, to_concat: List[Index], name: Hashable) -> Index:
         # if calling index is category, don't check dtype of others
         try:

From 439b49f6ea976af1e9fec15cad0456f125601ef5 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 20:57:11 +0200
Subject: [PATCH 58/60] Add flag in find_common_type

---
 pandas/core/dtypes/cast.py      |  8 +++++-
 pandas/core/dtypes/concat.py    |  4 ++-
 pandas/core/dtypes/dtypes.py    | 16 ++++++------
 pandas/core/indexes/category.py | 43 ---------------------------------
 4 files changed, 17 insertions(+), 54 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 44650500e0f65..f5db5c8f2f981 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1849,7 +1849,9 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
     return dtype
 
 
-def find_common_type(types: List[DtypeObj]) -> DtypeObj:
+def find_common_type(
+    types: List[DtypeObj], downcast_cat_dtype: Optional[bool] = True
+) -> DtypeObj:
     """
     Find a common data type among the given dtypes.
 
@@ -1876,6 +1878,10 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     if all(is_dtype_equal(first, t) for t in types[1:]):
         return first
 
+    # downcast categorical to the dtype of their categories
+    if downcast_cat_dtype and not all(is_categorical_dtype(t) for t in types):
+        types = [t.categories.dtype if is_categorical_dtype(t) else t for t in types]
+
     # get unique types (dict.fromkeys is used as order-preserving set())
     types = list(dict.fromkeys(types).keys())
 
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index f0265ecbe06b9..6110a442da008 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -134,7 +134,9 @@ def is_nonempty(x) -> bool:
         # we ignore axis here, as internally concatting with EAs is always
         # for axis=0
         if not single_dtype:
-            target_dtype = find_common_type([x.dtype for x in to_concat])
+            target_dtype = find_common_type(
+                [x.dtype for x in to_concat], downcast_cat_dtype=False
+            )
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
         if isinstance(to_concat[0], ExtensionArray):
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 30ea2b7d3ac23..a27d9599158e5 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -605,7 +605,6 @@ def _is_boolean(self) -> bool:
 
     def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         from pandas.core.arrays.sparse import SparseDtype
-
         # check if we have all categorical dtype with identical categories
         if all(isinstance(x, CategoricalDtype) for x in dtypes):
             first = dtypes[0]
@@ -627,16 +626,15 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
             isinstance(x, CategoricalDtype) and x != self for x in dtypes
         ]
         if not any(non_identical_cat_dtype):
+            non_cat_dtypes = [
+                x.categories.dtype if isinstance(x, CategoricalDtype) else x for x in dtypes
+            ]
             non_cat_dtypes_compat = [
-                isinstance(x, CategoricalDtype)
-                or x == self.categories.dtype
-                or (
-                    not isinstance(x, ExtensionDtype)
-                    and np.can_cast(x, self.categories.dtype)
-                )
-                for x in dtypes
+                not isinstance(x, ExtensionDtype)
+                and np.can_cast(x, self.categories.dtype)
+                for x in non_cat_dtypes
             ]
-            if all(non_cat_dtypes_compat):
+            if all(non_cat_dtypes_compat) and self.categories is not None and len(self.categories) > 0:
                 return self
 
         # categorical is aware of Sparse -> extract sparse subdtypes
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index e9a4cc32af588..62941a23c6459 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -620,49 +620,6 @@ def map(self, mapper):
         mapped = self._values.map(mapper)
         return Index(mapped, name=self.name)
 
-    def insert(self, loc: int, item):
-        """
-        Make new Index inserting new item at location. Follows
-        Python list.append semantics for negative values.
-
-        Parameters
-        ----------
-        loc : int
-        item : object
-
-        Returns
-        -------
-        new_index : Index
-
-        Raises
-        ------
-        ValueError if the item is not valid for this dtype.
-        """
-        from pandas.core.dtypes.cast import (
-            find_common_type,
-            infer_dtype_from,
-        )
-        arr = self._data
-        try:
-            code = arr._validate_scalar(item)
-        except (ValueError, TypeError):
-            # e.g. trying to insert an integer into a DatetimeIndex
-            #  We cannot keep the same dtype, so cast to the (often object)
-            #  minimal shared dtype before doing the insert.
-            dtype, _ = infer_dtype_from(item, pandas_dtype=True)
-            dtype = find_common_type([self.dtype.categories.dtype, dtype])
-            return self.astype(dtype).insert(loc, item)
-        else:
-            new_vals = np.concatenate(
-                (
-                    arr._ndarray[:loc],
-                    np.asarray([code], dtype=arr._ndarray.dtype),
-                    arr._ndarray[loc:],
-                )
-            )
-            new_arr = arr._from_backing_data(new_vals)
-            return type(self)._simple_new(new_arr, name=self.name)
-
     def _concat(self, to_concat: List[Index], name: Hashable) -> Index:
         # if calling index is category, don't check dtype of others
         try:

From c6e3435103f7f3412abcc1176d72e9fcf0670d46 Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 22:29:00 +0200
Subject: [PATCH 59/60] Revert

---
 pandas/core/dtypes/cast.py   | 24 ++++++++++++++++++++----
 pandas/core/dtypes/concat.py |  6 +++++-
 pandas/core/dtypes/dtypes.py | 19 ++-----------------
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index f5db5c8f2f981..d0fa6239fc1cb 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -88,6 +88,7 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
     IntervalDtype,
@@ -1850,7 +1851,7 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
 
 
 def find_common_type(
-    types: List[DtypeObj], downcast_cat_dtype: Optional[bool] = True
+    types: List[DtypeObj], promote_categorical: Optional[bool] = False
 ) -> DtypeObj:
     """
     Find a common data type among the given dtypes.
@@ -1858,6 +1859,7 @@ def find_common_type(
     Parameters
     ----------
     types : list of dtypes
+    promote_categorical : bool; if True, prefer a categorical dtype that fits all dtypes
 
     Returns
     -------
@@ -1878,9 +1880,23 @@ def find_common_type(
     if all(is_dtype_equal(first, t) for t in types[1:]):
         return first
 
-    # downcast categorical to the dtype of their categories
-    if downcast_cat_dtype and not all(is_categorical_dtype(t) for t in types):
-        types = [t.categories.dtype if is_categorical_dtype(t) else t for t in types]
+    # special case: return a categorical dtype that all non-categorical dtypes cast to
+    if promote_categorical:
+        if any(is_categorical_dtype(t) for t in types):
+            cat_dtypes = []
+            for t in types:
+                if isinstance(t, CategoricalDtype) and t.categories is not None:
+                    if any(~isna(t.categories.values)):
+                        cat_values_dtype = t.categories.values.dtype
+                        if all(
+                            is_categorical_dtype(x) or np.can_cast(x, cat_values_dtype)
+                            for x in types
+                        ):
+                            cat_dtypes.append(t)
+            if len(cat_dtypes) > 0:
+                dtype_ref = cat_dtypes[0]
+                if all(is_dtype_equal(dtype, dtype_ref) for dtype in cat_dtypes[1:]):
+                    return dtype_ref
 
     # get unique types (dict.fromkeys is used as order-preserving set())
     types = list(dict.fromkeys(types).keys())
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 6110a442da008..1e27b93e5abf1 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -129,13 +129,17 @@ def is_nonempty(x) -> bool:
     all_empty = not len(non_empties)
     single_dtype = len({x.dtype for x in to_concat}) == 1
     any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)
+    first_ea = isinstance(to_concat[0], ExtensionArray)
+    arr_index_expansion = (
+        first_ea and len(to_concat) == 2 and to_concat[1].shape[0] == 1
+    )
 
     if any_ea:
         # we ignore axis here, as internally concatting with EAs is always
         # for axis=0
         if not single_dtype:
             target_dtype = find_common_type(
-                [x.dtype for x in to_concat], downcast_cat_dtype=False
+                [x.dtype for x in to_concat], promote_categorical=arr_index_expansion
             )
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index a27d9599158e5..2b94822a630c1 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -605,6 +605,7 @@ def _is_boolean(self) -> bool:
 
     def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         from pandas.core.arrays.sparse import SparseDtype
+
         # check if we have all categorical dtype with identical categories
         if all(isinstance(x, CategoricalDtype) for x in dtypes):
             first = dtypes[0]
@@ -621,22 +622,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         elif any(non_init_cats):
             return None
 
-        # case for compatible dtypes with categories.dtype
-        non_identical_cat_dtype = [
-            isinstance(x, CategoricalDtype) and x != self for x in dtypes
-        ]
-        if not any(non_identical_cat_dtype):
-            non_cat_dtypes = [
-                x.categories.dtype if isinstance(x, CategoricalDtype) else x for x in dtypes
-            ]
-            non_cat_dtypes_compat = [
-                not isinstance(x, ExtensionDtype)
-                and np.can_cast(x, self.categories.dtype)
-                for x in non_cat_dtypes
-            ]
-            if all(non_cat_dtypes_compat) and self.categories is not None and len(self.categories) > 0:
-                return self
-
         # categorical is aware of Sparse -> extract sparse subdtypes
         dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes]
         # extract the categories' dtype
@@ -1380,4 +1365,4 @@ def itemsize(self) -> int:
         """
         The element size of this data-type object.
         """
-        return self._dtype.itemsize
+        return self._dtype.itemsize
\ No newline at end of file

From fc40817f21b5d7d569faf2ac9313e6036948a06d Mon Sep 17 00:00:00 2001
From: chrispe <ch.petrop@gmail.com>
Date: Sat, 13 Mar 2021 22:30:14 +0200
Subject: [PATCH 60/60] Update dtypes.py

---
 pandas/core/dtypes/dtypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 2b94822a630c1..d44d2a564fb78 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -1365,4 +1365,4 @@ def itemsize(self) -> int:
         """
         The element size of this data-type object.
         """
-        return self._dtype.itemsize
\ No newline at end of file
+        return self._dtype.itemsize