pandas-dev · jreback · Jan 5, 2019 · Dec 30, 2018 · Dec 30, 2018 · Dec 30, 2018
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1113,6 +1113,36 @@ cast from integer dtype to floating dtype (:issue:`22019`)
        ...:                    'c': [1, 1, np.nan, 1, 1]})
     In [4]: pd.crosstab(df.a, df.b, normalize='columns')
 
+Formatting Categorical Integer Data With ``NaN`` Values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Categorical integer data with ``NaN`` values will be formatted as integers
+instead of floats. :meth:`Series.to_numpy` is not affected (:issue:`19214`)
+
+*Previous Behavior*
+
+.. code-block:: ipython
+
+    In [3]: pd.Series([1, 2, np.nan], dtype='object').astype('category')
+    Out[3]:
+    0    1.0
+    1    2.0
+    2    NaN
+    dtype: category
+    Categories (2, int64): [1, 2]
+
+    In [4]: pd.Categorical([1, 2, np.nan])
+    Out[4]:
+    [1.0, 2.0, NaN]
+    Categories (2, int64): [1, 2]
+
+*New Behavior*
+
+.. ipython:: python
+
+    pd.Series([1, 2, np.nan], dtype='object').astype('category')
+    pd.Categorical([1, 2, np.nan])
+
 Datetimelike API Changes
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -1653,6 +1683,7 @@ Reshaping
 - :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now returns the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`)
 - Constructing a DataFrame with an index argument that wasn't already an instance of :class:`~pandas.core.Index` was broken (:issue:`22227`).
 - Bug in :class:`DataFrame` prevented list subclasses to be used to construction (:issue:`21226`)
+- Calling :func:`pandas.concat` on a ``Categorical`` of ints with NA values now causes them to be processed as objects (formerly coerced to floats) (:issue:`19214`)
 - Bug in :func:`DataFrame.unstack` and :func:`DataFrame.pivot_table` returning a missleading error message when the resulting DataFrame has more elements than int32 can handle. Now, the error message is improved, pointing towards the actual problem (:issue:`20601`)
 
 .. _whatsnew_0240.bug_fixes.sparse:

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1520,6 +1520,9 @@ def get_values(self):
         # if we are a datetime and period index, return Index to keep metadata
         if is_datetimelike(self.categories):
             return self.categories.take(self._codes, fill_value=np.nan)
+        elif is_integer_dtype(self.categories) and -1 in self._codes:
+            return self.categories.astype("object").take(self._codes,
+                                                         fill_value=np.nan)
         return np.array(self)
 
     def check_for_ordered(self, op):

diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
@@ -240,6 +240,15 @@ def test_categorical_repr_datetime_ordered(self):
 
         assert repr(c) == exp
 
+    def test_categorical_repr_int_with_nan(self):
+        c = Categorical([1, 2, np.nan])
+        c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]"""
+        assert repr(c) == c_exp
+
+        s = Series([1, 2, np.nan], dtype="object").astype("category")
+        s_exp = """0      1\n1      2\n2    NaN\ndtype: category\nCategories (2, int64): [1, 2]"""  # noqa
+        assert repr(s) == s_exp
+
     def test_categorical_repr_period(self):
         idx = period_range('2011-01-01 09:00', freq='H', periods=5)
         c = Categorical(idx)

diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
@@ -496,7 +496,7 @@ def test_concat_categorical(self):
         s1 = pd.Series([10, 11, np.nan], dtype='category')
         s2 = pd.Series([np.nan, 1, 3, 2], dtype='category')
 
-        exp = pd.Series([10, 11, np.nan, np.nan, 1, 3, 2])
+        exp = pd.Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
 
@@ -516,12 +516,12 @@ def test_concat_categorical_coercion(self):
         s1 = pd.Series([1, 2, np.nan], dtype='category')
         s2 = pd.Series([2, 1, 2])
 
-        exp = pd.Series([1, 2, np.nan, 2, 1, 2])
+        exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
 
         # result shouldn't be affected by 1st elem dtype
-        exp = pd.Series([2, 1, 2, 1, 2, np.nan])
+        exp = pd.Series([2, 1, 2, 1, 2, np.nan], dtype='object')
         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
         tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
 
@@ -541,11 +541,11 @@ def test_concat_categorical_coercion(self):
         s1 = pd.Series([10, 11, np.nan], dtype='category')
         s2 = pd.Series([1, 3, 2])
 
-        exp = pd.Series([10, 11, np.nan, 1, 3, 2])
+        exp = pd.Series([10, 11, np.nan, 1, 3, 2], dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
 
-        exp = pd.Series([1, 3, 2, 10, 11, np.nan])
+        exp = pd.Series([1, 3, 2, 10, 11, np.nan], dtype='object')
         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
         tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
 
@@ -581,11 +581,13 @@ def test_concat_categorical_3elem_coercion(self):
         s2 = pd.Series([2, 1, 2], dtype='category')
         s3 = pd.Series([1, 2, 1, 2, np.nan])
 
-        exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan])
+        exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan],
+                        dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
         tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
 
-        exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2])
+        exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2],
+                        dtype='object')
         tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
 
@@ -669,7 +671,7 @@ def test_concat_categorical_coercion_nan(self):
         s1 = pd.Series([1, np.nan], dtype='category')
         s2 = pd.Series([np.nan, np.nan])
 
-        exp = pd.Series([1, np.nan, np.nan, np.nan])
+        exp = pd.Series([1, np.nan, np.nan, np.nan], dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)