pandas-dev · jreback · Jan 5, 2019 · Dec 30, 2018 · Dec 30, 2018 · Dec 30, 2018
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1323,6 +1323,7 @@ Categorical
 - Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`)
 - Bug in :meth:`Series.where` losing the categorical dtype for categorical data (:issue:`24077`)
 - Bug in :meth:`Categorical.apply` where ``NaN`` values could be handled unpredictably. They now remain unchanged (:issue:`24241`)
+- Bug in :meth:`Categorical.get_values` where integers would be formatted as floats if ``NaN`` values were present (:issue:`19214`)
 
 Datetimelike
 ^^^^^^^^^^^^
@@ -1653,6 +1654,7 @@ Reshaping
 - :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now return the correct n values when ``keep != 'all'``, including when values are tied on the first columns (:issue:`22752`)
 - Constructing a DataFrame with an index argument that wasn't already an instance of :class:`~pandas.core.Index` was broken (:issue:`22227`).
 - Bug in :class:`DataFrame` that prevented list subclasses from being used for construction (:issue:`21226`)
+- Calling :func:`pandas.concat` on a ``Categorical`` of integers containing ``NaN`` values now returns a result with object dtype; previously the values were coerced to floats (:issue:`19214`)
 - Bug in :func:`DataFrame.unstack` and :func:`DataFrame.pivot_table` returning a misleading error message when the resulting DataFrame has more elements than int32 can handle. Now, the error message is improved, pointing towards the actual problem (:issue:`20601`)
 
 .. _whatsnew_0240.bug_fixes.sparse:

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1520,6 +1520,11 @@ def get_values(self):
         # if we are a datetime and period index, return Index to keep metadata
         if is_datetimelike(self.categories):
             return self.categories.take(self._codes, fill_value=np.nan)
+        elif is_integer_dtype(self.categories) and -1 in self._codes:
+            warn("Integer values represented as objects to accomodate NaNs",
+                 RuntimeWarning)
+            return self.categories.astype("object").take(self._codes,
+                                                         fill_value=np.nan)
         return np.array(self)
 
     def check_for_ordered(self, op):

diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py
@@ -14,6 +14,7 @@
 
 class TestCategoricalMissing(object):
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     def test_na_flags_int_categories(self):
         # #1457
 

diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import numpy as np
+import pytest
 
 from pandas.compat import PY3, u
 
@@ -240,6 +241,16 @@ def test_categorical_repr_datetime_ordered(self):
 
         assert repr(c) == exp
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
+    def test_categorical_repr_int_with_nan(self):
+        c = Categorical([1, 2, np.nan])
+        c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]"""
+        assert repr(c) == c_exp
+
+        s = Series([1, 2, np.nan], dtype="object").astype("category")
+        s_exp = """0      1\n1      2\n2    NaN\ndtype: category\nCategories (2, int64): [1, 2]"""  # noqa
+        assert repr(s) == s_exp
+
     def test_categorical_repr_period(self):
         idx = period_range('2011-01-01 09:00', freq='H', periods=5)
         c = Categorical(idx)

diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
@@ -354,6 +354,7 @@ def test_na_actions_categorical(self):
         res = df.fillna("a")
         tm.assert_frame_equal(res, df_exp)
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     def test_fillna_categorical_nan(self):
         # GH 14021
         # np.nan should always be a valid filler

diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
@@ -806,6 +806,7 @@ def test_mode_timedelta(self, dropna, expected1, expected2):
         expected2 = Series(expected2, dtype='timedelta64[ns]')
         tm.assert_series_equal(result, expected2)
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
         (True, Categorical([1, 2], categories=[1, 2]),
          Categorical(['a'], categories=[1, 'a']),

diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
@@ -473,6 +473,7 @@ def test_concatlike_common_period_mixed_dt_to_object(self):
         res = pd.concat([tds, ps1])
         tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     def test_concat_categorical(self):
         # GH 13524
 
@@ -496,7 +497,7 @@ def test_concat_categorical(self):
         s1 = pd.Series([10, 11, np.nan], dtype='category')
         s2 = pd.Series([np.nan, 1, 3, 2], dtype='category')
 
-        exp = pd.Series([10, 11, np.nan, np.nan, 1, 3, 2])
+        exp = pd.Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
 
@@ -509,19 +510,20 @@ def test_union_categorical_same_categories_different_order(self):
                                          categories=['a', 'b', 'c']))
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     def test_concat_categorical_coercion(self):
         # GH 13524
 
         # category + not-category => not-category
         s1 = pd.Series([1, 2, np.nan], dtype='category')
         s2 = pd.Series([2, 1, 2])
 
-        exp = pd.Series([1, 2, np.nan, 2, 1, 2])
+        exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
 
         # result shouldn't be affected by 1st elem dtype
-        exp = pd.Series([2, 1, 2, 1, 2, np.nan])
+        exp = pd.Series([2, 1, 2, 1, 2, np.nan], dtype='object')
         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
         tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
 
@@ -541,11 +543,11 @@ def test_concat_categorical_coercion(self):
         s1 = pd.Series([10, 11, np.nan], dtype='category')
         s2 = pd.Series([1, 3, 2])
 
-        exp = pd.Series([10, 11, np.nan, 1, 3, 2])
+        exp = pd.Series([10, 11, np.nan, 1, 3, 2], dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
 
-        exp = pd.Series([1, 3, 2, 10, 11, np.nan])
+        exp = pd.Series([1, 3, 2, 10, 11, np.nan], dtype='object')
         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
         tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
 
@@ -573,6 +575,7 @@ def test_concat_categorical_coercion(self):
         tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
         tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     def test_concat_categorical_3elem_coercion(self):
         # GH 13524
 
@@ -581,11 +584,13 @@ def test_concat_categorical_3elem_coercion(self):
         s2 = pd.Series([2, 1, 2], dtype='category')
         s3 = pd.Series([1, 2, 1, 2, np.nan])
 
-        exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan])
+        exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan],
+                        dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
         tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
 
-        exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2])
+        exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2],
+                        dtype='object')
         tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
 
@@ -638,6 +643,7 @@ def test_concat_categorical_multi_coercion(self):
         res = s6.append([s5, s4, s3, s2, s1], ignore_index=True)
         tm.assert_series_equal(res, exp)
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     def test_concat_categorical_ordered(self):
         # GH 13524
 
@@ -653,6 +659,7 @@ def test_concat_categorical_ordered(self):
         tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
         tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp)
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     def test_concat_categorical_coercion_nan(self):
         # GH 13524
 
@@ -669,7 +676,7 @@ def test_concat_categorical_coercion_nan(self):
         s1 = pd.Series([1, np.nan], dtype='category')
         s2 = pd.Series([np.nan, np.nan])
 
-        exp = pd.Series([1, np.nan, np.nan, np.nan])
+        exp = pd.Series([1, np.nan, np.nan, np.nan], dtype='object')
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
 

diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
@@ -496,6 +496,7 @@ def test_setitem_with_tz_dst():
     tm.assert_series_equal(s, exp)
 
 
+@pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
 def test_categorial_assigning_ops():
     orig = Series(Categorical(["b", "b"], categories=["a", "b"]))
     s = orig.copy()

diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -1314,6 +1314,7 @@ def test_duplicate_keep_all_ties(self):
 
 class TestCategoricalSeriesAnalytics(object):
 
+    @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
     def test_count(self):
 
         s = Series(Categorical([np.nan, 1, 2, np.nan],
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,6 +14,7 @@ @@
     class TestCategoricalMissing(object):
+        @pytest.mark.filterwarnings("ignore:Integer values:RuntimeWarning")
         def test_na_flags_int_categories(self):
             # #1457
@@ Expand Down @@