Skip to content

Commit 704c505

Browse files
committed
Categorical: preserve ints when NaN are present
`Categorical([1, np.nan])` would end up with a single `1.` float level. This commit ensures that if `values` is a list of ints and contains np.nan, the float conversion does not take place.
1 parent a797b28 commit 704c505

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

pandas/core/categorical.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,17 @@ def __init__(self, values, levels=None, ordered=None, name=None, fastpath=False,
220220
inferred = com._possibly_infer_to_datetimelike(values)
221221
if not isinstance(inferred, np.ndarray):
222222
from pandas.core.series import _sanitize_array
223-
values = _sanitize_array(values, None)
223+
safe_dtype = None
224+
if isinstance(values, list) and np.nan in values:
225+
# On list with NaNs, int values will be converted to float. Use "object" dtype
226+
# to prevent this. In the end objects will be cast to int/... in the level
227+
# assignment step.
228+
safe_dtype = "object"
229+
values = _sanitize_array(values, None, dtype=safe_dtype)
224230

225231
if levels is None:
232+
# object is needed to preserve ints in case we have np.nan in values
233+
values = np.asarray(values, dtype="object")
226234
try:
227235
codes, levels = factorize(values, sort=True)
228236
# If the underlying data structure was sortable, and the user doesn't want to

pandas/tests/test_categorical.py

+12
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,18 @@ def test_constructor(self):
111111
cat = pd.Categorical([1,2,3,np.nan], levels=[1,2,3])
112112
self.assertTrue(com.is_integer_dtype(cat.levels))
113113

114+
# https://github.com/pydata/pandas/issues/3678
115+
cat = pd.Categorical([np.nan,1, 2, 3])
116+
self.assertTrue(com.is_integer_dtype(cat.levels))
117+
118+
# this should result in floats
119+
cat = pd.Categorical([np.nan, 1, 2., 3 ])
120+
self.assertTrue(com.is_float_dtype(cat.levels))
121+
122+
cat = pd.Categorical([np.nan, 1., 2., 3. ])
123+
self.assertTrue(com.is_float_dtype(cat.levels))
124+
125+
114126
def test_from_codes(self):
115127

116128
# too few levels

0 commit comments

Comments
 (0)