Skip to content

Commit b4f354f

Browse files
committed
review updates
1 parent 7684902 commit b4f354f

File tree

8 files changed

+91
-42
lines changed

8 files changed

+91
-42
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ Conversion
252252
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
253253
- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
254254
- Fixed a bug where ``FY5253`` date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`)
255-
- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index would not be converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`)
255+
- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for some index types (:issue:`18630`)
256256

257257

258258
Indexing

pandas/core/indexes/category.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pandas import compat
55
from pandas.compat.numpy import function as nv
66
from pandas.core.dtypes.generic import ABCCategorical, ABCSeries
7+
from pandas.core.dtypes.dtypes import CategoricalDtype
78
from pandas.core.dtypes.common import (
89
is_categorical_dtype,
910
_ensure_platform_int,
@@ -166,8 +167,6 @@ def _create_categorical(self, data, categories=None, ordered=None,
166167
data = Categorical(data, categories=categories, ordered=ordered,
167168
dtype=dtype)
168169
else:
169-
from pandas.core.dtypes.dtypes import CategoricalDtype
170-
171170
if categories is not None:
172171
data = data.set_categories(categories, ordered=ordered)
173172
elif ordered is not None and ordered != data.ordered:
@@ -346,9 +345,22 @@ def astype(self, dtype, copy=True):
346345
if is_interval_dtype(dtype):
347346
from pandas import IntervalIndex
348347
return IntervalIndex.from_intervals(np.array(self))
349-
elif is_categorical_dtype(dtype) and (dtype == self.dtype):
350-
# fastpath if dtype is the same current
351-
return self.copy() if copy else self
348+
elif is_categorical_dtype(dtype):
349+
# want to maintain existing categories/ordered if they are None
350+
if dtype.categories is None:
351+
new_categories = self.categories
352+
else:
353+
new_categories = dtype.categories
354+
if dtype.ordered is None:
355+
new_ordered = self.ordered
356+
else:
357+
new_ordered = dtype.ordered
358+
dtype = CategoricalDtype(new_categories, new_ordered)
359+
360+
# fastpath if dtypes are equal
361+
if dtype == self.dtype:
362+
return self.copy() if copy else self
363+
352364
return super(CategoricalIndex, self).astype(dtype=dtype, copy=copy)
353365

354366
@cache_readonly

pandas/core/indexes/interval.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -626,8 +626,9 @@ def astype(self, dtype, copy=True):
626626
elif is_object_dtype(dtype):
627627
return Index(self.values, dtype=object)
628628
elif is_categorical_dtype(dtype):
629-
from pandas import Categorical
630-
return Categorical(self, ordered=True)
629+
from pandas.core.indexes.category import CategoricalIndex
630+
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
631+
copy=copy)
631632
raise ValueError('Cannot cast IntervalIndex to dtype {dtype}'
632633
.format(dtype=dtype))
633634

pandas/core/indexes/multi.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas.core.dtypes.common import (
1515
_ensure_int64,
1616
_ensure_platform_int,
17+
is_categorical_dtype,
1718
is_object_dtype,
1819
is_iterator,
1920
is_list_like,
@@ -2716,9 +2717,14 @@ def difference(self, other):
27162717

27172718
@Appender(_index_shared_docs['astype'])
27182719
def astype(self, dtype, copy=True):
2719-
if not is_object_dtype(pandas_dtype(dtype)):
2720-
raise TypeError('Setting %s dtype to anything other than object '
2721-
'is not supported' % self.__class__)
2720+
dtype = pandas_dtype(dtype)
2721+
if is_categorical_dtype(dtype):
2722+
msg = '> 1 ndim Categorical are not supported at this time'
2723+
raise NotImplementedError(msg)
2724+
elif not is_object_dtype(dtype):
2725+
msg = ('Setting {cls} dtype to anything other than object '
2726+
'is not supported').format(cls=self.__class__)
2727+
raise TypeError(msg)
27222728
elif copy is True:
27232729
return self._shallow_copy()
27242730
return self

pandas/tests/indexes/common.py

+19-9
Original file line numberDiff line numberDiff line change
@@ -1060,19 +1060,29 @@ def test_putmask_with_wrong_mask(self):
10601060
with pytest.raises(ValueError):
10611061
index.putmask('foo', 1)
10621062

1063-
def test_astype_category(self):
1063+
@pytest.mark.parametrize('copy', [True, False])
1064+
@pytest.mark.parametrize('name', [None, 'foo'])
1065+
@pytest.mark.parametrize('ordered', [True, False])
1066+
def test_astype_category(self, copy, name, ordered):
10641067
# GH 18630
10651068
index = self.create_index()
1069+
if name:
1070+
index = index.rename(name)
10661071

1067-
expected = CategoricalIndex(index.values)
1068-
result = index.astype('category', copy=True)
1072+
# standard categories
1073+
dtype = CategoricalDtype(ordered=ordered)
1074+
result = index.astype(dtype, copy=copy)
1075+
expected = CategoricalIndex(index.values, name=name, ordered=ordered)
10691076
tm.assert_index_equal(result, expected)
10701077

1071-
expected = CategoricalIndex(index.values, name='foo')
1072-
result = index.rename('foo').astype('category', copy=False)
1078+
# non-standard categories
1079+
dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered)
1080+
result = index.astype(dtype, copy=copy)
1081+
expected = CategoricalIndex(index.values, name=name, dtype=dtype)
10731082
tm.assert_index_equal(result, expected)
10741083

1075-
dtype = CategoricalDtype(index.unique()[:-1], ordered=True)
1076-
expected = CategoricalIndex(index.values, dtype=dtype)
1077-
result = index.astype(dtype)
1078-
tm.assert_index_equal(result, expected)
1084+
if ordered is False:
1085+
# dtype='category' defaults to ordered=False, so only test once
1086+
result = index.astype('category', copy=copy)
1087+
expected = CategoricalIndex(index.values, name=name)
1088+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_category.py

+32-3
Original file line numberDiff line numberDiff line change
@@ -388,9 +388,6 @@ def test_delete(self):
388388
def test_astype(self):
389389

390390
ci = self.create_index()
391-
result = ci.astype('category')
392-
tm.assert_index_equal(result, ci, exact=True)
393-
394391
result = ci.astype(object)
395392
tm.assert_index_equal(result, Index(np.array(ci)))
396393

@@ -414,6 +411,38 @@ def test_astype(self):
414411
result = IntervalIndex.from_intervals(result.values)
415412
tm.assert_index_equal(result, expected)
416413

414+
@pytest.mark.parametrize('copy', [True, False])
415+
@pytest.mark.parametrize('name', [None, 'foo'])
416+
@pytest.mark.parametrize('dtype_ordered', [True, False])
417+
@pytest.mark.parametrize('index_ordered', [True, False])
418+
def test_astype_category(self, copy, name, dtype_ordered, index_ordered):
419+
# GH 18630
420+
index = self.create_index(ordered=index_ordered)
421+
if name:
422+
index = index.rename(name)
423+
424+
# standard categories
425+
dtype = CategoricalDtype(ordered=dtype_ordered)
426+
result = index.astype(dtype, copy=copy)
427+
expected = CategoricalIndex(index.tolist(),
428+
name=name,
429+
categories=index.categories,
430+
ordered=dtype_ordered)
431+
tm.assert_index_equal(result, expected)
432+
433+
# non-standard categories
434+
dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
435+
result = index.astype(dtype, copy=copy)
436+
expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
437+
tm.assert_index_equal(result, expected)
438+
439+
if dtype_ordered is False:
440+
# dtype='category' defaults to ordered=False, so only test once
441+
result = index.astype('category', copy=copy)
442+
expected = CategoricalIndex(
443+
index.tolist(), categories=index.categories, name=name)
444+
tm.assert_index_equal(result, expected)
445+
417446
def test_reindex_base(self):
418447
# Determined by cat ordering.
419448
idx = CategoricalIndex(list("cab"), categories=list("cab"))

pandas/tests/indexes/test_interval.py

-12
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from pandas import (
77
Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp,
88
Timedelta, compat, date_range, timedelta_range, DateOffset)
9-
from pandas.core.dtypes.dtypes import CategoricalDtype
109
from pandas.compat import lzip
1110
from pandas.tseries.offsets import Day
1211
from pandas._libs.interval import IntervalTree
@@ -363,17 +362,6 @@ def test_astype(self, closed):
363362
tm.assert_index_equal(result, idx)
364363
assert result.equals(idx)
365364

366-
def test_astype_category(self, closed):
367-
# GH 18630
368-
idx = self.create_index(closed=closed)
369-
expected = pd.Categorical(idx, ordered=True)
370-
371-
result = idx.astype('category')
372-
tm.assert_categorical_equal(result, expected)
373-
374-
result = idx.astype(CategoricalDtype())
375-
tm.assert_categorical_equal(result, expected)
376-
377365
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
378366
def test_where(self, closed, klass):
379367
idx = self.create_index(closed=closed)

pandas/tests/indexes/test_multi.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -555,14 +555,17 @@ def test_astype(self):
555555
with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"):
556556
self.index.astype(np.dtype(int))
557557

558-
def test_astype_category(self):
558+
@pytest.mark.parametrize('ordered', [True, False])
559+
def test_astype_category(self, ordered):
559560
# GH 18630
560-
msg = 'Setting .* dtype to anything other than object is not supported'
561-
with tm.assert_raises_regex(TypeError, msg):
562-
self.index.astype('category')
563-
564-
with tm.assert_raises_regex(TypeError, msg):
565-
self.index.astype(CategoricalDtype())
561+
msg = '> 1 ndim Categorical are not supported at this time'
562+
with tm.assert_raises_regex(NotImplementedError, msg):
563+
self.index.astype(CategoricalDtype(ordered=ordered))
564+
565+
if ordered is False:
566+
# dtype='category' defaults to ordered=False, so only test once
567+
with tm.assert_raises_regex(NotImplementedError, msg):
568+
self.index.astype('category')
566569

567570
def test_constructor_single_level(self):
568571
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],

0 commit comments

Comments
 (0)