Skip to content

Commit 3de75cd

Browse files
committed
Refactored
1 parent 96d5144 commit 3de75cd

File tree

5 files changed

+24
-90
lines changed

5 files changed

+24
-90
lines changed

pandas/_libs/parsers.pyx

-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ from pandas.core.dtypes.common import (
4848
from pandas.core.categorical import Categorical, _recode_for_categories
4949
from pandas.core.algorithms import take_1d
5050
from pandas.core.dtypes.concat import union_categoricals
51-
from pandas.core.dtypes.cast import maybe_convert_for_categorical
5251
from pandas import Index
5352

5453
import pandas.io.common as com

pandas/core/categorical.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
_ensure_platform_int,
2222
is_dtype_equal,
2323
is_datetimelike,
24+
is_datetime64_dtype,
25+
is_timedelta64_dtype,
2426
is_categorical,
2527
is_categorical_dtype,
2628
is_integer_dtype,
@@ -528,11 +530,20 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
528530
-------
529531
Categorical
530532
"""
531-
from pandas.core.dtypes.cast import maybe_convert_for_categorical
532-
from pandas import Index
533+
from pandas import Index, to_numeric, to_datetime, to_timedelta
533534

534535
cats = Index(inferred_categories)
535-
cats = maybe_convert_for_categorical(cats, dtype)
536+
537+
# Convert to a specialized type when `dtype` is specified
538+
if (isinstance(dtype, CategoricalDtype) and
539+
dtype.categories is not None):
540+
541+
if dtype.categories.is_numeric():
542+
cats = to_numeric(inferred_categories, errors='coerce')
543+
elif is_datetime64_dtype(dtype.categories):
544+
cats = to_datetime(inferred_categories, errors='coerce')
545+
elif is_timedelta64_dtype(dtype.categories):
546+
cats = to_timedelta(inferred_categories, errors='coerce')
536547

537548
if (isinstance(dtype, CategoricalDtype) and
538549
dtype.categories is not None):

pandas/core/dtypes/cast.py

+1-37
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@
2424
_ensure_int32, _ensure_int64,
2525
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
2626
_POSSIBLY_CAST_DTYPES)
27-
from .dtypes import (ExtensionDtype, DatetimeTZDtype, PeriodDtype,
28-
CategoricalDtype)
27+
from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype
2928
from .generic import (ABCDatetimeIndex, ABCPeriodIndex,
3029
ABCSeries)
3130
from .missing import isna, notna
@@ -605,41 +604,6 @@ def conv(r, dtype):
605604
return [conv(r, dtype) for r, dtype in zip(result, dtypes)]
606605

607606

608-
def maybe_convert_for_categorical(categories, dtype):
609-
"""Convert ``categories`` depending on ``dtype``.
610-
611-
Converts to numeric, datetime, or timedelta types, when ``dtype`` is
612-
a CategoricalDtype with known, non-object categories.
613-
614-
Parameters
615-
----------
616-
categories : array-like
617-
dtype : CategoricalDtype
618-
619-
Returns
620-
-------
621-
new_categories : array or Index
622-
623-
Examples
624-
--------
625-
>>> maybe_convert_for_categorical(['1', '2'], CategoricalDtype([1, 2]))
626-
array([ 1, 2])
627-
>>> maybe_convert_for_categorical([1, 'a'], CategoricalDtype([1, 2]))
628-
array([ 1., nan])
629-
"""
630-
if isinstance(dtype, CategoricalDtype) and dtype.categories is not None:
631-
from pandas import to_numeric, to_datetime, to_timedelta
632-
633-
if dtype.categories.is_numeric():
634-
categories = to_numeric(categories, errors='coerce')
635-
elif is_datetime64_dtype(dtype.categories):
636-
categories = to_datetime(categories, errors='coerce')
637-
elif is_timedelta64_dtype(dtype.categories):
638-
categories = to_timedelta(categories, errors='coerce')
639-
640-
return categories
641-
642-
643607
def astype_nansafe(arr, dtype, copy=True):
644608
""" return a view if copy is False, but
645609
need to be very careful as the result shape could change! """

pandas/io/parsers.py

+9-10
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
is_scalar, is_categorical_dtype)
2424
from pandas.core.dtypes.dtypes import CategoricalDtype
2525
from pandas.core.dtypes.missing import isna
26-
from pandas.core.dtypes.cast import (astype_nansafe,
27-
maybe_convert_for_categorical)
26+
from pandas.core.dtypes.cast import astype_nansafe
2827
from pandas.core.index import (Index, MultiIndex, RangeIndex,
2928
_ensure_index_from_sequences)
3029
from pandas.core.series import Series
@@ -1610,15 +1609,15 @@ def _cast_types(self, values, cast_type, column):
16101609
known_cats = (isinstance(cast_type, CategoricalDtype) and
16111610
cast_type.categories is not None)
16121611

1613-
categories = ordered = None
16141612
if known_cats:
1615-
values = maybe_convert_for_categorical(values, cast_type)
1616-
categories = cast_type.categories
1617-
ordered = cast_type.ordered
1618-
elif not is_object_dtype(values):
1619-
values = astype_nansafe(values, str)
1620-
values = Categorical(values, categories=categories,
1621-
ordered=ordered)
1613+
cats = Index(values).unique()
1614+
values = Categorical._from_inferred_categories(
1615+
cats, cats.get_indexer(values), cast_type
1616+
)
1617+
else:
1618+
if not is_object_dtype(values):
1619+
values = astype_nansafe(values, str)
1620+
values = Categorical(values, categories=None, ordered=False)
16221621
else:
16231622
try:
16241623
values = astype_nansafe(values, cast_type, copy=True)

pandas/tests/dtypes/test_cast.py

-39
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from pandas.core.dtypes.cast import (
1717
maybe_downcast_to_dtype,
1818
maybe_convert_objects,
19-
maybe_convert_for_categorical,
2019
cast_scalar_to_array,
2120
infer_dtype_from_scalar,
2221
infer_dtype_from_array,
@@ -300,44 +299,6 @@ def test_maybe_infer_to_datetimelike(self):
300299
[NaT, 'b', 1]]))
301300
assert result.size == 6
302301

303-
def test_maybe_convert_for_categorical_noop(self):
304-
expected = ['1', '2']
305-
result = maybe_convert_for_categorical(expected, None)
306-
assert result == expected
307-
308-
result = maybe_convert_for_categorical(expected, CategoricalDtype())
309-
assert result == expected
310-
311-
result = maybe_convert_for_categorical(expected, 'category')
312-
assert result == expected
313-
314-
@pytest.mark.parametrize('categories, dtype, expected', [
315-
(['1', '2'], [1, 2, 3], np.array([1, 2], dtype='i8')),
316-
(['1', '2', 'a'], [1, 2, 3], np.array([1, 2, np.nan], dtype='f8')),
317-
])
318-
def test_maybe_convert_for_categorical(self, categories, dtype, expected):
319-
dtype = CategoricalDtype(dtype)
320-
result = maybe_convert_for_categorical(categories, dtype)
321-
tm.assert_numpy_array_equal(result, expected)
322-
323-
@pytest.mark.parametrize('categories, dtype, expected', [
324-
(['2016', '2017'], pd.to_datetime(['2016', '2017']),
325-
pd.to_datetime(['2016', '2017'])),
326-
(['2016', '2017', 'bad'], pd.to_datetime(['2016', '2017']),
327-
pd.to_datetime(['2016', '2017', 'NaT'])),
328-
329-
(['1H', '2H'], pd.to_timedelta(['1H', '2H']),
330-
pd.to_timedelta(['1H', '2H'])),
331-
(['1H', '2H', 'bad'], pd.to_timedelta(['1H', '2H']),
332-
pd.to_timedelta(['1H', '2H', 'NaT'])),
333-
334-
])
335-
def test_maybe_convert_for_categorical_dates(self, categories, dtype,
336-
expected):
337-
dtype = CategoricalDtype(dtype)
338-
result = maybe_convert_for_categorical(categories, dtype)
339-
tm.assert_index_equal(result, expected)
340-
341302

342303
class TestConvert(object):
343304

0 commit comments

Comments
 (0)