Skip to content

Commit 339400e

Browse files
committed
CLN: more generic index creation in algorithms.py
CLN: move coerce_indexer_dtype to common
1 parent 6bbb39e commit 339400e

File tree

3 files changed

+19 -23 lines changed

pandas/core/algorithms.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -166,8 +166,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
166166
elif is_timedelta:
167167
uniques = uniques.astype('m8[ns]')
168168
if isinstance(values, Index):
169-
uniques = values._simple_new(uniques, None, freq=getattr(values, 'freq', None),
170-
tz=getattr(values, 'tz', None))
169+
uniques = values._shallow_copy(uniques, name=None)
171170
elif isinstance(values, Series):
172171
uniques = Index(uniques)
173172
return labels, uniques

pandas/core/categorical.py

+4 -20
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
196196

197197
if fastpath:
198198
# fast path
199-
self._codes = _coerce_codes_dtype(values, categories)
199+
self._codes = com._coerce_indexer_dtype(values, categories)
200200
self.name = name
201201
self.categories = categories
202202
self.ordered = ordered
@@ -289,7 +289,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
289289
self.ordered = False if ordered is None else ordered
290290
self.categories = categories
291291
self.name = name
292-
self._codes = _coerce_codes_dtype(codes, categories)
292+
self._codes = com._coerce_indexer_dtype(codes, categories)
293293

294294
def copy(self):
295295
""" Copy constructor. """
@@ -609,7 +609,7 @@ def add_categories(self, new_categories, inplace=False):
609609
new_categories = self._validate_categories(new_categories)
610610
cat = self if inplace else self.copy()
611611
cat._categories = new_categories
612-
cat._codes = _coerce_codes_dtype(cat._codes, new_categories)
612+
cat._codes = com._coerce_indexer_dtype(cat._codes, new_categories)
613613
if not inplace:
614614
return cat
615615

@@ -1422,22 +1422,6 @@ def _delegate_method(self, name, *args, **kwargs):
14221422

14231423
##### utility routines #####
14241424

1425-
_int8_max = np.iinfo(np.int8).max
1426-
_int16_max = np.iinfo(np.int16).max
1427-
_int32_max = np.iinfo(np.int32).max
1428-
1429-
def _coerce_codes_dtype(codes, categories):
1430-
""" coerce the code input array to an appropriate dtype """
1431-
codes = np.array(codes,copy=False)
1432-
l = len(categories)
1433-
if l < _int8_max:
1434-
return codes.astype('int8')
1435-
elif l < _int16_max:
1436-
return codes.astype('int16')
1437-
elif l < _int32_max:
1438-
return codes.astype('int32')
1439-
return codes.astype('int64')
1440-
14411425
def _get_codes_for_values(values, categories):
14421426
""""
14431427
utility routine to turn values into codes given the specified categories
@@ -1450,7 +1434,7 @@ def _get_codes_for_values(values, categories):
14501434
(hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables)
14511435
t = hash_klass(len(categories))
14521436
t.map_locations(com._values_from_object(categories))
1453-
return _coerce_codes_dtype(t.lookup(values), categories)
1437+
return com._coerce_indexer_dtype(t.lookup(values), categories)
14541438

14551439
def _convert_to_list_like(list_like):
14561440
if hasattr(list_like, "dtype"):

pandas/core/common.py

+14 -1
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,9 @@ class AmbiguousIndexError(PandasError, KeyError):
4949
_INT64_DTYPE = np.dtype(np.int64)
5050
_DATELIKE_DTYPES = set([np.dtype(t) for t in ['M8[ns]', '<M8[ns]', '>M8[ns]',
5151
'm8[ns]', '<m8[ns]', '>m8[ns]']])
52-
52+
_int8_max = np.iinfo(np.int8).max
53+
_int16_max = np.iinfo(np.int16).max
54+
_int32_max = np.iinfo(np.int32).max
5355

5456
# define abstract base classes to enable isinstance type checking on our
5557
# objects
@@ -961,6 +963,17 @@ def diff(arr, n, axis=0):
961963

962964
return out_arr
963965

966+
def _coerce_indexer_dtype(indexer, categories):
967+
""" coerce the indexer input array to the smallest dtype possible """
968+
indexer = np.array(indexer,copy=False)
969+
l = len(categories)
970+
if l < _int8_max:
971+
return indexer.astype('int8')
972+
elif l < _int16_max:
973+
return indexer.astype('int16')
974+
elif l < _int32_max:
975+
return indexer.astype('int32')
976+
return indexer.astype('int64')
964977

965978
def _coerce_to_dtypes(result, dtypes):
966979
""" given a dtypes and a result set, coerce the result elements to the

0 commit comments

Comments
 (0)