Skip to content

Commit fdcf9a1

Browse files
committed
change a couple of sorting.py functions to be non-private (public to pandas internals)
closes #15393
1 parent 90ff22d commit fdcf9a1

File tree

8 files changed

+94
-81
lines changed

8 files changed

+94
-81
lines changed

pandas/core/frame.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -3179,7 +3179,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
31793179
raise ValueError('Length of ascending (%d) != length of by (%d)' %
31803180
(len(ascending), len(by)))
31813181
if len(by) > 1:
3182-
from pandas.core.sorting import _lexsort_indexer
3182+
from pandas.core.sorting import lexsort_indexer
31833183

31843184
def trans(v):
31853185
if needs_i8_conversion(v):
@@ -3193,11 +3193,11 @@ def trans(v):
31933193
raise ValueError('Cannot sort by duplicate column %s' %
31943194
str(x))
31953195
keys.append(trans(k))
3196-
indexer = _lexsort_indexer(keys, orders=ascending,
3197-
na_position=na_position)
3196+
indexer = lexsort_indexer(keys, orders=ascending,
3197+
na_position=na_position)
31983198
indexer = _ensure_platform_int(indexer)
31993199
else:
3200-
from pandas.core.sorting import _nargsort
3200+
from pandas.core.sorting import nargsort
32013201

32023202
by = by[0]
32033203
k = self.xs(by, axis=other_axis).values
@@ -3214,8 +3214,8 @@ def trans(v):
32143214
if isinstance(ascending, (tuple, list)):
32153215
ascending = ascending[0]
32163216

3217-
indexer = _nargsort(k, kind=kind, ascending=ascending,
3218-
na_position=na_position)
3217+
indexer = nargsort(k, kind=kind, ascending=ascending,
3218+
na_position=na_position)
32193219

32203220
new_data = self._data.take(indexer,
32213221
axis=self._get_block_manager_axis(axis),
@@ -3300,17 +3300,17 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
33003300
sort_remaining=sort_remaining)
33013301

33023302
elif isinstance(labels, MultiIndex):
3303-
from pandas.core.sorting import _lexsort_indexer
3303+
from pandas.core.sorting import lexsort_indexer
33043304

33053305
# make sure that the axis is lexsorted to start
33063306
# if not we need to reconstruct to get the correct indexer
33073307
if not labels.is_lexsorted():
33083308
labels = MultiIndex.from_tuples(labels.values)
33093309

3310-
indexer = _lexsort_indexer(labels.labels, orders=ascending,
3311-
na_position=na_position)
3310+
indexer = lexsort_indexer(labels.labels, orders=ascending,
3311+
na_position=na_position)
33123312
else:
3313-
from pandas.core.sorting import _nargsort
3313+
from pandas.core.sorting import nargsort
33143314

33153315
# GH11080 - Check monotonic-ness before sorting an index
33163316
# if monotonic (already sorted), return None or copy() according
@@ -3322,8 +3322,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
33223322
else:
33233323
return self.copy()
33243324

3325-
indexer = _nargsort(labels, kind=kind, ascending=ascending,
3326-
na_position=na_position)
3325+
indexer = nargsort(labels, kind=kind, ascending=ascending,
3326+
na_position=na_position)
33273327

33283328
new_data = self._data.take(indexer,
33293329
axis=self._get_block_manager_axis(axis),

pandas/core/groupby.py

+13-10
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@
4747
from pandas.core.internals import BlockManager, make_block
4848
from pandas.core.series import Series
4949
from pandas.core.panel import Panel
50-
from pandas.core.sorting import (_get_group_index_sorter, get_group_index,
51-
_compress_group_index, _KeyMapper,
52-
decons_obs_group_ids, _get_indices_dict)
50+
from pandas.core.sorting import (get_group_index_sorter, get_group_index,
51+
compress_group_index, get_flattened_iterator,
52+
decons_obs_group_ids, get_indexer_dict)
5353
from pandas.util.decorators import (cache_readonly, Substitution, Appender,
5454
make_signature, deprecate_kwarg)
5555
from pandas.formats.printing import pprint_thing
@@ -731,7 +731,7 @@ def _cumcount_array(self, ascending=True):
731731
(though the default is sort=True) for groupby in general
732732
"""
733733
ids, _, ngroups = self.grouper.group_info
734-
sorter = _get_group_index_sorter(ids, ngroups)
734+
sorter = get_group_index_sorter(ids, ngroups)
735735
ids, count = ids[sorter], len(ids)
736736

737737
if count == 0:
@@ -1618,9 +1618,12 @@ def _get_group_keys(self):
16181618
return self.levels[0]
16191619
else:
16201620
comp_ids, _, ngroups = self.group_info
1621+
16211622
# provide "flattened" iterator for multi-group setting
1622-
mapper = _KeyMapper(comp_ids, ngroups, self.labels, self.levels)
1623-
return [mapper.get_key(i) for i in range(ngroups)]
1623+
return get_flattened_iterator(comp_ids,
1624+
ngroups,
1625+
self.levels,
1626+
self.labels)
16241627

16251628
def apply(self, f, data, axis=0):
16261629
mutated = self.mutated
@@ -1664,7 +1667,7 @@ def indices(self):
16641667
label_list = [ping.labels for ping in self.groupings]
16651668
keys = [_values_from_object(ping.group_index)
16661669
for ping in self.groupings]
1667-
return _get_indices_dict(label_list, keys)
1670+
return get_indexer_dict(label_list, keys)
16681671

16691672
@property
16701673
def labels(self):
@@ -1728,7 +1731,7 @@ def _get_compressed_labels(self):
17281731
if len(all_labels) > 1:
17291732
group_index = get_group_index(all_labels, self.shape,
17301733
sort=True, xnull=True)
1731-
return _compress_group_index(group_index, sort=self.sort)
1734+
return compress_group_index(group_index, sort=self.sort)
17321735

17331736
ping = self.groupings[0]
17341737
return ping.labels, np.arange(len(ping.group_index))
@@ -2029,7 +2032,7 @@ def _aggregate_series_fast(self, obj, func):
20292032

20302033
# avoids object / Series creation overhead
20312034
dummy = obj._get_values(slice(None, 0)).to_dense()
2032-
indexer = _get_group_index_sorter(group_index, ngroups)
2035+
indexer = get_group_index_sorter(group_index, ngroups)
20332036
obj = obj.take(indexer, convert=False)
20342037
group_index = algos.take_nd(group_index, indexer, allow_fill=False)
20352038
grouper = lib.SeriesGrouper(obj, func, group_index, ngroups,
@@ -4207,7 +4210,7 @@ def slabels(self):
42074210
@cache_readonly
42084211
def sort_idx(self):
42094212
# Counting sort indexer
4210-
return _get_group_index_sorter(self.labels, self.ngroups)
4213+
return get_group_index_sorter(self.labels, self.ngroups)
42114214

42124215
def __iter__(self):
42134216
sdata = self._get_sorted_data()

pandas/core/reshape.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from pandas._sparse import IntIndex
2121

2222
from pandas.core.categorical import Categorical, _factorize_from_iterable
23-
from pandas.core.sorting import (get_group_index, _compress_group_index,
23+
from pandas.core.sorting import (get_group_index, compress_group_index,
2424
decons_obs_group_ids)
2525

2626
import pandas.core.algorithms as algos
@@ -157,7 +157,7 @@ def get_result(self):
157157

158158
# filter out missing levels
159159
if values.shape[1] > 0:
160-
col_inds, obs_ids = _compress_group_index(self.sorted_labels[-1])
160+
col_inds, obs_ids = compress_group_index(self.sorted_labels[-1])
161161
# rare case, level values not observed
162162
if len(obs_ids) < self.full_shape[1]:
163163
inds = (value_mask.sum(0) > 0).nonzero()[0]
@@ -267,7 +267,7 @@ def _unstack_multiple(data, clocs):
267267
shape = [len(x) for x in clevels]
268268
group_index = get_group_index(clabels, shape, sort=False, xnull=False)
269269

270-
comp_ids, obs_ids = _compress_group_index(group_index, sort=False)
270+
comp_ids, obs_ids = compress_group_index(group_index, sort=False)
271271
recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels,
272272
xnull=False)
273273

@@ -459,7 +459,7 @@ def _unstack_frame(obj, level, fill_value=None):
459459

460460
def get_compressed_ids(labels, sizes):
461461
ids = get_group_index(labels, sizes, sort=True, xnull=False)
462-
return _compress_group_index(ids, sort=True)
462+
return compress_group_index(ids, sort=True)
463463

464464

465465
def stack(frame, level=-1, dropna=True):

pandas/core/series.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1786,12 +1786,12 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
17861786
new_index, indexer = index.sortlevel(level, ascending=ascending,
17871787
sort_remaining=sort_remaining)
17881788
elif isinstance(index, MultiIndex):
1789-
from pandas.core.sorting import _lexsort_indexer
1790-
indexer = _lexsort_indexer(index.labels, orders=ascending)
1789+
from pandas.core.sorting import lexsort_indexer
1790+
indexer = lexsort_indexer(index.labels, orders=ascending)
17911791
else:
1792-
from pandas.core.sorting import _nargsort
1793-
indexer = _nargsort(index, kind=kind, ascending=ascending,
1794-
na_position=na_position)
1792+
from pandas.core.sorting import nargsort
1793+
indexer = nargsort(index, kind=kind, ascending=ascending,
1794+
na_position=na_position)
17951795

17961796
indexer = _ensure_platform_int(indexer)
17971797
new_index = index.take(indexer)

pandas/core/sorting.py

+26-19
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
from pandas import lib
1414

1515

16+
_INT64_MAX = np.iinfo(np.int64).max
17+
18+
1619
def get_group_index(labels, shape, sort, xnull):
1720
"""
1821
For the particular label_list, gets the offsets into the hypothetical list
@@ -74,7 +77,7 @@ def loop(labels, shape):
7477

7578
# compress what has been done so far in order to avoid overflow
7679
# to retain lexical ranks, obs_ids should be sorted
77-
comp_ids, obs_ids = _compress_group_index(out, sort=sort)
80+
comp_ids, obs_ids = compress_group_index(out, sort=sort)
7881

7982
labels = [comp_ids] + labels[nlev:]
8083
shape = [len(obs_ids)] + shape[nlev:]
@@ -91,10 +94,7 @@ def maybe_lift(lab, size): # promote nan values
9194
return loop(list(labels), list(shape))
9295

9396

94-
_INT64_MAX = np.iinfo(np.int64).max
95-
96-
97-
def _int64_overflow_possible(shape):
97+
def is_int64_overflow_possible(shape):
9898
the_prod = long(1)
9999
for x in shape:
100100
the_prod *= long(x)
@@ -104,7 +104,7 @@ def _int64_overflow_possible(shape):
104104

105105
def decons_group_index(comp_labels, shape):
106106
# reconstruct labels
107-
if _int64_overflow_possible(shape):
107+
if is_int64_overflow_possible(shape):
108108
# at some point group indices are factorized,
109109
# and may not be deconstructed here! wrong path!
110110
raise ValueError('cannot deconstruct factorized group indices!')
@@ -137,7 +137,7 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
137137
lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8')
138138
shape = np.asarray(shape, dtype='i8') + lift
139139

140-
if not _int64_overflow_possible(shape):
140+
if not is_int64_overflow_possible(shape):
141141
# obs ids are deconstructable! take the fast route!
142142
out = decons_group_index(obs_ids, shape)
143143
return out if xnull or not lift.any() \
@@ -148,19 +148,19 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
148148
return [i8copy(lab[i]) for lab in labels]
149149

150150

151-
def _indexer_from_factorized(labels, shape, compress=True):
151+
def indexer_from_factorized(labels, shape, compress=True):
152152
ids = get_group_index(labels, shape, sort=True, xnull=False)
153153

154154
if not compress:
155155
ngroups = (ids.size and ids.max()) + 1
156156
else:
157-
ids, obs = _compress_group_index(ids, sort=True)
157+
ids, obs = compress_group_index(ids, sort=True)
158158
ngroups = len(obs)
159159

160-
return _get_group_index_sorter(ids, ngroups)
160+
return get_group_index_sorter(ids, ngroups)
161161

162162

163-
def _lexsort_indexer(keys, orders=None, na_position='last'):
163+
def lexsort_indexer(keys, orders=None, na_position='last'):
164164
labels = []
165165
shape = []
166166
if isinstance(orders, bool):
@@ -201,10 +201,10 @@ def _lexsort_indexer(keys, orders=None, na_position='last'):
201201
shape.append(n)
202202
labels.append(codes)
203203

204-
return _indexer_from_factorized(labels, shape)
204+
return indexer_from_factorized(labels, shape)
205205

206206

207-
def _nargsort(items, kind='quicksort', ascending=True, na_position='last'):
207+
def nargsort(items, kind='quicksort', ascending=True, na_position='last'):
208208
"""
209209
This is intended to be a drop-in replacement for np.argsort which
210210
handles NaNs. It adds ascending and na_position parameters.
@@ -244,7 +244,7 @@ class _KeyMapper(object):
244244
Ease my suffering. Map compressed group id -> key tuple
245245
"""
246246

247-
def __init__(self, comp_ids, ngroups, labels, levels):
247+
def __init__(self, comp_ids, ngroups, levels, labels):
248248
self.levels = levels
249249
self.labels = labels
250250
self.comp_ids = comp_ids.astype(np.int64)
@@ -263,15 +263,22 @@ def get_key(self, comp_id):
263263
for table, level in zip(self.tables, self.levels))
264264

265265

266-
def _get_indices_dict(label_list, keys):
266+
def get_flattened_iterator(comp_ids, ngroups, levels, labels):
267+
# provide "flattened" iterator for multi-group setting
268+
mapper = _KeyMapper(comp_ids, ngroups, levels, labels)
269+
return [mapper.get_key(i) for i in range(ngroups)]
270+
271+
272+
def get_indexer_dict(label_list, keys):
273+
""" return a dict of {labels} -> {indexers} """
267274
shape = list(map(len, keys))
268275

269276
group_index = get_group_index(label_list, shape, sort=True, xnull=True)
270277
ngroups = ((group_index.size and group_index.max()) + 1) \
271-
if _int64_overflow_possible(shape) \
278+
if is_int64_overflow_possible(shape) \
272279
else np.prod(shape, dtype='i8')
273280

274-
sorter = _get_group_index_sorter(group_index, ngroups)
281+
sorter = get_group_index_sorter(group_index, ngroups)
275282

276283
sorted_labels = [lab.take(sorter) for lab in label_list]
277284
group_index = group_index.take(sorter)
@@ -282,7 +289,7 @@ def _get_indices_dict(label_list, keys):
282289
# ----------------------------------------------------------------------
283290
# sorting levels...cleverly?
284291

285-
def _get_group_index_sorter(group_index, ngroups):
292+
def get_group_index_sorter(group_index, ngroups):
286293
"""
287294
_algos.groupsort_indexer implements `counting sort` and it is at least
288295
O(ngroups), where
@@ -309,7 +316,7 @@ def _get_group_index_sorter(group_index, ngroups):
309316
return group_index.argsort(kind='mergesort')
310317

311318

312-
def _compress_group_index(group_index, sort=True):
319+
def compress_group_index(group_index, sort=True):
313320
"""
314321
Group_index is offsets into cartesian product of all possible labels. This
315322
space can be huge, so this function compresses it, by computing offsets

pandas/indexes/multi.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1405,7 +1405,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True):
14051405
Indices of output values in original index
14061406
14071407
"""
1408-
from pandas.core.sorting import _indexer_from_factorized
1408+
from pandas.core.sorting import indexer_from_factorized
14091409

14101410
if isinstance(level, (compat.string_types, int)):
14111411
level = [level]
@@ -1417,8 +1417,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True):
14171417
if not len(level) == len(ascending):
14181418
raise ValueError("level must have same length as ascending")
14191419

1420-
from pandas.core.sorting import _lexsort_indexer
1421-
indexer = _lexsort_indexer(self.labels, orders=ascending)
1420+
from pandas.core.sorting import lexsort_indexer
1421+
indexer = lexsort_indexer(self.labels, orders=ascending)
14221422

14231423
# level ordering
14241424
else:
@@ -1436,8 +1436,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True):
14361436
else:
14371437
sortorder = level[0]
14381438

1439-
indexer = _indexer_from_factorized(primary, primshp,
1440-
compress=False)
1439+
indexer = indexer_from_factorized(primary, primshp,
1440+
compress=False)
14411441

14421442
if not ascending:
14431443
indexer = indexer[::-1]

0 commit comments

Comments
 (0)