Skip to content

Commit c3fd7e8

Browse files
committed
code cleanup
1 parent df1dcc8 commit c3fd7e8

File tree

3 files changed

+41
-30
lines changed

3 files changed

+41
-30
lines changed

asv_bench/benchmarks/reindex.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ def setup(self):
1616
data=np.random.rand(10000, 30), columns=range(30))
1717

1818
# multi-index
19-
N = 1000
20-
K = 20
19+
N = 5000
20+
K = 200
2121
level1 = tm.makeStringIndex(N).values.repeat(K)
2222
level2 = np.tile(tm.makeStringIndex(K).values, N)
2323
index = MultiIndex.from_arrays([level1, level2])

pandas/indexes/multi.py

+30-17
Original file line numberDiff line numberDiff line change
@@ -667,14 +667,9 @@ def _has_complex_internals(self):
667667
@cache_readonly
668668
def is_monotonic(self):
669669

670-
def level_values(level):
671-
unique = self.levels[level]
672-
labels = self.labels[level]
673-
return algos.take_1d(unique.values, labels,
674-
fill_value=unique._na_value)
675-
676670
# reversed() because lexsort() wants the most significant key last.
677-
values = [level_values(i) for i in reversed(range(len(self.levels)))]
671+
values = [self._get_level_values(i)
672+
for i in reversed(range(len(self.levels)))]
678673
try:
679674
sort_order = np.lexsort(values)
680675
return Index(sort_order).is_monotonic
@@ -827,26 +822,44 @@ def _try_mi(k):
827822

828823
raise InvalidIndexError(key)
829824

830-
def get_level_values(self, level):
825+
def _get_level_values(self, level):
831826
"""
832-
Return vector of label values for requested level, equal to the length
833-
of the index
827+
Return vector of label values for requested level,
828+
equal to the length of the index
829+
830+
**this is an internal method**
834831
835832
Parameters
836833
----------
837-
level : int or level name
834+
level : int level number (already resolved; level names are not accepted)
838835
839836
Returns
840837
-------
841838
values : ndarray
842839
"""
843-
num = self._get_level_number(level)
844-
unique = self.levels[num] # .values
845-
labels = self.labels[num]
846-
filled = algos.take_1d(unique.values, labels,
840+
841+
unique = self.levels[level]
842+
labels = self.labels[level]
843+
filled = algos.take_1d(unique._values, labels,
847844
fill_value=unique._na_value)
848-
values = unique._shallow_copy(filled)
849-
return values
845+
return filled
846+
847+
def get_level_values(self, level):
848+
"""
849+
Return vector of label values for requested level,
850+
equal to the length of the index
851+
852+
Parameters
853+
----------
854+
level : int or level name
855+
856+
Returns
857+
-------
858+
values : Index
859+
"""
860+
level = self._get_level_number(level)
861+
values = self._get_level_values(level)
862+
return self.levels[level]._shallow_copy(values)
850863

851864
def format(self, space=2, sparsify=None, adjoin=True, names=False,
852865
na_rep=None, formatter=None):

pandas/tools/hashing.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
import numpy as np
77
from pandas import _hash, Series, factorize, Categorical, Index, MultiIndex
8-
import pandas.core.algorithms as algos
98
from pandas.lib import is_bool_array
109
from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame
1110
from pandas.types.common import (is_categorical_dtype, is_numeric_dtype,
@@ -115,7 +114,7 @@ def hash_pandas_object(obj, index=True, encoding='utf8', hash_key=None,
115114
return h
116115

117116

118-
def hash_tuples(vals, encoding='utf8', hash_key=None):
117+
def hash_tuples(vals, encoding='utf8', hash_key=None, categorize=True):
119118
"""
120119
Hash a MultiIndex / list-of-tuples efficiently
121120
@@ -126,6 +125,9 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
126125
vals : MultiIndex, list-of-tuples, or single tuple
127126
encoding : string, default 'utf8'
128127
hash_key : string key to encode, defaults to _default_hash_key
128+
categorize : bool, default True
129+
Whether to first categorize object arrays before hashing. This is more
130+
efficient when the array contains duplicate values.
129131
130132
Returns
131133
-------
@@ -143,18 +145,14 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
143145
vals = MultiIndex.from_tuples(vals)
144146

145147
# create a list-of-ndarrays
146-
def get_level_values(num):
147-
unique = vals.levels[num] # .values
148-
labels = vals.labels[num]
149-
filled = algos.take_1d(unique._values, labels,
150-
fill_value=unique._na_value)
151-
return filled
152-
153-
vals = [get_level_values(level)
148+
vals = [vals._get_level_values(level)
154149
for level in range(vals.nlevels)]
155150

156151
# hash the list-of-ndarrays
157-
hashes = (hash_array(l, encoding=encoding, hash_key=hash_key)
152+
hashes = (hash_array(l,
153+
encoding=encoding,
154+
hash_key=hash_key,
155+
categorize=categorize)
158156
for l in vals)
159157
h = _combine_hash_arrays(hashes, len(vals))
160158
if is_tuple:

0 commit comments

Comments
 (0)