Skip to content

Commit f47131c

Browse files
committed
code cleanup
1 parent df1dcc8 commit f47131c

File tree

3 files changed

+41
-29
lines changed

3 files changed

+41
-29
lines changed

asv_bench/benchmarks/reindex.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ def setup(self):
1616
data=np.random.rand(10000, 30), columns=range(30))
1717

1818
# multi-index
19-
N = 1000
20-
K = 20
19+
N = 5000
20+
K = 200
2121
level1 = tm.makeStringIndex(N).values.repeat(K)
2222
level2 = np.tile(tm.makeStringIndex(K).values, N)
2323
index = MultiIndex.from_arrays([level1, level2])

pandas/indexes/multi.py

+30-17
Original file line numberDiff line numberDiff line change
@@ -667,14 +667,9 @@ def _has_complex_internals(self):
667667
@cache_readonly
668668
def is_monotonic(self):
669669

670-
def level_values(level):
671-
unique = self.levels[level]
672-
labels = self.labels[level]
673-
return algos.take_1d(unique.values, labels,
674-
fill_value=unique._na_value)
675-
676670
# reversed() because lexsort() wants the most significant key last.
677-
values = [level_values(i) for i in reversed(range(len(self.levels)))]
671+
values = [self._get_level_values(i)
672+
for i in reversed(range(len(self.levels)))]
678673
try:
679674
sort_order = np.lexsort(values)
680675
return Index(sort_order).is_monotonic
@@ -827,26 +822,44 @@ def _try_mi(k):
827822

828823
raise InvalidIndexError(key)
829824

830-
def get_level_values(self, level):
825+
def _get_level_values(self, level):
831826
"""
832-
Return vector of label values for requested level, equal to the length
833-
of the index
827+
Return vector of label values for requested level,
828+
equal to the length of the index
829+
830+
**this is an internal method**
834831
835832
Parameters
836833
----------
837-
level : int or level name
834+
level : int level
838835
839836
Returns
840837
-------
841838
values : ndarray
842839
"""
843-
num = self._get_level_number(level)
844-
unique = self.levels[num] # .values
845-
labels = self.labels[num]
846-
filled = algos.take_1d(unique.values, labels,
840+
841+
unique = self.levels[level]
842+
labels = self.labels[level]
843+
filled = algos.take_1d(unique._values, labels,
847844
fill_value=unique._na_value)
848-
values = unique._shallow_copy(filled)
849-
return values
845+
return filled
846+
847+
def get_level_values(self, level):
848+
"""
849+
Return vector of label values for requested level,
850+
equal to the length of the index
851+
852+
Parameters
853+
----------
854+
level : int or level name
855+
856+
Returns
857+
-------
858+
values : Index
859+
"""
860+
level = self._get_level_number(level)
861+
values = self._get_level_values(level)
862+
return self.levels[level]._shallow_copy(values)
850863

851864
def format(self, space=2, sparsify=None, adjoin=True, names=False,
852865
na_rep=None, formatter=None):

pandas/tools/hashing.py

+9-10
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def hash_pandas_object(obj, index=True, encoding='utf8', hash_key=None,
115115
return h
116116

117117

118-
def hash_tuples(vals, encoding='utf8', hash_key=None):
118+
def hash_tuples(vals, encoding='utf8', hash_key=None, categorize=True):
119119
"""
120120
Hash a MultiIndex / list-of-tuples efficiently
121121
@@ -126,6 +126,9 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
126126
vals : MultiIndex, list-of-tuples, or single tuple
127127
encoding : string, default 'utf8'
128128
hash_key : string key to encode, defaults to _default_hash_key
129+
categorize : bool, default True
130+
Whether to first categorize object arrays before hashing. This is more
131+
efficient when the array contains duplicate values.
129132
130133
Returns
131134
-------
@@ -143,18 +146,14 @@ def hash_tuples(vals, encoding='utf8', hash_key=None):
143146
vals = MultiIndex.from_tuples(vals)
144147

145148
# create a list-of-ndarrays
146-
def get_level_values(num):
147-
unique = vals.levels[num] # .values
148-
labels = vals.labels[num]
149-
filled = algos.take_1d(unique._values, labels,
150-
fill_value=unique._na_value)
151-
return filled
152-
153-
vals = [get_level_values(level)
149+
vals = [vals._get_level_values(level)
154150
for level in range(vals.nlevels)]
155151

156152
# hash the list-of-ndarrays
157-
hashes = (hash_array(l, encoding=encoding, hash_key=hash_key)
153+
hashes = (hash_array(l,
154+
encoding=encoding,
155+
hash_key=hash_key,
156+
categorize=categorize)
158157
for l in vals)
159158
h = _combine_hash_arrays(hashes, len(vals))
160159
if is_tuple:

0 commit comments

Comments
 (0)