-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Loc enhancements #22826
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Loc enhancements #22826
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,95 +11,110 @@ | |
class NumericSeriesIndexing(object): | ||
|
||
goal_time = 0.2 | ||
params = [Int64Index, Float64Index] | ||
param = ['index'] | ||
params = [ | ||
(Int64Index, Float64Index), | ||
('unique_monotonic_inc', 'nonunique_monotonic_inc'), | ||
] | ||
param_names = ['index dtype', 'index structure'] | ||
|
||
def setup(self, index): | ||
def setup(self, index, index_structure): | ||
N = 10**6 | ||
idx = index(range(N)) | ||
self.data = Series(np.random.rand(N), index=idx) | ||
indices = { | ||
'unique_monotonic_inc': index(range(N)), | ||
'nonunique_monotonic_inc': index( | ||
list(range(55)) + [54] + list(range(55, N - 1))), | ||
} | ||
self.data = Series(np.random.rand(N), index=indices[index_structure]) | ||
self.array = np.arange(10000) | ||
self.array_list = self.array.tolist() | ||
|
||
def time_getitem_scalar(self, index): | ||
def time_getitem_scalar(self, index, index_structure): | ||
self.data[800000] | ||
|
||
def time_getitem_slice(self, index): | ||
def time_getitem_slice(self, index, index_structure): | ||
self.data[:800000] | ||
|
||
def time_getitem_list_like(self, index): | ||
def time_getitem_list_like(self, index, index_structure): | ||
self.data[[800000]] | ||
|
||
def time_getitem_array(self, index): | ||
def time_getitem_array(self, index, index_structure): | ||
self.data[self.array] | ||
|
||
def time_getitem_lists(self, index): | ||
def time_getitem_lists(self, index, index_structure): | ||
self.data[self.array_list] | ||
|
||
def time_iloc_array(self, index): | ||
def time_iloc_array(self, index, index_structure): | ||
self.data.iloc[self.array] | ||
|
||
def time_iloc_list_like(self, index): | ||
def time_iloc_list_like(self, index, index_structure): | ||
self.data.iloc[[800000]] | ||
|
||
def time_iloc_scalar(self, index): | ||
def time_iloc_scalar(self, index, index_structure): | ||
self.data.iloc[800000] | ||
|
||
def time_iloc_slice(self, index): | ||
def time_iloc_slice(self, index, index_structure): | ||
self.data.iloc[:800000] | ||
|
||
def time_ix_array(self, index): | ||
def time_ix_array(self, index, index_structure): | ||
self.data.ix[self.array] | ||
|
||
def time_ix_list_like(self, index): | ||
def time_ix_list_like(self, index, index_structure): | ||
self.data.ix[[800000]] | ||
|
||
def time_ix_scalar(self, index): | ||
def time_ix_scalar(self, index, index_structure): | ||
self.data.ix[800000] | ||
|
||
def time_ix_slice(self, index): | ||
def time_ix_slice(self, index, index_structure): | ||
self.data.ix[:800000] | ||
|
||
def time_loc_array(self, index): | ||
def time_loc_array(self, index, index_structure): | ||
self.data.loc[self.array] | ||
|
||
def time_loc_list_like(self, index): | ||
def time_loc_list_like(self, index, index_structure): | ||
self.data.loc[[800000]] | ||
|
||
def time_loc_scalar(self, index): | ||
def time_loc_scalar(self, index, index_structure): | ||
self.data.loc[800000] | ||
|
||
def time_loc_slice(self, index): | ||
def time_loc_slice(self, index, index_structure): | ||
self.data.loc[:800000] | ||
|
||
|
||
class NonNumericSeriesIndexing(object): | ||
|
||
goal_time = 0.2 | ||
params = ['string', 'datetime'] | ||
param_names = ['index'] | ||
params = [ | ||
('string', 'datetime'), | ||
('unique_monotonic_inc', 'nonunique_monotonic_inc'), | ||
] | ||
param_names = ['index dtype', 'index structure'] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. added underscores in commit |
||
|
||
def setup(self, index): | ||
N = 10**5 | ||
def setup(self, index, index_structure): | ||
N = 10**6 | ||
indexes = {'string': tm.makeStringIndex(N), | ||
'datetime': date_range('1900', periods=N, freq='s')} | ||
index = indexes[index] | ||
if index_structure == 'nonunique_monotonic_inc': | ||
index = index.insert(item=index[2], loc=2)[:-1] | ||
self.s = Series(np.random.rand(N), index=index) | ||
self.lbl = index[80000] | ||
|
||
def time_getitem_label_slice(self, index): | ||
def time_getitem_label_slice(self, index, index_structure): | ||
self.s[:self.lbl] | ||
|
||
def time_getitem_pos_slice(self, index): | ||
def time_getitem_pos_slice(self, index, index_structure): | ||
self.s[:80000] | ||
|
||
def time_get_value(self, index): | ||
def time_get_value(self, index, index_structure): | ||
with warnings.catch_warnings(record=True): | ||
self.s.get_value(self.lbl) | ||
|
||
def time_getitem_scalar(self, index): | ||
def time_getitem_scalar(self, index, index_structure): | ||
self.s[self.lbl] | ||
|
||
def time_getitem_list_like(self, index, index_structure): | ||
self.s[[self.lbl]] | ||
|
||
|
||
class DataFrameStringIndexing(object): | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -294,14 +294,23 @@ cdef class IndexEngine: | |
result = np.empty(n_alloc, dtype=np.int64) | ||
missing = np.empty(n_t, dtype=np.int64) | ||
|
||
# form the set of the results (like ismember) | ||
members = np.empty(n, dtype=np.uint8) | ||
for i in range(n): | ||
val = values[i] | ||
if val in stargets: | ||
if val not in d: | ||
d[val] = [] | ||
d[val].append(i) | ||
# map each starget to its position in the index | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. what if you drop the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you drop the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you add a case where this is true in the asv's and compare? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yes, in each of the following asv's
|
||
if stargets and len(stargets) < 5 and self.is_monotonic_increasing: | ||
# if there are few enough stargets and the index is monotonically | ||
# increasing, then use binary search for each starget | ||
for starget in stargets: | ||
start = values.searchsorted(starget, side='left') | ||
end = values.searchsorted(starget, side='right') | ||
if start != end: | ||
d[starget] = list(range(start, end)) | ||
else: | ||
# otherwise, map by iterating through all items in the index | ||
for i in range(n): | ||
val = values[i] | ||
if val in stargets: | ||
if val not in d: | ||
d[val] = [] | ||
d[val].append(i) | ||
|
||
for i in range(n_t): | ||
val = targets[i] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you actually try this? I would be shocked if this works when it has spaces in the name, or maybe the names are just mapped by position. In any event, these need underscores.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I tried this and you can see it working in this sample output from the asv run:
I've added underscores in commit
4ad3006
.