-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Create and propagate UInt64Index #14937
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -860,15 +860,20 @@ def _convert_for_reindex(self, key, axis=0): | |
return labels[key] | ||
else: | ||
if isinstance(key, Index): | ||
# want Index objects to pass through untouched | ||
keyarr = key | ||
keyarr = labels._convert_index_indexer(key) | ||
else: | ||
# asarray can be unsafe, NumPy strings are weird | ||
keyarr = _asarray_tuplesafe(key) | ||
|
||
if is_integer_dtype(keyarr) and not labels.is_integer(): | ||
keyarr = _ensure_platform_int(keyarr) | ||
return labels.take(keyarr) | ||
if is_integer_dtype(keyarr): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here, if there are too many special cases this makes this code even more impenetrable. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same answer as above. This code is very difficult to insert There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd like to capture this again in a method on the |
||
# Cast the indexer to uint64 if possible so | ||
# that the values returned from indexing are | ||
# also uint64. | ||
keyarr = labels._convert_arr_indexer(keyarr) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a 1-liner here about what this is doing (always will take comments w.r.t. indexing code :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
if not labels.is_integer(): | ||
keyarr = _ensure_platform_int(keyarr) | ||
return labels.take(keyarr) | ||
|
||
return keyarr | ||
|
||
|
@@ -1044,11 +1049,10 @@ def _getitem_iterable(self, key, axis=0): | |
return self.obj.take(inds, axis=axis, convert=False) | ||
else: | ||
if isinstance(key, Index): | ||
# want Index objects to pass through untouched | ||
keyarr = key | ||
keyarr = labels._convert_index_indexer(key) | ||
else: | ||
# asarray can be unsafe, NumPy strings are weird | ||
keyarr = _asarray_tuplesafe(key) | ||
keyarr = labels._convert_arr_indexer(keyarr) | ||
|
||
if is_categorical_dtype(labels): | ||
keyarr = labels._shallow_copy(keyarr) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,8 @@ | |
is_object_dtype, | ||
is_categorical_dtype, | ||
is_bool_dtype, | ||
is_signed_integer_dtype, | ||
is_unsigned_integer_dtype, | ||
is_integer_dtype, is_float_dtype, | ||
is_datetime64_any_dtype, | ||
is_timedelta64_dtype, | ||
|
@@ -199,14 +201,25 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, | |
data = np.array(data, copy=copy, dtype=dtype) | ||
elif inferred in ['floating', 'mixed-integer-float']: | ||
|
||
# if we are actually all equal to integers | ||
# If we are actually all equal to integers, | ||
# then coerce to integer | ||
from .numeric import Int64Index, Float64Index | ||
from .numeric import (Int64Index, UInt64Index, | ||
Float64Index) | ||
try: | ||
res = data.astype('i8') | ||
res = data.astype('i8', copy=False) | ||
if (res == data).all(): | ||
return Int64Index(res, copy=copy, | ||
name=name) | ||
except (OverflowError, TypeError, ValueError): | ||
pass | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't we try to convert to a UInt64 index only if we have oveflow on the int64 conversion? (and not some other error)? I suppose the way the code is now it will just hit both paths then fall thru on a genuine Type/Value error. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't believe so. try:
try:
Int64Index(...)
except OverflowError:
UInt64Index(...)
except (TypeError, ValueError):
pass However, I am more in favor of my method at this point since it is slightly more modular. |
||
|
||
# Conversion to int64 failed (possibly due to | ||
# overflow), so let's try now with uint64. | ||
try: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a comment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
res = data.astype('u8', copy=False) | ||
if (res == data).all(): | ||
return UInt64Index(res, copy=copy, | ||
name=name) | ||
except (TypeError, ValueError): | ||
pass | ||
|
||
|
@@ -235,10 +248,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, | |
IncompatibleFrequency) | ||
if isinstance(data, PeriodIndex): | ||
return PeriodIndex(data, copy=copy, name=name, **kwargs) | ||
if issubclass(data.dtype.type, np.integer): | ||
if is_signed_integer_dtype(data.dtype): | ||
from .numeric import Int64Index | ||
return Int64Index(data, copy=copy, dtype=dtype, name=name) | ||
elif issubclass(data.dtype.type, np.floating): | ||
elif is_unsigned_integer_dtype(data.dtype): | ||
from .numeric import UInt64Index | ||
return UInt64Index(data, copy=copy, dtype=dtype, name=name) | ||
elif is_float_dtype(data.dtype): | ||
from .numeric import Float64Index | ||
return Float64Index(data, copy=copy, dtype=dtype, name=name) | ||
elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): | ||
|
@@ -254,9 +270,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, | |
if dtype is None: | ||
inferred = lib.infer_dtype(subarr) | ||
if inferred == 'integer': | ||
from .numeric import Int64Index | ||
return Int64Index(subarr.astype('i8'), copy=copy, | ||
name=name) | ||
from .numeric import Int64Index, UInt64Index | ||
try: | ||
return Int64Index(subarr.astype('i8'), copy=copy, | ||
name=name) | ||
except OverflowError: | ||
return UInt64Index(subarr.astype('u8'), copy=copy, | ||
name=name) | ||
elif inferred in ['floating', 'mixed-integer-float']: | ||
from .numeric import Float64Index | ||
return Float64Index(subarr, copy=copy, name=name) | ||
|
@@ -1253,6 +1273,40 @@ def is_int(v): | |
|
||
return indexer | ||
|
||
_index_shared_docs['_convert_arr_indexer'] = """ | ||
Convert an array-like indexer to the appropriate dtype. | ||
|
||
Parameters | ||
---------- | ||
keyarr : array-like | ||
Indexer to convert. | ||
|
||
Returns | ||
------- | ||
converted_keyarr : array-like | ||
""" | ||
|
||
@Appender(_index_shared_docs['_convert_arr_indexer']) | ||
def _convert_arr_indexer(self, keyarr): | ||
return keyarr | ||
|
||
_index_shared_docs['_convert_index_indexer'] = """ | ||
Convert an Index indexer to the appropriate dtype. | ||
|
||
Parameters | ||
---------- | ||
keyarr : Index (or sub-class) | ||
Indexer to convert. | ||
|
||
Returns | ||
------- | ||
converted_keyarr : Index (or sub-class) | ||
""" | ||
|
||
@Appender(_index_shared_docs['_convert_index_indexer']) | ||
def _convert_index_indexer(self, keyarr): | ||
return keyarr | ||
|
||
def _convert_list_indexer(self, keyarr, kind=None): | ||
""" | ||
passed a key that is tuplesafe that is integer based | ||
|
@@ -3489,7 +3543,7 @@ def _validate_for_numeric_binop(self, other, op, opstr): | |
raise ValueError("cannot evaluate a numeric op with " | ||
"unequal lengths") | ||
other = _values_from_object(other) | ||
if other.dtype.kind not in ['f', 'i']: | ||
if other.dtype.kind not in ['f', 'i', 'u']: | ||
raise TypeError("cannot evaluate a numeric op " | ||
"with a non-numeric dtype") | ||
elif isinstance(other, (DateOffset, np.timedelta64, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think you need this example. mainly wanted to put the issues together (top section is good though).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough.