Skip to content

CLN: Index.__new__ #27883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 4 commits on Aug 13, 2019
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3447,15 +3447,14 @@ def _get_info_slice(obj, indexer):
if not is_list_like(exclude):
exclude = (exclude,) if exclude is not None else ()

selection = tuple(map(frozenset, (include, exclude)))
selection = (frozenset(include), frozenset(exclude))

if not any(selection):
raise ValueError("at least one of include or exclude must be nonempty")

# convert the myriad valid dtypes object to a single representation
include, exclude = map(
lambda x: frozenset(map(infer_dtype_from_object, x)), selection
)
include = frozenset(infer_dtype_from_object(x) for x in include)
exclude = frozenset(infer_dtype_from_object(x) for x in exclude)
for dtypes in (include, exclude):
invalidate_string_dtypes(dtypes)

Expand Down
124 changes: 47 additions & 77 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,12 +309,12 @@ def __new__(

elif (
is_datetime64_any_dtype(data)
or (dtype is not None and is_datetime64_any_dtype(dtype))
or is_datetime64_any_dtype(dtype)
or "tz" in kwargs
):
from pandas import DatetimeIndex

if dtype is not None and is_dtype_equal(_o_dtype, dtype):
if is_dtype_equal(_o_dtype, dtype):
# GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
# will raise in the where `data` is already tz-aware. So
# we leave it out of this step and cast to object-dtype after
Expand All @@ -329,12 +329,10 @@ def __new__(
)
return result

elif is_timedelta64_dtype(data) or (
dtype is not None and is_timedelta64_dtype(dtype)
):
elif is_timedelta64_dtype(data) or is_timedelta64_dtype(dtype):
from pandas import TimedeltaIndex

if dtype is not None and is_dtype_equal(_o_dtype, dtype):
if is_dtype_equal(_o_dtype, dtype):
# Note we can pass copy=False because the .astype below
# will always make a copy
result = TimedeltaIndex(data, copy=False, name=name, **kwargs)
Expand All @@ -355,11 +353,9 @@ def __new__(
elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
data = np.asarray(data)
if not (dtype is None or is_object_dtype(dtype)):

# coerce to the provided dtype
data = dtype.construct_array_type()._from_sequence(
data, dtype=dtype, copy=False
)
ea_cls = dtype.construct_array_type()
data = ea_cls._from_sequence(data, dtype=dtype, copy=False)

# coerce to the object dtype
data = data.astype(object)
Expand All @@ -368,58 +364,48 @@ def __new__(
# index-like
elif isinstance(data, (np.ndarray, Index, ABCSeries)):
if dtype is not None:
try:

# we need to avoid having numpy coerce
# things that look like ints/floats to ints unless
# they are actually ints, e.g. '0' and 0.0
# should not be coerced
# GH 11836
if is_integer_dtype(dtype):
inferred = lib.infer_dtype(data, skipna=False)
if inferred == "integer":
data = maybe_cast_to_integer_array(data, dtype, copy=copy)
elif inferred in ["floating", "mixed-integer-float"]:
if isna(data).any():
raise ValueError("cannot convert float NaN to integer")

if inferred == "mixed-integer-float":
data = maybe_cast_to_integer_array(data, dtype)

# If we are actually all equal to integers,
# then coerce to integer.
try:
return cls._try_convert_to_int_index(
data, copy, name, dtype
)
except ValueError:
pass

# Return an actual float index.
from .numeric import Float64Index

return Float64Index(data, copy=copy, dtype=dtype, name=name)

elif inferred == "string":
pass
else:
data = data.astype(dtype)
elif is_float_dtype(dtype):
inferred = lib.infer_dtype(data, skipna=False)
if inferred == "string":
# we need to avoid having numpy coerce
# things that look like ints/floats to ints unless
# they are actually ints, e.g. '0' and 0.0
# should not be coerced
# GH 11836
if is_integer_dtype(dtype):
inferred = lib.infer_dtype(data, skipna=False)
if inferred == "integer":
data = maybe_cast_to_integer_array(data, dtype, copy=copy)
elif inferred in ["floating", "mixed-integer-float"]:
if isna(data).any():
raise ValueError("cannot convert float NaN to integer")

if inferred == "mixed-integer-float":
data = maybe_cast_to_integer_array(data, dtype)

# If we are actually all equal to integers,
# then coerce to integer.
try:
return cls._try_convert_to_int_index(
data, copy, name, dtype
)
except ValueError:
pass
else:
data = data.astype(dtype)

# Return an actual float index.
from .numeric import Float64Index

return Float64Index(data, copy=copy, dtype=dtype, name=name)

elif inferred == "string":
pass
else:
data = np.array(data, dtype=dtype, copy=copy)

except (TypeError, ValueError) as e:
msg = str(e)
if (
"cannot convert float" in msg
or "Trying to coerce float values to integer" in msg
):
raise
data = data.astype(dtype)
elif is_float_dtype(dtype):
inferred = lib.infer_dtype(data, skipna=False)
if inferred == "string":
pass
else:
data = data.astype(dtype)
else:
data = np.array(data, dtype=dtype, copy=copy)

# maybe coerce to a sub-class
from pandas.core.indexes.period import PeriodIndex, IncompatibleFrequency
Expand Down Expand Up @@ -555,16 +541,6 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs):

Must be careful not to recurse.
"""
if not hasattr(values, "dtype"):
if (values is None or not len(values)) and dtype is not None:
values = np.empty(0, dtype=dtype)
else:
values = np.array(values, copy=False)
if is_object_dtype(values):
values = cls(
values, name=name, dtype=dtype, **kwargs
)._ndarray_values

if isinstance(values, (ABCSeries, ABCIndexClass)):
# Index._data must always be an ndarray.
# This is no-copy for when _values is an ndarray,
Expand Down Expand Up @@ -1862,8 +1838,6 @@ def inferred_type(self):

@cache_readonly
def is_all_dates(self):
if self._data is None:
return False
return is_datetime_array(ensure_object(self.values))

# --------------------------------------------------------------------
Expand Down Expand Up @@ -3134,13 +3108,9 @@ def _convert_scalar_indexer(self, key, kind=None):
"""

@Appender(_index_shared_docs["_convert_slice_indexer"])
def _convert_slice_indexer(self, key, kind=None):
def _convert_slice_indexer(self, key: slice, kind=None):
assert kind in ["ix", "loc", "getitem", "iloc", None]

# if we are not a slice, then we are done
if not isinstance(key, slice):
return key

# validate iloc
if kind == "iloc":
return slice(
Expand Down