Skip to content

REF: use cached inferred_type when calling lib.infer_dtype(index) #33537

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 25, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 23 additions & 18 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1170,15 +1170,15 @@ cdef class Seen:
or self.nat_)


cdef object _try_infer_map(object v):
cdef object _try_infer_map(object dtype):
"""
If its in our map, just return the dtype.
"""
cdef:
object val
str attr
for attr in ['name', 'kind', 'base']:
val = getattr(v.dtype, attr)
for attr in ["name", "kind", "base"]:
val = getattr(dtype, attr)
if val in _TYPE_MAP:
return _TYPE_MAP[val]
return None
Expand Down Expand Up @@ -1291,44 +1291,49 @@ def infer_dtype(value: object, skipna: bool = True) -> str:

if util.is_array(value):
values = value
elif hasattr(value, 'dtype'):
elif hasattr(value, "inferred_type") and skipna is False:
# Index, use the cached attribute if possible, populate the cache otherwise
return value.inferred_type
elif hasattr(value, "dtype"):
# this will handle ndarray-like
# e.g. categoricals
try:
values = getattr(value, '_values', getattr(value, 'values', value))
except TypeError:
# This gets hit if we have an EA, since cython expects `values`
# to be an ndarray
value = _try_infer_map(value)
dtype = value.dtype
if not isinstance(dtype, np.dtype):
value = _try_infer_map(value.dtype)
if value is not None:
return value

# its ndarray like but we can't handle
# its ndarray-like but we can't handle
raise ValueError(f"cannot infer type for {type(value)}")

# Unwrap Series/Index
values = np.asarray(value)

else:
if not isinstance(value, list):
value = list(value)
from pandas.core.dtypes.cast import (
construct_1d_object_array_from_listlike)
values = construct_1d_object_array_from_listlike(value)

# See also: construct_1d_object_array_from_listlike
values = np.empty(len(value), dtype=object)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you change from using the function for this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to untangle the dependency structure.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can revert; this isn't related to the rest of the PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That function exists for a reason: to avoid constantly repeating the two lines you added. Please revert.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated + green

values[:] = value

# make contiguous
values = values.ravel()
# for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup
values = values.ravel(order="K")

val = _try_infer_map(values)
val = _try_infer_map(values.dtype)
if val is not None:
return val

if values.dtype != np.object_:
values = values.astype('O')
values = values.astype("O")

if skipna:
values = values[~isnaobj(values)]

n = len(values)
if n == 0:
return 'empty'
return "empty"

# try to use a valid value
for i in range(n):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1982,7 +1982,7 @@ def inferred_type(self) -> str_t:
"""
Return a string of the type inferred from the values.
"""
return lib.infer_dtype(self, skipna=False)
return lib.infer_dtype(self._values, skipna=False)

@cache_readonly
def is_all_dates(self) -> bool:
Expand Down