-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Ensure 'coerce' actually coerces datatypes #10265
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1887,65 +1887,68 @@ def _maybe_box_datetimelike(value): | |
|
||
_values_from_object = lib.values_from_object | ||
|
||
def _possibly_convert_objects(values, convert_dates=True, | ||
convert_numeric=True, | ||
convert_timedeltas=True): | ||
|
||
def _possibly_convert_objects(values, | ||
datetime=True, | ||
numeric=True, | ||
timedelta=True, | ||
coerce=False, | ||
copy=True): | ||
""" if we have an object dtype, try to coerce dates and/or numbers """ | ||
|
||
# if we have passed in a list or scalar | ||
conversion_count = sum((datetime, numeric, timedelta)) | ||
if conversion_count == 0: | ||
import warnings | ||
warnings.warn('Must explicitly pass type for conversion. Defaulting to ' | ||
'pre-0.17 behavior where datetime=True, numeric=True, ' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice. I added this to the deprecation issue so can remove this at some point. |
||
'timedelta=True and coerce=False', DeprecationWarning) | ||
datetime = numeric = timedelta = True | ||
coerce = False | ||
|
||
if isinstance(values, (list, tuple)): | ||
# List or scalar | ||
values = np.array(values, dtype=np.object_) | ||
if not hasattr(values, 'dtype'): | ||
elif not hasattr(values, 'dtype'): | ||
values = np.array([values], dtype=np.object_) | ||
|
||
# convert dates | ||
if convert_dates and values.dtype == np.object_: | ||
|
||
# we take an aggressive stance and convert to datetime64[ns] | ||
if convert_dates == 'coerce': | ||
new_values = _possibly_cast_to_datetime( | ||
values, 'M8[ns]', coerce=True) | ||
|
||
# if we are all nans then leave me alone | ||
if not isnull(new_values).all(): | ||
values = new_values | ||
|
||
else: | ||
values = lib.maybe_convert_objects( | ||
values, convert_datetime=convert_dates) | ||
|
||
# convert timedeltas | ||
if convert_timedeltas and values.dtype == np.object_: | ||
|
||
if convert_timedeltas == 'coerce': | ||
from pandas.tseries.timedeltas import to_timedelta | ||
values = to_timedelta(values, coerce=True) | ||
|
||
# if we are all nans then leave me alone | ||
if not isnull(new_values).all(): | ||
values = new_values | ||
|
||
else: | ||
values = lib.maybe_convert_objects( | ||
values, convert_timedelta=convert_timedeltas) | ||
|
||
# convert to numeric | ||
if values.dtype == np.object_: | ||
if convert_numeric: | ||
try: | ||
new_values = lib.maybe_convert_numeric( | ||
values, set(), coerce_numeric=True) | ||
|
||
# if we are all nans then leave me alone | ||
if not isnull(new_values).all(): | ||
values = new_values | ||
|
||
except: | ||
pass | ||
else: | ||
|
||
# soft-conversion | ||
values = lib.maybe_convert_objects(values) | ||
elif not is_object_dtype(values.dtype): | ||
# If not object, do not attempt conversion | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. need to handle the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function doesn't have a copy keyword - this is handled by the caller.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a bug. This needs handling either IN There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the bug is in Maybe copy needs to go down a level for this edge case. I don't really understand what There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep could be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pushed copy one level deeper so I can trap these. |
||
values = values.copy() if copy else values | ||
return values | ||
|
||
# If 1 flag is coerce, ensure 2 others are False | ||
if coerce: | ||
if conversion_count > 1: | ||
raise ValueError("Only one of 'datetime', 'numeric' or " | ||
"'timedelta' can be True when when coerce=True.") | ||
|
||
# Immediate return if coerce | ||
if datetime: | ||
return pd.to_datetime(values, coerce=True, box=False) | ||
elif timedelta: | ||
return pd.to_timedelta(values, coerce=True, box=False) | ||
elif numeric: | ||
return lib.maybe_convert_numeric(values, set(), coerce_numeric=True) | ||
|
||
# Soft conversions | ||
if datetime: | ||
values = lib.maybe_convert_objects(values, | ||
convert_datetime=datetime) | ||
|
||
if timedelta and is_object_dtype(values.dtype): | ||
# Object check to ensure only run if previous did not convert | ||
values = lib.maybe_convert_objects(values, | ||
convert_timedelta=timedelta) | ||
|
||
if numeric and is_object_dtype(values.dtype): | ||
try: | ||
converted = lib.maybe_convert_numeric(values, | ||
set(), | ||
coerce_numeric=True) | ||
# If all NaNs, then do not-alter | ||
values = converted if not isnull(converted).all() else values | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue here is that if it isn't numeric and coerce=False, it will raise and do nothing, but if it is not numeric but is a timestamp then it will get nan-filled. This is why this logic is here. |
||
values = values.copy() if copy else values | ||
except: | ||
pass | ||
|
||
return values | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This example seems a bit strange, as for
numeric
, the setting ofcoerce
does not matter?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to include case where coerce matters
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe add an example of what a usage with coerce did when u have 0 and 1 of the values being converted (eg no convert / convert ) - this is the example that could break code so want to make it prominent
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added one.