-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: Fix `pd.to_numeric` to have consistent behavior for date-like arguments
#43315
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
870769f
1f0b361
08a7bb8
05e1f2d
5a5fd77
f844dc4
ffc1a6e
bcf1cca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,6 +77,30 @@ def test_series(last_val): | |
tm.assert_series_equal(result, expected) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"list_data,kwargs", | ||
[ | ||
(["-3.14", 7], {}), | ||
( | ||
["-3.14", 7, pd.to_datetime(0), pd.NaT, ["30", -10]], | ||
{"errors": "coerce"} | ||
), | ||
( | ||
["-3.14", 7, pd.to_datetime(0), pd.NaT, ["30", -10]], | ||
{"errors": "ignore"} | ||
), | ||
] | ||
) | ||
def test_list_series(list_data, kwargs): | ||
lis = list_data | ||
ser = Series(list_data) | ||
|
||
result = to_numeric(lis, **kwargs) | ||
expected = to_numeric(ser, **kwargs).values | ||
|
||
tm.assert_numpy_array_equal(result, expected) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are you comparing vs a numpy array? These should use assert_series_equal at a minimum. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because one input was a ...
result = to_numeric(lis, **kwargs)
expected = to_numeric(ser, **kwargs)
tm.assert_series_equal(pd.Series(result), expected) |
||
|
||
|
||
@pytest.mark.parametrize( | ||
"data", | ||
[ | ||
|
@@ -111,10 +135,31 @@ def test_error(data, msg): | |
|
||
|
||
@pytest.mark.parametrize( | ||
"errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])] | ||
"data,msg", | ||
[ | ||
([22.06, "-86", pd.NaT], "Invalid object type at position 2"), | ||
( | ||
[pd.to_datetime(0), 22.06, "-86", pd.NaT], | ||
"Invalid object type at position 0" | ||
), | ||
], | ||
) | ||
def test_type_error(data, msg): | ||
ser = Series(data) | ||
|
||
with pytest.raises(TypeError, match=msg): | ||
to_numeric(ser, errors="raise") | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"errors,exp_data", | ||
[ | ||
("ignore", [1, -3.14, "apple", pd.to_datetime(0), pd.NaT]), | ||
("coerce", [1, -3.14, np.nan, np.nan, np.nan]) | ||
], | ||
) | ||
def test_ignore_error(errors, exp_data): | ||
ser = Series([1, -3.14, "apple"]) | ||
ser = Series([1, -3.14, "apple", pd.to_datetime(0), pd.NaT]) | ||
result = to_numeric(ser, errors=errors) | ||
|
||
expected = Series(exp_data) | ||
|
@@ -372,39 +417,6 @@ def test_str(data, exp, transform_assert_equal): | |
assert_equal(result, expected) | ||
|
||
|
||
def test_datetime_like(tz_naive_fixture, transform_assert_equal): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are you removing these? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because I was changing the behavior of the function for date-like objects. |
||
transform, assert_equal = transform_assert_equal | ||
idx = pd.date_range("20130101", periods=3, tz=tz_naive_fixture) | ||
|
||
result = to_numeric(transform(idx)) | ||
expected = transform(idx.asi8) | ||
assert_equal(result, expected) | ||
|
||
|
||
def test_timedelta(transform_assert_equal): | ||
transform, assert_equal = transform_assert_equal | ||
idx = pd.timedelta_range("1 days", periods=3, freq="D") | ||
|
||
result = to_numeric(transform(idx)) | ||
expected = transform(idx.asi8) | ||
assert_equal(result, expected) | ||
|
||
|
||
def test_period(transform_assert_equal): | ||
transform, assert_equal = transform_assert_equal | ||
|
||
idx = pd.period_range("2011-01", periods=3, freq="M", name="") | ||
inp = transform(idx) | ||
|
||
if isinstance(inp, Index): | ||
result = to_numeric(inp) | ||
expected = transform(idx.asi8) | ||
assert_equal(result, expected) | ||
else: | ||
# TODO: PeriodDtype, so support it in to_numeric. | ||
pytest.skip("Missing PeriodDtype support in to_numeric") | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"errors,expected", | ||
[ | ||
|
@@ -450,7 +462,6 @@ def test_errors_invalid_value(): | |
[ | ||
["1", 2, 3], | ||
[1, 2, 3], | ||
np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"), | ||
], | ||
) | ||
@pytest.mark.parametrize( | ||
|
@@ -478,7 +489,6 @@ def test_downcast_basic(data, kwargs, exp_dtype): | |
[ | ||
["1", 2, 3], | ||
[1, 2, 3], | ||
np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"), | ||
], | ||
) | ||
def test_signed_downcast(data, signed_downcast): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
umm what is this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The idea was to change the behavior of `pd.to_datetime` for date-like objects. Refer to this discussion: #43280 (comment)