-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API/BUG: Handling Dtype Coercions in Series/Index (GH 15832) #15859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
676a4e5
e12bca7
9fc617b
8b463cb
43456a5
faa5c5c
1c90e7e
278c2fb
a8cd752
bbdea4b
d2e26ac
3c868a4
20ac5c6
14ed83b
3d0e76f
1726408
1f8e9b7
83cfc5d
417188a
939ae11
86e9d5e
359086d
50950f5
012fb57
35a5ff1
b1e6632
a1033cb
b78f4cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,8 @@ | |
from pandas.core.dtypes.cast import ( | ||
maybe_upcast, infer_dtype_from_scalar, | ||
maybe_convert_platform, | ||
maybe_cast_to_datetime, maybe_castable) | ||
maybe_cast_to_datetime, maybe_castable, | ||
maybe_cast_to_integer_array) | ||
from pandas.core.dtypes.missing import isnull, notnull | ||
|
||
from pandas.core.common import (is_bool_indexer, | ||
|
@@ -2941,9 +2942,13 @@ def _try_cast(arr, take_fast_path): | |
return arr | ||
|
||
try: | ||
if is_float_dtype(dtype) or is_integer_dtype(dtype): | ||
subarr = maybe_cast_to_integer_array(np.asarray(arr), dtype) | ||
|
||
subarr = maybe_cast_to_datetime(arr, dtype) | ||
if not is_extension_type(subarr): | ||
subarr = np.array(subarr, dtype=dtype, copy=copy) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am going to move this entire section (e.g. …). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you want to move part/all of this, I am OK with that as well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
except (ValueError, TypeError): | ||
if is_categorical_dtype(dtype): | ||
subarr = Categorical(arr) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,26 @@ | ||
# coding=utf-8 | ||
# pylint: disable-msg=E1101,W0612 | ||
|
||
import pytest | ||
|
||
from datetime import datetime, timedelta | ||
|
||
from numpy import nan | ||
import numpy as np | ||
import numpy.ma as ma | ||
import pandas as pd | ||
|
||
from pandas.core.dtypes.common import ( | ||
is_categorical_dtype, | ||
is_datetime64tz_dtype) | ||
import pytest | ||
from numpy import nan | ||
from pandas import (Index, Series, isnull, date_range, | ||
NaT, period_range, MultiIndex, IntervalIndex) | ||
from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex | ||
from pandas import compat | ||
from pandas.compat import lrange, range, zip, OrderedDict, long | ||
|
||
import pandas.util.testing as tm | ||
from pandas._libs import lib | ||
from pandas._libs.tslib import iNaT | ||
|
||
from pandas.compat import lrange, range, zip, OrderedDict, long | ||
from pandas import compat | ||
from pandas.core.dtypes.common import ( | ||
is_categorical_dtype, | ||
is_datetime64tz_dtype) | ||
from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex | ||
from pandas.util.testing import assert_series_equal | ||
import pandas.util.testing as tm | ||
|
||
from .common import TestData | ||
|
||
|
||
|
@@ -301,12 +297,35 @@ def test_constructor_pass_nan_nat(self): | |
tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) | ||
|
||
def test_constructor_cast(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test name isn't particularly informative. Let's break this test up so that we can check for specific errors and also utilize @pytest.mark.parametrize(...)
def test_constructor_unsigned_dtype_overflow(self):
...
@pytest.mark.parametrize(...)
def test_constructor_coerce_float_fail(self):
... Also, I prefer if we can use … Regarding pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float): I can begin to see / understand why it fails, but what is the exact reason it is breaking? If it is separate from the ones you added, let's make that a test of its own. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ucals: You broke up the tests, but your new tests still use … |
||
pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float) | ||
msg = "could not convert string to float" | ||
with tm.assert_raises_regex(ValueError, msg): | ||
Series(['a', 'b', 'c'], dtype=float) | ||
|
||
@pytest.mark.parametrize("unsigned_integers", ['uint8', 'uint16', 'uint32', | ||
'uint64']) | ||
def test_constructor_unsigned_dtype_overflow(self, unsigned_integers): | ||
# GH 15832 | ||
msg = 'Trying to coerce negative values to unsigned integers' | ||
with tm.assert_raises_regex(OverflowError, msg): | ||
Series([-1], dtype=unsigned_integers) | ||
|
||
@pytest.mark.parametrize("integers", ['uint8', 'uint16', 'uint32', | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's use some more informative names like … |
||
'uint64', 'int32', 'int64', 'int16', | ||
'int8']) | ||
@pytest.mark.parametrize("floats", ['float16', 'float32']) | ||
def test_constructor_coerce_float_fail(self, integers, floats): | ||
# GH 15832 | ||
msg = 'Trying to coerce float values to integers' | ||
with tm.assert_raises_regex(ValueError, msg): | ||
Series([1, 2, 3.5], dtype=integers) | ||
|
||
s = Series([1, 2, 3.5], dtype=floats) | ||
expected = Series([1, 2, 3.5]).astype(floats) | ||
assert_series_equal(s, expected) | ||
|
||
def test_constructor_dtype_nocast(self): | ||
# 1572 | ||
s = Series([1, 2, 3]) | ||
|
||
s2 = Series(s, dtype=np.int64) | ||
|
||
s2[1] = 5 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the spirit of good documentation, let's add some examples here!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Excellent! Nice examples.