-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Series construction with EA dtype and index but no data fails #33846
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
a06e1a4
6ae3342
72f8ec3
7a17b33
1881a03
45ef9a5
a339f05
6bfbd1a
1c8bd8c
840df49
9cf81ee
d427714
d47cba4
c5cc30d
421aa7c
4c51356
ff4ff63
aa11bb6
268f3a5
2df2bf1
211328c
e598f4c
8c44e23
dac66d0
b363fb2
f2026d3
52fcd7f
4907f34
663c863
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -516,7 +516,7 @@ def _try_cast( | |
|
||
Parameters | ||
---------- | ||
arr : ndarray, list, tuple, iterator (catchall) | ||
arr : ndarray, scalar, list, tuple, iterator (catchall) | ||
Excludes: ExtensionArray, Series, Index. | ||
dtype : np.dtype, ExtensionDtype or None | ||
copy : bool | ||
|
@@ -533,6 +533,10 @@ def _try_cast( | |
if isinstance(dtype, ExtensionDtype) and dtype.kind != "M": | ||
# create an extension array from its dtype | ||
# DatetimeTZ case needs to go through maybe_cast_to_datetime | ||
|
||
if lib.is_scalar(arr): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we could look into how to identify a collection that could be considered a 'scalar' for some EA, e.g. JSONDtype, although I think that is out-of-scope for the issue that this PR attempts to fix (i.e. IntegerArray, where the scalars are scalars) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rather than this I would call: but I wouldn't do this right here, rather on L453, e.g. add an elif is_scalar(data) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that's option 3 in #33846 (comment) do this just for EA types and keep the code path the same for non-EA types? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, this will work generically There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm getting a few failures in pandas/tests/series/test_constructors.py. I'll push the change anyway and use the CI to see what else fails while I investigate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. kk |
||
arr = [arr] | ||
|
||
array_type = dtype.construct_array_type()._from_sequence | ||
subarr = array_type(arr, dtype=dtype, copy=copy) | ||
return subarr | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,6 +55,16 @@ def test_from_dtype(self, data): | |
def test_from_sequence_from_cls(self, data): | ||
super().test_from_sequence_from_cls(data) | ||
|
||
@pytest.mark.xfail(reason="GH-26469") | ||
def test_series_constructor_no_data_with_index(self, data, na_value): | ||
# pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays | ||
super().test_series_constructor_no_data_with_index(data, na_value) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know why it is failing for this dtype? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the traceback is
hmm, seems to be because using lib.is_scalar, pa.NULL is passed to sanitize_array
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche not sure if you want this fixed here. will raise a separate issue for this case in the meantime. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, that's fine, we don't really support this dtype anyway, it's only to test certain things |
||
|
||
@pytest.mark.xfail(reason="GH-26469") | ||
def test_series_constructor_scalar_na_with_index(self, data, na_value): | ||
# pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays | ||
super().test_series_constructor_scalar_na_with_index(data, na_value) | ||
|
||
|
||
class TestReduce(base.BaseNoReduceTests): | ||
def test_reduce_series_boolean(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,32 @@ def test_series_constructor(self, data): | |
assert result2.dtype == data.dtype | ||
assert isinstance(result2._mgr.blocks[0], ExtensionBlock) | ||
|
||
def test_series_constructor_no_data_with_index(self, data, na_value): | ||
dtype = data.dtype | ||
result = pd.Series(index=[1, 2, 3], dtype=dtype) | ||
expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype) | ||
self.assert_series_equal(result, expected) | ||
|
||
def test_series_constructor_scalar_na_with_index(self, data, na_value): | ||
dtype = data.dtype | ||
result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype) | ||
expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype) | ||
self.assert_series_equal(result, expected) | ||
|
||
def test_series_constructor_scalar_with_index(self, data): | ||
scalar = data[0] | ||
dtype = data.dtype | ||
result = pd.Series(scalar, index=[1, 2, 3], dtype=dtype) | ||
expected = pd.Series([scalar] * 3, index=[1, 2, 3], dtype=dtype) | ||
self.assert_series_equal(result, expected) | ||
|
||
def test_series_constructor_scalar_with_one_element_index(self, data): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would maybe just combine this one with the test above (both are about scalar with index) ? (and one test less to override in the subclasses) |
||
scalar = data[0] | ||
dtype = data.dtype | ||
result = pd.Series(scalar, index=["foo"], dtype=dtype) | ||
expected = pd.Series([scalar], index=["foo"], dtype=dtype) | ||
self.assert_series_equal(result, expected) | ||
|
||
@pytest.mark.parametrize("from_series", [True, False]) | ||
def test_dataframe_constructor_from_dict(self, data, from_series): | ||
if from_series: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -150,6 +150,26 @@ def test_from_dtype(self, data): | |
# construct from our dtype & string dtype | ||
pass | ||
|
||
@pytest.mark.xfail(reason="GH-26469") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should these be a new issue? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a few checks to go but I think we need a discussion on when to allow a collection to be treated as scalar. so yes, will probably raise an issue for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. kk, and just flip the references to that, otherwise lgtm. ping on green. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
def test_series_constructor_no_data_with_index(self, data, na_value): | ||
# RecursionError: maximum recursion depth exceeded in comparison | ||
super().test_series_constructor_no_data_with_index(data, na_value) | ||
|
||
@pytest.mark.xfail(reason="GH-26469") | ||
def test_series_constructor_scalar_na_with_index(self, data, na_value): | ||
# RecursionError: maximum recursion depth exceeded in comparison | ||
super().test_series_constructor_scalar_na_with_index(data, na_value) | ||
|
||
@pytest.mark.xfail(reason="GH-26469") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a more informative message |
||
def test_series_constructor_scalar_with_index(self, data): | ||
# TypeError: All values must be of type <class 'collections.abc.Mapping'> | ||
super().test_series_constructor_scalar_with_index(data) | ||
|
||
@pytest.mark.xfail(reason="GH-26469") | ||
def test_series_constructor_scalar_with_one_element_index(self, data): | ||
# TypeError: All values must be of type <class 'collections.abc.Mapping'> | ||
super().test_series_constructor_scalar_with_one_element_index(data) | ||
|
||
|
||
class TestReshaping(BaseJSON, base.BaseReshapingTests): | ||
@pytest.mark.skip(reason="Different definitions of NA") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,7 +82,15 @@ class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests): | |
|
||
|
||
class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests): | ||
pass | ||
@pytest.mark.xfail(reason="GH-26469") | ||
def test_series_constructor_scalar_with_index(self, data): | ||
# TypeError: data type not understood | ||
super().test_series_constructor_scalar_with_index(data) | ||
|
||
@pytest.mark.xfail(reason="GH-26469") | ||
def test_series_constructor_scalar_with_one_element_index(self, data): | ||
# TypeError: data type not understood | ||
super().test_series_constructor_scalar_with_one_element_index(data) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is some special casing for datetime in the Series construction. Although fixing this could also be considered out-of-scope for the issue that this PR attempts to close, I could look into this further and maybe raise a separate issue if not fixed here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Opening a separate issue is fine for me as well, either way There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is not failing with the changes as they stand atm. the previous 'fix' was inside a |
||
|
||
|
||
class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -151,6 +151,16 @@ def test_array_from_scalars(self, data): | |
# ValueError: PandasArray must be 1-dimensional. | ||
super().test_array_from_scalars(data) | ||
|
||
@skip_nested | ||
def test_series_constructor_scalar_with_index(self, data): | ||
# ValueError: Length of passed values is 1, index implies 3. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for the object dtype, the scalar is a tuple, so this failure is related to #33846 (comment) |
||
super().test_series_constructor_scalar_with_index(data) | ||
|
||
@skip_nested | ||
def test_series_constructor_scalar_with_one_element_index(self, data): | ||
# ValueError: PandasArray must be 1-dimensional. | ||
super().test_series_constructor_scalar_with_one_element_index(data) | ||
|
||
|
||
class TestDtype(BaseNumPyTests, base.BaseDtypeTests): | ||
@pytest.mark.skip(reason="Incorrect expected.") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,7 +112,15 @@ def test_view(self, data): | |
|
||
|
||
class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): | ||
pass | ||
@pytest.mark.xfail(reason="GH-26469", strict=False) | ||
def test_series_constructor_no_data_with_index(self, data, na_value): | ||
# ValueError: Cannot convert non-finite values (NA or inf) to integer | ||
super().test_series_constructor_no_data_with_index(data, na_value) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know why sparse is failing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, so much for the xfail: it is no longer failing. It was failing for only one of the fill values before, hence the strict=False. |
||
|
||
@pytest.mark.xfail(reason="GH-26469", strict=False) | ||
def test_series_constructor_scalar_na_with_index(self, data, na_value): | ||
# ValueError: Cannot convert non-finite values (NA or inf) to integer | ||
super().test_series_constructor_scalar_na_with_index(data, na_value) | ||
|
||
|
||
class TestReshaping(BaseSparseTests, base.BaseReshapingTests): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or scalar data?