Skip to content

BUG: Series construction with EA dtype and index but no data fails #33846

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
May 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a06e1a4
BUG: Series construction with EA dtype and index but no data fails
simonjayhawkins Apr 28, 2020
6ae3342
redo tests
simonjayhawkins Apr 29, 2020
72f8ec3
Merge remote-tracking branch 'upstream/master' into broadcast-ea-bug
simonjayhawkins Apr 29, 2020
7a17b33
Merge remote-tracking branch 'upstream/master' into broadcast-ea-bug
simonjayhawkins Apr 30, 2020
1881a03
add test_series_constructor_scalar_with_one_element_index
simonjayhawkins Apr 30, 2020
45ef9a5
move dtype to test function parameters
simonjayhawkins Apr 30, 2020
a339f05
comment - whatsnew
simonjayhawkins Apr 30, 2020
6bfbd1a
comment - merge tests
simonjayhawkins Apr 30, 2020
1c8bd8c
special case to avoid _try_cast call
simonjayhawkins Apr 30, 2020
840df49
troubleshoot
simonjayhawkins Apr 30, 2020
9cf81ee
less failures
simonjayhawkins Apr 30, 2020
d427714
maybe_cast_to_datetime
simonjayhawkins Apr 30, 2020
d47cba4
add failure reason for pyarrow
simonjayhawkins Apr 30, 2020
c5cc30d
update issue ref for ArrowBoolDtype
simonjayhawkins Apr 30, 2020
421aa7c
remove sparse test overrides
simonjayhawkins Apr 30, 2020
4c51356
ref to new issue for JSONDtype RecursionError
simonjayhawkins Apr 30, 2020
ff4ff63
collection as scalar msg and gh ref
simonjayhawkins Apr 30, 2020
aa11bb6
Merge remote-tracking branch 'upstream/master' into broadcast-ea-bug
simonjayhawkins May 1, 2020
268f3a5
fix corner case
simonjayhawkins May 1, 2020
2df2bf1
comment - maybe_cast_to_datetime
simonjayhawkins May 1, 2020
211328c
Merge remote-tracking branch 'upstream/master' into broadcast-ea-bug
simonjayhawkins May 1, 2020
e598f4c
add test for gh-33559
simonjayhawkins May 1, 2020
8c44e23
troubleshoot timeout
simonjayhawkins May 1, 2020
dac66d0
troubleshoot timeout
simonjayhawkins May 1, 2020
b363fb2
Merge remote-tracking branch 'upstream/master' into broadcast-ea-bug
simonjayhawkins May 1, 2020
f2026d3
troubleshoot timeout
simonjayhawkins May 1, 2020
52fcd7f
skip on py3.6
simonjayhawkins May 1, 2020
4907f34
Merge remote-tracking branch 'upstream/master' into broadcast-ea-bug
simonjayhawkins May 1, 2020
663c863
Merge branch 'master' into broadcast-ea-bug
jreback May 2, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,8 @@ Sparse
ExtensionArray
^^^^^^^^^^^^^^

- Fixed bug where :meth:`Serires.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`)
- Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`)
- Fixed bug where :class:`Series` construction with EA dtype and index but no data or scalar data fails (:issue:`26469`)
- Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`).


Expand Down
7 changes: 6 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,11 @@ def sanitize_array(
subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
elif isinstance(data, abc.Set):
raise TypeError("Set type is unordered")
elif lib.is_scalar(data) and index is not None and dtype is not None:
data = maybe_cast_to_datetime(data, dtype)
if not lib.is_scalar(data):
data = data[0]
subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)
else:
subarr = _try_cast(data, dtype, copy, raise_cast_failure)

Expand Down Expand Up @@ -516,7 +521,7 @@ def _try_cast(

Parameters
----------
arr : ndarray, list, tuple, iterator (catchall)
arr : ndarray, scalar, list, tuple, iterator (catchall)
Excludes: ExtensionArray, Series, Index.
dtype : np.dtype, ExtensionDtype or None
copy : bool
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1384,7 +1384,9 @@ def maybe_cast_to_datetime(value, dtype, errors: str = "raise"):
pass

# coerce datetimelike to object
elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype):
elif is_datetime64_dtype(
getattr(value, "dtype", None)
) and not is_datetime64_dtype(dtype):
if is_object_dtype(dtype):
if value.dtype != DT64NS_DTYPE:
value = value.astype(DT64NS_DTYPE)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/extension/arrow/test_bool.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.compat import PY37

import pandas as pd
import pandas._testing as tm
from pandas.tests.extension import base
Expand Down Expand Up @@ -55,6 +57,18 @@ def test_from_dtype(self, data):
def test_from_sequence_from_cls(self, data):
super().test_from_sequence_from_cls(data)

@pytest.mark.skipif(not PY37, reason="timeout on Linux py36_locale")
@pytest.mark.xfail(reason="pa.NULL is not recognised as scalar, GH-33899")
def test_series_constructor_no_data_with_index(self, dtype, na_value):
    """Run the inherited index-without-data constructor test.

    Marked xfail; the failure noted for this dtype is
    ``pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays``.
    """
    inherited = super().test_series_constructor_no_data_with_index
    inherited(dtype, na_value)

@pytest.mark.skipif(not PY37, reason="timeout on Linux py36_locale")
@pytest.mark.xfail(reason="pa.NULL is not recognised as scalar, GH-33899")
def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
    """Run the inherited scalar-NA-with-index constructor test.

    Marked xfail; the failure noted for this dtype is
    ``pyarrow.lib.ArrowInvalid: only handle 1-dimensional arrays``.
    """
    inherited = super().test_series_constructor_scalar_na_with_index
    inherited(dtype, na_value)


class TestReduce(base.BaseNoReduceTests):
def test_reduce_series_boolean(self):
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/extension/base/constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,31 @@ def test_series_constructor(self, data):
assert result2.dtype == data.dtype
assert isinstance(result2._mgr.blocks[0], ExtensionBlock)

def test_series_constructor_no_data_with_index(self, dtype, na_value):
    """Check Series construction from only an index and an extension dtype.

    With a non-empty index the result is compared against a Series holding
    ``na_value`` at every label; with an empty index it is compared against
    an empty Series carrying an object-dtype index.
    """
    labels = [1, 2, 3]
    result = pd.Series(index=labels, dtype=dtype)
    expected = pd.Series([na_value] * len(labels), index=labels, dtype=dtype)
    self.assert_series_equal(result, expected)

    # GH 33559 - empty index
    empty_object_index = pd.Index([], dtype="object")
    result = pd.Series(index=[], dtype=dtype)
    expected = pd.Series([], index=empty_object_index, dtype=dtype)
    self.assert_series_equal(result, expected)
Comment on lines +41 to +44
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this PR appears to also close #33559

the index discrepancy is consistent with non-EA types.

>>> import pandas as pd
>>> pd.__version__
'1.1.0.dev0+1446.g1c88e6aff'
>>> pd.Series(dtype="int64", index=[]).index
Index([], dtype='object')
>>>
>>> pd.Series(dtype="int64").index
Index([], dtype='object')
>>>
>>> pd.Series([], dtype="int64").index
RangeIndex(start=0, stop=0, step=1)
>>>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there is another PR trying to clean this up


def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
    """Check that a scalar ``na_value`` is broadcast over the given index.

    The broadcast result is compared against a Series built from an
    explicit list that repeats ``na_value`` once per label.
    """
    labels = [1, 2, 3]
    broadcast = pd.Series(na_value, index=labels, dtype=dtype)
    repeated = pd.Series([na_value] * len(labels), index=labels, dtype=dtype)
    self.assert_series_equal(broadcast, repeated)

def test_series_constructor_scalar_with_index(self, data, dtype):
    """Check that a scalar taken from ``data`` is broadcast over an index.

    Exercised with a three-label index and with a single-label index; each
    result is compared against a Series built from an explicit list that
    repeats the scalar once per label.
    """
    scalar = data[0]
    for labels in ([1, 2, 3], ["foo"]):
        result = pd.Series(scalar, index=labels, dtype=dtype)
        expected = pd.Series([scalar] * len(labels), index=labels, dtype=dtype)
        self.assert_series_equal(result, expected)

@pytest.mark.parametrize("from_series", [True, False])
def test_dataframe_constructor_from_dict(self, data, from_series):
if from_series:
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,21 @@ def test_from_dtype(self, data):
# construct from our dtype & string dtype
pass

@pytest.mark.xfail(reason="RecursionError, GH-33900")
def test_series_constructor_no_data_with_index(self, dtype, na_value):
    """Run the inherited index-without-data constructor test.

    Marked xfail; the failure noted for this dtype is
    ``RecursionError: maximum recursion depth exceeded in comparison``.
    """
    inherited = super().test_series_constructor_no_data_with_index
    inherited(dtype, na_value)

@pytest.mark.xfail(reason="RecursionError, GH-33900")
def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
    """Run the inherited scalar-NA-with-index constructor test.

    Marked xfail; the failure noted for this dtype is
    ``RecursionError: maximum recursion depth exceeded in comparison``.
    """
    inherited = super().test_series_constructor_scalar_na_with_index
    inherited(dtype, na_value)

@pytest.mark.xfail(reason="collection as scalar, GH-33901")
def test_series_constructor_scalar_with_index(self, data, dtype):
    """Run the inherited scalar-with-index constructor test.

    Marked xfail; the failure noted for this dtype is
    ``TypeError: All values must be of type <class 'collections.abc.Mapping'>``.
    """
    inherited = super().test_series_constructor_scalar_with_index
    inherited(data, dtype)


class TestReshaping(BaseJSON, base.BaseReshapingTests):
@pytest.mark.skip(reason="Different definitions of NA")
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ def test_array_from_scalars(self, data):
# ValueError: PandasArray must be 1-dimensional.
super().test_array_from_scalars(data)

@skip_nested
def test_series_constructor_scalar_with_index(self, data, dtype):
# ValueError: Length of passed values is 1, index implies 3.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for the object dtype, the scalar is a tuple, so this failure is related to #33846 (comment)

super().test_series_constructor_scalar_with_index(data, dtype)


class TestDtype(BaseNumPyTests, base.BaseDtypeTests):
@pytest.mark.skip(reason="Incorrect expected.")
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1444,3 +1444,9 @@ def test_constructor_datetime64(self):

series = Series(dates)
assert np.issubdtype(series.dtype, np.dtype("M8[ns]"))

def test_constructor_datetimelike_scalar_to_string_dtype(self):
# https://github.com/pandas-dev/pandas/pull/33846
result = Series("M", index=[1, 2, 3], dtype="string")
expected = pd.Series(["M", "M", "M"], index=[1, 2, 3], dtype="string")
tm.assert_series_equal(result, expected)