Skip to content

BUG: Fix Series constructor for Categorical with index #19714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 27, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,7 @@ Reshaping
- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`)
- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`)
- Bug in :func:`DataFrame.iterrows`, which would infer strings not compliant with `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ as datetimes (:issue:`19671`)
- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`)

Other
^^^^^
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
'be False.')

elif is_extension_array_dtype(data) and dtype is not None:
# GH12574: Allow dtype=category only, otherwise error
if not data.dtype.is_dtype(dtype):
raise ValueError("Cannot specify a dtype '{}' with an "
"extension array of a different "
Expand All @@ -235,6 +234,18 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
if not is_list_like(data):
data = [data]
index = com._default_index(len(data))
elif is_list_like(data):

# a scalar numpy array is list-like but doesn't
# have a proper length
try:
if len(index) != len(data):
raise ValueError(
'Length of passed values is {val}, '
'index implies {ind}'
.format(val=len(data), ind=len(index)))
except TypeError:
pass

# create/copy the manager
if isinstance(data, SingleBlockManager):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/formats/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def setup_method(self, method):

def h(x, foo='bar'):
return pd.Series(
['color: {foo}'.format(foo=foo)], index=x.index, name=x.name)
'color: {foo}'.format(foo=foo), index=x.index, name=x.name)

self.h = h
self.styler = Styler(self.df)
Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,34 @@ def test_constructor_default_index(self):
s = Series([0, 1, 2])
tm.assert_index_equal(s.index, pd.Index(np.arange(3)))

@pytest.mark.parametrize('input', [[1, 2, 3],
(1, 2, 3),
list(range(3)),
pd.Categorical(['a', 'b', 'a']),
(i for i in range(3)),
map(lambda x: x, range(3))])
def test_constructor_index_mismatch(self, input):
# GH 19342
# test that construction of a Series with an index of different length
# raises an error
msg = 'Length of passed values is 3, index implies 4'
with pytest.raises(ValueError, message=msg):
Series(input, index=np.arange(4))

def test_constructor_numpy_scalar(self):
# GH 19342
# construction with a numpy scalar
# should not raise
result = Series(np.array(100), index=np.arange(4), dtype='int64')
expected = Series(100, index=np.arange(4), dtype='int64')
tm.assert_series_equal(result, expected)

def test_constructor_broadcast_list(self):
# GH 19342
# construction with single-element container and index
# should raise
pytest.raises(ValueError, Series, ['foo'], index=['a', 'b', 'c'])

def test_constructor_corner(self):
df = tm.makeTimeDataFrame()
objs = [df, df]
Expand Down