From a47802e5929e8cfb3d3f6bac211df5a240df2aa4 Mon Sep 17 00:00:00 2001 From: chris Date: Thu, 22 Feb 2018 08:32:44 -0500 Subject: [PATCH 1/9] BUG: Fix Series constructor for Categorical with index Fixes Series constructor so that ValueError is raised when a Categorical and index of different length are given. --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/series.py | 15 ++++++++++----- pandas/tests/series/test_constructors.py | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fb22dc40e335f..5330f7e7e998b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -919,6 +919,7 @@ Reshaping - Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) - Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) - Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) +- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`) Other ^^^^^ diff --git a/pandas/core/series.py b/pandas/core/series.py index 26b7fd552b062..c3a99bc9bb7ab 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -213,11 +213,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None, elif is_extension_array_dtype(data) and dtype is not None: # GH12574: Allow dtype=category only, otherwise error - if not data.dtype.is_dtype(dtype): - raise ValueError("Cannot specify a dtype '{}' with an " - "extension array of a different " - "dtype ('{}').".format(dtype, - data.dtype)) + if ((dtype is not None) and + not is_categorical_dtype(dtype)): + raise ValueError("cannot specify a dtype with a " + "Categorical unless " +
"dtype='category'") elif (isinstance(data, types.GeneratorType) or (compat.PY3 and isinstance(data, map))): @@ -235,6 +235,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None, if not is_list_like(data): data = [data] index = com._default_index(len(data)) + else: + if not is_scalar(data) and len(index) != len(data): + raise ValueError('Length of passed values is {val}, ' + 'index implies {ind}' + .format(val=len(data), ind=len(index))) # create/copy the manager if isinstance(data, SingleBlockManager): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 77f9dfcce686d..1a60b854da658 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -400,6 +400,20 @@ def test_constructor_default_index(self): s = Series([0, 1, 2]) tm.assert_index_equal(s.index, pd.Index(np.arange(3))) + @pytest.mark.parametrize('input', [[1, 2, 3], + (1, 2, 3), + list(range(3)), + pd.Categorical(['a', 'b', 'a']), + (i for i in range(3)), + map(lambda x: x, range(3))]) + def test_constructor_index_mismatch(self, input): + # GH 19342 + # test that construction of a Series with an index of different length + # raises an error + msg = 'Length of passed values is 3, index implies 4' + with pytest.raises(ValueError, message=msg): + Series(input, index=np.arange(4)) + def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] From e5423b7476431f3adb65cbc8ee312895cea93c1f Mon Sep 17 00:00:00 2001 From: chris Date: Fri, 23 Feb 2018 09:53:17 -0500 Subject: [PATCH 2/9] Potential fix for failed tests --- pandas/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c3a99bc9bb7ab..24085e9cba02d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -235,8 +235,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, if not is_list_like(data): data = [data] index = 
com._default_index(len(data)) - else: - if not is_scalar(data) and len(index) != len(data): + elif is_list_like(data) and len(index) != len(data): raise ValueError('Length of passed values is {val}, ' 'index implies {ind}' .format(val=len(data), ind=len(index))) From 6f65134bf046dc93e0942082e2ca6c463652a106 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 24 Feb 2018 09:47:27 -0500 Subject: [PATCH 3/9] revert changes from master --- pandas/core/series.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 24085e9cba02d..46cc9a2db219e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -212,12 +212,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None, 'be False.') elif is_extension_array_dtype(data) and dtype is not None: - # GH12574: Allow dtype=category only, otherwise error - if ((dtype is not None) and - not is_categorical_dtype(dtype)): - raise ValueError("cannot specify a dtype with a " - "Categorical unless " - "dtype='category'") + if not data.dtype.is_dtype(dtype): + raise ValueError("Cannot specify a dtype '{}' with an " + "extension array of a different " + "dtype ('{}').".format(dtype, + data.dtype)) elif (isinstance(data, types.GeneratorType) or (compat.PY3 and isinstance(data, map))): From 1297c2b6ac39fe441765681cdd69883357de9f55 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 24 Feb 2018 10:51:46 -0500 Subject: [PATCH 4/9] accommodate numpy scalar --- pandas/core/series.py | 16 ++++++++++++---- pandas/tests/series/test_constructors.py | 8 ++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 46cc9a2db219e..8053651a4877a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -234,10 +234,18 @@ def __init__(self, data=None, index=None, dtype=None, name=None, if not is_list_like(data): data = [data] index = com._default_index(len(data)) - elif
is_list_like(data) and len(index) != len(data): - raise ValueError('Length of passed values is {val}, ' - 'index implies {ind}' - .format(val=len(data), ind=len(index))) + elif is_list_like(data): + + # a scalar numpy array is list-like but doesn't + # have a proper length + try: + if len(index) != len(data): + raise ValueError( + 'Length of passed values is {val}, ' + 'index implies {ind}' + .format(val=len(data), ind=len(index))) + except TypeError: + pass # create/copy the manager if isinstance(data, SingleBlockManager): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 1a60b854da658..2bd478e216ad3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -414,6 +414,14 @@ def test_constructor_index_mismatch(self, input): with pytest.raises(ValueError, message=msg): Series(input, index=np.arange(4)) + def test_constructor_numpy_scalar(self): + # GH 19342 + # construction with a numpy scalar + # should not raise + result = Series(np.array(100), index=np.arange(4)) + expected = Series(100, index=np.arange(4)) + tm.assert_series_equal(result, expected) + def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] From 7847923d8e8c71bb6a8c38f24f2f8c397a18a358 Mon Sep 17 00:00:00 2001 From: cbertinato Date: Sat, 24 Feb 2018 13:53:18 -0500 Subject: [PATCH 5/9] Allow broadcasting of single-element lists --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8053651a4877a..e10261cfecd8d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -239,7 +239,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, # a scalar numpy array is list-like but doesn't # have a proper length try: - if len(index) != len(data): + if len(data) > 1 and len(index) != len(data): raise ValueError( 'Length of passed values is {val}, ' 'index implies {ind}' From 
bb693c7d2ff467686b8d69d7c6e41810d94b6f1b Mon Sep 17 00:00:00 2001 From: cbertinato Date: Sun, 25 Feb 2018 11:06:29 -0500 Subject: [PATCH 6/9] Fix test for 32-bit environment --- pandas/tests/series/test_constructors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2bd478e216ad3..ed2f09166bd47 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -418,8 +418,8 @@ def test_constructor_numpy_scalar(self): # GH 19342 # construction with a numpy scalar # should not raise - result = Series(np.array(100), index=np.arange(4)) - expected = Series(100, index=np.arange(4)) + result = Series(np.array(100), index=np.arange(4), dtype='int64') + expected = Series(100, index=np.arange(4), dtype='int64') tm.assert_series_equal(result, expected) def test_constructor_corner(self): From 29d9519e238f773912a6965b46882e8ba2cb5abe Mon Sep 17 00:00:00 2001 From: cbertinato Date: Sun, 25 Feb 2018 21:40:59 -0500 Subject: [PATCH 7/9] Allow list with len 1 to be broadcast --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e10261cfecd8d..4f35754afeb3d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -239,7 +239,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, # a scalar numpy array is list-like but doesn't # have a proper length try: - if len(data) > 1 and len(index) != len(data): + if len(data) != 1 and len(index) != len(data): raise ValueError( 'Length of passed values is {val}, ' 'index implies {ind}' From e756c7e4e8e277b6ae709264962cab2c25faa0dc Mon Sep 17 00:00:00 2001 From: chris Date: Mon, 26 Feb 2018 08:34:36 -0500 Subject: [PATCH 8/9] Add test for single-element list and index case --- pandas/tests/series/test_constructors.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ed2f09166bd47..36994a9746aeb 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -422,6 +422,14 @@ def test_constructor_numpy_scalar(self): expected = Series(100, index=np.arange(4), dtype='int64') tm.assert_series_equal(result, expected) + def test_constructor_broadcast_list(self): + # GH 19342 + # construction with single-element container and index + # should not raise + result = Series(['foo'], index=['a', 'b', 'c']) + expected = Series(['foo'] * 3, index=['a', 'b', 'c']) + tm.assert_series_equal(result, expected) + def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] From f5db9abd070e112aa3e081b48e6d9c22a563eae0 Mon Sep 17 00:00:00 2001 From: chris Date: Mon, 26 Feb 2018 09:20:48 -0500 Subject: [PATCH 9/9] Disallow broadcasting of single-element lists Modified test setup in io/formats/test_style.py accordingly --- pandas/core/series.py | 2 +- pandas/tests/io/formats/test_style.py | 2 +- pandas/tests/series/test_constructors.py | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4f35754afeb3d..8053651a4877a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -239,7 +239,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, # a scalar numpy array is list-like but doesn't # have a proper length try: - if len(data) != 1 and len(index) != len(data): + if len(index) != len(data): raise ValueError( 'Length of passed values is {val}, ' 'index implies {ind}' diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index bedb11d4fc4ae..adf8e14b756c2 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -24,7 +24,7 @@ def setup_method(self, method): def h(x, foo='bar'): return pd.Series( - ['color: {foo}'.format(foo=foo)], 
index=x.index, name=x.name) + 'color: {foo}'.format(foo=foo), index=x.index, name=x.name) self.h = h self.styler = Styler(self.df) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 36994a9746aeb..25f425ffa0021 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -425,10 +425,8 @@ def test_constructor_numpy_scalar(self): def test_constructor_broadcast_list(self): # GH 19342 # construction with single-element container and index - # should not raise - result = Series(['foo'], index=['a', 'b', 'c']) - expected = Series(['foo'] * 3, index=['a', 'b', 'c']) - tm.assert_series_equal(result, expected) + # should raise + pytest.raises(ValueError, Series, ['foo'], index=['a', 'b', 'c']) def test_constructor_corner(self): df = tm.makeTimeDataFrame()