From d4d02a2b4db6705e2816f47e54f53d55a94d2f3f Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 6 Dec 2017 10:26:54 +0100 Subject: [PATCH 1/7] REF: implement and use construct_1d_array_from_listlike --- pandas/core/common.py | 18 ++++++------------ pandas/core/dtypes/cast.py | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 76a69030463ec..800f0c7e63f06 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -21,6 +21,7 @@ from pandas.core.dtypes.missing import isna, isnull, notnull # noqa from pandas.api import types from pandas.core.dtypes import common +from pandas.core.dtypes.cast import construct_1d_array_from_listlike # compat from pandas.errors import ( # noqa @@ -381,7 +382,7 @@ def _asarray_tuplesafe(values, dtype=None): return values.values if isinstance(values, list) and dtype in [np.object_, object]: - return lib.list_to_object_array(values) + return construct_1d_array_from_listlike(values) result = np.asarray(values, dtype=dtype) @@ -389,17 +390,10 @@ def _asarray_tuplesafe(values, dtype=None): result = np.asarray(values, dtype=object) if result.ndim == 2: - if isinstance(values, list): - return lib.list_to_object_array(values) - else: - # Making a 1D array that safely contains tuples is a bit tricky - # in numpy, leading to the following - try: - result = np.empty(len(values), dtype=object) - result[:] = values - except ValueError: - # we have a list-of-list - result[:] = [tuple(x) for x in values] + # Avoid building an array of arrays: + # TODO: verify whether any path hits this except #18819 (invalid) + values = [tuple(x) for x in values] + result = construct_1d_array_from_listlike(values) return result diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index a97b84ab9cc5b..6e68272f174ff 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1162,3 +1162,28 @@ def construct_1d_arraylike_from_scalar(value, length, dtype): subarr.fill(value) return subarr + + +def construct_1d_array_from_listlike(values, dtype='object'): + """ + Transform any list-like object in a 1-dimensional numpy array. + + Parameters + ---------- + values : any iterable which has a len() + dtype : dtype, default 'object' + + Raises + ------ + TypeError + * If `values` does not have a len() + + Returns + ------- + 1-dimensional numpy array of dtype "dtype" + """ + # numpy will try to interpret nested lists as further dimensions, hence + # making a 1D array that contains list-likes is a bit tricky: + result = np.empty(len(values), dtype=dtype) + result[:] = values + return result From 0afff94555d555ff6b15b81849798f424f29bc35 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 8 Dec 2017 10:17:41 +0100 Subject: [PATCH 2/7] REF: Replace lib.list_to_object_array with construct_1d_array_from_listlike --- pandas/_libs/lib.pyx | 17 ----------------- pandas/_libs/src/inference.pyx | 4 +++- pandas/core/algorithms.py | 7 ++++--- pandas/core/dtypes/cast.py | 2 +- pandas/core/ops.py | 7 ++++--- pandas/tests/frame/test_constructors.py | 4 ++-- pandas/tests/indexes/test_multi.py | 6 +++--- 7 files changed, 17 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a39f83d5261c0..5a62203f79642 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -148,23 +148,6 @@ def item_from_zerodim(object val): return util.unbox_if_zerodim(val) -@cython.wraparound(False) -@cython.boundscheck(False) -cpdef ndarray[object] list_to_object_array(list obj): - """ - Convert list to object ndarray. Seriously can\'t believe - I had to write this function. - """ - cdef: - Py_ssize_t i, n = len(obj) - ndarray[object] arr = np.empty(n, dtype=object) - - for i in range(n): - arr[i] = obj[i] - - return arr - - @cython.wraparound(False) @cython.boundscheck(False) def fast_unique(ndarray[object] values): diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index e15b4693432d9..34b5277d0b94e 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -7,6 +7,8 @@ from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone from datetime import datetime, timedelta +from pandas.core.dtypes.cast import construct_1d_array_from_listlike + iNaT = util.get_nat() cdef bint PY2 = sys.version_info[0] == 2 @@ -349,7 +351,7 @@ def infer_dtype(object value, bint skipna=False): else: if not isinstance(value, list): value = list(value) - values = list_to_object_array(value) + values = construct_1d_array_from_listlike(value) values = getattr(values, 'values', values) val = _try_infer_map(values) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 0ceb8966fd3c8..b8bca3bf5c054 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,7 +6,8 @@ from warnings import warn, catch_warnings import numpy as np -from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.cast import ( + maybe_promote, construct_1d_array_from_listlike) from pandas.core.dtypes.generic import ( ABCSeries, ABCIndex, ABCIndexClass, ABCCategorical) @@ -171,7 +172,7 @@ def _ensure_arraylike(values): if inferred in ['mixed', 'string', 'unicode']: if isinstance(values, tuple): values = list(values) - values = lib.list_to_object_array(values) + values = construct_1d_array_from_listlike(values) else: values = np.asarray(values) return values @@ -401,7 +402,7 @@ def isin(comps, values): .format(values_type=type(values).__name__)) if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): - values = lib.list_to_object_array(list(values)) + values = construct_1d_array_from_listlike(list(values)) comps, dtype, _ = _ensure_data(comps) values, _, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6e68272f174ff..ca3d05cb15024 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -42,7 +42,7 @@ def maybe_convert_platform(values): """ try to do platform conversion, allow ndarray or list here """ if isinstance(values, (list, tuple)): - values = lib.list_to_object_array(list(values)) + values = construct_1d_array_from_listlike(list(values)) if getattr(values, 'dtype', None) == np.object_: if hasattr(values, '_values'): values = values._values diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2fb0cbb14c225..4bf6856e5f1b4 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -33,7 +33,8 @@ is_list_like, is_scalar, _ensure_object) -from pandas.core.dtypes.cast import maybe_upcast_putmask, find_common_type +from pandas.core.dtypes.cast import ( + maybe_upcast_putmask, find_common_type, construct_1d_array_from_listlike) from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, @@ -750,7 +751,7 @@ def wrapper(left, right, name=name, na_op=na_op): def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): - y = lib.list_to_object_array(y) + y = construct_1d_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): if not is_object_dtype(y.dtype): y = y.astype(np.object_) @@ -901,7 +902,7 @@ def na_op(x, y): result = op(x, y) except TypeError: if isinstance(y, list): - y = lib.list_to_object_array(y) + y = construct_1d_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries)): if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 22ad2258e70bc..68a74df5f63df 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -21,8 +21,8 @@ MultiIndex, Timedelta, Timestamp, date_range, Categorical) import pandas as pd -import pandas._libs.lib as lib import pandas.util.testing as tm +from pandas.core.dtypes.cast import construct_1d_array_from_listlike from pandas.tests.frame.common import TestData @@ -1199,7 +1199,7 @@ def test_constructor_from_items(self): DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples - arr = lib.list_to_object_array( + arr = construct_1d_array_from_listlike( [('bar', 'baz')] * len(self.mixed_frame)) self.mixed_frame['foo'] = arr row_items = [(idx, list(self.mixed_frame.xs(idx))) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 510ca6ac83ec0..9b2d92df39252 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -18,7 +18,7 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.indexes.base import InvalidIndexError -from pandas._libs import lib +from pandas.core.dtypes.cast import construct_1d_array_from_listlike from pandas._libs.lib import Timestamp import pandas.util.testing as tm @@ -913,7 +913,7 @@ def test_from_product_invalid_input(self): def test_from_product_datetimeindex(self): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = lib.list_to_object_array([(1, pd.Timestamp( + etalon = construct_1d_array_from_listlike([(1, pd.Timestamp( '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) tm.assert_numpy_array_equal(mi.values, etalon) @@ -940,7 +940,7 @@ def test_values_boxed(self): (3, pd.Timestamp('2000-01-03'))] mi = pd.MultiIndex.from_tuples(tuples) tm.assert_numpy_array_equal(mi.values, - lib.list_to_object_array(tuples)) + construct_1d_array_from_listlike(tuples)) # Check that code branches for boxed values produce identical results tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values) From f5f37732589e33deb34aa0dde33faef4ce9553dd Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 8 Dec 2017 10:48:39 +0100 Subject: [PATCH 3/7] TST: asv benchmarks for Series constructors --- asv_bench/benchmarks/ctors.py | 36 +++++++++++++++++++++++------- asv_bench/benchmarks/frame_ctor.py | 12 ++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py index 6276dc324ca0d..65af7b077d80f 100644 --- a/asv_bench/benchmarks/ctors.py +++ b/asv_bench/benchmarks/ctors.py @@ -1,10 +1,36 @@ import numpy as np -from pandas import DataFrame, Series, Index, DatetimeIndex, Timestamp +from pandas import Series, Index, DatetimeIndex, Timestamp from .pandas_vb_common import setup # noqa -class Constructors(object): +class SeriesConstructors(object): + + goal_time = 0.2 + + param_names = ["data_fmt", "with_index"] + params = [[lambda x: x, + list, + lambda arr: list(arr.astype(str)), + lambda arr: dict(zip(range(len(arr)), arr)), + lambda arr: [(i, -i) for i in arr], + lambda arr: [[i, -i] for i in arr], + lambda arr: ([(i, -i) for i in arr][:-1] + [None]), + lambda arr: ([[i, -i] for i in arr][:-1] + [None])], + [False, True]] + + def setup(self, data_fmt, with_index): + N = 10**4 + np.random.seed(1234) + arr = np.random.randn(N) + self.data = data_fmt(arr) + self.index = np.arange(N) if with_index else None + + def time_series_constructor(self, data_fmt, with_index): + Series(self.data, index=self.index) + + +class SeriesDtypesConstructors(object): goal_time = 0.2 @@ -19,12 +45,6 @@ def setup(self): self.s = Series([Timestamp('20110101'), Timestamp('20120101'), Timestamp('20130101')] * N * 10) - def time_frame_from_ndarray(self): - DataFrame(self.arr) - - def time_series_from_ndarray(self): - Series(self.data, index=self.index) - def time_index_from_array_string(self): Index(self.arr_str) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 6761d48d25919..391a209cb2a89 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -81,3 +81,15 @@ def setup(self, nrows): def time_frame_from_records_generator(self, nrows): # issue-6700 self.df = DataFrame.from_records(self.gen, nrows=nrows) + + +class FromNDArray(object): + + goal_time = 0.2 + + def setup(self): + N = 100000 + self.data = np.random.randn(N) + + def time_frame_from_ndarray(self): + self.df = DataFrame(self.data) From 44eedfaa8fcb6bea2638d643883e40cd084e1e4b Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 14 Dec 2017 09:03:04 +0100 Subject: [PATCH 4/7] REF: Solve circular dependency with python 2.7 --- pandas/_libs/src/inference.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 34b5277d0b94e..50aabf3af9ad5 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -7,8 +7,6 @@ from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone from datetime import datetime, timedelta -from pandas.core.dtypes.cast import construct_1d_array_from_listlike - iNaT = util.get_nat() cdef bint PY2 = sys.version_info[0] == 2 @@ -351,6 +349,7 @@ def infer_dtype(object value, bint skipna=False): else: if not isinstance(value, list): value = list(value) + from pandas.core.dtypes.cast import construct_1d_array_from_listlike values = construct_1d_array_from_listlike(value) values = getattr(values, 'values', values) From 0892280dfb9b7bd27d00e091d40f5229eb51fc06 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 14 Dec 2017 14:36:30 +0100 Subject: [PATCH 5/7] TST: test_cast_1d_array --- pandas/tests/dtypes/test_cast.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 82a35fa711e8c..daa27bcfd2b3f 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -21,7 +21,8 @@ infer_dtype_from_array, maybe_convert_string_to_object, maybe_convert_scalar, - find_common_type) + find_common_type, + construct_1d_array_from_listlike) from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -407,3 +408,24 @@ def test_period_dtype(self): np.dtype('datetime64[ns]'), np.object, np.int64]: assert find_common_type([dtype, dtype2]) == np.object assert find_common_type([dtype2, dtype]) == np.object + + @pytest.mark.parametrize('dtype', [int, float, str, object, None]) + @pytest.mark.parametrize('datum1', [1, 2., "3", (4, 5), [6, 7], None]) + @pytest.mark.parametrize('datum2', [8, 9., "10", (11, 12), [13, 14], None]) + def test_cast_1d_array(self, dtype, datum1, datum2): + data = [datum1, datum2] + try: + # Conversion to 1d array is possible if requested dtype is object + possible = dtype is object + # ... or the following succeeds _and_ the result has dimension 1: + possible = possible or np.array(data, dtype=dtype).ndim == 1 + if not possible: + exc = ValueError + except (ValueError, TypeError) as exception: + exc = type(exception) + + if possible: + assert list(construct_1d_array_from_listlike(data)) == data + else: + pytest.raises(exc, construct_1d_array_from_listlike, + data, dtype=dtype) From c4e635f4142bffca7f8e8b3cbe5f3584d24614b8 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 18 Dec 2017 08:05:23 +0100 Subject: [PATCH 6/7] REF: only support object dtype --- pandas/_libs/src/inference.pyx | 5 +++-- pandas/core/algorithms.py | 6 +++--- pandas/core/common.py | 6 +++--- pandas/core/dtypes/cast.py | 12 ++++++------ pandas/core/ops.py | 7 ++++--- pandas/tests/dtypes/test_cast.py | 25 +++++++------------------ pandas/tests/frame/test_constructors.py | 4 ++-- pandas/tests/indexes/test_multi.py | 12 ++++++------ 8 files changed, 34 insertions(+), 43 deletions(-) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 50aabf3af9ad5..8bfed4fe60fed 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -349,8 +349,9 @@ def infer_dtype(object value, bint skipna=False): else: if not isinstance(value, list): value = list(value) - from pandas.core.dtypes.cast import construct_1d_array_from_listlike - values = construct_1d_array_from_listlike(value) + from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike) + values = construct_1d_object_array_from_listlike(value) values = getattr(values, 'values', values) val = _try_infer_map(values) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b8bca3bf5c054..167f215b6c0ac 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -7,7 +7,7 @@ import numpy as np from pandas.core.dtypes.cast import ( - maybe_promote, construct_1d_array_from_listlike) + maybe_promote, construct_1d_object_array_from_listlike) from pandas.core.dtypes.generic import ( ABCSeries, ABCIndex, ABCIndexClass, ABCCategorical) @@ -172,7 +172,7 @@ def _ensure_arraylike(values): if inferred in ['mixed', 'string', 'unicode']: if isinstance(values, tuple): values = list(values) - values = construct_1d_array_from_listlike(values) + values = construct_1d_object_array_from_listlike(values) else: values = np.asarray(values) return values @@ -402,7 +402,7 @@ def isin(comps, values): .format(values_type=type(values).__name__)) if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): - values = construct_1d_array_from_listlike(list(values)) + values = construct_1d_object_array_from_listlike(list(values)) comps, dtype, _ = _ensure_data(comps) values, _, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/core/common.py b/pandas/core/common.py index 800f0c7e63f06..35696be5b2a03 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -21,7 +21,7 @@ from pandas.core.dtypes.missing import isna, isnull, notnull # noqa from pandas.api import types from pandas.core.dtypes import common -from pandas.core.dtypes.cast import construct_1d_array_from_listlike +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike # compat from pandas.errors import ( # noqa @@ -382,7 +382,7 @@ def _asarray_tuplesafe(values, dtype=None): return values.values if isinstance(values, list) and dtype in [np.object_, object]: - return construct_1d_array_from_listlike(values) + return construct_1d_object_array_from_listlike(values) result = np.asarray(values, dtype=dtype) @@ -393,7 +393,7 @@ def _asarray_tuplesafe(values, dtype=None): # Avoid building an array of arrays: # TODO: verify whether any path hits this except #18819 (invalid) values = [tuple(x) for x in values] - result = construct_1d_array_from_listlike(values) + result = construct_1d_object_array_from_listlike(values) return result diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ca3d05cb15024..87c6fb69f33bf 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -42,7 +42,7 @@ def maybe_convert_platform(values): """ try to do platform conversion, allow ndarray or list here """ if isinstance(values, (list, tuple)): - values = construct_1d_array_from_listlike(list(values)) + values = construct_1d_object_array_from_listlike(list(values)) if getattr(values, 'dtype', None) == np.object_: if hasattr(values, '_values'): values = values._values @@ -1164,14 +1164,14 @@ def construct_1d_arraylike_from_scalar(value, length, dtype): return subarr -def construct_1d_array_from_listlike(values, dtype='object'): +def construct_1d_object_array_from_listlike(values): """ - Transform any list-like object in a 1-dimensional numpy array. + Transform any list-like object in a 1-dimensional numpy array of object + dtype. Parameters ---------- values : any iterable which has a len() - dtype : dtype, default 'object' Raises ------ @@ -1180,10 +1180,10 @@ def construct_1d_array_from_listlike(values, dtype='object'): Returns ------- - 1-dimensional numpy array of dtype "dtype" + 1-dimensional numpy array of dtype object """ # numpy will try to interpret nested lists as further dimensions, hence # making a 1D array that contains list-likes is a bit tricky: - result = np.empty(len(values), dtype=dtype) + result = np.empty(len(values), dtype='object') result[:] = values return result diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4bf6856e5f1b4..17ad5f20ce742 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -34,7 +34,8 @@ is_scalar, _ensure_object) from pandas.core.dtypes.cast import ( - maybe_upcast_putmask, find_common_type, construct_1d_array_from_listlike) + maybe_upcast_putmask, find_common_type, + construct_1d_object_array_from_listlike) from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, @@ -751,7 +752,7 @@ def wrapper(left, right, name=name, na_op=na_op): def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): - y = construct_1d_array_from_listlike(y) + y = construct_1d_object_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): if not is_object_dtype(y.dtype): y = y.astype(np.object_) @@ -902,7 +903,7 @@ def na_op(x, y): result = op(x, y) except TypeError: if isinstance(y, list): - y = construct_1d_array_from_listlike(y) + y = construct_1d_object_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries)): if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index daa27bcfd2b3f..10f1e658c053f 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -22,7 +22,7 @@ maybe_convert_string_to_object, maybe_convert_scalar, find_common_type, - construct_1d_array_from_listlike) + construct_1d_object_array_from_listlike) from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -409,23 +409,12 @@ def test_period_dtype(self): assert find_common_type([dtype, dtype2]) == np.object assert find_common_type([dtype2, dtype]) == np.object - @pytest.mark.parametrize('dtype', [int, float, str, object, None]) @pytest.mark.parametrize('datum1', [1, 2., "3", (4, 5), [6, 7], None]) @pytest.mark.parametrize('datum2', [8, 9., "10", (11, 12), [13, 14], None]) - def test_cast_1d_array(self, dtype, datum1, datum2): + def test_cast_1d_array(self, datum1, datum2): data = [datum1, datum2] - try: - # Conversion to 1d array is possible if requested dtype is object - possible = dtype is object - # ... or the following succeeds _and_ the result has dimension 1: - possible = possible or np.array(data, dtype=dtype).ndim == 1 - if not possible: - exc = ValueError - except (ValueError, TypeError) as exception: - exc = type(exception) - - if possible: - assert list(construct_1d_array_from_listlike(data)) == data - else: - pytest.raises(exc, construct_1d_array_from_listlike, - data, dtype=dtype) + result = construct_1d_object_array_from_listlike(data) + + # Direct comparison fails: https://github.com/numpy/numpy/issues/10218 + assert result.dtype == 'object' + assert list(result) == data diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 68a74df5f63df..8be6c4875ae24 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -22,7 +22,7 @@ date_range, Categorical) import pandas as pd import pandas.util.testing as tm -from pandas.core.dtypes.cast import construct_1d_array_from_listlike +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.tests.frame.common import TestData @@ -1199,7 +1199,7 @@ def test_constructor_from_items(self): DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples - arr = construct_1d_array_from_listlike( + arr = construct_1d_object_array_from_listlike( [('bar', 'baz')] * len(self.mixed_frame)) self.mixed_frame['foo'] = arr row_items = [(idx, list(self.mixed_frame.xs(idx))) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 9b2d92df39252..7d6937592002d 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -18,7 +18,7 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.indexes.base import InvalidIndexError -from pandas.core.dtypes.cast import construct_1d_array_from_listlike +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas._libs.lib import Timestamp import pandas.util.testing as tm @@ -913,7 +913,7 @@ def test_from_product_invalid_input(self): def test_from_product_datetimeindex(self): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = construct_1d_array_from_listlike([(1, pd.Timestamp( + etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) tm.assert_numpy_array_equal(mi.values, etalon) @@ -938,11 +938,11 @@ def test_values_boxed(self): (1, pd.Timestamp('2000-01-04')), (2, pd.Timestamp('2000-01-02')), (3, pd.Timestamp('2000-01-03'))] - mi = pd.MultiIndex.from_tuples(tuples) - tm.assert_numpy_array_equal(mi.values, - construct_1d_array_from_listlike(tuples)) + result = pd.MultiIndex.from_tuples(tuples) + expected = construct_1d_object_array_from_listlike(tuples) + tm.assert_numpy_array_equal(result.values, expected) # Check that code branches for boxed values produce identical results - tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values) + tm.assert_numpy_array_equal(result.values[:4], result[:4].values) def test_append(self): result = self.index[:3].append(self.index[3:]) From d0a6e48a703db106e7ca7caf5620390debff3080 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 18 Dec 2017 15:30:30 +0100 Subject: [PATCH 7/7] TST: Test invalid input to test_cast_1d_array_invalid_scalar --- pandas/tests/dtypes/test_cast.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 10f1e658c053f..d13d781f03117 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -418,3 +418,7 @@ def test_cast_1d_array(self, datum1, datum2): # Direct comparison fails: https://github.com/numpy/numpy/issues/10218 assert result.dtype == 'object' assert list(result) == data + + @pytest.mark.parametrize('val', [1, 2., None]) + def test_cast_1d_array_invalid_scalar(self, val): + pytest.raises(TypeError, construct_1d_object_array_from_listlike, val)