diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 99b57e2427509..89c9f7ab68490 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -383,6 +383,7 @@ Reshaping - Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`) - Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`). - Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`) +- Bug in :class:`DataFrame` constructor where passing non-empty tuples would cause a segmentation fault (:issue:`25691`) Sparse ^^^^^^ diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f84dc3fa9fb90..f98f29d178c15 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2264,22 +2264,21 @@ def to_object_array(rows: object, int min_width=0): cdef: Py_ssize_t i, j, n, k, tmp ndarray[object, ndim=2] result - list input_rows list row - input_rows = rows - n = len(input_rows) + rows = list(rows) + n = len(rows) k = min_width for i in range(n): - tmp = len(input_rows[i]) + tmp = len(rows[i]) if tmp > k: k = tmp result = np.empty((n, k), dtype=object) for i in range(n): - row = list(input_rows[i]) + row = list(rows[i]) for j in range(len(row)): result[i, j] = row[j] @@ -2304,12 +2303,26 @@ def tuples_to_object_array(ndarray[object] tuples): return result -def to_object_array_tuples(rows: list): +def to_object_array_tuples(rows: object): + """ + Convert a list of tuples into an object array. Any subclass of + tuple in `rows` will be cast to tuple. + + Parameters + ---------- + rows : 2-d array (N, K) + A list of tuples to be converted into an array. 
+ + Returns + ------- + obj_array : numpy array of the object dtype + """ cdef: Py_ssize_t i, j, n, k, tmp ndarray[object, ndim=2] result tuple row + rows = list(rows) n = len(rows) k = 0 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8ea5c05125005..fa8ea27420b1a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1209,12 +1209,26 @@ def test_constructor_mixed_type_rows(self): expected = DataFrame([[1, 2], [3, 4]]) tm.assert_frame_equal(result, expected) - def test_constructor_tuples(self): + @pytest.mark.parametrize("tuples,lists", [ + ((), []), + ((()), []), + (((), ()), [(), ()]), + (((), ()), [[], []]), + (([], []), [[], []]), + (([1, 2, 3], [4, 5, 6]), [[1, 2, 3], [4, 5, 6]]) + ]) + def test_constructor_tuple(self, tuples, lists): + # GH 25691 + result = DataFrame(tuples) + expected = DataFrame(lists) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_of_tuples(self): result = DataFrame({'A': [(1, 2), (3, 4)]}) expected = DataFrame({'A': Series([(1, 2), (3, 4)])}) tm.assert_frame_equal(result, expected) - def test_constructor_namedtuples(self): + def test_constructor_list_of_namedtuples(self): # GH11181 from collections import namedtuple named_tuple = namedtuple("Pandas", list('ab'))