diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 569ec613cbcb9..2cd6c8783d387 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -11,9 +11,10 @@
     datetime,
     timedelta,
 )
 import functools
 import random
 import re
+import signal
 from typing import Iterator
 import warnings
 
@@ -2459,6 +2460,47 @@ def test_constructor_list_str_na(self, string_dtype):
         expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object)
         tm.assert_frame_equal(result, expected)
 
+    def test_constructor_large_size_frame(self):
+        # GH#50708
+        class LargeFrame(DataFrame):
+            def __init__(self, *args, **kwargs):
+                super().__init__(*args, **kwargs)
+                for col in self.columns:
+                    if self.dtypes[col] == "O":
+                        self[col] = pd.to_numeric(self[col], errors="ignore")
+
+            @property
+            def _constructor(self):
+                return type(self)
+
+        def get_frame(N):
+            # seeded so every run builds the same NaN pattern
+            rng = np.random.default_rng(2)
+            columns = [
+                np.where(rng.random(N) > 0.36, rng.random(N), np.nan)
+                for _ in range(2)
+            ]
+            return LargeFrame(data=np.vstack(columns).T)
+
+        seconds = 5
+
+        def _handle_timeout(signum, frame):
+            raise TimeoutError(f"dropna() did not finish within {seconds} seconds")
+
+        # check that dropna() doesn't fall into an infinite loop; the SIGALRM
+        # watchdog is POSIX-only, so other platforms run without a time limit
+        has_alarm = hasattr(signal, "SIGALRM")
+        if has_alarm:
+            previous_handler = signal.signal(signal.SIGALRM, _handle_timeout)
+            signal.alarm(seconds)
+        try:
+            get_frame(1000000).dropna()
+        finally:
+            if has_alarm:
+                # disarm and restore the old handler so later tests are unaffected
+                signal.alarm(0)
+                signal.signal(signal.SIGALRM, previous_handler)
+
     @pytest.mark.parametrize("copy", [False, True])
     def test_dict_nocopy(
         self,