From 56498f91c023ca57f2bc4c303f5622dcc08864e8 Mon Sep 17 00:00:00 2001
From: Natalia Mokeeva
Date: Sat, 14 Jan 2023 23:25:42 +0100
Subject: [PATCH 1/3] TEST: dropna() doesn't fall into infinite loop

---
Note: adds a regression test that builds a 1,000,000-row, two-column
DataFrame subclass (LargeFrame) containing NaNs and calls dropna() on it.

 pandas/tests/frame/test_constructors.py | 26 +++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index e009ba45514a2..8ebe2ddd35766 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2457,6 +2457,32 @@ def test_constructor_list_str_na(self, string_dtype):
         expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object)
         tm.assert_frame_equal(result, expected)
 
+    def test_constructor_large_size_frame(self):
+        class LargeFrame(DataFrame):
+            def __init__(self, *args, **kwargs):
+                super().__init__(*args, **kwargs)
+                for col in self.columns:
+                    if self.dtypes[col] == "O":
+                        self[col] = pd.to_numeric(self[col], errors="ignore")
+
+            @property
+            def _constructor(self):
+                return type(self)
+
+        def get_frame(N):
+            return LargeFrame(
+                data=np.vstack(
+                    [
+                        np.where(np.random.rand(N) > 0.36, np.random.rand(N), np.nan)
+                        for _ in range(2)
+                    ]
+                ).T
+            )
+
+        frame = get_frame(1000000)
+        # check that dropna() doesn't fall into an infinite loop
+        frame.dropna()
+
     @pytest.mark.parametrize("copy", [False, True])
     def test_dict_nocopy(
         self,

From 6374d43e885ed6f7fde7181fc8ae8eb7badad015 Mon Sep 17 00:00:00 2001
From: Natalia Mokeeva
Date: Mon, 30 Jan 2023 12:29:31 +0100
Subject: [PATCH 2/3] add a link to the issue

---
Note: only adds the "# GH#50708" issue reference comment to the test.

 pandas/tests/frame/test_constructors.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 8ebe2ddd35766..5c89d039dc2da 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2458,6 +2458,7 @@ def test_constructor_list_str_na(self, string_dtype):
         tm.assert_frame_equal(result, expected)
 
     def test_constructor_large_size_frame(self):
+        # GH#50708
         class LargeFrame(DataFrame):
             def __init__(self, *args, **kwargs):
                 super().__init__(*args, **kwargs)

From d11a3d8fe78136717f2288f5af5016e0fc175d4d Mon Sep 17 00:00:00 2001
From: Natalia Mokeeva
Date: Mon, 30 Jan 2023 15:38:16 +0100
Subject: [PATCH 3/3] add decorator to handle timeout

---
Note: wraps the dropna() call in a local SIGALRM-based timeout decorator
(5 seconds) so a hang surfaces as a test failure instead of blocking.
Reviewer notes: signal.SIGALRM and signal.alarm() are documented as
Unix-only, so the test cannot run on Windows as written; the nested
TimeoutError class shadows the builtin of the same name inside the test;
the previous SIGALRM handler is not restored after the call.

 pandas/tests/frame/test_constructors.py | 34 +++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 5c89d039dc2da..f749480c07b80 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -11,9 +11,12 @@
     datetime,
     timedelta,
 )
+import errno
 import functools
+import os
 import random
 import re
+import signal
 from typing import Iterator
 import warnings
 
@@ -2459,6 +2462,30 @@ def test_constructor_list_str_na(self, string_dtype):
 
     def test_constructor_large_size_frame(self):
         # GH#50708
+        # https://stackoverflow.com/questions/2281850/timeout-function-
+        # if-it-takes-too-long-to-finish/2282656#2282656
+        class TimeoutError(Exception):
+            pass
+
+        def timeout(seconds=10, error_message=os.strerror(errno.ETIME)):
+            def decorator(func):
+                def _handle_timeout(signum, frame):
+                    raise TimeoutError(error_message)
+
+                @functools.wraps(func)
+                def wrapper(*args, **kwargs):
+                    signal.signal(signal.SIGALRM, _handle_timeout)
+                    signal.alarm(seconds)
+                    try:
+                        result = func(*args, **kwargs)
+                    finally:
+                        signal.alarm(0)
+                    return result
+
+                return wrapper
+
+            return decorator
+
         class LargeFrame(DataFrame):
             def __init__(self, *args, **kwargs):
                 super().__init__(*args, **kwargs)
@@ -2480,9 +2507,12 @@ def get_frame(N):
                 ).T
             )
 
-        frame = get_frame(1000000)
         # check that dropna() doesn't fall into an infinite loop
-        frame.dropna()
+        @timeout(5)
+        def time_restricted_dropna(n):
+            get_frame(n).dropna()
+
+        time_restricted_dropna(1000000)
 
     @pytest.mark.parametrize("copy", [False, True])
     def test_dict_nocopy(