From 3fd4550ae5697377f8ae6f13888ce1e8a04a9a87 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 26 Jan 2025 09:02:37 -0500 Subject: [PATCH 1/3] BUG: is_*_array returns true on empty object dtype --- pandas/_libs/lib.pyx | 36 +++++++++++++-------------- pandas/tests/dtypes/test_inference.py | 24 ++++++++++++++++++ 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index de603beff7836..5239aa2c61dc5 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1882,7 +1882,7 @@ cdef class BoolValidator(Validator): cpdef bint is_bool_array(ndarray values, bint skipna=False): cdef: - BoolValidator validator = BoolValidator(len(values), + BoolValidator validator = BoolValidator(values.size, values.dtype, skipna=skipna) return validator.validate(values) @@ -1900,7 +1900,7 @@ cdef class IntegerValidator(Validator): # Note: only python-exposed for tests cpdef bint is_integer_array(ndarray values, bint skipna=True): cdef: - IntegerValidator validator = IntegerValidator(len(values), + IntegerValidator validator = IntegerValidator(values.size, values.dtype, skipna=skipna) return validator.validate(values) @@ -1915,7 +1915,7 @@ cdef class IntegerNaValidator(Validator): cdef bint is_integer_na_array(ndarray values, bint skipna=True): cdef: - IntegerNaValidator validator = IntegerNaValidator(len(values), + IntegerNaValidator validator = IntegerNaValidator(values.size, values.dtype, skipna=skipna) return validator.validate(values) @@ -1931,7 +1931,7 @@ cdef class IntegerFloatValidator(Validator): cdef bint is_integer_float_array(ndarray values, bint skipna=True): cdef: - IntegerFloatValidator validator = IntegerFloatValidator(len(values), + IntegerFloatValidator validator = IntegerFloatValidator(values.size, values.dtype, skipna=skipna) return validator.validate(values) @@ -1949,7 +1949,7 @@ cdef class FloatValidator(Validator): # Note: only python-exposed for tests cpdef bint is_float_array(ndarray values): cdef: - FloatValidator validator = FloatValidator(len(values), values.dtype) + FloatValidator validator = FloatValidator(values.size, values.dtype) return validator.validate(values) @@ -1967,7 +1967,7 @@ cdef class ComplexValidator(Validator): cdef bint is_complex_array(ndarray values): cdef: - ComplexValidator validator = ComplexValidator(len(values), values.dtype) + ComplexValidator validator = ComplexValidator(values.size, values.dtype) return validator.validate(values) @@ -1980,7 +1980,7 @@ cdef class DecimalValidator(Validator): cdef bint is_decimal_array(ndarray values, bint skipna=False): cdef: DecimalValidator validator = DecimalValidator( - len(values), values.dtype, skipna=skipna + values.size, values.dtype, skipna=skipna ) return validator.validate(values) @@ -1996,7 +1996,7 @@ cdef class StringValidator(Validator): cpdef bint is_string_array(ndarray values, bint skipna=False): cdef: - StringValidator validator = StringValidator(len(values), + StringValidator validator = StringValidator(values.size, values.dtype, skipna=skipna) return validator.validate(values) @@ -2013,7 +2013,7 @@ cdef class BytesValidator(Validator): cdef bint is_bytes_array(ndarray values, bint skipna=False): cdef: - BytesValidator validator = BytesValidator(len(values), values.dtype, + BytesValidator validator = BytesValidator(values.size, values.dtype, skipna=skipna) return validator.validate(values) @@ -2064,7 +2064,7 @@ cdef class DatetimeValidator(TemporalValidator): cpdef bint is_datetime_array(ndarray values, bint skipna=True): cdef: - DatetimeValidator validator = DatetimeValidator(len(values), + DatetimeValidator validator = DatetimeValidator(values.size, skipna=skipna) return validator.validate(values) @@ -2078,7 +2078,7 @@ cdef class Datetime64Validator(DatetimeValidator): # Note: only python-exposed for tests cpdef bint is_datetime64_array(ndarray values, bint skipna=True): cdef: - Datetime64Validator validator = Datetime64Validator(len(values), + Datetime64Validator validator = Datetime64Validator(values.size, skipna=skipna) return validator.validate(values) @@ -2093,7 +2093,7 @@ cdef class AnyDatetimeValidator(DatetimeValidator): cdef bint is_datetime_or_datetime64_array(ndarray values, bint skipna=True): cdef: - AnyDatetimeValidator validator = AnyDatetimeValidator(len(values), + AnyDatetimeValidator validator = AnyDatetimeValidator(values.size, skipna=skipna) return validator.validate(values) @@ -2105,7 +2105,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: Doesn't check values are datetime-like types. """ cdef: - Py_ssize_t i = 0, j, n = len(values) + Py_ssize_t i = 0, j, n = values.size object base_val, base_tz, val, tz if n == 0: @@ -2153,7 +2153,7 @@ cpdef bint is_timedelta_or_timedelta64_array(ndarray values, bint skipna=True): Infer with timedeltas and/or nat/none. """ cdef: - AnyTimedeltaValidator validator = AnyTimedeltaValidator(len(values), + AnyTimedeltaValidator validator = AnyTimedeltaValidator(values.size, skipna=skipna) return validator.validate(values) @@ -2167,7 +2167,7 @@ cdef class DateValidator(Validator): # Note: only python-exposed for tests cpdef bint is_date_array(ndarray values, bint skipna=False): cdef: - DateValidator validator = DateValidator(len(values), skipna=skipna) + DateValidator validator = DateValidator(values.size, skipna=skipna) return validator.validate(values) @@ -2180,7 +2180,7 @@ cdef class TimeValidator(Validator): # Note: only python-exposed for tests cpdef bint is_time_array(ndarray values, bint skipna=False): cdef: - TimeValidator validator = TimeValidator(len(values), skipna=skipna) + TimeValidator validator = TimeValidator(values.size, skipna=skipna) return validator.validate(values) @@ -2231,14 +2231,14 @@ cpdef bint is_interval_array(ndarray values): Is this an ndarray of Interval (or np.nan) with a single dtype? """ cdef: - Py_ssize_t i, n = len(values) + Py_ssize_t i, n = values.size str closed = None bint numeric = False bint dt64 = False bint td64 = False object val - if len(values) == 0: + if n == 0: return False for i in range(n): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index da444b55490f0..800b07f02b442 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1582,6 +1582,30 @@ def test_is_string_array(self): ) assert not lib.is_string_array(np.array([1, 2])) + @pytest.mark.parametrize( + "func", + [ + "is_bool_array", + "is_date_array", + "is_datetime_array", + "is_datetime64_array", + "is_float_array", + "is_integer_array", + "is_interval_array", + "is_string_array", + "is_time_array", + "is_timedelta_or_timedelta64_array", + ], + ) + def test_is_dtype_array_empty_obj(self, func): + func = getattr(lib, func) + + arr = np.empty((2, 0), dtype=object) + assert not func(arr) + + arr = np.empty((0, 2), dtype=object) + assert not func(arr) + def test_to_object_array_tuples(self): r = (5, 6) values = [r] From f97526bdb37aa391bbc539bd8097a0990913eb9a Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 26 Jan 2025 09:04:03 -0500 Subject: [PATCH 2/3] GH-ref --- pandas/tests/dtypes/test_inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 800b07f02b442..db98751324ebc 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1598,6 +1598,7 @@ def test_is_string_array(self): ], ) def test_is_dtype_array_empty_obj(self, func): + # https://github.com/pandas-dev/pandas/pull/60796 func = getattr(lib, func) arr = np.empty((2, 0), dtype=object) From a46e595d22cc2fbd24e86e88b27bab6015659d64 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 26 Jan 2025 12:27:13 -0500 Subject: [PATCH 3/3] Fix test --- pandas/tests/io/test_feather.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 24af0a014dd50..e778193c147c1 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -143,8 +143,8 @@ def test_rw_use_threads(self): def test_path_pathlib(self): df = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD"), dtype=object), - index=pd.Index([f"i-{i}" for i in range(30)], dtype=object), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), ).reset_index() result = tm.round_trip_pathlib(df.to_feather, read_feather) tm.assert_frame_equal(df, result)