Skip to content

Commit f9b4443

Browse files
jbrockmendel authored and JulianWgs committed
CLN: validators (pandas-dev#41810)
1 parent b94642d commit f9b4443

File tree

1 file changed

+34
-2
lines changed

1 file changed

+34
-2
lines changed

pandas/_libs/lib.pyx

+34 -2
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,7 @@ cdef inline bint is_timedelta(object o):
16441644
return PyDelta_Check(o) or util.is_timedelta64_object(o)
16451645

16461646

1647+
@cython.internal
16471648
cdef class Validator:
16481649

16491650
cdef:
@@ -1662,6 +1663,7 @@ cdef class Validator:
16621663
return False
16631664

16641665
if self.is_array_typed():
1666+
# i.e. this ndarray is already of the desired dtype
16651667
return True
16661668
elif self.dtype.type_num == NPY_OBJECT:
16671669
if self.skipna:
@@ -1717,11 +1719,16 @@ cdef class Validator:
17171719
return True
17181720

17191721
cdef bint finalize_validate_skipna(self):
1722+
"""
1723+
If we _only_ saw non-dtype-specific NA values, even if they are valid
1724+
for this dtype, we do not infer this dtype.
1725+
"""
17201726
# TODO(phillipc): Remove the existing validate methods and replace them
17211727
# with the skipna versions upon full deprecation of skipna=False
17221728
return True
17231729

17241730

1731+
@cython.internal
17251732
cdef class BoolValidator(Validator):
17261733
cdef inline bint is_value_typed(self, object value) except -1:
17271734
return util.is_bool_object(value)
@@ -1738,6 +1745,7 @@ cpdef bint is_bool_array(ndarray values, bint skipna=False):
17381745
return validator.validate(values)
17391746

17401747

1748+
@cython.internal
17411749
cdef class IntegerValidator(Validator):
17421750
cdef inline bint is_value_typed(self, object value) except -1:
17431751
return util.is_integer_object(value)
@@ -1746,13 +1754,15 @@ cdef class IntegerValidator(Validator):
17461754
return issubclass(self.dtype.type, np.integer)
17471755

17481756

1757+
# Note: only python-exposed for tests
17491758
cpdef bint is_integer_array(ndarray values):
17501759
cdef:
17511760
IntegerValidator validator = IntegerValidator(len(values),
17521761
values.dtype)
17531762
return validator.validate(values)
17541763

17551764

1765+
@cython.internal
17561766
cdef class IntegerNaValidator(Validator):
17571767
cdef inline bint is_value_typed(self, object value) except -1:
17581768
return (util.is_integer_object(value)
@@ -1766,6 +1776,7 @@ cdef bint is_integer_na_array(ndarray values):
17661776
return validator.validate(values)
17671777

17681778

1779+
@cython.internal
17691780
cdef class IntegerFloatValidator(Validator):
17701781
cdef inline bint is_value_typed(self, object value) except -1:
17711782
return util.is_integer_object(value) or util.is_float_object(value)
@@ -1781,6 +1792,7 @@ cdef bint is_integer_float_array(ndarray values):
17811792
return validator.validate(values)
17821793

17831794

1795+
@cython.internal
17841796
cdef class FloatValidator(Validator):
17851797
cdef inline bint is_value_typed(self, object value) except -1:
17861798
return util.is_float_object(value)
@@ -1789,12 +1801,14 @@ cdef class FloatValidator(Validator):
17891801
return issubclass(self.dtype.type, np.floating)
17901802

17911803

1804+
# Note: only python-exposed for tests
17921805
cpdef bint is_float_array(ndarray values):
17931806
cdef:
17941807
FloatValidator validator = FloatValidator(len(values), values.dtype)
17951808
return validator.validate(values)
17961809

17971810

1811+
@cython.internal
17981812
cdef class ComplexValidator(Validator):
17991813
cdef inline bint is_value_typed(self, object value) except -1:
18001814
return (
@@ -1812,6 +1826,7 @@ cdef bint is_complex_array(ndarray values):
18121826
return validator.validate(values)
18131827

18141828

1829+
@cython.internal
18151830
cdef class DecimalValidator(Validator):
18161831
cdef inline bint is_value_typed(self, object value) except -1:
18171832
return is_decimal(value)
@@ -1823,6 +1838,7 @@ cdef bint is_decimal_array(ndarray values):
18231838
return validator.validate(values)
18241839

18251840

1841+
@cython.internal
18261842
cdef class StringValidator(Validator):
18271843
cdef inline bint is_value_typed(self, object value) except -1:
18281844
return isinstance(value, str)
@@ -1843,6 +1859,7 @@ cpdef bint is_string_array(ndarray values, bint skipna=False):
18431859
return validator.validate(values)
18441860

18451861

1862+
@cython.internal
18461863
cdef class BytesValidator(Validator):
18471864
cdef inline bint is_value_typed(self, object value) except -1:
18481865
return isinstance(value, bytes)
@@ -1858,6 +1875,7 @@ cdef bint is_bytes_array(ndarray values, bint skipna=False):
18581875
return validator.validate(values)
18591876

18601877

1878+
@cython.internal
18611879
cdef class TemporalValidator(Validator):
18621880
cdef:
18631881
Py_ssize_t generic_null_count
@@ -1884,9 +1902,14 @@ cdef class TemporalValidator(Validator):
18841902
return self.is_value_typed(value) or is_typed_null or is_generic_null
18851903

18861904
cdef inline bint finalize_validate_skipna(self):
1905+
"""
1906+
If we _only_ saw non-dtype-specific NA values, even if they are valid
1907+
for this dtype, we do not infer this dtype.
1908+
"""
18871909
return self.generic_null_count != self.n
18881910

18891911

1912+
@cython.internal
18901913
cdef class DatetimeValidator(TemporalValidator):
18911914
cdef bint is_value_typed(self, object value) except -1:
18921915
return PyDateTime_Check(value)
@@ -1902,19 +1925,21 @@ cpdef bint is_datetime_array(ndarray values, bint skipna=True):
19021925
return validator.validate(values)
19031926

19041927

1928+
@cython.internal
19051929
cdef class Datetime64Validator(DatetimeValidator):
19061930
cdef inline bint is_value_typed(self, object value) except -1:
19071931
return util.is_datetime64_object(value)
19081932

19091933

1934+
# Note: only python-exposed for tests
19101935
cpdef bint is_datetime64_array(ndarray values):
19111936
cdef:
19121937
Datetime64Validator validator = Datetime64Validator(len(values),
19131938
skipna=True)
19141939
return validator.validate(values)
19151940

19161941

1917-
# TODO: only non-here use is in test
1942+
# Note: only python-exposed for tests
19181943
def is_datetime_with_singletz_array(values: ndarray) -> bool:
19191944
"""
19201945
Check values have the same tzinfo attribute.
@@ -1945,6 +1970,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool:
19451970
return True
19461971

19471972

1973+
@cython.internal
19481974
cdef class TimedeltaValidator(TemporalValidator):
19491975
cdef bint is_value_typed(self, object value) except -1:
19501976
return PyDelta_Check(value)
@@ -1953,12 +1979,13 @@ cdef class TimedeltaValidator(TemporalValidator):
19531979
return is_null_timedelta64(value)
19541980

19551981

1982+
@cython.internal
19561983
cdef class AnyTimedeltaValidator(TimedeltaValidator):
19571984
cdef inline bint is_value_typed(self, object value) except -1:
19581985
return is_timedelta(value)
19591986

19601987

1961-
# TODO: only non-here use is in test
1988+
# Note: only python-exposed for tests
19621989
cpdef bint is_timedelta_or_timedelta64_array(ndarray values):
19631990
"""
19641991
Infer with timedeltas and/or nat/none.
@@ -1969,22 +1996,26 @@ cpdef bint is_timedelta_or_timedelta64_array(ndarray values):
19691996
return validator.validate(values)
19701997

19711998

1999+
@cython.internal
19722000
cdef class DateValidator(Validator):
19732001
cdef inline bint is_value_typed(self, object value) except -1:
19742002
return PyDate_Check(value)
19752003

19762004

2005+
# Note: only python-exposed for tests
19772006
cpdef bint is_date_array(ndarray values, bint skipna=False):
19782007
cdef:
19792008
DateValidator validator = DateValidator(len(values), skipna=skipna)
19802009
return validator.validate(values)
19812010

19822011

2012+
@cython.internal
19832013
cdef class TimeValidator(Validator):
19842014
cdef inline bint is_value_typed(self, object value) except -1:
19852015
return PyTime_Check(value)
19862016

19872017

2018+
# Note: only python-exposed for tests
19882019
cpdef bint is_time_array(ndarray values, bint skipna=False):
19892020
cdef:
19902021
TimeValidator validator = TimeValidator(len(values), skipna=skipna)
@@ -2022,6 +2053,7 @@ cdef bint is_period_array(ndarray[object] values):
20222053
return True
20232054

20242055

2056+
# Note: only python-exposed for tests
20252057
cpdef bint is_interval_array(ndarray values):
20262058
"""
20272059
Is this an ndarray of Interval (or np.nan) with a single dtype?

0 commit comments

Comments
 (0)