From 09decf2987e06dd856375c08dbcaa54b2abed4a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 11 Jul 2022 21:10:19 -0400 Subject: [PATCH 1/3] TYP: make na_value consistently a property --- pandas/core/arrays/string_.py | 11 +++++++---- pandas/core/arrays/string_arrow.py | 5 +++-- pandas/core/dtypes/dtypes.py | 9 +++++++-- pandas/core/indexes/numeric.py | 4 +++- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 083acf16ec758..d4f652d4f433a 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -90,8 +90,11 @@ class StringDtype(StorageExtensionDtype): name = "string" - #: StringDtype.na_value uses pandas.NA - na_value = libmissing.NA + #: StringDtype().na_value uses pandas.NA + @property + def na_value(self) -> libmissing.NAType: + return libmissing.NA + _metadata = ("storage",) def __init__(self, storage=None) -> None: @@ -396,7 +399,7 @@ def __setitem__(self, key, value): # validate new items if scalar_value: if isna(value): - value = StringDtype.na_value + value = libmissing.NA elif not isinstance(value, str): raise ValueError( f"Cannot set non-string value '{value}' into a StringArray." 
@@ -497,7 +500,7 @@ def _cmp_method(self, other, op): if op.__name__ in ops.ARITHMETIC_BINOPS: result = np.empty_like(self._ndarray, dtype="object") - result[mask] = StringDtype.na_value + result[mask] = libmissing.NA result[valid] = op(self._ndarray[valid], other) return StringArray(result) else: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 3e3df5a3200c1..bb2fefabd6ae5 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -242,8 +242,9 @@ def astype(self, dtype, copy: bool = True): # ------------------------------------------------------------------------ # String methods interface - # error: Cannot determine type of 'na_value' - _str_na_value = StringDtype.na_value # type: ignore[has-type] + # error: Incompatible types in assignment (expression has type "NAType", + # base class "ObjectStringArrayMixin" defined the type as "float") + _str_na_value = libmissing.NA # type: ignore[assignment] def _str_map( self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 9683c1dd93645..99b2082d409a9 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -676,11 +676,14 @@ class DatetimeTZDtype(PandasExtensionDtype): kind: str_type = "M" num = 101 base = np.dtype("M8[ns]") # TODO: depend on reso? 
- na_value = NaT _metadata = ("unit", "tz") _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") _cache_dtypes: dict[str_type, PandasExtensionDtype] = {} + @property + def na_value(self) -> NaTType: + return NaT + @cache_readonly def str(self): return f"|M8[{self._unit}]" @@ -1450,7 +1453,9 @@ class BaseMaskedDtype(ExtensionDtype): base = None type: type - na_value = libmissing.NA + @property + def na_value(self) -> libmissing.NAType: + return libmissing.NA @cache_readonly def numpy_dtype(self) -> np.dtype: diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 56fcec751749b..a0c74684e5eb9 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -409,7 +409,9 @@ class Float64Index(NumericIndex): __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args _typ = "float64index" - _engine_type = libindex.Float64Engine _default_dtype = np.dtype(np.float64) _dtype_validation_metadata = (is_float_dtype, "float") _is_backward_compat_public_numeric_index: bool = False + + def _engine_type(self) -> type[libindex.Float64Engine]: + return libindex.Float64Engine From 998d0dddd7ef984166e129fbaec5a86ed5a403f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 11 Jul 2022 21:46:54 -0400 Subject: [PATCH 2/3] property --- pandas/core/indexes/numeric.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index a0c74684e5eb9..5731d476cef10 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -413,5 +413,6 @@ class Float64Index(NumericIndex): _dtype_validation_metadata = (is_float_dtype, "float") _is_backward_compat_public_numeric_index: bool = False + @property def _engine_type(self) -> type[libindex.Float64Engine]: return libindex.Float64Engine From 869675289b2bea9712743cf6b459c1f05ab36302 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 12 Jul 2022 00:17:38 -0400 
Subject: [PATCH 3/3] replace a few more class accesses --- pandas/core/arrays/string_.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index d4f652d4f433a..c9abef226770c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -338,13 +338,11 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): na_values = scalars._mask result = scalars._data result = lib.ensure_string_array(result, copy=copy, convert_na_value=False) - result[na_values] = StringDtype.na_value + result[na_values] = libmissing.NA else: - # convert non-na-likes to str, and nan-likes to StringDtype.na_value - result = lib.ensure_string_array( - scalars, na_value=StringDtype.na_value, copy=copy - ) + # convert non-na-likes to str, and nan-likes to StringDtype().na_value + result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy) # Manually creating new array avoids the validation step in the __init__, so is # faster. Refactor need for validation? @@ -515,7 +513,7 @@ def _cmp_method(self, other, op): # String methods interface # error: Incompatible types in assignment (expression has type "NAType", # base class "PandasArray" defined the type as "float") - _str_na_value = StringDtype.na_value # type: ignore[assignment] + _str_na_value = libmissing.NA # type: ignore[assignment] def _str_map( self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True