diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 0e342c7855653..f5fbd4cc4a7fc 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -12,7 +12,6 @@ import pandas._libs.missing as libmissing from pandas._libs.tslibs import ( NaT, - Period, iNaT, ) from pandas._typing import ( @@ -644,40 +643,3 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool: # fallback, default to allowing NaN, None, NA, NaT return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal)) - - -def isna_all(arr: ArrayLike) -> bool: - """ - Optimized equivalent to isna(arr).all() - """ - total_len = len(arr) - - # Usually it's enough to check but a small fraction of values to see if - # a block is NOT null, chunks should help in such cases. - # parameters 1000 and 40 were chosen arbitrarily - chunk_len = max(total_len // 40, 1000) - - dtype = arr.dtype - if dtype.kind == "f": - checker = nan_checker - - elif dtype.kind in ["m", "M"] or dtype.type is Period: - # error: Incompatible types in assignment (expression has type - # "Callable[[Any], Any]", variable has type "ufunc") - checker = lambda x: np.asarray(x.view("i8")) == iNaT # type: ignore[assignment] - - else: - # error: Incompatible types in assignment (expression has type "Callable[[Any], - # Any]", variable has type "ufunc") - checker = lambda x: _isna_array( # type: ignore[assignment] - x, inf_as_na=INF_AS_NA - ) - - return all( - # error: Argument 1 to "__call__" of "ufunc" has incompatible type - # "Union[ExtensionArray, Any]"; expected "Union[Union[int, float, complex, str, - # bytes, generic], Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], _SupportsArray]" - checker(arr[i : i + chunk_len]).all() # type: ignore[arg-type] - for i in range(0, total_len, chunk_len) - ) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 1360f1d1a508a..69b01f0b26be3 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -10,11 +10,7 @@ import numpy as np -from pandas._libs import ( - NaT, - internals as libinternals, -) -from pandas._libs.missing import NA +from pandas._libs import internals as libinternals from pandas._typing import ( ArrayLike, DtypeObj, @@ -32,14 +28,12 @@ is_1d_only_ea_obj, is_datetime64tz_dtype, is_dtype_equal, - needs_i8_conversion, ) from pandas.core.dtypes.concat import ( cast_to_common_type, concat_compat, ) from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.missing import is_valid_na_for_dtype import pandas.core.algorithms as algos from pandas.core.arrays import ( @@ -381,36 +375,6 @@ def dtype(self): return blk.dtype return ensure_dtype_can_hold_na(blk.dtype) - def _is_valid_na_for(self, dtype: DtypeObj) -> bool: - """ - Check that we are all-NA of a type/dtype that is compatible with this dtype. - Augments `self.is_na` with an additional check of the type of NA values. - """ - if not self.is_na: - return False - if self.block.dtype.kind == "V": - return True - - if self.dtype == object: - values = self.block.values - return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K")) - - na_value = self.block.fill_value - if na_value is NaT and not is_dtype_equal(self.dtype, dtype): - # e.g. we are dt64 and other is td64 - # fill_values match but we should not cast self.block.values to dtype - # TODO: this will need updating if we ever have non-nano dt64/td64 - return False - - if na_value is NA and needs_i8_conversion(dtype): - # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat - # e.g. self.dtype == "Int64" and dtype is td64, we dont want - # to consider these as matching - return False - - # TODO: better to use can_hold_element? - return is_valid_na_for_dtype(na_value, dtype) - @cache_readonly def is_na(self) -> bool: blk = self.block @@ -421,24 +385,14 @@ def is_na(self) -> bool: def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: values: ArrayLike - if upcasted_na is None and self.block.dtype.kind != "V": + if upcasted_na is None and not self.is_na: # No upcasting is necessary fill_value = self.block.fill_value values = self.block.get_values() else: fill_value = upcasted_na - if self._is_valid_na_for(empty_dtype): - # note: always holds when self.block.dtype.kind == "V" - blk_dtype = self.block.dtype - - if blk_dtype == np.dtype("object"): - # we want to avoid filling with np.nan if we are - # using None; we already know that we are all - # nulls - values = self.block.values.ravel(order="K") - if len(values) and values[0] is None: - fill_value = None + if self.is_na: if is_datetime64tz_dtype(empty_dtype): i8values = np.full(self.shape, fill_value.value) @@ -507,8 +461,7 @@ def _concatenate_join_units( empty_dtype = _get_empty_dtype(join_units) - has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units) - upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks) + upcasted_na = _dtype_to_na_value(empty_dtype) to_concat = [ ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na) @@ -548,7 +501,7 @@ def _concatenate_join_units( return concat_values -def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool): +def _dtype_to_na_value(dtype: DtypeObj): """ Find the NA value to go with this dtype. """ @@ -587,11 +540,9 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj: empty_dtype = join_units[0].block.dtype return empty_dtype - has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units) + has_none_blocks = any(unit.is_na for unit in join_units) dtypes = [unit.dtype for unit in join_units if not unit.is_na] - if not len(dtypes): - dtypes = [unit.dtype for unit in join_units if unit.block.dtype.kind != "V"] dtype = find_common_type(dtypes) if has_none_blocks: