diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index ef9981f40efe1..460d19290d56f 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -88,6 +88,7 @@ from pandas.core.internals.blocks import ( ensure_block_shape, new_block, + to_native_types, ) if TYPE_CHECKING: @@ -634,7 +635,7 @@ def replace_list( ) def to_native_types(self, **kwargs): - return self.apply_with_block("to_native_types", **kwargs) + return self.apply(to_native_types, **kwargs) @property def is_mixed_type(self) -> bool: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3fd1ebaca19f0..99e54bace8915 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -97,6 +97,7 @@ FloatingArray, IntegerArray, PandasArray, + TimedeltaArray, ) from pandas.core.base import PandasObject import pandas.core.common as com @@ -260,9 +261,11 @@ def get_block_values_for_json(self) -> np.ndarray: # TODO(EA2D): reshape will be unnecessary with 2D EAs return np.asarray(self.values).reshape(self.shape) + @final @property def fill_value(self): - return np.nan + # Used in reindex_indexer + return na_value_for_dtype(self.dtype, compat=False) @property def mgr_locs(self) -> BlockPlacement: @@ -652,24 +655,11 @@ def should_store(self, value: ArrayLike) -> bool: """ return is_dtype_equal(value.dtype, self.dtype) + @final def to_native_types(self, na_rep="nan", quoting=None, **kwargs): """ convert to our native types format """ - values = self.values - - mask = isna(values) - itemsize = writers.word_len(na_rep) - - if not self.is_object and not quoting and itemsize: - values = values.astype(str) - if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: - # enlarge for the na_rep - values = values.astype(f" np.ndarray: def array_values(self) -> ExtensionArray: return self.values - def to_native_types(self, na_rep="nan", quoting=None, **kwargs): - """override to use ExtensionArray astype for the conversion""" - values = self.values - mask = isna(values) - - new_values = np.asarray(values.astype(object)) - new_values[mask] = na_rep - return self.make_block(new_values) - def take_nd( self, indexer, @@ -1808,41 +1784,6 @@ def is_bool(self): class FloatBlock(NumericBlock): __slots__ = () - def to_native_types( - self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs - ): - """ convert to our native types format """ - values = self.values - - # see gh-13418: no special formatting is desired at the - # output (important for appropriate 'quoting' behaviour), - # so do not pass it through the FloatArrayFormatter - if float_format is None and decimal == ".": - mask = isna(values) - - if not quoting: - values = values.astype(str) - else: - values = np.array(values, dtype="object") - - values[mask] = na_rep - values = values.astype(object, copy=False) - return self.make_block(values) - - from pandas.io.formats.format import FloatArrayFormatter - - formatter = FloatArrayFormatter( - values, - na_rep=na_rep, - float_format=float_format, - decimal=decimal, - quoting=quoting, - fixed_width=False, - ) - res = formatter.get_result_as_array() - res = res.astype(object, copy=False) - return self.make_block(res) - class NDArrayBackedExtensionBlock(HybridMixin, Block): """ @@ -1962,18 +1903,6 @@ def array_values(self): def _holder(self): return type(self.array_values()) - @property - def fill_value(self): - return na_value_for_dtype(self.dtype) - - def to_native_types(self, na_rep="NaT", **kwargs): - """ convert to our native types format """ - arr = self.array_values() - - result = arr._format_native_types(na_rep=na_rep, **kwargs) - result = result.astype(object, copy=False) - return self.make_block(result) - class DatetimeBlock(DatetimeLikeBlockMixin): __slots__ = () @@ -1999,7 +1928,6 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): internal_values = Block.internal_values _can_hold_element = DatetimeBlock._can_hold_element - to_native_types = DatetimeBlock.to_native_types diff = DatetimeBlock.diff where = DatetimeBlock.where putmask = DatetimeLikeBlockMixin.putmask @@ -2316,3 +2244,75 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: # We can't, and don't need to, reshape. values = np.asarray(values).reshape(1, -1) return values + + +def to_native_types( + values: ArrayLike, + *, + na_rep="nan", + quoting=None, + float_format=None, + decimal=".", + **kwargs, +) -> np.ndarray: + """ convert to our native types format """ + values = ensure_wrapped_if_datetimelike(values) + + if isinstance(values, (DatetimeArray, TimedeltaArray)): + result = values._format_native_types(na_rep=na_rep, **kwargs) + result = result.astype(object, copy=False) + return result + + elif isinstance(values, ExtensionArray): + mask = isna(values) + + new_values = np.asarray(values.astype(object)) + new_values[mask] = na_rep + return new_values + + elif values.dtype.kind == "f": + # see GH#13418: no special formatting is desired at the + # output (important for appropriate 'quoting' behaviour), + # so do not pass it through the FloatArrayFormatter + if float_format is None and decimal == ".": + mask = isna(values) + + if not quoting: + values = values.astype(str) + else: + values = np.array(values, dtype="object") + + values[mask] = na_rep + values = values.astype(object, copy=False) + return values + + from pandas.io.formats.format import FloatArrayFormatter + + formatter = FloatArrayFormatter( + values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + fixed_width=False, + ) + res = formatter.get_result_as_array() + res = res.astype(object, copy=False) + return res + + else: + + mask = isna(values) + itemsize = writers.word_len(na_rep) + + if values.dtype != _dtype_obj and not quoting and itemsize: + values = values.astype(str) + if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: + # enlarge for the na_rep + values = values.astype(f"