Skip to content

Commit c0af000

Browse files
jbrockmendel authored and JulianWgs committed
REF: share to_native_types with ArrayManager (pandas-dev#40490)
1 parent d356c24 commit c0af000

File tree

2 files changed

+81
-80
lines changed

2 files changed

+81
-80
lines changed

pandas/core/internals/array_manager.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@
8888
from pandas.core.internals.blocks import (
8989
ensure_block_shape,
9090
new_block,
91+
to_native_types,
9192
)
9293

9394
if TYPE_CHECKING:
@@ -634,7 +635,7 @@ def replace_list(
634635
)
635636

636637
def to_native_types(self, **kwargs):
637-
return self.apply_with_block("to_native_types", **kwargs)
638+
return self.apply(to_native_types, **kwargs)
638639

639640
@property
640641
def is_mixed_type(self) -> bool:

pandas/core/internals/blocks.py

+79-79
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,7 @@
9797
FloatingArray,
9898
IntegerArray,
9999
PandasArray,
100+
TimedeltaArray,
100101
)
101102
from pandas.core.base import PandasObject
102103
import pandas.core.common as com
@@ -260,9 +261,11 @@ def get_block_values_for_json(self) -> np.ndarray:
260261
# TODO(EA2D): reshape will be unnecessary with 2D EAs
261262
return np.asarray(self.values).reshape(self.shape)
262263

264+
@final
263265
@property
264266
def fill_value(self):
265-
return np.nan
267+
# Used in reindex_indexer
268+
return na_value_for_dtype(self.dtype, compat=False)
266269

267270
@property
268271
def mgr_locs(self) -> BlockPlacement:
@@ -652,24 +655,11 @@ def should_store(self, value: ArrayLike) -> bool:
652655
"""
653656
return is_dtype_equal(value.dtype, self.dtype)
654657

658+
@final
655659
def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
656660
""" convert to our native types format """
657-
values = self.values
658-
659-
mask = isna(values)
660-
itemsize = writers.word_len(na_rep)
661-
662-
if not self.is_object and not quoting and itemsize:
663-
values = values.astype(str)
664-
if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
665-
# enlarge for the na_rep
666-
values = values.astype(f"<U{itemsize}")
667-
else:
668-
values = np.array(values, dtype="object")
669-
670-
values[mask] = na_rep
671-
values = values.astype(object, copy=False)
672-
return self.make_block(values)
661+
result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs)
662+
return self.make_block(result)
673663

674664
# block actions #
675665
@final
@@ -1498,11 +1488,6 @@ def _holder(self):
14981488
# For extension blocks, the holder is values-dependent.
14991489
return type(self.values)
15001490

1501-
@property
1502-
def fill_value(self):
1503-
# Used in reindex_indexer
1504-
return self.values.dtype.na_value
1505-
15061491
@property
15071492
def _can_hold_na(self):
15081493
# The default ExtensionArray._can_hold_na is True
@@ -1565,15 +1550,6 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
15651550
def array_values(self) -> ExtensionArray:
15661551
return self.values
15671552

1568-
def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
1569-
"""override to use ExtensionArray astype for the conversion"""
1570-
values = self.values
1571-
mask = isna(values)
1572-
1573-
new_values = np.asarray(values.astype(object))
1574-
new_values[mask] = na_rep
1575-
return self.make_block(new_values)
1576-
15771553
def take_nd(
15781554
self,
15791555
indexer,
@@ -1808,41 +1784,6 @@ def is_bool(self):
18081784
class FloatBlock(NumericBlock):
18091785
__slots__ = ()
18101786

1811-
def to_native_types(
1812-
self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs
1813-
):
1814-
""" convert to our native types format """
1815-
values = self.values
1816-
1817-
# see gh-13418: no special formatting is desired at the
1818-
# output (important for appropriate 'quoting' behaviour),
1819-
# so do not pass it through the FloatArrayFormatter
1820-
if float_format is None and decimal == ".":
1821-
mask = isna(values)
1822-
1823-
if not quoting:
1824-
values = values.astype(str)
1825-
else:
1826-
values = np.array(values, dtype="object")
1827-
1828-
values[mask] = na_rep
1829-
values = values.astype(object, copy=False)
1830-
return self.make_block(values)
1831-
1832-
from pandas.io.formats.format import FloatArrayFormatter
1833-
1834-
formatter = FloatArrayFormatter(
1835-
values,
1836-
na_rep=na_rep,
1837-
float_format=float_format,
1838-
decimal=decimal,
1839-
quoting=quoting,
1840-
fixed_width=False,
1841-
)
1842-
res = formatter.get_result_as_array()
1843-
res = res.astype(object, copy=False)
1844-
return self.make_block(res)
1845-
18461787

18471788
class NDArrayBackedExtensionBlock(HybridMixin, Block):
18481789
"""
@@ -1962,18 +1903,6 @@ def array_values(self):
19621903
def _holder(self):
19631904
return type(self.array_values())
19641905

1965-
@property
1966-
def fill_value(self):
1967-
return na_value_for_dtype(self.dtype)
1968-
1969-
def to_native_types(self, na_rep="NaT", **kwargs):
1970-
""" convert to our native types format """
1971-
arr = self.array_values()
1972-
1973-
result = arr._format_native_types(na_rep=na_rep, **kwargs)
1974-
result = result.astype(object, copy=False)
1975-
return self.make_block(result)
1976-
19771906

19781907
class DatetimeBlock(DatetimeLikeBlockMixin):
19791908
__slots__ = ()
@@ -1999,7 +1928,6 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
19991928

20001929
internal_values = Block.internal_values
20011930
_can_hold_element = DatetimeBlock._can_hold_element
2002-
to_native_types = DatetimeBlock.to_native_types
20031931
diff = DatetimeBlock.diff
20041932
where = DatetimeBlock.where
20051933
putmask = DatetimeLikeBlockMixin.putmask
@@ -2316,3 +2244,75 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
23162244
# We can't, and don't need to, reshape.
23172245
values = np.asarray(values).reshape(1, -1)
23182246
return values
2247+
2248+
2249+
def to_native_types(
    values: ArrayLike,
    *,
    na_rep="nan",
    quoting=None,
    float_format=None,
    decimal=".",
    **kwargs,
) -> np.ndarray:
    """
    Convert an array to its "native types" (string/object) representation.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        The array to convert. Datetime-like ndarrays are first wrapped via
        ``ensure_wrapped_if_datetimelike``.
    na_rep : str, default "nan"
        Value written at every position where ``isna(values)`` is True.
    quoting : optional
        When falsy, non-object ndarrays are converted with ``astype(str)``
        before missing values are filled; when truthy, the values are kept
        as Python objects instead.
    float_format, decimal : optional
        Only consulted for float-dtype ndarrays. If either differs from its
        default, formatting is delegated to ``FloatArrayFormatter``.
    **kwargs
        Forwarded to ``_format_native_types`` for DatetimeArray and
        TimedeltaArray inputs; unused in the other branches.

    Returns
    -------
    np.ndarray
        The converted values, with missing entries replaced by ``na_rep``.
    """
    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        result = values._format_native_types(na_rep=na_rep, **kwargs)
        result = result.astype(object, copy=False)
        return result

    elif isinstance(values, ExtensionArray):
        mask = isna(values)

        # Use the ExtensionArray's own astype for the conversion.
        new_values = np.asarray(values.astype(object))
        new_values[mask] = na_rep
        return new_values

    elif values.dtype.kind == "f":
        # see GH#13418: no special formatting is desired at the
        #  output (important for appropriate 'quoting' behaviour),
        #  so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == ".":
            mask = isna(values)

            if not quoting:
                values = values.astype(str)
                # astype(str) yields a fixed-width unicode dtype; assigning a
                #  longer na_rep into it would be silently truncated, so
                #  enlarge for the na_rep exactly as the generic branch does.
                itemsize = writers.word_len(na_rep)
                if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
                    values = values.astype(f"<U{itemsize}")
            else:
                values = np.array(values, dtype="object")

            values[mask] = na_rep
            values = values.astype(object, copy=False)
            return values

        # Imported locally, as in the original code.
        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        res = formatter.get_result_as_array()
        res = res.astype(object, copy=False)
        return res

    else:

        mask = isna(values)
        itemsize = writers.word_len(na_rep)

        if values.dtype != _dtype_obj and not quoting and itemsize:
            values = values.astype(str)
            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
                # enlarge for the na_rep
                values = values.astype(f"<U{itemsize}")
        else:
            values = np.array(values, dtype="object")

        values[mask] = na_rep
        values = values.astype(object, copy=False)
        return values

0 commit comments

Comments
 (0)