Skip to content

Commit b0fe1f0

Browse files
Revert "REF: implement make_na_array (pandas-dev#43606)"
This reverts commit 4bb4b52.
1 parent e785ff6 commit b0fe1f0

File tree

1 file changed

+52
-46
lines changed

1 file changed

+52
-46
lines changed

pandas/core/internals/concat.py

+52 -46
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,7 @@
99

1010
import numpy as np
1111

12-
from pandas._libs import (
13-
NaT,
14-
internals as libinternals,
15-
)
12+
from pandas._libs import internals as libinternals
1613
from pandas._typing import (
1714
ArrayLike,
1815
DtypeObj,
@@ -391,21 +388,59 @@ def is_na(self) -> bool:
391388
return True
392389
return False
393390

394-
def get_reindexed_values(self, empty_dtype: DtypeObj) -> ArrayLike:
391+
def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
395392
values: ArrayLike
396393

397-
if self.is_na:
398-
return make_na_array(empty_dtype, self.shape)
399-
394+
if upcasted_na is None and not self.is_na:
395+
# No upcasting is necessary
396+
fill_value = self.block.fill_value
397+
values = self.block.get_values()
400398
else:
399+
fill_value = upcasted_na
400+
401+
if self.is_na:
402+
403+
if is_datetime64tz_dtype(empty_dtype):
404+
i8values = np.full(self.shape, fill_value.value)
405+
return DatetimeArray(i8values, dtype=empty_dtype)
406+
407+
elif is_1d_only_ea_dtype(empty_dtype):
408+
empty_dtype = cast(ExtensionDtype, empty_dtype)
409+
cls = empty_dtype.construct_array_type()
410+
411+
missing_arr = cls._from_sequence([], dtype=empty_dtype)
412+
ncols, nrows = self.shape
413+
assert ncols == 1, ncols
414+
empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
415+
return missing_arr.take(
416+
empty_arr, allow_fill=True, fill_value=fill_value
417+
)
418+
elif isinstance(empty_dtype, ExtensionDtype):
419+
# TODO: no tests get here, a handful would if we disabled
420+
# the dt64tz special-case above (which is faster)
421+
cls = empty_dtype.construct_array_type()
422+
missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype)
423+
missing_arr[:] = fill_value
424+
return missing_arr
425+
else:
426+
# NB: we should never get here with empty_dtype integer or bool;
427+
# if we did, the missing_arr.fill would cast to gibberish
428+
missing_arr = np.empty(self.shape, dtype=empty_dtype)
429+
missing_arr.fill(fill_value)
430+
return missing_arr
401431

402432
if (not self.indexers) and (not self.block._can_consolidate):
403433
# preserve these for validation in concat_compat
404434
return self.block.values
405435

406-
# No dtype upcasting is done here, it will be performed during
407-
# concatenation itself.
408-
values = self.block.values
436+
if self.block.is_bool:
437+
# External code requested filling/upcasting, bool values must
438+
# be upcasted to object to avoid being upcasted to numeric.
439+
values = self.block.astype(np.object_).values
440+
else:
441+
# No dtype upcasting is done here, it will be performed during
442+
# concatenation itself.
443+
values = self.block.values
409444

410445
if not self.indexers:
411446
# If there's no indexing to be done, we want to signal outside
@@ -420,40 +455,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj) -> ArrayLike:
420455
return values
421456

422457

423-
def make_na_array(dtype: DtypeObj, shape: Shape) -> ArrayLike:
424-
"""
425-
Construct an np.ndarray or ExtensionArray of the given dtype and shape
426-
holding all-NA values.
427-
"""
428-
if is_datetime64tz_dtype(dtype):
429-
# NaT here is analogous to dtype.na_value below
430-
i8values = np.full(shape, NaT.value)
431-
return DatetimeArray(i8values, dtype=dtype)
432-
433-
elif is_1d_only_ea_dtype(dtype):
434-
dtype = cast(ExtensionDtype, dtype)
435-
cls = dtype.construct_array_type()
436-
437-
missing_arr = cls._from_sequence([], dtype=dtype)
438-
nrows = shape[-1]
439-
taker = -1 * np.ones((nrows,), dtype=np.intp)
440-
return missing_arr.take(taker, allow_fill=True, fill_value=dtype.na_value)
441-
elif isinstance(dtype, ExtensionDtype):
442-
# TODO: no tests get here, a handful would if we disabled
443-
# the dt64tz special-case above (which is faster)
444-
cls = dtype.construct_array_type()
445-
missing_arr = cls._empty(shape=shape, dtype=dtype)
446-
missing_arr[:] = dtype.na_value
447-
return missing_arr
448-
else:
449-
# NB: we should never get here with dtype integer or bool;
450-
# if we did, the missing_arr.fill would cast to gibberish
451-
missing_arr = np.empty(shape, dtype=dtype)
452-
fill_value = _dtype_to_na_value(dtype)
453-
missing_arr.fill(fill_value)
454-
return missing_arr
455-
456-
457458
def _concatenate_join_units(
458459
join_units: list[JoinUnit], concat_axis: int, copy: bool
459460
) -> ArrayLike:
@@ -466,7 +467,12 @@ def _concatenate_join_units(
466467

467468
empty_dtype = _get_empty_dtype(join_units)
468469

469-
to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype) for ju in join_units]
470+
upcasted_na = _dtype_to_na_value(empty_dtype)
471+
472+
to_concat = [
473+
ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
474+
for ju in join_units
475+
]
470476

471477
if len(to_concat) == 1:
472478
# Only one block, nothing to concatenate.

0 commit comments

Comments
 (0)