
Commit eba137f

More internals hacking
Parent: 08f2479

3 files changed: +50, -6 lines

pandas/core/arrays/base.py

+1, -2
@@ -537,8 +537,7 @@ def _values_for_take(self):
     @Appender(_take_docstring)
     def take(self, indexer, fill_value=_default_fill_value):
         # type: (Sequence[int], Optional[Any]) -> ExtensionArray
-        if fill_value is np.nan:
-            import pdb; pdb.set_trace()
+        # assert fill_value is not np.nan
         from pandas.core.algorithms import take
 
         data = self._values_for_take()
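The `_default_fill_value` sentinel lets `take` distinguish "the caller passed nothing" from "the caller explicitly passed NaN or None". Below is a minimal, self-contained sketch of that sentinel pattern; the names mirror the diff, but the function is an illustration, not the pandas implementation.

import numpy as np

_default_fill_value = object()  # unique sentinel, standing in for pandas.compat._default_fill_value

def take(values, indexer, fill_value=_default_fill_value, na_value=np.nan):
    # When the caller omits fill_value, fall back to the type's own NA
    # (dtype.na_value in the real code) instead of assuming np.nan.
    if fill_value is _default_fill_value:
        fill_value = na_value
    return [fill_value if i == -1 else values[i] for i in indexer]

print(take([10, 20], [0, -1]))                # missing slot filled with na_value
print(take([10, 20], [0, -1], fill_value=0))  # explicit fill_value is respected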

pandas/core/dtypes/base.py

+1, -1
@@ -18,7 +18,7 @@ class _DtypeOpsMixin(object):
 
     # na_value is the default NA value to use for this type. This is used in
     # e.g. ExtensionArray.take.
-    na_value = np.nan
+    na_value = np.nan  # TODO: change to _na_value
 
     def __eq__(self, other):
         """Check whether 'other' is equal to self.

pandas/core/internals.py

+48, -3
@@ -1,4 +1,5 @@
 import warnings
+
 import copy
 from warnings import catch_warnings
 import inspect
@@ -82,7 +83,7 @@
 from pandas.util._decorators import cache_readonly
 from pandas.util._validators import validate_bool_kwarg
 from pandas import compat
-from pandas.compat import range, map, zip, u
+from pandas.compat import range, map, zip, u, _default_fill_value
 
 
 class Block(PandasObject):
@@ -1888,6 +1889,10 @@ def _holder(self):
         # For extension blocks, the holder is values-dependent.
         return type(self.values)
 
+    @property
+    def fill_value(self):
+        return self.values.dtype.na_value  # TODO: change to _na_value
+
     @property
     def _can_hold_na(self):
         # The default ExtensionArray._can_hold_na is True
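Together with the `na_value` hook on `_DtypeOpsMixin` above, this property lets an extension block advertise its dtype's own NA as the block-level fill value. A rough, self-contained sketch of that lookup chain; `MyDtype`, `MyValues`, and `MyBlock` are illustrative stand-ins, not pandas classes.

import numpy as np

class MyDtype:
    # mirrors _DtypeOpsMixin.na_value; a concrete extension dtype could
    # override this with its own missing-value marker (e.g. NaT)
    na_value = np.nan

class MyValues:
    dtype = MyDtype()

class MyBlock:
    def __init__(self, values):
        self.values = values

    @property
    def fill_value(self):
        # same resolution as ExtensionBlock.fill_value in the diff
        return self.values.dtype.na_value

blk = MyBlock(MyValues())
assert blk.fill_value is MyDtype.na_value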
@@ -4386,6 +4391,8 @@ def reindex_indexer(self, new_axis, indexer, axis, fill_value=None,
 
         pandas-indexer with -1's only.
         """
+        # TODO: see if we can make fill_value be {col -> fill_value}
+        # maybe earlier...
         if indexer is None:
             if new_axis is self.axes[axis] and not copy:
                 return self
@@ -4408,8 +4415,10 @@ def reindex_indexer(self, new_axis, indexer, axis, fill_value=None,
             new_blocks = self._slice_take_blocks_ax0(indexer,
                                                      fill_tuple=(fill_value,))
         else:
+            if fill_value is None:
+                fill_value = _default_fill_value
             new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
-                fill_value if fill_value is not None else blk.fill_value,))
+                fill_value if fill_value is not _default_fill_value else blk.fill_value,))
                 for blk in self.blocks]
 
         new_axes = list(self.axes)
@@ -4436,6 +4445,9 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
         if self._is_single_block:
             blk = self.blocks[0]
 
+            if allow_fill and fill_tuple[0] is _default_fill_value:
+                fill_tuple = (blk.fill_value,)
+
             if sl_type in ('slice', 'mask'):
                 return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
             elif not allow_fill or self.ndim == 1:
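Both reindex paths now funnel a missing `fill_value` through the sentinel so that each block can substitute its own `fill_value`, while an explicit argument still wins. A condensed sketch of that resolution logic with stand-in blocks rather than the real BlockManager.

_default_fill_value = object()  # stand-in for pandas.compat._default_fill_value

class FakeBlock:
    def __init__(self, fill_value):
        self.fill_value = fill_value

def resolve_fill(blocks, fill_value=None):
    # mirrors reindex_indexer: None means "no preference", which becomes
    # the sentinel, and the sentinel is then replaced per block
    if fill_value is None:
        fill_value = _default_fill_value
    return [blk.fill_value if fill_value is _default_fill_value else fill_value
            for blk in blocks]

blocks = [FakeBlock(fill_value=float('nan')), FakeBlock(fill_value=None)]
print(resolve_fill(blocks))      # each block supplies its own NA
print(resolve_fill(blocks, 0))   # an explicit fill_value applies to every block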
@@ -5404,6 +5416,25 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
         elif is_uniform_join_units(join_units):
             b = join_units[0].block.concat_same_type(
                 [ju.block for ju in join_units], placement=placement)
+        elif is_uniform_reindexer(join_units):
+            old_block = join_units[0].block
+
+            new_values = concatenate_join_units(join_units, concat_axis,
+                                                copy=copy)
+            if new_values.ndim == 2:
+                # XXX: categorical returns a categorical here
+                # EA returns a 2d ndarray
+                # need to harmonize these to always be EAs?
+                assert new_values.shape[0] == 1
+                new_values = new_values[0]
+
+            assert isinstance(old_block._holder, ABCExtensionArray)
+
+            b = old_block.make_block_same_class(
+                old_block._holder._from_sequence(new_values),
+                placement=placement
+            )
+
         else:
             b = make_block(
                 concatenate_join_units(join_units, concat_axis, copy=copy),
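The new `is_uniform_reindexer` branch rebuilds an extension block by concatenating the underlying values, flattening a possible (1, n) result, and handing the scalars back to the array type's `_from_sequence` constructor. A self-contained sketch of that round trip, with a toy array class standing in for a real ExtensionArray.

import numpy as np

class ToyArray:
    # minimal stand-in for an ExtensionArray that offers _from_sequence
    def __init__(self, data):
        self.data = list(data)

    @classmethod
    def _from_sequence(cls, scalars):
        return cls(scalars)

def concat_extension_values(chunks):
    # concatenate the raw values as a 1-D object array ...
    new_values = np.concatenate([np.asarray(c.data, dtype=object) for c in chunks])
    if new_values.ndim == 2:
        # the diff normalizes a (1, n) result down to 1-D before reconstructing
        assert new_values.shape[0] == 1
        new_values = new_values[0]
    # ... then rebuild the extension container from the scalars
    return ToyArray._from_sequence(new_values)

result = concat_extension_values([ToyArray([1, 2]), ToyArray([3])])
print(result.data)  # [1, 2, 3]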
@@ -5434,6 +5465,13 @@ def is_uniform_join_units(join_units):
         len(join_units) > 1)
 
 
+def is_uniform_reindexer(join_units):
+    # For when we know we can reindex without changing type
+    return (
+        all(ju.block and ju.block.is_extension for ju in join_units)
+    )
+
+
 def get_empty_dtype_and_na(join_units):
     """
     Return dtype and N/A values to use when concatenating specified units.
@@ -5461,12 +5499,15 @@ def get_empty_dtype_and_na(join_units):
 
     upcast_classes = defaultdict(list)
     null_upcast_classes = defaultdict(list)
+
     for dtype, unit in zip(dtypes, join_units):
         if dtype is None:
             continue
 
         if is_categorical_dtype(dtype):
             upcast_cls = 'category'
+        elif is_extension_array_dtype(dtype):
+            upcast_cls = 'extension'
         elif is_datetimetz(dtype):
             upcast_cls = 'datetimetz'
         elif issubclass(dtype.type, np.bool_):
@@ -5496,6 +5537,8 @@ def get_empty_dtype_and_na(join_units):
     # create the result
     if 'object' in upcast_classes:
         return np.dtype(np.object_), np.nan
+    elif 'extension' in upcast_classes:
+        return np.dtype(np.object_), None
     elif 'bool' in upcast_classes:
         if has_none_blocks:
             return np.dtype(np.object_), np.nan
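With the new 'extension' bucket, a mixed concatenation involving an extension dtype currently falls back to object dtype with None as the missing-value marker rather than np.nan. A simplified sketch of that classify-then-dispatch shape; the bucket names follow the diff, while the bool and fallback branches are condensed for illustration.

import numpy as np

def empty_dtype_and_na(upcast_classes):
    # mirrors the ordering in get_empty_dtype_and_na: object wins,
    # then the new 'extension' bucket, then the numpy-specific buckets
    if 'object' in upcast_classes:
        return np.dtype(np.object_), np.nan
    elif 'extension' in upcast_classes:
        return np.dtype(np.object_), None
    elif 'bool' in upcast_classes:
        return np.dtype(np.bool_), None
    return np.dtype(np.float64), np.nan

print(empty_dtype_and_na({'extension', 'float'}))  # (dtype('O'), None)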
@@ -5755,7 +5798,9 @@ def dtype(self):
         if self.block is None:
             raise AssertionError("Block is None, no dtype")
 
-        if not self.needs_filling:
+        if not self.needs_filling or self.block.is_extension:
+            # ExtensionDtypes by definition can hold their
+            # NA value.
             return self.block.dtype
         else:
             return _get_dtype(maybe_promote(self.block.dtype,
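Because an extension dtype can, by definition, hold its own NA value, a join unit backed by an extension block now keeps its dtype even when the reindex will introduce missing values, instead of being promoted the way an int64 block is promoted to float64. A toy illustration of that bypass; the function stands in for JoinUnit.dtype, with np.promote_types standing in for maybe_promote.

import numpy as np

def join_dtype(block_dtype, needs_filling, is_extension):
    # extension blocks keep their dtype even when filling is needed
    if not needs_filling or is_extension:
        return block_dtype
    # non-extension blocks get promoted so they can hold np.nan
    return np.promote_types(block_dtype, np.float64)

print(join_dtype(np.dtype('int64'), needs_filling=True, is_extension=False))  # float64
print(join_dtype(np.dtype('int64'), needs_filling=True, is_extension=True))   # int64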
