1
1
import warnings
2
+
2
3
import copy
3
4
from warnings import catch_warnings
4
5
import inspect
82
83
from pandas .util ._decorators import cache_readonly
83
84
from pandas .util ._validators import validate_bool_kwarg
84
85
from pandas import compat
85
- from pandas .compat import range , map , zip , u
86
+ from pandas .compat import range , map , zip , u , _default_fill_value
86
87
87
88
88
89
class Block (PandasObject ):
@@ -1888,6 +1889,10 @@ def _holder(self):
1888
1889
# For extension blocks, the holder is values-dependent.
1889
1890
return type (self .values )
1890
1891
1892
+ @property
1893
+ def fill_value (self ):
1894
+ return self .values .dtype .na_value # TODO: change to _na_value
1895
+
1891
1896
@property
1892
1897
def _can_hold_na (self ):
1893
1898
# The default ExtensionArray._can_hold_na is True
@@ -4386,6 +4391,8 @@ def reindex_indexer(self, new_axis, indexer, axis, fill_value=None,
4386
4391
4387
4392
pandas-indexer with -1's only.
4388
4393
"""
4394
+ # TODO: see if we can make fill_value be {col -> fill_value}
4395
+ # maybe earlier...
4389
4396
if indexer is None :
4390
4397
if new_axis is self .axes [axis ] and not copy :
4391
4398
return self
@@ -4408,8 +4415,10 @@ def reindex_indexer(self, new_axis, indexer, axis, fill_value=None,
4408
4415
new_blocks = self ._slice_take_blocks_ax0 (indexer ,
4409
4416
fill_tuple = (fill_value ,))
4410
4417
else :
4418
+ if fill_value is None :
4419
+ fill_value = _default_fill_value
4411
4420
new_blocks = [blk .take_nd (indexer , axis = axis , fill_tuple = (
4412
- fill_value if fill_value is not None else blk .fill_value ,))
4421
+ fill_value if fill_value is not _default_fill_value else blk .fill_value ,))
4413
4422
for blk in self .blocks ]
4414
4423
4415
4424
new_axes = list (self .axes )
@@ -4436,6 +4445,9 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
4436
4445
if self ._is_single_block :
4437
4446
blk = self .blocks [0 ]
4438
4447
4448
+ if allow_fill and fill_tuple [0 ] is _default_fill_value :
4449
+ fill_tuple = (blk .fill_value ,)
4450
+
4439
4451
if sl_type in ('slice' , 'mask' ):
4440
4452
return [blk .getitem_block (slobj , new_mgr_locs = slice (0 , sllen ))]
4441
4453
elif not allow_fill or self .ndim == 1 :
@@ -5404,6 +5416,25 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
5404
5416
elif is_uniform_join_units (join_units ):
5405
5417
b = join_units [0 ].block .concat_same_type (
5406
5418
[ju .block for ju in join_units ], placement = placement )
5419
+ elif is_uniform_reindexer (join_units ):
5420
+ old_block = join_units [0 ].block
5421
+
5422
+ new_values = concatenate_join_units (join_units , concat_axis ,
5423
+ copy = copy )
5424
+ if new_values .ndim == 2 :
5425
+ # XXX: categorical returns a categorical here
5426
+ # EA returns a 2d ndarray
5427
+ # need to harmonize these to always be EAs?
5428
+ assert new_values .shape [0 ] == 1
5429
+ new_values = new_values [0 ]
5430
+
5431
+ assert isinstance (old_block ._holder , ABCExtensionArray )
5432
+
5433
+ b = old_block .make_block_same_class (
5434
+ old_block ._holder ._from_sequence (new_values ),
5435
+ placement = placement
5436
+ )
5437
+
5407
5438
else :
5408
5439
b = make_block (
5409
5440
concatenate_join_units (join_units , concat_axis , copy = copy ),
@@ -5434,6 +5465,13 @@ def is_uniform_join_units(join_units):
5434
5465
len (join_units ) > 1 )
5435
5466
5436
5467
5468
+ def is_uniform_reindexer (join_units ):
5469
+ # For when we know we can reindex without changing type
5470
+ return (
5471
+ all (ju .block and ju .block .is_extension for ju in join_units )
5472
+ )
5473
+
5474
+
5437
5475
def get_empty_dtype_and_na (join_units ):
5438
5476
"""
5439
5477
Return dtype and N/A values to use when concatenating specified units.
@@ -5461,12 +5499,15 @@ def get_empty_dtype_and_na(join_units):
5461
5499
5462
5500
upcast_classes = defaultdict (list )
5463
5501
null_upcast_classes = defaultdict (list )
5502
+
5464
5503
for dtype , unit in zip (dtypes , join_units ):
5465
5504
if dtype is None :
5466
5505
continue
5467
5506
5468
5507
if is_categorical_dtype (dtype ):
5469
5508
upcast_cls = 'category'
5509
+ elif is_extension_array_dtype (dtype ):
5510
+ upcast_cls = 'extension'
5470
5511
elif is_datetimetz (dtype ):
5471
5512
upcast_cls = 'datetimetz'
5472
5513
elif issubclass (dtype .type , np .bool_ ):
@@ -5496,6 +5537,8 @@ def get_empty_dtype_and_na(join_units):
5496
5537
# create the result
5497
5538
if 'object' in upcast_classes :
5498
5539
return np .dtype (np .object_ ), np .nan
5540
+ elif 'extension' in upcast_classes :
5541
+ return np .dtype (np .object_ ), None
5499
5542
elif 'bool' in upcast_classes :
5500
5543
if has_none_blocks :
5501
5544
return np .dtype (np .object_ ), np .nan
@@ -5755,7 +5798,9 @@ def dtype(self):
5755
5798
if self .block is None :
5756
5799
raise AssertionError ("Block is None, no dtype" )
5757
5800
5758
- if not self .needs_filling :
5801
+ if not self .needs_filling or self .block .is_extension :
5802
+ # ExtensionDtypes by definition can hold their
5803
+ # NA value.
5759
5804
return self .block .dtype
5760
5805
else :
5761
5806
return _get_dtype (maybe_promote (self .block .dtype ,
0 commit comments