Skip to content

Commit 4086e42

Browse files
jbrockmendel authored and jreback committed
CLN: unify logic for form_blocks and make_blocks (pandas-dev#19189)
1 parent d7a2e94 commit 4086e42

File tree

1 file changed

+69
-75
lines changed

1 file changed

+69
-75
lines changed

pandas/core/internals.py

+69-75
Original file line number | Diff line number | Diff line change
@@ -2918,35 +2918,52 @@ def sparse_reindex(self, new_index):
29182918
placement=self.mgr_locs)
29192919

29202920

2921+
def get_block_type(values, dtype=None):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    Parameters
    ----------
    values : ndarray-like
    dtype : numpy or pandas dtype, optional
        When None, ``values.dtype`` is used.

    Returns
    -------
    cls : class, subclass of Block
    """
    # Compare against None explicitly rather than relying on truthiness:
    # numpy dtypes define __len__ (number of fields), so an unstructured
    # dtype can evaluate as falsy and an explicitly passed dtype would be
    # silently replaced by values.dtype.
    if dtype is None:
        dtype = values.dtype
    vtype = dtype.type

    # NOTE: branch order matters; the first matching check wins.
    if is_sparse(values):
        cls = SparseBlock
    elif issubclass(vtype, np.floating):
        cls = FloatBlock
    elif issubclass(vtype, np.timedelta64):
        # timedelta64 scalar types are also integer subclasses, so this
        # branch has to be tested before the generic integer branch below.
        assert issubclass(vtype, np.integer)
        cls = TimeDeltaBlock
    elif issubclass(vtype, np.complexfloating):
        cls = ComplexBlock
    elif issubclass(vtype, np.datetime64):
        # tz-aware values are expected to be caught by the next branch,
        # never to arrive here with a plain datetime64 scalar type.
        assert not is_datetimetz(values)
        cls = DatetimeBlock
    elif is_datetimetz(values):
        cls = DatetimeTZBlock
    elif issubclass(vtype, np.integer):
        cls = IntBlock
    elif dtype == np.bool_:
        cls = BoolBlock
    elif is_categorical(values):
        cls = CategoricalBlock
    else:
        # Anything not recognised above is stored as generic objects.
        cls = ObjectBlock
    return cls
2960+
2961+
29212962
def make_block(values, placement, klass=None, ndim=None, dtype=None,
29222963
fastpath=False):
29232964
if klass is None:
29242965
dtype = dtype or values.dtype
2925-
vtype = dtype.type
2926-
2927-
if isinstance(values, SparseArray):
2928-
klass = SparseBlock
2929-
elif issubclass(vtype, np.floating):
2930-
klass = FloatBlock
2931-
elif (issubclass(vtype, np.integer) and
2932-
issubclass(vtype, np.timedelta64)):
2933-
klass = TimeDeltaBlock
2934-
elif (issubclass(vtype, np.integer) and
2935-
not issubclass(vtype, np.datetime64)):
2936-
klass = IntBlock
2937-
elif dtype == np.bool_:
2938-
klass = BoolBlock
2939-
elif issubclass(vtype, np.datetime64):
2940-
assert not hasattr(values, 'tz')
2941-
klass = DatetimeBlock
2942-
elif is_datetimetz(values):
2943-
klass = DatetimeTZBlock
2944-
elif issubclass(vtype, np.complexfloating):
2945-
klass = ComplexBlock
2946-
elif is_categorical(values):
2947-
klass = CategoricalBlock
2948-
else:
2949-
klass = ObjectBlock
2966+
klass = get_block_type(values, dtype)
29502967

29512968
elif klass is DatetimeTZBlock and not is_datetimetz(values):
29522969
return klass(values, ndim=ndim, fastpath=fastpath,
@@ -4662,15 +4679,7 @@ def create_block_manager_from_arrays(arrays, names, axes):
46624679
def form_blocks(arrays, names, axes):
46634680
# put "leftover" items in float bucket, where else?
46644681
# generalize?
4665-
float_items = []
4666-
complex_items = []
4667-
int_items = []
4668-
bool_items = []
4669-
object_items = []
4670-
sparse_items = []
4671-
datetime_items = []
4672-
datetime_tz_items = []
4673-
cat_items = []
4682+
items_dict = defaultdict(list)
46744683
extra_locs = []
46754684

46764685
names_idx = Index(names)
@@ -4688,70 +4697,55 @@ def form_blocks(arrays, names, axes):
46884697
k = names[name_idx]
46894698
v = arrays[name_idx]
46904699

4691-
if is_sparse(v):
4692-
sparse_items.append((i, k, v))
4693-
elif issubclass(v.dtype.type, np.floating):
4694-
float_items.append((i, k, v))
4695-
elif issubclass(v.dtype.type, np.complexfloating):
4696-
complex_items.append((i, k, v))
4697-
elif issubclass(v.dtype.type, np.datetime64):
4698-
if v.dtype != _NS_DTYPE:
4699-
v = conversion.ensure_datetime64ns(v)
4700-
4701-
assert not is_datetimetz(v)
4702-
datetime_items.append((i, k, v))
4703-
elif is_datetimetz(v):
4704-
datetime_tz_items.append((i, k, v))
4705-
elif issubclass(v.dtype.type, np.integer):
4706-
int_items.append((i, k, v))
4707-
elif v.dtype == np.bool_:
4708-
bool_items.append((i, k, v))
4709-
elif is_categorical(v):
4710-
cat_items.append((i, k, v))
4711-
else:
4712-
object_items.append((i, k, v))
4700+
block_type = get_block_type(v)
4701+
items_dict[block_type.__name__].append((i, k, v))
47134702

47144703
blocks = []
4715-
if len(float_items):
4716-
float_blocks = _multi_blockify(float_items)
4704+
if len(items_dict['FloatBlock']):
4705+
float_blocks = _multi_blockify(items_dict['FloatBlock'])
47174706
blocks.extend(float_blocks)
47184707

4719-
if len(complex_items):
4720-
complex_blocks = _multi_blockify(complex_items)
4708+
if len(items_dict['ComplexBlock']):
4709+
complex_blocks = _multi_blockify(items_dict['ComplexBlock'])
47214710
blocks.extend(complex_blocks)
47224711

4723-
if len(int_items):
4724-
int_blocks = _multi_blockify(int_items)
4712+
if len(items_dict['TimeDeltaBlock']):
4713+
timedelta_blocks = _multi_blockify(items_dict['TimeDeltaBlock'])
4714+
blocks.extend(timedelta_blocks)
4715+
4716+
if len(items_dict['IntBlock']):
4717+
int_blocks = _multi_blockify(items_dict['IntBlock'])
47254718
blocks.extend(int_blocks)
47264719

4727-
if len(datetime_items):
4728-
datetime_blocks = _simple_blockify(datetime_items, _NS_DTYPE)
4720+
if len(items_dict['DatetimeBlock']):
4721+
datetime_blocks = _simple_blockify(items_dict['DatetimeBlock'],
4722+
_NS_DTYPE)
47294723
blocks.extend(datetime_blocks)
47304724

4731-
if len(datetime_tz_items):
4725+
if len(items_dict['DatetimeTZBlock']):
47324726
dttz_blocks = [make_block(array,
47334727
klass=DatetimeTZBlock,
47344728
fastpath=True,
4735-
placement=[i], )
4736-
for i, _, array in datetime_tz_items]
4729+
placement=[i])
4730+
for i, _, array in items_dict['DatetimeTZBlock']]
47374731
blocks.extend(dttz_blocks)
47384732

4739-
if len(bool_items):
4740-
bool_blocks = _simple_blockify(bool_items, np.bool_)
4733+
if len(items_dict['BoolBlock']):
4734+
bool_blocks = _simple_blockify(items_dict['BoolBlock'], np.bool_)
47414735
blocks.extend(bool_blocks)
47424736

4743-
if len(object_items) > 0:
4744-
object_blocks = _simple_blockify(object_items, np.object_)
4737+
if len(items_dict['ObjectBlock']) > 0:
4738+
object_blocks = _simple_blockify(items_dict['ObjectBlock'], np.object_)
47454739
blocks.extend(object_blocks)
47464740

4747-
if len(sparse_items) > 0:
4748-
sparse_blocks = _sparse_blockify(sparse_items)
4741+
if len(items_dict['SparseBlock']) > 0:
4742+
sparse_blocks = _sparse_blockify(items_dict['SparseBlock'])
47494743
blocks.extend(sparse_blocks)
47504744

4751-
if len(cat_items) > 0:
4745+
if len(items_dict['CategoricalBlock']) > 0:
47524746
cat_blocks = [make_block(array, klass=CategoricalBlock, fastpath=True,
47534747
placement=[i])
4754-
for i, _, array in cat_items]
4748+
for i, _, array in items_dict['CategoricalBlock']]
47554749
blocks.extend(cat_blocks)
47564750

47574751
if len(extra_locs):

0 commit comments

Comments
 (0)