@@ -2918,35 +2918,52 @@ def sparse_reindex(self, new_index):
2918
2918
placement = self .mgr_locs )
2919
2919
2920
2920
2921
def get_block_type(values, dtype=None):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    Parameters
    ----------
    values : ndarray-like
        Object exposing a ``dtype`` attribute; that attribute is used when
        ``dtype`` is not supplied.
    dtype : numpy or pandas dtype, optional
        Dtype to classify by.  Defaults to ``values.dtype`` when None.

    Returns
    -------
    cls : class, subclass of Block

    Raises
    ------
    AssertionError
        If a timedelta64 scalar type is not an integer subclass, or if
        ``values`` has a datetime64 scalar type but is reported as tz-aware
        by ``is_datetimetz``.
    """
    # Test for None explicitly instead of ``dtype or values.dtype``: an
    # explicitly passed dtype that evaluates as falsy (a field-less NumPy
    # dtype reports len() == 0, which some NumPy releases treat as False)
    # would otherwise be silently replaced by ``values.dtype``.
    if dtype is None:
        dtype = values.dtype
    vtype = dtype.type

    # Branch order matters: the first matching test wins.
    if is_sparse(values):
        cls = SparseBlock
    elif issubclass(vtype, np.floating):
        cls = FloatBlock
    elif issubclass(vtype, np.timedelta64):
        # timedelta64 is expected to also be an integer subclass, so it has
        # to be matched before the generic integer branch further down.
        assert issubclass(vtype, np.integer)
        cls = TimeDeltaBlock
    elif issubclass(vtype, np.complexfloating):
        cls = ComplexBlock
    elif issubclass(vtype, np.datetime64):
        # tz-aware values are dispatched by the next branch, never this one.
        assert not is_datetimetz(values)
        cls = DatetimeBlock
    elif is_datetimetz(values):
        cls = DatetimeTZBlock
    elif issubclass(vtype, np.integer):
        cls = IntBlock
    elif dtype == np.bool_:
        cls = BoolBlock
    elif is_categorical(values):
        cls = CategoricalBlock
    else:
        # Anything not recognised above falls back to the object block.
        cls = ObjectBlock
    return cls
2960
+
2961
+
2921
2962
def make_block (values , placement , klass = None , ndim = None , dtype = None ,
2922
2963
fastpath = False ):
2923
2964
if klass is None :
2924
2965
dtype = dtype or values .dtype
2925
- vtype = dtype .type
2926
-
2927
- if isinstance (values , SparseArray ):
2928
- klass = SparseBlock
2929
- elif issubclass (vtype , np .floating ):
2930
- klass = FloatBlock
2931
- elif (issubclass (vtype , np .integer ) and
2932
- issubclass (vtype , np .timedelta64 )):
2933
- klass = TimeDeltaBlock
2934
- elif (issubclass (vtype , np .integer ) and
2935
- not issubclass (vtype , np .datetime64 )):
2936
- klass = IntBlock
2937
- elif dtype == np .bool_ :
2938
- klass = BoolBlock
2939
- elif issubclass (vtype , np .datetime64 ):
2940
- assert not hasattr (values , 'tz' )
2941
- klass = DatetimeBlock
2942
- elif is_datetimetz (values ):
2943
- klass = DatetimeTZBlock
2944
- elif issubclass (vtype , np .complexfloating ):
2945
- klass = ComplexBlock
2946
- elif is_categorical (values ):
2947
- klass = CategoricalBlock
2948
- else :
2949
- klass = ObjectBlock
2966
+ klass = get_block_type (values , dtype )
2950
2967
2951
2968
elif klass is DatetimeTZBlock and not is_datetimetz (values ):
2952
2969
return klass (values , ndim = ndim , fastpath = fastpath ,
@@ -4662,15 +4679,7 @@ def create_block_manager_from_arrays(arrays, names, axes):
4662
4679
def form_blocks (arrays , names , axes ):
4663
4680
# put "leftover" items in float bucket, where else?
4664
4681
# generalize?
4665
- float_items = []
4666
- complex_items = []
4667
- int_items = []
4668
- bool_items = []
4669
- object_items = []
4670
- sparse_items = []
4671
- datetime_items = []
4672
- datetime_tz_items = []
4673
- cat_items = []
4682
+ items_dict = defaultdict (list )
4674
4683
extra_locs = []
4675
4684
4676
4685
names_idx = Index (names )
@@ -4688,70 +4697,55 @@ def form_blocks(arrays, names, axes):
4688
4697
k = names [name_idx ]
4689
4698
v = arrays [name_idx ]
4690
4699
4691
- if is_sparse (v ):
4692
- sparse_items .append ((i , k , v ))
4693
- elif issubclass (v .dtype .type , np .floating ):
4694
- float_items .append ((i , k , v ))
4695
- elif issubclass (v .dtype .type , np .complexfloating ):
4696
- complex_items .append ((i , k , v ))
4697
- elif issubclass (v .dtype .type , np .datetime64 ):
4698
- if v .dtype != _NS_DTYPE :
4699
- v = conversion .ensure_datetime64ns (v )
4700
-
4701
- assert not is_datetimetz (v )
4702
- datetime_items .append ((i , k , v ))
4703
- elif is_datetimetz (v ):
4704
- datetime_tz_items .append ((i , k , v ))
4705
- elif issubclass (v .dtype .type , np .integer ):
4706
- int_items .append ((i , k , v ))
4707
- elif v .dtype == np .bool_ :
4708
- bool_items .append ((i , k , v ))
4709
- elif is_categorical (v ):
4710
- cat_items .append ((i , k , v ))
4711
- else :
4712
- object_items .append ((i , k , v ))
4700
+ block_type = get_block_type (v )
4701
+ items_dict [block_type .__name__ ].append ((i , k , v ))
4713
4702
4714
4703
blocks = []
4715
- if len (float_items ):
4716
- float_blocks = _multi_blockify (float_items )
4704
+ if len (items_dict [ 'FloatBlock' ] ):
4705
+ float_blocks = _multi_blockify (items_dict [ 'FloatBlock' ] )
4717
4706
blocks .extend (float_blocks )
4718
4707
4719
- if len (complex_items ):
4720
- complex_blocks = _multi_blockify (complex_items )
4708
+ if len (items_dict [ 'ComplexBlock' ] ):
4709
+ complex_blocks = _multi_blockify (items_dict [ 'ComplexBlock' ] )
4721
4710
blocks .extend (complex_blocks )
4722
4711
4723
- if len (int_items ):
4724
- int_blocks = _multi_blockify (int_items )
4712
+ if len (items_dict ['TimeDeltaBlock' ]):
4713
+ timedelta_blocks = _multi_blockify (items_dict ['TimeDeltaBlock' ])
4714
+ blocks .extend (timedelta_blocks )
4715
+
4716
+ if len (items_dict ['IntBlock' ]):
4717
+ int_blocks = _multi_blockify (items_dict ['IntBlock' ])
4725
4718
blocks .extend (int_blocks )
4726
4719
4727
- if len (datetime_items ):
4728
- datetime_blocks = _simple_blockify (datetime_items , _NS_DTYPE )
4720
+ if len (items_dict ['DatetimeBlock' ]):
4721
+ datetime_blocks = _simple_blockify (items_dict ['DatetimeBlock' ],
4722
+ _NS_DTYPE )
4729
4723
blocks .extend (datetime_blocks )
4730
4724
4731
- if len (datetime_tz_items ):
4725
+ if len (items_dict [ 'DatetimeTZBlock' ] ):
4732
4726
dttz_blocks = [make_block (array ,
4733
4727
klass = DatetimeTZBlock ,
4734
4728
fastpath = True ,
4735
- placement = [i ], )
4736
- for i , _ , array in datetime_tz_items ]
4729
+ placement = [i ])
4730
+ for i , _ , array in items_dict [ 'DatetimeTZBlock' ] ]
4737
4731
blocks .extend (dttz_blocks )
4738
4732
4739
- if len (bool_items ):
4740
- bool_blocks = _simple_blockify (bool_items , np .bool_ )
4733
+ if len (items_dict [ 'BoolBlock' ] ):
4734
+ bool_blocks = _simple_blockify (items_dict [ 'BoolBlock' ] , np .bool_ )
4741
4735
blocks .extend (bool_blocks )
4742
4736
4743
- if len (object_items ) > 0 :
4744
- object_blocks = _simple_blockify (object_items , np .object_ )
4737
+ if len (items_dict [ 'ObjectBlock' ] ) > 0 :
4738
+ object_blocks = _simple_blockify (items_dict [ 'ObjectBlock' ] , np .object_ )
4745
4739
blocks .extend (object_blocks )
4746
4740
4747
- if len (sparse_items ) > 0 :
4748
- sparse_blocks = _sparse_blockify (sparse_items )
4741
+ if len (items_dict [ 'SparseBlock' ] ) > 0 :
4742
+ sparse_blocks = _sparse_blockify (items_dict [ 'SparseBlock' ] )
4749
4743
blocks .extend (sparse_blocks )
4750
4744
4751
- if len (cat_items ) > 0 :
4745
+ if len (items_dict [ 'CategoricalBlock' ] ) > 0 :
4752
4746
cat_blocks = [make_block (array , klass = CategoricalBlock , fastpath = True ,
4753
4747
placement = [i ])
4754
- for i , _ , array in cat_items ]
4748
+ for i , _ , array in items_dict [ 'CategoricalBlock' ] ]
4755
4749
blocks .extend (cat_blocks )
4756
4750
4757
4751
if len (extra_locs ):
0 commit comments