Skip to content

Commit dd9babd

Browse files
fix categorical and sparse
1 parent 007efb1 commit dd9babd

File tree

2 files changed

+51
-22
lines changed

2 files changed

+51
-22
lines changed

pandas/core/internals.py

+35-12
Original file line number | Diff line number | Diff line change
@@ -312,11 +312,11 @@ def ftype(self):
312312
def merge(self, other):
313313
return _merge_blocks([self, other])
314314

315-
def concat_same_type(self, others):
315+
def concat_same_type(self, to_concat):
316316
"""
317317
Concatenate list of single blocks of the same type.
318318
"""
319-
values = np.concatenate([self.values] + [o.values for o in others])
319+
values = np.concatenate([blk.values for blk in to_concat])
320320
return self.make_block_same_class(
321321
values, placement=slice(0, len(values), 1))
322322

@@ -2415,6 +2415,19 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
24152415
# we are expected to return a 2-d ndarray
24162416
return values.reshape(1, len(values))
24172417

2418+
def concat_same_type(self, to_concat):
2419+
"""
2420+
Concatenate list of single blocks of the same type.
2421+
"""
2422+
to_concat = [blk.values for blk in to_concat]
2423+
values = _concat._concat_categorical(to_concat)
2424+
2425+
if is_categorical_dtype(values.dtype):
2426+
return self.make_block_same_class(
2427+
values, placement=slice(0, len(values), 1))
2428+
else:
2429+
return make_block(values, placement=slice(0, len(values), 1))
2430+
24182431

24192432
class DatetimeBlock(DatetimeLikeBlockMixin, Block):
24202433
__slots__ = ()
@@ -2692,20 +2705,18 @@ def shift(self, periods, axis=0, mgr=None):
26922705
return [self.make_block_same_class(new_values,
26932706
placement=self.mgr_locs)]
26942707

2695-
def concat_same_type(self, others):
2708+
def concat_same_type(self, to_concat):
26962709
"""
26972710
Concatenate list of single blocks of the same type.
26982711
"""
2699-
# can maybe replace
2700-
# from pandas.core.dtypes.concat._concat_datetimetz ?
2701-
to_concat = [self.values] + [o.values for o in others]
2702-
2703-
if len(set([str(x.dtype) for x in to_concat])) != 1:
2704-
raise ValueError('to_concat must have the same tz')
2712+
to_concat = [blk.values for blk in to_concat]
2713+
values = _concat._concat_datetime(to_concat)
27052714

2706-
values = to_concat[0]._concat_same_dtype(to_concat, None)
2707-
return self.make_block_same_class(
2708-
values, placement=slice(0, len(values), 1))
2715+
if is_datetimetz(values):
2716+
return self.make_block_same_class(
2717+
values, placement=slice(0, len(values), 1))
2718+
else:
2719+
return make_block(values, placement=slice(0, len(values), 1))
27092720

27102721

27112722
class SparseBlock(NonConsolidatableMixIn, Block):
@@ -2874,6 +2885,18 @@ def sparse_reindex(self, new_index):
28742885
return self.make_block_same_class(values, sparse_index=new_index,
28752886
placement=self.mgr_locs)
28762887

2888+
def concat_same_type(self, to_concat):
2889+
"""
2890+
Concatenate list of single blocks of the same type.
2891+
"""
2892+
to_concat = [blk.values for blk in to_concat]
2893+
values = _concat._concat_sparse(to_concat)
2894+
2895+
return self.make_block_same_class(
2896+
values, placement=slice(0, len(values), 1))
2897+
#else:
2898+
# return make_block(values, placement=slice(0, len(values), 1))
2899+
28772900

28782901
def make_block(values, placement, klass=None, ndim=None, dtype=None,
28792902
fastpath=False):

pandas/core/reshape/concat.py

+16-10
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,13 @@
33
"""
44

55
import numpy as np
6-
from pandas import compat, DataFrame, Series, Index, MultiIndex
6+
from pandas import compat, DataFrame, Series, Index, MultiIndex, SparseSeries
77
from pandas.core.index import (_get_objs_combined_axis,
88
_ensure_index, _get_consensus_names,
99
_all_indexes_same)
1010
from pandas.core.categorical import (_factorize_from_iterable,
1111
_factorize_from_iterables)
12-
from pandas.core.internals import concatenate_block_managers
12+
from pandas.core.internals import concatenate_block_managers, SparseBlock
1313
from pandas.core import common as com
1414
from pandas.core.generic import NDFrame
1515
import pandas.core.dtypes.concat as _concat
@@ -364,16 +364,22 @@ def get_result(self):
364364
if self.axis == 0:
365365
name = com._consensus_name_attr(self.objs)
366366

367-
# check if all series are of the same block type:
368-
blocks = [obj._data.blocks[0] for obj in self.objs]
369-
if all([type(b) == type(blocks[0]) for b in blocks[1:]]):
370-
new_block = blocks[0].concat_same_type(blocks[1:])
371-
return (Series(new_block, index=self.new_axes[0],
372-
name=name, fastpath=True)
373-
.__finalize__(self, method='concat'))
374-
375367
# concat only the non-empty Series so the result keeps its dtype as far as possible
376368
non_empties = [x for x in self.objs if len(x) > 0]
369+
370+
# check if all series are of the same block type:
371+
if len(non_empties) > 0:
372+
blocks = [obj._data.blocks[0] for obj in non_empties]
373+
if all([type(b) == type(blocks[0]) for b in blocks[1:]]):
374+
new_block = blocks[0].concat_same_type(blocks)
375+
if isinstance(new_block, SparseBlock):
376+
cons = SparseSeries
377+
else:
378+
cons = Series
379+
return (cons(new_block, index=self.new_axes[0],
380+
name=name, fastpath=True)
381+
.__finalize__(self, method='concat'))
382+
377383
if len(non_empties) > 0:
378384
values = [x._values for x in non_empties]
379385
else:

0 commit comments

Comments
 (0)