diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index b2a1e10469a0f..52e4c1f4e7d03 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -388,3 +388,5 @@ Bug Fixes - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`). - Bug in `read_msgpack` where DataFrame to decode has duplicate column names (:issue:`9618`) + +- Bug in ``concat`` with ``SparseSeries`` (:issue:`10536`) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index c7c578232cd0f..a6533ad600f45 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -18,7 +18,8 @@ from pandas.util.decorators import Appender, Substitution from pandas.core.common import ABCSeries from pandas.io.parsers import TextFileReader - +from pandas.sparse.series import SparseSeries +from pandas.sparse.frame import SparseDataFrame import pandas.core.common as com import pandas.lib as lib @@ -838,6 +839,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, axis = 1 if axis == 0 else 0 self._is_series = isinstance(sample, ABCSeries) + self._is_sp_series = isinstance(sample, SparseSeries) if not 0 <= axis <= sample.ndim: raise AssertionError("axis must be between 0 and {0}, " "input was {1}".format(sample.ndim, axis)) @@ -894,13 +896,21 @@ def get_result(self): if self.axis == 0: new_data = com._concat_compat([x.values for x in self.objs]) name = com._consensus_name_attr(self.objs) - return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat') + if self._is_sp_series: + klass = SparseSeries + else: + klass = Series + return klass(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat') # combine as columns in a frame else: data = dict(zip(range(len(self.objs)), self.objs)) index, columns = self.new_axes - tmpdf = DataFrame(data, index=index) + if self._is_sp_series: + klass = SparseDataFrame + else: + klass = DataFrame + tmpdf = klass(data, index=index) if columns is not None: tmpdf.columns = columns return tmpdf.__finalize__(self, method='concat') diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index d357182a60b1f..61fbce8c2c7de 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -17,9 +17,11 @@ assert_almost_equal, makeCustomDataframe as mkdf, assertRaisesRegexp) -from pandas import isnull, DataFrame, Index, MultiIndex, Panel, Series, date_range, read_table, read_csv +from pandas import (isnull, DataFrame, Index, MultiIndex, Panel, Series, date_range, + read_table, read_csv, SparseSeries, SparseDataFrame) import pandas.algos as algos import pandas.util.testing as tm +from pandas.sparse.tests.test_sparse import assert_sp_series_equal, assert_sp_frame_equal a_ = np.array @@ -2476,6 +2478,24 @@ def test_concat_invalid_first_argument(self): expected = read_csv(StringIO(data)) assert_frame_equal(result,expected) + def test_concat_sp_series(self): + # GH10536 + data = [0, 1, 1, 2, 3, 0, np.nan] + index = [1, 2, 3, 4, 5, 6, 7] + sp = SparseSeries(data, index=index) + result = concat([sp, sp], axis=0) + expected = SparseSeries(data * 2, index=index * 2, kind='integer') + assert_sp_series_equal(result, expected) + + def test_concat_sp_dataframe(self): + # GH10536 + data = [0, 1, 1, 2, 3, 0, np.nan] + sp = SparseDataFrame(data) + result = concat([sp, sp], axis=1, ignore_index=True) + expected = SparseDataFrame({0: data, 1: data}) + assert_sp_frame_equal(result, expected) + + class TestOrderedMerge(tm.TestCase): def setUp(self):