diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 88a59fea375ea..a1cdb00260fc4 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -50,7 +50,7 @@ def setup(self, axis): self.empty_right = [df, DataFrame()] def time_concat_series(self, axis): - concat(self.series, axis=axis) + concat(self.series, axis=axis, sort=False) def time_concat_small_frames(self, axis): concat(self.small_frames, axis=axis) diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index 47b3ad612f9b1..627705284481b 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,7 +1,7 @@ import warnings from datetime import datetime, timedelta -from pandas import DataFrame, Panel, DatetimeIndex, date_range +from pandas import DataFrame, Panel, date_range class DifferentIndexes(object): @@ -23,9 +23,9 @@ def time_from_dict(self): class SameIndexes(object): def setup(self): - idx = DatetimeIndex(start=datetime(1990, 1, 1), - end=datetime(2012, 1, 1), - freq='D') + idx = date_range(start=datetime(1990, 1, 1), + end=datetime(2012, 1, 1), + freq='D') df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx) self.data_frames = dict(enumerate([df] * 100)) @@ -40,10 +40,10 @@ def setup(self): start = datetime(1990, 1, 1) end = datetime(2012, 1, 1) df1 = DataFrame({'a': 0, 'b': 1, 'c': 2}, - index=DatetimeIndex(start=start, end=end, freq='D')) + index=date_range(start=start, end=end, freq='D')) end += timedelta(days=1) df2 = DataFrame({'a': 0, 'b': 1, 'c': 2}, - index=DatetimeIndex(start=start, end=end, freq='D')) + index=date_range(start=start, end=end, freq='D')) dfs = [df1] * 50 + [df2] * 50 self.data_frames = dict(enumerate(dfs)) diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 576dc495eb984..fb47fa81d8dfd 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -1,6 +1,6 @@ import numpy as np import 
pandas.util.testing as tm -from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index, +from pandas import (DataFrame, Series, MultiIndex, Index, date_range) from .pandas_vb_common import lib @@ -8,7 +8,7 @@ class Reindex(object): def setup(self): - rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min') + rng = date_range(start='1/1/1970', periods=10000, freq='1min') self.df = DataFrame(np.random.rand(10000, 10), index=rng, columns=range(10)) self.df['foo'] = 'bar' diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 7ee73fb7ac7b6..0cfbbd536bc8b 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,8 +1,9 @@ import datetime import numpy as np -from pandas import Series, timedelta_range, to_timedelta, Timestamp, \ - Timedelta, TimedeltaIndex, DataFrame + +from pandas import ( + DataFrame, Series, Timedelta, Timestamp, timedelta_range, to_timedelta) class TimedeltaConstructor(object): @@ -122,8 +123,8 @@ def time_timedelta_nanoseconds(self, series): class TimedeltaIndexing(object): def setup(self): - self.index = TimedeltaIndex(start='1985', periods=1000, freq='D') - self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D') + self.index = timedelta_range(start='1985', periods=1000, freq='D') + self.index2 = timedelta_range(start='1986', periods=1000, freq='D') self.series = Series(range(1000), index=self.index) self.timedelta = self.index[500] diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index 64f46fe378e53..4c1d6e8533408 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -1,8 +1,9 @@ import datetime -from pandas import Timestamp -import pytz import dateutil +import pytz + +from pandas import Timestamp class TimestampConstruction(object): @@ -46,7 +47,7 @@ def time_dayofweek(self, tz, freq): self.ts.dayofweek def time_weekday_name(self, tz, freq): - self.ts.weekday_name + 
self.ts.day_name def time_dayofyear(self, tz, freq): self.ts.dayofyear diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 1fb43de5f4c5a..cb4241b8d1bfc 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -647,7 +647,7 @@ changes were made: * The ``out`` and ``mode`` parameters are now longer accepted (previously, this raised if they were specified). * Passing a scalar for ``indices`` is no longer allowed. -- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. +- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. - ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray. - Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed. - ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`). @@ -1103,6 +1103,7 @@ Other API Changes - The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`) - :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`). - :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` respectively ``Index.tolist`` (:issue:`8826`) +- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`). .. 
_whatsnew_0240.deprecations: @@ -1613,6 +1614,7 @@ Sparse - Bug in :meth:`SparseArary.unique` not returning the unique values (:issue:`19595`) - Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`) - Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`) +- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`) Style ^^^^^ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 0df0c01dbd47a..a90cfa4e4c906 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -66,19 +66,19 @@ def _get_series_result_type(result, objs=None): return appropriate class of Series concat input is either dict or array-like """ + from pandas import SparseSeries, SparseDataFrame, DataFrame + # concat Series with axis 1 if isinstance(result, dict): # concat Series with axis 1 - if all(is_sparse(c) for c in compat.itervalues(result)): - from pandas.core.sparse.api import SparseDataFrame + if all(isinstance(c, (SparseSeries, SparseDataFrame)) + for c in compat.itervalues(result)): return SparseDataFrame else: - from pandas.core.frame import DataFrame return DataFrame # otherwise it is a SingleBlockManager (axis = 0) if result._block.is_sparse: - from pandas.core.sparse.api import SparseSeries return SparseSeries else: return objs[0]._constructor diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 8319a8cc5417c..713a4b19c1fd5 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -11,8 +11,8 @@ from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( - ensure_platform_int, is_bool_dtype, is_extension_array_dtype, is_list_like, - is_object_dtype, needs_i8_conversion) + ensure_platform_int, is_bool_dtype, is_extension_array_dtype, + is_integer_dtype, 
is_list_like, is_object_dtype, needs_i8_conversion) from pandas.core.dtypes.missing import notna from pandas import compat @@ -853,6 +853,7 @@ def check_len(item, name): def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False, sparse=False, drop_first=False, dtype=None): + from pandas.core.reshape.concat import concat # Series avoids inconsistent NaN handling codes, levels = _factorize_from_iterable(Series(data)) @@ -909,7 +910,15 @@ def _make_col_name(prefix, prefix_sep, level): index = None if sparse: - sparse_series = {} + + if is_integer_dtype(dtype): + fill_value = 0 + elif dtype == bool: + fill_value = False + else: + fill_value = 0.0 + + sparse_series = [] N = len(data) sp_indices = [[] for _ in range(len(dummy_cols))] mask = codes != -1 @@ -926,12 +935,12 @@ def _make_col_name(prefix, prefix_sep, level): dummy_cols = dummy_cols[1:] for col, ixs in zip(dummy_cols, sp_indices): sarr = SparseArray(np.ones(len(ixs), dtype=dtype), - sparse_index=IntIndex(N, ixs), fill_value=0, + sparse_index=IntIndex(N, ixs), + fill_value=fill_value, dtype=dtype) - sparse_series[col] = Series(data=sarr, index=index) + sparse_series.append(Series(data=sarr, index=index, name=col)) - out = DataFrame(sparse_series, index=index, columns=dummy_cols, - dtype=dtype) + out = concat(sparse_series, axis=1, copy=False) return out else: diff --git a/pandas/tests/sparse/test_reshape.py b/pandas/tests/sparse/test_reshape.py index b492c47375bcf..d4ba672607982 100644 --- a/pandas/tests/sparse/test_reshape.py +++ b/pandas/tests/sparse/test_reshape.py @@ -35,4 +35,8 @@ def test_sparse_frame_unstack(sparse_df): def test_sparse_series_unstack(sparse_df, multi_index3): frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() - tm.assert_sp_frame_equal(frame, sparse_df) + + arr = np.array([1, np.nan, np.nan]) + arrays = {i: pd.SparseArray(np.roll(arr, i)) for i in range(3)} + expected = pd.DataFrame(arrays) + tm.assert_frame_equal(frame, expected)