diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 3e45b2ca37229..8781c76118aa6 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -80,7 +80,9 @@ These changes conform sparse handling to return the correct types and work to ma - Bug in ``SparseDataFrame.loc[]``, ``.iloc[]`` may results in dense ``Series``, rather than ``SparseSeries`` (:issue:`12787`) - Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`) - Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`) +- Bug in ``SparseSeries.reindex`` incorrectly handles ``fill_value`` (:issue:`12797`) - Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`) +- Bug in ``SparseArray.to_dense()`` incorrectly handles ``fill_value`` (:issue:`12797`) - ``SparseArray.take`` now returns scalar for scalar input, ``SparseArray`` for others. Also now it handles negative indexer as the same rule as ``Index`` (:issue:`10560`, :issue:`12796`) .. 
ipython:: python diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 585eaf2261420..c21e78f547988 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2385,7 +2385,7 @@ def make_block_same_class(self, values, placement, sparse_index=None, """ return a new block """ if dtype is None: dtype = self.dtype - if fill_value is None: + if fill_value is None and not isinstance(values, SparseArray): fill_value = self.values.fill_value # if not isinstance(values, SparseArray) and values.ndim != self.ndim: @@ -2427,11 +2427,9 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, if limit is not None: raise NotImplementedError("specifying a limit for 'fillna' has " "not been implemented yet") - if issubclass(self.dtype.type, np.floating): - value = float(value) values = self.values if inplace else self.values.copy() - return [self.make_block_same_class(values=values.get_values(value), - fill_value=value, + values = values.fillna(value, downcast=downcast) + return [self.make_block_same_class(values=values, placement=self.mgr_locs)] def shift(self, periods, axis=0, mgr=None): @@ -3843,11 +3841,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None, indexer = self.items.get_indexer_for(new_axis) if fill_value is None: - # FIXME: is fill_value used correctly in sparse blocks? 
- if not self._block.is_sparse: - fill_value = self._block.fill_value - else: - fill_value = np.nan + fill_value = np.nan new_values = algos.take_1d(values, indexer, fill_value=fill_value) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 0e8fe97c2e497..644b6720dfaac 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -3036,7 +3036,7 @@ def duplicated(self, keep='first'): Returns ------- - filled : Index + filled : %(klass)s """ @Appender(_index_shared_docs['fillna']) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 602098be2901b..92eb2a9230c3b 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -271,15 +271,7 @@ def to_dense(self, fill=None): """ Convert SparseSeries to (dense) Series """ - values = self.values - - # fill the nans - if fill is None: - fill = self.fill_value - if not com.isnull(fill): - values[com.isnull(values)] = fill - - return values + return self.values def __iter__(self): for i in range(len(self)): @@ -444,6 +436,23 @@ def _valid_sp_values(self): mask = np.isfinite(sp_vals) return sp_vals[mask] + @Appender(_index_shared_docs['fillna'] % _sparray_doc_kwargs) + def fillna(self, value, downcast=None): + if downcast is not None: + raise NotImplementedError + + if issubclass(self.dtype.type, np.floating): + value = float(value) + + if self._null_fill_value: + return self._simple_new(self.sp_values, self.sp_index, + fill_value=value) + else: + new_values = self.sp_values.copy() + new_values[com.isnull(new_values)] = value + return self._simple_new(new_values, self.sp_index, + fill_value=self.fill_value) + def sum(self, axis=None, dtype=None, out=None): """ Sum of non-NA/null values diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 11947d780ad88..dc18eaa0f9bb7 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -119,23 +119,7 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, @property def _constructor(self): - def 
wrapper(data=None, index=None, columns=None, - default_fill_value=None, kind=None, fill_value=None, - copy=False): - result = SparseDataFrame(data, index=index, columns=columns, - default_fill_value=fill_value, - default_kind=kind, copy=copy) - - # fill if requested - if fill_value is not None and not isnull(fill_value): - result.fillna(fill_value, inplace=True) - - # set the default_fill_value - # if default_fill_value is not None: - # result._default_fill_value = default_fill_value - return result - - return wrapper + return SparseDataFrame _constructor_sliced = SparseSeries @@ -452,8 +436,8 @@ def _combine_frame(self, other, func, fill_value=None, level=None): return self._constructor(data=new_data, index=new_index, columns=new_columns, - default_fill_value=new_fill_value, - fill_value=new_fill_value).__finalize__(self) + default_fill_value=new_fill_value + ).__finalize__(self) def _combine_match_index(self, other, func, level=None, fill_value=None): new_data = {} @@ -483,8 +467,7 @@ def _combine_match_index(self, other, func, level=None, fill_value=None): return self._constructor( new_data, index=new_index, columns=self.columns, - default_fill_value=fill_value, - fill_value=self.default_fill_value).__finalize__(self) + default_fill_value=fill_value).__finalize__(self) def _combine_match_columns(self, other, func, level=None, fill_value=None): # patched version of DataFrame._combine_match_columns to account for @@ -510,8 +493,7 @@ def _combine_match_columns(self, other, func, level=None, fill_value=None): return self._constructor( new_data, index=self.index, columns=union, - default_fill_value=self.default_fill_value, - fill_value=self.default_fill_value).__finalize__(self) + default_fill_value=self.default_fill_value).__finalize__(self) def _combine_const(self, other, func): new_data = {} @@ -520,8 +502,7 @@ def _combine_const(self, other, func): return self._constructor( data=new_data, index=self.index, columns=self.columns, - 
default_fill_value=self.default_fill_value, - fill_value=self.default_fill_value).__finalize__(self) + default_fill_value=self.default_fill_value).__finalize__(self) def _reindex_index(self, index, method, copy, level, fill_value=np.nan, limit=None, takeable=False): @@ -715,7 +696,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): return self._constructor( new_series, index=self.index, columns=self.columns, default_fill_value=self._default_fill_value, - kind=self._default_kind).__finalize__(self) + default_kind=self._default_kind).__finalize__(self) else: if not broadcast: return self._apply_standard(func, axis, reduce=reduce) diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 076fa71bdd68c..b3d30fe272d71 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -366,6 +366,28 @@ def test_values_asarray(self): assert_almost_equal(self.arr.to_dense(), self.arr_data) assert_almost_equal(self.arr.sp_values, np.asarray(self.arr)) + def test_to_dense(self): + vals = np.array([1, np.nan, np.nan, 3, np.nan]) + res = SparseArray(vals).to_dense() + tm.assert_numpy_array_equal(res, vals) + + res = SparseArray(vals, fill_value=0).to_dense() + tm.assert_numpy_array_equal(res, vals) + + vals = np.array([1, np.nan, 0, 3, 0]) + res = SparseArray(vals).to_dense() + tm.assert_numpy_array_equal(res, vals) + + res = SparseArray(vals, fill_value=0).to_dense() + tm.assert_numpy_array_equal(res, vals) + + vals = np.array([np.nan, np.nan, np.nan, np.nan, np.nan]) + res = SparseArray(vals).to_dense() + tm.assert_numpy_array_equal(res, vals) + + res = SparseArray(vals, fill_value=0).to_dense() + tm.assert_numpy_array_equal(res, vals) + def test_getitem(self): def _checkit(i): assert_almost_equal(self.arr[i], self.arr.values[i]) @@ -466,6 +488,60 @@ def test_generator_warnings(self): pass assert len(w) == 0 + def test_fillna(self): + s = SparseArray([1, np.nan, np.nan, 3, np.nan]) + res = s.fillna(-1) + 
exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([1, -1, -1, 3, -1], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, 0, 3, 0]) + res = s.fillna(-1) + exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([1, -1, 0, 3, 0], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([np.nan, np.nan, np.nan, np.nan]) + res = s.fillna(-1) + exp = SparseArray([-1, -1, -1, -1], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([-1, -1, -1, -1], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([0, 0, 0, 0]) + res = s.fillna(-1) + exp = SparseArray([0, 0, 0, 0], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([0, 0, 0, 0], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([0, 0, 0, 0], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + def test_fillna_overlap(self): + s = SparseArray([1, np.nan, np.nan, 3, np.nan]) + # filling with existing value doesn't replace existing value with + # fill_value, i.e. 
existing 3 remains in sp_values + res = s.fillna(3) + exp = np.array([1, 3, 3, 3, 3]) + tm.assert_numpy_array_equal(res.to_dense(), exp) + + s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) + res = s.fillna(3) + exp = SparseArray([1, 3, 3, 3, 3], fill_value=0) + tm.assert_sp_array_equal(res, exp) + if __name__ == '__main__': import nose diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index c3778426990b9..90e98aff6028a 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -5,15 +5,13 @@ import nose # noqa from numpy import nan import numpy as np - -from pandas.util.testing import (assert_series_equal, assert_frame_equal, - assertRaisesRegexp) +import pandas as pd from pandas import Series, DataFrame, bdate_range, Panel from pandas.tseries.index import DatetimeIndex import pandas.core.datetools as datetools import pandas.util.testing as tm -from pandas.compat import StringIO, lrange +from pandas.compat import lrange from pandas import compat import pandas.sparse.frame as spf @@ -23,6 +21,7 @@ class TestSparseDataFrame(tm.TestCase, SharedWithSparse): + klass = SparseDataFrame _multiprocess_can_split_ = True @@ -35,6 +34,9 @@ def setUp(self): self.dates = bdate_range('1/1/2011', periods=10) + self.orig = pd.DataFrame(self.data, index=self.dates) + self.iorig = pd.DataFrame(self.data, index=self.dates) + self.frame = SparseDataFrame(self.data, index=self.dates) self.iframe = SparseDataFrame(self.data, index=self.dates, default_kind='integer') @@ -42,11 +44,16 @@ def setUp(self): values = self.frame.values.copy() values[np.isnan(values)] = 0 + self.zorig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'], + index=self.dates) self.zframe = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], default_fill_value=0, index=self.dates) values = self.frame.values.copy() values[np.isnan(values)] = 2 + + self.fill_orig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'], + index=self.dates) 
self.fill_frame = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], default_fill_value=2, index=self.dates) @@ -127,10 +134,11 @@ def test_constructor_ndarray(self): level=1) # wrong length index / columns - assertRaisesRegexp(ValueError, "^Index length", SparseDataFrame, - self.frame.values, index=self.frame.index[:-1]) - assertRaisesRegexp(ValueError, "^Column length", SparseDataFrame, - self.frame.values, columns=self.frame.columns[:-1]) + with tm.assertRaisesRegexp(ValueError, "^Index length"): + SparseDataFrame(self.frame.values, index=self.frame.index[:-1]) + + with tm.assertRaisesRegexp(ValueError, "^Column length"): + SparseDataFrame(self.frame.values, columns=self.frame.columns[:-1]) # GH 9272 def test_constructor_empty(self): @@ -180,7 +188,7 @@ def test_dtypes(self): result = sdf.get_dtype_counts() expected = Series({'float64': 4}) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_shape(self): # GH 10452 @@ -199,14 +207,16 @@ def test_str(self): def test_array_interface(self): res = np.sqrt(self.frame) dres = np.sqrt(self.frame.to_dense()) - assert_frame_equal(res.to_dense(), dres) + tm.assert_frame_equal(res.to_dense(), dres) def test_pickle(self): - def _test_roundtrip(frame): + + def _test_roundtrip(frame, orig): result = self.round_trip_pickle(frame) tm.assert_sp_frame_equal(frame, result) + tm.assert_frame_equal(result.to_dense(), orig, check_dtype=False) - _test_roundtrip(SparseDataFrame()) + _test_roundtrip(SparseDataFrame(), DataFrame()) self._check_all(_test_roundtrip) def test_dense_to_sparse(self): @@ -242,52 +252,24 @@ def test_sparse_to_dense(self): pass def test_sparse_series_ops(self): - import sys - buf = StringIO() - tmp = sys.stderr - sys.stderr = buf - try: - self._check_frame_ops(self.frame) - finally: - sys.stderr = tmp + self._check_frame_ops(self.frame) def test_sparse_series_ops_i(self): - import sys - buf = StringIO() - tmp = sys.stderr - sys.stderr = buf - try: - 
self._check_frame_ops(self.iframe) - finally: - sys.stderr = tmp + self._check_frame_ops(self.iframe) def test_sparse_series_ops_z(self): - import sys - buf = StringIO() - tmp = sys.stderr - sys.stderr = buf - try: - self._check_frame_ops(self.zframe) - finally: - sys.stderr = tmp + self._check_frame_ops(self.zframe) def test_sparse_series_ops_fill(self): - import sys - buf = StringIO() - tmp = sys.stderr - sys.stderr = buf - try: - self._check_frame_ops(self.fill_frame) - finally: - sys.stderr = tmp + self._check_frame_ops(self.fill_frame) def _check_frame_ops(self, frame): - fill = frame.default_fill_value def _compare_to_dense(a, b, da, db, op): sparse_result = op(a, b) dense_result = op(da, db) + fill = sparse_result.default_fill_value dense_result = dense_result.to_sparse(fill_value=fill) tm.assert_sp_frame_equal(sparse_result, dense_result, exact_indices=False) @@ -341,10 +323,10 @@ def test_op_corners(self): foo = self.frame + self.empty tm.assertIsInstance(foo.index, DatetimeIndex) - assert_frame_equal(foo, self.frame * np.nan) + tm.assert_frame_equal(foo, self.frame * np.nan) foo = self.empty + self.frame - assert_frame_equal(foo, self.frame * np.nan) + tm.assert_frame_equal(foo, self.frame * np.nan) def test_scalar_ops(self): pass @@ -421,7 +403,8 @@ def test_getitem_overload(self): self.assertRaises(Exception, self.frame.__getitem__, indexer[:-1]) def test_setitem(self): - def _check_frame(frame): + + def _check_frame(frame, orig): N = len(frame) # insert SparseSeries @@ -433,10 +416,9 @@ def _check_frame(frame): # insert SparseSeries differently-indexed to_insert = frame['A'][::2] frame['E'] = to_insert - expected = to_insert.to_dense().reindex(frame.index).fillna( - to_insert.fill_value) + expected = to_insert.to_dense().reindex(frame.index) result = frame['E'].to_dense() - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) self.assertEqual(result.name, 'E') # insert Series @@ -448,10 
+430,9 @@ def _check_frame(frame): # insert Series differently-indexed to_insert = frame['A'].to_dense()[::2] frame['G'] = to_insert - expected = to_insert.reindex(frame.index).fillna( - frame.default_fill_value) + expected = to_insert.reindex(frame.index) expected.name = 'G' - assert_series_equal(frame['G'].to_dense(), expected) + tm.assert_series_equal(frame['G'].to_dense(), expected) # insert ndarray frame['H'] = np.random.randn(N) @@ -543,15 +524,16 @@ def test_apply(self): # agg / broadcast broadcasted = self.frame.apply(np.sum, broadcast=True) tm.assertIsInstance(broadcasted, SparseDataFrame) - assert_frame_equal(broadcasted.to_dense(), - self.frame.to_dense().apply(np.sum, broadcast=True)) + + exp = self.frame.to_dense().apply(np.sum, broadcast=True) + tm.assert_frame_equal(broadcasted.to_dense(), exp) self.assertIs(self.empty.apply(np.sqrt), self.empty) from pandas.core import nanops applied = self.frame.apply(np.sum) - assert_series_equal(applied, - self.frame.to_dense().apply(nanops.nansum)) + tm.assert_series_equal(applied, + self.frame.to_dense().apply(nanops.nansum)) def test_apply_nonuq(self): df_orig = DataFrame( @@ -559,7 +541,7 @@ def test_apply_nonuq(self): df = df_orig.to_sparse() rs = df.apply(lambda s: s[0], axis=1) xp = Series([1., 4., 7.], ['a', 'a', 'c']) - assert_series_equal(rs, xp) + tm.assert_series_equal(rs, xp) # df.T breaks df = df_orig.T.to_sparse() @@ -577,19 +559,41 @@ def test_astype(self): def test_fillna(self): df = self.zframe.reindex(lrange(5)) + dense = self.zorig.reindex(lrange(5)) + result = df.fillna(0) - expected = df.to_dense().fillna(0).to_sparse(fill_value=0) - tm.assert_sp_frame_equal(result, expected, exact_indices=False) + expected = dense.fillna(0) + tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0), + exact_indices=False) + tm.assert_frame_equal(result.to_dense(), expected) result = df.copy() result.fillna(0, inplace=True) - expected = df.to_dense().fillna(0).to_sparse(fill_value=0) - 
tm.assert_sp_frame_equal(result, expected, exact_indices=False) + expected = dense.fillna(0) + + tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0), + exact_indices=False) + tm.assert_frame_equal(result.to_dense(), expected) result = df.copy() result = df['A'] result.fillna(0, inplace=True) - assert_series_equal(result, df['A'].fillna(0)) + + expected = dense['A'].fillna(0) + # this changes internal SparseArray repr + # tm.assert_sp_series_equal(result, expected.to_sparse(fill_value=0)) + tm.assert_series_equal(result.to_dense(), expected) + + def test_fillna_fill_value(self): + df = pd.DataFrame({'A': [1, 0, 0], 'B': [np.nan, np.nan, 4]}) + + sparse = pd.SparseDataFrame(df) + tm.assert_frame_equal(sparse.fillna(-1).to_dense(), + df.fillna(-1), check_dtype=False) + + sparse = pd.SparseDataFrame(df, default_fill_value=0) + tm.assert_frame_equal(sparse.fillna(-1).to_dense(), + df.fillna(-1), check_dtype=False) def test_rename(self): # just check this works @@ -598,7 +602,7 @@ def test_rename(self): def test_corr(self): res = self.frame.corr() - assert_frame_equal(res, self.frame.to_dense().corr()) + tm.assert_frame_equal(res, self.frame.to_dense().corr()) def test_describe(self): self.frame['foo'] = np.nan @@ -621,6 +625,7 @@ def test_join(self): np.random.randn(len(self.frame)), index=self.frame.index)) def test_reindex(self): + def _check_frame(frame): index = frame.index sidx = index[::2] @@ -628,15 +633,14 @@ def _check_frame(frame): sparse_result = frame.reindex(sidx) dense_result = frame.to_dense().reindex(sidx) - assert_frame_equal(sparse_result.to_dense(), dense_result) + tm.assert_frame_equal(sparse_result.to_dense(), dense_result) - assert_frame_equal(frame.reindex(list(sidx)).to_dense(), - dense_result) + tm.assert_frame_equal(frame.reindex(list(sidx)).to_dense(), + dense_result) sparse_result2 = sparse_result.reindex(index) - dense_result2 = dense_result.reindex(index).fillna( - frame.default_fill_value) - 
assert_frame_equal(sparse_result2.to_dense(), dense_result2) + dense_result2 = dense_result.reindex(index) + tm.assert_frame_equal(sparse_result2.to_dense(), dense_result2) # propagate CORRECT fill value tm.assert_almost_equal(sparse_result.default_fill_value, @@ -679,9 +683,11 @@ def _check_frame(frame): def test_reindex_fill_value(self): rng = bdate_range('20110110', periods=20) + result = self.zframe.reindex(rng, fill_value=0) - expected = self.zframe.reindex(rng).fillna(0) - tm.assert_sp_frame_equal(result, expected) + exp = self.zorig.reindex(rng, fill_value=0) + exp = exp.to_sparse(self.zframe.default_fill_value) + tm.assert_sp_frame_equal(result, exp) def test_take(self): result = self.frame.take([1, 0, 2], axis=1) @@ -689,9 +695,10 @@ def test_take(self): tm.assert_sp_frame_equal(result, expected) def test_to_dense(self): - def _check(frame): + def _check(frame, orig): dense_dm = frame.to_dense() - assert_frame_equal(frame, dense_dm) + tm.assert_frame_equal(frame, dense_dm) + tm.assert_frame_equal(dense_dm, orig, check_dtype=False) self._check_all(_check) @@ -715,7 +722,8 @@ def _check(frame): self.assertRaises(Exception, _check, self.fill_frame) def test_transpose(self): - def _check(frame): + + def _check(frame, orig): transposed = frame.T untransposed = transposed.T tm.assert_sp_frame_equal(frame, untransposed) @@ -723,46 +731,55 @@ def _check(frame): self._check_all(_check) def test_shift(self): - def _check(frame): - shifted = frame.shift(0) - tm.assert_sp_frame_equal(shifted, frame) - - f = lambda s: s.shift(1) - _dense_frame_compare(frame, f) - - f = lambda s: s.shift(-2) - _dense_frame_compare(frame, f) - f = lambda s: s.shift(2, freq='B') - _dense_frame_compare(frame, f) + def _check(frame, orig): - f = lambda s: s.shift(2, freq=datetools.bday) - _dense_frame_compare(frame, f) + shifted = frame.shift(0) + exp = orig.shift(0) + # int is coerced to float dtype + tm.assert_frame_equal(shifted.to_dense(), exp, check_dtype=False) + shifted = 
frame.shift(1) + exp = orig.shift(1) + tm.assert_frame_equal(shifted, exp) + + shifted = frame.shift(-2) + exp = orig.shift(-2) + tm.assert_frame_equal(shifted, exp) + + shifted = frame.shift(2, freq='B') + exp = orig.shift(2, freq='B') + exp = exp.to_sparse(frame.default_fill_value) + tm.assert_frame_equal(shifted, exp) + + shifted = frame.shift(2, freq=datetools.bday) + exp = orig.shift(2, freq=datetools.bday) + exp = exp.to_sparse(frame.default_fill_value) + tm.assert_frame_equal(shifted, exp) self._check_all(_check) def test_count(self): result = self.frame.count() dense_result = self.frame.to_dense().count() - assert_series_equal(result, dense_result) + tm.assert_series_equal(result, dense_result) result = self.frame.count(1) dense_result = self.frame.to_dense().count(1) # win32 don't check dtype - assert_series_equal(result, dense_result, check_dtype=False) + tm.assert_series_equal(result, dense_result, check_dtype=False) def test_cumsum(self): result = self.frame.cumsum() expected = self.frame.to_dense().cumsum() tm.assertIsInstance(result, SparseDataFrame) - assert_frame_equal(result.to_dense(), expected) + tm.assert_frame_equal(result.to_dense(), expected) def _check_all(self, check_func): - check_func(self.frame) - check_func(self.iframe) - check_func(self.zframe) - check_func(self.fill_frame) + check_func(self.frame, self.orig) + check_func(self.iframe, self.iorig) + check_func(self.zframe, self.zorig) + check_func(self.fill_frame, self.fill_orig) def test_combine_first(self): df = self.frame @@ -790,7 +807,7 @@ def test_isin(self): sparse_df = DataFrame({'flag': [1., 0., 1.]}).to_sparse(fill_value=0.) xp = sparse_df[sparse_df.flag == 1.] 
rs = sparse_df[sparse_df.flag.isin([1.])] - assert_frame_equal(xp, rs) + tm.assert_frame_equal(xp, rs) def test_sparse_pow_issue(self): # 2220 @@ -813,7 +830,7 @@ def test_as_blocks(self): df_blocks = df.blocks self.assertEqual(list(df_blocks.keys()), ['float64']) - assert_frame_equal(df_blocks['float64'], df) + tm.assert_frame_equal(df_blocks['float64'], df) def test_nan_columnname(self): # GH 8822 @@ -822,13 +839,6 @@ def test_nan_columnname(self): self.assertTrue(np.isnan(nan_colname_sparse.columns[0])) -def _dense_frame_compare(frame, f): - result = f(frame) - assert (isinstance(frame, SparseDataFrame)) - dense_result = f(frame.to_dense()).fillna(frame.default_fill_value) - assert_frame_equal(result.to_dense(), dense_result) - - if __name__ == '__main__': import nose # noqa nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/sparse/tests/test_indexing.py index fb89d4486b890..17c84129b6b46 100644 --- a/pandas/sparse/tests/test_indexing.py +++ b/pandas/sparse/tests/test_indexing.py @@ -255,6 +255,59 @@ def test_take_fill_value(self): exp = orig.take([-1, -2]).to_sparse(fill_value=0) tm.assert_sp_series_equal(sparse.take([-1, -2]), exp) + def test_reindex(self): + orig = pd.Series([1, np.nan, np.nan, 3, np.nan], + index=list('ABCDE')) + sparse = orig.to_sparse() + + res = sparse.reindex(['A', 'E', 'C', 'D']) + exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse() + tm.assert_sp_series_equal(res, exp) + + # all missing & fill_value + res = sparse.reindex(['B', 'E', 'C']) + exp = orig.reindex(['B', 'E', 'C']).to_sparse() + tm.assert_sp_series_equal(res, exp) + + orig = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan], + index=list('ABCDE')) + sparse = orig.to_sparse() + + res = sparse.reindex(['A', 'E', 'C', 'D']) + exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse() + tm.assert_sp_series_equal(res, exp) + + def test_reindex_fill_value(self): + orig = pd.Series([1, np.nan, 0, 3, 0], 
index=list('ABCDE')) + sparse = orig.to_sparse(fill_value=0) + + res = sparse.reindex(['A', 'E', 'C', 'D']) + exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse(fill_value=0) + tm.assert_sp_series_equal(res, exp) + + # includes missing and fill_value + res = sparse.reindex(['A', 'B', 'C']) + exp = orig.reindex(['A', 'B', 'C']).to_sparse(fill_value=0) + tm.assert_sp_series_equal(res, exp) + + # all missing + orig = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan], + index=list('ABCDE')) + sparse = orig.to_sparse(fill_value=0) + + res = sparse.reindex(['A', 'E', 'C', 'D']) + exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse(fill_value=0) + tm.assert_sp_series_equal(res, exp) + + # all fill_value + orig = pd.Series([0., 0., 0., 0., 0.], + index=list('ABCDE')) + sparse = orig.to_sparse(fill_value=0) + + res = sparse.reindex(['A', 'E', 'C', 'D']) + exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse(fill_value=0) + tm.assert_sp_series_equal(res, exp) + class TestSparseDataFrameIndexing(tm.TestCase): diff --git a/pandas/sparse/tests/test_panel.py b/pandas/sparse/tests/test_panel.py index 90d2f0b30ff71..89a90f5be40e6 100644 --- a/pandas/sparse/tests/test_panel.py +++ b/pandas/sparse/tests/test_panel.py @@ -4,7 +4,6 @@ from numpy import nan import pandas as pd -from pandas.util.testing import assert_frame_equal, assert_panel_equal from pandas import DataFrame, bdate_range, Panel from pandas.core.index import Index import pandas.util.testing as tm @@ -49,10 +48,6 @@ class TestSparsePanel(tm.TestCase, test_panel.SafeForLongAndSparse, test_panel.SafeForSparse): _multiprocess_can_split_ = True - @classmethod - def assert_panel_equal(cls, x, y): - tm.assert_sp_panel_equal(x, y) - def setUp(self): self.data_dict = { 'ItemA': panel_data1(), @@ -115,7 +110,7 @@ def test_dense_to_sparse(self): def test_to_dense(self): dwp = self.panel.to_dense() dwp2 = Panel.from_dict(self.data_dict) - assert_panel_equal(dwp, dwp2) + tm.assert_panel_equal(dwp, dwp2) def test_to_frame(self): @@ 
-191,7 +186,7 @@ def _compare_with_dense(swp, items, major, minor): swp_re = swp.reindex(items=items, major=major, minor=minor) dwp_re = swp.to_dense().reindex(items=items, major=major, minor=minor) - assert_panel_equal(swp_re.to_dense(), dwp_re) + tm.assert_panel_equal(swp_re.to_dense(), dwp_re) _compare_with_dense(self.panel, self.panel.items[:2], self.panel.major_axis[::2], @@ -218,14 +213,15 @@ def _dense_comp(op): dense = panel.to_dense() sparse_result = op(panel) dense_result = op(dense) - assert_panel_equal(sparse_result.to_dense(), dense_result) + tm.assert_panel_equal(sparse_result.to_dense(), + dense_result) def _mixed_comp(op): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = op(panel, panel.to_dense()) expected = op(panel.to_dense(), panel.to_dense()) - assert_panel_equal(result, expected) + tm.assert_panel_equal(result, expected) op1 = lambda x: x + 2 @@ -255,7 +251,7 @@ def _dense_comp(sparse): for idx in sparse.major_axis: dslice = dense.major_xs(idx) sslice = sparse.major_xs(idx) - assert_frame_equal(dslice, sslice) + tm.assert_frame_equal(dslice, sslice) _dense_comp(self.panel) @@ -266,7 +262,7 @@ def _dense_comp(sparse): for idx in sparse.minor_axis: dslice = dense.minor_xs(idx) sslice = sparse.minor_xs(idx).to_dense() - assert_frame_equal(dslice, sslice) + tm.assert_frame_equal(dslice, sslice) _dense_comp(self.panel) diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index 3a7c96c219959..3d297ba55297c 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -6,11 +6,6 @@ from numpy import nan import numpy as np import pandas as pd - -from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_index_equal, assert_frame_equal, - assertRaisesRegexp, - assert_numpy_array_equal) from numpy.testing import assert_equal from pandas import Series, DataFrame, bdate_range @@ -124,7 +119,7 @@ def 
test_construct_DataFrame_with_sp_series(self): # blocking expected = Series({'col': 'float64:sparse'}) result = df.ftypes - assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) def test_series_density(self): # GH2803 @@ -152,6 +147,29 @@ def test_sparse_to_dense(self): series = self.ziseries.to_dense() assert_equal(series, arr) + def test_to_dense_fill_value(self): + s = pd.Series([1, np.nan, np.nan, 3, np.nan]) + res = SparseSeries(s).to_dense() + tm.assert_series_equal(res, s) + + res = SparseSeries(s, fill_value=0).to_dense() + tm.assert_series_equal(res, s) + + s = pd.Series([1, np.nan, 0, 3, 0]) + res = SparseSeries(s, fill_value=0).to_dense() + tm.assert_series_equal(res, s) + + res = SparseSeries(s, fill_value=0).to_dense() + tm.assert_series_equal(res, s) + + s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan]) + res = SparseSeries(s).to_dense() + tm.assert_series_equal(res, s) + + s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan]) + res = SparseSeries(s, fill_value=0).to_dense() + tm.assert_series_equal(res, s) + def test_dense_to_sparse(self): series = self.bseries.to_dense() bseries = series.to_sparse(kind='block') @@ -332,10 +350,10 @@ def _check_all(self, check_func): def test_getitem(self): def _check_getitem(sp, dense): for idx, val in compat.iteritems(dense): - assert_almost_equal(val, sp[idx]) + tm.assert_almost_equal(val, sp[idx]) for i in range(len(dense)): - assert_almost_equal(sp[i], dense[i]) + tm.assert_almost_equal(sp[i], dense[i]) # j = np.float64(i) # assert_almost_equal(sp[j], dense[j]) @@ -360,15 +378,15 @@ def _check_getitem(sp, dense): self.btseries.index[-1] + BDay()) def test_get_get_value(self): - assert_almost_equal(self.bseries.get(10), self.bseries[10]) + tm.assert_almost_equal(self.bseries.get(10), self.bseries[10]) self.assertIsNone(self.bseries.get(len(self.bseries) + 1)) dt = self.btseries.index[10] result = self.btseries.get(dt) expected = self.btseries.to_dense()[dt] - 
assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected) - assert_almost_equal(self.bseries.get_value(10), self.bseries[10]) + tm.assert_almost_equal(self.bseries.get_value(10), self.bseries[10]) def test_set_value(self): @@ -407,7 +425,8 @@ def _compare(idx): dense_result = dense.take(idx).values sparse_result = sp.take(idx) self.assertIsInstance(sparse_result, SparseSeries) - assert_almost_equal(dense_result, sparse_result.values.values) + tm.assert_almost_equal(dense_result, + sparse_result.values.values) _compare([1., 2., 3., 4., 5., 0.]) _compare([7, 2, 9, 0, 4]) @@ -429,9 +448,9 @@ def test_setitem(self): def test_setslice(self): self.bseries[5:10] = 7. - assert_series_equal(self.bseries[5:10].to_dense(), - Series(7., index=range(5, 10), - name=self.bseries.name)) + tm.assert_series_equal(self.bseries[5:10].to_dense(), + Series(7., index=range(5, 10), + name=self.bseries.name)) def test_operators(self): def _check_op(a, b, op): @@ -439,7 +458,7 @@ def _check_op(a, b, op): adense = a.to_dense() if isinstance(a, SparseSeries) else a bdense = b.to_dense() if isinstance(b, SparseSeries) else b dense_result = op(adense, bdense) - assert_almost_equal(sp_result.to_dense(), dense_result) + tm.assert_almost_equal(sp_result.to_dense(), dense_result) def check(a, b): _check_op(a, b, operator.add) @@ -564,7 +583,7 @@ def _check(values, index1, index2, fill_value): expected = Series(values, index=int_indices1) expected = expected.reindex(int_indices2).fillna(fill_value) - assert_almost_equal(expected.values, reindexed.sp_values) + tm.assert_almost_equal(expected.values, reindexed.sp_values) # make sure level argument asserts # TODO: expected is not used anywhere...remove? 
@@ -657,7 +676,7 @@ def test_dropna(self): expected = sp.to_dense().valid() expected = expected[expected != 0] - assert_almost_equal(sp_valid.values, expected.values) + tm.assert_almost_equal(sp_valid.values, expected.values) self.assertTrue(sp_valid.index.equals(expected.index)) self.assertEqual(len(sp_valid.sp_values), 2) @@ -689,7 +708,8 @@ def _check_matches(indices, expected): # must have NaN fill value data = {'a': SparseSeries(np.arange(7), sparse_index=expected2, fill_value=0)} - assertRaisesRegexp(TypeError, "NaN fill value", spf.homogenize, data) + with tm.assertRaisesRegexp(TypeError, "NaN fill value"): + spf.homogenize(data) def test_fill_value_corner(self): cop = self.zbseries.copy() @@ -729,12 +749,12 @@ def test_cumsum(self): expected = self.bseries.to_dense().cumsum() tm.assertIsInstance(result, SparseSeries) self.assertEqual(result.name, self.bseries.name) - assert_series_equal(result.to_dense(), expected) + tm.assert_series_equal(result.to_dense(), expected) result = self.zbseries.cumsum() expected = self.zbseries.to_dense().cumsum() tm.assertIsInstance(result, Series) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_combine_first(self): s = self.bseries @@ -762,15 +782,16 @@ def setUp(self): def test_to_sparse_preserve_multiindex_names_columns(self): sparse_multiindex_frame = self.dense_multiindex_frame.to_sparse() sparse_multiindex_frame = sparse_multiindex_frame.copy() - assert_index_equal(sparse_multiindex_frame.columns, - self.dense_multiindex_frame.columns) + tm.assert_index_equal(sparse_multiindex_frame.columns, + self.dense_multiindex_frame.columns) def test_round_trip_preserve_multiindex_names(self): sparse_multiindex_frame = self.dense_multiindex_frame.to_sparse() round_trip_multiindex_frame = sparse_multiindex_frame.to_dense() - assert_frame_equal(self.dense_multiindex_frame, - round_trip_multiindex_frame, check_column_type=True, - check_names=True) + 
tm.assert_frame_equal(self.dense_multiindex_frame, + round_trip_multiindex_frame, + check_column_type=True, + check_names=True) class TestSparseSeriesScipyInteraction(tm.TestCase): @@ -898,7 +919,7 @@ def _check_results_to_coo(results, check): (A, il, jl) = results (A_result, il_result, jl_result) = check # convert to dense and compare - assert_numpy_array_equal(A.todense(), A_result.todense()) + tm.assert_numpy_array_equal(A.todense(), A_result.todense()) # or compare directly as difference of sparse # assert(abs(A - A_result).max() < 1e-12) # max is failing in python # 2.6 @@ -910,7 +931,7 @@ def _dense_series_compare(s, f): result = f(s) assert (isinstance(result, SparseSeries)) dense_result = f(s.to_dense()) - assert_series_equal(result.to_dense(), dense_result) + tm.assert_series_equal(result.to_dense(), dense_result) if __name__ == '__main__':