diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 78a19029db567..35f2f566ef85e 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -24,8 +24,6 @@ import pandas.util.testing as tm -from pandas.tests.frame.common import TestData - class SharedWithSparse(object): """ @@ -43,57 +41,57 @@ def _assert_series_equal(self, left, right): """Dispatch to series class dependent assertion""" raise NotImplementedError - def test_copy_index_name_checking(self): + def test_copy_index_name_checking(self, float_frame): # don't want to be able to modify the index stored elsewhere after # making a copy for attr in ('index', 'columns'): - ind = getattr(self.frame, attr) + ind = getattr(float_frame, attr) ind.name = None - cp = self.frame.copy() + cp = float_frame.copy() getattr(cp, attr).name = 'foo' - assert getattr(self.frame, attr).name is None + assert getattr(float_frame, attr).name is None - def test_getitem_pop_assign_name(self): - s = self.frame['A'] + def test_getitem_pop_assign_name(self, float_frame): + s = float_frame['A'] assert s.name == 'A' - s = self.frame.pop('A') + s = float_frame.pop('A') assert s.name == 'A' - s = self.frame.loc[:, 'B'] + s = float_frame.loc[:, 'B'] assert s.name == 'B' s2 = s.loc[:] assert s2.name == 'B' - def test_get_value(self): - for idx in self.frame.index: - for col in self.frame.columns: + def test_get_value(self, float_frame): + for idx in float_frame.index: + for col in float_frame.columns: with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = self.frame.get_value(idx, col) - expected = self.frame[col][idx] + result = float_frame.get_value(idx, col) + expected = float_frame[col][idx] tm.assert_almost_equal(result, expected) - def test_add_prefix_suffix(self): - with_prefix = self.frame.add_prefix('foo#') - expected = pd.Index(['foo#%s' % c for c in self.frame.columns]) + def test_add_prefix_suffix(self, float_frame): + with_prefix = float_frame.add_prefix('foo#') + expected = pd.Index(['foo#%s' % c for c in float_frame.columns]) tm.assert_index_equal(with_prefix.columns, expected) - with_suffix = self.frame.add_suffix('#foo') - expected = pd.Index(['%s#foo' % c for c in self.frame.columns]) + with_suffix = float_frame.add_suffix('#foo') + expected = pd.Index(['%s#foo' % c for c in float_frame.columns]) tm.assert_index_equal(with_suffix.columns, expected) - with_pct_prefix = self.frame.add_prefix('%') - expected = pd.Index(['%{}'.format(c) for c in self.frame.columns]) + with_pct_prefix = float_frame.add_prefix('%') + expected = pd.Index(['%{}'.format(c) for c in float_frame.columns]) tm.assert_index_equal(with_pct_prefix.columns, expected) - with_pct_suffix = self.frame.add_suffix('%') - expected = pd.Index(['{}%'.format(c) for c in self.frame.columns]) + with_pct_suffix = float_frame.add_suffix('%') + expected = pd.Index(['{}%'.format(c) for c in float_frame.columns]) tm.assert_index_equal(with_pct_suffix.columns, expected) - def test_get_axis(self): - f = self.frame + def test_get_axis(self, float_frame): + f = float_frame assert f._get_axis_number(0) == 0 assert f._get_axis_number(1) == 1 assert f._get_axis_number('index') == 0 @@ -118,13 +116,13 @@ def test_get_axis(self): tm.assert_raises_regex(ValueError, 'No axis named', f._get_axis_number, None) - def test_keys(self): - getkeys = self.frame.keys - assert getkeys() is self.frame.columns + def test_keys(self, float_frame): + getkeys = float_frame.keys + assert getkeys() is float_frame.columns - def test_column_contains_typeerror(self): + def test_column_contains_typeerror(self, float_frame): try: - self.frame.columns in self.frame + float_frame.columns in float_frame except TypeError: pass @@ -146,10 +144,10 @@ def test_tab_completion(self): assert key not in dir(df) assert isinstance(df.__getitem__('A'), pd.DataFrame) - def test_not_hashable(self): + def test_not_hashable(self, empty_frame): df = self.klass([1]) pytest.raises(TypeError, hash, df) - pytest.raises(TypeError, hash, self.empty) + pytest.raises(TypeError, hash, empty_frame) def test_new_empty_index(self): df1 = self.klass(randn(0, 3)) @@ -157,29 +155,29 @@ def test_new_empty_index(self): df1.index.name = 'foo' assert df2.index.name is None - def test_array_interface(self): + def test_array_interface(self, float_frame): with np.errstate(all='ignore'): - result = np.sqrt(self.frame) - assert isinstance(result, type(self.frame)) - assert result.index is self.frame.index - assert result.columns is self.frame.columns + result = np.sqrt(float_frame) + assert isinstance(result, type(float_frame)) + assert result.index is float_frame.index + assert result.columns is float_frame.columns - self._assert_frame_equal(result, self.frame.apply(np.sqrt)) + self._assert_frame_equal(result, float_frame.apply(np.sqrt)) - def test_get_agg_axis(self): - cols = self.frame._get_agg_axis(0) - assert cols is self.frame.columns + def test_get_agg_axis(self, float_frame): + cols = float_frame._get_agg_axis(0) + assert cols is float_frame.columns - idx = self.frame._get_agg_axis(1) - assert idx is self.frame.index + idx = float_frame._get_agg_axis(1) + assert idx is float_frame.index - pytest.raises(ValueError, self.frame._get_agg_axis, 2) + pytest.raises(ValueError, float_frame._get_agg_axis, 2) - def test_nonzero(self): - assert self.empty.empty + def test_nonzero(self, float_frame, float_string_frame, empty_frame): + assert empty_frame.empty - assert not self.frame.empty - assert not self.mixed_frame.empty + assert not float_frame.empty + assert not float_string_frame.empty # corner case df = DataFrame({'A': [1., 2., 3.], @@ -202,16 +200,16 @@ def test_items(self): assert isinstance(v, Series) assert (df[k] == v).all() - def test_iter(self): - assert tm.equalContents(list(self.frame), self.frame.columns) + def test_iter(self, float_frame): + assert tm.equalContents(list(float_frame), float_frame.columns) - def test_iterrows(self): - for k, v in self.frame.iterrows(): - exp = self.frame.loc[k] + def test_iterrows(self, float_frame, float_string_frame): + for k, v in float_frame.iterrows(): + exp = float_frame.loc[k] self._assert_series_equal(v, exp) - for k, v in self.mixed_frame.iterrows(): - exp = self.mixed_frame.loc[k] + for k, v in float_string_frame.iterrows(): + exp = float_string_frame.loc[k] self._assert_series_equal(v, exp) def test_iterrows_iso8601(self): @@ -226,11 +224,11 @@ def test_iterrows_iso8601(self): exp = s.loc[k] self._assert_series_equal(v, exp) - def test_itertuples(self): - for i, tup in enumerate(self.frame.itertuples()): + def test_itertuples(self, float_frame): + for i, tup in enumerate(float_frame.itertuples()): s = self.klass._constructor_sliced(tup[1:]) s.name = tup[0] - expected = self.frame.iloc[i, :].reset_index(drop=True) + expected = float_frame.iloc[i, :].reset_index(drop=True) self._assert_series_equal(s, expected) df = self.klass({'floats': np.random.randn(5), @@ -289,11 +287,11 @@ def test_sequence_like_with_categorical(self): for c, col in df.iteritems(): str(s) - def test_len(self): - assert len(self.frame) == len(self.frame.index) + def test_len(self, float_frame): + assert len(float_frame) == len(float_frame.index) - def test_values(self): - frame = self.frame + def test_values(self, float_frame, float_string_frame): + frame = float_frame arr = frame.values frame_cols = frame.columns @@ -306,20 +304,20 @@ def test_values(self): assert value == frame[col][i] # mixed type - arr = self.mixed_frame[['foo', 'A']].values + arr = float_string_frame[['foo', 'A']].values assert arr[0, 0] == 'bar' - df = self.klass({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) + df = self.klass({'complex': [1j, 2j, 3j], 'real': [1, 2, 3]}) arr = df.values assert arr[0, 0] == 1j # single block corner case - arr = self.frame[['A', 'B']].values - expected = self.frame.reindex(columns=['A', 'B']).values + arr = float_frame[['A', 'B']].values + expected = float_frame.reindex(columns=['A', 'B']).values assert_almost_equal(arr, expected) - def test_transpose(self): - frame = self.frame + def test_transpose(self, float_frame): + frame = float_frame dft = frame.T for idx, series in compat.iteritems(dft): for col, value in compat.iteritems(series): @@ -343,8 +341,8 @@ def test_swapaxes(self): self._assert_frame_equal(df, df.swapaxes(0, 0)) pytest.raises(ValueError, df.swapaxes, 2, 5) - def test_axis_aliases(self): - f = self.frame + def test_axis_aliases(self, float_frame): + f = float_frame # reg name expected = f.sum(axis=0) @@ -361,23 +359,23 @@ def test_class_axis(self): assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) - def test_more_values(self): - values = self.mixed_frame.values - assert values.shape[1] == len(self.mixed_frame.columns) + def test_more_values(self, float_string_frame): + values = float_string_frame.values + assert values.shape[1] == len(float_string_frame.columns) - def test_repr_with_mi_nat(self): + def test_repr_with_mi_nat(self, float_string_frame): df = self.klass({'X': [1, 2]}, index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) res = repr(df) exp = ' X\nNaT a 1\n2013-01-01 b 2' assert res == exp - def test_iteritems_names(self): - for k, v in compat.iteritems(self.mixed_frame): + def test_iteritems_names(self, float_string_frame): + for k, v in compat.iteritems(float_string_frame): assert v.name == k - def test_series_put_names(self): - series = self.mixed_frame._series + def test_series_put_names(self, float_string_frame): + series = float_string_frame._series for k, v in compat.iteritems(series): assert v.name == k @@ -408,36 +406,37 @@ def test_with_datetimelikes(self): tm.assert_series_equal(result, expected) -class TestDataFrameMisc(SharedWithSparse, TestData): +class TestDataFrameMisc(SharedWithSparse): klass = DataFrame # SharedWithSparse tests use generic, klass-agnostic assertion _assert_frame_equal = staticmethod(assert_frame_equal) _assert_series_equal = staticmethod(assert_series_equal) - def test_values(self): - self.frame.values[:, 0] = 5. - assert (self.frame.values[:, 0] == 5).all() + def test_values(self, float_frame): + float_frame.values[:, 0] = 5. + assert (float_frame.values[:, 0] == 5).all() - def test_as_matrix_deprecated(self): + def test_as_matrix_deprecated(self, float_frame): # GH18458 with tm.assert_produces_warning(FutureWarning): - result = self.frame.as_matrix(columns=self.frame.columns.tolist()) - expected = self.frame.values + cols = float_frame.columns.tolist() + result = float_frame.as_matrix(columns=cols) + expected = float_frame.values tm.assert_numpy_array_equal(result, expected) - def test_deepcopy(self): - cp = deepcopy(self.frame) + def test_deepcopy(self, float_frame): + cp = deepcopy(float_frame) series = cp['A'] series[:] = 10 for idx, value in compat.iteritems(series): - assert self.frame['A'][idx] != value + assert float_frame['A'][idx] != value - def test_transpose_get_view(self): - dft = self.frame.T + def test_transpose_get_view(self, float_frame): + dft = float_frame.T dft.values[:, 5:10] = 5 - assert (self.frame.values[5:10] == 5).all() + assert (float_frame.values[5:10] == 5).all() def test_inplace_return_self(self): # re #1893 diff --git a/pandas/tests/sparse/frame/conftest.py b/pandas/tests/sparse/frame/conftest.py new file mode 100644 index 0000000000000..f36b4e643d10b --- /dev/null +++ b/pandas/tests/sparse/frame/conftest.py @@ -0,0 +1,116 @@ +import pytest + +import numpy as np + +from pandas import SparseDataFrame, SparseArray, DataFrame, bdate_range + +data = {'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6], + 'C': np.arange(10, dtype=np.float64), + 'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan]} +dates = bdate_range('1/1/2011', periods=10) + + +# fixture names must be compatible with the tests in +# tests/frame/test_api.SharedWithSparse + +@pytest.fixture +def float_frame_dense(): + """ + Fixture for dense DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + return DataFrame(data, index=dates) + + +@pytest.fixture +def float_frame(): + """ + Fixture for sparse DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + # default_kind='block' is the default + return SparseDataFrame(data, index=dates, default_kind='block') + + +@pytest.fixture +def float_frame_int_kind(): + """ + Fixture for sparse DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D'] and default_kind='integer'. + Some entries are missing. + """ + return SparseDataFrame(data, index=dates, default_kind='integer') + + +@pytest.fixture +def float_string_frame(): + """ + Fixture for sparse DataFrame of floats and strings with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing + """ + sdf = SparseDataFrame(data, index=dates) + sdf['foo'] = SparseArray(['bar'] * len(dates)) + return sdf + + +@pytest.fixture +def float_frame_fill0_dense(): + """ + Fixture for dense DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0 + """ + values = SparseDataFrame(data).values + values[np.isnan(values)] = 0 + return DataFrame(values, columns=['A', 'B', 'C', 'D'], index=dates) + + +@pytest.fixture +def float_frame_fill0(): + """ + Fixture for sparse DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0 + """ + values = SparseDataFrame(data).values + values[np.isnan(values)] = 0 + return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], + default_fill_value=0, index=dates) + + +@pytest.fixture +def float_frame_fill2_dense(): + """ + Fixture for dense DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2 + """ + values = SparseDataFrame(data).values + values[np.isnan(values)] = 2 + return DataFrame(values, columns=['A', 'B', 'C', 'D'], index=dates) + + +@pytest.fixture +def float_frame_fill2(): + """ + Fixture for sparse DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2 + """ + values = SparseDataFrame(data).values + values[np.isnan(values)] = 2 + return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], + default_fill_value=2, index=dates) + + +@pytest.fixture +def empty_frame(): + """ + Fixture for empty SparseDataFrame + """ + return SparseDataFrame() diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index be5a1710119ee..30938966b5d1a 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -28,42 +28,6 @@ class TestSparseDataFrame(SharedWithSparse): _assert_frame_equal = staticmethod(tm.assert_sp_frame_equal) _assert_series_equal = staticmethod(tm.assert_sp_series_equal) - def setup_method(self, method): - self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C': np.arange(10, dtype=np.float64), - 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - - self.dates = bdate_range('1/1/2011', periods=10) - - self.orig = pd.DataFrame(self.data, index=self.dates) - self.iorig = pd.DataFrame(self.data, index=self.dates) - - self.frame = SparseDataFrame(self.data, index=self.dates) - self.iframe = SparseDataFrame(self.data, index=self.dates, - default_kind='integer') - self.mixed_frame = self.frame.copy(False) - self.mixed_frame['foo'] = pd.SparseArray(['bar'] * len(self.dates)) - - values = self.frame.values.copy() - values[np.isnan(values)] = 0 - - self.zorig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'], - index=self.dates) - self.zframe = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], - default_fill_value=0, index=self.dates) - - values = self.frame.values.copy() - values[np.isnan(values)] = 2 - - self.fill_orig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'], - index=self.dates) - self.fill_frame = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], - default_fill_value=2, - index=self.dates) - - self.empty = SparseDataFrame() - def test_fill_value_when_combine_const(self): # GH12723 dat = np.array([0, 1, np.nan, 3, 4, 5], dtype='float') @@ -73,8 +37,8 @@ def test_fill_value_when_combine_const(self): res = df.add(2, fill_value=0) tm.assert_sp_frame_equal(res, exp) - def test_values(self): - empty = self.empty.values + def test_values(self, empty_frame, float_frame): + empty = empty_frame.values assert empty.shape == (0, 0) no_cols = SparseDataFrame(index=np.arange(10)) @@ -85,28 +49,29 @@ def test_values(self): mat = no_index.values assert mat.shape == (0, 10) - def test_copy(self): - cp = self.frame.copy() + def test_copy(self, float_frame): + cp = float_frame.copy() assert isinstance(cp, SparseDataFrame) - tm.assert_sp_frame_equal(cp, self.frame) + tm.assert_sp_frame_equal(cp, float_frame) # as of v0.15.0 # this is now identical (but not is_a ) - assert cp.index.identical(self.frame.index) + assert cp.index.identical(float_frame.index) - def test_constructor(self): - for col, series in compat.iteritems(self.frame): + def test_constructor(self, float_frame, float_frame_int_kind, + float_frame_fill0): + for col, series in compat.iteritems(float_frame): assert isinstance(series, SparseSeries) - assert isinstance(self.iframe['A'].sp_index, IntIndex) + assert isinstance(float_frame_int_kind['A'].sp_index, IntIndex) # constructed zframe from matrix above - assert self.zframe['A'].fill_value == 0 + assert float_frame_fill0['A'].fill_value == 0 tm.assert_numpy_array_equal(pd.SparseArray([1., 2., 3., 4., 5., 6.]), - self.zframe['A'].values) + float_frame_fill0['A'].values) tm.assert_numpy_array_equal(np.array([0., 0., 0., 0., 1., 2., 3., 4., 5., 6.]), - self.zframe['A'].to_dense().values) + float_frame_fill0['A'].to_dense().values) # construct no data sdf = SparseDataFrame(columns=np.arange(10), index=np.arange(10)) @@ -115,29 +80,29 @@ def test_constructor(self): # construct from nested dict data = {} - for c, s in compat.iteritems(self.frame): + for c, s in compat.iteritems(float_frame): data[c] = s.to_dict() sdf = SparseDataFrame(data) - tm.assert_sp_frame_equal(sdf, self.frame) + tm.assert_sp_frame_equal(sdf, float_frame) # TODO: test data is copied from inputs # init dict with different index - idx = self.frame.index[:5] + idx = float_frame.index[:5] cons = SparseDataFrame( - self.frame, index=idx, columns=self.frame.columns, - default_fill_value=self.frame.default_fill_value, - default_kind=self.frame.default_kind, copy=True) - reindexed = self.frame.reindex(idx) + float_frame, index=idx, columns=float_frame.columns, + default_fill_value=float_frame.default_fill_value, + default_kind=float_frame.default_kind, copy=True) + reindexed = float_frame.reindex(idx) tm.assert_sp_frame_equal(cons, reindexed, exact_indices=False) # assert level parameter breaks reindex with pytest.raises(TypeError): - self.frame.reindex(idx, level=0) + float_frame.reindex(idx, level=0) - repr(self.frame) + repr(float_frame) def test_constructor_dict_order(self): # GH19018 @@ -151,24 +116,26 @@ def test_constructor_dict_order(self): expected = SparseDataFrame(data=d, columns=list('ab')) tm.assert_sp_frame_equal(frame, expected) - def test_constructor_ndarray(self): + def test_constructor_ndarray(self, float_frame): # no index or columns - sp = SparseDataFrame(self.frame.values) + sp = SparseDataFrame(float_frame.values) # 1d - sp = SparseDataFrame(self.data['A'], index=self.dates, columns=['A']) - tm.assert_sp_frame_equal(sp, self.frame.reindex(columns=['A'])) + sp = SparseDataFrame(float_frame['A'].values, index=float_frame.index, + columns=['A']) + tm.assert_sp_frame_equal(sp, float_frame.reindex(columns=['A'])) # raise on level argument - pytest.raises(TypeError, self.frame.reindex, columns=['A'], + pytest.raises(TypeError, float_frame.reindex, columns=['A'], level=1) # wrong length index / columns with tm.assert_raises_regex(ValueError, "^Index length"): - SparseDataFrame(self.frame.values, index=self.frame.index[:-1]) + SparseDataFrame(float_frame.values, index=float_frame.index[:-1]) with tm.assert_raises_regex(ValueError, "^Column length"): - SparseDataFrame(self.frame.values, columns=self.frame.columns[:-1]) + SparseDataFrame(float_frame.values, + columns=float_frame.columns[:-1]) # GH 9272 def test_constructor_empty(self): @@ -176,10 +143,10 @@ def test_constructor_empty(self): assert len(sp.index) == 0 assert len(sp.columns) == 0 - def test_constructor_dataframe(self): - dense = self.frame.to_dense() + def test_constructor_dataframe(self, float_frame): + dense = float_frame.to_dense() sp = SparseDataFrame(dense) - tm.assert_sp_frame_equal(sp, self.frame) + tm.assert_sp_frame_equal(sp, float_frame) def test_constructor_convert_index_once(self): arr = np.array([1.5, 2.5, 3.5]) @@ -292,12 +259,13 @@ def test_dtypes(self): expected = Series({'float64': 4}) tm.assert_series_equal(result, expected) - def test_shape(self): + def test_shape(self, float_frame, float_frame_int_kind, + float_frame_fill0, float_frame_fill2): # see gh-10452 - assert self.frame.shape == (10, 4) - assert self.iframe.shape == (10, 4) - assert self.zframe.shape == (10, 4) - assert self.fill_frame.shape == (10, 4) + assert float_frame.shape == (10, 4) + assert float_frame_int_kind.shape == (10, 4) + assert float_frame_fill0.shape == (10, 4) + assert float_frame_fill2.shape == (10, 4) def test_str(self): df = DataFrame(np.random.randn(10000, 4)) @@ -306,12 +274,14 @@ def test_str(self): sdf = df.to_sparse() str(sdf) - def test_array_interface(self): - res = np.sqrt(self.frame) - dres = np.sqrt(self.frame.to_dense()) + def test_array_interface(self, float_frame): + res = np.sqrt(float_frame) + dres = np.sqrt(float_frame.to_dense()) tm.assert_frame_equal(res.to_dense(), dres) - def test_pickle(self): + def test_pickle(self, float_frame, float_frame_int_kind, float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _test_roundtrip(frame, orig): result = tm.round_trip_pickle(frame) @@ -319,7 +289,10 @@ def _test_roundtrip(frame, orig): tm.assert_frame_equal(result.to_dense(), orig, check_dtype=False) _test_roundtrip(SparseDataFrame(), DataFrame()) - self._check_all(_test_roundtrip) + _test_roundtrip(float_frame, float_frame_dense) + _test_roundtrip(float_frame_int_kind, float_frame_dense) + _test_roundtrip(float_frame_fill0, float_frame_fill0_dense) + _test_roundtrip(float_frame_fill2, float_frame_fill2_dense) def test_dense_to_sparse(self): df = DataFrame({'A': [nan, nan, nan, 1, 2], @@ -353,17 +326,17 @@ def test_density(self): def test_sparse_to_dense(self): pass - def test_sparse_series_ops(self): - self._check_frame_ops(self.frame) + def test_sparse_series_ops(self, float_frame): + self._check_frame_ops(float_frame) - def test_sparse_series_ops_i(self): - self._check_frame_ops(self.iframe) + def test_sparse_series_ops_i(self, float_frame_int_kind): + self._check_frame_ops(float_frame_int_kind) - def test_sparse_series_ops_z(self): - self._check_frame_ops(self.zframe) + def test_sparse_series_ops_z(self, float_frame_fill0): + self._check_frame_ops(float_frame_fill0) - def test_sparse_series_ops_fill(self): - self._check_frame_ops(self.fill_frame) + def test_sparse_series_ops_fill(self, float_frame_fill2): + self._check_frame_ops(float_frame_fill2) def _check_frame_ops(self, frame): @@ -417,18 +390,18 @@ def _compare_to_dense(a, b, da, db, op): _compare_to_dense(s, frame, s, frame.to_dense(), op) # it works! - result = self.frame + self.frame.loc[:, ['A', 'B']] # noqa + result = frame + frame.loc[:, ['A', 'B']] # noqa - def test_op_corners(self): - empty = self.empty + self.empty + def test_op_corners(self, float_frame, empty_frame): + empty = empty_frame + empty_frame assert empty.empty - foo = self.frame + self.empty + foo = float_frame + empty_frame assert isinstance(foo.index, DatetimeIndex) - tm.assert_frame_equal(foo, self.frame * np.nan) + tm.assert_frame_equal(foo, float_frame * np.nan) - foo = self.empty + self.frame - tm.assert_frame_equal(foo, self.frame * np.nan) + foo = empty_frame + float_frame + tm.assert_frame_equal(foo, float_frame * np.nan) def test_scalar_ops(self): pass @@ -443,12 +416,12 @@ def test_getitem(self): pytest.raises(Exception, sdf.__getitem__, ['a', 'd']) - def test_iloc(self): + def test_iloc(self, float_frame): - # 2227 - result = self.frame.iloc[:, 0] + # GH 2227 + result = float_frame.iloc[:, 0] assert isinstance(result, SparseSeries) - tm.assert_sp_series_equal(result, self.frame['A']) + tm.assert_sp_series_equal(result, float_frame['A']) # preserve sparse index type. #2251 data = {'A': [0, 1]} @@ -456,22 +429,22 @@ def test_iloc(self): tm.assert_class_equal(iframe['A'].sp_index, iframe.iloc[:, 0].sp_index) - def test_set_value(self): + def test_set_value(self, float_frame): # ok, as the index gets converted to object - frame = self.frame.copy() + frame = float_frame.copy() with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): res = frame.set_value('foobar', 'B', 1.5) assert res.index.dtype == 'object' - res = self.frame + res = float_frame res.index = res.index.astype(object) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - res = self.frame.set_value('foobar', 'B', 1.5) - assert res is not self.frame + res = float_frame.set_value('foobar', 'B', 1.5) + assert res is not float_frame assert res.index[-1] == 'foobar' with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -482,38 +455,42 @@ def test_set_value(self): res2 = res.set_value('foobar', 'qux', 1.5) assert res2 is not res tm.assert_index_equal(res2.columns, - pd.Index(list(self.frame.columns) + ['qux'])) + pd.Index(list(float_frame.columns) + ['qux'])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert res2.get_value('foobar', 'qux') == 1.5 - def test_fancy_index_misc(self): + def test_fancy_index_misc(self, float_frame): # axis = 0 - sliced = self.frame.iloc[-2:, :] - expected = self.frame.reindex(index=self.frame.index[-2:]) + sliced = float_frame.iloc[-2:, :] + expected = float_frame.reindex(index=float_frame.index[-2:]) tm.assert_sp_frame_equal(sliced, expected) # axis = 1 - sliced = self.frame.iloc[:, -2:] - expected = self.frame.reindex(columns=self.frame.columns[-2:]) + sliced = float_frame.iloc[:, -2:] + expected = float_frame.reindex(columns=float_frame.columns[-2:]) tm.assert_sp_frame_equal(sliced, expected) - def test_getitem_overload(self): + def test_getitem_overload(self, float_frame): # slicing - sl = self.frame[:20] - tm.assert_sp_frame_equal(sl, self.frame.reindex(self.frame.index[:20])) + sl = float_frame[:20] + tm.assert_sp_frame_equal(sl, + float_frame.reindex(float_frame.index[:20])) # boolean indexing - d = self.frame.index[5] - indexer = self.frame.index > d + d = float_frame.index[5] + indexer = float_frame.index > d - subindex = self.frame.index[indexer] - subframe = self.frame[indexer] + subindex = float_frame.index[indexer] + subframe = float_frame[indexer] tm.assert_index_equal(subindex, subframe.index) - pytest.raises(Exception, self.frame.__getitem__, indexer[:-1]) + pytest.raises(Exception, float_frame.__getitem__, indexer[:-1]) - def test_setitem(self): + def test_setitem(self, float_frame, float_frame_int_kind, + float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _check_frame(frame, orig): N = len(frame) @@ -566,24 +543,27 @@ def _check_frame(frame, orig): frame['K'] = frame.default_fill_value assert len(frame['K'].sp_values) == 0 - self._check_all(_check_frame) + _check_frame(float_frame, float_frame_dense) + _check_frame(float_frame_int_kind, float_frame_dense) + _check_frame(float_frame_fill0, float_frame_fill0_dense) + _check_frame(float_frame_fill2, float_frame_fill2_dense) - def test_setitem_corner(self): - self.frame['a'] = self.frame['B'] - tm.assert_sp_series_equal(self.frame['a'], self.frame['B'], + def test_setitem_corner(self, float_frame): + float_frame['a'] = float_frame['B'] + tm.assert_sp_series_equal(float_frame['a'], float_frame['B'], check_names=False) - def test_setitem_array(self): - arr = self.frame['B'] + def test_setitem_array(self, float_frame): + arr = float_frame['B'] - self.frame['E'] = arr - tm.assert_sp_series_equal(self.frame['E'], self.frame['B'], + float_frame['E'] = arr + tm.assert_sp_series_equal(float_frame['E'], float_frame['B'], check_names=False) - self.frame['F'] = arr[:-1] - index = self.frame.index[:-1] - tm.assert_sp_series_equal(self.frame['E'].reindex(index), - self.frame['F'].reindex(index), + float_frame['F'] = arr[:-1] + index = float_frame.index[:-1] + tm.assert_sp_series_equal(float_frame['E'].reindex(index), + float_frame['F'].reindex(index), check_names=False) def test_setitem_chained_no_consolidate(self): @@ -595,44 +575,44 @@ def test_setitem_chained_no_consolidate(self): sdf[0][1] = 2 assert len(sdf._data.blocks) == 2 - def test_delitem(self): - A = self.frame['A'] - C = self.frame['C'] + def test_delitem(self, float_frame): + A = float_frame['A'] + C = float_frame['C'] - del self.frame['B'] - assert 'B' not in self.frame - tm.assert_sp_series_equal(self.frame['A'], A) - tm.assert_sp_series_equal(self.frame['C'], C) + del float_frame['B'] + assert 'B' not in float_frame + tm.assert_sp_series_equal(float_frame['A'], A) + tm.assert_sp_series_equal(float_frame['C'], C) - del self.frame['D'] - assert 'D' not in self.frame + del float_frame['D'] + assert 'D' not in float_frame - del self.frame['A'] - assert 'A' not in self.frame + del float_frame['A'] + assert 'A' not in float_frame - def test_set_columns(self): - self.frame.columns = self.frame.columns - pytest.raises(Exception, setattr, self.frame, 'columns', - self.frame.columns[:-1]) + def test_set_columns(self, float_frame): + float_frame.columns = float_frame.columns + pytest.raises(Exception, setattr, float_frame, 'columns', + float_frame.columns[:-1]) - def test_set_index(self): - self.frame.index = self.frame.index - pytest.raises(Exception, setattr, self.frame, 'index', - self.frame.index[:-1]) + def test_set_index(self, float_frame): + float_frame.index = float_frame.index + pytest.raises(Exception, setattr, float_frame, 'index', + float_frame.index[:-1]) - def test_append(self): - a = self.frame[:5] - b = self.frame[5:] + def test_append(self, float_frame): + a = float_frame[:5] + b = float_frame[5:] appended = a.append(b) - tm.assert_sp_frame_equal(appended, self.frame, exact_indices=False) + tm.assert_sp_frame_equal(appended, float_frame, exact_indices=False) - a = self.frame.iloc[:5, :3] - b = self.frame.iloc[5:] + a = float_frame.iloc[:5, :3] + b = float_frame.iloc[5:] with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # Stacklevel is set for pd.concat, not append appended = a.append(b) - tm.assert_sp_frame_equal(appended.iloc[:, :3], self.frame.iloc[:, :3], + tm.assert_sp_frame_equal(appended.iloc[:, :3], float_frame.iloc[:, :3], exact_indices=False) a = a[['B', 'C', 'A']].head(2) @@ -713,9 +693,9 @@ def test_astype_bool(self): assert res['A'].dtype == np.bool assert res['B'].dtype == np.bool - def test_fillna(self): - df = self.zframe.reindex(lrange(5)) - dense = self.zorig.reindex(lrange(5)) + def test_fillna(self, float_frame_fill0, float_frame_fill0_dense): + df = float_frame_fill0.reindex(lrange(5)) + dense = float_frame_fill0_dense.reindex(lrange(5)) result = df.fillna(0) expected = dense.fillna(0) @@ -795,45 +775,48 @@ def test_sparse_frame_fillna_limit(self): expected = expected.to_sparse() tm.assert_frame_equal(result, expected) - def test_rename(self): - result = self.frame.rename(index=str) - expected = SparseDataFrame(self.data, index=self.dates.strftime( - "%Y-%m-%d %H:%M:%S")) + def test_rename(self, float_frame): + result = float_frame.rename(index=str) + expected = SparseDataFrame(float_frame.values, + index=float_frame.index.strftime( + "%Y-%m-%d %H:%M:%S"), + columns=list('ABCD')) tm.assert_sp_frame_equal(result, expected) - result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) + result = float_frame.rename(columns=lambda x: '%s%d' % (x, 1)) data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C1': np.arange(10, dtype=np.float64), 'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - expected = SparseDataFrame(data, index=self.dates) + expected = SparseDataFrame(data, index=float_frame.index) tm.assert_sp_frame_equal(result, expected) - def test_corr(self): - res = self.frame.corr() - tm.assert_frame_equal(res, self.frame.to_dense().corr()) + def test_corr(self, float_frame): + res = float_frame.corr() + tm.assert_frame_equal(res, float_frame.to_dense().corr()) - def test_describe(self): - self.frame['foo'] = np.nan - self.frame.get_dtype_counts() - str(self.frame) - desc = self.frame.describe() # noqa + def test_describe(self, float_frame): + float_frame['foo'] = np.nan + float_frame.get_dtype_counts() + str(float_frame) + desc = float_frame.describe() # noqa - def test_join(self): - left = self.frame.loc[:, ['A', 'B']] - right = self.frame.loc[:, ['C', 'D']] + def test_join(self, float_frame): + left = float_frame.loc[:, ['A', 'B']] + right = float_frame.loc[:, ['C', 'D']] joined = left.join(right) - tm.assert_sp_frame_equal(joined, self.frame, exact_indices=False) + tm.assert_sp_frame_equal(joined, float_frame, exact_indices=False) - right = self.frame.loc[:, ['B', 'D']] + right = float_frame.loc[:, ['B', 'D']] pytest.raises(Exception, left.join, right) with tm.assert_raises_regex(ValueError, 'Other Series must have a name'): - self.frame.join(Series( - np.random.randn(len(self.frame)), index=self.frame.index)) + float_frame.join(Series( + np.random.randn(len(float_frame)), index=float_frame.index)) - def test_reindex(self): + def test_reindex(self, float_frame, float_frame_int_kind, + float_frame_fill0, float_frame_fill2): def _check_frame(frame): index = frame.index @@ -876,26 +859,27 @@ def _check_frame(frame): frame.default_fill_value) assert np.isnan(reindexed['Z'].sp_values).all() - _check_frame(self.frame) - _check_frame(self.iframe) - _check_frame(self.zframe) - _check_frame(self.fill_frame) + _check_frame(float_frame) + _check_frame(float_frame_int_kind) + _check_frame(float_frame_fill0) + _check_frame(float_frame_fill2) # with copy=False - reindexed = self.frame.reindex(self.frame.index, copy=False) + reindexed = float_frame.reindex(float_frame.index, copy=False) reindexed['F'] = reindexed['A'] - assert 'F' in self.frame + assert 'F' in float_frame - reindexed = self.frame.reindex(self.frame.index) + reindexed = float_frame.reindex(float_frame.index) reindexed['G'] = reindexed['A'] - assert 'G' not in self.frame + assert 'G' not in float_frame - def test_reindex_fill_value(self): + def test_reindex_fill_value(self, float_frame_fill0, + float_frame_fill0_dense): rng = bdate_range('20110110', periods=20) - result = self.zframe.reindex(rng, fill_value=0) - exp = self.zorig.reindex(rng, fill_value=0) - exp = exp.to_sparse(self.zframe.default_fill_value) + result = float_frame_fill0.reindex(rng, fill_value=0) + exp = float_frame_fill0_dense.reindex(rng, fill_value=0) + exp = exp.to_sparse(float_frame_fill0.default_fill_value) tm.assert_sp_frame_equal(result, exp) def test_reindex_method(self): @@ -968,20 +952,27 @@ def test_reindex_method(self): with pytest.raises(NotImplementedError): sparse.reindex(columns=range(6), method='ffill') - def test_take(self): - result = self.frame.take([1, 0, 2], axis=1) - expected = self.frame.reindex(columns=['B', 'A', 'C']) + def test_take(self, float_frame): + result = float_frame.take([1, 0, 2], axis=1) + expected = float_frame.reindex(columns=['B', 'A', 'C']) tm.assert_sp_frame_equal(result, expected) - def test_to_dense(self): + def test_to_dense(self, float_frame, float_frame_int_kind, + float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _check(frame, orig): dense_dm = frame.to_dense() tm.assert_frame_equal(frame, dense_dm) tm.assert_frame_equal(dense_dm, orig, check_dtype=False) - self._check_all(_check) + _check(float_frame, float_frame_dense) + _check(float_frame_int_kind, float_frame_dense) + _check(float_frame_fill0, float_frame_fill0_dense) + _check(float_frame_fill2, float_frame_fill2_dense) - def test_stack_sparse_frame(self): + def test_stack_sparse_frame(self, float_frame, float_frame_int_kind, + float_frame_fill0, float_frame_fill2): with catch_warnings(record=True): def _check(frame): @@ -995,14 +986,17 @@ def _check(frame): tm.assert_numpy_array_equal(from_dense_lp.values, from_sparse_lp.values) - _check(self.frame) - _check(self.iframe) + _check(float_frame) + _check(float_frame_int_kind) # for now - pytest.raises(Exception, _check, self.zframe) - pytest.raises(Exception, _check, self.fill_frame) + pytest.raises(Exception, _check, float_frame_fill0) + pytest.raises(Exception, _check, float_frame_fill2) - def test_transpose(self): + def test_transpose(self, float_frame, float_frame_int_kind, + float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _check(frame, orig): transposed = frame.T @@ -1013,9 +1007,14 @@ def _check(frame, orig): tm.assert_frame_equal(frame.T.T.to_dense(), orig.T.T) tm.assert_sp_frame_equal(frame, frame.T.T, exact_indices=False) - self._check_all(_check) + _check(float_frame, float_frame_dense) + _check(float_frame_int_kind, float_frame_dense) + _check(float_frame_fill0, float_frame_fill0_dense) + _check(float_frame_fill2, float_frame_fill2_dense) - def test_shift(self): + def test_shift(self, float_frame, float_frame_int_kind, float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _check(frame, orig): shifted = frame.shift(0) @@ -1042,32 +1041,29 @@ def _check(frame, orig): kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) - self._check_all(_check) + _check(float_frame, float_frame_dense) + _check(float_frame_int_kind, float_frame_dense) + _check(float_frame_fill0, float_frame_fill0_dense) + _check(float_frame_fill2, float_frame_fill2_dense) - def test_count(self): - dense_result = self.frame.to_dense().count() + def test_count(self, float_frame): + dense_result = float_frame.to_dense().count() - result = self.frame.count() + result = float_frame.count() tm.assert_series_equal(result, dense_result) - result = self.frame.count(axis=None) + result = float_frame.count(axis=None) tm.assert_series_equal(result, dense_result) - result = self.frame.count(axis=0) + result = float_frame.count(axis=0) tm.assert_series_equal(result, dense_result) - result = self.frame.count(axis=1) - dense_result = self.frame.to_dense().count(axis=1) + result = float_frame.count(axis=1) + dense_result = float_frame.to_dense().count(axis=1) # win32 don't check dtype tm.assert_series_equal(result, dense_result, check_dtype=False) - def _check_all(self, check_func): - check_func(self.frame, self.orig) - check_func(self.iframe, self.iorig) - check_func(self.zframe, self.zorig) - check_func(self.fill_frame, self.fill_orig) - def test_numpy_transpose(self): sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a']) result = np.transpose(np.transpose(sdf)) @@ -1076,8 +1072,8 @@ def test_numpy_transpose(self): msg = "the 'axes' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.transpose, sdf, axes=1) - def test_combine_first(self): - df = self.frame + def test_combine_first(self, float_frame): + df = float_frame result = df[::2].combine_first(df) result2 = df[::2].combine_first(df.to_dense()) @@ -1088,8 +1084,8 @@ def test_combine_first(self): tm.assert_sp_frame_equal(result, result2) tm.assert_sp_frame_equal(result, expected) - def test_combine_add(self): - df = self.frame.to_dense() + def test_combine_add(self, float_frame): + df = float_frame.to_dense() df2 = df.copy() df2['C'][:3] = np.nan df['A'][:3] = 5.7 @@ -1214,51 +1210,42 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(object): - def setup_method(self, method): - self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C': np.arange(10, dtype=float), - 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - - self.dates = bdate_range('1/1/2011', periods=10) - - self.frame = SparseDataFrame(self.data, index=self.dates) - def test_cumsum(self): - expected = SparseDataFrame(self.frame.to_dense().cumsum()) + def test_cumsum(self, float_frame): + expected = SparseDataFrame(float_frame.to_dense().cumsum()) - result = self.frame.cumsum() + result = float_frame.cumsum() tm.assert_sp_frame_equal(result, expected) - result = self.frame.cumsum(axis=None) + result = float_frame.cumsum(axis=None) tm.assert_sp_frame_equal(result, expected) - result = self.frame.cumsum(axis=0) + result = float_frame.cumsum(axis=0) tm.assert_sp_frame_equal(result, expected) - def test_numpy_cumsum(self): - result = np.cumsum(self.frame) - expected = SparseDataFrame(self.frame.to_dense().cumsum()) + def test_numpy_cumsum(self, float_frame): + result = np.cumsum(float_frame) + expected = SparseDataFrame(float_frame.to_dense().cumsum()) tm.assert_sp_frame_equal(result, expected) msg = "the 'dtype' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.cumsum, - self.frame, dtype=np.int64) + float_frame, dtype=np.int64) msg = "the 'out' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.cumsum, - self.frame, out=result) + float_frame, out=result) - def test_numpy_func_call(self): + def test_numpy_func_call(self, float_frame): # no exception should be raised even though # numpy passes in 'axis=None' or `axis=-1' funcs = ['sum', 'cumsum', 'var', 'mean', 'prod', 'cumprod', 'std', 'min', 'max'] for func in funcs: - getattr(np, func)(self.frame) + getattr(np, func)(float_frame) - @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)', strict=True) def test_quantile(self): # GH 17386 @@ -1275,7 +1262,7 @@ def test_quantile(self): tm.assert_series_equal(result, dense_expected) tm.assert_sp_series_equal(result, sparse_expected) - @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)', strict=True) def test_quantile_multi(self): # GH 17386