diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 189531c7b4459..94be24710362a 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import Categorical, DataFrame, Series, Timestamp, date_range -from pandas.tests.frame.common import TestData, _check_mixed_float +from pandas.tests.frame.common import _check_mixed_float import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -34,15 +34,15 @@ def _skip_if_no_pchip(): pytest.skip('scipy.interpolate.pchip missing') -class TestDataFrameMissingData(TestData): +class TestDataFrameMissingData(): - def test_dropEmptyRows(self): - N = len(self.frame.index) + def test_dropEmptyRows(self, float_frame): + N = len(float_frame.index) mat = np.random.randn(N) mat[:5] = np.nan - frame = DataFrame({'foo': mat}, index=self.frame.index) - original = Series(mat, index=self.frame.index, name='foo') + frame = DataFrame({'foo': mat}, index=float_frame.index) + original = Series(mat, index=float_frame.index, name='foo') expected = original.dropna() inplace_frame1, inplace_frame2 = frame.copy(), frame.copy() @@ -58,21 +58,21 @@ def test_dropEmptyRows(self): assert_series_equal(smaller_frame['foo'], expected) assert_series_equal(inplace_frame2['foo'], expected) - def test_dropIncompleteRows(self): - N = len(self.frame.index) + def test_dropIncompleteRows(self, float_frame): + N = len(float_frame.index) mat = np.random.randn(N) mat[:5] = np.nan - frame = DataFrame({'foo': mat}, index=self.frame.index) + frame = DataFrame({'foo': mat}, index=float_frame.index) frame['bar'] = 5 - original = Series(mat, index=self.frame.index, name='foo') + original = Series(mat, index=float_frame.index, name='foo') inp_frame1, inp_frame2 = frame.copy(), frame.copy() smaller_frame = frame.dropna() assert_series_equal(frame['foo'], original) inp_frame1.dropna(inplace=True) - exp = Series(mat[5:], index=self.frame.index[5:], name='foo') + exp = Series(mat[5:], index=float_frame.index[5:], name='foo') tm.assert_series_equal(smaller_frame['foo'], exp) tm.assert_series_equal(inp_frame1['foo'], exp) @@ -80,8 +80,8 @@ def test_dropIncompleteRows(self): assert_series_equal(frame['foo'], original) assert (frame['bar'] == 5).all() inp_frame2.dropna(subset=['bar'], inplace=True) - tm.assert_index_equal(samesize_frame.index, self.frame.index) - tm.assert_index_equal(inp_frame2.index, self.frame.index) + tm.assert_index_equal(samesize_frame.index, float_frame.index) + tm.assert_index_equal(inp_frame2.index, float_frame.index) @pytest.mark.skipif(PY2, reason="pytest.raises match regex fails") def test_dropna(self): @@ -160,17 +160,17 @@ def test_drop_and_dropna_caching(self): df2['A'].drop([1], inplace=True) assert_series_equal(df2['A'], original.drop([1])) - def test_dropna_corner(self): + def test_dropna_corner(self, float_frame): # bad input msg = "invalid how option: foo" with pytest.raises(ValueError, match=msg): - self.frame.dropna(how='foo') + float_frame.dropna(how='foo') msg = "must specify how or thresh" with pytest.raises(TypeError, match=msg): - self.frame.dropna(how=None) + float_frame.dropna(how=None) # non-existent column - 8303 with pytest.raises(KeyError, match=r"^\['X'\]$"): - self.frame.dropna(subset=['A', 'X']) + float_frame.dropna(subset=['A', 'X']) def test_dropna_multiple_axes(self): df = DataFrame([[1, np.nan, 2, 3], @@ -215,35 +215,39 @@ def test_dropna_tz_aware_datetime(self): index=[0, 3]) assert_frame_equal(result, expected) - def test_fillna(self): - tf = self.tsframe + def test_fillna_datetime(self, datetime_frame): + tf = datetime_frame tf.loc[tf.index[:5], 'A'] = np.nan tf.loc[tf.index[-5:], 'A'] = np.nan - zero_filled = self.tsframe.fillna(0) + zero_filled = datetime_frame.fillna(0) assert (zero_filled.loc[zero_filled.index[:5], 'A'] == 0).all() - padded = self.tsframe.fillna(method='pad') + padded = datetime_frame.fillna(method='pad') assert np.isnan(padded.loc[padded.index[:5], 'A']).all() assert (padded.loc[padded.index[-5:], 'A'] == padded.loc[padded.index[-5], 'A']).all() - # mixed type - mf = self.mixed_frame - mf.loc[mf.index[5:20], 'foo'] = np.nan - mf.loc[mf.index[-10:], 'A'] = np.nan - result = self.mixed_frame.fillna(value=0) - result = self.mixed_frame.fillna(method='pad') - msg = "Must specify a fill 'value' or 'method'" with pytest.raises(ValueError, match=msg): - self.tsframe.fillna() + datetime_frame.fillna() msg = "Cannot specify both 'value' and 'method'" with pytest.raises(ValueError, match=msg): - self.tsframe.fillna(5, method='ffill') + datetime_frame.fillna(5, method='ffill') + + def test_fillna_mixed_type(self, float_string_frame): + + mf = float_string_frame + mf.loc[mf.index[5:20], 'foo'] = np.nan + mf.loc[mf.index[-10:], 'A'] = np.nan + # TODO: make stronger assertion here, GH 25640 + mf.fillna(value=0) + mf.fillna(method='pad') + + def test_fillna_mixed_float(self, mixed_float_frame): # mixed numeric (but no float16) - mf = self.mixed_float.reindex(columns=['A', 'B', 'D']) + mf = mixed_float_frame.reindex(columns=['A', 'B', 'D']) mf.loc[mf.index[-10:], 'A'] = np.nan result = mf.fillna(value=0) _check_mixed_float(result, dtype=dict(C=None)) @@ -251,6 +255,7 @@ def test_fillna(self): result = mf.fillna(method='pad') _check_mixed_float(result, dtype=dict(C=None)) + def test_fillna_other(self): # empty frame (GH #2778) df = DataFrame(columns=['x']) for m in ['pad', 'backfill']: @@ -464,19 +469,19 @@ def test_fillna_datetime_columns(self): index=pd.date_range('20130110', periods=3)) tm.assert_frame_equal(result, expected) - def test_ffill(self): - self.tsframe['A'][:5] = np.nan - self.tsframe['A'][-5:] = np.nan + def test_ffill(self, datetime_frame): + datetime_frame['A'][:5] = np.nan + datetime_frame['A'][-5:] = np.nan - assert_frame_equal(self.tsframe.ffill(), - self.tsframe.fillna(method='ffill')) + assert_frame_equal(datetime_frame.ffill(), + datetime_frame.fillna(method='ffill')) - def test_bfill(self): - self.tsframe['A'][:5] = np.nan - self.tsframe['A'][-5:] = np.nan + def test_bfill(self, datetime_frame): + datetime_frame['A'][:5] = np.nan + datetime_frame['A'][-5:] = np.nan - assert_frame_equal(self.tsframe.bfill(), - self.tsframe.fillna(method='bfill')) + assert_frame_equal(datetime_frame.bfill(), + datetime_frame.fillna(method='bfill')) def test_frame_pad_backfill_limit(self): index = np.arange(10) @@ -602,24 +607,24 @@ def test_fillna_columns(self): expected = df.astype(float).fillna(method='ffill', axis=1) assert_frame_equal(result, expected) - def test_fillna_invalid_method(self): + def test_fillna_invalid_method(self, float_frame): with pytest.raises(ValueError, match='ffil'): - self.frame.fillna(method='ffil') + float_frame.fillna(method='ffil') - def test_fillna_invalid_value(self): + def test_fillna_invalid_value(self, float_frame): # list msg = ("\"value\" parameter must be a scalar or dict, but you passed" " a \"{}\"") with pytest.raises(TypeError, match=msg.format('list')): - self.frame.fillna([1, 2]) + float_frame.fillna([1, 2]) # tuple with pytest.raises(TypeError, match=msg.format('tuple')): - self.frame.fillna((1, 2)) + float_frame.fillna((1, 2)) # frame with series msg = ("\"value\" parameter must be a scalar, dict or Series, but you" " passed a \"DataFrame\"") with pytest.raises(TypeError, match=msg): - self.frame.iloc[:, 0].fillna(self.frame) + float_frame.iloc[:, 0].fillna(float_frame) def test_fillna_col_reordering(self): cols = ["COL." + str(i) for i in range(5, 0, -1)] @@ -628,16 +633,16 @@ def test_fillna_col_reordering(self): filled = df.fillna(method='ffill') assert df.columns.tolist() == filled.columns.tolist() - def test_fill_corner(self): - mf = self.mixed_frame + def test_fill_corner(self, float_frame, float_string_frame): + mf = float_string_frame mf.loc[mf.index[5:20], 'foo'] = np.nan mf.loc[mf.index[-10:], 'A'] = np.nan - filled = self.mixed_frame.fillna(value=0) + filled = float_string_frame.fillna(value=0) assert (filled.loc[filled.index[5:20], 'foo'] == 0).all() - del self.mixed_frame['foo'] + del float_string_frame['foo'] - empty_float = self.frame.reindex(columns=[]) + empty_float = float_frame.reindex(columns=[]) # TODO(wesm): unused? result = empty_float.fillna(value=0) # noqa @@ -652,7 +657,7 @@ def test_fill_value_when_combine_const(self): assert_frame_equal(res, exp) -class TestDataFrameInterpolate(TestData): +class TestDataFrameInterpolate(): def test_interp_basic(self): df = DataFrame({'A': [1, 2, np.nan, 4],