Skip to content

TST: Fixturize tests/frame/test_missing.py #25640

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 19, 2019
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 57 additions & 53 deletions pandas/tests/frame/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import pandas as pd
from pandas import Categorical, DataFrame, Series, Timestamp, date_range
from pandas.tests.frame.common import TestData, _check_mixed_float
from pandas.tests.frame.common import _check_mixed_float
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal

Expand All @@ -34,15 +34,15 @@ def _skip_if_no_pchip():
pytest.skip('scipy.interpolate.pchip missing')


class TestDataFrameMissingData(TestData):
class TestDataFrameMissingData():

def test_dropEmptyRows(self):
N = len(self.frame.index)
def test_dropEmptyRows(self, float_frame):
N = len(float_frame.index)
mat = np.random.randn(N)
mat[:5] = np.nan

frame = DataFrame({'foo': mat}, index=self.frame.index)
original = Series(mat, index=self.frame.index, name='foo')
frame = DataFrame({'foo': mat}, index=float_frame.index)
original = Series(mat, index=float_frame.index, name='foo')
expected = original.dropna()
inplace_frame1, inplace_frame2 = frame.copy(), frame.copy()

Expand All @@ -58,30 +58,30 @@ def test_dropEmptyRows(self):
assert_series_equal(smaller_frame['foo'], expected)
assert_series_equal(inplace_frame2['foo'], expected)

def test_dropIncompleteRows(self):
N = len(self.frame.index)
def test_dropIncompleteRows(self, float_frame):
N = len(float_frame.index)
mat = np.random.randn(N)
mat[:5] = np.nan

frame = DataFrame({'foo': mat}, index=self.frame.index)
frame = DataFrame({'foo': mat}, index=float_frame.index)
frame['bar'] = 5
original = Series(mat, index=self.frame.index, name='foo')
original = Series(mat, index=float_frame.index, name='foo')
inp_frame1, inp_frame2 = frame.copy(), frame.copy()

smaller_frame = frame.dropna()
assert_series_equal(frame['foo'], original)
inp_frame1.dropna(inplace=True)

exp = Series(mat[5:], index=self.frame.index[5:], name='foo')
exp = Series(mat[5:], index=float_frame.index[5:], name='foo')
tm.assert_series_equal(smaller_frame['foo'], exp)
tm.assert_series_equal(inp_frame1['foo'], exp)

samesize_frame = frame.dropna(subset=['bar'])
assert_series_equal(frame['foo'], original)
assert (frame['bar'] == 5).all()
inp_frame2.dropna(subset=['bar'], inplace=True)
tm.assert_index_equal(samesize_frame.index, self.frame.index)
tm.assert_index_equal(inp_frame2.index, self.frame.index)
tm.assert_index_equal(samesize_frame.index, float_frame.index)
tm.assert_index_equal(inp_frame2.index, float_frame.index)

@pytest.mark.skipif(PY2, reason="pytest.raises match regex fails")
def test_dropna(self):
Expand Down Expand Up @@ -160,17 +160,17 @@ def test_drop_and_dropna_caching(self):
df2['A'].drop([1], inplace=True)
assert_series_equal(df2['A'], original.drop([1]))

def test_dropna_corner(self):
def test_dropna_corner(self, float_frame):
# bad input
msg = "invalid how option: foo"
with pytest.raises(ValueError, match=msg):
self.frame.dropna(how='foo')
float_frame.dropna(how='foo')
msg = "must specify how or thresh"
with pytest.raises(TypeError, match=msg):
self.frame.dropna(how=None)
float_frame.dropna(how=None)
# non-existent column - 8303
with pytest.raises(KeyError, match=r"^\['X'\]$"):
self.frame.dropna(subset=['A', 'X'])
float_frame.dropna(subset=['A', 'X'])

def test_dropna_multiple_axes(self):
df = DataFrame([[1, np.nan, 2, 3],
Expand Down Expand Up @@ -215,42 +215,46 @@ def test_dropna_tz_aware_datetime(self):
index=[0, 3])
assert_frame_equal(result, expected)

def test_fillna(self):
tf = self.tsframe
def test_fillna_datetime(self, datetime_frame):
tf = datetime_frame
tf.loc[tf.index[:5], 'A'] = np.nan
tf.loc[tf.index[-5:], 'A'] = np.nan

zero_filled = self.tsframe.fillna(0)
zero_filled = datetime_frame.fillna(0)
assert (zero_filled.loc[zero_filled.index[:5], 'A'] == 0).all()

padded = self.tsframe.fillna(method='pad')
padded = datetime_frame.fillna(method='pad')
assert np.isnan(padded.loc[padded.index[:5], 'A']).all()
assert (padded.loc[padded.index[-5:], 'A'] ==
padded.loc[padded.index[-5], 'A']).all()

# mixed type
mf = self.mixed_frame
mf.loc[mf.index[5:20], 'foo'] = np.nan
mf.loc[mf.index[-10:], 'A'] = np.nan
result = self.mixed_frame.fillna(value=0)
result = self.mixed_frame.fillna(method='pad')

msg = "Must specify a fill 'value' or 'method'"
with pytest.raises(ValueError, match=msg):
self.tsframe.fillna()
datetime_frame.fillna()
msg = "Cannot specify both 'value' and 'method'"
with pytest.raises(ValueError, match=msg):
self.tsframe.fillna(5, method='ffill')
datetime_frame.fillna(5, method='ffill')

def test_fillna_mixed_type(self, float_string_frame):

mf = float_string_frame
mf.loc[mf.index[5:20], 'foo'] = np.nan
mf.loc[mf.index[-10:], 'A'] = np.nan
result = float_string_frame.fillna(value=0)
result = float_string_frame.fillna(method='pad') # noqa
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than adding `# noqa`, you can just remove the assignment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, unfortunately, this split (along fixtures) requested by @jreback is uncovering deficiencies in the existing test: neither of the two results is checked against anything. I don't know what the assertions here should be...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, it's entirely possible that there are some mistakes in the existing tests. I'd suggest adding a TODO here to make a stronger assertion and opening a follow-up issue.

For the time being, it's OK to remove the left-hand side (the `result =` assignment) and the `# noqa` note; at the very least, I suppose this tests that the method doesn't raise.


def test_fillna_mixed_float(self, mixed_float_frame):

# mixed numeric (but no float16)
mf = self.mixed_float.reindex(columns=['A', 'B', 'D'])
mf = mixed_float_frame.reindex(columns=['A', 'B', 'D'])
mf.loc[mf.index[-10:], 'A'] = np.nan
result = mf.fillna(value=0)
_check_mixed_float(result, dtype=dict(C=None))

result = mf.fillna(method='pad')
_check_mixed_float(result, dtype=dict(C=None))

def test_fillna_other(self):
# empty frame (GH #2778)
df = DataFrame(columns=['x'])
for m in ['pad', 'backfill']:
Expand Down Expand Up @@ -464,19 +468,19 @@ def test_fillna_datetime_columns(self):
index=pd.date_range('20130110', periods=3))
tm.assert_frame_equal(result, expected)

def test_ffill(self):
self.tsframe['A'][:5] = np.nan
self.tsframe['A'][-5:] = np.nan
def test_ffill(self, datetime_frame):
datetime_frame['A'][:5] = np.nan
datetime_frame['A'][-5:] = np.nan

assert_frame_equal(self.tsframe.ffill(),
self.tsframe.fillna(method='ffill'))
assert_frame_equal(datetime_frame.ffill(),
datetime_frame.fillna(method='ffill'))

def test_bfill(self):
self.tsframe['A'][:5] = np.nan
self.tsframe['A'][-5:] = np.nan
def test_bfill(self, datetime_frame):
datetime_frame['A'][:5] = np.nan
datetime_frame['A'][-5:] = np.nan

assert_frame_equal(self.tsframe.bfill(),
self.tsframe.fillna(method='bfill'))
assert_frame_equal(datetime_frame.bfill(),
datetime_frame.fillna(method='bfill'))

def test_frame_pad_backfill_limit(self):
index = np.arange(10)
Expand Down Expand Up @@ -602,24 +606,24 @@ def test_fillna_columns(self):
expected = df.astype(float).fillna(method='ffill', axis=1)
assert_frame_equal(result, expected)

def test_fillna_invalid_method(self):
def test_fillna_invalid_method(self, float_frame):
with pytest.raises(ValueError, match='ffil'):
self.frame.fillna(method='ffil')
float_frame.fillna(method='ffil')

def test_fillna_invalid_value(self):
def test_fillna_invalid_value(self, float_frame):
# list
msg = ("\"value\" parameter must be a scalar or dict, but you passed"
" a \"{}\"")
with pytest.raises(TypeError, match=msg.format('list')):
self.frame.fillna([1, 2])
float_frame.fillna([1, 2])
# tuple
with pytest.raises(TypeError, match=msg.format('tuple')):
self.frame.fillna((1, 2))
float_frame.fillna((1, 2))
# frame with series
msg = ("\"value\" parameter must be a scalar, dict or Series, but you"
" passed a \"DataFrame\"")
with pytest.raises(TypeError, match=msg):
self.frame.iloc[:, 0].fillna(self.frame)
float_frame.iloc[:, 0].fillna(float_frame)

def test_fillna_col_reordering(self):
cols = ["COL." + str(i) for i in range(5, 0, -1)]
Expand All @@ -628,16 +632,16 @@ def test_fillna_col_reordering(self):
filled = df.fillna(method='ffill')
assert df.columns.tolist() == filled.columns.tolist()

def test_fill_corner(self):
mf = self.mixed_frame
def test_fill_corner(self, float_frame, float_string_frame):
mf = float_string_frame
mf.loc[mf.index[5:20], 'foo'] = np.nan
mf.loc[mf.index[-10:], 'A'] = np.nan

filled = self.mixed_frame.fillna(value=0)
filled = float_string_frame.fillna(value=0)
assert (filled.loc[filled.index[5:20], 'foo'] == 0).all()
del self.mixed_frame['foo']
del float_string_frame['foo']

empty_float = self.frame.reindex(columns=[])
empty_float = float_frame.reindex(columns=[])

# TODO(wesm): unused?
result = empty_float.fillna(value=0) # noqa
Expand All @@ -652,7 +656,7 @@ def test_fill_value_when_combine_const(self):
assert_frame_equal(res, exp)


class TestDataFrameInterpolate(TestData):
class TestDataFrameInterpolate():

def test_interp_basic(self):
df = DataFrame({'A': [1, 2, np.nan, 4],
Expand Down