Skip to content

CLN: Clean reductions/test_reductions.py #32035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 17, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 66 additions & 61 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,60 +66,64 @@ def test_ops(self, opname, obj):
expected = expected.astype("M8[ns]").astype("int64")
assert result.value == expected

def test_nanops(self):
@pytest.mark.parametrize("opname", ["max", "min"])
def test_nanops(self, opname, index_or_series):
# GH#7261
for opname in ["max", "min"]:
for klass in [Index, Series]:
arg_op = "arg" + opname if klass is Index else "idx" + opname

obj = klass([np.nan, 2.0])
assert getattr(obj, opname)() == 2.0

obj = klass([np.nan])
assert pd.isna(getattr(obj, opname)())
assert pd.isna(getattr(obj, opname)(skipna=False))

obj = klass([], dtype=object)
assert pd.isna(getattr(obj, opname)())
assert pd.isna(getattr(obj, opname)(skipna=False))

obj = klass([pd.NaT, datetime(2011, 11, 1)])
# check DatetimeIndex monotonic path
assert getattr(obj, opname)() == datetime(2011, 11, 1)
assert getattr(obj, opname)(skipna=False) is pd.NaT

assert getattr(obj, arg_op)() == 1
result = getattr(obj, arg_op)(skipna=False)
if klass is Series:
assert np.isnan(result)
else:
assert result == -1

obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
# check DatetimeIndex non-monotonic path
assert getattr(obj, opname)(), datetime(2011, 11, 1)
assert getattr(obj, opname)(skipna=False) is pd.NaT

assert getattr(obj, arg_op)() == 1
result = getattr(obj, arg_op)(skipna=False)
if klass is Series:
assert np.isnan(result)
else:
assert result == -1

for dtype in ["M8[ns]", "datetime64[ns, UTC]"]:
# cases with empty Series/DatetimeIndex
obj = klass([], dtype=dtype)

assert getattr(obj, opname)() is pd.NaT
assert getattr(obj, opname)(skipna=False) is pd.NaT

with pytest.raises(ValueError, match="empty sequence"):
getattr(obj, arg_op)()
with pytest.raises(ValueError, match="empty sequence"):
getattr(obj, arg_op)(skipna=False)

# argmin/max
klass = index_or_series
arg_op = "arg" + opname if klass is Index else "idx" + opname

obj = klass([np.nan, 2.0])
assert getattr(obj, opname)() == 2.0

obj = klass([np.nan])
assert pd.isna(getattr(obj, opname)())
assert pd.isna(getattr(obj, opname)(skipna=False))

obj = klass([], dtype=object)
assert pd.isna(getattr(obj, opname)())
assert pd.isna(getattr(obj, opname)(skipna=False))

obj = klass([pd.NaT, datetime(2011, 11, 1)])
# check DatetimeIndex monotonic path
assert getattr(obj, opname)() == datetime(2011, 11, 1)
assert getattr(obj, opname)(skipna=False) is pd.NaT

assert getattr(obj, arg_op)() == 1
result = getattr(obj, arg_op)(skipna=False)
if klass is Series:
assert np.isnan(result)
else:
assert result == -1

obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
# check DatetimeIndex non-monotonic path
assert getattr(obj, opname)(), datetime(2011, 11, 1)
assert getattr(obj, opname)(skipna=False) is pd.NaT

assert getattr(obj, arg_op)() == 1
result = getattr(obj, arg_op)(skipna=False)
if klass is Series:
assert np.isnan(result)
else:
assert result == -1

@pytest.mark.parametrize("opname", ["max", "min"])
@pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"])
def test_nanops_empty_object(self, opname, index_or_series, dtype):
klass = index_or_series
arg_op = "arg" + opname if klass is Index else "idx" + opname
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I broke this into its own test because that allows parametrizing over these dtypes without repeating all of test_nanops (which was, and still is, pretty big), and this case is conceptually a bit more specific than just "testing nanops".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get that, but why is arg_op specifically needed? What is its purpose?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I think the name of the method being tested differs depending on whether the object is a Series (idxmin/idxmax) or an Index (argmin/argmax), so it has to be built dynamically in the test.


obj = klass([], dtype=dtype)

assert getattr(obj, opname)() is pd.NaT
assert getattr(obj, opname)(skipna=False) is pd.NaT

with pytest.raises(ValueError, match="empty sequence"):
getattr(obj, arg_op)()
with pytest.raises(ValueError, match="empty sequence"):
getattr(obj, arg_op)(skipna=False)

def test_argminmax(self):
obj = Index(np.arange(5, dtype="int64"))
assert obj.argmin() == 0
assert obj.argmax() == 4
Expand Down Expand Up @@ -224,16 +228,17 @@ def test_minmax_timedelta64(self):
assert idx.argmin() == 0
assert idx.argmax() == 2

for op in ["min", "max"]:
# Return NaT
obj = TimedeltaIndex([])
assert pd.isna(getattr(obj, op)())
@pytest.mark.parametrize("op", ["min", "max"])
def test_minmax_timedelta_empty_or_na(self, op):
# Return NaT
obj = TimedeltaIndex([])
assert getattr(obj, op)() is pd.NaT

obj = TimedeltaIndex([pd.NaT])
assert pd.isna(getattr(obj, op)())
obj = TimedeltaIndex([pd.NaT])
assert getattr(obj, op)() is pd.NaT

obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
assert pd.isna(getattr(obj, op)())
obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
assert getattr(obj, op)() is pd.NaT

def test_numpy_minmax_timedelta64(self):
td = timedelta_range("16815 days", "16820 days", freq="D")
Expand Down