Skip to content

Remove unused/unreachable code from ops #18964

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 38 additions & 84 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def add_methods(cls, new_methods, force, select, exclude):
raise TypeError("May only pass either select or exclude")

if select:
# TODO: This is not hit in coverage report. Is it still needed?
select = set(select)
methods = {}
for key, method in new_methods.items():
Expand All @@ -164,6 +165,7 @@ def add_methods(cls, new_methods, force, select, exclude):
new_methods = methods

if exclude:
# TODO: This is not hit in coverage report. Is it still needed?
for k in exclude:
new_methods.pop(k, None)

Expand Down Expand Up @@ -354,24 +356,25 @@ class _TimeOp(_Op):
"""
Wrapper around Series datetime/time/timedelta arithmetic operations.
Generally, you should use classmethod ``_Op.get_op`` as an entry point.

This is only reached in cases where either `self.is_datetime_lhs`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you don't need the single back ticks

or `self.is_timedelta_lhs`.
"""
fill_value = iNaT

def __init__(self, left, right, name, na_op):
super(_TimeOp, self).__init__(left, right, name, na_op)

lvalues = self._convert_to_array(left, name=name)
rvalues = self._convert_to_array(right, name=name, other=lvalues)
lvalues = self._convert_to_array(left)
rvalues = self._convert_to_array(right, other=lvalues)

# left
self.is_offset_lhs = is_offsetlike(left)
self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
self.is_datetime64_lhs = is_datetime64_dtype(lvalues)
self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues)
self.is_datetime_lhs = (self.is_datetime64_lhs or
self.is_datetime64tz_lhs)
self.is_integer_lhs = left.dtype.kind in ['i', 'u']
self.is_floating_lhs = left.dtype.kind == 'f'
assert left.dtype.kind not in ['i', 'u', 'f'], left
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert one of the is_* e.g.
asert self.is_timedelta_lhs or self is_datetime_lhs


# right
self.is_offset_rhs = is_offsetlike(right)
Expand Down Expand Up @@ -440,44 +443,12 @@ def _validate_timedelta(self, name):
'of a series/ndarray of type datetime64[ns] '
'or a timedelta')

def _validate_offset(self, name):
# assumes self.is_offset_lhs

if self.is_timedelta_rhs:
# 2 timedeltas
if name not in ('__div__', '__rdiv__', '__truediv__',
'__rtruediv__', '__add__', '__radd__', '__sub__',
'__rsub__'):
raise TypeError("can only operate on a timedeltas for addition"
", subtraction, and division, but the operator"
" [{name}] was passed".format(name=name))

elif self.is_datetime_rhs:
if name not in ('__add__', '__radd__'):
raise TypeError("can only operate on a timedelta/DateOffset "
"and a datetime for addition, but the operator"
" [{name}] was passed".format(name=name))

else:
raise TypeError('cannot operate on a series without a rhs '
'of a series/ndarray of type datetime64[ns] '
'or a timedelta')

def _validate(self, lvalues, rvalues, name):
if self.is_datetime_lhs:
return self._validate_datetime(lvalues, rvalues, name)
elif self.is_timedelta_lhs:
return self._validate_timedelta(name)
elif self.is_offset_lhs:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this offset case not possible? (it may not be tested) but is it possible?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because _TimeOp is only used if is_datetime_lhs or is_timedelta_lhs

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, can you doc-string things a bit, will help future readers.

return self._validate_offset(name)

if ((self.is_integer_lhs or self.is_floating_lhs) and
self.is_timedelta_rhs):
self._check_timedelta_with_numeric(name)
else:
raise TypeError('cannot operate on a series without a rhs '
'of a series/ndarray of type datetime64[ns] '
'or a timedelta')
# The only other option is self.is_timedelta_lhs
return self._validate_timedelta(name)

def _check_timedelta_with_numeric(self, name):
if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'):
Expand All @@ -486,9 +457,10 @@ def _check_timedelta_with_numeric(self, name):
"multiplication, but the operator [{name}] "
"was passed".format(name=name))

def _convert_to_array(self, values, name=None, other=None):
def _convert_to_array(self, values, other=None):
"""converts values to ndarray"""
from pandas.core.tools.timedeltas import to_timedelta
name = self.name

ovalues = values
supplied_dtype = None
Expand Down Expand Up @@ -517,15 +489,16 @@ def _convert_to_array(self, values, name=None, other=None):
elif isinstance(values, pd.DatetimeIndex):
values = values.to_series()
# datetime with tz
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fix this comment

elif (isinstance(ovalues, datetime.datetime) and
hasattr(ovalues, 'tzinfo')):
elif isinstance(ovalues, datetime.datetime):
values = pd.DatetimeIndex(values)
# datetime array with tz
elif is_datetimetz(values):
if isinstance(values, ABCSeries):
values = values._values
elif not (isinstance(values, (np.ndarray, ABCSeries)) and
is_datetime64_dtype(values)):
# TODO: This is not hit in tests. What case is it intended
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think can remove this case if change 492 to
elif isinstance(ovalues, (datetime.datetime, np.datetime64))

In [4]: np.datetime64('2013-01-01T00:00:00.000000000')
Out[4]: numpy.datetime64('2013-01-01T00:00:00.000000000')

In [5]: isinstance(np.datetime64('2013-01-01T00:00:00.000000000'), np.ndarray)
Out[5]: False

In [6]: isinstance(np.array(np.datetime64('2013-01-01T00:00:00.000000000')), np.ndarray)
Out[6]: True

In [7]: np.array(np.datetime64('2013-01-01T00:00:00.000000000'))
Out[7]: array(1356998400000000000, dtype='datetime64[ns]')

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good.

# to catch?
values = libts.array_to_datetime(values)
elif (is_datetime64_dtype(values) and
not is_datetime64_ns_dtype(values)):
Expand Down Expand Up @@ -569,10 +542,8 @@ def _convert_for_datetime(self, lvalues, rvalues):

# datetime subtraction means timedelta
if self.is_datetime_lhs and self.is_datetime_rhs:
if self.name in ('__sub__', '__rsub__'):
self.dtype = 'timedelta64[ns]'
else:
self.dtype = 'datetime64[ns]'
assert self.name in ('__sub__', '__rsub__')
self.dtype = 'timedelta64[ns]'
elif self.is_datetime64tz_lhs:
self.dtype = lvalues.dtype
elif self.is_datetime64tz_rhs:
Expand All @@ -595,9 +566,7 @@ def _offset(lvalues, rvalues):
self.na_op = lambda x, y: getattr(x, self.name)(y)
return lvalues, rvalues

if self.is_offset_lhs:
lvalues, rvalues = _offset(lvalues, rvalues)
elif self.is_offset_rhs:
if self.is_offset_rhs:
rvalues, lvalues = _offset(rvalues, lvalues)
else:

Expand All @@ -616,8 +585,6 @@ def _offset(lvalues, rvalues):
self.dtype = 'timedelta64[ns]'

# convert Tick DateOffset to underlying delta
if self.is_offset_lhs:
lvalues = to_timedelta(lvalues, box=False)
if self.is_offset_rhs:
rvalues = to_timedelta(rvalues, box=False)

Expand All @@ -628,7 +595,7 @@ def _offset(lvalues, rvalues):
# time delta division -> unit less
# integer gets converted to timedelta in np < 1.6
if ((self.is_timedelta_lhs and self.is_timedelta_rhs) and
not self.is_integer_rhs and not self.is_integer_lhs and
not self.is_integer_rhs and
self.name in ('__div__', '__truediv__')):
self.dtype = 'float64'
self.fill_value = np.nan
Expand Down Expand Up @@ -720,12 +687,18 @@ def na_op(x, y):
result = missing.fill_zeros(result, x, y, name, fill_zeros)
return result

def safe_na_op(lvalues, rvalues):
def safe_na_op(lvalues, rvalues, na_op):
# We pass na_op explicitly here for namespace/closure reasons;
# the alternative is to make it an argument to `wrapper`, which
# would mess with the signatures of Series methods.
try:
with np.errstate(all='ignore'):
return na_op(lvalues, rvalues)
except Exception:
if isinstance(rvalues, ABCSeries):
# TODO: This case is not hit in tests. For it to be hit would
# require `right` to be an object such that right.values
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

limit the comments to useful info, 'screwup' does not fall in this category

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is more a suggestion that we get rid of this block.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get that. but this is public code, you can certainly say 'this should be removed'

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok with removing this code here

# is a Series. This should probably be removed.
if is_object_dtype(rvalues):
# if dtype is object, try elementwise op
return libalgos.arrmap_object(rvalues,
Expand All @@ -736,7 +709,7 @@ def safe_na_op(lvalues, rvalues):
lambda x: op(x, rvalues))
raise

def wrapper(left, right, name=name, na_op=na_op):
def wrapper(left, right):

if isinstance(right, ABCDataFrame):
return NotImplemented
Expand All @@ -745,35 +718,31 @@ def wrapper(left, right, name=name, na_op=na_op):

converted = _Op.get_op(left, right, name, na_op)

left, right = converted.left, converted.right
lvalues, rvalues = converted.lvalues, converted.rvalues
dtype = converted.dtype
wrap_results = converted.wrap_results
na_op = converted.na_op

if isinstance(rvalues, ABCSeries):
name = _maybe_match_name(left, rvalues)
res_name = _maybe_match_name(left, rvalues)
lvalues = getattr(lvalues, 'values', lvalues)
rvalues = getattr(rvalues, 'values', rvalues)
# _Op aligns left and right
else:
if isinstance(rvalues, pd.Index):
name = _maybe_match_name(left, rvalues)
res_name = _maybe_match_name(left, rvalues)
else:
name = left.name
res_name = left.name
if (hasattr(lvalues, 'values') and
not isinstance(lvalues, pd.DatetimeIndex)):
lvalues = lvalues.values

result = wrap_results(safe_na_op(lvalues, rvalues))
return construct_result(
left,
result,
index=left.index,
name=name,
dtype=dtype,
)
result = wrap_results(safe_na_op(lvalues, rvalues, converted.na_op))
return construct_result(left, result,
index=left.index, name=res_name, dtype=dtype)

wrapper.__name__ = name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is hit in tests, yes? (the name of the wrapper)

Copy link
Member Author

@jbrockmendel jbrockmendel Dec 28, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0.21.1:

>>> pd.Series.__sub__
<unbound method Series.wrapper>

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm yeah that's not right. can you add tests for this? (I believe we have a suite of tests for the stat methods, so prob just need to add these on).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We recently made similar tests for Index comparison method names in tests.indexes.test_base, I don't see much else to that effect. I'll put something together.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if hasattr(operator, name) and callable(getattr(operator, name)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

huh? what is this for

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You asked for generic docstrings for these ops. These are about as generic as they get. But there is no e.g. operator.rfloordiv, so we have to condition.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand, but this is an obscure way of doing this. I would rather actually construct a proper doc-string here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I'll remove this, can consider docstrings out of scope for this PR.

wrapper.__doc__ = getattr(operator, name).__doc__
return wrapper


Expand Down Expand Up @@ -857,6 +826,8 @@ def wrapper(self, other, axis=None):
# Validate the axis parameter
if axis is not None:
self._get_axis_number(axis)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes should change (but agreed may not be hit in tests)

# TODO: should this be `axis = self._get_axis_number(axis)`?
# This is not hit in tests.

if isinstance(other, ABCSeries):
name = _maybe_match_name(self, other)
Expand Down Expand Up @@ -1379,23 +1350,6 @@ def f(self, other):

def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None,
default_axis=None, **eval_kwargs):
# copied from Series na_op above, but without unnecessary branch for
# non-scalar
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, not super concerned about Panel things generally.

def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
except TypeError:

# TODO: might need to find_common_type here?
result = np.empty(len(x), dtype=x.dtype)
mask = notna(x)
result[mask] = op(x[mask], y)
result, changed = maybe_upcast_putmask(result, ~mask, np.nan)

result = missing.fill_zeros(result, x, y, name, fill_zeros)
return result

# work only for scalars
def f(self, other):
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,3 +752,17 @@ def test_dt_accessor_api_for_categorical(self):
AttributeError, "Can only use .dt accessor with datetimelike"):
invalid.dt
assert not hasattr(invalid, 'str')


@pytest.mark.parametrize('opname', [
'__add__', '__radd__',
'__sub__', '__rsub__',
'__mul__', '__rmul__',
# '__div__', '__rdiv__', # TODO: Is this different in py2 vs py3?
'__truediv__', '__rtruediv__',
'__floordiv__', '__rfloordiv__',
'__mod__', '__rmod__', '__divmod__',
'__pow__', '__rpow__'])
def test_generated_op_names(opname):
method = getattr(pd.Series, opname)
assert method.__name__ == opname
57 changes: 57 additions & 0 deletions pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,63 @@ def test_frame_sub_datetime64_not_ns(self):
Timedelta(days=2)])
tm.assert_frame_equal(res, expected)

def test_scalar_op_retains_name(self):
# GH#18964
ser = pd.Series([pd.Timedelta(days=1, hours=2)], name='NCC-1701D')

dt = pd.Timestamp(1968, 7, 8)
res = ser + dt
assert res.name == ser.name
res = dt - ser
assert res.name == ser.name

@pytest.mark.xfail(reason='GH#18963 DatetimeIndex+Series[datetime64] '
'returns DatetimeIndex with wrong name.')
def test_dti_op_retains_name(self):
# GH#18964
ser = pd.Series([pd.Timedelta(days=1, hours=2)], name='NCC-1701D')
dti = pd.DatetimeIndex([pd.Timestamp(1968, 7, 8)], name='Serenity')

expected = pd.Series([ser[0] + dti[0]], name=None)
res = ser + dti
tm.assert_series_equal(res, expected)
res = dti + ser # GH#18963 wrong type and name
tm.assert_series_equal(res, expected)

expected = pd.Series([dti[0] - ser[0]], name=None)
res = dti - ser # GH#18963 wrong type and name
tm.assert_series_equal(res, expected)
res = -ser + dti
tm.assert_series_equal(res, expected)

res = ser + dti.rename(name=ser.name)
tm.assert_series_equal(res, expected, check_names=False)
assert res.name == ser.name # TODO: Series.rename(name=...) fails

@pytest.mark.xfail(reason='GH#18824 Series[timedelta64]+TimedeltaIndex'
'gets name from TimedeltaIndex;'
'reverse op raises ValueError')
def test_tdi_op_retains_name(self):
# GH#18964
ser = pd.Series([pd.Timedelta(days=1, hours=2)], name='NCC-1701D')
tdi = pd.TimedeltaIndex(ser, name='Heart Of Gold')

expected = pd.Series([ser[0] + tdi[0]], name=None)
res = ser + tdi # right type, wrong name
tm.assert_series_equal(res, expected)
res = tdi + ser
tm.assert_series_equal(res, expected)

expected = pd.Series([ser[0] - tdi[0]], name=None)
res = ser - tdi # right type, wrong name
tm.assert_series_equal(res, expected)
res = -tdi + ser
tm.assert_series_equal(res, expected)

res = ser + tdi.rename(name=ser.name)
tm.assert_series_equal(res, expected, check_names=False)
assert res.name == ser.name # TODO: Series.rename(name=...) fails

def test_operators_datetimelike(self):
def run_ops(ops, get_ser, test_ser):

Expand Down